From f87bd49b66a303706c4b13e6003fad105ed3aa38 Mon Sep 17 00:00:00 2001
From: dumerrill <dumerrill@nvidia.com>
Date: Mon, 14 Dec 2015 13:12:25 -0500
Subject: [PATCH] 1.5.0 doxgen added bool sorting test

Former-commit-id: 9607db2517fced1c3bda71dac951007be0a02300
---
 CHANGE_LOG.TXT                                |  15 +
 cub/block/block_load.cuh                      |   3 +-
 docs/download_cub.html                        |   6 +-
 docs/html/CHANGE_LOG.TXT                      |  15 +
 docs/html/annotated.html                      | 121 ++--
 .../arg__index__input__iterator_8cuh.html     |   4 +-
 ...g__index__input__iterator_8cuh_source.html |  46 +-
 docs/html/block__discontinuity_8cuh.html      |   2 +-
 .../block__discontinuity_8cuh_source.html     |   4 +-
 docs/html/block__exchange_8cuh.html           |   2 +-
 ...__exchange_8cuh_source.html.REMOVED.git-id |   2 +-
 docs/html/block__histogram_8cuh.html          |   2 +-
 docs/html/block__histogram_8cuh_source.html   |   4 +-
 docs/html/block__load_8cuh.html               |   2 +-
 ...lock__load_8cuh_source.html.REMOVED.git-id |   2 +-
 docs/html/block__radix__sort_8cuh.html        |   2 +-
 docs/html/block__radix__sort_8cuh_source.html |  60 +-
 docs/html/block__reduce_8cuh.html             |   2 +-
 docs/html/block__reduce_8cuh_source.html      |   4 +-
 docs/html/block__scan_8cuh.html               |   2 +-
 ...lock__scan_8cuh_source.html.REMOVED.git-id |   2 +-
 docs/html/block__store_8cuh.html              |   2 +-
 docs/html/block__store_8cuh_source.html       |  16 +-
 ...cache__modified__input__iterator_8cuh.html |   2 +-
 ...modified__input__iterator_8cuh_source.html |   2 +-
 ...ache__modified__output__iterator_8cuh.html |   2 +-
 ...odified__output__iterator_8cuh_source.html |   2 +-
 ..._1_1_arg_index_input_iterator-members.html |  37 +-
 ...classcub_1_1_arg_index_input_iterator.html |  25 +-
 ...sscub_1_1_block_discontinuity-members.html |   2 +-
 .../classcub_1_1_block_discontinuity.html     |   2 +-
 .../classcub_1_1_block_exchange-members.html  |   2 +-
 docs/html/classcub_1_1_block_exchange.html    |   2 +-
 .../classcub_1_1_block_histogram-members.html |   2 +-
 docs/html/classcub_1_1_block_histogram.html   |   2 +-
 .../html/classcub_1_1_block_load-members.html |   2 +-
 docs/html/classcub_1_1_block_load.html        |  12 +-
 ...classcub_1_1_block_radix_sort-members.html |   2 +-
 docs/html/classcub_1_1_block_radix_sort.html  |   4 +-
 .../classcub_1_1_block_reduce-members.html    |   2 +-
 docs/html/classcub_1_1_block_reduce.html      |  14 +-
 .../html/classcub_1_1_block_scan-members.html |   2 +-
 ...lasscub_1_1_block_scan.html.REMOVED.git-id |   2 +-
 .../classcub_1_1_block_store-members.html     |   2 +-
 docs/html/classcub_1_1_block_store.html       |   2 +-
 ...cache_modified_input_iterator-members.html |   2 +-
 ...cub_1_1_cache_modified_input_iterator.html |   2 +-
 ...ache_modified_output_iterator-members.html |   2 +-
 ...ub_1_1_cache_modified_output_iterator.html |   2 +-
 ...b_1_1_constant_input_iterator-members.html |   2 +-
 .../classcub_1_1_constant_input_iterator.html |   2 +-
 ...b_1_1_counting_input_iterator-members.html |   2 +-
 .../classcub_1_1_counting_input_iterator.html |   2 +-
 .../classcub_1_1_swizzle_scan_op-members.html |   2 +-
 docs/html/classcub_1_1_swizzle_scan_op.html   |   2 +-
 ...ub_1_1_tex_obj_input_iterator-members.html |   2 +-
 .../classcub_1_1_tex_obj_input_iterator.html  |   2 +-
 ...ub_1_1_tex_ref_input_iterator-members.html |   2 +-
 .../classcub_1_1_tex_ref_input_iterator.html  |   2 +-
 ..._1_1_transform_input_iterator-members.html |   2 +-
 ...classcub_1_1_transform_input_iterator.html |   2 +-
 .../classcub_1_1_warp_reduce-members.html     |   2 +-
 docs/html/classcub_1_1_warp_reduce.html       |   2 +-
 docs/html/classcub_1_1_warp_scan-members.html |   2 +-
 ...classcub_1_1_warp_scan.html.REMOVED.git-id |   2 +-
 docs/html/classes.html                        |  69 +-
 docs/html/constant__input__iterator_8cuh.html |   2 +-
 ...constant__input__iterator_8cuh_source.html |   2 +-
 docs/html/counting__input__iterator_8cuh.html |   2 +-
 ...counting__input__iterator_8cuh_source.html |   2 +-
 docs/html/cub_8cuh.html                       |   4 +-
 docs/html/cub_8cuh_source.html                |  88 +--
 docs/html/device__histogram_8cuh.html         |   2 +-
 docs/html/device__histogram_8cuh_source.html  | 638 +++++++++---------
 docs/html/device__partition_8cuh.html         |   2 +-
 docs/html/device__partition_8cuh_source.html  | 152 ++---
 docs/html/device__radix__sort_8cuh.html       |   2 +-
 .../html/device__radix__sort_8cuh_source.html | 554 +++++++--------
 docs/html/device__reduce_8cuh.html            |   3 +-
 docs/html/device__reduce_8cuh_source.html     | 488 +++++++-------
 .../device__run__length__encode_8cuh.html     |   2 +-
 ...vice__run__length__encode_8cuh_source.html | 176 ++---
 docs/html/device__scan_8cuh.html              |   2 +-
 docs/html/device__scan_8cuh_source.html       | 268 ++++----
 .../device__segmented__radix__sort_8cuh.html  |   2 +-
 ...e__segmented__radix__sort_8cuh_source.html | 633 +++++++++--------
 docs/html/device__segmented__reduce_8cuh.html |   3 +-
 ...device__segmented__reduce_8cuh_source.html | 461 ++++++-------
 docs/html/device__select_8cuh.html            |   4 +-
 docs/html/device__select_8cuh_source.html     | 224 +++---
 docs/html/device__spmv_8cuh.html              |   2 +-
 docs/html/device__spmv_8cuh_source.html       |   2 +-
 .../dir_011e1c944d88f71be72e1e24a5fda7cf.html |   2 +-
 .../dir_18fc672d63781b5a743137aee24ff656.html |   4 +-
 .../dir_80932b4cec52750ff92b1a1912314cf5.html |   2 +-
 .../dir_bb50a5ef59f19d030d06415663184d05.html |   2 +-
 .../dir_cb3a671affffe7eeb3fdf5ae58e42cc8.html |   2 +-
 .../dir_d583f216f1aafe19404e836b0c097ad2.html |   2 +-
 docs/html/download_cub.html                   |   6 +-
 .../example_block_radix_sort_8cu-example.html |   2 +-
 .../example_block_reduce_8cu-example.html     |   2 +-
 docs/html/example_block_scan_8cu-example.html |   2 +-
 .../example_device_histogram_8cu-example.html |   2 +-
 ..._device_partition_flagged_8cu-example.html |   2 +-
 ...ample_device_partition_if_8cu-example.html |   2 +-
 ...example_device_radix_sort_8cu-example.html |  36 +-
 .../example_device_reduce_8cu-example.html    |   6 +-
 .../html/example_device_scan_8cu-example.html |   2 +-
 ...ple_device_select_flagged_8cu-example.html |   2 +-
 .../example_device_select_if_8cu-example.html |   2 +-
 ...mple_device_select_unique_8cu-example.html |   2 +-
 docs/html/examples.html                       |   2 +-
 docs/html/files.html                          |  35 +-
 docs/html/functions.html                      |  14 +-
 docs/html/functions_0x62.html                 |   3 +-
 docs/html/functions_0x63.html                 |   6 +-
 docs/html/functions_0x64.html                 |  18 +-
 docs/html/functions_0x65.html                 |   3 +-
 docs/html/functions_0x66.html                 |   3 +-
 docs/html/functions_0x68.html                 |   3 +-
 docs/html/functions_0x69.html                 |   3 +-
 docs/html/functions_0x6c.html                 |   3 +-
 docs/html/functions_0x6d.html                 |   9 +-
 docs/html/functions_0x6e.html                 |   6 +-
 docs/html/functions_0x6f.html                 |  80 ++-
 docs/html/functions_0x70.html                 |   3 +-
 docs/html/functions_0x72.html                 |  15 +-
 docs/html/functions_0x73.html                 |  35 +-
 docs/html/functions_0x74.html                 |   3 +-
 docs/html/functions_0x75.html                 |   3 +-
 docs/html/functions_0x76.html                 |  15 +-
 docs/html/functions_0x77.html                 |   3 +-
 docs/html/functions_0x7e.html                 |   3 +-
 docs/html/functions_func.html                 |  10 +-
 docs/html/functions_func_0x62.html            |   2 +-
 docs/html/functions_func_0x63.html            |   5 +-
 docs/html/functions_func_0x64.html            |   7 +-
 docs/html/functions_func_0x65.html            |   2 +-
 docs/html/functions_func_0x66.html            |   2 +-
 docs/html/functions_func_0x68.html            |   2 +-
 docs/html/functions_func_0x69.html            |   2 +-
 docs/html/functions_func_0x6c.html            |   2 +-
 docs/html/functions_func_0x6d.html            |   8 +-
 docs/html/functions_func_0x6e.html            |   5 +-
 docs/html/functions_func_0x6f.html            |  80 +--
 docs/html/functions_func_0x72.html            |  10 +-
 docs/html/functions_func_0x73.html            |  17 +-
 docs/html/functions_func_0x74.html            |   2 +-
 docs/html/functions_func_0x75.html            |   2 +-
 docs/html/functions_func_0x77.html            |   2 +-
 docs/html/functions_func_0x7e.html            |   2 +-
 docs/html/functions_rela.html                 |   2 +-
 docs/html/functions_type.html                 |  24 +-
 docs/html/functions_vars.html                 |  20 +-
 docs/html/globals.html                        |   8 +-
 docs/html/globals_defs.html                   |   8 +-
 docs/html/group___block_module.html           |   2 +-
 docs/html/group___collective_module.html      |   2 +-
 docs/html/group___device_module.html          |   2 +-
 docs/html/group___segmented_module.html       |  10 +-
 docs/html/group___single_module.html          |   2 +-
 docs/html/group___util_io.html                |  20 +-
 docs/html/group___util_iterator.html          |   4 +-
 docs/html/group___util_mgmt.html              |  12 +-
 docs/html/group___util_module.html            |  20 +-
 docs/html/group___util_ptx.html               |   2 +-
 docs/html/group___warp_module.html            |   2 +-
 docs/html/hierarchy.html                      | 105 ++-
 docs/html/index.html                          |  12 +-
 docs/html/modules.html                        |   2 +-
 docs/html/namespacecub.html.REMOVED.git-id    |   2 +-
 docs/html/namespacemembers.html               |   2 +-
 docs/html/namespacemembers_enum.html          |   2 +-
 docs/html/namespacemembers_eval.html          |   2 +-
 docs/html/namespacemembers_func.html          |   2 +-
 docs/html/namespaces.html                     |   2 +-
 docs/html/search/all_61.js                    |   9 +-
 docs/html/search/all_63.js                    |   7 +-
 docs/html/search/all_64.js                    |   6 +-
 docs/html/search/all_69.js                    |   1 -
 docs/html/search/all_6d.js                    |   4 +-
 docs/html/search/all_6e.js                    |   2 +-
 docs/html/search/all_6f.js                    |   2 +-
 docs/html/search/all_72.js                    |   4 +-
 docs/html/search/all_73.js                    |  10 +-
 docs/html/search/all_74.js                    |  18 +-
 docs/html/search/all_75.js                    |   2 -
 docs/html/search/all_76.js                    |   1 -
 docs/html/search/classes_63.js                |   3 +-
 docs/html/search/classes_64.js                |   4 +-
 docs/html/search/classes_69.js                |   1 -
 docs/html/search/classes_74.js                |  20 +-
 docs/html/search/files_64.js                  |   1 +
 docs/html/search/functions_61.js              |   6 +-
 docs/html/search/functions_63.js              |   3 +-
 docs/html/search/functions_64.js              |   3 +-
 docs/html/search/functions_6d.js              |   4 +-
 docs/html/search/functions_6e.js              |   3 +-
 docs/html/search/functions_6f.js              |   2 +-
 docs/html/search/functions_72.js              |   4 +-
 docs/html/search/functions_73.js              |   4 +-
 docs/html/search/search.js                    |   8 +-
 docs/html/search/typedefs_64.js               |   1 -
 docs/html/search/typedefs_76.js               |   1 -
 docs/html/structcub_1_1_arg_max-members.html  |   2 +-
 docs/html/structcub_1_1_arg_max.html          |   6 +-
 docs/html/structcub_1_1_arg_min-members.html  |   2 +-
 docs/html/structcub_1_1_arg_min.html          |   6 +-
 ..._block_discontinuity_1_1_temp_storage.html |  28 +-
 ...1_block_discontinuity_1_1_temp_storage.png | Bin 1495 -> 1458 bytes
 ...b_1_1_block_exchange_1_1_temp_storage.html |  28 +-
 ...ub_1_1_block_exchange_1_1_temp_storage.png | Bin 1962 -> 1931 bytes
 ..._1_1_block_histogram_1_1_temp_storage.html |  28 +-
 ...b_1_1_block_histogram_1_1_temp_storage.png | Bin 1956 -> 1926 bytes
 ...00_01_d_u_m_m_y_01_4_1_1_temp_storage.html |  30 +-
 ..._00_01_d_u_m_m_y_01_4_1_1_temp_storage.png | Bin 2382 -> 2345 bytes
 ..._o_s_402c3164d23f1ec647db5dad06a54584.html |  30 +-
 ...p_o_s_402c3164d23f1ec647db5dad06a54584.png | Bin 2406 -> 2375 bytes
 ..._o_s_e4c36dfe8f549604998f6c46cc8fbd1d.html |  30 +-
 ...p_o_s_e4c36dfe8f549604998f6c46cc8fbd1d.png | Bin 2520 -> 2487 bytes
 ...ctcub_1_1_block_load_1_1_temp_storage.html |  30 +-
 ...uctcub_1_1_block_load_1_1_temp_storage.png | Bin 1919 -> 1890 bytes
 ...1_1_block_radix_sort_1_1_temp_storage.html |  28 +-
 ..._1_1_block_radix_sort_1_1_temp_storage.png | Bin 2567 -> 2538 bytes
 ...cub_1_1_block_reduce_1_1_temp_storage.html |  28 +-
 ...tcub_1_1_block_reduce_1_1_temp_storage.png | Bin 1576 -> 1538 bytes
 ...ctcub_1_1_block_scan_1_1_temp_storage.html |  28 +-
 ...uctcub_1_1_block_scan_1_1_temp_storage.png | Bin 1563 -> 1531 bytes
 ..._00_09dfae03f13932c7dbdb41be30a5767ba.html |  28 +-
 ...e_00_09dfae03f13932c7dbdb41be30a5767ba.png | Bin 2399 -> 2361 bytes
 ..._s_p_263becc1ca5b47586740c2f7bb0d0145.html |  28 +-
 ...n_s_p_263becc1ca5b47586740c2f7bb0d0145.png | Bin 2595 -> 2561 bytes
 ..._s_p_8d170856b7ed1df0ed565731a681b449.html |  28 +-
 ...n_s_p_8d170856b7ed1df0ed565731a681b449.png | Bin 2439 -> 2421 bytes
 ...tcub_1_1_block_store_1_1_temp_storage.html |  28 +-
 ...ctcub_1_1_block_store_1_1_temp_storage.png | Bin 1929 -> 1899 bytes
 ..._1_1_caching_device_allocator-members.html |   2 +-
 ...tructcub_1_1_caching_device_allocator.html |   2 +-
 docs/html/structcub_1_1_cast-members.html     |   2 +-
 docs/html/structcub_1_1_cast.html             |   2 +-
 ...tructcub_1_1_device_histogram-members.html |   2 +-
 ...b_1_1_device_histogram.html.REMOVED.git-id |   2 +-
 ...tructcub_1_1_device_partition-members.html |   2 +-
 docs/html/structcub_1_1_device_partition.html |  14 +-
 ...ructcub_1_1_device_radix_sort-members.html |   2 +-
 .../html/structcub_1_1_device_radix_sort.html |  94 ++-
 .../structcub_1_1_device_reduce-members.html  |   6 +-
 docs/html/structcub_1_1_device_reduce.html    | 190 +++---
 ..._1_1_device_run_length_encode-members.html |   2 +-
 ...tructcub_1_1_device_run_length_encode.html |  14 +-
 .../structcub_1_1_device_scan-members.html    |   2 +-
 docs/html/structcub_1_1_device_scan.html      |  31 +-
 ...1_device_segmented_radix_sort-members.html |   4 +-
 ...e_segmented_radix_sort.html.REMOVED.git-id |   2 +-
 ...b_1_1_device_segmented_reduce-members.html |  14 +-
 ...structcub_1_1_device_segmented_reduce.html | 270 ++++----
 .../structcub_1_1_device_select-members.html  |   2 +-
 docs/html/structcub_1_1_device_select.html    |  19 +-
 .../structcub_1_1_device_spmv-members.html    |   2 +-
 docs/html/structcub_1_1_device_spmv.html      |   4 +-
 docs/html/structcub_1_1_equality-members.html |   2 +-
 docs/html/structcub_1_1_equality.html         |   2 +-
 docs/html/structcub_1_1_equals-members.html   |   2 +-
 docs/html/structcub_1_1_equals.html           |   4 +-
 docs/html/structcub_1_1_if-members.html       |   2 +-
 docs/html/structcub_1_1_if.html               |   4 +-
 .../structcub_1_1_inequality-members.html     |   2 +-
 docs/html/structcub_1_1_inequality.html       |   2 +-
 ...uctcub_1_1_inequality_wrapper-members.html |   2 +-
 .../structcub_1_1_inequality_wrapper.html     |   2 +-
 .../structcub_1_1_is_pointer-members.html     |   2 +-
 docs/html/structcub_1_1_is_pointer.html       |   8 +-
 .../structcub_1_1_is_volatile-members.html    |   2 +-
 docs/html/structcub_1_1_is_volatile.html      |   8 +-
 docs/html/structcub_1_1_log2-members.html     |   2 +-
 docs/html/structcub_1_1_log2.html             |  10 +-
 docs/html/structcub_1_1_max-members.html      |   2 +-
 docs/html/structcub_1_1_max.html              |   2 +-
 docs/html/structcub_1_1_min-members.html      |   2 +-
 docs/html/structcub_1_1_min.html              |   2 +-
 .../structcub_1_1_power_of_two-members.html   |   2 +-
 docs/html/structcub_1_1_power_of_two.html     |   8 +-
 ...tructcub_1_1_reduce_by_key_op-members.html |   2 +-
 docs/html/structcub_1_1_reduce_by_key_op.html |   2 +-
 ...tcub_1_1_reduce_by_segment_op-members.html |   2 +-
 .../structcub_1_1_reduce_by_segment_op.html   |   8 +-
 ...ructcub_1_1_remove_qualifiers-members.html |   2 +-
 .../html/structcub_1_1_remove_qualifiers.html |   4 +-
 docs/html/structcub_1_1_sum-members.html      |   2 +-
 docs/html/structcub_1_1_sum.html              |   2 +-
 ...tcub_1_1_warp_reduce_1_1_temp_storage.html |  28 +-
 ...ctcub_1_1_warp_reduce_1_1_temp_storage.png | Bin 1310 -> 1280 bytes
 ...uctcub_1_1_warp_scan_1_1_temp_storage.html |  28 +-
 ...ructcub_1_1_warp_scan_1_1_temp_storage.png | Bin 1287 -> 1259 bytes
 docs/html/tex__obj__input__iterator_8cuh.html |   2 +-
 ...tex__obj__input__iterator_8cuh_source.html |   2 +-
 docs/html/tex__ref__input__iterator_8cuh.html |   2 +-
 ...tex__ref__input__iterator_8cuh_source.html |   2 +-
 docs/html/thread__load_8cuh.html              |   2 +-
 docs/html/thread__load_8cuh_source.html       |  38 +-
 docs/html/thread__operators_8cuh.html         |   2 +-
 docs/html/thread__operators_8cuh_source.html  |  18 +-
 docs/html/thread__store_8cuh.html             |   2 +-
 docs/html/thread__store_8cuh_source.html      | 137 ++--
 .../html/transform__input__iterator_8cuh.html |   2 +-
 ...ransform__input__iterator_8cuh_source.html |   2 +-
 docs/html/util__allocator_8cuh_source.html    |  14 +-
 docs/html/util__arch_8cuh.html                |   2 +-
 docs/html/util__arch_8cuh_source.html         |   2 +-
 docs/html/util__debug_8cuh.html               |  10 +-
 docs/html/util__debug_8cuh_source.html        |   8 +-
 docs/html/util__device_8cuh.html              |   5 +-
 docs/html/util__device_8cuh_source.html       | 448 ++++++------
 docs/html/util__ptx_8cuh.html                 |   2 +-
 docs/html/util__ptx_8cuh_source.html          |   4 +-
 docs/html/util__type_8cuh.html                |  21 +-
 ...util__type_8cuh_source.html.REMOVED.git-id |   2 +-
 docs/html/warp__reduce_8cuh.html              |   2 +-
 docs/html/warp__reduce_8cuh_source.html       |   4 +-
 docs/html/warp__scan_8cuh.html                |   2 +-
 docs/html/warp__scan_8cuh_source.html         |   4 +-
 docs/mainpage.dox                             |   6 +-
 test/test_device_radix_sort.cu                |   3 +
 test/test_util.h                              |  25 +
 324 files changed, 3345 insertions(+), 3878 deletions(-)

diff --git a/CHANGE_LOG.TXT b/CHANGE_LOG.TXT
index 202ec6f160..96973a5e90 100644
--- a/CHANGE_LOG.TXT
+++ b/CHANGE_LOG.TXT
@@ -1,5 +1,20 @@
 //-----------------------------------------------------------------------------
 
+1.5.0    12/14/2015
+    - New Features:
+        - Added new segmented device-wide operations for device-wide sort and 
+          reduction primitives.
+    - Bug fixes: 
+        - Fix for Git Issue 36 (Compilation error with GCC 4.8.4 nvcc 7.0.27) and
+          Forums thread (ThreadLoad generates compiler errors when loading from 
+          pointer-to-const)
+        - Fix for Git Issue 29 (DeviceRadixSort::SortKeys<bool> yields compiler 
+          errors)
+        - Fix for Git Issue 26 (CUDA error: misaligned address after 
+          cub::DeviceRadixSort::SortKeys())
+          
+//-----------------------------------------------------------------------------
+
 1.4.1    04/13/2015
     - Bug fixes: 
         - Fixes for CUDA 7.0 issues with SHFL-based warp-scan and warp-reduction 
diff --git a/cub/block/block_load.cuh b/cub/block/block_load.cuh
index 2df8cfebb5..4803576922 100644
--- a/cub/block/block_load.cuh
+++ b/cub/block/block_load.cuh
@@ -200,7 +200,8 @@ __device__ __forceinline__ void InternalLoadDirectBlockedVectorized(
     #pragma unroll
     for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++)
     {
-        items[ITEM] = reinterpret_cast<T*>(vec_items)[ITEM];
+//        items[ITEM] = reinterpret_cast<T*>(vec_items)[ITEM];
+        items[ITEM] = *(reinterpret_cast<T*>(vec_items) + ITEM);
     }
 }
 
diff --git a/docs/download_cub.html b/docs/download_cub.html
index e900a57c53..a9f8ae9530 100644
--- a/docs/download_cub.html
+++ b/docs/download_cub.html
@@ -37,14 +37,14 @@
 </head>
 
 <body 
-	onload="downloadURL('https://github.com/NVlabs/cub/archive/1.5.1.zip');" 
+	onload="downloadURL('https://github.com/NVlabs/cub/archive/1.5.0.zip');" 
 	style="color: rgb(102, 102, 102); font-family: Helvetica, arial, freesans, clean, sans-serif; font-size: 13px; font-style: normal; font-variant: normal; font-weight: 300; height: 18px;">
 
 <center>
 If your download doesn't start in 3s:
 <br><br>
-<a href="https://github.com/NVlabs/cub/archive/1.5.1.zip"><img src="download-icon.png" style="position:relative; bottom:-10px; border:0px;"/></a>
-<a href="https://github.com/NVlabs/cub/archive/1.5.1.zip"><em>Download CUB!</em></a>
+<a href="https://github.com/NVlabs/cub/archive/1.5.0.zip"><img src="download-icon.png" style="position:relative; bottom:-10px; border:0px;"/></a>
+<a href="https://github.com/NVlabs/cub/archive/1.5.0.zip"><em>Download CUB!</em></a>
 </center>
 
 </body>
diff --git a/docs/html/CHANGE_LOG.TXT b/docs/html/CHANGE_LOG.TXT
index 202ec6f160..96973a5e90 100644
--- a/docs/html/CHANGE_LOG.TXT
+++ b/docs/html/CHANGE_LOG.TXT
@@ -1,5 +1,20 @@
 //-----------------------------------------------------------------------------
 
+1.5.0    12/14/2015
+    - New Features:
+        - Added new segmented device-wide operations for device-wide sort and 
+          reduction primitives.
+    - Bug fixes: 
+        - Fix for Git Issue 36 (Compilation error with GCC 4.8.4 nvcc 7.0.27) and
+          Forums thread (ThreadLoad generates compiler errors when loading from 
+          pointer-to-const)
+        - Fix for Git Issue 29 (DeviceRadixSort::SortKeys<bool> yields compiler 
+          errors)
+        - Fix for Git Issue 26 (CUDA error: misaligned address after 
+          cub::DeviceRadixSort::SortKeys())
+          
+//-----------------------------------------------------------------------------
+
 1.4.1    04/13/2015
     - Bug fixes: 
         - Fixes for CUDA 7.0 issues with SHFL-based warp-scan and warp-reduction 
diff --git a/docs/html/annotated.html b/docs/html/annotated.html
index a31cdfc151..2398acf04e 100644
--- a/docs/html/annotated.html
+++ b/docs/html/annotated.html
@@ -103,145 +103,144 @@
 <tr id="row_0_0_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_caching_device_allocator.html" target="_self">CachingDeviceAllocator</a></td><td class="desc">A simple caching allocator for device memory allocations </td></tr>
 <tr id="row_0_1_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_if.html" target="_self">If</a></td><td class="desc">Type selection (<code>IF ? ThenType : ElseType</code>) </td></tr>
 <tr id="row_0_2_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_equals.html" target="_self">Equals</a></td><td class="desc">Type equality test </td></tr>
-<tr id="row_0_3_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_null_type.html" target="_self">NullType</a></td><td class="desc">A simple "NULL" marker type </td></tr>
-<tr id="row_0_4_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_int2_type.html" target="_self">Int2Type</a></td><td class="desc">Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static call dispatch based on constant integral values) </td></tr>
-<tr id="row_0_5_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_cub_vector.html" target="_self">CubVector</a></td><td class="desc">Exposes a member typedef <code>Type</code> that names the corresponding CUDA vector type if one exists. Otherwise <code>Type</code> refers to the <a class="el" href="structcub_1_1_cub_vector.html" title="Exposes a member typedef Type that names the corresponding CUDA vector type if one exists...">CubVector</a> structure itself, which will wrap the corresponding <code>x</code>, <code>y</code>, etc. vector fields </td></tr>
-<tr id="row_0_6_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_uninitialized.html" target="_self">Uninitialized</a></td><td class="desc">A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions </td></tr>
-<tr id="row_0_7_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_key_value_pair.html" target="_self">KeyValuePair</a></td><td class="desc">A key identifier paired with a corresponding value </td></tr>
-<tr id="row_0_8_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_double_buffer.html" target="_self">DoubleBuffer</a></td><td class="desc">Double-buffer storage wrapper for multi-pass stream transformations that require more than one storage array for streaming intermediate results back and forth </td></tr>
-<tr id="row_0_9_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_log2.html" target="_self">Log2</a></td><td class="desc">Statically determine log2(N), rounded up </td></tr>
-<tr id="row_0_10_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_power_of_two.html" target="_self">PowerOfTwo</a></td><td class="desc">Statically determine if N is a power-of-two </td></tr>
-<tr id="row_0_11_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_is_pointer.html" target="_self">IsPointer</a></td><td class="desc">Pointer vs. iterator </td></tr>
-<tr id="row_0_12_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_is_volatile.html" target="_self">IsVolatile</a></td><td class="desc">Volatile modifier test </td></tr>
-<tr id="row_0_13_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_remove_qualifiers.html" target="_self">RemoveQualifiers</a></td><td class="desc">Removes <code>const</code> and <code>volatile</code> qualifiers from type <code>Tp</code> </td></tr>
-<tr id="row_0_14_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_arg_index_input_iterator.html" target="_self">ArgIndexInputIterator</a></td><td class="desc">A random-access input wrapper for pairing dereferenced values with their corresponding indices (forming <code><a class="el" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">KeyValuePair</a></code> tuples) </td></tr>
-<tr id="row_0_15_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_cache_modified_input_iterator.html" target="_self">CacheModifiedInputIterator</a></td><td class="desc">A random-access input wrapper for dereferencing array values using a PTX cache load modifier </td></tr>
-<tr id="row_0_16_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_cache_modified_output_iterator.html" target="_self">CacheModifiedOutputIterator</a></td><td class="desc">A random-access output wrapper for storing array values using a PTX cache-modifier </td></tr>
-<tr id="row_0_17_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_constant_input_iterator.html" target="_self">ConstantInputIterator</a></td><td class="desc">A random-access input generator for dereferencing a sequence of homogeneous values </td></tr>
-<tr id="row_0_18_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_counting_input_iterator.html" target="_self">CountingInputIterator</a></td><td class="desc">A random-access input generator for dereferencing a sequence of incrementing integer values </td></tr>
-<tr id="row_0_19_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_tex_obj_input_iterator.html" target="_self">TexObjInputIterator</a></td><td class="desc">A random-access input wrapper for dereferencing array values through texture cache. Uses newer Kepler-style texture objects </td></tr>
-<tr id="row_0_20_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_tex_ref_input_iterator.html" target="_self">TexRefInputIterator</a></td><td class="desc">A random-access input wrapper for dereferencing array values through texture cache. Uses older Tesla/Fermi-style texture references </td></tr>
-<tr id="row_0_21_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_transform_input_iterator.html" target="_self">TransformInputIterator</a></td><td class="desc">A random-access input wrapper for transforming dereferenced values </td></tr>
-<tr id="row_0_22_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_equality.html" target="_self">Equality</a></td><td class="desc">Default equality functor </td></tr>
-<tr id="row_0_23_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_inequality.html" target="_self">Inequality</a></td><td class="desc">Default inequality functor </td></tr>
-<tr id="row_0_24_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_inequality_wrapper.html" target="_self">InequalityWrapper</a></td><td class="desc"><a class="el" href="structcub_1_1_inequality.html" title="Default inequality functor. ">Inequality</a> functor (wraps equality functor) </td></tr>
-<tr id="row_0_25_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_sum.html" target="_self">Sum</a></td><td class="desc">Default sum functor </td></tr>
-<tr id="row_0_26_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_max.html" target="_self">Max</a></td><td class="desc">Default max functor </td></tr>
-<tr id="row_0_27_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_arg_max.html" target="_self">ArgMax</a></td><td class="desc">Arg max functor (keeps the value and offset of the first occurrence of the larger item) </td></tr>
-<tr id="row_0_28_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_min.html" target="_self">Min</a></td><td class="desc">Default min functor </td></tr>
-<tr id="row_0_29_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_arg_min.html" target="_self">ArgMin</a></td><td class="desc">Arg min functor (keeps the value and offset of the first occurrence of the smallest item) </td></tr>
-<tr id="row_0_30_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_cast.html" target="_self">Cast</a></td><td class="desc">Default cast functor </td></tr>
-<tr id="row_0_31_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_swizzle_scan_op.html" target="_self">SwizzleScanOp</a></td><td class="desc">Binary operator wrapper for switching non-commutative scan arguments </td></tr>
-<tr id="row_0_32_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_reduce_by_segment_op.html" target="_self">ReduceBySegmentOp</a></td><td class="desc">Reduce-by-segment functor </td></tr>
-<tr id="row_0_33_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_reduce_by_key_op.html" target="_self">ReduceByKeyOp</a></td><td class="desc">&lt; Binary reduction operator to apply to values </td></tr>
-<tr id="row_0_34_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img id="arr_0_34_" src="ftv2mnode.png" alt="o" width="16" height="22" onclick="toggleFolder('0_34_')"/><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_block_discontinuity.html" target="_self">BlockDiscontinuity</a></td><td class="desc">The <a class="el" href="classcub_1_1_block_discontinuity.html" title="The BlockDiscontinuity class provides collective methods for flagging discontinuities within an order...">BlockDiscontinuity</a> class provides <a href="index.html#sec0"><em>collective</em></a> methods for flagging discontinuities within an ordered set of items partitioned across a CUDA thread block. </p>
+<tr id="row_0_3_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_log2.html" target="_self">Log2</a></td><td class="desc">Statically determine log2(N), rounded up </td></tr>
+<tr id="row_0_4_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_power_of_two.html" target="_self">PowerOfTwo</a></td><td class="desc">Statically determine if N is a power-of-two </td></tr>
+<tr id="row_0_5_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_is_pointer.html" target="_self">IsPointer</a></td><td class="desc">Pointer vs. iterator </td></tr>
+<tr id="row_0_6_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_is_volatile.html" target="_self">IsVolatile</a></td><td class="desc">Volatile modifier test </td></tr>
+<tr id="row_0_7_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_remove_qualifiers.html" target="_self">RemoveQualifiers</a></td><td class="desc">Removes <code>const</code> and <code>volatile</code> qualifiers from type <code>Tp</code> </td></tr>
+<tr id="row_0_8_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_arg_index_input_iterator.html" target="_self">ArgIndexInputIterator</a></td><td class="desc">A random-access input wrapper for pairing dereferenced values with their corresponding indices (forming <code>KeyValuePair</code> tuples) </td></tr>
+<tr id="row_0_9_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_cache_modified_input_iterator.html" target="_self">CacheModifiedInputIterator</a></td><td class="desc">A random-access input wrapper for dereferencing array values using a PTX cache load modifier </td></tr>
+<tr id="row_0_10_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_cache_modified_output_iterator.html" target="_self">CacheModifiedOutputIterator</a></td><td class="desc">A random-access output wrapper for storing array values using a PTX cache-modifier </td></tr>
+<tr id="row_0_11_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_constant_input_iterator.html" target="_self">ConstantInputIterator</a></td><td class="desc">A random-access input generator for dereferencing a sequence of homogeneous values </td></tr>
+<tr id="row_0_12_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_counting_input_iterator.html" target="_self">CountingInputIterator</a></td><td class="desc">A random-access input generator for dereferencing a sequence of incrementing integer values </td></tr>
+<tr id="row_0_13_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_tex_obj_input_iterator.html" target="_self">TexObjInputIterator</a></td><td class="desc">A random-access input wrapper for dereferencing array values through texture cache. Uses newer Kepler-style texture objects </td></tr>
+<tr id="row_0_14_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_tex_ref_input_iterator.html" target="_self">TexRefInputIterator</a></td><td class="desc">A random-access input wrapper for dereferencing array values through texture cache. Uses older Tesla/Fermi-style texture references </td></tr>
+<tr id="row_0_15_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_transform_input_iterator.html" target="_self">TransformInputIterator</a></td><td class="desc">A random-access input wrapper for transforming dereferenced values </td></tr>
+<tr id="row_0_16_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_equality.html" target="_self">Equality</a></td><td class="desc">Default equality functor </td></tr>
+<tr id="row_0_17_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_inequality.html" target="_self">Inequality</a></td><td class="desc">Default inequality functor </td></tr>
+<tr id="row_0_18_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_inequality_wrapper.html" target="_self">InequalityWrapper</a></td><td class="desc"><a class="el" href="structcub_1_1_inequality.html" title="Default inequality functor. ">Inequality</a> functor (wraps equality functor) </td></tr>
+<tr id="row_0_19_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_sum.html" target="_self">Sum</a></td><td class="desc">Default sum functor </td></tr>
+<tr id="row_0_20_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_max.html" target="_self">Max</a></td><td class="desc">Default max functor </td></tr>
+<tr id="row_0_21_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_arg_max.html" target="_self">ArgMax</a></td><td class="desc">Arg max functor (keeps the value and offset of the first occurrence of the larger item) </td></tr>
+<tr id="row_0_22_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_min.html" target="_self">Min</a></td><td class="desc">Default min functor </td></tr>
+<tr id="row_0_23_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_arg_min.html" target="_self">ArgMin</a></td><td class="desc">Arg min functor (keeps the value and offset of the first occurrence of the smallest item) </td></tr>
+<tr id="row_0_24_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_cast.html" target="_self">Cast</a></td><td class="desc">Default cast functor </td></tr>
+<tr id="row_0_25_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_swizzle_scan_op.html" target="_self">SwizzleScanOp</a></td><td class="desc">Binary operator wrapper for switching non-commutative scan arguments </td></tr>
+<tr id="row_0_26_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_reduce_by_segment_op.html" target="_self">ReduceBySegmentOp</a></td><td class="desc">Reduce-by-segment functor </td></tr>
+<tr id="row_0_27_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_reduce_by_key_op.html" target="_self">ReduceByKeyOp</a></td><td class="desc">&lt; Binary reduction operator to apply to values </td></tr>
+<tr id="row_0_28_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img id="arr_0_28_" src="ftv2mnode.png" alt="o" width="16" height="22" onclick="toggleFolder('0_28_')"/><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_block_discontinuity.html" target="_self">BlockDiscontinuity</a></td><td class="desc">The <a class="el" href="classcub_1_1_block_discontinuity.html" title="The BlockDiscontinuity class provides collective methods for flagging discontinuities within an order...">BlockDiscontinuity</a> class provides <a href="index.html#sec0"><em>collective</em></a> methods for flagging discontinuities within an ordered set of items partitioned across a CUDA thread block. </p>
 <div class="image">
 <img src="discont_logo.png" alt="discont_logo.png"/>
 </div>
  </td></tr>
-<tr id="row_0_34_0_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_discontinuity_1_1_temp_storage.html" target="_self">TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_discontinuity.html" title="The BlockDiscontinuity class provides collective methods for flagging discontinuities within an order...">BlockDiscontinuity</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
-<tr id="row_0_35_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img id="arr_0_35_" src="ftv2mnode.png" alt="o" width="16" height="22" onclick="toggleFolder('0_35_')"/><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_block_exchange.html" target="_self">BlockExchange</a></td><td class="desc">The <a class="el" href="classcub_1_1_block_exchange.html" title="The BlockExchange class provides collective methods for rearranging data partitioned across a CUDA th...">BlockExchange</a> class provides <a href="index.html#sec0"><em>collective</em></a> methods for rearranging data partitioned across a CUDA thread block. </p>
+<tr id="row_0_28_0_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_discontinuity_1_1_temp_storage.html" target="_self">TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_discontinuity.html" title="The BlockDiscontinuity class provides collective methods for flagging discontinuities within an order...">BlockDiscontinuity</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
+<tr id="row_0_29_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img id="arr_0_29_" src="ftv2mnode.png" alt="o" width="16" height="22" onclick="toggleFolder('0_29_')"/><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_block_exchange.html" target="_self">BlockExchange</a></td><td class="desc">The <a class="el" href="classcub_1_1_block_exchange.html" title="The BlockExchange class provides collective methods for rearranging data partitioned across a CUDA th...">BlockExchange</a> class provides <a href="index.html#sec0"><em>collective</em></a> methods for rearranging data partitioned across a CUDA thread block. </p>
 <div class="image">
 <img src="transpose_logo.png" alt="transpose_logo.png"/>
 </div>
  </td></tr>
-<tr id="row_0_35_0_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_exchange_1_1_temp_storage.html" target="_self">TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_exchange.html" title="The BlockExchange class provides collective methods for rearranging data partitioned across a CUDA th...">BlockExchange</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
-<tr id="row_0_36_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img id="arr_0_36_" src="ftv2mnode.png" alt="o" width="16" height="22" onclick="toggleFolder('0_36_')"/><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_block_histogram.html" target="_self">BlockHistogram</a></td><td class="desc">The <a class="el" href="classcub_1_1_block_histogram.html" title="The BlockHistogram class provides collective methods for constructing block-wide histograms from data...">BlockHistogram</a> class provides <a href="index.html#sec0"><em>collective</em></a> methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. </p>
+<tr id="row_0_29_0_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_exchange_1_1_temp_storage.html" target="_self">TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_exchange.html" title="The BlockExchange class provides collective methods for rearranging data partitioned across a CUDA th...">BlockExchange</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
+<tr id="row_0_30_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img id="arr_0_30_" src="ftv2mnode.png" alt="o" width="16" height="22" onclick="toggleFolder('0_30_')"/><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_block_histogram.html" target="_self">BlockHistogram</a></td><td class="desc">The <a class="el" href="classcub_1_1_block_histogram.html" title="The BlockHistogram class provides collective methods for constructing block-wide histograms from data...">BlockHistogram</a> class provides <a href="index.html#sec0"><em>collective</em></a> methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. </p>
 <div class="image">
 <img src="histogram_logo.png" alt="histogram_logo.png"/>
 </div>
  </td></tr>
-<tr id="row_0_36_0_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_histogram_1_1_temp_storage.html" target="_self">TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_histogram.html" title="The BlockHistogram class provides collective methods for constructing block-wide histograms from data...">BlockHistogram</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
-<tr id="row_0_37_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img id="arr_0_37_" src="ftv2mnode.png" alt="o" width="16" height="22" onclick="toggleFolder('0_37_')"/><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_block_load.html" target="_self">BlockLoad</a></td><td class="desc">The <a class="el" href="classcub_1_1_block_load.html" title="The BlockLoad class provides collective data movement methods for loading a linear segment of items f...">BlockLoad</a> class provides <a href="index.html#sec0"><em>collective</em></a> data movement methods for loading a linear segment of items from memory into a <a href="index.html#sec5sec3"><em>blocked arrangement</em></a> across a CUDA thread block. </p>
+<tr id="row_0_30_0_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_histogram_1_1_temp_storage.html" target="_self">TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_histogram.html" title="The BlockHistogram class provides collective methods for constructing block-wide histograms from data...">BlockHistogram</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
+<tr id="row_0_31_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img id="arr_0_31_" src="ftv2mnode.png" alt="o" width="16" height="22" onclick="toggleFolder('0_31_')"/><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_block_load.html" target="_self">BlockLoad</a></td><td class="desc">The <a class="el" href="classcub_1_1_block_load.html" title="The BlockLoad class provides collective data movement methods for loading a linear segment of items f...">BlockLoad</a> class provides <a href="index.html#sec0"><em>collective</em></a> data movement methods for loading a linear segment of items from memory into a <a href="index.html#sec5sec3"><em>blocked arrangement</em></a> across a CUDA thread block. </p>
 <div class="image">
 <img src="block_load_logo.png" alt="block_load_logo.png"/>
 </div>
  </td></tr>
-<tr id="row_0_37_0_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_load_1_1_temp_storage.html" target="_self">TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_load.html" title="The BlockLoad class provides collective data movement methods for loading a linear segment of items f...">BlockLoad</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
-<tr id="row_0_38_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img id="arr_0_38_" src="ftv2mnode.png" alt="o" width="16" height="22" onclick="toggleFolder('0_38_')"/><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_block_radix_sort.html" target="_self">BlockRadixSort</a></td><td class="desc">The <a class="el" href="classcub_1_1_block_radix_sort.html" title="The BlockRadixSort class provides collective methods for sorting items partitioned across a CUDA thre...">BlockRadixSort</a> class provides <a href="index.html#sec0"><em>collective</em></a> methods for sorting items partitioned across a CUDA thread block using a radix sorting method. </p>
+<tr id="row_0_31_0_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_load_1_1_temp_storage.html" target="_self">TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_load.html" title="The BlockLoad class provides collective data movement methods for loading a linear segment of items f...">BlockLoad</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
+<tr id="row_0_32_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img id="arr_0_32_" src="ftv2mnode.png" alt="o" width="16" height="22" onclick="toggleFolder('0_32_')"/><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_block_radix_sort.html" target="_self">BlockRadixSort</a></td><td class="desc">The <a class="el" href="classcub_1_1_block_radix_sort.html" title="The BlockRadixSort class provides collective methods for sorting items partitioned across a CUDA thre...">BlockRadixSort</a> class provides <a href="index.html#sec0"><em>collective</em></a> methods for sorting items partitioned across a CUDA thread block using a radix sorting method. </p>
 <div class="image">
 <img src="sorting_logo.png" alt="sorting_logo.png"/>
 </div>
  </td></tr>
-<tr id="row_0_38_0_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_radix_sort_1_1_temp_storage.html" target="_self">TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_scan.html" title="The BlockScan class provides collective methods for computing a parallel prefix sum/scan of items par...">BlockScan</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
-<tr id="row_0_39_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img id="arr_0_39_" src="ftv2mnode.png" alt="o" width="16" height="22" onclick="toggleFolder('0_39_')"/><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_block_reduce.html" target="_self">BlockReduce</a></td><td class="desc">The <a class="el" href="classcub_1_1_block_reduce.html" title="The BlockReduce class provides collective methods for computing a parallel reduction of items partiti...">BlockReduce</a> class provides <a href="index.html#sec0"><em>collective</em></a> methods for computing a parallel reduction of items partitioned across a CUDA thread block. </p>
+<tr id="row_0_32_0_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_radix_sort_1_1_temp_storage.html" target="_self">TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_scan.html" title="The BlockScan class provides collective methods for computing a parallel prefix sum/scan of items par...">BlockScan</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
+<tr id="row_0_33_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img id="arr_0_33_" src="ftv2mnode.png" alt="o" width="16" height="22" onclick="toggleFolder('0_33_')"/><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_block_reduce.html" target="_self">BlockReduce</a></td><td class="desc">The <a class="el" href="classcub_1_1_block_reduce.html" title="The BlockReduce class provides collective methods for computing a parallel reduction of items partiti...">BlockReduce</a> class provides <a href="index.html#sec0"><em>collective</em></a> methods for computing a parallel reduction of items partitioned across a CUDA thread block. </p>
 <div class="image">
 <img src="reduce_logo.png" alt="reduce_logo.png"/>
 </div>
  </td></tr>
-<tr id="row_0_39_0_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_reduce_1_1_temp_storage.html" target="_self">TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_reduce.html" title="The BlockReduce class provides collective methods for computing a parallel reduction of items partiti...">BlockReduce</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
-<tr id="row_0_40_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img id="arr_0_40_" src="ftv2mnode.png" alt="o" width="16" height="22" onclick="toggleFolder('0_40_')"/><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_block_scan.html" target="_self">BlockScan</a></td><td class="desc">The <a class="el" href="classcub_1_1_block_scan.html" title="The BlockScan class provides collective methods for computing a parallel prefix sum/scan of items par...">BlockScan</a> class provides <a href="index.html#sec0"><em>collective</em></a> methods for computing a parallel prefix sum/scan of items partitioned across a CUDA thread block. </p>
+<tr id="row_0_33_0_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_reduce_1_1_temp_storage.html" target="_self">TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_reduce.html" title="The BlockReduce class provides collective methods for computing a parallel reduction of items partiti...">BlockReduce</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
+<tr id="row_0_34_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img id="arr_0_34_" src="ftv2mnode.png" alt="o" width="16" height="22" onclick="toggleFolder('0_34_')"/><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_block_scan.html" target="_self">BlockScan</a></td><td class="desc">The <a class="el" href="classcub_1_1_block_scan.html" title="The BlockScan class provides collective methods for computing a parallel prefix sum/scan of items par...">BlockScan</a> class provides <a href="index.html#sec0"><em>collective</em></a> methods for computing a parallel prefix sum/scan of items partitioned across a CUDA thread block. </p>
 <div class="image">
 <img src="block_scan_logo.png" alt="block_scan_logo.png"/>
 </div>
  </td></tr>
-<tr id="row_0_40_0_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_scan_1_1_temp_storage.html" target="_self">TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_scan.html" title="The BlockScan class provides collective methods for computing a parallel prefix sum/scan of items par...">BlockScan</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
-<tr id="row_0_41_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img id="arr_0_41_" src="ftv2mnode.png" alt="o" width="16" height="22" onclick="toggleFolder('0_41_')"/><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_block_store.html" target="_self">BlockStore</a></td><td class="desc">The <a class="el" href="classcub_1_1_block_store.html" title="The BlockStore class provides collective data movement methods for writing a blocked arrangement of i...">BlockStore</a> class provides <a href="index.html#sec0"><em>collective</em></a> data movement methods for writing a <a href="index.html#sec5sec3"><em>blocked arrangement</em></a> of items partitioned across a CUDA thread block to a linear segment of memory. </p>
+<tr id="row_0_34_0_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_scan_1_1_temp_storage.html" target="_self">TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_scan.html" title="The BlockScan class provides collective methods for computing a parallel prefix sum/scan of items par...">BlockScan</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
+<tr id="row_0_35_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img id="arr_0_35_" src="ftv2mnode.png" alt="o" width="16" height="22" onclick="toggleFolder('0_35_')"/><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_block_store.html" target="_self">BlockStore</a></td><td class="desc">The <a class="el" href="classcub_1_1_block_store.html" title="The BlockStore class provides collective data movement methods for writing a blocked arrangement of i...">BlockStore</a> class provides <a href="index.html#sec0"><em>collective</em></a> data movement methods for writing a <a href="index.html#sec5sec3"><em>blocked arrangement</em></a> of items partitioned across a CUDA thread block to a linear segment of memory. </p>
 <div class="image">
 <img src="block_store_logo.png" alt="block_store_logo.png"/>
 </div>
  </td></tr>
-<tr id="row_0_41_0_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_store_1_1_temp_storage.html" target="_self">TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_store.html" title="The BlockStore class provides collective data movement methods for writing a blocked arrangement of i...">BlockStore</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
-<tr id="row_0_42_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_histogram.html" target="_self">DeviceHistogram</a></td><td class="desc"><a class="el" href="structcub_1_1_device_histogram.html" title="DeviceHistogram provides device-wide parallel operations for constructing histogram(s) from a sequenc...">DeviceHistogram</a> provides device-wide parallel operations for constructing histogram(s) from a sequence of samples data residing within device-accessible memory. </p>
+<tr id="row_0_35_0_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_store_1_1_temp_storage.html" target="_self">TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_store.html" title="The BlockStore class provides collective data movement methods for writing a blocked arrangement of i...">BlockStore</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
+<tr id="row_0_36_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_histogram.html" target="_self">DeviceHistogram</a></td><td class="desc"><a class="el" href="structcub_1_1_device_histogram.html" title="DeviceHistogram provides device-wide parallel operations for constructing histogram(s) from a sequenc...">DeviceHistogram</a> provides device-wide parallel operations for constructing histogram(s) from a sequence of samples data residing within device-accessible memory. </p>
 <div class="image">
 <img src="histogram_logo.png" alt="histogram_logo.png"/>
 </div>
  </td></tr>
-<tr id="row_0_43_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_partition.html" target="_self">DevicePartition</a></td><td class="desc"><a class="el" href="structcub_1_1_device_partition.html" title="DevicePartition provides device-wide, parallel operations for partitioning sequences of data items re...">DevicePartition</a> provides device-wide, parallel operations for partitioning sequences of data items residing within device-accessible memory. </p>
+<tr id="row_0_37_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_partition.html" target="_self">DevicePartition</a></td><td class="desc"><a class="el" href="structcub_1_1_device_partition.html" title="DevicePartition provides device-wide, parallel operations for partitioning sequences of data items re...">DevicePartition</a> provides device-wide, parallel operations for partitioning sequences of data items residing within device-accessible memory. </p>
 <div class="image">
 <img src="partition_logo.png" alt="partition_logo.png"/>
 </div>
  </td></tr>
-<tr id="row_0_44_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_radix_sort.html" target="_self">DeviceRadixSort</a></td><td class="desc"><a class="el" href="structcub_1_1_device_radix_sort.html" title="DeviceRadixSort provides device-wide, parallel operations for computing a radix sort across a sequenc...">DeviceRadixSort</a> provides device-wide, parallel operations for computing a radix sort across a sequence of data items residing within device-accessible memory. </p>
+<tr id="row_0_38_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_radix_sort.html" target="_self">DeviceRadixSort</a></td><td class="desc"><a class="el" href="structcub_1_1_device_radix_sort.html" title="DeviceRadixSort provides device-wide, parallel operations for computing a radix sort across a sequenc...">DeviceRadixSort</a> provides device-wide, parallel operations for computing a radix sort across a sequence of data items residing within device-accessible memory. </p>
 <div class="image">
 <img src="sorting_logo.png" alt="sorting_logo.png"/>
 </div>
  </td></tr>
-<tr id="row_0_45_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_reduce.html" target="_self">DeviceReduce</a></td><td class="desc"><a class="el" href="structcub_1_1_device_reduce.html" title="DeviceReduce provides device-wide, parallel operations for computing a reduction across a sequence of...">DeviceReduce</a> provides device-wide, parallel operations for computing a reduction across a sequence of data items residing within device-accessible memory. </p>
+<tr id="row_0_39_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_reduce.html" target="_self">DeviceReduce</a></td><td class="desc"><a class="el" href="structcub_1_1_device_reduce.html" title="DeviceReduce provides device-wide, parallel operations for computing a reduction across a sequence of...">DeviceReduce</a> provides device-wide, parallel operations for computing a reduction across a sequence of data items residing within device-accessible memory. </p>
 <div class="image">
 <img src="reduce_logo.png" alt="reduce_logo.png"/>
 </div>
  </td></tr>
-<tr id="row_0_46_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_run_length_encode.html" target="_self">DeviceRunLengthEncode</a></td><td class="desc"><a class="el" href="structcub_1_1_device_run_length_encode.html" title="DeviceRunLengthEncode provides device-wide, parallel operations for demarcating &quot;runs&quot; of same-valued...">DeviceRunLengthEncode</a> provides device-wide, parallel operations for demarcating "runs" of same-valued items within a sequence residing within device-accessible memory. </p>
+<tr id="row_0_40_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_run_length_encode.html" target="_self">DeviceRunLengthEncode</a></td><td class="desc"><a class="el" href="structcub_1_1_device_run_length_encode.html" title="DeviceRunLengthEncode provides device-wide, parallel operations for demarcating &quot;runs&quot; of same-valued...">DeviceRunLengthEncode</a> provides device-wide, parallel operations for demarcating "runs" of same-valued items within a sequence residing within device-accessible memory. </p>
 <div class="image">
 <img src="run_length_encode_logo.png" alt="run_length_encode_logo.png"/>
 </div>
  </td></tr>
-<tr id="row_0_47_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_scan.html" target="_self">DeviceScan</a></td><td class="desc"><a class="el" href="structcub_1_1_device_scan.html" title="DeviceScan provides device-wide, parallel operations for computing a prefix scan across a sequence of...">DeviceScan</a> provides device-wide, parallel operations for computing a prefix scan across a sequence of data items residing within device-accessible memory. </p>
+<tr id="row_0_41_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_scan.html" target="_self">DeviceScan</a></td><td class="desc"><a class="el" href="structcub_1_1_device_scan.html" title="DeviceScan provides device-wide, parallel operations for computing a prefix scan across a sequence of...">DeviceScan</a> provides device-wide, parallel operations for computing a prefix scan across a sequence of data items residing within device-accessible memory. </p>
 <div class="image">
 <img src="device_scan.png" alt="device_scan.png"/>
 </div>
  </td></tr>
-<tr id="row_0_48_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_segmented_radix_sort.html" target="_self">DeviceSegmentedRadixSort</a></td><td class="desc"><a class="el" href="structcub_1_1_device_segmented_radix_sort.html" title="DeviceSegmentedRadixSort provides device-wide, parallel operations for computing a batched radix sort...">DeviceSegmentedRadixSort</a> provides device-wide, parallel operations for computing a batched radix sort across multiple, non-overlapping sequences of data items residing within device-accessible memory. </p>
+<tr id="row_0_42_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_segmented_radix_sort.html" target="_self">DeviceSegmentedRadixSort</a></td><td class="desc"><a class="el" href="structcub_1_1_device_segmented_radix_sort.html" title="DeviceSegmentedRadixSort provides device-wide, parallel operations for computing a batched radix sort...">DeviceSegmentedRadixSort</a> provides device-wide, parallel operations for computing a batched radix sort across multiple, non-overlapping sequences of data items residing within device-accessible memory. </p>
 <div class="image">
 <img src="segmented_sorting_logo.png" alt="segmented_sorting_logo.png"/>
 </div>
  </td></tr>
-<tr id="row_0_49_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_select.html" target="_self">DeviceSelect</a></td><td class="desc"><a class="el" href="structcub_1_1_device_select.html" title="DeviceSelect provides device-wide, parallel operations for compacting selected items from sequences o...">DeviceSelect</a> provides device-wide, parallel operations for compacting selected items from sequences of data items residing within device-accessible memory. </p>
+<tr id="row_0_43_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_segmented_reduce.html" target="_self">DeviceSegmentedReduce</a></td><td class="desc"><a class="el" href="structcub_1_1_device_segmented_reduce.html" title="DeviceSegmentedReduce provides device-wide, parallel operations for computing a reduction across mult...">DeviceSegmentedReduce</a> provides device-wide, parallel operations for computing a reduction across multiple sequences of data items residing within device-accessible memory. </p>
+<div class="image">
+<img src="reduce_logo.png" alt="reduce_logo.png"/>
+</div>
+ </td></tr>
+<tr id="row_0_44_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_select.html" target="_self">DeviceSelect</a></td><td class="desc"><a class="el" href="structcub_1_1_device_select.html" title="DeviceSelect provides device-wide, parallel operations for compacting selected items from sequences o...">DeviceSelect</a> provides device-wide, parallel operations for compacting selected items from sequences of data items residing within device-accessible memory. </p>
 <div class="image">
 <img src="select_logo.png" alt="select_logo.png"/>
 </div>
  </td></tr>
-<tr id="row_0_50_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_spmv.html" target="_self">DeviceSpmv</a></td><td class="desc"><a class="el" href="structcub_1_1_device_spmv.html" title="DeviceSpmv provides device-wide parallel operations for performing sparse-matrix * dense-vector multi...">DeviceSpmv</a> provides device-wide parallel operations for performing sparse-matrix * dense-vector multiplication (SpMV) </td></tr>
-<tr id="row_0_51_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img id="arr_0_51_" src="ftv2mnode.png" alt="o" width="16" height="22" onclick="toggleFolder('0_51_')"/><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_warp_scan.html" target="_self">WarpScan</a></td><td class="desc">The <a class="el" href="classcub_1_1_warp_scan.html" title="The WarpScan class provides collective methods for computing a parallel prefix scan of items partitio...">WarpScan</a> class provides <a href="index.html#sec0"><em>collective</em></a> methods for computing a parallel prefix scan of items partitioned across a CUDA thread warp. </p>
+<tr id="row_0_45_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_spmv.html" target="_self">DeviceSpmv</a></td><td class="desc"><a class="el" href="structcub_1_1_device_spmv.html" title="DeviceSpmv provides device-wide parallel operations for performing sparse-matrix * dense-vector multi...">DeviceSpmv</a> provides device-wide parallel operations for performing sparse-matrix * dense-vector multiplication (SpMV) </td></tr>
+<tr id="row_0_46_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img id="arr_0_46_" src="ftv2mnode.png" alt="o" width="16" height="22" onclick="toggleFolder('0_46_')"/><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_warp_scan.html" target="_self">WarpScan</a></td><td class="desc">The <a class="el" href="classcub_1_1_warp_scan.html" title="The WarpScan class provides collective methods for computing a parallel prefix scan of items partitio...">WarpScan</a> class provides <a href="index.html#sec0"><em>collective</em></a> methods for computing a parallel prefix scan of items partitioned across a CUDA thread warp. </p>
 <div class="image">
 <img src="warp_scan_logo.png" alt="warp_scan_logo.png"/>
 </div>
  </td></tr>
-<tr id="row_0_51_0_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_warp_scan_1_1_temp_storage.html" target="_self">TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_warp_scan.html" title="The WarpScan class provides collective methods for computing a parallel prefix scan of items partitio...">WarpScan</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
-<tr id="row_0_52_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img id="arr_0_52_" src="ftv2mlastnode.png" alt="\" width="16" height="22" onclick="toggleFolder('0_52_')"/><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_warp_reduce.html" target="_self">WarpReduce</a></td><td class="desc">The <a class="el" href="classcub_1_1_warp_reduce.html" title="The WarpReduce class provides collective methods for computing a parallel reduction of items partitio...">WarpReduce</a> class provides <a href="index.html#sec0"><em>collective</em></a> methods for computing a parallel reduction of items partitioned across a CUDA thread warp. </p>
+<tr id="row_0_46_0_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_warp_scan_1_1_temp_storage.html" target="_self">TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_warp_scan.html" title="The WarpScan class provides collective methods for computing a parallel prefix scan of items partitio...">WarpScan</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
+<tr id="row_0_47_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img id="arr_0_47_" src="ftv2mlastnode.png" alt="\" width="16" height="22" onclick="toggleFolder('0_47_')"/><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_warp_reduce.html" target="_self">WarpReduce</a></td><td class="desc">The <a class="el" href="classcub_1_1_warp_reduce.html" title="The WarpReduce class provides collective methods for computing a parallel reduction of items partitio...">WarpReduce</a> class provides <a href="index.html#sec0"><em>collective</em></a> methods for computing a parallel reduction of items partitioned across a CUDA thread warp. </p>
 <div class="image">
 <img src="warp_reduce_logo.png" alt="warp_reduce_logo.png"/>
 </div>
  </td></tr>
-<tr id="row_0_52_0_"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_warp_reduce_1_1_temp_storage.html" target="_self">TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_warp_reduce.html" title="The WarpReduce class provides collective methods for computing a parallel reduction of items partitio...">WarpReduce</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
+<tr id="row_0_47_0_" class="even"><td class="entry"><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2blank.png" alt="&#160;" width="16" height="22" /><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_warp_reduce_1_1_temp_storage.html" target="_self">TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_warp_reduce.html" title="The WarpReduce class provides collective methods for computing a parallel reduction of items partitio...">WarpReduce</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
 </table>
 </div><!-- directory -->
 </div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/arg__index__input__iterator_8cuh.html b/docs/html/arg__index__input__iterator_8cuh.html
index 74e1b88e89..2f1540b18c 100644
--- a/docs/html/arg__index__input__iterator_8cuh.html
+++ b/docs/html/arg__index__input__iterator_8cuh.html
@@ -117,7 +117,7 @@
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="nested-classes"></a>
 Classes</h2></td></tr>
 <tr class="memitem:"><td class="memItemLeft" align="right" valign="top">class &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td></tr>
-<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">A random-access input wrapper for pairing dereferenced values with their corresponding indices (forming <code><a class="el" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">KeyValuePair</a></code> tuples).  <a href="classcub_1_1_arg_index_input_iterator.html#details">More...</a><br/></td></tr>
+<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">A random-access input wrapper for pairing dereferenced values with their corresponding indices (forming <code>KeyValuePair</code> tuples).  <a href="classcub_1_1_arg_index_input_iterator.html#details">More...</a><br/></td></tr>
 <tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table><table class="memberdecls">
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="namespaces"></a>
@@ -134,7 +134,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/arg__index__input__iterator_8cuh_source.html b/docs/html/arg__index__input__iterator_8cuh_source.html
index a2c6a9f367..599e17c1c6 100644
--- a/docs/html/arg__index__input__iterator_8cuh_source.html
+++ b/docs/html/arg__index__input__iterator_8cuh_source.html
@@ -167,9 +167,9 @@
 <div class="line"><a name="l00121"></a><span class="lineno">  121</span>&#160;    <span class="comment">// Required iterator traits</span></div>
 <div class="line"><a name="l00122"></a><span class="lineno"><a class="code" href="classcub_1_1_arg_index_input_iterator.html#a6ad07b9b511ecbd6219756ebeffd6be6">  122</a></span>&#160;    <span class="keyword">typedef</span> <a class="code" href="classcub_1_1_arg_index_input_iterator.html" title="A random-access input wrapper for pairing dereferenced values with their corresponding indices (formi...">ArgIndexInputIterator</a>               <a class="code" href="classcub_1_1_arg_index_input_iterator.html#a6ad07b9b511ecbd6219756ebeffd6be6" title="My own type. ">self_type</a>;              </div>
 <div class="line"><a name="l00123"></a><span class="lineno"><a class="code" href="classcub_1_1_arg_index_input_iterator.html#acd5e39570dd51f4883547ef33f9ca8c6">  123</a></span>&#160;    <span class="keyword">typedef</span> OffsetT                             <a class="code" href="classcub_1_1_arg_index_input_iterator.html#acd5e39570dd51f4883547ef33f9ca8c6" title="Type to express the result of subtracting one iterator from another. ">difference_type</a>;        </div>
-<div class="line"><a name="l00124"></a><span class="lineno"><a class="code" href="classcub_1_1_arg_index_input_iterator.html#a6c40ea1dc7c0923b9010f1e938e07d22">  124</a></span>&#160;    <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">KeyValuePair&lt;difference_type, T&gt;</a>    <a class="code" href="classcub_1_1_arg_index_input_iterator.html#a6c40ea1dc7c0923b9010f1e938e07d22" title="The type of the element the iterator can point to. ">value_type</a>;             </div>
-<div class="line"><a name="l00125"></a><span class="lineno"><a class="code" href="classcub_1_1_arg_index_input_iterator.html#afbc0675053718d968d81b1a16e28dc5c">  125</a></span>&#160;    <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">value_type</a>*                         <a class="code" href="classcub_1_1_arg_index_input_iterator.html#afbc0675053718d968d81b1a16e28dc5c" title="The type of a pointer to an element the iterator can point to. ">pointer</a>;                </div>
-<div class="line"><a name="l00126"></a><span class="lineno"><a class="code" href="classcub_1_1_arg_index_input_iterator.html#a8498b60c0f5f6b04caf6e3b18cfd825d">  126</a></span>&#160;    <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">value_type</a>                          <a class="code" href="classcub_1_1_arg_index_input_iterator.html#a8498b60c0f5f6b04caf6e3b18cfd825d" title="The type of a reference to an element the iterator can point to. ">reference</a>;              </div>
+<div class="line"><a name="l00124"></a><span class="lineno"><a class="code" href="classcub_1_1_arg_index_input_iterator.html#a6c40ea1dc7c0923b9010f1e938e07d22">  124</a></span>&#160;    <span class="keyword">typedef</span> KeyValuePair&lt;difference_type, T&gt;    <a class="code" href="classcub_1_1_arg_index_input_iterator.html#a6c40ea1dc7c0923b9010f1e938e07d22" title="The type of the element the iterator can point to. ">value_type</a>;             </div>
+<div class="line"><a name="l00125"></a><span class="lineno"><a class="code" href="classcub_1_1_arg_index_input_iterator.html#afbc0675053718d968d81b1a16e28dc5c">  125</a></span>&#160;    <span class="keyword">typedef</span> <a class="code" href="classcub_1_1_arg_index_input_iterator.html#a6c40ea1dc7c0923b9010f1e938e07d22" title="The type of the element the iterator can point to. ">value_type</a>*                         <a class="code" href="classcub_1_1_arg_index_input_iterator.html#afbc0675053718d968d81b1a16e28dc5c" title="The type of a pointer to an element the iterator can point to. ">pointer</a>;                </div>
+<div class="line"><a name="l00126"></a><span class="lineno"><a class="code" href="classcub_1_1_arg_index_input_iterator.html#a8498b60c0f5f6b04caf6e3b18cfd825d">  126</a></span>&#160;    <span class="keyword">typedef</span> <a class="code" href="classcub_1_1_arg_index_input_iterator.html#a6c40ea1dc7c0923b9010f1e938e07d22" title="The type of the element the iterator can point to. ">value_type</a>                          <a class="code" href="classcub_1_1_arg_index_input_iterator.html#a8498b60c0f5f6b04caf6e3b18cfd825d" title="The type of a reference to an element the iterator can point to. ">reference</a>;              </div>
 <div class="line"><a name="l00127"></a><span class="lineno">  127</span>&#160;</div>
 <div class="line"><a name="l00128"></a><span class="lineno">  128</span>&#160;<span class="preprocessor">#if (THRUST_VERSION &gt;= 100700)</span></div>
 <div class="line"><a name="l00129"></a><span class="lineno">  129</span>&#160;<span class="preprocessor"></span>    <span class="comment">// Use Thrust&#39;s iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods</span></div>
@@ -177,7 +177,7 @@
 <div class="line"><a name="l00131"></a><span class="lineno">  131</span>&#160;        thrust::any_system_tag,</div>
 <div class="line"><a name="l00132"></a><span class="lineno">  132</span>&#160;        thrust::random_access_traversal_tag,</div>
 <div class="line"><a name="l00133"></a><span class="lineno">  133</span>&#160;        <a class="code" href="classcub_1_1_arg_index_input_iterator.html#a6c40ea1dc7c0923b9010f1e938e07d22" title="The type of the element the iterator can point to. ">value_type</a>,</div>
-<div class="line"><a name="l00134"></a><span class="lineno">  134</span>&#160;        <a class="code" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">reference</a></div>
+<div class="line"><a name="l00134"></a><span class="lineno">  134</span>&#160;        <a class="code" href="classcub_1_1_arg_index_input_iterator.html#a8498b60c0f5f6b04caf6e3b18cfd825d" title="The type of a reference to an element the iterator can point to. ">reference</a></div>
 <div class="line"><a name="l00135"></a><span class="lineno">  135</span>&#160;      &gt;::type <a class="code" href="classcub_1_1_arg_index_input_iterator.html#a2cff9aacc1ba59ae9f74735c257261e5" title="The iterator category. ">iterator_category</a>;                                        </div>
 <div class="line"><a name="l00136"></a><span class="lineno">  136</span>&#160;<span class="preprocessor">#else</span></div>
 <div class="line"><a name="l00137"></a><span class="lineno"><a class="code" href="classcub_1_1_arg_index_input_iterator.html#a2cff9aacc1ba59ae9f74735c257261e5">  137</a></span>&#160;<span class="preprocessor"></span>    <span class="keyword">typedef</span> std::random_access_iterator_tag     <a class="code" href="classcub_1_1_arg_index_input_iterator.html#a2cff9aacc1ba59ae9f74735c257261e5" title="The iterator category. ">iterator_category</a>;      </div>
@@ -211,11 +211,11 @@
 <div class="line"><a name="l00168"></a><span class="lineno">  168</span>&#160;        <span class="keywordflow">return</span> *<span class="keyword">this</span>;</div>
 <div class="line"><a name="l00169"></a><span class="lineno">  169</span>&#160;    }</div>
 <div class="line"><a name="l00170"></a><span class="lineno">  170</span>&#160;</div>
-<div class="line"><a name="l00172"></a><span class="lineno"><a class="code" href="classcub_1_1_arg_index_input_iterator.html#a04e18ece1d7438dd929665e92091241f">  172</a></span>&#160;    __host__ __device__ __forceinline__ <a class="code" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">reference</a> <a class="code" href="classcub_1_1_arg_index_input_iterator.html#a04e18ece1d7438dd929665e92091241f" title="Indirection. ">operator*</a>()<span class="keyword"> const</span></div>
+<div class="line"><a name="l00172"></a><span class="lineno"><a class="code" href="classcub_1_1_arg_index_input_iterator.html#a04e18ece1d7438dd929665e92091241f">  172</a></span>&#160;    __host__ __device__ __forceinline__ <a class="code" href="classcub_1_1_arg_index_input_iterator.html#a8498b60c0f5f6b04caf6e3b18cfd825d" title="The type of a reference to an element the iterator can point to. ">reference</a> <a class="code" href="classcub_1_1_arg_index_input_iterator.html#a04e18ece1d7438dd929665e92091241f" title="Indirection. ">operator*</a>()<span class="keyword"> const</span></div>
 <div class="line"><a name="l00173"></a><span class="lineno">  173</span>&#160;<span class="keyword">    </span>{</div>
-<div class="line"><a name="l00174"></a><span class="lineno">  174</span>&#160;        <a class="code" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">value_type</a> retval;</div>
-<div class="line"><a name="l00175"></a><span class="lineno">  175</span>&#160;        retval.<a class="code" href="structcub_1_1_key_value_pair.html#a468bef1440a66f45bd9b5193594bf1a4" title="Item value. ">value</a> = itr[offset];</div>
-<div class="line"><a name="l00176"></a><span class="lineno">  176</span>&#160;        retval.<a class="code" href="structcub_1_1_key_value_pair.html#a648308be997ae9c79f47f6006ec7b494" title="Item key. ">key</a> = offset;</div>
+<div class="line"><a name="l00174"></a><span class="lineno">  174</span>&#160;        <a class="code" href="classcub_1_1_arg_index_input_iterator.html#a6c40ea1dc7c0923b9010f1e938e07d22" title="The type of the element the iterator can point to. ">value_type</a> retval;</div>
+<div class="line"><a name="l00175"></a><span class="lineno">  175</span>&#160;        retval.value = itr[offset];</div>
+<div class="line"><a name="l00176"></a><span class="lineno">  176</span>&#160;        retval.key = offset;</div>
 <div class="line"><a name="l00177"></a><span class="lineno">  177</span>&#160;        <span class="keywordflow">return</span> retval;</div>
 <div class="line"><a name="l00178"></a><span class="lineno">  178</span>&#160;    }</div>
 <div class="line"><a name="l00179"></a><span class="lineno">  179</span>&#160;</div>
@@ -253,13 +253,13 @@
 <div class="line"><a name="l00216"></a><span class="lineno">  216</span>&#160;    }</div>
 <div class="line"><a name="l00217"></a><span class="lineno">  217</span>&#160;</div>
 <div class="line"><a name="l00219"></a><span class="lineno">  219</span>&#160;    <span class="keyword">template</span> &lt;<span class="keyword">typename</span> Distance&gt;</div>
-<div class="line"><a name="l00220"></a><span class="lineno"><a class="code" href="classcub_1_1_arg_index_input_iterator.html#a23be223e3feb98da6c2828cb94d45ed6">  220</a></span>&#160;    __host__ __device__ __forceinline__ <a class="code" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">reference</a> <a class="code" href="classcub_1_1_arg_index_input_iterator.html#a23be223e3feb98da6c2828cb94d45ed6" title="Array subscript. ">operator[]</a>(Distance n)<span class="keyword"> const</span></div>
+<div class="line"><a name="l00220"></a><span class="lineno"><a class="code" href="classcub_1_1_arg_index_input_iterator.html#a23be223e3feb98da6c2828cb94d45ed6">  220</a></span>&#160;    __host__ __device__ __forceinline__ <a class="code" href="classcub_1_1_arg_index_input_iterator.html#a8498b60c0f5f6b04caf6e3b18cfd825d" title="The type of a reference to an element the iterator can point to. ">reference</a> <a class="code" href="classcub_1_1_arg_index_input_iterator.html#a23be223e3feb98da6c2828cb94d45ed6" title="Array subscript. ">operator[]</a>(Distance n)<span class="keyword"> const</span></div>
 <div class="line"><a name="l00221"></a><span class="lineno">  221</span>&#160;<span class="keyword">    </span>{</div>
 <div class="line"><a name="l00222"></a><span class="lineno">  222</span>&#160;        <a class="code" href="classcub_1_1_arg_index_input_iterator.html" title="A random-access input wrapper for pairing dereferenced values with their corresponding indices (formi...">self_type</a> offset = (*this) + n;</div>
 <div class="line"><a name="l00223"></a><span class="lineno">  223</span>&#160;        <span class="keywordflow">return</span> *offset;</div>
 <div class="line"><a name="l00224"></a><span class="lineno">  224</span>&#160;    }</div>
 <div class="line"><a name="l00225"></a><span class="lineno">  225</span>&#160;</div>
-<div class="line"><a name="l00227"></a><span class="lineno"><a class="code" href="classcub_1_1_arg_index_input_iterator.html#ade9ae7920b62507e07895b052caa325b">  227</a></span>&#160;    __host__ __device__ __forceinline__ <a class="code" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">pointer</a> <a class="code" href="classcub_1_1_arg_index_input_iterator.html#ade9ae7920b62507e07895b052caa325b" title="Structure dereference. ">operator-&gt;</a>()</div>
+<div class="line"><a name="l00227"></a><span class="lineno"><a class="code" href="classcub_1_1_arg_index_input_iterator.html#ade9ae7920b62507e07895b052caa325b">  227</a></span>&#160;    __host__ __device__ __forceinline__ <a class="code" href="classcub_1_1_arg_index_input_iterator.html#afbc0675053718d968d81b1a16e28dc5c" title="The type of a pointer to an element the iterator can point to. ">pointer</a> <a class="code" href="classcub_1_1_arg_index_input_iterator.html#ade9ae7920b62507e07895b052caa325b" title="Structure dereference. ">operator-&gt;</a>()</div>
 <div class="line"><a name="l00228"></a><span class="lineno">  228</span>&#160;    {</div>
 <div class="line"><a name="l00229"></a><span class="lineno">  229</span>&#160;        <span class="keywordflow">return</span> &amp;(*(*this));</div>
 <div class="line"><a name="l00230"></a><span class="lineno">  230</span>&#160;    }</div>
@@ -274,22 +274,28 @@
 <div class="line"><a name="l00241"></a><span class="lineno">  241</span>&#160;        <span class="keywordflow">return</span> ((itr != rhs.itr) || (offset != rhs.offset));</div>
 <div class="line"><a name="l00242"></a><span class="lineno">  242</span>&#160;    }</div>
 <div class="line"><a name="l00243"></a><span class="lineno">  243</span>&#160;</div>
-<div class="line"><a name="l00245"></a><span class="lineno"><a class="code" href="classcub_1_1_arg_index_input_iterator.html#a11b2073215294343bdc32f09c0bc5e80">  245</a></span>&#160;    <span class="keyword">friend</span> std::ostream&amp; <a class="code" href="classcub_1_1_arg_index_input_iterator.html#a11b2073215294343bdc32f09c0bc5e80" title="ostream operator ">operator&lt;&lt;</a>(std::ostream&amp; os, <span class="keyword">const</span> <a class="code" href="classcub_1_1_arg_index_input_iterator.html" title="A random-access input wrapper for pairing dereferenced values with their corresponding indices (formi...">self_type</a>&amp; itr)</div>
+<div class="line"><a name="l00245"></a><span class="lineno"><a class="code" href="classcub_1_1_arg_index_input_iterator.html#aa3dd1dfb19d87d8e0b5fc3c8773fd1dc">  245</a></span>&#160;    __host__ __device__ __forceinline__ <span class="keywordtype">void</span> <a class="code" href="classcub_1_1_arg_index_input_iterator.html#aa3dd1dfb19d87d8e0b5fc3c8773fd1dc" title="Normalize. ">normalize</a>()</div>
 <div class="line"><a name="l00246"></a><span class="lineno">  246</span>&#160;    {</div>
-<div class="line"><a name="l00247"></a><span class="lineno">  247</span>&#160;        <span class="keywordflow">return</span> os;</div>
-<div class="line"><a name="l00248"></a><span class="lineno">  248</span>&#160;    }</div>
-<div class="line"><a name="l00249"></a><span class="lineno">  249</span>&#160;};</div>
+<div class="line"><a name="l00247"></a><span class="lineno">  247</span>&#160;        itr += offset;</div>
+<div class="line"><a name="l00248"></a><span class="lineno">  248</span>&#160;        offset = 0;</div>
+<div class="line"><a name="l00249"></a><span class="lineno">  249</span>&#160;    }</div>
 <div class="line"><a name="l00250"></a><span class="lineno">  250</span>&#160;</div>
-<div class="line"><a name="l00251"></a><span class="lineno">  251</span>&#160;</div>
-<div class="line"><a name="l00252"></a><span class="lineno">  252</span>&#160;       <span class="comment">// end group UtilIterator</span></div>
-<div class="line"><a name="l00254"></a><span class="lineno">  254</span>&#160;</div>
-<div class="line"><a name="l00255"></a><span class="lineno">  255</span>&#160;}               <span class="comment">// CUB namespace</span></div>
-<div class="line"><a name="l00256"></a><span class="lineno">  256</span>&#160;CUB_NS_POSTFIX  <span class="comment">// Optional outer namespace(s)</span></div>
+<div class="line"><a name="l00252"></a><span class="lineno"><a class="code" href="classcub_1_1_arg_index_input_iterator.html#a11b2073215294343bdc32f09c0bc5e80">  252</a></span>&#160;    <span class="keyword">friend</span> std::ostream&amp; <a class="code" href="classcub_1_1_arg_index_input_iterator.html#a11b2073215294343bdc32f09c0bc5e80" title="ostream operator ">operator&lt;&lt;</a>(std::ostream&amp; os, <span class="keyword">const</span> <a class="code" href="classcub_1_1_arg_index_input_iterator.html" title="A random-access input wrapper for pairing dereferenced values with their corresponding indices (formi...">self_type</a>&amp; itr)</div>
+<div class="line"><a name="l00253"></a><span class="lineno">  253</span>&#160;    {</div>
+<div class="line"><a name="l00254"></a><span class="lineno">  254</span>&#160;        <span class="keywordflow">return</span> os;</div>
+<div class="line"><a name="l00255"></a><span class="lineno">  255</span>&#160;    }</div>
+<div class="line"><a name="l00256"></a><span class="lineno">  256</span>&#160;};</div>
+<div class="line"><a name="l00257"></a><span class="lineno">  257</span>&#160;</div>
+<div class="line"><a name="l00258"></a><span class="lineno">  258</span>&#160;</div>
+<div class="line"><a name="l00259"></a><span class="lineno">  259</span>&#160;       <span class="comment">// end group UtilIterator</span></div>
+<div class="line"><a name="l00261"></a><span class="lineno">  261</span>&#160;</div>
+<div class="line"><a name="l00262"></a><span class="lineno">  262</span>&#160;}               <span class="comment">// CUB namespace</span></div>
+<div class="line"><a name="l00263"></a><span class="lineno">  263</span>&#160;CUB_NS_POSTFIX  <span class="comment">// Optional outer namespace(s)</span></div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:02 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:13 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/block__discontinuity_8cuh.html b/docs/html/block__discontinuity_8cuh.html
index 412f338a0a..924df6faa7 100644
--- a/docs/html/block__discontinuity_8cuh.html
+++ b/docs/html/block__discontinuity_8cuh.html
@@ -138,7 +138,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/block__discontinuity_8cuh_source.html b/docs/html/block__discontinuity_8cuh_source.html
index 8f94d667e1..b4ee4efef1 100644
--- a/docs/html/block__discontinuity_8cuh_source.html
+++ b/docs/html/block__discontinuity_8cuh_source.html
@@ -286,7 +286,7 @@
 <div class="line"><a name="l00256"></a><span class="lineno">  256</span>&#160;</div>
 <div class="line"><a name="l00257"></a><span class="lineno">  257</span>&#160;<span class="keyword">public</span>:</div>
 <div class="line"><a name="l00258"></a><span class="lineno">  258</span>&#160;</div>
-<div class="line"><a name="l00260"></a><span class="lineno"><a class="code" href="structcub_1_1_block_discontinuity_1_1_temp_storage.html">  260</a></span>&#160;    <span class="keyword">struct </span><a class="code" href="structcub_1_1_block_discontinuity_1_1_temp_storage.html" title="The operations exposed by BlockDiscontinuity require a temporary memory allocation of this nested typ...">TempStorage</a> : <a class="code" href="structcub_1_1_uninitialized.html" title="A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions...">Uninitialized</a>&lt;_TempStorage&gt; {};</div>
+<div class="line"><a name="l00260"></a><span class="lineno"><a class="code" href="structcub_1_1_block_discontinuity_1_1_temp_storage.html">  260</a></span>&#160;    <span class="keyword">struct </span><a class="code" href="structcub_1_1_block_discontinuity_1_1_temp_storage.html" title="The operations exposed by BlockDiscontinuity require a temporary memory allocation of this nested typ...">TempStorage</a> : Uninitialized&lt;_TempStorage&gt; {};</div>
 <div class="line"><a name="l00261"></a><span class="lineno">  261</span>&#160;</div>
 <div class="line"><a name="l00262"></a><span class="lineno">  262</span>&#160;</div>
 <div class="line"><a name="l00263"></a><span class="lineno">  263</span>&#160;    <span class="comment">/******************************************************************/</span></div>
@@ -679,7 +679,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:02 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:13 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/block__exchange_8cuh.html b/docs/html/block__exchange_8cuh.html
index 3f6ca145b8..80d3eae731 100644
--- a/docs/html/block__exchange_8cuh.html
+++ b/docs/html/block__exchange_8cuh.html
@@ -140,7 +140,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/block__exchange_8cuh_source.html.REMOVED.git-id b/docs/html/block__exchange_8cuh_source.html.REMOVED.git-id
index b262a1b85f..97ec72db0e 100644
--- a/docs/html/block__exchange_8cuh_source.html.REMOVED.git-id
+++ b/docs/html/block__exchange_8cuh_source.html.REMOVED.git-id
@@ -1 +1 @@
-7b242fc630635a9576fbb98ed7104c98e20cf7c1
\ No newline at end of file
+c806ddfb4031fade9e67fee00d92ec496a8f9b90
\ No newline at end of file
diff --git a/docs/html/block__histogram_8cuh.html b/docs/html/block__histogram_8cuh.html
index d4fbb5681d..ec4df08145 100644
--- a/docs/html/block__histogram_8cuh.html
+++ b/docs/html/block__histogram_8cuh.html
@@ -149,7 +149,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/block__histogram_8cuh_source.html b/docs/html/block__histogram_8cuh_source.html
index 4408af40fb..8b5b248fe9 100644
--- a/docs/html/block__histogram_8cuh_source.html
+++ b/docs/html/block__histogram_8cuh_source.html
@@ -218,7 +218,7 @@
 <div class="line"><a name="l00214"></a><span class="lineno">  214</span>&#160;</div>
 <div class="line"><a name="l00215"></a><span class="lineno">  215</span>&#160;<span class="keyword">public</span>:</div>
 <div class="line"><a name="l00216"></a><span class="lineno">  216</span>&#160;</div>
-<div class="line"><a name="l00218"></a><span class="lineno"><a class="code" href="structcub_1_1_block_histogram_1_1_temp_storage.html">  218</a></span>&#160;    <span class="keyword">struct </span><a class="code" href="structcub_1_1_block_histogram_1_1_temp_storage.html" title="The operations exposed by BlockHistogram require a temporary memory allocation of this nested type fo...">TempStorage</a> : <a class="code" href="structcub_1_1_uninitialized.html" title="A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions...">Uninitialized</a>&lt;_TempStorage&gt; {};</div>
+<div class="line"><a name="l00218"></a><span class="lineno"><a class="code" href="structcub_1_1_block_histogram_1_1_temp_storage.html">  218</a></span>&#160;    <span class="keyword">struct </span><a class="code" href="structcub_1_1_block_histogram_1_1_temp_storage.html" title="The operations exposed by BlockHistogram require a temporary memory allocation of this nested type fo...">TempStorage</a> : Uninitialized&lt;_TempStorage&gt; {};</div>
 <div class="line"><a name="l00219"></a><span class="lineno">  219</span>&#160;</div>
 <div class="line"><a name="l00220"></a><span class="lineno">  220</span>&#160;</div>
 <div class="line"><a name="l00221"></a><span class="lineno">  221</span>&#160;    <span class="comment">/******************************************************************/</span></div>
@@ -295,7 +295,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:02 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/block__load_8cuh.html b/docs/html/block__load_8cuh.html
index 4182ba48ae..b80ab9674e 100644
--- a/docs/html/block__load_8cuh.html
+++ b/docs/html/block__load_8cuh.html
@@ -211,7 +211,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/block__load_8cuh_source.html.REMOVED.git-id b/docs/html/block__load_8cuh_source.html.REMOVED.git-id
index a92f73336b..56a9ba86b2 100644
--- a/docs/html/block__load_8cuh_source.html.REMOVED.git-id
+++ b/docs/html/block__load_8cuh_source.html.REMOVED.git-id
@@ -1 +1 @@
-f16e3469ee2feac63eb12d1fd509886f96672f72
\ No newline at end of file
+4f8a06e07d757fc2158c92b1d7f4a7bb7aa92332
\ No newline at end of file
diff --git a/docs/html/block__radix__sort_8cuh.html b/docs/html/block__radix__sort_8cuh.html
index b3fe1d860b..2d0a5cddef 100644
--- a/docs/html/block__radix__sort_8cuh.html
+++ b/docs/html/block__radix__sort_8cuh.html
@@ -141,7 +141,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/block__radix__sort_8cuh_source.html b/docs/html/block__radix__sort_8cuh_source.html
index 3db5e3e2eb..fef42dcec2 100644
--- a/docs/html/block__radix__sort_8cuh_source.html
+++ b/docs/html/block__radix__sort_8cuh_source.html
@@ -172,7 +172,7 @@
 <div class="line"><a name="l00146"></a><span class="lineno">  146</span>&#160;    };</div>
 <div class="line"><a name="l00147"></a><span class="lineno">  147</span>&#160;</div>
 <div class="line"><a name="l00148"></a><span class="lineno">  148</span>&#160;    <span class="comment">// KeyT traits and unsigned bits type</span></div>
-<div class="line"><a name="l00149"></a><span class="lineno">  149</span>&#160;    <span class="keyword">typedef</span> NumericTraits&lt;KeyT&gt;                  KeyTraits;</div>
+<div class="line"><a name="l00149"></a><span class="lineno">  149</span>&#160;    <span class="keyword">typedef</span> Traits&lt;KeyT&gt;                        KeyTraits;</div>
 <div class="line"><a name="l00150"></a><span class="lineno">  150</span>&#160;    <span class="keyword">typedef</span> <span class="keyword">typename</span> KeyTraits::UnsignedBits    UnsignedBits;</div>
 <div class="line"><a name="l00151"></a><span class="lineno">  151</span>&#160;</div>
 <div class="line"><a name="l00153"></a><span class="lineno">  153</span>&#160;    <span class="keyword">typedef</span> BlockRadixRank&lt;</div>
@@ -238,7 +238,7 @@
 <div class="line"><a name="l00221"></a><span class="lineno">  221</span>&#160;        <span class="keywordtype">int</span>             (&amp;ranks)[ITEMS_PER_THREAD],</div>
 <div class="line"><a name="l00222"></a><span class="lineno">  222</span>&#160;        <span class="keywordtype">int</span>             begin_bit,</div>
 <div class="line"><a name="l00223"></a><span class="lineno">  223</span>&#160;        <span class="keywordtype">int</span>             pass_bits,</div>
-<div class="line"><a name="l00224"></a><span class="lineno">  224</span>&#160;        <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;false&gt;</a> is_descending)</div>
+<div class="line"><a name="l00224"></a><span class="lineno">  224</span>&#160;        Int2Type&lt;false&gt; is_descending)</div>
 <div class="line"><a name="l00225"></a><span class="lineno">  225</span>&#160;    {</div>
 <div class="line"><a name="l00226"></a><span class="lineno">  226</span>&#160;        AscendingBlockRadixRank(temp_storage.asending_ranking_storage).RankKeys(</div>
 <div class="line"><a name="l00227"></a><span class="lineno">  227</span>&#160;            unsigned_keys,</div>
@@ -252,7 +252,7 @@
 <div class="line"><a name="l00236"></a><span class="lineno">  236</span>&#160;        <span class="keywordtype">int</span>             (&amp;ranks)[ITEMS_PER_THREAD],</div>
 <div class="line"><a name="l00237"></a><span class="lineno">  237</span>&#160;        <span class="keywordtype">int</span>             begin_bit,</div>
 <div class="line"><a name="l00238"></a><span class="lineno">  238</span>&#160;        <span class="keywordtype">int</span>             pass_bits,</div>
-<div class="line"><a name="l00239"></a><span class="lineno">  239</span>&#160;        <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;true&gt;</a>  is_descending)</div>
+<div class="line"><a name="l00239"></a><span class="lineno">  239</span>&#160;        Int2Type&lt;true&gt;  is_descending)</div>
 <div class="line"><a name="l00240"></a><span class="lineno">  240</span>&#160;    {</div>
 <div class="line"><a name="l00241"></a><span class="lineno">  241</span>&#160;        DescendingBlockRadixRank(temp_storage.descending_ranking_storage).RankKeys(</div>
 <div class="line"><a name="l00242"></a><span class="lineno">  242</span>&#160;            unsigned_keys,</div>
@@ -264,8 +264,8 @@
 <div class="line"><a name="l00249"></a><span class="lineno">  249</span>&#160;    __device__ __forceinline__ <span class="keywordtype">void</span> ExchangeValues(</div>
 <div class="line"><a name="l00250"></a><span class="lineno">  250</span>&#160;        ValueT          (&amp;values)[ITEMS_PER_THREAD],</div>
 <div class="line"><a name="l00251"></a><span class="lineno">  251</span>&#160;        <span class="keywordtype">int</span>             (&amp;ranks)[ITEMS_PER_THREAD],</div>
-<div class="line"><a name="l00252"></a><span class="lineno">  252</span>&#160;        <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;false&gt;</a> is_keys_only,</div>
-<div class="line"><a name="l00253"></a><span class="lineno">  253</span>&#160;        <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;true&gt;</a>  is_blocked)</div>
+<div class="line"><a name="l00252"></a><span class="lineno">  252</span>&#160;        Int2Type&lt;false&gt; is_keys_only,</div>
+<div class="line"><a name="l00253"></a><span class="lineno">  253</span>&#160;        Int2Type&lt;true&gt;  is_blocked)</div>
 <div class="line"><a name="l00254"></a><span class="lineno">  254</span>&#160;    {</div>
 <div class="line"><a name="l00255"></a><span class="lineno">  255</span>&#160;        __syncthreads();</div>
 <div class="line"><a name="l00256"></a><span class="lineno">  256</span>&#160;</div>
@@ -276,8 +276,8 @@
 <div class="line"><a name="l00262"></a><span class="lineno">  262</span>&#160;    __device__ __forceinline__ <span class="keywordtype">void</span> ExchangeValues(</div>
 <div class="line"><a name="l00263"></a><span class="lineno">  263</span>&#160;        ValueT          (&amp;values)[ITEMS_PER_THREAD],</div>
 <div class="line"><a name="l00264"></a><span class="lineno">  264</span>&#160;        <span class="keywordtype">int</span>             (&amp;ranks)[ITEMS_PER_THREAD],</div>
-<div class="line"><a name="l00265"></a><span class="lineno">  265</span>&#160;        <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;false&gt;</a> is_keys_only,</div>
-<div class="line"><a name="l00266"></a><span class="lineno">  266</span>&#160;        <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;false&gt;</a> is_blocked)</div>
+<div class="line"><a name="l00265"></a><span class="lineno">  265</span>&#160;        Int2Type&lt;false&gt; is_keys_only,</div>
+<div class="line"><a name="l00266"></a><span class="lineno">  266</span>&#160;        Int2Type&lt;false&gt; is_blocked)</div>
 <div class="line"><a name="l00267"></a><span class="lineno">  267</span>&#160;    {</div>
 <div class="line"><a name="l00268"></a><span class="lineno">  268</span>&#160;        __syncthreads();</div>
 <div class="line"><a name="l00269"></a><span class="lineno">  269</span>&#160;</div>
@@ -289,8 +289,8 @@
 <div class="line"><a name="l00276"></a><span class="lineno">  276</span>&#160;    __device__ __forceinline__ <span class="keywordtype">void</span> ExchangeValues(</div>
 <div class="line"><a name="l00277"></a><span class="lineno">  277</span>&#160;        ValueT                  (&amp;values)[ITEMS_PER_THREAD],</div>
 <div class="line"><a name="l00278"></a><span class="lineno">  278</span>&#160;        <span class="keywordtype">int</span>                     (&amp;ranks)[ITEMS_PER_THREAD],</div>
-<div class="line"><a name="l00279"></a><span class="lineno">  279</span>&#160;        <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;true&gt;</a>          is_keys_only,</div>
-<div class="line"><a name="l00280"></a><span class="lineno">  280</span>&#160;        <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;IS_BLOCKED&gt;</a>    is_blocked)</div>
+<div class="line"><a name="l00279"></a><span class="lineno">  279</span>&#160;        Int2Type&lt;true&gt;          is_keys_only,</div>
+<div class="line"><a name="l00280"></a><span class="lineno">  280</span>&#160;        Int2Type&lt;IS_BLOCKED&gt;    is_blocked)</div>
 <div class="line"><a name="l00281"></a><span class="lineno">  281</span>&#160;    {}</div>
 <div class="line"><a name="l00282"></a><span class="lineno">  282</span>&#160;</div>
 <div class="line"><a name="l00284"></a><span class="lineno">  284</span>&#160;    <span class="keyword">template</span> &lt;<span class="keywordtype">int</span> DESCENDING, <span class="keywordtype">int</span> KEYS_ONLY&gt;</div>
@@ -299,8 +299,8 @@
 <div class="line"><a name="l00287"></a><span class="lineno">  287</span>&#160;        ValueT                  (&amp;values)[ITEMS_PER_THREAD],        </div>
 <div class="line"><a name="l00288"></a><span class="lineno">  288</span>&#160;        <span class="keywordtype">int</span>                     begin_bit,                          </div>
 <div class="line"><a name="l00289"></a><span class="lineno">  289</span>&#160;        <span class="keywordtype">int</span>                     end_bit,                            </div>
-<div class="line"><a name="l00290"></a><span class="lineno">  290</span>&#160;        <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;DESCENDING&gt;</a>    is_descending,                      </div>
-<div class="line"><a name="l00291"></a><span class="lineno">  291</span>&#160;        <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;KEYS_ONLY&gt;</a>     is_keys_only)                       </div>
+<div class="line"><a name="l00290"></a><span class="lineno">  290</span>&#160;        Int2Type&lt;DESCENDING&gt;    is_descending,                      </div>
+<div class="line"><a name="l00291"></a><span class="lineno">  291</span>&#160;        Int2Type&lt;KEYS_ONLY&gt;     is_keys_only)                       </div>
 <div class="line"><a name="l00292"></a><span class="lineno">  292</span>&#160;    {</div>
 <div class="line"><a name="l00293"></a><span class="lineno">  293</span>&#160;        UnsignedBits (&amp;unsigned_keys)[ITEMS_PER_THREAD] =</div>
 <div class="line"><a name="l00294"></a><span class="lineno">  294</span>&#160;            <span class="keyword">reinterpret_cast&lt;</span>UnsignedBits (&amp;)[ITEMS_PER_THREAD]<span class="keyword">&gt;</span>(keys);</div>
@@ -328,7 +328,7 @@
 <div class="line"><a name="l00316"></a><span class="lineno">  316</span>&#160;            BlockExchangeKeys(temp_storage.exchange_keys).ScatterToBlocked(keys, ranks);</div>
 <div class="line"><a name="l00317"></a><span class="lineno">  317</span>&#160;</div>
 <div class="line"><a name="l00318"></a><span class="lineno">  318</span>&#160;            <span class="comment">// Exchange values through shared memory in blocked arrangement</span></div>
-<div class="line"><a name="l00319"></a><span class="lineno">  319</span>&#160;            ExchangeValues(values, ranks, is_keys_only, <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;true&gt;</a>());</div>
+<div class="line"><a name="l00319"></a><span class="lineno">  319</span>&#160;            ExchangeValues(values, ranks, is_keys_only, Int2Type&lt;true&gt;());</div>
 <div class="line"><a name="l00320"></a><span class="lineno">  320</span>&#160;</div>
 <div class="line"><a name="l00321"></a><span class="lineno">  321</span>&#160;            <span class="comment">// Quit if done</span></div>
 <div class="line"><a name="l00322"></a><span class="lineno">  322</span>&#160;            <span class="keywordflow">if</span> (begin_bit &gt;= end_bit) <span class="keywordflow">break</span>;</div>
@@ -354,8 +354,8 @@
 <div class="line"><a name="l00343"></a><span class="lineno">  343</span>&#160;        ValueT                  (&amp;values)[ITEMS_PER_THREAD],        </div>
 <div class="line"><a name="l00344"></a><span class="lineno">  344</span>&#160;        <span class="keywordtype">int</span>                     begin_bit,                          </div>
 <div class="line"><a name="l00345"></a><span class="lineno">  345</span>&#160;        <span class="keywordtype">int</span>                     end_bit,                            </div>
-<div class="line"><a name="l00346"></a><span class="lineno">  346</span>&#160;        <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;DESCENDING&gt;</a>    is_descending,                      </div>
-<div class="line"><a name="l00347"></a><span class="lineno">  347</span>&#160;        <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;KEYS_ONLY&gt;</a>     is_keys_only)                       </div>
+<div class="line"><a name="l00346"></a><span class="lineno">  346</span>&#160;        Int2Type&lt;DESCENDING&gt;    is_descending,                      </div>
+<div class="line"><a name="l00347"></a><span class="lineno">  347</span>&#160;        Int2Type&lt;KEYS_ONLY&gt;     is_keys_only)                       </div>
 <div class="line"><a name="l00348"></a><span class="lineno">  348</span>&#160;    {</div>
 <div class="line"><a name="l00349"></a><span class="lineno">  349</span>&#160;        UnsignedBits (&amp;unsigned_keys)[ITEMS_PER_THREAD] =</div>
 <div class="line"><a name="l00350"></a><span class="lineno">  350</span>&#160;            <span class="keyword">reinterpret_cast&lt;</span>UnsignedBits (&amp;)[ITEMS_PER_THREAD]<span class="keyword">&gt;</span>(keys);</div>
@@ -386,7 +386,7 @@
 <div class="line"><a name="l00375"></a><span class="lineno">  375</span>&#160;                BlockExchangeKeys(temp_storage.exchange_keys).ScatterToStriped(keys, ranks);</div>
 <div class="line"><a name="l00376"></a><span class="lineno">  376</span>&#160;</div>
 <div class="line"><a name="l00377"></a><span class="lineno">  377</span>&#160;                <span class="comment">// Last pass exchanges through shared memory in striped arrangement</span></div>
-<div class="line"><a name="l00378"></a><span class="lineno">  378</span>&#160;                ExchangeValues(values, ranks, is_keys_only, <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;false&gt;</a>());</div>
+<div class="line"><a name="l00378"></a><span class="lineno">  378</span>&#160;                ExchangeValues(values, ranks, is_keys_only, Int2Type&lt;false&gt;());</div>
 <div class="line"><a name="l00379"></a><span class="lineno">  379</span>&#160;</div>
 <div class="line"><a name="l00380"></a><span class="lineno">  380</span>&#160;                <span class="comment">// Quit</span></div>
 <div class="line"><a name="l00381"></a><span class="lineno">  381</span>&#160;                <span class="keywordflow">break</span>;</div>
@@ -396,7 +396,7 @@
 <div class="line"><a name="l00385"></a><span class="lineno">  385</span>&#160;            BlockExchangeKeys(temp_storage.exchange_keys).ScatterToBlocked(keys, ranks);</div>
 <div class="line"><a name="l00386"></a><span class="lineno">  386</span>&#160;</div>
 <div class="line"><a name="l00387"></a><span class="lineno">  387</span>&#160;            <span class="comment">// Exchange values through shared memory in blocked arrangement</span></div>
-<div class="line"><a name="l00388"></a><span class="lineno">  388</span>&#160;            ExchangeValues(values, ranks, is_keys_only, <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;true&gt;</a>());</div>
+<div class="line"><a name="l00388"></a><span class="lineno">  388</span>&#160;            ExchangeValues(values, ranks, is_keys_only, Int2Type&lt;true&gt;());</div>
 <div class="line"><a name="l00389"></a><span class="lineno">  389</span>&#160;</div>
 <div class="line"><a name="l00390"></a><span class="lineno">  390</span>&#160;            __syncthreads();</div>
 <div class="line"><a name="l00391"></a><span class="lineno">  391</span>&#160;        }</div>
@@ -411,7 +411,7 @@
 <div class="line"><a name="l00400"></a><span class="lineno">  400</span>&#160;</div>
 <div class="line"><a name="l00401"></a><span class="lineno">  401</span>&#160;<span class="preprocessor">#endif // DOXYGEN_SHOULD_SKIP_THIS</span></div>
 <div class="line"><a name="l00402"></a><span class="lineno">  402</span>&#160;<span class="preprocessor"></span></div>
-<div class="line"><a name="l00404"></a><span class="lineno"><a class="code" href="structcub_1_1_block_radix_sort_1_1_temp_storage.html">  404</a></span>&#160;    <span class="keyword">struct </span><a class="code" href="structcub_1_1_block_radix_sort_1_1_temp_storage.html" title="The operations exposed by BlockScan require a temporary memory allocation of this nested type for thr...">TempStorage</a> : <a class="code" href="structcub_1_1_uninitialized.html" title="A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions...">Uninitialized</a>&lt;_TempStorage&gt; {};</div>
+<div class="line"><a name="l00404"></a><span class="lineno"><a class="code" href="structcub_1_1_block_radix_sort_1_1_temp_storage.html">  404</a></span>&#160;    <span class="keyword">struct </span><a class="code" href="structcub_1_1_block_radix_sort_1_1_temp_storage.html" title="The operations exposed by BlockScan require a temporary memory allocation of this nested type for thr...">TempStorage</a> : Uninitialized&lt;_TempStorage&gt; {};</div>
 <div class="line"><a name="l00405"></a><span class="lineno">  405</span>&#160;</div>
 <div class="line"><a name="l00406"></a><span class="lineno">  406</span>&#160;</div>
 <div class="line"><a name="l00407"></a><span class="lineno">  407</span>&#160;    <span class="comment">/******************************************************************/</span></div>
@@ -438,9 +438,9 @@
 <div class="line"><a name="l00478"></a><span class="lineno">  478</span>&#160;        <span class="keywordtype">int</span>     begin_bit   = 0,                    </div>
 <div class="line"><a name="l00479"></a><span class="lineno">  479</span>&#160;        <span class="keywordtype">int</span>     end_bit     = <span class="keyword">sizeof</span>(KeyT) * 8)      </div>
 <div class="line"><a name="l00480"></a><span class="lineno">  480</span>&#160;    {</div>
-<div class="line"><a name="l00481"></a><span class="lineno">  481</span>&#160;        <a class="code" href="structcub_1_1_null_type.html" title="A simple &quot;NULL&quot; marker type. ">NullType</a> values[ITEMS_PER_THREAD];</div>
+<div class="line"><a name="l00481"></a><span class="lineno">  481</span>&#160;        NullType values[ITEMS_PER_THREAD];</div>
 <div class="line"><a name="l00482"></a><span class="lineno">  482</span>&#160;</div>
-<div class="line"><a name="l00483"></a><span class="lineno">  483</span>&#160;        SortBlocked(keys, values, begin_bit, end_bit, <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;false&gt;</a>(), <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;KEYS_ONLY&gt;</a>());</div>
+<div class="line"><a name="l00483"></a><span class="lineno">  483</span>&#160;        SortBlocked(keys, values, begin_bit, end_bit, Int2Type&lt;false&gt;(), Int2Type&lt;KEYS_ONLY&gt;());</div>
 <div class="line"><a name="l00484"></a><span class="lineno">  484</span>&#160;    }</div>
 <div class="line"><a name="l00485"></a><span class="lineno">  485</span>&#160;</div>
 <div class="line"><a name="l00486"></a><span class="lineno">  486</span>&#160;</div>
@@ -450,7 +450,7 @@
 <div class="line"><a name="l00534"></a><span class="lineno">  534</span>&#160;        <span class="keywordtype">int</span>     begin_bit   = 0,                    </div>
 <div class="line"><a name="l00535"></a><span class="lineno">  535</span>&#160;        <span class="keywordtype">int</span>     end_bit     = <span class="keyword">sizeof</span>(KeyT) * 8)      </div>
 <div class="line"><a name="l00536"></a><span class="lineno">  536</span>&#160;    {</div>
-<div class="line"><a name="l00537"></a><span class="lineno">  537</span>&#160;        SortBlocked(keys, values, begin_bit, end_bit, <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;false&gt;</a>(), <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;KEYS_ONLY&gt;</a>());</div>
+<div class="line"><a name="l00537"></a><span class="lineno">  537</span>&#160;        SortBlocked(keys, values, begin_bit, end_bit, Int2Type&lt;false&gt;(), Int2Type&lt;KEYS_ONLY&gt;());</div>
 <div class="line"><a name="l00538"></a><span class="lineno">  538</span>&#160;    }</div>
 <div class="line"><a name="l00539"></a><span class="lineno">  539</span>&#160;</div>
 <div class="line"><a name="l00577"></a><span class="lineno"><a class="code" href="classcub_1_1_block_radix_sort.html#a2c89d00a1082632104498352b15264b3">  577</a></span>&#160;    __device__ __forceinline__ <span class="keywordtype">void</span> <a class="code" href="classcub_1_1_block_radix_sort.html#a2c89d00a1082632104498352b15264b3" title="Performs a descending block-wide radix sort over a blocked arrangement of keys. ">SortDescending</a>(</div>
@@ -458,9 +458,9 @@
 <div class="line"><a name="l00579"></a><span class="lineno">  579</span>&#160;        <span class="keywordtype">int</span>     begin_bit   = 0,                    </div>
 <div class="line"><a name="l00580"></a><span class="lineno">  580</span>&#160;        <span class="keywordtype">int</span>     end_bit     = <span class="keyword">sizeof</span>(KeyT) * 8)      </div>
 <div class="line"><a name="l00581"></a><span class="lineno">  581</span>&#160;    {</div>
-<div class="line"><a name="l00582"></a><span class="lineno">  582</span>&#160;        <a class="code" href="structcub_1_1_null_type.html" title="A simple &quot;NULL&quot; marker type. ">NullType</a> values[ITEMS_PER_THREAD];</div>
+<div class="line"><a name="l00582"></a><span class="lineno">  582</span>&#160;        NullType values[ITEMS_PER_THREAD];</div>
 <div class="line"><a name="l00583"></a><span class="lineno">  583</span>&#160;</div>
-<div class="line"><a name="l00584"></a><span class="lineno">  584</span>&#160;        SortBlocked(keys, values, begin_bit, end_bit, <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;true&gt;</a>(), <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;KEYS_ONLY&gt;</a>());</div>
+<div class="line"><a name="l00584"></a><span class="lineno">  584</span>&#160;        SortBlocked(keys, values, begin_bit, end_bit, Int2Type&lt;true&gt;(), Int2Type&lt;KEYS_ONLY&gt;());</div>
 <div class="line"><a name="l00585"></a><span class="lineno">  585</span>&#160;    }</div>
 <div class="line"><a name="l00586"></a><span class="lineno">  586</span>&#160;</div>
 <div class="line"><a name="l00587"></a><span class="lineno">  587</span>&#160;</div>
@@ -470,7 +470,7 @@
 <div class="line"><a name="l00635"></a><span class="lineno">  635</span>&#160;        <span class="keywordtype">int</span>     begin_bit   = 0,                    </div>
 <div class="line"><a name="l00636"></a><span class="lineno">  636</span>&#160;        <span class="keywordtype">int</span>     end_bit     = <span class="keyword">sizeof</span>(KeyT) * 8)      </div>
 <div class="line"><a name="l00637"></a><span class="lineno">  637</span>&#160;    {</div>
-<div class="line"><a name="l00638"></a><span class="lineno">  638</span>&#160;        SortBlocked(keys, values, begin_bit, end_bit, <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;true&gt;</a>(), <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;KEYS_ONLY&gt;</a>());</div>
+<div class="line"><a name="l00638"></a><span class="lineno">  638</span>&#160;        SortBlocked(keys, values, begin_bit, end_bit, Int2Type&lt;true&gt;(), Int2Type&lt;KEYS_ONLY&gt;());</div>
 <div class="line"><a name="l00639"></a><span class="lineno">  639</span>&#160;    }</div>
 <div class="line"><a name="l00640"></a><span class="lineno">  640</span>&#160;</div>
 <div class="line"><a name="l00641"></a><span class="lineno">  641</span>&#160;</div>
@@ -482,9 +482,9 @@
 <div class="line"><a name="l00689"></a><span class="lineno">  689</span>&#160;        <span class="keywordtype">int</span>     begin_bit   = 0,                    </div>
 <div class="line"><a name="l00690"></a><span class="lineno">  690</span>&#160;        <span class="keywordtype">int</span>     end_bit     = <span class="keyword">sizeof</span>(KeyT) * 8)      </div>
 <div class="line"><a name="l00691"></a><span class="lineno">  691</span>&#160;    {</div>
-<div class="line"><a name="l00692"></a><span class="lineno">  692</span>&#160;        <a class="code" href="structcub_1_1_null_type.html" title="A simple &quot;NULL&quot; marker type. ">NullType</a> values[ITEMS_PER_THREAD];</div>
+<div class="line"><a name="l00692"></a><span class="lineno">  692</span>&#160;        NullType values[ITEMS_PER_THREAD];</div>
 <div class="line"><a name="l00693"></a><span class="lineno">  693</span>&#160;</div>
-<div class="line"><a name="l00694"></a><span class="lineno">  694</span>&#160;        SortBlockedToStriped(keys, values, begin_bit, end_bit, <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;false&gt;</a>(), <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;KEYS_ONLY&gt;</a>());</div>
+<div class="line"><a name="l00694"></a><span class="lineno">  694</span>&#160;        SortBlockedToStriped(keys, values, begin_bit, end_bit, Int2Type&lt;false&gt;(), Int2Type&lt;KEYS_ONLY&gt;());</div>
 <div class="line"><a name="l00695"></a><span class="lineno">  695</span>&#160;    }</div>
 <div class="line"><a name="l00696"></a><span class="lineno">  696</span>&#160;</div>
 <div class="line"><a name="l00697"></a><span class="lineno">  697</span>&#160;</div>
@@ -494,7 +494,7 @@
 <div class="line"><a name="l00745"></a><span class="lineno">  745</span>&#160;        <span class="keywordtype">int</span>     begin_bit   = 0,                    </div>
 <div class="line"><a name="l00746"></a><span class="lineno">  746</span>&#160;        <span class="keywordtype">int</span>     end_bit     = <span class="keyword">sizeof</span>(KeyT) * 8)      </div>
 <div class="line"><a name="l00747"></a><span class="lineno">  747</span>&#160;    {</div>
-<div class="line"><a name="l00748"></a><span class="lineno">  748</span>&#160;        SortBlockedToStriped(keys, values, begin_bit, end_bit, <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;false&gt;</a>(), <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;KEYS_ONLY&gt;</a>());</div>
+<div class="line"><a name="l00748"></a><span class="lineno">  748</span>&#160;        SortBlockedToStriped(keys, values, begin_bit, end_bit, Int2Type&lt;false&gt;(), Int2Type&lt;KEYS_ONLY&gt;());</div>
 <div class="line"><a name="l00749"></a><span class="lineno">  749</span>&#160;    }</div>
 <div class="line"><a name="l00750"></a><span class="lineno">  750</span>&#160;</div>
 <div class="line"><a name="l00751"></a><span class="lineno">  751</span>&#160;</div>
@@ -503,9 +503,9 @@
 <div class="line"><a name="l00792"></a><span class="lineno">  792</span>&#160;        <span class="keywordtype">int</span>     begin_bit   = 0,                    </div>
 <div class="line"><a name="l00793"></a><span class="lineno">  793</span>&#160;        <span class="keywordtype">int</span>     end_bit     = <span class="keyword">sizeof</span>(KeyT) * 8)      </div>
 <div class="line"><a name="l00794"></a><span class="lineno">  794</span>&#160;    {</div>
-<div class="line"><a name="l00795"></a><span class="lineno">  795</span>&#160;        <a class="code" href="structcub_1_1_null_type.html" title="A simple &quot;NULL&quot; marker type. ">NullType</a> values[ITEMS_PER_THREAD];</div>
+<div class="line"><a name="l00795"></a><span class="lineno">  795</span>&#160;        NullType values[ITEMS_PER_THREAD];</div>
 <div class="line"><a name="l00796"></a><span class="lineno">  796</span>&#160;</div>
-<div class="line"><a name="l00797"></a><span class="lineno">  797</span>&#160;        SortBlockedToStriped(keys, values, begin_bit, end_bit, <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;true&gt;</a>(), <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;KEYS_ONLY&gt;</a>());</div>
+<div class="line"><a name="l00797"></a><span class="lineno">  797</span>&#160;        SortBlockedToStriped(keys, values, begin_bit, end_bit, Int2Type&lt;true&gt;(), Int2Type&lt;KEYS_ONLY&gt;());</div>
 <div class="line"><a name="l00798"></a><span class="lineno">  798</span>&#160;    }</div>
 <div class="line"><a name="l00799"></a><span class="lineno">  799</span>&#160;</div>
 <div class="line"><a name="l00800"></a><span class="lineno">  800</span>&#160;</div>
@@ -515,7 +515,7 @@
 <div class="line"><a name="l00848"></a><span class="lineno">  848</span>&#160;        <span class="keywordtype">int</span>     begin_bit   = 0,                    </div>
 <div class="line"><a name="l00849"></a><span class="lineno">  849</span>&#160;        <span class="keywordtype">int</span>     end_bit     = <span class="keyword">sizeof</span>(KeyT) * 8)      </div>
 <div class="line"><a name="l00850"></a><span class="lineno">  850</span>&#160;    {</div>
-<div class="line"><a name="l00851"></a><span class="lineno">  851</span>&#160;        SortBlockedToStriped(keys, values, begin_bit, end_bit, <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;true&gt;</a>(), <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;KEYS_ONLY&gt;</a>());</div>
+<div class="line"><a name="l00851"></a><span class="lineno">  851</span>&#160;        SortBlockedToStriped(keys, values, begin_bit, end_bit, Int2Type&lt;true&gt;(), Int2Type&lt;KEYS_ONLY&gt;());</div>
 <div class="line"><a name="l00852"></a><span class="lineno">  852</span>&#160;    }</div>
 <div class="line"><a name="l00853"></a><span class="lineno">  853</span>&#160;</div>
 <div class="line"><a name="l00854"></a><span class="lineno">  854</span>&#160;</div>
@@ -529,7 +529,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:02 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/block__reduce_8cuh.html b/docs/html/block__reduce_8cuh.html
index cb6db45a82..00bf842208 100644
--- a/docs/html/block__reduce_8cuh.html
+++ b/docs/html/block__reduce_8cuh.html
@@ -151,7 +151,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/block__reduce_8cuh_source.html b/docs/html/block__reduce_8cuh_source.html
index 9405d5d6b2..fd466c475d 100644
--- a/docs/html/block__reduce_8cuh_source.html
+++ b/docs/html/block__reduce_8cuh_source.html
@@ -221,7 +221,7 @@
 <div class="line"><a name="l00273"></a><span class="lineno">  273</span>&#160;</div>
 <div class="line"><a name="l00274"></a><span class="lineno">  274</span>&#160;<span class="keyword">public</span>:</div>
 <div class="line"><a name="l00275"></a><span class="lineno">  275</span>&#160;</div>
-<div class="line"><a name="l00277"></a><span class="lineno"><a class="code" href="structcub_1_1_block_reduce_1_1_temp_storage.html">  277</a></span>&#160;    <span class="keyword">struct </span><a class="code" href="structcub_1_1_block_reduce_1_1_temp_storage.html" title="The operations exposed by BlockReduce require a temporary memory allocation of this nested type for t...">TempStorage</a> : <a class="code" href="structcub_1_1_uninitialized.html" title="A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions...">Uninitialized</a>&lt;_TempStorage&gt; {};</div>
+<div class="line"><a name="l00277"></a><span class="lineno"><a class="code" href="structcub_1_1_block_reduce_1_1_temp_storage.html">  277</a></span>&#160;    <span class="keyword">struct </span><a class="code" href="structcub_1_1_block_reduce_1_1_temp_storage.html" title="The operations exposed by BlockReduce require a temporary memory allocation of this nested type for t...">TempStorage</a> : Uninitialized&lt;_TempStorage&gt; {};</div>
 <div class="line"><a name="l00278"></a><span class="lineno">  278</span>&#160;</div>
 <div class="line"><a name="l00279"></a><span class="lineno">  279</span>&#160;</div>
 <div class="line"><a name="l00280"></a><span class="lineno">  280</span>&#160;    <span class="comment">/******************************************************************/</span></div>
@@ -328,7 +328,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:02 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/block__scan_8cuh.html b/docs/html/block__scan_8cuh.html
index cfccdd27ca..561a96141a 100644
--- a/docs/html/block__scan_8cuh.html
+++ b/docs/html/block__scan_8cuh.html
@@ -151,7 +151,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/block__scan_8cuh_source.html.REMOVED.git-id b/docs/html/block__scan_8cuh_source.html.REMOVED.git-id
index 3a8cc12acf..3716f488da 100644
--- a/docs/html/block__scan_8cuh_source.html.REMOVED.git-id
+++ b/docs/html/block__scan_8cuh_source.html.REMOVED.git-id
@@ -1 +1 @@
-5448840077c040640722b0624858306969fe6a45
\ No newline at end of file
+753cec633a135d727295bfdeeac9c4426a237ab5
\ No newline at end of file
diff --git a/docs/html/block__store_8cuh.html b/docs/html/block__store_8cuh.html
index 40d87fa334..8d261660a5 100644
--- a/docs/html/block__store_8cuh.html
+++ b/docs/html/block__store_8cuh.html
@@ -199,7 +199,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/block__store_8cuh_source.html b/docs/html/block__store_8cuh_source.html
index 78eb55a778..10223f9ebe 100644
--- a/docs/html/block__store_8cuh_source.html
+++ b/docs/html/block__store_8cuh_source.html
@@ -207,7 +207,7 @@
 <div class="line"><a name="l00155"></a><span class="lineno">  155</span>&#160;    };</div>
 <div class="line"><a name="l00156"></a><span class="lineno">  156</span>&#160;</div>
 <div class="line"><a name="l00157"></a><span class="lineno">  157</span>&#160;    <span class="comment">// Vector type</span></div>
-<div class="line"><a name="l00158"></a><span class="lineno">  158</span>&#160;    <span class="keyword">typedef</span> <span class="keyword">typename</span> <a class="code" href="structcub_1_1_cub_vector.html" title="Exposes a member typedef Type that names the corresponding CUDA vector type if one exists...">CubVector&lt;T, VEC_SIZE&gt;::Type</a> Vector;</div>
+<div class="line"><a name="l00158"></a><span class="lineno">  158</span>&#160;    <span class="keyword">typedef</span> <span class="keyword">typename</span> CubVector&lt;T, VEC_SIZE&gt;::Type Vector;</div>
 <div class="line"><a name="l00159"></a><span class="lineno">  159</span>&#160;</div>
 <div class="line"><a name="l00160"></a><span class="lineno">  160</span>&#160;    <span class="comment">// Alias global pointer</span></div>
 <div class="line"><a name="l00161"></a><span class="lineno">  161</span>&#160;    Vector *block_ptr_vectors = <span class="keyword">reinterpret_cast&lt;</span>Vector*<span class="keyword">&gt;</span>(<span class="keyword">const_cast&lt;</span>T*<span class="keyword">&gt;</span>(block_ptr));</div>
@@ -384,7 +384,7 @@
 <div class="line"><a name="l00533"></a><span class="lineno">  533</span>&#160;    <span class="keyword">template</span> &lt;<span class="keywordtype">int</span> DUMMY&gt;</div>
 <div class="line"><a name="l00534"></a><span class="lineno">  534</span>&#160;    <span class="keyword">struct </span>StoreInternal&lt;BLOCK_STORE_DIRECT, DUMMY&gt;</div>
 <div class="line"><a name="l00535"></a><span class="lineno">  535</span>&#160;    {</div>
-<div class="line"><a name="l00537"></a><span class="lineno">  537</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_null_type.html" title="A simple &quot;NULL&quot; marker type. ">NullType</a> <a class="code" href="structcub_1_1_block_store_1_1_temp_storage.html" title="The operations exposed by BlockStore require a temporary memory allocation of this nested type for th...">TempStorage</a>;</div>
+<div class="line"><a name="l00537"></a><span class="lineno">  537</span>&#160;        <span class="keyword">typedef</span> NullType <a class="code" href="structcub_1_1_block_store_1_1_temp_storage.html" title="The operations exposed by BlockStore require a temporary memory allocation of this nested type for th...">TempStorage</a>;</div>
 <div class="line"><a name="l00538"></a><span class="lineno">  538</span>&#160;</div>
 <div class="line"><a name="l00540"></a><span class="lineno">  540</span>&#160;        <span class="keywordtype">int</span> linear_tid;</div>
 <div class="line"><a name="l00541"></a><span class="lineno">  541</span>&#160;</div>
@@ -415,7 +415,7 @@
 <div class="line"><a name="l00572"></a><span class="lineno">  572</span>&#160;    <span class="keyword">template</span> &lt;<span class="keywordtype">int</span> DUMMY&gt;</div>
 <div class="line"><a name="l00573"></a><span class="lineno">  573</span>&#160;    <span class="keyword">struct </span>StoreInternal&lt;BLOCK_STORE_VECTORIZE, DUMMY&gt;</div>
 <div class="line"><a name="l00574"></a><span class="lineno">  574</span>&#160;    {</div>
-<div class="line"><a name="l00576"></a><span class="lineno">  576</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_null_type.html" title="A simple &quot;NULL&quot; marker type. ">NullType</a> <a class="code" href="structcub_1_1_block_store_1_1_temp_storage.html" title="The operations exposed by BlockStore require a temporary memory allocation of this nested type for th...">TempStorage</a>;</div>
+<div class="line"><a name="l00576"></a><span class="lineno">  576</span>&#160;        <span class="keyword">typedef</span> NullType <a class="code" href="structcub_1_1_block_store_1_1_temp_storage.html" title="The operations exposed by BlockStore require a temporary memory allocation of this nested type for th...">TempStorage</a>;</div>
 <div class="line"><a name="l00577"></a><span class="lineno">  577</span>&#160;</div>
 <div class="line"><a name="l00579"></a><span class="lineno">  579</span>&#160;        <span class="keywordtype">int</span> linear_tid;</div>
 <div class="line"><a name="l00580"></a><span class="lineno">  580</span>&#160;</div>
@@ -459,7 +459,7 @@
 <div class="line"><a name="l00625"></a><span class="lineno">  625</span>&#160;</div>
 <div class="line"><a name="l00627"></a><span class="lineno">  627</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> <a class="code" href="structcub_1_1_block_exchange_1_1_temp_storage.html" title="The operations exposed by BlockExchange require a temporary memory allocation of this nested type for...">BlockExchange::TempStorage</a> _TempStorage;</div>
 <div class="line"><a name="l00628"></a><span class="lineno">  628</span>&#160;</div>
-<div class="line"><a name="l00630"></a><span class="lineno"><a class="code" href="structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___t_r_a_n_s_p_o_s_e_00_09dfae03f13932c7dbdb41be30a5767ba.html">  630</a></span>&#160;        <span class="keyword">struct </span><a class="code" href="structcub_1_1_block_store_1_1_temp_storage.html" title="The operations exposed by BlockStore require a temporary memory allocation of this nested type for th...">TempStorage</a> : <a class="code" href="structcub_1_1_uninitialized.html" title="A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions...">Uninitialized</a>&lt;_TempStorage&gt; {};</div>
+<div class="line"><a name="l00630"></a><span class="lineno"><a class="code" href="structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___t_r_a_n_s_p_o_s_e_00_09dfae03f13932c7dbdb41be30a5767ba.html">  630</a></span>&#160;        <span class="keyword">struct </span><a class="code" href="structcub_1_1_block_store_1_1_temp_storage.html" title="The operations exposed by BlockStore require a temporary memory allocation of this nested type for th...">TempStorage</a> : Uninitialized&lt;_TempStorage&gt; {};</div>
 <div class="line"><a name="l00631"></a><span class="lineno">  631</span>&#160;</div>
 <div class="line"><a name="l00633"></a><span class="lineno">  633</span>&#160;        <a class="code" href="structcub_1_1_block_exchange_1_1_temp_storage.html" title="The operations exposed by BlockExchange require a temporary memory allocation of this nested type for...">_TempStorage</a> &amp;temp_storage;</div>
 <div class="line"><a name="l00634"></a><span class="lineno">  634</span>&#160;</div>
@@ -508,7 +508,7 @@
 <div class="line"><a name="l00684"></a><span class="lineno">  684</span>&#160;</div>
 <div class="line"><a name="l00686"></a><span class="lineno">  686</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> BlockExchange::TempStorage _TempStorage;</div>
 <div class="line"><a name="l00687"></a><span class="lineno">  687</span>&#160;</div>
-<div class="line"><a name="l00689"></a><span class="lineno"><a class="code" href="structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_8d170856b7ed1df0ed565731a681b449.html">  689</a></span>&#160;        <span class="keyword">struct </span><a class="code" href="structcub_1_1_block_store_1_1_temp_storage.html" title="The operations exposed by BlockStore require a temporary memory allocation of this nested type for th...">TempStorage</a> : <a class="code" href="structcub_1_1_uninitialized.html" title="A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions...">Uninitialized</a>&lt;_TempStorage&gt; {};</div>
+<div class="line"><a name="l00689"></a><span class="lineno"><a class="code" href="structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_8d170856b7ed1df0ed565731a681b449.html">  689</a></span>&#160;        <span class="keyword">struct </span><a class="code" href="structcub_1_1_block_store_1_1_temp_storage.html" title="The operations exposed by BlockStore require a temporary memory allocation of this nested type for th...">TempStorage</a> : Uninitialized&lt;_TempStorage&gt; {};</div>
 <div class="line"><a name="l00690"></a><span class="lineno">  690</span>&#160;</div>
 <div class="line"><a name="l00692"></a><span class="lineno">  692</span>&#160;        <a class="code" href="structcub_1_1_block_exchange_1_1_temp_storage.html" title="The operations exposed by BlockExchange require a temporary memory allocation of this nested type for...">_TempStorage</a> &amp;temp_storage;</div>
 <div class="line"><a name="l00693"></a><span class="lineno">  693</span>&#160;</div>
@@ -557,7 +557,7 @@
 <div class="line"><a name="l00743"></a><span class="lineno">  743</span>&#160;</div>
 <div class="line"><a name="l00745"></a><span class="lineno">  745</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> BlockExchange::TempStorage _TempStorage;</div>
 <div class="line"><a name="l00746"></a><span class="lineno">  746</span>&#160;</div>
-<div class="line"><a name="l00748"></a><span class="lineno"><a class="code" href="structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_263becc1ca5b47586740c2f7bb0d0145.html">  748</a></span>&#160;        <span class="keyword">struct </span><a class="code" href="structcub_1_1_block_store_1_1_temp_storage.html" title="The operations exposed by BlockStore require a temporary memory allocation of this nested type for th...">TempStorage</a> : <a class="code" href="structcub_1_1_uninitialized.html" title="A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions...">Uninitialized</a>&lt;_TempStorage&gt; {};</div>
+<div class="line"><a name="l00748"></a><span class="lineno"><a class="code" href="structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_263becc1ca5b47586740c2f7bb0d0145.html">  748</a></span>&#160;        <span class="keyword">struct </span><a class="code" href="structcub_1_1_block_store_1_1_temp_storage.html" title="The operations exposed by BlockStore require a temporary memory allocation of this nested type for th...">TempStorage</a> : Uninitialized&lt;_TempStorage&gt; {};</div>
 <div class="line"><a name="l00749"></a><span class="lineno">  749</span>&#160;</div>
 <div class="line"><a name="l00751"></a><span class="lineno">  751</span>&#160;        <a class="code" href="structcub_1_1_block_exchange_1_1_temp_storage.html" title="The operations exposed by BlockExchange require a temporary memory allocation of this nested type for...">_TempStorage</a> &amp;temp_storage;</div>
 <div class="line"><a name="l00752"></a><span class="lineno">  752</span>&#160;</div>
@@ -621,7 +621,7 @@
 <div class="line"><a name="l00819"></a><span class="lineno">  819</span>&#160;<span class="keyword">public</span>:</div>
 <div class="line"><a name="l00820"></a><span class="lineno">  820</span>&#160;</div>
 <div class="line"><a name="l00821"></a><span class="lineno">  821</span>&#160;</div>
-<div class="line"><a name="l00823"></a><span class="lineno"><a class="code" href="structcub_1_1_block_store_1_1_temp_storage.html">  823</a></span>&#160;    <span class="keyword">struct </span><a class="code" href="structcub_1_1_block_store_1_1_temp_storage.html" title="The operations exposed by BlockStore require a temporary memory allocation of this nested type for th...">TempStorage</a> : <a class="code" href="structcub_1_1_uninitialized.html" title="A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions...">Uninitialized</a>&lt;_TempStorage&gt; {};</div>
+<div class="line"><a name="l00823"></a><span class="lineno"><a class="code" href="structcub_1_1_block_store_1_1_temp_storage.html">  823</a></span>&#160;    <span class="keyword">struct </span><a class="code" href="structcub_1_1_block_store_1_1_temp_storage.html" title="The operations exposed by BlockStore require a temporary memory allocation of this nested type for th...">TempStorage</a> : Uninitialized&lt;_TempStorage&gt; {};</div>
 <div class="line"><a name="l00824"></a><span class="lineno">  824</span>&#160;</div>
 <div class="line"><a name="l00825"></a><span class="lineno">  825</span>&#160;</div>
 <div class="line"><a name="l00826"></a><span class="lineno">  826</span>&#160;    <span class="comment">/******************************************************************/</span></div>
@@ -668,7 +668,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/cache__modified__input__iterator_8cuh.html b/docs/html/cache__modified__input__iterator_8cuh.html
index c06ec30f7c..b73c2ea373 100644
--- a/docs/html/cache__modified__input__iterator_8cuh.html
+++ b/docs/html/cache__modified__input__iterator_8cuh.html
@@ -133,7 +133,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/cache__modified__input__iterator_8cuh_source.html b/docs/html/cache__modified__input__iterator_8cuh_source.html
index f682bcedeb..68a9808d19 100644
--- a/docs/html/cache__modified__input__iterator_8cuh_source.html
+++ b/docs/html/cache__modified__input__iterator_8cuh_source.html
@@ -278,7 +278,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/cache__modified__output__iterator_8cuh.html b/docs/html/cache__modified__output__iterator_8cuh.html
index cef32060c6..2415b9b43f 100644
--- a/docs/html/cache__modified__output__iterator_8cuh.html
+++ b/docs/html/cache__modified__output__iterator_8cuh.html
@@ -133,7 +133,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/cache__modified__output__iterator_8cuh_source.html b/docs/html/cache__modified__output__iterator_8cuh_source.html
index 6d1dd74225..028b1c710b 100644
--- a/docs/html/cache__modified__output__iterator_8cuh_source.html
+++ b/docs/html/cache__modified__output__iterator_8cuh_source.html
@@ -289,7 +289,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_arg_index_input_iterator-members.html b/docs/html/classcub_1_1_arg_index_input_iterator-members.html
index f335777631..871c4cc828 100644
--- a/docs/html/classcub_1_1_arg_index_input_iterator-members.html
+++ b/docs/html/classcub_1_1_arg_index_input_iterator-members.html
@@ -107,28 +107,29 @@
   <tr class="even"><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a97a6c3755ab132c099e90616e5f67cb6">ArgIndexInputIterator</a>(InputIteratorT itr, difference_type offset=0)</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
   <tr><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#acd5e39570dd51f4883547ef33f9ca8c6">difference_type</a> typedef</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"></td></tr>
   <tr class="even"><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a2cff9aacc1ba59ae9f74735c257261e5">iterator_category</a> typedef</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"></td></tr>
-  <tr><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a17b0be0179d38b73e8a612d4d0202772">operator!=</a>(const self_type &amp;rhs)</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a04e18ece1d7438dd929665e92091241f">operator*</a>() const </td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a1bf6ce760cfa6764c43dd13699601030">operator+</a>(Distance n) const </td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a293aa0bf1aa6706505cb85d4d475c9fd">operator++</a>(int)</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a5856850705522ded2cb41418d8abb065">operator++</a>()</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a9ecc94fabf03d35374955388414609e2">operator+=</a>(Distance n)</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#ac47c90b1bda91b67e7a6aab76b69bf7a">operator-</a>(Distance n) const </td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a4edc9b6609fac1ac62303d32ca21169a">operator-</a>(self_type other) const </td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a223d782b6abddd640dbcb99a0ea5cca1">operator-=</a>(Distance n)</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#ade9ae7920b62507e07895b052caa325b">operator-&gt;</a>()</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a11b2073215294343bdc32f09c0bc5e80">operator&lt;&lt;</a>(std::ostream &amp;os, const self_type &amp;itr)</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">friend</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a9925c001d0a1dd40b35b8ce831e85f13">operator==</a>(const self_type &amp;rhs)</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a23be223e3feb98da6c2828cb94d45ed6">operator[]</a>(Distance n) const </td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#afbc0675053718d968d81b1a16e28dc5c">pointer</a> typedef</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"></td></tr>
-  <tr><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a8498b60c0f5f6b04caf6e3b18cfd825d">reference</a> typedef</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a6ad07b9b511ecbd6219756ebeffd6be6">self_type</a> typedef</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"></td></tr>
-  <tr><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a6c40ea1dc7c0923b9010f1e938e07d22">value_type</a> typedef</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"></td></tr>
+  <tr><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#aa3dd1dfb19d87d8e0b5fc3c8773fd1dc">normalize</a>()</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a17b0be0179d38b73e8a612d4d0202772">operator!=</a>(const self_type &amp;rhs)</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a04e18ece1d7438dd929665e92091241f">operator*</a>() const </td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a1bf6ce760cfa6764c43dd13699601030">operator+</a>(Distance n) const </td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a293aa0bf1aa6706505cb85d4d475c9fd">operator++</a>(int)</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a5856850705522ded2cb41418d8abb065">operator++</a>()</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a9ecc94fabf03d35374955388414609e2">operator+=</a>(Distance n)</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#ac47c90b1bda91b67e7a6aab76b69bf7a">operator-</a>(Distance n) const </td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a4edc9b6609fac1ac62303d32ca21169a">operator-</a>(self_type other) const </td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a223d782b6abddd640dbcb99a0ea5cca1">operator-=</a>(Distance n)</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#ade9ae7920b62507e07895b052caa325b">operator-&gt;</a>()</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a11b2073215294343bdc32f09c0bc5e80">operator&lt;&lt;</a>(std::ostream &amp;os, const self_type &amp;itr)</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">friend</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a9925c001d0a1dd40b35b8ce831e85f13">operator==</a>(const self_type &amp;rhs)</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a23be223e3feb98da6c2828cb94d45ed6">operator[]</a>(Distance n) const </td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#afbc0675053718d968d81b1a16e28dc5c">pointer</a> typedef</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a8498b60c0f5f6b04caf6e3b18cfd825d">reference</a> typedef</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"></td></tr>
+  <tr><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a6ad07b9b511ecbd6219756ebeffd6be6">self_type</a> typedef</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a6c40ea1dc7c0923b9010f1e938e07d22">value_type</a> typedef</td><td class="entry"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="entry"></td></tr>
 </table></div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_arg_index_input_iterator.html b/docs/html/classcub_1_1_arg_index_input_iterator.html
index 7d19899df1..d555ec036d 100644
--- a/docs/html/classcub_1_1_arg_index_input_iterator.html
+++ b/docs/html/classcub_1_1_arg_index_input_iterator.html
@@ -112,9 +112,9 @@
     typename OffsetT = ptrdiff_t&gt;<br/>
 class cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</h3>
 
-<p>A random-access input wrapper for pairing dereferenced values with their corresponding indices (forming <code><a class="el" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">KeyValuePair</a></code> tuples). </p>
+<p>A random-access input wrapper for pairing dereferenced values with their corresponding indices (forming <code>KeyValuePair</code> tuples). </p>
 <dl class="section user"><dt>Overview</dt><dd><ul>
-<li>ArgIndexInputIteratorTwraps a random access input iterator <code>itr</code> of type <code>InputIteratorT</code>. Dereferencing an ArgIndexInputIteratorTat offset <code>i</code> produces a <code><a class="el" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">KeyValuePair</a></code> value whose <code>key</code> field is <code>i</code> and whose <code>value</code> field is <code>itr[i]</code>.</li>
+<li>ArgIndexInputIteratorTwraps a random access input iterator <code>itr</code> of type <code>InputIteratorT</code>. Dereferencing an ArgIndexInputIteratorTat offset <code>i</code> produces a <code>KeyValuePair</code> value whose <code>key</code> field is <code>i</code> and whose <code>value</code> field is <code>itr[i]</code>.</li>
 <li>Can be used with any data type.</li>
 <li>Can be constructed, manipulated, and exchanged within and between host and device functions. Wrapped host memory can only be dereferenced on the host, and wrapped device memory can only be dereferenced on the device.</li>
 <li>Compatible with Thrust API v1.7 or newer.</li>
@@ -130,17 +130,17 @@
 <div class="line"><a class="code" href="classcub_1_1_arg_index_input_iterator.html" title="A random-access input wrapper for pairing dereferenced values with their corresponding indices (formi...">cub::ArgIndexInputIterator&lt;double*&gt;</a> itr(d_in);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Within device code:</span></div>
-<div class="line"><span class="keyword">typedef</span> <span class="keyword">typename</span> <a class="code" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">cub::ArgIndexInputIterator&lt;double*&gt;::value_type</a> Tuple;</div>
-<div class="line">Tuple item_offset_pair.<a class="code" href="structcub_1_1_key_value_pair.html#a648308be997ae9c79f47f6006ec7b494" title="Item key. ">key</a> = *itr;</div>
+<div class="line"><span class="keyword">typedef</span> <span class="keyword">typename</span> <a class="code" href="classcub_1_1_arg_index_input_iterator.html#a6c40ea1dc7c0923b9010f1e938e07d22" title="The type of the element the iterator can point to. ">cub::ArgIndexInputIterator&lt;double*&gt;::value_type</a> Tuple;</div>
+<div class="line">Tuple item_offset_pair.key = *itr;</div>
 <div class="line">printf(<span class="stringliteral">&quot;%f @ %d\n&quot;</span>,</div>
-<div class="line"> item_offset_pair.value,</div>
-<div class="line"> item_offset_pair.key);   <span class="comment">// 8.0 @ 0</span></div>
+<div class="line">  item_offset_pair.value,</div>
+<div class="line">  item_offset_pair.key);   <span class="comment">// 8.0 @ 0</span></div>
 <div class="line"></div>
 <div class="line">itr = itr + 6;</div>
 <div class="line">item_offset_pair.key = *itr;</div>
 <div class="line">printf(<span class="stringliteral">&quot;%f @ %d\n&quot;</span>,</div>
-<div class="line"> item_offset_pair.value,</div>
-<div class="line"> item_offset_pair.key);   <span class="comment">// 9.0 @ 6</span></div>
+<div class="line">  item_offset_pair.value,</div>
+<div class="line">  item_offset_pair.key);   <span class="comment">// 9.0 @ 6</span></div>
 </div><!-- fragment --></dd></dl>
 <dl class="tparams"><dt>Template Parameters</dt><dd>
   <table class="tparams">
@@ -163,7 +163,7 @@
 <tr class="memdesc:acd5e39570dd51f4883547ef33f9ca8c6"><td class="mdescLeft">&#160;</td><td class="mdescRight">Type to express the result of subtracting one iterator from another. <br/></td></tr>
 <tr class="separator:acd5e39570dd51f4883547ef33f9ca8c6"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a6c40ea1dc7c0923b9010f1e938e07d22"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a6c40ea1dc7c0923b9010f1e938e07d22"></a>
-typedef <a class="el" href="structcub_1_1_key_value_pair.html">KeyValuePair</a><br class="typebreak"/>
+typedef KeyValuePair<br class="typebreak"/>
 &lt; <a class="el" href="classcub_1_1_arg_index_input_iterator.html#acd5e39570dd51f4883547ef33f9ca8c6">difference_type</a>, T &gt;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a6c40ea1dc7c0923b9010f1e938e07d22">value_type</a></td></tr>
 <tr class="memdesc:a6c40ea1dc7c0923b9010f1e938e07d22"><td class="mdescLeft">&#160;</td><td class="mdescRight">The type of the element the iterator can point to. <br/></td></tr>
 <tr class="separator:a6c40ea1dc7c0923b9010f1e938e07d22"><td class="memSeparator" colspan="2">&#160;</td></tr>
@@ -252,6 +252,11 @@
 __forceinline__ bool&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#a17b0be0179d38b73e8a612d4d0202772">operator!=</a> (const <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a6ad07b9b511ecbd6219756ebeffd6be6">self_type</a> &amp;rhs)</td></tr>
 <tr class="memdesc:a17b0be0179d38b73e8a612d4d0202772"><td class="mdescLeft">&#160;</td><td class="mdescRight">Not equal to. <br/></td></tr>
 <tr class="separator:a17b0be0179d38b73e8a612d4d0202772"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:aa3dd1dfb19d87d8e0b5fc3c8773fd1dc"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="aa3dd1dfb19d87d8e0b5fc3c8773fd1dc"></a>
+__host__ __device__ <br class="typebreak"/>
+__forceinline__ void&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcub_1_1_arg_index_input_iterator.html#aa3dd1dfb19d87d8e0b5fc3c8773fd1dc">normalize</a> ()</td></tr>
+<tr class="memdesc:aa3dd1dfb19d87d8e0b5fc3c8773fd1dc"><td class="mdescLeft">&#160;</td><td class="mdescRight">Normalize. <br/></td></tr>
+<tr class="separator:aa3dd1dfb19d87d8e0b5fc3c8773fd1dc"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table><table class="memberdecls">
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="friends"></a>
 Friends</h2></td></tr>
@@ -315,7 +320,7 @@ <h2 class="groupheader">Constructor &amp; Destructor Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_block_discontinuity-members.html b/docs/html/classcub_1_1_block_discontinuity-members.html
index 6b2fe38bda..e371714be5 100644
--- a/docs/html/classcub_1_1_block_discontinuity-members.html
+++ b/docs/html/classcub_1_1_block_discontinuity-members.html
@@ -118,7 +118,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_block_discontinuity.html b/docs/html/classcub_1_1_block_discontinuity.html
index 958d9f9416..1f75cb43af 100644
--- a/docs/html/classcub_1_1_block_discontinuity.html
+++ b/docs/html/classcub_1_1_block_discontinuity.html
@@ -1116,7 +1116,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_block_exchange-members.html b/docs/html/classcub_1_1_block_exchange-members.html
index 6b921bcf36..cc58407824 100644
--- a/docs/html/classcub_1_1_block_exchange-members.html
+++ b/docs/html/classcub_1_1_block_exchange-members.html
@@ -118,7 +118,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_block_exchange.html b/docs/html/classcub_1_1_block_exchange.html
index c46c77b5f7..9fb3e824de 100644
--- a/docs/html/classcub_1_1_block_exchange.html
+++ b/docs/html/classcub_1_1_block_exchange.html
@@ -749,7 +749,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_block_histogram-members.html b/docs/html/classcub_1_1_block_histogram-members.html
index 94b024c19e..2b21e183aa 100644
--- a/docs/html/classcub_1_1_block_histogram-members.html
+++ b/docs/html/classcub_1_1_block_histogram-members.html
@@ -113,7 +113,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_block_histogram.html b/docs/html/classcub_1_1_block_histogram.html
index 87d9f37154..ad5fe798b8 100644
--- a/docs/html/classcub_1_1_block_histogram.html
+++ b/docs/html/classcub_1_1_block_histogram.html
@@ -479,7 +479,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_block_load-members.html b/docs/html/classcub_1_1_block_load-members.html
index f5ea4a4329..fe5d952392 100644
--- a/docs/html/classcub_1_1_block_load-members.html
+++ b/docs/html/classcub_1_1_block_load-members.html
@@ -113,7 +113,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_block_load.html b/docs/html/classcub_1_1_block_load.html
index 1c6c94440a..e64f00570e 100644
--- a/docs/html/classcub_1_1_block_load.html
+++ b/docs/html/classcub_1_1_block_load.html
@@ -165,7 +165,7 @@
 <dl class="section user"><dt></dt><dd>Suppose the input <code>d_data</code> is <code>0, 1, 2, 3, 4, 5, ...</code>. The set of <code>thread_data</code> across the block of threads in those threads will be <code>{ [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] }</code>. </dd></dl>
 <dl><dt><b>Examples: </b></dt><dd><a class="el" href="example_block_radix_sort_8cu-example.html#_a0">example_block_radix_sort.cu</a>, and <a class="el" href="example_block_scan_8cu-example.html#_a0">example_block_scan.cu</a>.</dd>
 </dl>
-<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l00659">659</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
+<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l00674">674</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
 </div><table class="memberdecls">
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="nested-classes"></a>
 Classes</h2></td></tr>
@@ -227,7 +227,7 @@ <h2 class="groupheader">Constructor &amp; Destructor Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l01073">1073</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
+<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l01088">1088</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
 
 </div>
 </div>
@@ -296,7 +296,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l01126">1126</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
+<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l01141">1141</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
 
 </div>
 </div>
@@ -371,7 +371,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l01171">1171</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
+<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l01186">1186</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
 
 </div>
 </div>
@@ -453,7 +453,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l01218">1218</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
+<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l01233">1233</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
 
 </div>
 </div>
@@ -464,7 +464,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_block_radix_sort-members.html b/docs/html/classcub_1_1_block_radix_sort-members.html
index 0cf302a181..d694c8a9de 100644
--- a/docs/html/classcub_1_1_block_radix_sort-members.html
+++ b/docs/html/classcub_1_1_block_radix_sort-members.html
@@ -118,7 +118,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:06 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:17 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_block_radix_sort.html b/docs/html/classcub_1_1_block_radix_sort.html
index 159f1b06ad..c7d1c408d0 100644
--- a/docs/html/classcub_1_1_block_radix_sort.html
+++ b/docs/html/classcub_1_1_block_radix_sort.html
@@ -129,7 +129,7 @@
     <tr><td class="paramname">KeyT</td><td>KeyT type </td></tr>
     <tr><td class="paramname">BLOCK_DIM_X</td><td>The thread block length in threads along the X dimension </td></tr>
     <tr><td class="paramname">ITEMS_PER_THREAD</td><td>The number of items per thread </td></tr>
-    <tr><td class="paramname">ValueT</td><td><b>[optional]</b> ValueT type (default: <a class="el" href="structcub_1_1_null_type.html" title="A simple &quot;NULL&quot; marker type. ">cub::NullType</a>, which indicates a keys-only sort) </td></tr>
+    <tr><td class="paramname">ValueT</td><td><b>[optional]</b> ValueT type (default: cub::NullType, which indicates a keys-only sort) </td></tr>
     <tr><td class="paramname">RADIX_BITS</td><td><b>[optional]</b> The number of radix bits per digit place (default: 4 bits) </td></tr>
     <tr><td class="paramname">MEMOIZE_OUTER_SCAN</td><td><b>[optional]</b> Whether or not to buffer outer raking scan partials to incur fewer shared memory reads at the expense of higher register pressure (default: true for architectures SM35 and newer, false otherwise). </td></tr>
     <tr><td class="paramname">INNER_SCAN_ALGORITHM</td><td><b>[optional]</b> The <a class="el" href="namespacecub.html#abec44bba36037c547e7e84906d0d23ab" title="BlockScanAlgorithm enumerates alternative algorithms for cub::BlockScan to compute a parallel prefix ...">cub::BlockScanAlgorithm</a> algorithm to use (default: <a class="el" href="namespacecub.html#abec44bba36037c547e7e84906d0d23aba7f51e58246eb53f1a97bd1bc8c0f400f">cub::BLOCK_SCAN_WARP_SCANS</a>) </td></tr>
@@ -924,7 +924,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:06 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:17 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_block_reduce-members.html b/docs/html/classcub_1_1_block_reduce-members.html
index de5ec20b00..0362e1f917 100644
--- a/docs/html/classcub_1_1_block_reduce-members.html
+++ b/docs/html/classcub_1_1_block_reduce-members.html
@@ -116,7 +116,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:06 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:17 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_block_reduce.html b/docs/html/classcub_1_1_block_reduce.html
index 0602c4aec0..2b42dfbb0b 100644
--- a/docs/html/classcub_1_1_block_reduce.html
+++ b/docs/html/classcub_1_1_block_reduce.html
@@ -319,14 +319,14 @@ <h2 class="groupheader">Member Function Documentation</h2>
 </div><!-- fragment --></dd></dl>
 <dl class="tparams"><dt>Template Parameters</dt><dd>
   <table class="tparams">
-    <tr><td class="paramname">ReductionOp</td><td><b>[inferred]</b> Binary reduction functor (e.g., an instance of <a class="el" href="structcub_1_1_sum.html" title="Default sum functor. ">cub::Sum</a>, <a class="el" href="structcub_1_1_min.html" title="Default min functor. ">cub::Min</a>, <a class="el" href="structcub_1_1_max.html" title="Default max functor. ">cub::Max</a>, etc.) type having member <code>T operator()(const T &amp;a, const T &amp;b)</code> </td></tr>
+    <tr><td class="paramname">ReductionOp</td><td><b>[inferred]</b> Binary reduction functor type having member <code>T operator()(const T &amp;a, const T &amp;b)</code> </td></tr>
   </table>
   </dd>
 </dl>
 <dl class="params"><dt>Parameters</dt><dd>
   <table class="params">
     <tr><td class="paramdir">[in]</td><td class="paramname">input</td><td>Calling thread's input </td></tr>
-    <tr><td class="paramdir">[in]</td><td class="paramname">reduction_op</td><td>Binary reduction functor (e.g., an instance of <a class="el" href="structcub_1_1_sum.html" title="Default sum functor. ">cub::Sum</a>, <a class="el" href="structcub_1_1_min.html" title="Default min functor. ">cub::Min</a>, <a class="el" href="structcub_1_1_max.html" title="Default max functor. ">cub::Max</a>, etc.) </td></tr>
+    <tr><td class="paramdir">[in]</td><td class="paramname">reduction_op</td><td>Binary reduction functor </td></tr>
   </table>
   </dd>
 </dl>
@@ -399,14 +399,14 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <dl class="tparams"><dt>Template Parameters</dt><dd>
   <table class="tparams">
     <tr><td class="paramname">ITEMS_PER_THREAD</td><td><b>[inferred]</b> The number of consecutive items partitioned onto each thread. </td></tr>
-    <tr><td class="paramname">ReductionOp</td><td><b>[inferred]</b> Binary reduction functor (e.g., an instance of <a class="el" href="structcub_1_1_sum.html" title="Default sum functor. ">cub::Sum</a>, <a class="el" href="structcub_1_1_min.html" title="Default min functor. ">cub::Min</a>, <a class="el" href="structcub_1_1_max.html" title="Default max functor. ">cub::Max</a>, etc.) type having member <code>T operator()(const T &amp;a, const T &amp;b)</code> </td></tr>
+    <tr><td class="paramname">ReductionOp</td><td><b>[inferred]</b> Binary reduction functor type having member <code>T operator()(const T &amp;a, const T &amp;b)</code> </td></tr>
   </table>
   </dd>
 </dl>
 <dl class="params"><dt>Parameters</dt><dd>
   <table class="params">
     <tr><td class="paramdir">[in]</td><td class="paramname">inputs</td><td>Calling thread's input segment </td></tr>
-    <tr><td class="paramdir">[in]</td><td class="paramname">reduction_op</td><td>Binary reduction functor (e.g., an instance of <a class="el" href="structcub_1_1_sum.html" title="Default sum functor. ">cub::Sum</a>, <a class="el" href="structcub_1_1_min.html" title="Default min functor. ">cub::Min</a>, <a class="el" href="structcub_1_1_max.html" title="Default max functor. ">cub::Max</a>, etc.) </td></tr>
+    <tr><td class="paramdir">[in]</td><td class="paramname">reduction_op</td><td>Binary reduction functor </td></tr>
   </table>
   </dd>
 </dl>
@@ -484,14 +484,14 @@ <h2 class="groupheader">Member Function Documentation</h2>
 </div><!-- fragment --></dd></dl>
 <dl class="tparams"><dt>Template Parameters</dt><dd>
   <table class="tparams">
-    <tr><td class="paramname">ReductionOp</td><td><b>[inferred]</b> Binary reduction functor (e.g., an instance of <a class="el" href="structcub_1_1_sum.html" title="Default sum functor. ">cub::Sum</a>, <a class="el" href="structcub_1_1_min.html" title="Default min functor. ">cub::Min</a>, <a class="el" href="structcub_1_1_max.html" title="Default max functor. ">cub::Max</a>, etc.) type having member <code>T operator()(const T &amp;a, const T &amp;b)</code> </td></tr>
+    <tr><td class="paramname">ReductionOp</td><td><b>[inferred]</b> Binary reduction functor type having member <code>T operator()(const T &amp;a, const T &amp;b)</code> </td></tr>
   </table>
   </dd>
 </dl>
 <dl class="params"><dt>Parameters</dt><dd>
   <table class="params">
     <tr><td class="paramdir">[in]</td><td class="paramname">input</td><td>Calling thread's input </td></tr>
-    <tr><td class="paramdir">[in]</td><td class="paramname">reduction_op</td><td>Binary reduction functor (e.g., an instance of <a class="el" href="structcub_1_1_sum.html" title="Default sum functor. ">cub::Sum</a>, <a class="el" href="structcub_1_1_min.html" title="Default min functor. ">cub::Min</a>, <a class="el" href="structcub_1_1_max.html" title="Default max functor. ">cub::Max</a>, etc.) </td></tr>
+    <tr><td class="paramdir">[in]</td><td class="paramname">reduction_op</td><td>Binary reduction functor </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">num_valid</td><td>Number of threads containing valid elements (may be less than BLOCK_THREADS) </td></tr>
   </table>
   </dd>
@@ -708,7 +708,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:06 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:17 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_block_scan-members.html b/docs/html/classcub_1_1_block_scan-members.html
index 3cc5f8b38a..48abbc03b1 100644
--- a/docs/html/classcub_1_1_block_scan-members.html
+++ b/docs/html/classcub_1_1_block_scan-members.html
@@ -134,7 +134,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:06 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:17 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_block_scan.html.REMOVED.git-id b/docs/html/classcub_1_1_block_scan.html.REMOVED.git-id
index e04db5ddde..04b4fb437a 100644
--- a/docs/html/classcub_1_1_block_scan.html.REMOVED.git-id
+++ b/docs/html/classcub_1_1_block_scan.html.REMOVED.git-id
@@ -1 +1 @@
-05c53712eb7c477ef7baefa462ddc7be1ff916bf
\ No newline at end of file
+7756e9c499e97154bdf8b2fa4343354784e594d1
\ No newline at end of file
diff --git a/docs/html/classcub_1_1_block_store-members.html b/docs/html/classcub_1_1_block_store-members.html
index 1aa7e39eca..e93c53a5d4 100644
--- a/docs/html/classcub_1_1_block_store-members.html
+++ b/docs/html/classcub_1_1_block_store-members.html
@@ -112,7 +112,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:06 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:17 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_block_store.html b/docs/html/classcub_1_1_block_store.html
index 3e651f7bbe..007a7c5a96 100644
--- a/docs/html/classcub_1_1_block_store.html
+++ b/docs/html/classcub_1_1_block_store.html
@@ -390,7 +390,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:06 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:17 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_cache_modified_input_iterator-members.html b/docs/html/classcub_1_1_cache_modified_input_iterator-members.html
index 7832d3cc3d..72d655ec03 100644
--- a/docs/html/classcub_1_1_cache_modified_input_iterator-members.html
+++ b/docs/html/classcub_1_1_cache_modified_input_iterator-members.html
@@ -129,7 +129,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_cache_modified_input_iterator.html b/docs/html/classcub_1_1_cache_modified_input_iterator.html
index 4fc544548d..bddefb784c 100644
--- a/docs/html/classcub_1_1_cache_modified_input_iterator.html
+++ b/docs/html/classcub_1_1_cache_modified_input_iterator.html
@@ -307,7 +307,7 @@ <h2 class="groupheader">Constructor &amp; Destructor Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_cache_modified_output_iterator-members.html b/docs/html/classcub_1_1_cache_modified_output_iterator-members.html
index dc3b8fd0a6..21d5c0d821 100644
--- a/docs/html/classcub_1_1_cache_modified_output_iterator-members.html
+++ b/docs/html/classcub_1_1_cache_modified_output_iterator-members.html
@@ -127,7 +127,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_cache_modified_output_iterator.html b/docs/html/classcub_1_1_cache_modified_output_iterator.html
index afb413e958..6cf697dbb0 100644
--- a/docs/html/classcub_1_1_cache_modified_output_iterator.html
+++ b/docs/html/classcub_1_1_cache_modified_output_iterator.html
@@ -300,7 +300,7 @@ <h2 class="groupheader">Constructor &amp; Destructor Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_constant_input_iterator-members.html b/docs/html/classcub_1_1_constant_input_iterator-members.html
index cf60a0d63e..e03e9a10bb 100644
--- a/docs/html/classcub_1_1_constant_input_iterator-members.html
+++ b/docs/html/classcub_1_1_constant_input_iterator-members.html
@@ -128,7 +128,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_constant_input_iterator.html b/docs/html/classcub_1_1_constant_input_iterator.html
index b2cf7d8402..3b4839e81d 100644
--- a/docs/html/classcub_1_1_constant_input_iterator.html
+++ b/docs/html/classcub_1_1_constant_input_iterator.html
@@ -302,7 +302,7 @@ <h2 class="groupheader">Constructor &amp; Destructor Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_counting_input_iterator-members.html b/docs/html/classcub_1_1_counting_input_iterator-members.html
index 45bb323ee1..c936431ca3 100644
--- a/docs/html/classcub_1_1_counting_input_iterator-members.html
+++ b/docs/html/classcub_1_1_counting_input_iterator-members.html
@@ -128,7 +128,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_counting_input_iterator.html b/docs/html/classcub_1_1_counting_input_iterator.html
index 1cf4f2e2ef..c5fa03dcd9 100644
--- a/docs/html/classcub_1_1_counting_input_iterator.html
+++ b/docs/html/classcub_1_1_counting_input_iterator.html
@@ -290,7 +290,7 @@ <h2 class="groupheader">Constructor &amp; Destructor Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_swizzle_scan_op-members.html b/docs/html/classcub_1_1_swizzle_scan_op-members.html
index 9b9d8f04eb..f2fb5ac0ff 100644
--- a/docs/html/classcub_1_1_swizzle_scan_op-members.html
+++ b/docs/html/classcub_1_1_swizzle_scan_op-members.html
@@ -110,7 +110,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_swizzle_scan_op.html b/docs/html/classcub_1_1_swizzle_scan_op.html
index 0722a42a08..2f65a1b020 100644
--- a/docs/html/classcub_1_1_swizzle_scan_op.html
+++ b/docs/html/classcub_1_1_swizzle_scan_op.html
@@ -133,7 +133,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_tex_obj_input_iterator-members.html b/docs/html/classcub_1_1_tex_obj_input_iterator-members.html
index ec000a8eaf..1d00600edf 100644
--- a/docs/html/classcub_1_1_tex_obj_input_iterator-members.html
+++ b/docs/html/classcub_1_1_tex_obj_input_iterator-members.html
@@ -130,7 +130,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_tex_obj_input_iterator.html b/docs/html/classcub_1_1_tex_obj_input_iterator.html
index 8b76589fac..eb8fa18164 100644
--- a/docs/html/classcub_1_1_tex_obj_input_iterator.html
+++ b/docs/html/classcub_1_1_tex_obj_input_iterator.html
@@ -331,7 +331,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_tex_ref_input_iterator-members.html b/docs/html/classcub_1_1_tex_ref_input_iterator-members.html
index 939ea07c22..291fd23fea 100644
--- a/docs/html/classcub_1_1_tex_ref_input_iterator-members.html
+++ b/docs/html/classcub_1_1_tex_ref_input_iterator-members.html
@@ -129,7 +129,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_tex_ref_input_iterator.html b/docs/html/classcub_1_1_tex_ref_input_iterator.html
index 2f96d62aa0..276762c641 100644
--- a/docs/html/classcub_1_1_tex_ref_input_iterator.html
+++ b/docs/html/classcub_1_1_tex_ref_input_iterator.html
@@ -332,7 +332,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_transform_input_iterator-members.html b/docs/html/classcub_1_1_transform_input_iterator-members.html
index ce4fbaf0c2..631e07b05e 100644
--- a/docs/html/classcub_1_1_transform_input_iterator-members.html
+++ b/docs/html/classcub_1_1_transform_input_iterator-members.html
@@ -128,7 +128,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_transform_input_iterator.html b/docs/html/classcub_1_1_transform_input_iterator.html
index 7be29dfd33..6ef534526b 100644
--- a/docs/html/classcub_1_1_transform_input_iterator.html
+++ b/docs/html/classcub_1_1_transform_input_iterator.html
@@ -320,7 +320,7 @@ <h2 class="groupheader">Constructor &amp; Destructor Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_warp_reduce-members.html b/docs/html/classcub_1_1_warp_reduce-members.html
index 53df6aa02e..5adb946e49 100644
--- a/docs/html/classcub_1_1_warp_reduce-members.html
+++ b/docs/html/classcub_1_1_warp_reduce-members.html
@@ -117,7 +117,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_warp_reduce.html b/docs/html/classcub_1_1_warp_reduce.html
index 23aedc27a2..f262d6ce47 100644
--- a/docs/html/classcub_1_1_warp_reduce.html
+++ b/docs/html/classcub_1_1_warp_reduce.html
@@ -885,7 +885,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_warp_scan-members.html b/docs/html/classcub_1_1_warp_scan-members.html
index 2b4965be29..c47f350506 100644
--- a/docs/html/classcub_1_1_warp_scan-members.html
+++ b/docs/html/classcub_1_1_warp_scan-members.html
@@ -123,7 +123,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/classcub_1_1_warp_scan.html.REMOVED.git-id b/docs/html/classcub_1_1_warp_scan.html.REMOVED.git-id
index c66485791d..086844ed64 100644
--- a/docs/html/classcub_1_1_warp_scan.html.REMOVED.git-id
+++ b/docs/html/classcub_1_1_warp_scan.html.REMOVED.git-id
@@ -1 +1 @@
-1e2360acc25ce6d42a73bc80aa33477290d69d8f
\ No newline at end of file
+a1b3c49e0d85019e2659b4c116b40b52c77c02ff
\ No newline at end of file
diff --git a/docs/html/classes.html b/docs/html/classes.html
index b90a5c7253..5f8a7463f9 100644
--- a/docs/html/classes.html
+++ b/docs/html/classes.html
@@ -97,54 +97,49 @@
 <div class="title">Class Index</div>  </div>
 </div><!--header-->
 <div class="contents">
-<div class="qindex"><a class="qindex" href="#letter_A">A</a>&#160;|&#160;<a class="qindex" href="#letter_B">B</a>&#160;|&#160;<a class="qindex" href="#letter_C">C</a>&#160;|&#160;<a class="qindex" href="#letter_D">D</a>&#160;|&#160;<a class="qindex" href="#letter_E">E</a>&#160;|&#160;<a class="qindex" href="#letter_I">I</a>&#160;|&#160;<a class="qindex" href="#letter_K">K</a>&#160;|&#160;<a class="qindex" href="#letter_L">L</a>&#160;|&#160;<a class="qindex" href="#letter_M">M</a>&#160;|&#160;<a class="qindex" href="#letter_N">N</a>&#160;|&#160;<a class="qindex" href="#letter_P">P</a>&#160;|&#160;<a class="qindex" href="#letter_R">R</a>&#160;|&#160;<a class="qindex" href="#letter_S">S</a>&#160;|&#160;<a class="qindex" href="#letter_T">T</a>&#160;|&#160;<a class="qindex" href="#letter_U">U</a>&#160;|&#160;<a class="qindex" href="#letter_W">W</a></div>
+<div class="qindex"><a class="qindex" href="#letter_A">A</a>&#160;|&#160;<a class="qindex" href="#letter_B">B</a>&#160;|&#160;<a class="qindex" href="#letter_C">C</a>&#160;|&#160;<a class="qindex" href="#letter_D">D</a>&#160;|&#160;<a class="qindex" href="#letter_E">E</a>&#160;|&#160;<a class="qindex" href="#letter_I">I</a>&#160;|&#160;<a class="qindex" href="#letter_L">L</a>&#160;|&#160;<a class="qindex" href="#letter_M">M</a>&#160;|&#160;<a class="qindex" href="#letter_P">P</a>&#160;|&#160;<a class="qindex" href="#letter_R">R</a>&#160;|&#160;<a class="qindex" href="#letter_S">S</a>&#160;|&#160;<a class="qindex" href="#letter_T">T</a>&#160;|&#160;<a class="qindex" href="#letter_W">W</a></div>
 <table style="margin: 10px; white-space: nowrap;" align="center" width="95%" border="0" cellspacing="0" cellpadding="0">
 <tr><td rowspan="2" valign="bottom"><a name="letter_A"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;A&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="classcub_1_1_constant_input_iterator.html">ConstantInputIterator</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_inequality.html">Inequality</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_R"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;R&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="structcub_1_1_warp_reduce_1_1_temp_storage.html">WarpReduce::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="classcub_1_1_counting_input_iterator.html">CountingInputIterator</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_inequality_wrapper.html">InequalityWrapper</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___t_r_a_n_s_p_o_s_e_00_09dfae03f13932c7dbdb41be30a5767ba.html">BlockStore::StoreInternal&lt; BLOCK_STORE_TRANSPOSE, DUMMY &gt;::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">ArgIndexInputIterator</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_cub_vector.html">CubVector</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_int2_type.html">Int2Type</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_reduce_by_key_op.html">ReduceByKeyOp</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_block_scan_1_1_temp_storage.html">BlockScan::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="structcub_1_1_arg_max.html">ArgMax</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_D"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;D&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="structcub_1_1_is_pointer.html">IsPointer</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_reduce_by_segment_op.html">ReduceBySegmentOp</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_warp_scan_1_1_temp_storage.html">WarpScan::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="structcub_1_1_arg_min.html">ArgMin</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_is_volatile.html">IsVolatile</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_remove_qualifiers.html">RemoveQualifiers</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_block_histogram_1_1_temp_storage.html">BlockHistogram::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
+</td><td valign="top"><a class="el" href="classcub_1_1_cache_modified_output_iterator.html">CacheModifiedOutputIterator</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_equals.html">Equals</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_R"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;R&#160;&#160;</div></td></tr></table>
+</td><td valign="top"><a class="el" href="structcub_1_1_block_store_1_1_temp_storage.html">BlockStore::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="structcub_1_1_caching_device_allocator.html">CachingDeviceAllocator</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_I"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;I&#160;&#160;</div></td></tr></table>
+</td><td valign="top"><a class="el" href="structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___t_r_a_n_s_p_o_s_e_00_01_d_u_m_m_y_01_4_1_1_temp_storage.html">BlockLoad::LoadInternal&lt; BLOCK_LOAD_TRANSPOSE, DUMMY &gt;::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">ArgIndexInputIterator</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_cast.html">Cast</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_reduce_by_key_op.html">ReduceByKeyOp</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_block_radix_sort_1_1_temp_storage.html">BlockRadixSort::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="structcub_1_1_arg_max.html">ArgMax</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classcub_1_1_constant_input_iterator.html">ConstantInputIterator</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_if.html">If</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_reduce_by_segment_op.html">ReduceBySegmentOp</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_block_scan_1_1_temp_storage.html">BlockScan::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="structcub_1_1_arg_min.html">ArgMin</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classcub_1_1_counting_input_iterator.html">CountingInputIterator</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_inequality.html">Inequality</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_remove_qualifiers.html">RemoveQualifiers</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_8d170856b7ed1df0ed565731a681b449.html">BlockStore::StoreInternal&lt; BLOCK_STORE_WARP_TRANSPOSE, DUMMY &gt;::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
 <tr><td rowspan="2" valign="bottom"><a name="letter_B"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;B&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="structcub_1_1_device_histogram.html">DeviceHistogram</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_K"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;K&#160;&#160;</div></td></tr></table>
-</td><td rowspan="2" valign="bottom"><a name="letter_S"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;S&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="structcub_1_1_block_reduce_1_1_temp_storage.html">BlockReduce::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="structcub_1_1_device_partition.html">DevicePartition</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classcub_1_1_tex_obj_input_iterator.html">TexObjInputIterator</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="classcub_1_1_block_discontinuity.html">BlockDiscontinuity</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_device_radix_sort.html">DeviceRadixSort</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_key_value_pair.html">KeyValuePair</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_sum.html">Sum</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classcub_1_1_tex_ref_input_iterator.html">TexRefInputIterator</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="classcub_1_1_block_exchange.html">BlockExchange</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_device_reduce.html">DeviceReduce</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_L"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;L&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="classcub_1_1_swizzle_scan_op.html">SwizzleScanOp</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classcub_1_1_transform_input_iterator.html">TransformInputIterator</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="classcub_1_1_block_histogram.html">BlockHistogram</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_device_run_length_encode.html">DeviceRunLengthEncode</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_T"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;T&#160;&#160;</div></td></tr></table>
-</td><td rowspan="2" valign="bottom"><a name="letter_U"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;U&#160;&#160;</div></td></tr></table>
-</td></tr>
-<tr><td valign="top"><a class="el" href="classcub_1_1_block_load.html">BlockLoad</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_device_scan.html">DeviceScan</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_log2.html">Log2</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="classcub_1_1_block_radix_sort.html">BlockRadixSort</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_device_segmented_radix_sort.html">DeviceSegmentedRadixSort</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_M"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;M&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_e4c36dfe8f549604998f6c46cc8fbd1d.html">BlockLoad::LoadInternal&lt; BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED, DUMMY &gt;::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_uninitialized.html">Uninitialized</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="classcub_1_1_block_reduce.html">BlockReduce</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_device_select.html">DeviceSelect</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_263becc1ca5b47586740c2f7bb0d0145.html">BlockStore::StoreInternal&lt; BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED, DUMMY &gt;::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_W"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;W&#160;&#160;</div></td></tr></table>
+</td><td rowspan="2" valign="bottom"><a name="letter_D"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;D&#160;&#160;</div></td></tr></table>
+</td><td valign="top"><a class="el" href="structcub_1_1_inequality_wrapper.html">InequalityWrapper</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_S"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;S&#160;&#160;</div></td></tr></table>
+</td><td valign="top"><a class="el" href="structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_e4c36dfe8f549604998f6c46cc8fbd1d.html">BlockLoad::LoadInternal&lt; BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED, DUMMY &gt;::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="structcub_1_1_is_pointer.html">IsPointer</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_warp_reduce_1_1_temp_storage.html">WarpReduce::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="classcub_1_1_block_discontinuity.html">BlockDiscontinuity</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_device_histogram.html">DeviceHistogram</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_is_volatile.html">IsVolatile</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_sum.html">Sum</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_block_load_1_1_temp_storage.html">BlockLoad::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="classcub_1_1_block_exchange.html">BlockExchange</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_device_partition.html">DevicePartition</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_L"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;L&#160;&#160;</div></td></tr></table>
+</td><td valign="top"><a class="el" href="classcub_1_1_swizzle_scan_op.html">SwizzleScanOp</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_263becc1ca5b47586740c2f7bb0d0145.html">BlockStore::StoreInternal&lt; BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED, DUMMY &gt;::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="classcub_1_1_block_histogram.html">BlockHistogram</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_device_radix_sort.html">DeviceRadixSort</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_T"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;T&#160;&#160;</div></td></tr></table>
+</td><td valign="top"><a class="el" href="classcub_1_1_tex_obj_input_iterator.html">TexObjInputIterator</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="classcub_1_1_block_load.html">BlockLoad</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_device_reduce.html">DeviceReduce</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_log2.html">Log2</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classcub_1_1_tex_ref_input_iterator.html">TexRefInputIterator</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="classcub_1_1_block_radix_sort.html">BlockRadixSort</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_device_run_length_encode.html">DeviceRunLengthEncode</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_M"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;M&#160;&#160;</div></td></tr></table>
+</td><td valign="top"><a class="el" href="structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___t_r_a_n_s_p_o_s_e_00_09dfae03f13932c7dbdb41be30a5767ba.html">BlockStore::StoreInternal&lt; BLOCK_STORE_TRANSPOSE, DUMMY &gt;::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classcub_1_1_transform_input_iterator.html">TransformInputIterator</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="classcub_1_1_block_reduce.html">BlockReduce</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_device_scan.html">DeviceScan</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_block_discontinuity_1_1_temp_storage.html">BlockDiscontinuity::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_W"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;W&#160;&#160;</div></td></tr></table>
 </td></tr>
-<tr><td valign="top"><a class="el" href="classcub_1_1_block_scan.html">BlockScan</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_device_spmv.html">DeviceSpmv</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_max.html">Max</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_402c3164d23f1ec647db5dad06a54584.html">BlockLoad::LoadInternal&lt; BLOCK_LOAD_WARP_TRANSPOSE, DUMMY &gt;::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="classcub_1_1_block_store.html">BlockStore</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_double_buffer.html">DoubleBuffer</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_min.html">Min</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_block_discontinuity_1_1_temp_storage.html">BlockDiscontinuity::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classcub_1_1_warp_reduce.html">WarpReduce</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="classcub_1_1_block_scan.html">BlockScan</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_device_segmented_radix_sort.html">DeviceSegmentedRadixSort</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_max.html">Max</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_402c3164d23f1ec647db5dad06a54584.html">BlockLoad::LoadInternal&lt; BLOCK_LOAD_WARP_TRANSPOSE, DUMMY &gt;::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="classcub_1_1_block_store.html">BlockStore</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_device_segmented_reduce.html">DeviceSegmentedReduce</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_min.html">Min</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_block_reduce_1_1_temp_storage.html">BlockReduce::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classcub_1_1_warp_reduce.html">WarpReduce</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
 <tr><td rowspan="2" valign="bottom"><a name="letter_C"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;C&#160;&#160;</div></td></tr></table>
-</td><td rowspan="2" valign="bottom"><a name="letter_E"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;E&#160;&#160;</div></td></tr></table>
-</td><td rowspan="2" valign="bottom"><a name="letter_N"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;N&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="structcub_1_1_block_exchange_1_1_temp_storage.html">BlockExchange::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classcub_1_1_warp_scan.html">WarpScan</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="structcub_1_1_block_store_1_1_temp_storage.html">BlockStore::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td></td></tr>
-<tr><td valign="top"><a class="el" href="classcub_1_1_cache_modified_input_iterator.html">CacheModifiedInputIterator</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_equality.html">Equality</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_null_type.html">NullType</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_block_load_1_1_temp_storage.html">BlockLoad::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td></td></tr>
-<tr><td valign="top"><a class="el" href="classcub_1_1_cache_modified_output_iterator.html">CacheModifiedOutputIterator</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_equals.html">Equals</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_P"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;P&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="structcub_1_1_block_radix_sort_1_1_temp_storage.html">BlockRadixSort::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td></td></tr>
-<tr><td valign="top"><a class="el" href="structcub_1_1_caching_device_allocator.html">CachingDeviceAllocator</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_I"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;I&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___t_r_a_n_s_p_o_s_e_00_01_d_u_m_m_y_01_4_1_1_temp_storage.html">BlockLoad::LoadInternal&lt; BLOCK_LOAD_TRANSPOSE, DUMMY &gt;::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td></td></tr>
-<tr><td valign="top"><a class="el" href="structcub_1_1_cast.html">Cast</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_power_of_two.html">PowerOfTwo</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_8d170856b7ed1df0ed565731a681b449.html">BlockStore::StoreInternal&lt; BLOCK_STORE_WARP_TRANSPOSE, DUMMY &gt;::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td></td></tr>
-<tr><td></td><td valign="top"><a class="el" href="structcub_1_1_if.html">If</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td></td><td></td><td></td></tr>
+</td><td valign="top"><a class="el" href="structcub_1_1_device_select.html">DeviceSelect</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_P"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;P&#160;&#160;</div></td></tr></table>
+</td><td valign="top"><a class="el" href="structcub_1_1_block_histogram_1_1_temp_storage.html">BlockHistogram::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classcub_1_1_warp_scan.html">WarpScan</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="structcub_1_1_device_spmv.html">DeviceSpmv</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_warp_scan_1_1_temp_storage.html">WarpScan::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td></td></tr>
+<tr><td valign="top"><a class="el" href="classcub_1_1_cache_modified_input_iterator.html">CacheModifiedInputIterator</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_E"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;E&#160;&#160;</div></td></tr></table>
+</td><td valign="top"><a class="el" href="structcub_1_1_power_of_two.html">PowerOfTwo</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structcub_1_1_block_exchange_1_1_temp_storage.html">BlockExchange::TempStorage</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td></td></tr>
+<tr><td></td><td></td><td></td><td></td></tr>
+<tr><td></td><td valign="top"><a class="el" href="structcub_1_1_equality.html">Equality</a> (<a class="el" href="namespacecub.html">cub</a>)&#160;&#160;&#160;</td><td></td><td></td><td></td></tr>
 <tr><td></td><td></td><td></td><td></td><td></td></tr>
 </table>
-<div class="qindex"><a class="qindex" href="#letter_A">A</a>&#160;|&#160;<a class="qindex" href="#letter_B">B</a>&#160;|&#160;<a class="qindex" href="#letter_C">C</a>&#160;|&#160;<a class="qindex" href="#letter_D">D</a>&#160;|&#160;<a class="qindex" href="#letter_E">E</a>&#160;|&#160;<a class="qindex" href="#letter_I">I</a>&#160;|&#160;<a class="qindex" href="#letter_K">K</a>&#160;|&#160;<a class="qindex" href="#letter_L">L</a>&#160;|&#160;<a class="qindex" href="#letter_M">M</a>&#160;|&#160;<a class="qindex" href="#letter_N">N</a>&#160;|&#160;<a class="qindex" href="#letter_P">P</a>&#160;|&#160;<a class="qindex" href="#letter_R">R</a>&#160;|&#160;<a class="qindex" href="#letter_S">S</a>&#160;|&#160;<a class="qindex" href="#letter_T">T</a>&#160;|&#160;<a class="qindex" href="#letter_U">U</a>&#160;|&#160;<a class="qindex" href="#letter_W">W</a></div>
+<div class="qindex"><a class="qindex" href="#letter_A">A</a>&#160;|&#160;<a class="qindex" href="#letter_B">B</a>&#160;|&#160;<a class="qindex" href="#letter_C">C</a>&#160;|&#160;<a class="qindex" href="#letter_D">D</a>&#160;|&#160;<a class="qindex" href="#letter_E">E</a>&#160;|&#160;<a class="qindex" href="#letter_I">I</a>&#160;|&#160;<a class="qindex" href="#letter_L">L</a>&#160;|&#160;<a class="qindex" href="#letter_M">M</a>&#160;|&#160;<a class="qindex" href="#letter_P">P</a>&#160;|&#160;<a class="qindex" href="#letter_R">R</a>&#160;|&#160;<a class="qindex" href="#letter_S">S</a>&#160;|&#160;<a class="qindex" href="#letter_T">T</a>&#160;|&#160;<a class="qindex" href="#letter_W">W</a></div>
 </div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/constant__input__iterator_8cuh.html b/docs/html/constant__input__iterator_8cuh.html
index d9fd868f0f..08714d1cee 100644
--- a/docs/html/constant__input__iterator_8cuh.html
+++ b/docs/html/constant__input__iterator_8cuh.html
@@ -132,7 +132,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/constant__input__iterator_8cuh_source.html b/docs/html/constant__input__iterator_8cuh_source.html
index 63be06f7b9..1c2bfb7d89 100644
--- a/docs/html/constant__input__iterator_8cuh_source.html
+++ b/docs/html/constant__input__iterator_8cuh_source.html
@@ -282,7 +282,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/counting__input__iterator_8cuh.html b/docs/html/counting__input__iterator_8cuh.html
index 2fefba4608..64b2345c03 100644
--- a/docs/html/counting__input__iterator_8cuh.html
+++ b/docs/html/counting__input__iterator_8cuh.html
@@ -133,7 +133,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/counting__input__iterator_8cuh_source.html b/docs/html/counting__input__iterator_8cuh_source.html
index c40ec44b26..0c6a140b11 100644
--- a/docs/html/counting__input__iterator_8cuh_source.html
+++ b/docs/html/counting__input__iterator_8cuh_source.html
@@ -277,7 +277,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/cub_8cuh.html b/docs/html/cub_8cuh.html
index df805b753a..9ea76f4d4c 100644
--- a/docs/html/cub_8cuh.html
+++ b/docs/html/cub_8cuh.html
@@ -113,6 +113,8 @@
 <code>#include &quot;<a class="el" href="device__reduce_8cuh_source.html">device/device_reduce.cuh</a>&quot;</code><br/>
 <code>#include &quot;<a class="el" href="device__run__length__encode_8cuh_source.html">device/device_run_length_encode.cuh</a>&quot;</code><br/>
 <code>#include &quot;<a class="el" href="device__scan_8cuh_source.html">device/device_scan.cuh</a>&quot;</code><br/>
+<code>#include &quot;<a class="el" href="device__segmented__radix__sort_8cuh_source.html">device/device_segmented_radix_sort.cuh</a>&quot;</code><br/>
+<code>#include &quot;<a class="el" href="device__segmented__reduce_8cuh_source.html">device/device_segmented_reduce.cuh</a>&quot;</code><br/>
 <code>#include &quot;<a class="el" href="device__select_8cuh_source.html">device/device_select.cuh</a>&quot;</code><br/>
 <code>#include &quot;<a class="el" href="device__spmv_8cuh_source.html">device/device_spmv.cuh</a>&quot;</code><br/>
 <code>#include &quot;grid/grid_even_share.cuh&quot;</code><br/>
@@ -136,7 +138,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/cub_8cuh_source.html b/docs/html/cub_8cuh_source.html
index bf9ae8fdef..d9c61fb3af 100644
--- a/docs/html/cub_8cuh_source.html
+++ b/docs/html/cub_8cuh_source.html
@@ -151,53 +151,55 @@
 <div class="line"><a name="l00053"></a><span class="lineno">   53</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="device__reduce_8cuh.html">device/device_reduce.cuh</a>&quot;</span></div>
 <div class="line"><a name="l00054"></a><span class="lineno">   54</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="device__run__length__encode_8cuh.html">device/device_run_length_encode.cuh</a>&quot;</span></div>
 <div class="line"><a name="l00055"></a><span class="lineno">   55</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="device__scan_8cuh.html">device/device_scan.cuh</a>&quot;</span></div>
-<div class="line"><a name="l00056"></a><span class="lineno">   56</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="device__select_8cuh.html">device/device_select.cuh</a>&quot;</span></div>
-<div class="line"><a name="l00057"></a><span class="lineno">   57</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="device__spmv_8cuh.html">device/device_spmv.cuh</a>&quot;</span></div>
-<div class="line"><a name="l00058"></a><span class="lineno">   58</span>&#160;</div>
-<div class="line"><a name="l00059"></a><span class="lineno">   59</span>&#160;<span class="comment">// Grid</span></div>
-<div class="line"><a name="l00060"></a><span class="lineno">   60</span>&#160;<span class="comment">//#include &quot;grid/grid_barrier.cuh&quot;</span></div>
-<div class="line"><a name="l00061"></a><span class="lineno">   61</span>&#160;<span class="preprocessor">#include &quot;grid/grid_even_share.cuh&quot;</span></div>
-<div class="line"><a name="l00062"></a><span class="lineno">   62</span>&#160;<span class="preprocessor">#include &quot;grid/grid_mapping.cuh&quot;</span></div>
-<div class="line"><a name="l00063"></a><span class="lineno">   63</span>&#160;<span class="preprocessor">#include &quot;grid/grid_queue.cuh&quot;</span></div>
-<div class="line"><a name="l00064"></a><span class="lineno">   64</span>&#160;</div>
-<div class="line"><a name="l00065"></a><span class="lineno">   65</span>&#160;<span class="comment">// Host</span></div>
-<div class="line"><a name="l00066"></a><span class="lineno">   66</span>&#160;<span class="preprocessor">#include &quot;host/spinlock.cuh&quot;</span></div>
-<div class="line"><a name="l00067"></a><span class="lineno">   67</span>&#160;</div>
-<div class="line"><a name="l00068"></a><span class="lineno">   68</span>&#160;<span class="comment">// Thread</span></div>
-<div class="line"><a name="l00069"></a><span class="lineno">   69</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="thread__load_8cuh.html">thread/thread_load.cuh</a>&quot;</span></div>
-<div class="line"><a name="l00070"></a><span class="lineno">   70</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="thread__operators_8cuh.html">thread/thread_operators.cuh</a>&quot;</span></div>
-<div class="line"><a name="l00071"></a><span class="lineno">   71</span>&#160;<span class="preprocessor">#include &quot;thread/thread_reduce.cuh&quot;</span></div>
-<div class="line"><a name="l00072"></a><span class="lineno">   72</span>&#160;<span class="preprocessor">#include &quot;thread/thread_scan.cuh&quot;</span></div>
-<div class="line"><a name="l00073"></a><span class="lineno">   73</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="thread__store_8cuh.html">thread/thread_store.cuh</a>&quot;</span></div>
-<div class="line"><a name="l00074"></a><span class="lineno">   74</span>&#160;</div>
-<div class="line"><a name="l00075"></a><span class="lineno">   75</span>&#160;<span class="comment">// Warp</span></div>
-<div class="line"><a name="l00076"></a><span class="lineno">   76</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="warp__reduce_8cuh.html">warp/warp_reduce.cuh</a>&quot;</span></div>
-<div class="line"><a name="l00077"></a><span class="lineno">   77</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="warp__scan_8cuh.html">warp/warp_scan.cuh</a>&quot;</span></div>
-<div class="line"><a name="l00078"></a><span class="lineno">   78</span>&#160;</div>
-<div class="line"><a name="l00079"></a><span class="lineno">   79</span>&#160;<span class="comment">// Iterator</span></div>
-<div class="line"><a name="l00080"></a><span class="lineno">   80</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="arg__index__input__iterator_8cuh.html">iterator/arg_index_input_iterator.cuh</a>&quot;</span></div>
-<div class="line"><a name="l00081"></a><span class="lineno">   81</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="cache__modified__input__iterator_8cuh.html">iterator/cache_modified_input_iterator.cuh</a>&quot;</span></div>
-<div class="line"><a name="l00082"></a><span class="lineno">   82</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="cache__modified__output__iterator_8cuh.html">iterator/cache_modified_output_iterator.cuh</a>&quot;</span></div>
-<div class="line"><a name="l00083"></a><span class="lineno">   83</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="constant__input__iterator_8cuh.html">iterator/constant_input_iterator.cuh</a>&quot;</span></div>
-<div class="line"><a name="l00084"></a><span class="lineno">   84</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="counting__input__iterator_8cuh.html">iterator/counting_input_iterator.cuh</a>&quot;</span></div>
-<div class="line"><a name="l00085"></a><span class="lineno">   85</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="tex__obj__input__iterator_8cuh.html">iterator/tex_obj_input_iterator.cuh</a>&quot;</span></div>
-<div class="line"><a name="l00086"></a><span class="lineno">   86</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="tex__ref__input__iterator_8cuh.html">iterator/tex_ref_input_iterator.cuh</a>&quot;</span></div>
-<div class="line"><a name="l00087"></a><span class="lineno">   87</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="transform__input__iterator_8cuh.html">iterator/transform_input_iterator.cuh</a>&quot;</span></div>
-<div class="line"><a name="l00088"></a><span class="lineno">   88</span>&#160;</div>
-<div class="line"><a name="l00089"></a><span class="lineno">   89</span>&#160;<span class="comment">// Util</span></div>
-<div class="line"><a name="l00090"></a><span class="lineno">   90</span>&#160;<span class="preprocessor">#include &quot;util_allocator.cuh&quot;</span></div>
-<div class="line"><a name="l00091"></a><span class="lineno">   91</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="util__arch_8cuh.html">util_arch.cuh</a>&quot;</span></div>
-<div class="line"><a name="l00092"></a><span class="lineno">   92</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="util__debug_8cuh.html">util_debug.cuh</a>&quot;</span></div>
-<div class="line"><a name="l00093"></a><span class="lineno">   93</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="util__device_8cuh.html">util_device.cuh</a>&quot;</span></div>
-<div class="line"><a name="l00094"></a><span class="lineno">   94</span>&#160;<span class="preprocessor">#include &quot;util_macro.cuh&quot;</span></div>
-<div class="line"><a name="l00095"></a><span class="lineno">   95</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="util__ptx_8cuh.html">util_ptx.cuh</a>&quot;</span></div>
-<div class="line"><a name="l00096"></a><span class="lineno">   96</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="util__type_8cuh.html">util_type.cuh</a>&quot;</span></div>
-<div class="line"><a name="l00097"></a><span class="lineno">   97</span>&#160;</div>
+<div class="line"><a name="l00056"></a><span class="lineno">   56</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="device__segmented__radix__sort_8cuh.html">device/device_segmented_radix_sort.cuh</a>&quot;</span></div>
+<div class="line"><a name="l00057"></a><span class="lineno">   57</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="device__segmented__reduce_8cuh.html">device/device_segmented_reduce.cuh</a>&quot;</span></div>
+<div class="line"><a name="l00058"></a><span class="lineno">   58</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="device__select_8cuh.html">device/device_select.cuh</a>&quot;</span></div>
+<div class="line"><a name="l00059"></a><span class="lineno">   59</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="device__spmv_8cuh.html">device/device_spmv.cuh</a>&quot;</span></div>
+<div class="line"><a name="l00060"></a><span class="lineno">   60</span>&#160;</div>
+<div class="line"><a name="l00061"></a><span class="lineno">   61</span>&#160;<span class="comment">// Grid</span></div>
+<div class="line"><a name="l00062"></a><span class="lineno">   62</span>&#160;<span class="comment">//#include &quot;grid/grid_barrier.cuh&quot;</span></div>
+<div class="line"><a name="l00063"></a><span class="lineno">   63</span>&#160;<span class="preprocessor">#include &quot;grid/grid_even_share.cuh&quot;</span></div>
+<div class="line"><a name="l00064"></a><span class="lineno">   64</span>&#160;<span class="preprocessor">#include &quot;grid/grid_mapping.cuh&quot;</span></div>
+<div class="line"><a name="l00065"></a><span class="lineno">   65</span>&#160;<span class="preprocessor">#include &quot;grid/grid_queue.cuh&quot;</span></div>
+<div class="line"><a name="l00066"></a><span class="lineno">   66</span>&#160;</div>
+<div class="line"><a name="l00067"></a><span class="lineno">   67</span>&#160;<span class="comment">// Host</span></div>
+<div class="line"><a name="l00068"></a><span class="lineno">   68</span>&#160;<span class="preprocessor">#include &quot;host/spinlock.cuh&quot;</span></div>
+<div class="line"><a name="l00069"></a><span class="lineno">   69</span>&#160;</div>
+<div class="line"><a name="l00070"></a><span class="lineno">   70</span>&#160;<span class="comment">// Thread</span></div>
+<div class="line"><a name="l00071"></a><span class="lineno">   71</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="thread__load_8cuh.html">thread/thread_load.cuh</a>&quot;</span></div>
+<div class="line"><a name="l00072"></a><span class="lineno">   72</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="thread__operators_8cuh.html">thread/thread_operators.cuh</a>&quot;</span></div>
+<div class="line"><a name="l00073"></a><span class="lineno">   73</span>&#160;<span class="preprocessor">#include &quot;thread/thread_reduce.cuh&quot;</span></div>
+<div class="line"><a name="l00074"></a><span class="lineno">   74</span>&#160;<span class="preprocessor">#include &quot;thread/thread_scan.cuh&quot;</span></div>
+<div class="line"><a name="l00075"></a><span class="lineno">   75</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="thread__store_8cuh.html">thread/thread_store.cuh</a>&quot;</span></div>
+<div class="line"><a name="l00076"></a><span class="lineno">   76</span>&#160;</div>
+<div class="line"><a name="l00077"></a><span class="lineno">   77</span>&#160;<span class="comment">// Warp</span></div>
+<div class="line"><a name="l00078"></a><span class="lineno">   78</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="warp__reduce_8cuh.html">warp/warp_reduce.cuh</a>&quot;</span></div>
+<div class="line"><a name="l00079"></a><span class="lineno">   79</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="warp__scan_8cuh.html">warp/warp_scan.cuh</a>&quot;</span></div>
+<div class="line"><a name="l00080"></a><span class="lineno">   80</span>&#160;</div>
+<div class="line"><a name="l00081"></a><span class="lineno">   81</span>&#160;<span class="comment">// Iterator</span></div>
+<div class="line"><a name="l00082"></a><span class="lineno">   82</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="arg__index__input__iterator_8cuh.html">iterator/arg_index_input_iterator.cuh</a>&quot;</span></div>
+<div class="line"><a name="l00083"></a><span class="lineno">   83</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="cache__modified__input__iterator_8cuh.html">iterator/cache_modified_input_iterator.cuh</a>&quot;</span></div>
+<div class="line"><a name="l00084"></a><span class="lineno">   84</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="cache__modified__output__iterator_8cuh.html">iterator/cache_modified_output_iterator.cuh</a>&quot;</span></div>
+<div class="line"><a name="l00085"></a><span class="lineno">   85</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="constant__input__iterator_8cuh.html">iterator/constant_input_iterator.cuh</a>&quot;</span></div>
+<div class="line"><a name="l00086"></a><span class="lineno">   86</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="counting__input__iterator_8cuh.html">iterator/counting_input_iterator.cuh</a>&quot;</span></div>
+<div class="line"><a name="l00087"></a><span class="lineno">   87</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="tex__obj__input__iterator_8cuh.html">iterator/tex_obj_input_iterator.cuh</a>&quot;</span></div>
+<div class="line"><a name="l00088"></a><span class="lineno">   88</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="tex__ref__input__iterator_8cuh.html">iterator/tex_ref_input_iterator.cuh</a>&quot;</span></div>
+<div class="line"><a name="l00089"></a><span class="lineno">   89</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="transform__input__iterator_8cuh.html">iterator/transform_input_iterator.cuh</a>&quot;</span></div>
+<div class="line"><a name="l00090"></a><span class="lineno">   90</span>&#160;</div>
+<div class="line"><a name="l00091"></a><span class="lineno">   91</span>&#160;<span class="comment">// Util</span></div>
+<div class="line"><a name="l00092"></a><span class="lineno">   92</span>&#160;<span class="preprocessor">#include &quot;util_allocator.cuh&quot;</span></div>
+<div class="line"><a name="l00093"></a><span class="lineno">   93</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="util__arch_8cuh.html">util_arch.cuh</a>&quot;</span></div>
+<div class="line"><a name="l00094"></a><span class="lineno">   94</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="util__debug_8cuh.html">util_debug.cuh</a>&quot;</span></div>
+<div class="line"><a name="l00095"></a><span class="lineno">   95</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="util__device_8cuh.html">util_device.cuh</a>&quot;</span></div>
+<div class="line"><a name="l00096"></a><span class="lineno">   96</span>&#160;<span class="preprocessor">#include &quot;util_macro.cuh&quot;</span></div>
+<div class="line"><a name="l00097"></a><span class="lineno">   97</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="util__ptx_8cuh.html">util_ptx.cuh</a>&quot;</span></div>
+<div class="line"><a name="l00098"></a><span class="lineno">   98</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="util__type_8cuh.html">util_type.cuh</a>&quot;</span></div>
+<div class="line"><a name="l00099"></a><span class="lineno">   99</span>&#160;</div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/device__histogram_8cuh.html b/docs/html/device__histogram_8cuh.html
index 5fa7a626b9..68250778fc 100644
--- a/docs/html/device__histogram_8cuh.html
+++ b/docs/html/device__histogram_8cuh.html
@@ -137,7 +137,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/device__histogram_8cuh_source.html b/docs/html/device__histogram_8cuh_source.html
index 2ba5dfd544..25c2f364d5 100644
--- a/docs/html/device__histogram_8cuh_source.html
+++ b/docs/html/device__histogram_8cuh_source.html
@@ -148,333 +148,333 @@
 <div class="line"><a name="l00064"></a><span class="lineno">   64</span>&#160;{</div>
 <div class="line"><a name="l00065"></a><span class="lineno">   65</span>&#160;    <span class="comment">/******************************************************************/</span></div>
 <div class="line"><a name="l00069"></a><span class="lineno">   69</span>&#160;</div>
-<div class="line"><a name="l00119"></a><span class="lineno">  119</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00120"></a><span class="lineno">  120</span>&#160;        <span class="keyword">typename</span>            SampleIteratorT,</div>
-<div class="line"><a name="l00121"></a><span class="lineno">  121</span>&#160;        <span class="keyword">typename</span>            CounterT,</div>
-<div class="line"><a name="l00122"></a><span class="lineno">  122</span>&#160;        <span class="keyword">typename</span>            LevelT,</div>
-<div class="line"><a name="l00123"></a><span class="lineno">  123</span>&#160;        <span class="keyword">typename</span>            OffsetT&gt;</div>
-<div class="line"><a name="l00124"></a><span class="lineno">  124</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00125"></a><span class="lineno"><a class="code" href="structcub_1_1_device_histogram.html#ac7bfad764da8e81be215a39d341ef841">  125</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_histogram.html#ac7bfad764da8e81be215a39d341ef841" title="Computes an intensity histogram from a sequence of data samples using equal-width bins...">HistogramEven</a>(</div>
-<div class="line"><a name="l00126"></a><span class="lineno">  126</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                             </div>
-<div class="line"><a name="l00127"></a><span class="lineno">  127</span>&#160;        <span class="keywordtype">size_t</span>&amp;             temp_storage_bytes,                        </div>
-<div class="line"><a name="l00128"></a><span class="lineno">  128</span>&#160;        SampleIteratorT     d_samples,                                  </div>
-<div class="line"><a name="l00129"></a><span class="lineno">  129</span>&#160;        CounterT*           d_histogram,                                </div>
-<div class="line"><a name="l00130"></a><span class="lineno">  130</span>&#160;        <span class="keywordtype">int</span>                 num_levels,                                 </div>
-<div class="line"><a name="l00131"></a><span class="lineno">  131</span>&#160;        LevelT              lower_level,                                </div>
-<div class="line"><a name="l00132"></a><span class="lineno">  132</span>&#160;        LevelT              upper_level,                                </div>
-<div class="line"><a name="l00133"></a><span class="lineno">  133</span>&#160;        OffsetT             num_samples,                                </div>
-<div class="line"><a name="l00134"></a><span class="lineno">  134</span>&#160;        cudaStream_t        stream                  = 0,                </div>
-<div class="line"><a name="l00135"></a><span class="lineno">  135</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous       = <span class="keyword">false</span>)            </div>
-<div class="line"><a name="l00136"></a><span class="lineno">  136</span>&#160;    {</div>
-<div class="line"><a name="l00138"></a><span class="lineno">  138</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> std::iterator_traits&lt;SampleIteratorT&gt;::value_type SampleT;</div>
-<div class="line"><a name="l00139"></a><span class="lineno">  139</span>&#160;</div>
-<div class="line"><a name="l00140"></a><span class="lineno">  140</span>&#160;        CounterT*           d_histogram1[1]     = {d_histogram};</div>
-<div class="line"><a name="l00141"></a><span class="lineno">  141</span>&#160;        <span class="keywordtype">int</span>                 num_levels1[1]      = {num_levels};</div>
-<div class="line"><a name="l00142"></a><span class="lineno">  142</span>&#160;        LevelT              lower_level1[1]     = {lower_level};</div>
-<div class="line"><a name="l00143"></a><span class="lineno">  143</span>&#160;        LevelT              upper_level1[1]     = {upper_level};</div>
-<div class="line"><a name="l00144"></a><span class="lineno">  144</span>&#160;</div>
-<div class="line"><a name="l00145"></a><span class="lineno">  145</span>&#160;        <span class="keywordflow">return</span> MultiHistogramEven&lt;1, 1&gt;(</div>
-<div class="line"><a name="l00146"></a><span class="lineno">  146</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00147"></a><span class="lineno">  147</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00148"></a><span class="lineno">  148</span>&#160;            d_samples,</div>
-<div class="line"><a name="l00149"></a><span class="lineno">  149</span>&#160;            d_histogram1,</div>
-<div class="line"><a name="l00150"></a><span class="lineno">  150</span>&#160;            num_levels1,</div>
-<div class="line"><a name="l00151"></a><span class="lineno">  151</span>&#160;            lower_level1,</div>
-<div class="line"><a name="l00152"></a><span class="lineno">  152</span>&#160;            upper_level1,</div>
-<div class="line"><a name="l00153"></a><span class="lineno">  153</span>&#160;            num_samples,</div>
-<div class="line"><a name="l00154"></a><span class="lineno">  154</span>&#160;            1,</div>
-<div class="line"><a name="l00155"></a><span class="lineno">  155</span>&#160;            <span class="keyword">sizeof</span>(SampleT) * num_samples,</div>
-<div class="line"><a name="l00156"></a><span class="lineno">  156</span>&#160;            stream,</div>
-<div class="line"><a name="l00157"></a><span class="lineno">  157</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00158"></a><span class="lineno">  158</span>&#160;    }</div>
+<div class="line"><a name="l00118"></a><span class="lineno">  118</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00119"></a><span class="lineno">  119</span>&#160;        <span class="keyword">typename</span>            SampleIteratorT,</div>
+<div class="line"><a name="l00120"></a><span class="lineno">  120</span>&#160;        <span class="keyword">typename</span>            CounterT,</div>
+<div class="line"><a name="l00121"></a><span class="lineno">  121</span>&#160;        <span class="keyword">typename</span>            LevelT,</div>
+<div class="line"><a name="l00122"></a><span class="lineno">  122</span>&#160;        <span class="keyword">typename</span>            OffsetT&gt;</div>
+<div class="line"><a name="l00123"></a><span class="lineno">  123</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00124"></a><span class="lineno"><a class="code" href="structcub_1_1_device_histogram.html#ac7bfad764da8e81be215a39d341ef841">  124</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_histogram.html#ac7bfad764da8e81be215a39d341ef841" title="Computes an intensity histogram from a sequence of data samples using equal-width bins...">HistogramEven</a>(</div>
+<div class="line"><a name="l00125"></a><span class="lineno">  125</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                             </div>
+<div class="line"><a name="l00126"></a><span class="lineno">  126</span>&#160;        <span class="keywordtype">size_t</span>&amp;             temp_storage_bytes,                        </div>
+<div class="line"><a name="l00127"></a><span class="lineno">  127</span>&#160;        SampleIteratorT     d_samples,                                  </div>
+<div class="line"><a name="l00128"></a><span class="lineno">  128</span>&#160;        CounterT*           d_histogram,                                </div>
+<div class="line"><a name="l00129"></a><span class="lineno">  129</span>&#160;        <span class="keywordtype">int</span>                 num_levels,                                 </div>
+<div class="line"><a name="l00130"></a><span class="lineno">  130</span>&#160;        LevelT              lower_level,                                </div>
+<div class="line"><a name="l00131"></a><span class="lineno">  131</span>&#160;        LevelT              upper_level,                                </div>
+<div class="line"><a name="l00132"></a><span class="lineno">  132</span>&#160;        OffsetT             num_samples,                                </div>
+<div class="line"><a name="l00133"></a><span class="lineno">  133</span>&#160;        cudaStream_t        stream                  = 0,                </div>
+<div class="line"><a name="l00134"></a><span class="lineno">  134</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous       = <span class="keyword">false</span>)            </div>
+<div class="line"><a name="l00135"></a><span class="lineno">  135</span>&#160;    {</div>
+<div class="line"><a name="l00137"></a><span class="lineno">  137</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> std::iterator_traits&lt;SampleIteratorT&gt;::value_type SampleT;</div>
+<div class="line"><a name="l00138"></a><span class="lineno">  138</span>&#160;</div>
+<div class="line"><a name="l00139"></a><span class="lineno">  139</span>&#160;        CounterT*           d_histogram1[1]     = {d_histogram};</div>
+<div class="line"><a name="l00140"></a><span class="lineno">  140</span>&#160;        <span class="keywordtype">int</span>                 num_levels1[1]      = {num_levels};</div>
+<div class="line"><a name="l00141"></a><span class="lineno">  141</span>&#160;        LevelT              lower_level1[1]     = {lower_level};</div>
+<div class="line"><a name="l00142"></a><span class="lineno">  142</span>&#160;        LevelT              upper_level1[1]     = {upper_level};</div>
+<div class="line"><a name="l00143"></a><span class="lineno">  143</span>&#160;</div>
+<div class="line"><a name="l00144"></a><span class="lineno">  144</span>&#160;        <span class="keywordflow">return</span> MultiHistogramEven&lt;1, 1&gt;(</div>
+<div class="line"><a name="l00145"></a><span class="lineno">  145</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00146"></a><span class="lineno">  146</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00147"></a><span class="lineno">  147</span>&#160;            d_samples,</div>
+<div class="line"><a name="l00148"></a><span class="lineno">  148</span>&#160;            d_histogram1,</div>
+<div class="line"><a name="l00149"></a><span class="lineno">  149</span>&#160;            num_levels1,</div>
+<div class="line"><a name="l00150"></a><span class="lineno">  150</span>&#160;            lower_level1,</div>
+<div class="line"><a name="l00151"></a><span class="lineno">  151</span>&#160;            upper_level1,</div>
+<div class="line"><a name="l00152"></a><span class="lineno">  152</span>&#160;            num_samples,</div>
+<div class="line"><a name="l00153"></a><span class="lineno">  153</span>&#160;            1,</div>
+<div class="line"><a name="l00154"></a><span class="lineno">  154</span>&#160;            <span class="keyword">sizeof</span>(SampleT) * num_samples,</div>
+<div class="line"><a name="l00155"></a><span class="lineno">  155</span>&#160;            stream,</div>
+<div class="line"><a name="l00156"></a><span class="lineno">  156</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00157"></a><span class="lineno">  157</span>&#160;    }</div>
+<div class="line"><a name="l00158"></a><span class="lineno">  158</span>&#160;</div>
 <div class="line"><a name="l00159"></a><span class="lineno">  159</span>&#160;</div>
-<div class="line"><a name="l00160"></a><span class="lineno">  160</span>&#160;</div>
-<div class="line"><a name="l00219"></a><span class="lineno">  219</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00220"></a><span class="lineno">  220</span>&#160;        <span class="keyword">typename</span>            SampleIteratorT,</div>
-<div class="line"><a name="l00221"></a><span class="lineno">  221</span>&#160;        <span class="keyword">typename</span>            CounterT,</div>
-<div class="line"><a name="l00222"></a><span class="lineno">  222</span>&#160;        <span class="keyword">typename</span>            LevelT,</div>
-<div class="line"><a name="l00223"></a><span class="lineno">  223</span>&#160;        <span class="keyword">typename</span>            OffsetT&gt;</div>
-<div class="line"><a name="l00224"></a><span class="lineno">  224</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00225"></a><span class="lineno"><a class="code" href="structcub_1_1_device_histogram.html#ad101280b8a1f2d61acb4aa06a4a5df1b">  225</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_histogram.html#ad101280b8a1f2d61acb4aa06a4a5df1b" title="Computes an intensity histogram from a sequence of data samples using equal-width bins...">HistogramEven</a>(</div>
-<div class="line"><a name="l00226"></a><span class="lineno">  226</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                             </div>
-<div class="line"><a name="l00227"></a><span class="lineno">  227</span>&#160;        <span class="keywordtype">size_t</span>&amp;             temp_storage_bytes,                        </div>
-<div class="line"><a name="l00228"></a><span class="lineno">  228</span>&#160;        SampleIteratorT     d_samples,                                  </div>
-<div class="line"><a name="l00229"></a><span class="lineno">  229</span>&#160;        CounterT*           d_histogram,                                </div>
-<div class="line"><a name="l00230"></a><span class="lineno">  230</span>&#160;        <span class="keywordtype">int</span>                 num_levels,                                 </div>
-<div class="line"><a name="l00231"></a><span class="lineno">  231</span>&#160;        LevelT              lower_level,                                </div>
-<div class="line"><a name="l00232"></a><span class="lineno">  232</span>&#160;        LevelT              upper_level,                                </div>
-<div class="line"><a name="l00233"></a><span class="lineno">  233</span>&#160;        OffsetT             num_row_samples,                            </div>
-<div class="line"><a name="l00234"></a><span class="lineno">  234</span>&#160;        OffsetT             num_rows,                                   </div>
-<div class="line"><a name="l00235"></a><span class="lineno">  235</span>&#160;        <span class="keywordtype">size_t</span>              row_stride_bytes,                           </div>
-<div class="line"><a name="l00236"></a><span class="lineno">  236</span>&#160;        cudaStream_t        stream                  = 0,                </div>
-<div class="line"><a name="l00237"></a><span class="lineno">  237</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous       = <span class="keyword">false</span>)            </div>
-<div class="line"><a name="l00238"></a><span class="lineno">  238</span>&#160;    {</div>
-<div class="line"><a name="l00239"></a><span class="lineno">  239</span>&#160;        CounterT*           d_histogram1[1]     = {d_histogram};</div>
-<div class="line"><a name="l00240"></a><span class="lineno">  240</span>&#160;        <span class="keywordtype">int</span>                 num_levels1[1]      = {num_levels};</div>
-<div class="line"><a name="l00241"></a><span class="lineno">  241</span>&#160;        LevelT              lower_level1[1]     = {lower_level};</div>
-<div class="line"><a name="l00242"></a><span class="lineno">  242</span>&#160;        LevelT              upper_level1[1]     = {upper_level};</div>
-<div class="line"><a name="l00243"></a><span class="lineno">  243</span>&#160;</div>
-<div class="line"><a name="l00244"></a><span class="lineno">  244</span>&#160;        <span class="keywordflow">return</span> MultiHistogramEven&lt;1, 1&gt;(</div>
-<div class="line"><a name="l00245"></a><span class="lineno">  245</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00246"></a><span class="lineno">  246</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00247"></a><span class="lineno">  247</span>&#160;            d_samples,</div>
-<div class="line"><a name="l00248"></a><span class="lineno">  248</span>&#160;            d_histogram1,</div>
-<div class="line"><a name="l00249"></a><span class="lineno">  249</span>&#160;            num_levels1,</div>
-<div class="line"><a name="l00250"></a><span class="lineno">  250</span>&#160;            lower_level1,</div>
-<div class="line"><a name="l00251"></a><span class="lineno">  251</span>&#160;            upper_level1,</div>
-<div class="line"><a name="l00252"></a><span class="lineno">  252</span>&#160;            num_row_samples,</div>
-<div class="line"><a name="l00253"></a><span class="lineno">  253</span>&#160;            num_rows,</div>
-<div class="line"><a name="l00254"></a><span class="lineno">  254</span>&#160;            row_stride_bytes,</div>
-<div class="line"><a name="l00255"></a><span class="lineno">  255</span>&#160;            stream,</div>
-<div class="line"><a name="l00256"></a><span class="lineno">  256</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00257"></a><span class="lineno">  257</span>&#160;    }</div>
-<div class="line"><a name="l00258"></a><span class="lineno">  258</span>&#160;</div>
-<div class="line"><a name="l00320"></a><span class="lineno">  320</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00321"></a><span class="lineno">  321</span>&#160;        <span class="keywordtype">int</span>                 NUM_CHANNELS,</div>
-<div class="line"><a name="l00322"></a><span class="lineno">  322</span>&#160;        <span class="keywordtype">int</span>                 NUM_ACTIVE_CHANNELS,</div>
-<div class="line"><a name="l00323"></a><span class="lineno">  323</span>&#160;        <span class="keyword">typename</span>            SampleIteratorT,</div>
-<div class="line"><a name="l00324"></a><span class="lineno">  324</span>&#160;        <span class="keyword">typename</span>            CounterT,</div>
-<div class="line"><a name="l00325"></a><span class="lineno">  325</span>&#160;        <span class="keyword">typename</span>            LevelT,</div>
-<div class="line"><a name="l00326"></a><span class="lineno">  326</span>&#160;        <span class="keyword">typename</span>            OffsetT&gt;</div>
-<div class="line"><a name="l00327"></a><span class="lineno">  327</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00328"></a><span class="lineno"><a class="code" href="structcub_1_1_device_histogram.html#a917e507d773ef1b11424ed8bc49e4d95">  328</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_histogram.html#a917e507d773ef1b11424ed8bc49e4d95" title="Computes per-channel intensity histograms from a sequence of multi-channel &quot;pixel&quot; data samples using...">MultiHistogramEven</a>(</div>
-<div class="line"><a name="l00329"></a><span class="lineno">  329</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                             </div>
-<div class="line"><a name="l00330"></a><span class="lineno">  330</span>&#160;        <span class="keywordtype">size_t</span>&amp;             temp_storage_bytes,                        </div>
-<div class="line"><a name="l00331"></a><span class="lineno">  331</span>&#160;        SampleIteratorT     d_samples,                                  </div>
-<div class="line"><a name="l00332"></a><span class="lineno">  332</span>&#160;        CounterT*           d_histogram[NUM_ACTIVE_CHANNELS],           </div>
-<div class="line"><a name="l00333"></a><span class="lineno">  333</span>&#160;        <span class="keywordtype">int</span>                 num_levels[NUM_ACTIVE_CHANNELS],            </div>
-<div class="line"><a name="l00334"></a><span class="lineno">  334</span>&#160;        LevelT              lower_level[NUM_ACTIVE_CHANNELS],           </div>
-<div class="line"><a name="l00335"></a><span class="lineno">  335</span>&#160;        LevelT              upper_level[NUM_ACTIVE_CHANNELS],           </div>
-<div class="line"><a name="l00336"></a><span class="lineno">  336</span>&#160;        OffsetT             num_pixels,                                 </div>
-<div class="line"><a name="l00337"></a><span class="lineno">  337</span>&#160;        cudaStream_t        stream                  = 0,                </div>
-<div class="line"><a name="l00338"></a><span class="lineno">  338</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous       = <span class="keyword">false</span>)            </div>
-<div class="line"><a name="l00339"></a><span class="lineno">  339</span>&#160;    {</div>
-<div class="line"><a name="l00341"></a><span class="lineno">  341</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> std::iterator_traits&lt;SampleIteratorT&gt;::value_type SampleT;</div>
-<div class="line"><a name="l00342"></a><span class="lineno">  342</span>&#160;</div>
-<div class="line"><a name="l00343"></a><span class="lineno">  343</span>&#160;        <span class="keywordflow">return</span> MultiHistogramEven&lt;NUM_CHANNELS, NUM_ACTIVE_CHANNELS&gt;(</div>
-<div class="line"><a name="l00344"></a><span class="lineno">  344</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00345"></a><span class="lineno">  345</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00346"></a><span class="lineno">  346</span>&#160;            d_samples,</div>
-<div class="line"><a name="l00347"></a><span class="lineno">  347</span>&#160;            d_histogram,</div>
-<div class="line"><a name="l00348"></a><span class="lineno">  348</span>&#160;            num_levels,</div>
-<div class="line"><a name="l00349"></a><span class="lineno">  349</span>&#160;            lower_level,</div>
-<div class="line"><a name="l00350"></a><span class="lineno">  350</span>&#160;            upper_level,</div>
-<div class="line"><a name="l00351"></a><span class="lineno">  351</span>&#160;            num_pixels,</div>
-<div class="line"><a name="l00352"></a><span class="lineno">  352</span>&#160;            1,</div>
-<div class="line"><a name="l00353"></a><span class="lineno">  353</span>&#160;            <span class="keyword">sizeof</span>(SampleT) * NUM_CHANNELS * num_pixels,</div>
-<div class="line"><a name="l00354"></a><span class="lineno">  354</span>&#160;            stream,</div>
-<div class="line"><a name="l00355"></a><span class="lineno">  355</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00356"></a><span class="lineno">  356</span>&#160;    }</div>
-<div class="line"><a name="l00357"></a><span class="lineno">  357</span>&#160;</div>
-<div class="line"><a name="l00358"></a><span class="lineno">  358</span>&#160;</div>
-<div class="line"><a name="l00428"></a><span class="lineno">  428</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00429"></a><span class="lineno">  429</span>&#160;        <span class="keywordtype">int</span>                 NUM_CHANNELS,</div>
-<div class="line"><a name="l00430"></a><span class="lineno">  430</span>&#160;        <span class="keywordtype">int</span>                 NUM_ACTIVE_CHANNELS,</div>
-<div class="line"><a name="l00431"></a><span class="lineno">  431</span>&#160;        <span class="keyword">typename</span>            SampleIteratorT,</div>
-<div class="line"><a name="l00432"></a><span class="lineno">  432</span>&#160;        <span class="keyword">typename</span>            CounterT,</div>
-<div class="line"><a name="l00433"></a><span class="lineno">  433</span>&#160;        <span class="keyword">typename</span>            LevelT,</div>
-<div class="line"><a name="l00434"></a><span class="lineno">  434</span>&#160;        <span class="keyword">typename</span>            OffsetT&gt;</div>
-<div class="line"><a name="l00435"></a><span class="lineno">  435</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00436"></a><span class="lineno"><a class="code" href="structcub_1_1_device_histogram.html#a309c2e30bd62935e1cfcd623fb04b668">  436</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_histogram.html#a309c2e30bd62935e1cfcd623fb04b668" title="Computes per-channel intensity histograms from a sequence of multi-channel &quot;pixel&quot; data samples using...">MultiHistogramEven</a>(</div>
-<div class="line"><a name="l00437"></a><span class="lineno">  437</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                             </div>
-<div class="line"><a name="l00438"></a><span class="lineno">  438</span>&#160;        <span class="keywordtype">size_t</span>&amp;             temp_storage_bytes,                        </div>
-<div class="line"><a name="l00439"></a><span class="lineno">  439</span>&#160;        SampleIteratorT     d_samples,                                  </div>
-<div class="line"><a name="l00440"></a><span class="lineno">  440</span>&#160;        CounterT*           d_histogram[NUM_ACTIVE_CHANNELS],           </div>
-<div class="line"><a name="l00441"></a><span class="lineno">  441</span>&#160;        <span class="keywordtype">int</span>                 num_levels[NUM_ACTIVE_CHANNELS],            </div>
-<div class="line"><a name="l00442"></a><span class="lineno">  442</span>&#160;        LevelT              lower_level[NUM_ACTIVE_CHANNELS],           </div>
-<div class="line"><a name="l00443"></a><span class="lineno">  443</span>&#160;        LevelT              upper_level[NUM_ACTIVE_CHANNELS],           </div>
-<div class="line"><a name="l00444"></a><span class="lineno">  444</span>&#160;        OffsetT             num_row_pixels,                             </div>
-<div class="line"><a name="l00445"></a><span class="lineno">  445</span>&#160;        OffsetT             num_rows,                                   </div>
-<div class="line"><a name="l00446"></a><span class="lineno">  446</span>&#160;        <span class="keywordtype">size_t</span>              row_stride_bytes,                           </div>
-<div class="line"><a name="l00447"></a><span class="lineno">  447</span>&#160;        cudaStream_t        stream                  = 0,                </div>
-<div class="line"><a name="l00448"></a><span class="lineno">  448</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous       = <span class="keyword">false</span>)            </div>
-<div class="line"><a name="l00449"></a><span class="lineno">  449</span>&#160;    {</div>
-<div class="line"><a name="l00451"></a><span class="lineno">  451</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> std::iterator_traits&lt;SampleIteratorT&gt;::value_type SampleT;</div>
-<div class="line"><a name="l00452"></a><span class="lineno">  452</span>&#160;        <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;sizeof(SampleT) == 1&gt;</a> is_byte_sample;</div>
+<div class="line"><a name="l00217"></a><span class="lineno">  217</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00218"></a><span class="lineno">  218</span>&#160;        <span class="keyword">typename</span>            SampleIteratorT,</div>
+<div class="line"><a name="l00219"></a><span class="lineno">  219</span>&#160;        <span class="keyword">typename</span>            CounterT,</div>
+<div class="line"><a name="l00220"></a><span class="lineno">  220</span>&#160;        <span class="keyword">typename</span>            LevelT,</div>
+<div class="line"><a name="l00221"></a><span class="lineno">  221</span>&#160;        <span class="keyword">typename</span>            OffsetT&gt;</div>
+<div class="line"><a name="l00222"></a><span class="lineno">  222</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00223"></a><span class="lineno"><a class="code" href="structcub_1_1_device_histogram.html#ad101280b8a1f2d61acb4aa06a4a5df1b">  223</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_histogram.html#ad101280b8a1f2d61acb4aa06a4a5df1b" title="Computes an intensity histogram from a sequence of data samples using equal-width bins...">HistogramEven</a>(</div>
+<div class="line"><a name="l00224"></a><span class="lineno">  224</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                             </div>
+<div class="line"><a name="l00225"></a><span class="lineno">  225</span>&#160;        <span class="keywordtype">size_t</span>&amp;             temp_storage_bytes,                        </div>
+<div class="line"><a name="l00226"></a><span class="lineno">  226</span>&#160;        SampleIteratorT     d_samples,                                  </div>
+<div class="line"><a name="l00227"></a><span class="lineno">  227</span>&#160;        CounterT*           d_histogram,                                </div>
+<div class="line"><a name="l00228"></a><span class="lineno">  228</span>&#160;        <span class="keywordtype">int</span>                 num_levels,                                 </div>
+<div class="line"><a name="l00229"></a><span class="lineno">  229</span>&#160;        LevelT              lower_level,                                </div>
+<div class="line"><a name="l00230"></a><span class="lineno">  230</span>&#160;        LevelT              upper_level,                                </div>
+<div class="line"><a name="l00231"></a><span class="lineno">  231</span>&#160;        OffsetT             num_row_samples,                            </div>
+<div class="line"><a name="l00232"></a><span class="lineno">  232</span>&#160;        OffsetT             num_rows,                                   </div>
+<div class="line"><a name="l00233"></a><span class="lineno">  233</span>&#160;        <span class="keywordtype">size_t</span>              row_stride_bytes,                           </div>
+<div class="line"><a name="l00234"></a><span class="lineno">  234</span>&#160;        cudaStream_t        stream                  = 0,                </div>
+<div class="line"><a name="l00235"></a><span class="lineno">  235</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous       = <span class="keyword">false</span>)            </div>
+<div class="line"><a name="l00236"></a><span class="lineno">  236</span>&#160;    {</div>
+<div class="line"><a name="l00237"></a><span class="lineno">  237</span>&#160;        CounterT*           d_histogram1[1]     = {d_histogram};</div>
+<div class="line"><a name="l00238"></a><span class="lineno">  238</span>&#160;        <span class="keywordtype">int</span>                 num_levels1[1]      = {num_levels};</div>
+<div class="line"><a name="l00239"></a><span class="lineno">  239</span>&#160;        LevelT              lower_level1[1]     = {lower_level};</div>
+<div class="line"><a name="l00240"></a><span class="lineno">  240</span>&#160;        LevelT              upper_level1[1]     = {upper_level};</div>
+<div class="line"><a name="l00241"></a><span class="lineno">  241</span>&#160;</div>
+<div class="line"><a name="l00242"></a><span class="lineno">  242</span>&#160;        <span class="keywordflow">return</span> MultiHistogramEven&lt;1, 1&gt;(</div>
+<div class="line"><a name="l00243"></a><span class="lineno">  243</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00244"></a><span class="lineno">  244</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00245"></a><span class="lineno">  245</span>&#160;            d_samples,</div>
+<div class="line"><a name="l00246"></a><span class="lineno">  246</span>&#160;            d_histogram1,</div>
+<div class="line"><a name="l00247"></a><span class="lineno">  247</span>&#160;            num_levels1,</div>
+<div class="line"><a name="l00248"></a><span class="lineno">  248</span>&#160;            lower_level1,</div>
+<div class="line"><a name="l00249"></a><span class="lineno">  249</span>&#160;            upper_level1,</div>
+<div class="line"><a name="l00250"></a><span class="lineno">  250</span>&#160;            num_row_samples,</div>
+<div class="line"><a name="l00251"></a><span class="lineno">  251</span>&#160;            num_rows,</div>
+<div class="line"><a name="l00252"></a><span class="lineno">  252</span>&#160;            row_stride_bytes,</div>
+<div class="line"><a name="l00253"></a><span class="lineno">  253</span>&#160;            stream,</div>
+<div class="line"><a name="l00254"></a><span class="lineno">  254</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00255"></a><span class="lineno">  255</span>&#160;    }</div>
+<div class="line"><a name="l00256"></a><span class="lineno">  256</span>&#160;</div>
+<div class="line"><a name="l00317"></a><span class="lineno">  317</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00318"></a><span class="lineno">  318</span>&#160;        <span class="keywordtype">int</span>                 NUM_CHANNELS,</div>
+<div class="line"><a name="l00319"></a><span class="lineno">  319</span>&#160;        <span class="keywordtype">int</span>                 NUM_ACTIVE_CHANNELS,</div>
+<div class="line"><a name="l00320"></a><span class="lineno">  320</span>&#160;        <span class="keyword">typename</span>            SampleIteratorT,</div>
+<div class="line"><a name="l00321"></a><span class="lineno">  321</span>&#160;        <span class="keyword">typename</span>            CounterT,</div>
+<div class="line"><a name="l00322"></a><span class="lineno">  322</span>&#160;        <span class="keyword">typename</span>            LevelT,</div>
+<div class="line"><a name="l00323"></a><span class="lineno">  323</span>&#160;        <span class="keyword">typename</span>            OffsetT&gt;</div>
+<div class="line"><a name="l00324"></a><span class="lineno">  324</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00325"></a><span class="lineno"><a class="code" href="structcub_1_1_device_histogram.html#a917e507d773ef1b11424ed8bc49e4d95">  325</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_histogram.html#a917e507d773ef1b11424ed8bc49e4d95" title="Computes per-channel intensity histograms from a sequence of multi-channel &quot;pixel&quot; data samples using...">MultiHistogramEven</a>(</div>
+<div class="line"><a name="l00326"></a><span class="lineno">  326</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                             </div>
+<div class="line"><a name="l00327"></a><span class="lineno">  327</span>&#160;        <span class="keywordtype">size_t</span>&amp;             temp_storage_bytes,                        </div>
+<div class="line"><a name="l00328"></a><span class="lineno">  328</span>&#160;        SampleIteratorT     d_samples,                                  </div>
+<div class="line"><a name="l00329"></a><span class="lineno">  329</span>&#160;        CounterT*           d_histogram[NUM_ACTIVE_CHANNELS],           </div>
+<div class="line"><a name="l00330"></a><span class="lineno">  330</span>&#160;        <span class="keywordtype">int</span>                 num_levels[NUM_ACTIVE_CHANNELS],            </div>
+<div class="line"><a name="l00331"></a><span class="lineno">  331</span>&#160;        LevelT              lower_level[NUM_ACTIVE_CHANNELS],           </div>
+<div class="line"><a name="l00332"></a><span class="lineno">  332</span>&#160;        LevelT              upper_level[NUM_ACTIVE_CHANNELS],           </div>
+<div class="line"><a name="l00333"></a><span class="lineno">  333</span>&#160;        OffsetT             num_pixels,                                 </div>
+<div class="line"><a name="l00334"></a><span class="lineno">  334</span>&#160;        cudaStream_t        stream                  = 0,                </div>
+<div class="line"><a name="l00335"></a><span class="lineno">  335</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous       = <span class="keyword">false</span>)            </div>
+<div class="line"><a name="l00336"></a><span class="lineno">  336</span>&#160;    {</div>
+<div class="line"><a name="l00338"></a><span class="lineno">  338</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> std::iterator_traits&lt;SampleIteratorT&gt;::value_type SampleT;</div>
+<div class="line"><a name="l00339"></a><span class="lineno">  339</span>&#160;</div>
+<div class="line"><a name="l00340"></a><span class="lineno">  340</span>&#160;        <span class="keywordflow">return</span> MultiHistogramEven&lt;NUM_CHANNELS, NUM_ACTIVE_CHANNELS&gt;(</div>
+<div class="line"><a name="l00341"></a><span class="lineno">  341</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00342"></a><span class="lineno">  342</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00343"></a><span class="lineno">  343</span>&#160;            d_samples,</div>
+<div class="line"><a name="l00344"></a><span class="lineno">  344</span>&#160;            d_histogram,</div>
+<div class="line"><a name="l00345"></a><span class="lineno">  345</span>&#160;            num_levels,</div>
+<div class="line"><a name="l00346"></a><span class="lineno">  346</span>&#160;            lower_level,</div>
+<div class="line"><a name="l00347"></a><span class="lineno">  347</span>&#160;            upper_level,</div>
+<div class="line"><a name="l00348"></a><span class="lineno">  348</span>&#160;            num_pixels,</div>
+<div class="line"><a name="l00349"></a><span class="lineno">  349</span>&#160;            1,</div>
+<div class="line"><a name="l00350"></a><span class="lineno">  350</span>&#160;            <span class="keyword">sizeof</span>(SampleT) * NUM_CHANNELS * num_pixels,</div>
+<div class="line"><a name="l00351"></a><span class="lineno">  351</span>&#160;            stream,</div>
+<div class="line"><a name="l00352"></a><span class="lineno">  352</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00353"></a><span class="lineno">  353</span>&#160;    }</div>
+<div class="line"><a name="l00354"></a><span class="lineno">  354</span>&#160;</div>
+<div class="line"><a name="l00355"></a><span class="lineno">  355</span>&#160;</div>
+<div class="line"><a name="l00424"></a><span class="lineno">  424</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00425"></a><span class="lineno">  425</span>&#160;        <span class="keywordtype">int</span>                 NUM_CHANNELS,</div>
+<div class="line"><a name="l00426"></a><span class="lineno">  426</span>&#160;        <span class="keywordtype">int</span>                 NUM_ACTIVE_CHANNELS,</div>
+<div class="line"><a name="l00427"></a><span class="lineno">  427</span>&#160;        <span class="keyword">typename</span>            SampleIteratorT,</div>
+<div class="line"><a name="l00428"></a><span class="lineno">  428</span>&#160;        <span class="keyword">typename</span>            CounterT,</div>
+<div class="line"><a name="l00429"></a><span class="lineno">  429</span>&#160;        <span class="keyword">typename</span>            LevelT,</div>
+<div class="line"><a name="l00430"></a><span class="lineno">  430</span>&#160;        <span class="keyword">typename</span>            OffsetT&gt;</div>
+<div class="line"><a name="l00431"></a><span class="lineno">  431</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00432"></a><span class="lineno"><a class="code" href="structcub_1_1_device_histogram.html#a309c2e30bd62935e1cfcd623fb04b668">  432</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_histogram.html#a309c2e30bd62935e1cfcd623fb04b668" title="Computes per-channel intensity histograms from a sequence of multi-channel &quot;pixel&quot; data samples using...">MultiHistogramEven</a>(</div>
+<div class="line"><a name="l00433"></a><span class="lineno">  433</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                             </div>
+<div class="line"><a name="l00434"></a><span class="lineno">  434</span>&#160;        <span class="keywordtype">size_t</span>&amp;             temp_storage_bytes,                        </div>
+<div class="line"><a name="l00435"></a><span class="lineno">  435</span>&#160;        SampleIteratorT     d_samples,                                  </div>
+<div class="line"><a name="l00436"></a><span class="lineno">  436</span>&#160;        CounterT*           d_histogram[NUM_ACTIVE_CHANNELS],           </div>
+<div class="line"><a name="l00437"></a><span class="lineno">  437</span>&#160;        <span class="keywordtype">int</span>                 num_levels[NUM_ACTIVE_CHANNELS],            </div>
+<div class="line"><a name="l00438"></a><span class="lineno">  438</span>&#160;        LevelT              lower_level[NUM_ACTIVE_CHANNELS],           </div>
+<div class="line"><a name="l00439"></a><span class="lineno">  439</span>&#160;        LevelT              upper_level[NUM_ACTIVE_CHANNELS],           </div>
+<div class="line"><a name="l00440"></a><span class="lineno">  440</span>&#160;        OffsetT             num_row_pixels,                             </div>
+<div class="line"><a name="l00441"></a><span class="lineno">  441</span>&#160;        OffsetT             num_rows,                                   </div>
+<div class="line"><a name="l00442"></a><span class="lineno">  442</span>&#160;        <span class="keywordtype">size_t</span>              row_stride_bytes,                           </div>
+<div class="line"><a name="l00443"></a><span class="lineno">  443</span>&#160;        cudaStream_t        stream                  = 0,                </div>
+<div class="line"><a name="l00444"></a><span class="lineno">  444</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous       = <span class="keyword">false</span>)            </div>
+<div class="line"><a name="l00445"></a><span class="lineno">  445</span>&#160;    {</div>
+<div class="line"><a name="l00447"></a><span class="lineno">  447</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> std::iterator_traits&lt;SampleIteratorT&gt;::value_type SampleT;</div>
+<div class="line"><a name="l00448"></a><span class="lineno">  448</span>&#160;        Int2Type&lt;sizeof(SampleT) == 1&gt; is_byte_sample;</div>
+<div class="line"><a name="l00449"></a><span class="lineno">  449</span>&#160;</div>
+<div class="line"><a name="l00450"></a><span class="lineno">  450</span>&#160;        <span class="keywordflow">if</span> ((<span class="keyword">sizeof</span>(OffsetT) &gt; <span class="keyword">sizeof</span>(<span class="keywordtype">int</span>)) &amp;&amp; (row_stride_bytes * num_rows &lt; std::numeric_limits&lt;int&gt;::max()))</div>
+<div class="line"><a name="l00451"></a><span class="lineno">  451</span>&#160;        {</div>
+<div class="line"><a name="l00452"></a><span class="lineno">  452</span>&#160;            <span class="comment">// Down-convert OffsetT data type</span></div>
 <div class="line"><a name="l00453"></a><span class="lineno">  453</span>&#160;</div>
-<div class="line"><a name="l00454"></a><span class="lineno">  454</span>&#160;        <span class="keywordflow">if</span> ((<span class="keyword">sizeof</span>(OffsetT) &gt; <span class="keyword">sizeof</span>(<span class="keywordtype">int</span>)) &amp;&amp; (row_stride_bytes * num_rows &lt; std::numeric_limits&lt;int&gt;::max()))</div>
-<div class="line"><a name="l00455"></a><span class="lineno">  455</span>&#160;        {</div>
-<div class="line"><a name="l00456"></a><span class="lineno">  456</span>&#160;            <span class="comment">// Down-convert OffsetT data type</span></div>
-<div class="line"><a name="l00457"></a><span class="lineno">  457</span>&#160;</div>
-<div class="line"><a name="l00458"></a><span class="lineno">  458</span>&#160;</div>
-<div class="line"><a name="l00459"></a><span class="lineno">  459</span>&#160;            <span class="keywordflow">return</span> DipatchHistogram&lt;NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, LevelT, int&gt;::DispatchEven(</div>
-<div class="line"><a name="l00460"></a><span class="lineno">  460</span>&#160;                d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, lower_level, upper_level,</div>
-<div class="line"><a name="l00461"></a><span class="lineno">  461</span>&#160;                (<span class="keywordtype">int</span>) num_row_pixels, (<span class="keywordtype">int</span>) num_rows, (<span class="keywordtype">int</span>) (row_stride_bytes / <span class="keyword">sizeof</span>(SampleT)),</div>
-<div class="line"><a name="l00462"></a><span class="lineno">  462</span>&#160;                stream, debug_synchronous, is_byte_sample);</div>
-<div class="line"><a name="l00463"></a><span class="lineno">  463</span>&#160;        }</div>
-<div class="line"><a name="l00464"></a><span class="lineno">  464</span>&#160;</div>
-<div class="line"><a name="l00465"></a><span class="lineno">  465</span>&#160;        <span class="keywordflow">return</span> DipatchHistogram&lt;NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, LevelT, OffsetT&gt;::DispatchEven(</div>
-<div class="line"><a name="l00466"></a><span class="lineno">  466</span>&#160;            d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, lower_level, upper_level,</div>
-<div class="line"><a name="l00467"></a><span class="lineno">  467</span>&#160;            num_row_pixels, num_rows, (OffsetT) (row_stride_bytes / <span class="keyword">sizeof</span>(SampleT)),</div>
-<div class="line"><a name="l00468"></a><span class="lineno">  468</span>&#160;            stream, debug_synchronous, is_byte_sample);</div>
-<div class="line"><a name="l00469"></a><span class="lineno">  469</span>&#160;    }</div>
-<div class="line"><a name="l00470"></a><span class="lineno">  470</span>&#160;</div>
-<div class="line"><a name="l00471"></a><span class="lineno">  471</span>&#160;</div>
-<div class="line"><a name="l00473"></a><span class="lineno">  473</span>&#160;    <span class="comment">/******************************************************************/</span></div>
-<div class="line"><a name="l00477"></a><span class="lineno">  477</span>&#160;</div>
-<div class="line"><a name="l00526"></a><span class="lineno">  526</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00527"></a><span class="lineno">  527</span>&#160;        <span class="keyword">typename</span>            SampleIteratorT,</div>
-<div class="line"><a name="l00528"></a><span class="lineno">  528</span>&#160;        <span class="keyword">typename</span>            CounterT,</div>
-<div class="line"><a name="l00529"></a><span class="lineno">  529</span>&#160;        <span class="keyword">typename</span>            LevelT,</div>
-<div class="line"><a name="l00530"></a><span class="lineno">  530</span>&#160;        <span class="keyword">typename</span>            OffsetT&gt;</div>
-<div class="line"><a name="l00531"></a><span class="lineno">  531</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00532"></a><span class="lineno"><a class="code" href="structcub_1_1_device_histogram.html#a11ec6d941fb6779c2a4d124b6f5b0813">  532</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_histogram.html#a11ec6d941fb6779c2a4d124b6f5b0813" title="Computes an intensity histogram from a sequence of data samples using the specified bin boundary leve...">HistogramRange</a>(</div>
-<div class="line"><a name="l00533"></a><span class="lineno">  533</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                         </div>
-<div class="line"><a name="l00534"></a><span class="lineno">  534</span>&#160;        <span class="keywordtype">size_t</span>&amp;             temp_storage_bytes,                    </div>
-<div class="line"><a name="l00535"></a><span class="lineno">  535</span>&#160;        SampleIteratorT     d_samples,                              </div>
-<div class="line"><a name="l00536"></a><span class="lineno">  536</span>&#160;        CounterT*           d_histogram,                            </div>
-<div class="line"><a name="l00537"></a><span class="lineno">  537</span>&#160;        <span class="keywordtype">int</span>                 num_levels,                             </div>
-<div class="line"><a name="l00538"></a><span class="lineno">  538</span>&#160;        LevelT*             d_levels,                               </div>
-<div class="line"><a name="l00539"></a><span class="lineno">  539</span>&#160;        OffsetT             num_samples,                            </div>
-<div class="line"><a name="l00540"></a><span class="lineno">  540</span>&#160;        cudaStream_t        stream              = 0,                </div>
-<div class="line"><a name="l00541"></a><span class="lineno">  541</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
-<div class="line"><a name="l00542"></a><span class="lineno">  542</span>&#160;    {</div>
-<div class="line"><a name="l00544"></a><span class="lineno">  544</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> std::iterator_traits&lt;SampleIteratorT&gt;::value_type SampleT;</div>
-<div class="line"><a name="l00545"></a><span class="lineno">  545</span>&#160;</div>
-<div class="line"><a name="l00546"></a><span class="lineno">  546</span>&#160;        CounterT*           d_histogram1[1] = {d_histogram};</div>
-<div class="line"><a name="l00547"></a><span class="lineno">  547</span>&#160;        <span class="keywordtype">int</span>                 num_levels1[1]  = {num_levels};</div>
-<div class="line"><a name="l00548"></a><span class="lineno">  548</span>&#160;        LevelT*             d_levels1[1]    = {d_levels};</div>
-<div class="line"><a name="l00549"></a><span class="lineno">  549</span>&#160;</div>
-<div class="line"><a name="l00550"></a><span class="lineno">  550</span>&#160;        <span class="keywordflow">return</span> MultiHistogramRange&lt;1, 1&gt;(</div>
-<div class="line"><a name="l00551"></a><span class="lineno">  551</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00552"></a><span class="lineno">  552</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00553"></a><span class="lineno">  553</span>&#160;            d_samples,</div>
-<div class="line"><a name="l00554"></a><span class="lineno">  554</span>&#160;            d_histogram1,</div>
-<div class="line"><a name="l00555"></a><span class="lineno">  555</span>&#160;            num_levels1,</div>
-<div class="line"><a name="l00556"></a><span class="lineno">  556</span>&#160;            d_levels1,</div>
-<div class="line"><a name="l00557"></a><span class="lineno">  557</span>&#160;            num_samples,</div>
-<div class="line"><a name="l00558"></a><span class="lineno">  558</span>&#160;            1,</div>
-<div class="line"><a name="l00559"></a><span class="lineno">  559</span>&#160;            <span class="keyword">sizeof</span>(SampleT) * num_samples,</div>
-<div class="line"><a name="l00560"></a><span class="lineno">  560</span>&#160;            stream,</div>
-<div class="line"><a name="l00561"></a><span class="lineno">  561</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00562"></a><span class="lineno">  562</span>&#160;    }</div>
-<div class="line"><a name="l00563"></a><span class="lineno">  563</span>&#160;</div>
-<div class="line"><a name="l00564"></a><span class="lineno">  564</span>&#160;</div>
-<div class="line"><a name="l00622"></a><span class="lineno">  622</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00623"></a><span class="lineno">  623</span>&#160;        <span class="keyword">typename</span>            SampleIteratorT,</div>
-<div class="line"><a name="l00624"></a><span class="lineno">  624</span>&#160;        <span class="keyword">typename</span>            CounterT,</div>
-<div class="line"><a name="l00625"></a><span class="lineno">  625</span>&#160;        <span class="keyword">typename</span>            LevelT,</div>
-<div class="line"><a name="l00626"></a><span class="lineno">  626</span>&#160;        <span class="keyword">typename</span>            OffsetT&gt;</div>
-<div class="line"><a name="l00627"></a><span class="lineno">  627</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00628"></a><span class="lineno"><a class="code" href="structcub_1_1_device_histogram.html#a2b9278ea5e6442c9af944c83d745eb13">  628</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_histogram.html#a2b9278ea5e6442c9af944c83d745eb13" title="Computes an intensity histogram from a sequence of data samples using the specified bin boundary leve...">HistogramRange</a>(</div>
-<div class="line"><a name="l00629"></a><span class="lineno">  629</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                         </div>
-<div class="line"><a name="l00630"></a><span class="lineno">  630</span>&#160;        <span class="keywordtype">size_t</span>&amp;             temp_storage_bytes,                    </div>
-<div class="line"><a name="l00631"></a><span class="lineno">  631</span>&#160;        SampleIteratorT     d_samples,                              </div>
-<div class="line"><a name="l00632"></a><span class="lineno">  632</span>&#160;        CounterT*           d_histogram,                            </div>
-<div class="line"><a name="l00633"></a><span class="lineno">  633</span>&#160;        <span class="keywordtype">int</span>                 num_levels,                             </div>
-<div class="line"><a name="l00634"></a><span class="lineno">  634</span>&#160;        LevelT*             d_levels,                               </div>
-<div class="line"><a name="l00635"></a><span class="lineno">  635</span>&#160;        OffsetT             num_row_samples,                        </div>
-<div class="line"><a name="l00636"></a><span class="lineno">  636</span>&#160;        OffsetT             num_rows,                               </div>
-<div class="line"><a name="l00637"></a><span class="lineno">  637</span>&#160;        <span class="keywordtype">size_t</span>              row_stride_bytes,                       </div>
-<div class="line"><a name="l00638"></a><span class="lineno">  638</span>&#160;        cudaStream_t        stream              = 0,                </div>
-<div class="line"><a name="l00639"></a><span class="lineno">  639</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
-<div class="line"><a name="l00640"></a><span class="lineno">  640</span>&#160;    {</div>
-<div class="line"><a name="l00641"></a><span class="lineno">  641</span>&#160;        CounterT*           d_histogram1[1]     = {d_histogram};</div>
-<div class="line"><a name="l00642"></a><span class="lineno">  642</span>&#160;        <span class="keywordtype">int</span>                 num_levels1[1]      = {num_levels};</div>
-<div class="line"><a name="l00643"></a><span class="lineno">  643</span>&#160;        LevelT*             d_levels1[1]        = {d_levels};</div>
-<div class="line"><a name="l00644"></a><span class="lineno">  644</span>&#160;</div>
-<div class="line"><a name="l00645"></a><span class="lineno">  645</span>&#160;        <span class="keywordflow">return</span> MultiHistogramRange&lt;1, 1&gt;(</div>
-<div class="line"><a name="l00646"></a><span class="lineno">  646</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00647"></a><span class="lineno">  647</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00648"></a><span class="lineno">  648</span>&#160;            d_samples,</div>
-<div class="line"><a name="l00649"></a><span class="lineno">  649</span>&#160;            d_histogram1,</div>
-<div class="line"><a name="l00650"></a><span class="lineno">  650</span>&#160;            num_levels1,</div>
-<div class="line"><a name="l00651"></a><span class="lineno">  651</span>&#160;            d_levels1,</div>
-<div class="line"><a name="l00652"></a><span class="lineno">  652</span>&#160;            num_row_samples,</div>
-<div class="line"><a name="l00653"></a><span class="lineno">  653</span>&#160;            num_rows,</div>
-<div class="line"><a name="l00654"></a><span class="lineno">  654</span>&#160;            row_stride_bytes,</div>
-<div class="line"><a name="l00655"></a><span class="lineno">  655</span>&#160;            stream,</div>
-<div class="line"><a name="l00656"></a><span class="lineno">  656</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00657"></a><span class="lineno">  657</span>&#160;    }</div>
-<div class="line"><a name="l00658"></a><span class="lineno">  658</span>&#160;</div>
-<div class="line"><a name="l00720"></a><span class="lineno">  720</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00721"></a><span class="lineno">  721</span>&#160;        <span class="keywordtype">int</span>                 NUM_CHANNELS,</div>
-<div class="line"><a name="l00722"></a><span class="lineno">  722</span>&#160;        <span class="keywordtype">int</span>                 NUM_ACTIVE_CHANNELS,</div>
-<div class="line"><a name="l00723"></a><span class="lineno">  723</span>&#160;        <span class="keyword">typename</span>            SampleIteratorT,</div>
-<div class="line"><a name="l00724"></a><span class="lineno">  724</span>&#160;        <span class="keyword">typename</span>            CounterT,</div>
-<div class="line"><a name="l00725"></a><span class="lineno">  725</span>&#160;        <span class="keyword">typename</span>            LevelT,</div>
-<div class="line"><a name="l00726"></a><span class="lineno">  726</span>&#160;        <span class="keyword">typename</span>            OffsetT&gt;</div>
-<div class="line"><a name="l00727"></a><span class="lineno">  727</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00728"></a><span class="lineno"><a class="code" href="structcub_1_1_device_histogram.html#a77511d5ae7cc53f0b5c984fd32ad29fe">  728</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_histogram.html#a77511d5ae7cc53f0b5c984fd32ad29fe" title="Computes per-channel intensity histograms from a sequence of multi-channel &quot;pixel&quot; data samples using...">MultiHistogramRange</a>(</div>
-<div class="line"><a name="l00729"></a><span class="lineno">  729</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                         </div>
-<div class="line"><a name="l00730"></a><span class="lineno">  730</span>&#160;        <span class="keywordtype">size_t</span>&amp;             temp_storage_bytes,                    </div>
-<div class="line"><a name="l00731"></a><span class="lineno">  731</span>&#160;        SampleIteratorT     d_samples,                              </div>
-<div class="line"><a name="l00732"></a><span class="lineno">  732</span>&#160;        CounterT*           d_histogram[NUM_ACTIVE_CHANNELS],       </div>
-<div class="line"><a name="l00733"></a><span class="lineno">  733</span>&#160;        <span class="keywordtype">int</span>                 num_levels[NUM_ACTIVE_CHANNELS],        </div>
-<div class="line"><a name="l00734"></a><span class="lineno">  734</span>&#160;        LevelT*             d_levels[NUM_ACTIVE_CHANNELS],          </div>
-<div class="line"><a name="l00735"></a><span class="lineno">  735</span>&#160;        OffsetT             num_pixels,                             </div>
-<div class="line"><a name="l00736"></a><span class="lineno">  736</span>&#160;        cudaStream_t        stream              = 0,                </div>
-<div class="line"><a name="l00737"></a><span class="lineno">  737</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
-<div class="line"><a name="l00738"></a><span class="lineno">  738</span>&#160;    {</div>
-<div class="line"><a name="l00740"></a><span class="lineno">  740</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> std::iterator_traits&lt;SampleIteratorT&gt;::value_type SampleT;</div>
-<div class="line"><a name="l00741"></a><span class="lineno">  741</span>&#160;</div>
-<div class="line"><a name="l00742"></a><span class="lineno">  742</span>&#160;        <span class="keywordflow">return</span> MultiHistogramRange&lt;NUM_CHANNELS, NUM_ACTIVE_CHANNELS&gt;(</div>
-<div class="line"><a name="l00743"></a><span class="lineno">  743</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00744"></a><span class="lineno">  744</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00745"></a><span class="lineno">  745</span>&#160;            d_samples,</div>
-<div class="line"><a name="l00746"></a><span class="lineno">  746</span>&#160;            d_histogram,</div>
-<div class="line"><a name="l00747"></a><span class="lineno">  747</span>&#160;            num_levels,</div>
-<div class="line"><a name="l00748"></a><span class="lineno">  748</span>&#160;            d_levels,</div>
-<div class="line"><a name="l00749"></a><span class="lineno">  749</span>&#160;            num_pixels,</div>
-<div class="line"><a name="l00750"></a><span class="lineno">  750</span>&#160;            1,</div>
-<div class="line"><a name="l00751"></a><span class="lineno">  751</span>&#160;            <span class="keyword">sizeof</span>(SampleT) * NUM_CHANNELS * num_pixels,</div>
-<div class="line"><a name="l00752"></a><span class="lineno">  752</span>&#160;            stream,</div>
-<div class="line"><a name="l00753"></a><span class="lineno">  753</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00754"></a><span class="lineno">  754</span>&#160;    }</div>
-<div class="line"><a name="l00755"></a><span class="lineno">  755</span>&#160;</div>
-<div class="line"><a name="l00756"></a><span class="lineno">  756</span>&#160;</div>
-<div class="line"><a name="l00824"></a><span class="lineno">  824</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00825"></a><span class="lineno">  825</span>&#160;        <span class="keywordtype">int</span>                 NUM_CHANNELS,</div>
-<div class="line"><a name="l00826"></a><span class="lineno">  826</span>&#160;        <span class="keywordtype">int</span>                 NUM_ACTIVE_CHANNELS,</div>
-<div class="line"><a name="l00827"></a><span class="lineno">  827</span>&#160;        <span class="keyword">typename</span>            SampleIteratorT,</div>
-<div class="line"><a name="l00828"></a><span class="lineno">  828</span>&#160;        <span class="keyword">typename</span>            CounterT,</div>
-<div class="line"><a name="l00829"></a><span class="lineno">  829</span>&#160;        <span class="keyword">typename</span>            LevelT,</div>
-<div class="line"><a name="l00830"></a><span class="lineno">  830</span>&#160;        <span class="keyword">typename</span>            OffsetT&gt;</div>
-<div class="line"><a name="l00831"></a><span class="lineno">  831</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00832"></a><span class="lineno"><a class="code" href="structcub_1_1_device_histogram.html#a5926ff12b29d9e8e67ecf2684071542f">  832</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_histogram.html#a5926ff12b29d9e8e67ecf2684071542f" title="Computes per-channel intensity histograms from a sequence of multi-channel &quot;pixel&quot; data samples using...">MultiHistogramRange</a>(</div>
-<div class="line"><a name="l00833"></a><span class="lineno">  833</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                         </div>
-<div class="line"><a name="l00834"></a><span class="lineno">  834</span>&#160;        <span class="keywordtype">size_t</span>&amp;             temp_storage_bytes,                    </div>
-<div class="line"><a name="l00835"></a><span class="lineno">  835</span>&#160;        SampleIteratorT     d_samples,                              </div>
-<div class="line"><a name="l00836"></a><span class="lineno">  836</span>&#160;        CounterT*           d_histogram[NUM_ACTIVE_CHANNELS],       </div>
-<div class="line"><a name="l00837"></a><span class="lineno">  837</span>&#160;        <span class="keywordtype">int</span>                 num_levels[NUM_ACTIVE_CHANNELS],        </div>
-<div class="line"><a name="l00838"></a><span class="lineno">  838</span>&#160;        LevelT*             d_levels[NUM_ACTIVE_CHANNELS],          </div>
-<div class="line"><a name="l00839"></a><span class="lineno">  839</span>&#160;        OffsetT             num_row_pixels,                         </div>
-<div class="line"><a name="l00840"></a><span class="lineno">  840</span>&#160;        OffsetT             num_rows,                               </div>
-<div class="line"><a name="l00841"></a><span class="lineno">  841</span>&#160;        <span class="keywordtype">size_t</span>              row_stride_bytes,                       </div>
-<div class="line"><a name="l00842"></a><span class="lineno">  842</span>&#160;        cudaStream_t        stream              = 0,                </div>
-<div class="line"><a name="l00843"></a><span class="lineno">  843</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
-<div class="line"><a name="l00844"></a><span class="lineno">  844</span>&#160;    {</div>
-<div class="line"><a name="l00846"></a><span class="lineno">  846</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> std::iterator_traits&lt;SampleIteratorT&gt;::value_type SampleT;</div>
-<div class="line"><a name="l00847"></a><span class="lineno">  847</span>&#160;        <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type&lt;sizeof(SampleT) == 1&gt;</a> is_byte_sample;</div>
-<div class="line"><a name="l00848"></a><span class="lineno">  848</span>&#160;</div>
-<div class="line"><a name="l00849"></a><span class="lineno">  849</span>&#160;        <span class="keywordflow">if</span> ((<span class="keyword">sizeof</span>(OffsetT) &gt; <span class="keyword">sizeof</span>(<span class="keywordtype">int</span>)) &amp;&amp; (row_stride_bytes * num_rows &lt; std::numeric_limits&lt;int&gt;::max()))</div>
-<div class="line"><a name="l00850"></a><span class="lineno">  850</span>&#160;        {</div>
-<div class="line"><a name="l00851"></a><span class="lineno">  851</span>&#160;            <span class="comment">// Down-convert OffsetT data type</span></div>
-<div class="line"><a name="l00852"></a><span class="lineno">  852</span>&#160;            <span class="keywordflow">return</span> DipatchHistogram&lt;NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, LevelT, int&gt;::DispatchRange(</div>
-<div class="line"><a name="l00853"></a><span class="lineno">  853</span>&#160;                d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, d_levels,</div>
-<div class="line"><a name="l00854"></a><span class="lineno">  854</span>&#160;                (<span class="keywordtype">int</span>) num_row_pixels, (<span class="keywordtype">int</span>) num_rows, (<span class="keywordtype">int</span>) (row_stride_bytes / <span class="keyword">sizeof</span>(SampleT)),</div>
-<div class="line"><a name="l00855"></a><span class="lineno">  855</span>&#160;                stream, debug_synchronous, is_byte_sample);</div>
-<div class="line"><a name="l00856"></a><span class="lineno">  856</span>&#160;        }</div>
+<div class="line"><a name="l00454"></a><span class="lineno">  454</span>&#160;</div>
+<div class="line"><a name="l00455"></a><span class="lineno">  455</span>&#160;            <span class="keywordflow">return</span> DipatchHistogram&lt;NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, LevelT, int&gt;::DispatchEven(</div>
+<div class="line"><a name="l00456"></a><span class="lineno">  456</span>&#160;                d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, lower_level, upper_level,</div>
+<div class="line"><a name="l00457"></a><span class="lineno">  457</span>&#160;                (<span class="keywordtype">int</span>) num_row_pixels, (<span class="keywordtype">int</span>) num_rows, (<span class="keywordtype">int</span>) (row_stride_bytes / <span class="keyword">sizeof</span>(SampleT)),</div>
+<div class="line"><a name="l00458"></a><span class="lineno">  458</span>&#160;                stream, debug_synchronous, is_byte_sample);</div>
+<div class="line"><a name="l00459"></a><span class="lineno">  459</span>&#160;        }</div>
+<div class="line"><a name="l00460"></a><span class="lineno">  460</span>&#160;</div>
+<div class="line"><a name="l00461"></a><span class="lineno">  461</span>&#160;        <span class="keywordflow">return</span> DipatchHistogram&lt;NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, LevelT, OffsetT&gt;::DispatchEven(</div>
+<div class="line"><a name="l00462"></a><span class="lineno">  462</span>&#160;            d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, lower_level, upper_level,</div>
+<div class="line"><a name="l00463"></a><span class="lineno">  463</span>&#160;            num_row_pixels, num_rows, (OffsetT) (row_stride_bytes / <span class="keyword">sizeof</span>(SampleT)),</div>
+<div class="line"><a name="l00464"></a><span class="lineno">  464</span>&#160;            stream, debug_synchronous, is_byte_sample);</div>
+<div class="line"><a name="l00465"></a><span class="lineno">  465</span>&#160;    }</div>
+<div class="line"><a name="l00466"></a><span class="lineno">  466</span>&#160;</div>
+<div class="line"><a name="l00467"></a><span class="lineno">  467</span>&#160;</div>
+<div class="line"><a name="l00469"></a><span class="lineno">  469</span>&#160;    <span class="comment">/******************************************************************/</span></div>
+<div class="line"><a name="l00473"></a><span class="lineno">  473</span>&#160;</div>
+<div class="line"><a name="l00521"></a><span class="lineno">  521</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00522"></a><span class="lineno">  522</span>&#160;        <span class="keyword">typename</span>            SampleIteratorT,</div>
+<div class="line"><a name="l00523"></a><span class="lineno">  523</span>&#160;        <span class="keyword">typename</span>            CounterT,</div>
+<div class="line"><a name="l00524"></a><span class="lineno">  524</span>&#160;        <span class="keyword">typename</span>            LevelT,</div>
+<div class="line"><a name="l00525"></a><span class="lineno">  525</span>&#160;        <span class="keyword">typename</span>            OffsetT&gt;</div>
+<div class="line"><a name="l00526"></a><span class="lineno">  526</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00527"></a><span class="lineno"><a class="code" href="structcub_1_1_device_histogram.html#a11ec6d941fb6779c2a4d124b6f5b0813">  527</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_histogram.html#a11ec6d941fb6779c2a4d124b6f5b0813" title="Computes an intensity histogram from a sequence of data samples using the specified bin boundary leve...">HistogramRange</a>(</div>
+<div class="line"><a name="l00528"></a><span class="lineno">  528</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                         </div>
+<div class="line"><a name="l00529"></a><span class="lineno">  529</span>&#160;        <span class="keywordtype">size_t</span>&amp;             temp_storage_bytes,                    </div>
+<div class="line"><a name="l00530"></a><span class="lineno">  530</span>&#160;        SampleIteratorT     d_samples,                              </div>
+<div class="line"><a name="l00531"></a><span class="lineno">  531</span>&#160;        CounterT*           d_histogram,                            </div>
+<div class="line"><a name="l00532"></a><span class="lineno">  532</span>&#160;        <span class="keywordtype">int</span>                 num_levels,                             </div>
+<div class="line"><a name="l00533"></a><span class="lineno">  533</span>&#160;        LevelT*             d_levels,                               </div>
+<div class="line"><a name="l00534"></a><span class="lineno">  534</span>&#160;        OffsetT             num_samples,                            </div>
+<div class="line"><a name="l00535"></a><span class="lineno">  535</span>&#160;        cudaStream_t        stream              = 0,                </div>
+<div class="line"><a name="l00536"></a><span class="lineno">  536</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
+<div class="line"><a name="l00537"></a><span class="lineno">  537</span>&#160;    {</div>
+<div class="line"><a name="l00539"></a><span class="lineno">  539</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> std::iterator_traits&lt;SampleIteratorT&gt;::value_type SampleT;</div>
+<div class="line"><a name="l00540"></a><span class="lineno">  540</span>&#160;</div>
+<div class="line"><a name="l00541"></a><span class="lineno">  541</span>&#160;        CounterT*           d_histogram1[1] = {d_histogram};</div>
+<div class="line"><a name="l00542"></a><span class="lineno">  542</span>&#160;        <span class="keywordtype">int</span>                 num_levels1[1]  = {num_levels};</div>
+<div class="line"><a name="l00543"></a><span class="lineno">  543</span>&#160;        LevelT*             d_levels1[1]    = {d_levels};</div>
+<div class="line"><a name="l00544"></a><span class="lineno">  544</span>&#160;</div>
+<div class="line"><a name="l00545"></a><span class="lineno">  545</span>&#160;        <span class="keywordflow">return</span> MultiHistogramRange&lt;1, 1&gt;(</div>
+<div class="line"><a name="l00546"></a><span class="lineno">  546</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00547"></a><span class="lineno">  547</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00548"></a><span class="lineno">  548</span>&#160;            d_samples,</div>
+<div class="line"><a name="l00549"></a><span class="lineno">  549</span>&#160;            d_histogram1,</div>
+<div class="line"><a name="l00550"></a><span class="lineno">  550</span>&#160;            num_levels1,</div>
+<div class="line"><a name="l00551"></a><span class="lineno">  551</span>&#160;            d_levels1,</div>
+<div class="line"><a name="l00552"></a><span class="lineno">  552</span>&#160;            num_samples,</div>
+<div class="line"><a name="l00553"></a><span class="lineno">  553</span>&#160;            1,</div>
+<div class="line"><a name="l00554"></a><span class="lineno">  554</span>&#160;            <span class="keyword">sizeof</span>(SampleT) * num_samples,</div>
+<div class="line"><a name="l00555"></a><span class="lineno">  555</span>&#160;            stream,</div>
+<div class="line"><a name="l00556"></a><span class="lineno">  556</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00557"></a><span class="lineno">  557</span>&#160;    }</div>
+<div class="line"><a name="l00558"></a><span class="lineno">  558</span>&#160;</div>
+<div class="line"><a name="l00559"></a><span class="lineno">  559</span>&#160;</div>
+<div class="line"><a name="l00616"></a><span class="lineno">  616</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00617"></a><span class="lineno">  617</span>&#160;        <span class="keyword">typename</span>            SampleIteratorT,</div>
+<div class="line"><a name="l00618"></a><span class="lineno">  618</span>&#160;        <span class="keyword">typename</span>            CounterT,</div>
+<div class="line"><a name="l00619"></a><span class="lineno">  619</span>&#160;        <span class="keyword">typename</span>            LevelT,</div>
+<div class="line"><a name="l00620"></a><span class="lineno">  620</span>&#160;        <span class="keyword">typename</span>            OffsetT&gt;</div>
+<div class="line"><a name="l00621"></a><span class="lineno">  621</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00622"></a><span class="lineno"><a class="code" href="structcub_1_1_device_histogram.html#a2b9278ea5e6442c9af944c83d745eb13">  622</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_histogram.html#a2b9278ea5e6442c9af944c83d745eb13" title="Computes an intensity histogram from a sequence of data samples using the specified bin boundary leve...">HistogramRange</a>(</div>
+<div class="line"><a name="l00623"></a><span class="lineno">  623</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                         </div>
+<div class="line"><a name="l00624"></a><span class="lineno">  624</span>&#160;        <span class="keywordtype">size_t</span>&amp;             temp_storage_bytes,                    </div>
+<div class="line"><a name="l00625"></a><span class="lineno">  625</span>&#160;        SampleIteratorT     d_samples,                              </div>
+<div class="line"><a name="l00626"></a><span class="lineno">  626</span>&#160;        CounterT*           d_histogram,                            </div>
+<div class="line"><a name="l00627"></a><span class="lineno">  627</span>&#160;        <span class="keywordtype">int</span>                 num_levels,                             </div>
+<div class="line"><a name="l00628"></a><span class="lineno">  628</span>&#160;        LevelT*             d_levels,                               </div>
+<div class="line"><a name="l00629"></a><span class="lineno">  629</span>&#160;        OffsetT             num_row_samples,                        </div>
+<div class="line"><a name="l00630"></a><span class="lineno">  630</span>&#160;        OffsetT             num_rows,                               </div>
+<div class="line"><a name="l00631"></a><span class="lineno">  631</span>&#160;        <span class="keywordtype">size_t</span>              row_stride_bytes,                       </div>
+<div class="line"><a name="l00632"></a><span class="lineno">  632</span>&#160;        cudaStream_t        stream              = 0,                </div>
+<div class="line"><a name="l00633"></a><span class="lineno">  633</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
+<div class="line"><a name="l00634"></a><span class="lineno">  634</span>&#160;    {</div>
+<div class="line"><a name="l00635"></a><span class="lineno">  635</span>&#160;        CounterT*           d_histogram1[1]     = {d_histogram};</div>
+<div class="line"><a name="l00636"></a><span class="lineno">  636</span>&#160;        <span class="keywordtype">int</span>                 num_levels1[1]      = {num_levels};</div>
+<div class="line"><a name="l00637"></a><span class="lineno">  637</span>&#160;        LevelT*             d_levels1[1]        = {d_levels};</div>
+<div class="line"><a name="l00638"></a><span class="lineno">  638</span>&#160;</div>
+<div class="line"><a name="l00639"></a><span class="lineno">  639</span>&#160;        <span class="keywordflow">return</span> MultiHistogramRange&lt;1, 1&gt;(</div>
+<div class="line"><a name="l00640"></a><span class="lineno">  640</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00641"></a><span class="lineno">  641</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00642"></a><span class="lineno">  642</span>&#160;            d_samples,</div>
+<div class="line"><a name="l00643"></a><span class="lineno">  643</span>&#160;            d_histogram1,</div>
+<div class="line"><a name="l00644"></a><span class="lineno">  644</span>&#160;            num_levels1,</div>
+<div class="line"><a name="l00645"></a><span class="lineno">  645</span>&#160;            d_levels1,</div>
+<div class="line"><a name="l00646"></a><span class="lineno">  646</span>&#160;            num_row_samples,</div>
+<div class="line"><a name="l00647"></a><span class="lineno">  647</span>&#160;            num_rows,</div>
+<div class="line"><a name="l00648"></a><span class="lineno">  648</span>&#160;            row_stride_bytes,</div>
+<div class="line"><a name="l00649"></a><span class="lineno">  649</span>&#160;            stream,</div>
+<div class="line"><a name="l00650"></a><span class="lineno">  650</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00651"></a><span class="lineno">  651</span>&#160;    }</div>
+<div class="line"><a name="l00652"></a><span class="lineno">  652</span>&#160;</div>
+<div class="line"><a name="l00713"></a><span class="lineno">  713</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00714"></a><span class="lineno">  714</span>&#160;        <span class="keywordtype">int</span>                 NUM_CHANNELS,</div>
+<div class="line"><a name="l00715"></a><span class="lineno">  715</span>&#160;        <span class="keywordtype">int</span>                 NUM_ACTIVE_CHANNELS,</div>
+<div class="line"><a name="l00716"></a><span class="lineno">  716</span>&#160;        <span class="keyword">typename</span>            SampleIteratorT,</div>
+<div class="line"><a name="l00717"></a><span class="lineno">  717</span>&#160;        <span class="keyword">typename</span>            CounterT,</div>
+<div class="line"><a name="l00718"></a><span class="lineno">  718</span>&#160;        <span class="keyword">typename</span>            LevelT,</div>
+<div class="line"><a name="l00719"></a><span class="lineno">  719</span>&#160;        <span class="keyword">typename</span>            OffsetT&gt;</div>
+<div class="line"><a name="l00720"></a><span class="lineno">  720</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00721"></a><span class="lineno"><a class="code" href="structcub_1_1_device_histogram.html#a77511d5ae7cc53f0b5c984fd32ad29fe">  721</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_histogram.html#a77511d5ae7cc53f0b5c984fd32ad29fe" title="Computes per-channel intensity histograms from a sequence of multi-channel &quot;pixel&quot; data samples using...">MultiHistogramRange</a>(</div>
+<div class="line"><a name="l00722"></a><span class="lineno">  722</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                         </div>
+<div class="line"><a name="l00723"></a><span class="lineno">  723</span>&#160;        <span class="keywordtype">size_t</span>&amp;             temp_storage_bytes,                    </div>
+<div class="line"><a name="l00724"></a><span class="lineno">  724</span>&#160;        SampleIteratorT     d_samples,                              </div>
+<div class="line"><a name="l00725"></a><span class="lineno">  725</span>&#160;        CounterT*           d_histogram[NUM_ACTIVE_CHANNELS],       </div>
+<div class="line"><a name="l00726"></a><span class="lineno">  726</span>&#160;        <span class="keywordtype">int</span>                 num_levels[NUM_ACTIVE_CHANNELS],        </div>
+<div class="line"><a name="l00727"></a><span class="lineno">  727</span>&#160;        LevelT*             d_levels[NUM_ACTIVE_CHANNELS],          </div>
+<div class="line"><a name="l00728"></a><span class="lineno">  728</span>&#160;        OffsetT             num_pixels,                             </div>
+<div class="line"><a name="l00729"></a><span class="lineno">  729</span>&#160;        cudaStream_t        stream              = 0,                </div>
+<div class="line"><a name="l00730"></a><span class="lineno">  730</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
+<div class="line"><a name="l00731"></a><span class="lineno">  731</span>&#160;    {</div>
+<div class="line"><a name="l00733"></a><span class="lineno">  733</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> std::iterator_traits&lt;SampleIteratorT&gt;::value_type SampleT;</div>
+<div class="line"><a name="l00734"></a><span class="lineno">  734</span>&#160;</div>
+<div class="line"><a name="l00735"></a><span class="lineno">  735</span>&#160;        <span class="keywordflow">return</span> MultiHistogramRange&lt;NUM_CHANNELS, NUM_ACTIVE_CHANNELS&gt;(</div>
+<div class="line"><a name="l00736"></a><span class="lineno">  736</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00737"></a><span class="lineno">  737</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00738"></a><span class="lineno">  738</span>&#160;            d_samples,</div>
+<div class="line"><a name="l00739"></a><span class="lineno">  739</span>&#160;            d_histogram,</div>
+<div class="line"><a name="l00740"></a><span class="lineno">  740</span>&#160;            num_levels,</div>
+<div class="line"><a name="l00741"></a><span class="lineno">  741</span>&#160;            d_levels,</div>
+<div class="line"><a name="l00742"></a><span class="lineno">  742</span>&#160;            num_pixels,</div>
+<div class="line"><a name="l00743"></a><span class="lineno">  743</span>&#160;            1,</div>
+<div class="line"><a name="l00744"></a><span class="lineno">  744</span>&#160;            <span class="keyword">sizeof</span>(SampleT) * NUM_CHANNELS * num_pixels,</div>
+<div class="line"><a name="l00745"></a><span class="lineno">  745</span>&#160;            stream,</div>
+<div class="line"><a name="l00746"></a><span class="lineno">  746</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00747"></a><span class="lineno">  747</span>&#160;    }</div>
+<div class="line"><a name="l00748"></a><span class="lineno">  748</span>&#160;</div>
+<div class="line"><a name="l00749"></a><span class="lineno">  749</span>&#160;</div>
+<div class="line"><a name="l00816"></a><span class="lineno">  816</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00817"></a><span class="lineno">  817</span>&#160;        <span class="keywordtype">int</span>                 NUM_CHANNELS,</div>
+<div class="line"><a name="l00818"></a><span class="lineno">  818</span>&#160;        <span class="keywordtype">int</span>                 NUM_ACTIVE_CHANNELS,</div>
+<div class="line"><a name="l00819"></a><span class="lineno">  819</span>&#160;        <span class="keyword">typename</span>            SampleIteratorT,</div>
+<div class="line"><a name="l00820"></a><span class="lineno">  820</span>&#160;        <span class="keyword">typename</span>            CounterT,</div>
+<div class="line"><a name="l00821"></a><span class="lineno">  821</span>&#160;        <span class="keyword">typename</span>            LevelT,</div>
+<div class="line"><a name="l00822"></a><span class="lineno">  822</span>&#160;        <span class="keyword">typename</span>            OffsetT&gt;</div>
+<div class="line"><a name="l00823"></a><span class="lineno">  823</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00824"></a><span class="lineno"><a class="code" href="structcub_1_1_device_histogram.html#a5926ff12b29d9e8e67ecf2684071542f">  824</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_histogram.html#a5926ff12b29d9e8e67ecf2684071542f" title="Computes per-channel intensity histograms from a sequence of multi-channel &quot;pixel&quot; data samples using...">MultiHistogramRange</a>(</div>
+<div class="line"><a name="l00825"></a><span class="lineno">  825</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                         </div>
+<div class="line"><a name="l00826"></a><span class="lineno">  826</span>&#160;        <span class="keywordtype">size_t</span>&amp;             temp_storage_bytes,                    </div>
+<div class="line"><a name="l00827"></a><span class="lineno">  827</span>&#160;        SampleIteratorT     d_samples,                              </div>
+<div class="line"><a name="l00828"></a><span class="lineno">  828</span>&#160;        CounterT*           d_histogram[NUM_ACTIVE_CHANNELS],       </div>
+<div class="line"><a name="l00829"></a><span class="lineno">  829</span>&#160;        <span class="keywordtype">int</span>                 num_levels[NUM_ACTIVE_CHANNELS],        </div>
+<div class="line"><a name="l00830"></a><span class="lineno">  830</span>&#160;        LevelT*             d_levels[NUM_ACTIVE_CHANNELS],          </div>
+<div class="line"><a name="l00831"></a><span class="lineno">  831</span>&#160;        OffsetT             num_row_pixels,                         </div>
+<div class="line"><a name="l00832"></a><span class="lineno">  832</span>&#160;        OffsetT             num_rows,                               </div>
+<div class="line"><a name="l00833"></a><span class="lineno">  833</span>&#160;        <span class="keywordtype">size_t</span>              row_stride_bytes,                       </div>
+<div class="line"><a name="l00834"></a><span class="lineno">  834</span>&#160;        cudaStream_t        stream              = 0,                </div>
+<div class="line"><a name="l00835"></a><span class="lineno">  835</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
+<div class="line"><a name="l00836"></a><span class="lineno">  836</span>&#160;    {</div>
+<div class="line"><a name="l00838"></a><span class="lineno">  838</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> std::iterator_traits&lt;SampleIteratorT&gt;::value_type SampleT;</div>
+<div class="line"><a name="l00839"></a><span class="lineno">  839</span>&#160;        Int2Type&lt;sizeof(SampleT) == 1&gt; is_byte_sample;</div>
+<div class="line"><a name="l00840"></a><span class="lineno">  840</span>&#160;</div>
+<div class="line"><a name="l00841"></a><span class="lineno">  841</span>&#160;        <span class="keywordflow">if</span> ((<span class="keyword">sizeof</span>(OffsetT) &gt; <span class="keyword">sizeof</span>(<span class="keywordtype">int</span>)) &amp;&amp; (row_stride_bytes * num_rows &lt; std::numeric_limits&lt;int&gt;::max()))</div>
+<div class="line"><a name="l00842"></a><span class="lineno">  842</span>&#160;        {</div>
+<div class="line"><a name="l00843"></a><span class="lineno">  843</span>&#160;            <span class="comment">// Down-convert OffsetT data type</span></div>
+<div class="line"><a name="l00844"></a><span class="lineno">  844</span>&#160;            <span class="keywordflow">return</span> DipatchHistogram&lt;NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, LevelT, int&gt;::DispatchRange(</div>
+<div class="line"><a name="l00845"></a><span class="lineno">  845</span>&#160;                d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, d_levels,</div>
+<div class="line"><a name="l00846"></a><span class="lineno">  846</span>&#160;                (<span class="keywordtype">int</span>) num_row_pixels, (<span class="keywordtype">int</span>) num_rows, (<span class="keywordtype">int</span>) (row_stride_bytes / <span class="keyword">sizeof</span>(SampleT)),</div>
+<div class="line"><a name="l00847"></a><span class="lineno">  847</span>&#160;                stream, debug_synchronous, is_byte_sample);</div>
+<div class="line"><a name="l00848"></a><span class="lineno">  848</span>&#160;        }</div>
+<div class="line"><a name="l00849"></a><span class="lineno">  849</span>&#160;</div>
+<div class="line"><a name="l00850"></a><span class="lineno">  850</span>&#160;        <span class="keywordflow">return</span> DipatchHistogram&lt;NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, LevelT, OffsetT&gt;::DispatchRange(</div>
+<div class="line"><a name="l00851"></a><span class="lineno">  851</span>&#160;            d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, d_levels,</div>
+<div class="line"><a name="l00852"></a><span class="lineno">  852</span>&#160;            num_row_pixels, num_rows, (OffsetT) (row_stride_bytes / <span class="keyword">sizeof</span>(SampleT)),</div>
+<div class="line"><a name="l00853"></a><span class="lineno">  853</span>&#160;            stream, debug_synchronous, is_byte_sample);</div>
+<div class="line"><a name="l00854"></a><span class="lineno">  854</span>&#160;    }</div>
+<div class="line"><a name="l00855"></a><span class="lineno">  855</span>&#160;</div>
+<div class="line"><a name="l00856"></a><span class="lineno">  856</span>&#160;</div>
 <div class="line"><a name="l00857"></a><span class="lineno">  857</span>&#160;</div>
-<div class="line"><a name="l00858"></a><span class="lineno">  858</span>&#160;        <span class="keywordflow">return</span> DipatchHistogram&lt;NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, LevelT, OffsetT&gt;::DispatchRange(</div>
-<div class="line"><a name="l00859"></a><span class="lineno">  859</span>&#160;            d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, d_levels,</div>
-<div class="line"><a name="l00860"></a><span class="lineno">  860</span>&#160;            num_row_pixels, num_rows, (OffsetT) (row_stride_bytes / <span class="keyword">sizeof</span>(SampleT)),</div>
-<div class="line"><a name="l00861"></a><span class="lineno">  861</span>&#160;            stream, debug_synchronous, is_byte_sample);</div>
-<div class="line"><a name="l00862"></a><span class="lineno">  862</span>&#160;    }</div>
-<div class="line"><a name="l00863"></a><span class="lineno">  863</span>&#160;</div>
-<div class="line"><a name="l00864"></a><span class="lineno">  864</span>&#160;</div>
-<div class="line"><a name="l00865"></a><span class="lineno">  865</span>&#160;</div>
-<div class="line"><a name="l00867"></a><span class="lineno">  867</span>&#160;};</div>
+<div class="line"><a name="l00859"></a><span class="lineno">  859</span>&#160;};</div>
+<div class="line"><a name="l00860"></a><span class="lineno">  860</span>&#160;</div>
+<div class="line"><a name="l00865"></a><span class="lineno">  865</span>&#160;}               <span class="comment">// CUB namespace</span></div>
+<div class="line"><a name="l00866"></a><span class="lineno">  866</span>&#160;CUB_NS_POSTFIX  <span class="comment">// Optional outer namespace(s)</span></div>
+<div class="line"><a name="l00867"></a><span class="lineno">  867</span>&#160;</div>
 <div class="line"><a name="l00868"></a><span class="lineno">  868</span>&#160;</div>
-<div class="line"><a name="l00873"></a><span class="lineno">  873</span>&#160;}               <span class="comment">// CUB namespace</span></div>
-<div class="line"><a name="l00874"></a><span class="lineno">  874</span>&#160;CUB_NS_POSTFIX  <span class="comment">// Optional outer namespace(s)</span></div>
-<div class="line"><a name="l00875"></a><span class="lineno">  875</span>&#160;</div>
-<div class="line"><a name="l00876"></a><span class="lineno">  876</span>&#160;</div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/device__partition_8cuh.html b/docs/html/device__partition_8cuh.html
index 999d5ee6f6..7ee16ef467 100644
--- a/docs/html/device__partition_8cuh.html
+++ b/docs/html/device__partition_8cuh.html
@@ -136,7 +136,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/device__partition_8cuh_source.html b/docs/html/device__partition_8cuh_source.html
index ae92126710..28ef8bb5ff 100644
--- a/docs/html/device__partition_8cuh_source.html
+++ b/docs/html/device__partition_8cuh_source.html
@@ -145,88 +145,88 @@
 <div class="line"><a name="l00049"></a><span class="lineno">   49</span>&#160;</div>
 <div class="line"><a name="l00073"></a><span class="lineno"><a class="code" href="structcub_1_1_device_partition.html">   73</a></span>&#160;<span class="keyword">struct </span><a class="code" href="structcub_1_1_device_partition.html" title="DevicePartition provides device-wide, parallel operations for partitioning sequences of data items re...">DevicePartition</a></div>
 <div class="line"><a name="l00074"></a><span class="lineno">   74</span>&#160;{</div>
-<div class="line"><a name="l00121"></a><span class="lineno">  121</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00122"></a><span class="lineno">  122</span>&#160;        <span class="keyword">typename</span>                    InputIteratorT,</div>
-<div class="line"><a name="l00123"></a><span class="lineno">  123</span>&#160;        <span class="keyword">typename</span>                    FlagIterator,</div>
-<div class="line"><a name="l00124"></a><span class="lineno">  124</span>&#160;        <span class="keyword">typename</span>                    OutputIteratorT,</div>
-<div class="line"><a name="l00125"></a><span class="lineno">  125</span>&#160;        <span class="keyword">typename</span>                    NumSelectedIteratorT&gt;</div>
-<div class="line"><a name="l00126"></a><span class="lineno">  126</span>&#160;    CUB_RUNTIME_FUNCTION __forceinline__</div>
-<div class="line"><a name="l00127"></a><span class="lineno"><a class="code" href="structcub_1_1_device_partition.html#a47515ec2a15804719db1b8f3b3124e43">  127</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_partition.html#a47515ec2a15804719db1b8f3b3124e43" title="Uses the d_flags sequence to split the corresponding items from d_in into a partitioned sequence d_ou...">Flagged</a>(</div>
-<div class="line"><a name="l00128"></a><span class="lineno">  128</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                </div>
-<div class="line"><a name="l00129"></a><span class="lineno">  129</span>&#160;        <span class="keywordtype">size_t</span>                      &amp;temp_storage_bytes,            </div>
-<div class="line"><a name="l00130"></a><span class="lineno">  130</span>&#160;        InputIteratorT              d_in,                           </div>
-<div class="line"><a name="l00131"></a><span class="lineno">  131</span>&#160;        FlagIterator                d_flags,                        </div>
-<div class="line"><a name="l00132"></a><span class="lineno">  132</span>&#160;        OutputIteratorT             d_out,                          </div>
-<div class="line"><a name="l00133"></a><span class="lineno">  133</span>&#160;        NumSelectedIteratorT        d_num_selected_out,             </div>
-<div class="line"><a name="l00134"></a><span class="lineno">  134</span>&#160;        <span class="keywordtype">int</span>                         num_items,                      </div>
-<div class="line"><a name="l00135"></a><span class="lineno">  135</span>&#160;        cudaStream_t                stream             = 0,         </div>
-<div class="line"><a name="l00136"></a><span class="lineno">  136</span>&#160;        <span class="keywordtype">bool</span>                        debug_synchronous  = <span class="keyword">false</span>)     </div>
-<div class="line"><a name="l00137"></a><span class="lineno">  137</span>&#160;    {</div>
-<div class="line"><a name="l00138"></a><span class="lineno">  138</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span>                     OffsetT;         <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00139"></a><span class="lineno">  139</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_null_type.html" title="A simple &quot;NULL&quot; marker type. ">NullType</a>                SelectOp;       <span class="comment">// Selection op (not used)</span></div>
-<div class="line"><a name="l00140"></a><span class="lineno">  140</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_null_type.html" title="A simple &quot;NULL&quot; marker type. ">NullType</a>                EqualityOp;     <span class="comment">// Equality operator (not used)</span></div>
-<div class="line"><a name="l00141"></a><span class="lineno">  141</span>&#160;</div>
-<div class="line"><a name="l00142"></a><span class="lineno">  142</span>&#160;        <span class="keywordflow">return</span> DispatchSelectIf&lt;InputIteratorT, FlagIterator, OutputIteratorT, NumSelectedIteratorT, SelectOp, EqualityOp, OffsetT, true&gt;::Dispatch(</div>
-<div class="line"><a name="l00143"></a><span class="lineno">  143</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00144"></a><span class="lineno">  144</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00145"></a><span class="lineno">  145</span>&#160;            d_in,</div>
-<div class="line"><a name="l00146"></a><span class="lineno">  146</span>&#160;            d_flags,</div>
-<div class="line"><a name="l00147"></a><span class="lineno">  147</span>&#160;            d_out,</div>
-<div class="line"><a name="l00148"></a><span class="lineno">  148</span>&#160;            d_num_selected_out,</div>
-<div class="line"><a name="l00149"></a><span class="lineno">  149</span>&#160;            SelectOp(),</div>
-<div class="line"><a name="l00150"></a><span class="lineno">  150</span>&#160;            EqualityOp(),</div>
-<div class="line"><a name="l00151"></a><span class="lineno">  151</span>&#160;            num_items,</div>
-<div class="line"><a name="l00152"></a><span class="lineno">  152</span>&#160;            stream,</div>
-<div class="line"><a name="l00153"></a><span class="lineno">  153</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00154"></a><span class="lineno">  154</span>&#160;    }</div>
+<div class="line"><a name="l00120"></a><span class="lineno">  120</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00121"></a><span class="lineno">  121</span>&#160;        <span class="keyword">typename</span>                    InputIteratorT,</div>
+<div class="line"><a name="l00122"></a><span class="lineno">  122</span>&#160;        <span class="keyword">typename</span>                    FlagIterator,</div>
+<div class="line"><a name="l00123"></a><span class="lineno">  123</span>&#160;        <span class="keyword">typename</span>                    OutputIteratorT,</div>
+<div class="line"><a name="l00124"></a><span class="lineno">  124</span>&#160;        <span class="keyword">typename</span>                    NumSelectedIteratorT&gt;</div>
+<div class="line"><a name="l00125"></a><span class="lineno">  125</span>&#160;    CUB_RUNTIME_FUNCTION __forceinline__</div>
+<div class="line"><a name="l00126"></a><span class="lineno"><a class="code" href="structcub_1_1_device_partition.html#a47515ec2a15804719db1b8f3b3124e43">  126</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_partition.html#a47515ec2a15804719db1b8f3b3124e43" title="Uses the d_flags sequence to split the corresponding items from d_in into a partitioned sequence d_ou...">Flagged</a>(</div>
+<div class="line"><a name="l00127"></a><span class="lineno">  127</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                </div>
+<div class="line"><a name="l00128"></a><span class="lineno">  128</span>&#160;        <span class="keywordtype">size_t</span>                      &amp;temp_storage_bytes,            </div>
+<div class="line"><a name="l00129"></a><span class="lineno">  129</span>&#160;        InputIteratorT              d_in,                           </div>
+<div class="line"><a name="l00130"></a><span class="lineno">  130</span>&#160;        FlagIterator                d_flags,                        </div>
+<div class="line"><a name="l00131"></a><span class="lineno">  131</span>&#160;        OutputIteratorT             d_out,                          </div>
+<div class="line"><a name="l00132"></a><span class="lineno">  132</span>&#160;        NumSelectedIteratorT        d_num_selected_out,             </div>
+<div class="line"><a name="l00133"></a><span class="lineno">  133</span>&#160;        <span class="keywordtype">int</span>                         num_items,                      </div>
+<div class="line"><a name="l00134"></a><span class="lineno">  134</span>&#160;        cudaStream_t                stream             = 0,         </div>
+<div class="line"><a name="l00135"></a><span class="lineno">  135</span>&#160;        <span class="keywordtype">bool</span>                        debug_synchronous  = <span class="keyword">false</span>)     </div>
+<div class="line"><a name="l00136"></a><span class="lineno">  136</span>&#160;    {</div>
+<div class="line"><a name="l00137"></a><span class="lineno">  137</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span>                     OffsetT;         <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00138"></a><span class="lineno">  138</span>&#160;        <span class="keyword">typedef</span> NullType                SelectOp;       <span class="comment">// Selection op (not used)</span></div>
+<div class="line"><a name="l00139"></a><span class="lineno">  139</span>&#160;        <span class="keyword">typedef</span> NullType                EqualityOp;     <span class="comment">// Equality operator (not used)</span></div>
+<div class="line"><a name="l00140"></a><span class="lineno">  140</span>&#160;</div>
+<div class="line"><a name="l00141"></a><span class="lineno">  141</span>&#160;        <span class="keywordflow">return</span> DispatchSelectIf&lt;InputIteratorT, FlagIterator, OutputIteratorT, NumSelectedIteratorT, SelectOp, EqualityOp, OffsetT, true&gt;::Dispatch(</div>
+<div class="line"><a name="l00142"></a><span class="lineno">  142</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00143"></a><span class="lineno">  143</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00144"></a><span class="lineno">  144</span>&#160;            d_in,</div>
+<div class="line"><a name="l00145"></a><span class="lineno">  145</span>&#160;            d_flags,</div>
+<div class="line"><a name="l00146"></a><span class="lineno">  146</span>&#160;            d_out,</div>
+<div class="line"><a name="l00147"></a><span class="lineno">  147</span>&#160;            d_num_selected_out,</div>
+<div class="line"><a name="l00148"></a><span class="lineno">  148</span>&#160;            SelectOp(),</div>
+<div class="line"><a name="l00149"></a><span class="lineno">  149</span>&#160;            EqualityOp(),</div>
+<div class="line"><a name="l00150"></a><span class="lineno">  150</span>&#160;            num_items,</div>
+<div class="line"><a name="l00151"></a><span class="lineno">  151</span>&#160;            stream,</div>
+<div class="line"><a name="l00152"></a><span class="lineno">  152</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00153"></a><span class="lineno">  153</span>&#160;    }</div>
+<div class="line"><a name="l00154"></a><span class="lineno">  154</span>&#160;</div>
 <div class="line"><a name="l00155"></a><span class="lineno">  155</span>&#160;</div>
-<div class="line"><a name="l00156"></a><span class="lineno">  156</span>&#160;</div>
-<div class="line"><a name="l00230"></a><span class="lineno">  230</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00231"></a><span class="lineno">  231</span>&#160;        <span class="keyword">typename</span>                    InputIteratorT,</div>
-<div class="line"><a name="l00232"></a><span class="lineno">  232</span>&#160;        <span class="keyword">typename</span>                    OutputIteratorT,</div>
-<div class="line"><a name="l00233"></a><span class="lineno">  233</span>&#160;        <span class="keyword">typename</span>                    NumSelectedIteratorT,</div>
-<div class="line"><a name="l00234"></a><span class="lineno">  234</span>&#160;        <span class="keyword">typename</span>                    SelectOp&gt;</div>
-<div class="line"><a name="l00235"></a><span class="lineno">  235</span>&#160;    CUB_RUNTIME_FUNCTION __forceinline__</div>
-<div class="line"><a name="l00236"></a><span class="lineno"><a class="code" href="structcub_1_1_device_partition.html#ac6bb4773e615e68be34cd7d491b6d8e4">  236</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_partition.html#ac6bb4773e615e68be34cd7d491b6d8e4" title="Uses the select_op functor to split the corresponding items from d_in into a partitioned sequence d_o...">If</a>(</div>
-<div class="line"><a name="l00237"></a><span class="lineno">  237</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                </div>
-<div class="line"><a name="l00238"></a><span class="lineno">  238</span>&#160;        <span class="keywordtype">size_t</span>                      &amp;temp_storage_bytes,            </div>
-<div class="line"><a name="l00239"></a><span class="lineno">  239</span>&#160;        InputIteratorT              d_in,                           </div>
-<div class="line"><a name="l00240"></a><span class="lineno">  240</span>&#160;        OutputIteratorT             d_out,                          </div>
-<div class="line"><a name="l00241"></a><span class="lineno">  241</span>&#160;        NumSelectedIteratorT        d_num_selected_out,             </div>
-<div class="line"><a name="l00242"></a><span class="lineno">  242</span>&#160;        <span class="keywordtype">int</span>                         num_items,                      </div>
-<div class="line"><a name="l00243"></a><span class="lineno">  243</span>&#160;        SelectOp                    select_op,                      </div>
-<div class="line"><a name="l00244"></a><span class="lineno">  244</span>&#160;        cudaStream_t                stream             = 0,         </div>
-<div class="line"><a name="l00245"></a><span class="lineno">  245</span>&#160;        <span class="keywordtype">bool</span>                        debug_synchronous  = <span class="keyword">false</span>)     </div>
-<div class="line"><a name="l00246"></a><span class="lineno">  246</span>&#160;    {</div>
-<div class="line"><a name="l00247"></a><span class="lineno">  247</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span>                     OffsetT;         <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00248"></a><span class="lineno">  248</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_null_type.html" title="A simple &quot;NULL&quot; marker type. ">NullType</a>*               FlagIterator;   <span class="comment">// FlagT iterator type (not used)</span></div>
-<div class="line"><a name="l00249"></a><span class="lineno">  249</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_null_type.html" title="A simple &quot;NULL&quot; marker type. ">NullType</a>                EqualityOp;     <span class="comment">// Equality operator (not used)</span></div>
-<div class="line"><a name="l00250"></a><span class="lineno">  250</span>&#160;</div>
-<div class="line"><a name="l00251"></a><span class="lineno">  251</span>&#160;        <span class="keywordflow">return</span> DispatchSelectIf&lt;InputIteratorT, FlagIterator, OutputIteratorT, NumSelectedIteratorT, SelectOp, EqualityOp, OffsetT, true&gt;::Dispatch(</div>
-<div class="line"><a name="l00252"></a><span class="lineno">  252</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00253"></a><span class="lineno">  253</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00254"></a><span class="lineno">  254</span>&#160;            d_in,</div>
-<div class="line"><a name="l00255"></a><span class="lineno">  255</span>&#160;            NULL,</div>
-<div class="line"><a name="l00256"></a><span class="lineno">  256</span>&#160;            d_out,</div>
-<div class="line"><a name="l00257"></a><span class="lineno">  257</span>&#160;            d_num_selected_out,</div>
-<div class="line"><a name="l00258"></a><span class="lineno">  258</span>&#160;            select_op,</div>
-<div class="line"><a name="l00259"></a><span class="lineno">  259</span>&#160;            EqualityOp(),</div>
-<div class="line"><a name="l00260"></a><span class="lineno">  260</span>&#160;            num_items,</div>
-<div class="line"><a name="l00261"></a><span class="lineno">  261</span>&#160;            stream,</div>
-<div class="line"><a name="l00262"></a><span class="lineno">  262</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00263"></a><span class="lineno">  263</span>&#160;    }</div>
+<div class="line"><a name="l00228"></a><span class="lineno">  228</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00229"></a><span class="lineno">  229</span>&#160;        <span class="keyword">typename</span>                    InputIteratorT,</div>
+<div class="line"><a name="l00230"></a><span class="lineno">  230</span>&#160;        <span class="keyword">typename</span>                    OutputIteratorT,</div>
+<div class="line"><a name="l00231"></a><span class="lineno">  231</span>&#160;        <span class="keyword">typename</span>                    NumSelectedIteratorT,</div>
+<div class="line"><a name="l00232"></a><span class="lineno">  232</span>&#160;        <span class="keyword">typename</span>                    SelectOp&gt;</div>
+<div class="line"><a name="l00233"></a><span class="lineno">  233</span>&#160;    CUB_RUNTIME_FUNCTION __forceinline__</div>
+<div class="line"><a name="l00234"></a><span class="lineno"><a class="code" href="structcub_1_1_device_partition.html#ac6bb4773e615e68be34cd7d491b6d8e4">  234</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_partition.html#ac6bb4773e615e68be34cd7d491b6d8e4" title="Uses the select_op functor to split the corresponding items from d_in into a partitioned sequence d_o...">If</a>(</div>
+<div class="line"><a name="l00235"></a><span class="lineno">  235</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                </div>
+<div class="line"><a name="l00236"></a><span class="lineno">  236</span>&#160;        <span class="keywordtype">size_t</span>                      &amp;temp_storage_bytes,            </div>
+<div class="line"><a name="l00237"></a><span class="lineno">  237</span>&#160;        InputIteratorT              d_in,                           </div>
+<div class="line"><a name="l00238"></a><span class="lineno">  238</span>&#160;        OutputIteratorT             d_out,                          </div>
+<div class="line"><a name="l00239"></a><span class="lineno">  239</span>&#160;        NumSelectedIteratorT        d_num_selected_out,             </div>
+<div class="line"><a name="l00240"></a><span class="lineno">  240</span>&#160;        <span class="keywordtype">int</span>                         num_items,                      </div>
+<div class="line"><a name="l00241"></a><span class="lineno">  241</span>&#160;        SelectOp                    select_op,                      </div>
+<div class="line"><a name="l00242"></a><span class="lineno">  242</span>&#160;        cudaStream_t                stream             = 0,         </div>
+<div class="line"><a name="l00243"></a><span class="lineno">  243</span>&#160;        <span class="keywordtype">bool</span>                        debug_synchronous  = <span class="keyword">false</span>)     </div>
+<div class="line"><a name="l00244"></a><span class="lineno">  244</span>&#160;    {</div>
+<div class="line"><a name="l00245"></a><span class="lineno">  245</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span>                     OffsetT;         <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00246"></a><span class="lineno">  246</span>&#160;        <span class="keyword">typedef</span> NullType*               FlagIterator;   <span class="comment">// FlagT iterator type (not used)</span></div>
+<div class="line"><a name="l00247"></a><span class="lineno">  247</span>&#160;        <span class="keyword">typedef</span> NullType                EqualityOp;     <span class="comment">// Equality operator (not used)</span></div>
+<div class="line"><a name="l00248"></a><span class="lineno">  248</span>&#160;</div>
+<div class="line"><a name="l00249"></a><span class="lineno">  249</span>&#160;        <span class="keywordflow">return</span> DispatchSelectIf&lt;InputIteratorT, FlagIterator, OutputIteratorT, NumSelectedIteratorT, SelectOp, EqualityOp, OffsetT, true&gt;::Dispatch(</div>
+<div class="line"><a name="l00250"></a><span class="lineno">  250</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00251"></a><span class="lineno">  251</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00252"></a><span class="lineno">  252</span>&#160;            d_in,</div>
+<div class="line"><a name="l00253"></a><span class="lineno">  253</span>&#160;            NULL,</div>
+<div class="line"><a name="l00254"></a><span class="lineno">  254</span>&#160;            d_out,</div>
+<div class="line"><a name="l00255"></a><span class="lineno">  255</span>&#160;            d_num_selected_out,</div>
+<div class="line"><a name="l00256"></a><span class="lineno">  256</span>&#160;            select_op,</div>
+<div class="line"><a name="l00257"></a><span class="lineno">  257</span>&#160;            EqualityOp(),</div>
+<div class="line"><a name="l00258"></a><span class="lineno">  258</span>&#160;            num_items,</div>
+<div class="line"><a name="l00259"></a><span class="lineno">  259</span>&#160;            stream,</div>
+<div class="line"><a name="l00260"></a><span class="lineno">  260</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00261"></a><span class="lineno">  261</span>&#160;    }</div>
+<div class="line"><a name="l00262"></a><span class="lineno">  262</span>&#160;</div>
+<div class="line"><a name="l00263"></a><span class="lineno">  263</span>&#160;};</div>
 <div class="line"><a name="l00264"></a><span class="lineno">  264</span>&#160;</div>
-<div class="line"><a name="l00265"></a><span class="lineno">  265</span>&#160;};</div>
-<div class="line"><a name="l00266"></a><span class="lineno">  266</span>&#160;</div>
-<div class="line"><a name="l00272"></a><span class="lineno">  272</span>&#160;}               <span class="comment">// CUB namespace</span></div>
-<div class="line"><a name="l00273"></a><span class="lineno">  273</span>&#160;CUB_NS_POSTFIX  <span class="comment">// Optional outer namespace(s)</span></div>
-<div class="line"><a name="l00274"></a><span class="lineno">  274</span>&#160;</div>
-<div class="line"><a name="l00275"></a><span class="lineno">  275</span>&#160;</div>
+<div class="line"><a name="l00270"></a><span class="lineno">  270</span>&#160;}               <span class="comment">// CUB namespace</span></div>
+<div class="line"><a name="l00271"></a><span class="lineno">  271</span>&#160;CUB_NS_POSTFIX  <span class="comment">// Optional outer namespace(s)</span></div>
+<div class="line"><a name="l00272"></a><span class="lineno">  272</span>&#160;</div>
+<div class="line"><a name="l00273"></a><span class="lineno">  273</span>&#160;</div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/device__radix__sort_8cuh.html b/docs/html/device__radix__sort_8cuh.html
index 95b986135e..a675af87a9 100644
--- a/docs/html/device__radix__sort_8cuh.html
+++ b/docs/html/device__radix__sort_8cuh.html
@@ -137,7 +137,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/device__radix__sort_8cuh_source.html b/docs/html/device__radix__sort_8cuh_source.html
index 4958207fda..b30b00fdf2 100644
--- a/docs/html/device__radix__sort_8cuh_source.html
+++ b/docs/html/device__radix__sort_8cuh_source.html
@@ -149,290 +149,290 @@
 <div class="line"><a name="l00084"></a><span class="lineno">   84</span>&#160;</div>
 <div class="line"><a name="l00085"></a><span class="lineno">   85</span>&#160;    <span class="comment">/******************************************************************/</span></div>
 <div class="line"><a name="l00089"></a><span class="lineno">   89</span>&#160;</div>
-<div class="line"><a name="l00144"></a><span class="lineno">  144</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00145"></a><span class="lineno">  145</span>&#160;        <span class="keyword">typename</span>            KeyT,</div>
-<div class="line"><a name="l00146"></a><span class="lineno">  146</span>&#160;        <span class="keyword">typename</span>            ValueT&gt;</div>
-<div class="line"><a name="l00147"></a><span class="lineno">  147</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00148"></a><span class="lineno"><a class="code" href="structcub_1_1_device_radix_sort.html#a31d7224d3f6c6a9309a0b84571f25eb9">  148</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_radix_sort.html#a31d7224d3f6c6a9309a0b84571f25eb9" title="Sorts key-value pairs into ascending order. (~2N auxiliary storage required) ">SortPairs</a>(</div>
-<div class="line"><a name="l00149"></a><span class="lineno">  149</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                        </div>
-<div class="line"><a name="l00150"></a><span class="lineno">  150</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                    </div>
-<div class="line"><a name="l00151"></a><span class="lineno">  151</span>&#160;        KeyT                *d_keys_in,                             </div>
-<div class="line"><a name="l00152"></a><span class="lineno">  152</span>&#160;        KeyT                *d_keys_out,                            </div>
-<div class="line"><a name="l00153"></a><span class="lineno">  153</span>&#160;        ValueT              *d_values_in,                           </div>
-<div class="line"><a name="l00154"></a><span class="lineno">  154</span>&#160;        ValueT              *d_values_out,                          </div>
-<div class="line"><a name="l00155"></a><span class="lineno">  155</span>&#160;        <span class="keywordtype">int</span>                 num_items,                              </div>
-<div class="line"><a name="l00156"></a><span class="lineno">  156</span>&#160;        <span class="keywordtype">int</span>                 begin_bit           = 0,                </div>
-<div class="line"><a name="l00157"></a><span class="lineno">  157</span>&#160;        <span class="keywordtype">int</span>                 end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
-<div class="line"><a name="l00158"></a><span class="lineno">  158</span>&#160;        cudaStream_t        stream              = 0,                </div>
-<div class="line"><a name="l00159"></a><span class="lineno">  159</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
-<div class="line"><a name="l00160"></a><span class="lineno">  160</span>&#160;    {</div>
-<div class="line"><a name="l00161"></a><span class="lineno">  161</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00162"></a><span class="lineno">  162</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00163"></a><span class="lineno">  163</span>&#160;</div>
-<div class="line"><a name="l00164"></a><span class="lineno">  164</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;KeyT&gt;</a>       d_keys(d_keys_in, d_keys_out);</div>
-<div class="line"><a name="l00165"></a><span class="lineno">  165</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;ValueT&gt;</a>     d_values(d_values_in, d_values_out);</div>
-<div class="line"><a name="l00166"></a><span class="lineno">  166</span>&#160;</div>
-<div class="line"><a name="l00167"></a><span class="lineno">  167</span>&#160;        <span class="keywordflow">return</span> DispatchRadixSort&lt;false, KeyT, ValueT, OffsetT&gt;::Dispatch(</div>
-<div class="line"><a name="l00168"></a><span class="lineno">  168</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00169"></a><span class="lineno">  169</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00170"></a><span class="lineno">  170</span>&#160;            d_keys,</div>
-<div class="line"><a name="l00171"></a><span class="lineno">  171</span>&#160;            d_values,</div>
-<div class="line"><a name="l00172"></a><span class="lineno">  172</span>&#160;            num_items,</div>
-<div class="line"><a name="l00173"></a><span class="lineno">  173</span>&#160;            begin_bit,</div>
-<div class="line"><a name="l00174"></a><span class="lineno">  174</span>&#160;            end_bit,</div>
-<div class="line"><a name="l00175"></a><span class="lineno">  175</span>&#160;            <span class="keyword">false</span>,</div>
-<div class="line"><a name="l00176"></a><span class="lineno">  176</span>&#160;            stream,</div>
-<div class="line"><a name="l00177"></a><span class="lineno">  177</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00178"></a><span class="lineno">  178</span>&#160;    }</div>
+<div class="line"><a name="l00143"></a><span class="lineno">  143</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00144"></a><span class="lineno">  144</span>&#160;        <span class="keyword">typename</span>            KeyT,</div>
+<div class="line"><a name="l00145"></a><span class="lineno">  145</span>&#160;        <span class="keyword">typename</span>            ValueT&gt;</div>
+<div class="line"><a name="l00146"></a><span class="lineno">  146</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00147"></a><span class="lineno"><a class="code" href="structcub_1_1_device_radix_sort.html#a31d7224d3f6c6a9309a0b84571f25eb9">  147</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_radix_sort.html#a31d7224d3f6c6a9309a0b84571f25eb9" title="Sorts key-value pairs into ascending order. (~2N auxiliary storage required) ">SortPairs</a>(</div>
+<div class="line"><a name="l00148"></a><span class="lineno">  148</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                        </div>
+<div class="line"><a name="l00149"></a><span class="lineno">  149</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                    </div>
+<div class="line"><a name="l00150"></a><span class="lineno">  150</span>&#160;        KeyT                *d_keys_in,                             </div>
+<div class="line"><a name="l00151"></a><span class="lineno">  151</span>&#160;        KeyT                *d_keys_out,                            </div>
+<div class="line"><a name="l00152"></a><span class="lineno">  152</span>&#160;        ValueT              *d_values_in,                           </div>
+<div class="line"><a name="l00153"></a><span class="lineno">  153</span>&#160;        ValueT              *d_values_out,                          </div>
+<div class="line"><a name="l00154"></a><span class="lineno">  154</span>&#160;        <span class="keywordtype">int</span>                 num_items,                              </div>
+<div class="line"><a name="l00155"></a><span class="lineno">  155</span>&#160;        <span class="keywordtype">int</span>                 begin_bit           = 0,                </div>
+<div class="line"><a name="l00156"></a><span class="lineno">  156</span>&#160;        <span class="keywordtype">int</span>                 end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
+<div class="line"><a name="l00157"></a><span class="lineno">  157</span>&#160;        cudaStream_t        stream              = 0,                </div>
+<div class="line"><a name="l00158"></a><span class="lineno">  158</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
+<div class="line"><a name="l00159"></a><span class="lineno">  159</span>&#160;    {</div>
+<div class="line"><a name="l00160"></a><span class="lineno">  160</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00161"></a><span class="lineno">  161</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
+<div class="line"><a name="l00162"></a><span class="lineno">  162</span>&#160;</div>
+<div class="line"><a name="l00163"></a><span class="lineno">  163</span>&#160;        DoubleBuffer&lt;KeyT&gt;       d_keys(d_keys_in, d_keys_out);</div>
+<div class="line"><a name="l00164"></a><span class="lineno">  164</span>&#160;        DoubleBuffer&lt;ValueT&gt;     d_values(d_values_in, d_values_out);</div>
+<div class="line"><a name="l00165"></a><span class="lineno">  165</span>&#160;</div>
+<div class="line"><a name="l00166"></a><span class="lineno">  166</span>&#160;        <span class="keywordflow">return</span> DispatchRadixSort&lt;false, KeyT, ValueT, OffsetT&gt;::Dispatch(</div>
+<div class="line"><a name="l00167"></a><span class="lineno">  167</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00168"></a><span class="lineno">  168</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00169"></a><span class="lineno">  169</span>&#160;            d_keys,</div>
+<div class="line"><a name="l00170"></a><span class="lineno">  170</span>&#160;            d_values,</div>
+<div class="line"><a name="l00171"></a><span class="lineno">  171</span>&#160;            num_items,</div>
+<div class="line"><a name="l00172"></a><span class="lineno">  172</span>&#160;            begin_bit,</div>
+<div class="line"><a name="l00173"></a><span class="lineno">  173</span>&#160;            end_bit,</div>
+<div class="line"><a name="l00174"></a><span class="lineno">  174</span>&#160;            <span class="keyword">false</span>,</div>
+<div class="line"><a name="l00175"></a><span class="lineno">  175</span>&#160;            stream,</div>
+<div class="line"><a name="l00176"></a><span class="lineno">  176</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00177"></a><span class="lineno">  177</span>&#160;    }</div>
+<div class="line"><a name="l00178"></a><span class="lineno">  178</span>&#160;</div>
 <div class="line"><a name="l00179"></a><span class="lineno">  179</span>&#160;</div>
-<div class="line"><a name="l00180"></a><span class="lineno">  180</span>&#160;</div>
-<div class="line"><a name="l00246"></a><span class="lineno">  246</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00247"></a><span class="lineno">  247</span>&#160;        <span class="keyword">typename</span>            KeyT,</div>
-<div class="line"><a name="l00248"></a><span class="lineno">  248</span>&#160;        <span class="keyword">typename</span>            ValueT&gt;</div>
-<div class="line"><a name="l00249"></a><span class="lineno">  249</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00250"></a><span class="lineno"><a class="code" href="structcub_1_1_device_radix_sort.html#a0e0f38bafdc30403a68b3ff5c7b80027">  250</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_radix_sort.html#a0e0f38bafdc30403a68b3ff5c7b80027" title="Sorts key-value pairs into ascending order. (~N auxiliary storage required) ">SortPairs</a>(</div>
-<div class="line"><a name="l00251"></a><span class="lineno">  251</span>&#160;        <span class="keywordtype">void</span>                    *d_temp_storage,                        </div>
-<div class="line"><a name="l00252"></a><span class="lineno">  252</span>&#160;        <span class="keywordtype">size_t</span>                  &amp;temp_storage_bytes,                    </div>
-<div class="line"><a name="l00253"></a><span class="lineno">  253</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;KeyT&gt;</a>      &amp;d_keys,                                </div>
-<div class="line"><a name="l00254"></a><span class="lineno">  254</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;ValueT&gt;</a>    &amp;d_values,                              </div>
-<div class="line"><a name="l00255"></a><span class="lineno">  255</span>&#160;        <span class="keywordtype">int</span>                     num_items,                              </div>
-<div class="line"><a name="l00256"></a><span class="lineno">  256</span>&#160;        <span class="keywordtype">int</span>                     begin_bit           = 0,                </div>
-<div class="line"><a name="l00257"></a><span class="lineno">  257</span>&#160;        <span class="keywordtype">int</span>                     end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
-<div class="line"><a name="l00258"></a><span class="lineno">  258</span>&#160;        cudaStream_t            stream              = 0,                </div>
-<div class="line"><a name="l00259"></a><span class="lineno">  259</span>&#160;        <span class="keywordtype">bool</span>                    debug_synchronous   = <span class="keyword">false</span>)            </div>
-<div class="line"><a name="l00260"></a><span class="lineno">  260</span>&#160;    {</div>
-<div class="line"><a name="l00261"></a><span class="lineno">  261</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00262"></a><span class="lineno">  262</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00263"></a><span class="lineno">  263</span>&#160;</div>
-<div class="line"><a name="l00264"></a><span class="lineno">  264</span>&#160;        <span class="keywordflow">return</span> DispatchRadixSort&lt;false, KeyT, ValueT, OffsetT&gt;::Dispatch(</div>
-<div class="line"><a name="l00265"></a><span class="lineno">  265</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00266"></a><span class="lineno">  266</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00267"></a><span class="lineno">  267</span>&#160;            d_keys,</div>
-<div class="line"><a name="l00268"></a><span class="lineno">  268</span>&#160;            d_values,</div>
-<div class="line"><a name="l00269"></a><span class="lineno">  269</span>&#160;            num_items,</div>
-<div class="line"><a name="l00270"></a><span class="lineno">  270</span>&#160;            begin_bit,</div>
-<div class="line"><a name="l00271"></a><span class="lineno">  271</span>&#160;            end_bit,</div>
-<div class="line"><a name="l00272"></a><span class="lineno">  272</span>&#160;            <span class="keyword">true</span>,</div>
-<div class="line"><a name="l00273"></a><span class="lineno">  273</span>&#160;            stream,</div>
-<div class="line"><a name="l00274"></a><span class="lineno">  274</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00275"></a><span class="lineno">  275</span>&#160;    }</div>
-<div class="line"><a name="l00276"></a><span class="lineno">  276</span>&#160;</div>
-<div class="line"><a name="l00277"></a><span class="lineno">  277</span>&#160;</div>
-<div class="line"><a name="l00327"></a><span class="lineno">  327</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00328"></a><span class="lineno">  328</span>&#160;        <span class="keyword">typename</span>            KeyT,</div>
-<div class="line"><a name="l00329"></a><span class="lineno">  329</span>&#160;        <span class="keyword">typename</span>            ValueT&gt;</div>
-<div class="line"><a name="l00330"></a><span class="lineno">  330</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00331"></a><span class="lineno"><a class="code" href="structcub_1_1_device_radix_sort.html#add6a87f54c8058edba4b9e875bb0626a">  331</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_radix_sort.html#add6a87f54c8058edba4b9e875bb0626a" title="Sorts key-value pairs into descending order. (~2N auxiliary storage required). ">SortPairsDescending</a>(</div>
-<div class="line"><a name="l00332"></a><span class="lineno">  332</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                        </div>
-<div class="line"><a name="l00333"></a><span class="lineno">  333</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                    </div>
-<div class="line"><a name="l00334"></a><span class="lineno">  334</span>&#160;        KeyT                *d_keys_in,                             </div>
-<div class="line"><a name="l00335"></a><span class="lineno">  335</span>&#160;        KeyT                *d_keys_out,                            </div>
-<div class="line"><a name="l00336"></a><span class="lineno">  336</span>&#160;        ValueT              *d_values_in,                           </div>
-<div class="line"><a name="l00337"></a><span class="lineno">  337</span>&#160;        ValueT              *d_values_out,                          </div>
-<div class="line"><a name="l00338"></a><span class="lineno">  338</span>&#160;        <span class="keywordtype">int</span>                 num_items,                              </div>
-<div class="line"><a name="l00339"></a><span class="lineno">  339</span>&#160;        <span class="keywordtype">int</span>                 begin_bit           = 0,                </div>
-<div class="line"><a name="l00340"></a><span class="lineno">  340</span>&#160;        <span class="keywordtype">int</span>                 end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
-<div class="line"><a name="l00341"></a><span class="lineno">  341</span>&#160;        cudaStream_t        stream              = 0,                </div>
-<div class="line"><a name="l00342"></a><span class="lineno">  342</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
-<div class="line"><a name="l00343"></a><span class="lineno">  343</span>&#160;    {</div>
-<div class="line"><a name="l00344"></a><span class="lineno">  344</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00345"></a><span class="lineno">  345</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
+<div class="line"><a name="l00244"></a><span class="lineno">  244</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00245"></a><span class="lineno">  245</span>&#160;        <span class="keyword">typename</span>            KeyT,</div>
+<div class="line"><a name="l00246"></a><span class="lineno">  246</span>&#160;        <span class="keyword">typename</span>            ValueT&gt;</div>
+<div class="line"><a name="l00247"></a><span class="lineno">  247</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00248"></a><span class="lineno"><a class="code" href="structcub_1_1_device_radix_sort.html#a0e0f38bafdc30403a68b3ff5c7b80027">  248</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_radix_sort.html#a0e0f38bafdc30403a68b3ff5c7b80027" title="Sorts key-value pairs into ascending order. (~N auxiliary storage required) ">SortPairs</a>(</div>
+<div class="line"><a name="l00249"></a><span class="lineno">  249</span>&#160;        <span class="keywordtype">void</span>                    *d_temp_storage,                        </div>
+<div class="line"><a name="l00250"></a><span class="lineno">  250</span>&#160;        <span class="keywordtype">size_t</span>                  &amp;temp_storage_bytes,                    </div>
+<div class="line"><a name="l00251"></a><span class="lineno">  251</span>&#160;        DoubleBuffer&lt;KeyT&gt;      &amp;d_keys,                                </div>
+<div class="line"><a name="l00252"></a><span class="lineno">  252</span>&#160;        DoubleBuffer&lt;ValueT&gt;    &amp;d_values,                              </div>
+<div class="line"><a name="l00253"></a><span class="lineno">  253</span>&#160;        <span class="keywordtype">int</span>                     num_items,                              </div>
+<div class="line"><a name="l00254"></a><span class="lineno">  254</span>&#160;        <span class="keywordtype">int</span>                     begin_bit           = 0,                </div>
+<div class="line"><a name="l00255"></a><span class="lineno">  255</span>&#160;        <span class="keywordtype">int</span>                     end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
+<div class="line"><a name="l00256"></a><span class="lineno">  256</span>&#160;        cudaStream_t            stream              = 0,                </div>
+<div class="line"><a name="l00257"></a><span class="lineno">  257</span>&#160;        <span class="keywordtype">bool</span>                    debug_synchronous   = <span class="keyword">false</span>)            </div>
+<div class="line"><a name="l00258"></a><span class="lineno">  258</span>&#160;    {</div>
+<div class="line"><a name="l00259"></a><span class="lineno">  259</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00260"></a><span class="lineno">  260</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
+<div class="line"><a name="l00261"></a><span class="lineno">  261</span>&#160;</div>
+<div class="line"><a name="l00262"></a><span class="lineno">  262</span>&#160;        <span class="keywordflow">return</span> DispatchRadixSort&lt;false, KeyT, ValueT, OffsetT&gt;::Dispatch(</div>
+<div class="line"><a name="l00263"></a><span class="lineno">  263</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00264"></a><span class="lineno">  264</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00265"></a><span class="lineno">  265</span>&#160;            d_keys,</div>
+<div class="line"><a name="l00266"></a><span class="lineno">  266</span>&#160;            d_values,</div>
+<div class="line"><a name="l00267"></a><span class="lineno">  267</span>&#160;            num_items,</div>
+<div class="line"><a name="l00268"></a><span class="lineno">  268</span>&#160;            begin_bit,</div>
+<div class="line"><a name="l00269"></a><span class="lineno">  269</span>&#160;            end_bit,</div>
+<div class="line"><a name="l00270"></a><span class="lineno">  270</span>&#160;            <span class="keyword">true</span>,</div>
+<div class="line"><a name="l00271"></a><span class="lineno">  271</span>&#160;            stream,</div>
+<div class="line"><a name="l00272"></a><span class="lineno">  272</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00273"></a><span class="lineno">  273</span>&#160;    }</div>
+<div class="line"><a name="l00274"></a><span class="lineno">  274</span>&#160;</div>
+<div class="line"><a name="l00275"></a><span class="lineno">  275</span>&#160;</div>
+<div class="line"><a name="l00324"></a><span class="lineno">  324</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00325"></a><span class="lineno">  325</span>&#160;        <span class="keyword">typename</span>            KeyT,</div>
+<div class="line"><a name="l00326"></a><span class="lineno">  326</span>&#160;        <span class="keyword">typename</span>            ValueT&gt;</div>
+<div class="line"><a name="l00327"></a><span class="lineno">  327</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00328"></a><span class="lineno"><a class="code" href="structcub_1_1_device_radix_sort.html#add6a87f54c8058edba4b9e875bb0626a">  328</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_radix_sort.html#add6a87f54c8058edba4b9e875bb0626a" title="Sorts key-value pairs into descending order. (~2N auxiliary storage required). ">SortPairsDescending</a>(</div>
+<div class="line"><a name="l00329"></a><span class="lineno">  329</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                        </div>
+<div class="line"><a name="l00330"></a><span class="lineno">  330</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                    </div>
+<div class="line"><a name="l00331"></a><span class="lineno">  331</span>&#160;        KeyT                *d_keys_in,                             </div>
+<div class="line"><a name="l00332"></a><span class="lineno">  332</span>&#160;        KeyT                *d_keys_out,                            </div>
+<div class="line"><a name="l00333"></a><span class="lineno">  333</span>&#160;        ValueT              *d_values_in,                           </div>
+<div class="line"><a name="l00334"></a><span class="lineno">  334</span>&#160;        ValueT              *d_values_out,                          </div>
+<div class="line"><a name="l00335"></a><span class="lineno">  335</span>&#160;        <span class="keywordtype">int</span>                 num_items,                              </div>
+<div class="line"><a name="l00336"></a><span class="lineno">  336</span>&#160;        <span class="keywordtype">int</span>                 begin_bit           = 0,                </div>
+<div class="line"><a name="l00337"></a><span class="lineno">  337</span>&#160;        <span class="keywordtype">int</span>                 end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
+<div class="line"><a name="l00338"></a><span class="lineno">  338</span>&#160;        cudaStream_t        stream              = 0,                </div>
+<div class="line"><a name="l00339"></a><span class="lineno">  339</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
+<div class="line"><a name="l00340"></a><span class="lineno">  340</span>&#160;    {</div>
+<div class="line"><a name="l00341"></a><span class="lineno">  341</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00342"></a><span class="lineno">  342</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
+<div class="line"><a name="l00343"></a><span class="lineno">  343</span>&#160;</div>
+<div class="line"><a name="l00344"></a><span class="lineno">  344</span>&#160;        DoubleBuffer&lt;KeyT&gt;       d_keys(d_keys_in, d_keys_out);</div>
+<div class="line"><a name="l00345"></a><span class="lineno">  345</span>&#160;        DoubleBuffer&lt;ValueT&gt;     d_values(d_values_in, d_values_out);</div>
 <div class="line"><a name="l00346"></a><span class="lineno">  346</span>&#160;</div>
-<div class="line"><a name="l00347"></a><span class="lineno">  347</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;KeyT&gt;</a>       d_keys(d_keys_in, d_keys_out);</div>
-<div class="line"><a name="l00348"></a><span class="lineno">  348</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;ValueT&gt;</a>     d_values(d_values_in, d_values_out);</div>
-<div class="line"><a name="l00349"></a><span class="lineno">  349</span>&#160;</div>
-<div class="line"><a name="l00350"></a><span class="lineno">  350</span>&#160;        <span class="keywordflow">return</span> DispatchRadixSort&lt;true, KeyT, ValueT, OffsetT&gt;::Dispatch(</div>
-<div class="line"><a name="l00351"></a><span class="lineno">  351</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00352"></a><span class="lineno">  352</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00353"></a><span class="lineno">  353</span>&#160;            d_keys,</div>
-<div class="line"><a name="l00354"></a><span class="lineno">  354</span>&#160;            d_values,</div>
-<div class="line"><a name="l00355"></a><span class="lineno">  355</span>&#160;            num_items,</div>
-<div class="line"><a name="l00356"></a><span class="lineno">  356</span>&#160;            begin_bit,</div>
-<div class="line"><a name="l00357"></a><span class="lineno">  357</span>&#160;            end_bit,</div>
-<div class="line"><a name="l00358"></a><span class="lineno">  358</span>&#160;            <span class="keyword">false</span>,</div>
-<div class="line"><a name="l00359"></a><span class="lineno">  359</span>&#160;            stream,</div>
-<div class="line"><a name="l00360"></a><span class="lineno">  360</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00361"></a><span class="lineno">  361</span>&#160;    }</div>
-<div class="line"><a name="l00362"></a><span class="lineno">  362</span>&#160;</div>
-<div class="line"><a name="l00363"></a><span class="lineno">  363</span>&#160;</div>
-<div class="line"><a name="l00424"></a><span class="lineno">  424</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00425"></a><span class="lineno">  425</span>&#160;        <span class="keyword">typename</span>            KeyT,</div>
-<div class="line"><a name="l00426"></a><span class="lineno">  426</span>&#160;        <span class="keyword">typename</span>            ValueT&gt;</div>
-<div class="line"><a name="l00427"></a><span class="lineno">  427</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00428"></a><span class="lineno"><a class="code" href="structcub_1_1_device_radix_sort.html#aea53a40e665f2d5ed683a6655a3d188e">  428</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_radix_sort.html#aea53a40e665f2d5ed683a6655a3d188e" title="Sorts key-value pairs into descending order. (~N auxiliary storage required). ">SortPairsDescending</a>(</div>
-<div class="line"><a name="l00429"></a><span class="lineno">  429</span>&#160;        <span class="keywordtype">void</span>                    *d_temp_storage,                        </div>
-<div class="line"><a name="l00430"></a><span class="lineno">  430</span>&#160;        <span class="keywordtype">size_t</span>                  &amp;temp_storage_bytes,                    </div>
-<div class="line"><a name="l00431"></a><span class="lineno">  431</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;KeyT&gt;</a>      &amp;d_keys,                                </div>
-<div class="line"><a name="l00432"></a><span class="lineno">  432</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;ValueT&gt;</a>    &amp;d_values,                              </div>
-<div class="line"><a name="l00433"></a><span class="lineno">  433</span>&#160;        <span class="keywordtype">int</span>                     num_items,                              </div>
-<div class="line"><a name="l00434"></a><span class="lineno">  434</span>&#160;        <span class="keywordtype">int</span>                     begin_bit           = 0,                </div>
-<div class="line"><a name="l00435"></a><span class="lineno">  435</span>&#160;        <span class="keywordtype">int</span>                     end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
-<div class="line"><a name="l00436"></a><span class="lineno">  436</span>&#160;        cudaStream_t            stream              = 0,                </div>
-<div class="line"><a name="l00437"></a><span class="lineno">  437</span>&#160;        <span class="keywordtype">bool</span>                    debug_synchronous   = <span class="keyword">false</span>)            </div>
-<div class="line"><a name="l00438"></a><span class="lineno">  438</span>&#160;    {</div>
-<div class="line"><a name="l00439"></a><span class="lineno">  439</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00440"></a><span class="lineno">  440</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00441"></a><span class="lineno">  441</span>&#160;</div>
-<div class="line"><a name="l00442"></a><span class="lineno">  442</span>&#160;        <span class="keywordflow">return</span> DispatchRadixSort&lt;true, KeyT, ValueT, OffsetT&gt;::Dispatch(</div>
-<div class="line"><a name="l00443"></a><span class="lineno">  443</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00444"></a><span class="lineno">  444</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00445"></a><span class="lineno">  445</span>&#160;            d_keys,</div>
-<div class="line"><a name="l00446"></a><span class="lineno">  446</span>&#160;            d_values,</div>
-<div class="line"><a name="l00447"></a><span class="lineno">  447</span>&#160;            num_items,</div>
-<div class="line"><a name="l00448"></a><span class="lineno">  448</span>&#160;            begin_bit,</div>
-<div class="line"><a name="l00449"></a><span class="lineno">  449</span>&#160;            end_bit,</div>
-<div class="line"><a name="l00450"></a><span class="lineno">  450</span>&#160;            <span class="keyword">true</span>,</div>
-<div class="line"><a name="l00451"></a><span class="lineno">  451</span>&#160;            stream,</div>
-<div class="line"><a name="l00452"></a><span class="lineno">  452</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00453"></a><span class="lineno">  453</span>&#160;    }</div>
-<div class="line"><a name="l00454"></a><span class="lineno">  454</span>&#160;</div>
-<div class="line"><a name="l00455"></a><span class="lineno">  455</span>&#160;</div>
-<div class="line"><a name="l00457"></a><span class="lineno">  457</span>&#160;    <span class="comment">/******************************************************************/</span></div>
-<div class="line"><a name="l00461"></a><span class="lineno">  461</span>&#160;</div>
-<div class="line"><a name="l00462"></a><span class="lineno">  462</span>&#160;</div>
-<div class="line"><a name="l00509"></a><span class="lineno">  509</span>&#160;    <span class="keyword">template</span> &lt;<span class="keyword">typename</span> KeyT&gt;</div>
-<div class="line"><a name="l00510"></a><span class="lineno">  510</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00511"></a><span class="lineno"><a class="code" href="structcub_1_1_device_radix_sort.html#a4f555afa8ac2949d9fef49fad52a50d6">  511</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_radix_sort.html#a4f555afa8ac2949d9fef49fad52a50d6" title="Sorts keys into ascending order. (~2N auxiliary storage required) ">SortKeys</a>(</div>
-<div class="line"><a name="l00512"></a><span class="lineno">  512</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                        </div>
-<div class="line"><a name="l00513"></a><span class="lineno">  513</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                    </div>
-<div class="line"><a name="l00514"></a><span class="lineno">  514</span>&#160;        KeyT                *d_keys_in,                             </div>
-<div class="line"><a name="l00515"></a><span class="lineno">  515</span>&#160;        KeyT                *d_keys_out,                            </div>
-<div class="line"><a name="l00516"></a><span class="lineno">  516</span>&#160;        <span class="keywordtype">int</span>                 num_items,                              </div>
-<div class="line"><a name="l00517"></a><span class="lineno">  517</span>&#160;        <span class="keywordtype">int</span>                 begin_bit           = 0,                </div>
-<div class="line"><a name="l00518"></a><span class="lineno">  518</span>&#160;        <span class="keywordtype">int</span>                 end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
-<div class="line"><a name="l00519"></a><span class="lineno">  519</span>&#160;        cudaStream_t        stream              = 0,                </div>
-<div class="line"><a name="l00520"></a><span class="lineno">  520</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
-<div class="line"><a name="l00521"></a><span class="lineno">  521</span>&#160;    {</div>
-<div class="line"><a name="l00522"></a><span class="lineno">  522</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00523"></a><span class="lineno">  523</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00524"></a><span class="lineno">  524</span>&#160;</div>
-<div class="line"><a name="l00525"></a><span class="lineno">  525</span>&#160;        <span class="comment">// Null value type</span></div>
-<div class="line"><a name="l00526"></a><span class="lineno">  526</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;KeyT&gt;</a>       d_keys(d_keys_in, d_keys_out);</div>
-<div class="line"><a name="l00527"></a><span class="lineno">  527</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;NullType&gt;</a>  d_values;</div>
-<div class="line"><a name="l00528"></a><span class="lineno">  528</span>&#160;</div>
-<div class="line"><a name="l00529"></a><span class="lineno">  529</span>&#160;        <span class="keywordflow">return</span> DispatchRadixSort&lt;false, KeyT, NullType, OffsetT&gt;::Dispatch(</div>
-<div class="line"><a name="l00530"></a><span class="lineno">  530</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00531"></a><span class="lineno">  531</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00532"></a><span class="lineno">  532</span>&#160;            d_keys,</div>
-<div class="line"><a name="l00533"></a><span class="lineno">  533</span>&#160;            d_values,</div>
-<div class="line"><a name="l00534"></a><span class="lineno">  534</span>&#160;            num_items,</div>
-<div class="line"><a name="l00535"></a><span class="lineno">  535</span>&#160;            begin_bit,</div>
-<div class="line"><a name="l00536"></a><span class="lineno">  536</span>&#160;            end_bit,</div>
-<div class="line"><a name="l00537"></a><span class="lineno">  537</span>&#160;            <span class="keyword">false</span>,</div>
-<div class="line"><a name="l00538"></a><span class="lineno">  538</span>&#160;            stream,</div>
-<div class="line"><a name="l00539"></a><span class="lineno">  539</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00540"></a><span class="lineno">  540</span>&#160;    }</div>
-<div class="line"><a name="l00541"></a><span class="lineno">  541</span>&#160;</div>
-<div class="line"><a name="l00542"></a><span class="lineno">  542</span>&#160;</div>
-<div class="line"><a name="l00599"></a><span class="lineno">  599</span>&#160;    <span class="keyword">template</span> &lt;<span class="keyword">typename</span> KeyT&gt;</div>
-<div class="line"><a name="l00600"></a><span class="lineno">  600</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00601"></a><span class="lineno"><a class="code" href="structcub_1_1_device_radix_sort.html#ad7b5dbc2e3fd44c21193f2b792706191">  601</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_radix_sort.html#ad7b5dbc2e3fd44c21193f2b792706191" title="Sorts keys into ascending order. (~N auxiliary storage required). ">SortKeys</a>(</div>
-<div class="line"><a name="l00602"></a><span class="lineno">  602</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                        </div>
-<div class="line"><a name="l00603"></a><span class="lineno">  603</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                    </div>
-<div class="line"><a name="l00604"></a><span class="lineno">  604</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;KeyT&gt;</a>  &amp;d_keys,                                </div>
-<div class="line"><a name="l00605"></a><span class="lineno">  605</span>&#160;        <span class="keywordtype">int</span>                 num_items,                              </div>
-<div class="line"><a name="l00606"></a><span class="lineno">  606</span>&#160;        <span class="keywordtype">int</span>                 begin_bit           = 0,                </div>
-<div class="line"><a name="l00607"></a><span class="lineno">  607</span>&#160;        <span class="keywordtype">int</span>                 end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
-<div class="line"><a name="l00608"></a><span class="lineno">  608</span>&#160;        cudaStream_t        stream              = 0,                </div>
-<div class="line"><a name="l00609"></a><span class="lineno">  609</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
-<div class="line"><a name="l00610"></a><span class="lineno">  610</span>&#160;    {</div>
-<div class="line"><a name="l00611"></a><span class="lineno">  611</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00612"></a><span class="lineno">  612</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00613"></a><span class="lineno">  613</span>&#160;</div>
-<div class="line"><a name="l00614"></a><span class="lineno">  614</span>&#160;        <span class="comment">// Null value type</span></div>
-<div class="line"><a name="l00615"></a><span class="lineno">  615</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;NullType&gt;</a> d_values;</div>
-<div class="line"><a name="l00616"></a><span class="lineno">  616</span>&#160;</div>
-<div class="line"><a name="l00617"></a><span class="lineno">  617</span>&#160;        <span class="keywordflow">return</span> DispatchRadixSort&lt;false, KeyT, NullType, OffsetT&gt;::Dispatch(</div>
-<div class="line"><a name="l00618"></a><span class="lineno">  618</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00619"></a><span class="lineno">  619</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00620"></a><span class="lineno">  620</span>&#160;            d_keys,</div>
-<div class="line"><a name="l00621"></a><span class="lineno">  621</span>&#160;            d_values,</div>
-<div class="line"><a name="l00622"></a><span class="lineno">  622</span>&#160;            num_items,</div>
-<div class="line"><a name="l00623"></a><span class="lineno">  623</span>&#160;            begin_bit,</div>
-<div class="line"><a name="l00624"></a><span class="lineno">  624</span>&#160;            end_bit,</div>
-<div class="line"><a name="l00625"></a><span class="lineno">  625</span>&#160;            <span class="keyword">true</span>,</div>
-<div class="line"><a name="l00626"></a><span class="lineno">  626</span>&#160;            stream,</div>
-<div class="line"><a name="l00627"></a><span class="lineno">  627</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00628"></a><span class="lineno">  628</span>&#160;    }</div>
-<div class="line"><a name="l00629"></a><span class="lineno">  629</span>&#160;</div>
-<div class="line"><a name="l00675"></a><span class="lineno">  675</span>&#160;    <span class="keyword">template</span> &lt;<span class="keyword">typename</span> KeyT&gt;</div>
-<div class="line"><a name="l00676"></a><span class="lineno">  676</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00677"></a><span class="lineno"><a class="code" href="structcub_1_1_device_radix_sort.html#a24761009c4cc15fd2e54cb72663af0ef">  677</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_radix_sort.html#a24761009c4cc15fd2e54cb72663af0ef" title="Sorts keys into descending order. (~2N auxiliary storage required). ">SortKeysDescending</a>(</div>
-<div class="line"><a name="l00678"></a><span class="lineno">  678</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                        </div>
-<div class="line"><a name="l00679"></a><span class="lineno">  679</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                    </div>
-<div class="line"><a name="l00680"></a><span class="lineno">  680</span>&#160;        KeyT                *d_keys_in,                             </div>
-<div class="line"><a name="l00681"></a><span class="lineno">  681</span>&#160;        KeyT                *d_keys_out,                            </div>
-<div class="line"><a name="l00682"></a><span class="lineno">  682</span>&#160;        <span class="keywordtype">int</span>                 num_items,                              </div>
-<div class="line"><a name="l00683"></a><span class="lineno">  683</span>&#160;        <span class="keywordtype">int</span>                 begin_bit           = 0,                </div>
-<div class="line"><a name="l00684"></a><span class="lineno">  684</span>&#160;        <span class="keywordtype">int</span>                 end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
-<div class="line"><a name="l00685"></a><span class="lineno">  685</span>&#160;        cudaStream_t        stream              = 0,                </div>
-<div class="line"><a name="l00686"></a><span class="lineno">  686</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
-<div class="line"><a name="l00687"></a><span class="lineno">  687</span>&#160;    {</div>
-<div class="line"><a name="l00688"></a><span class="lineno">  688</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00689"></a><span class="lineno">  689</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00690"></a><span class="lineno">  690</span>&#160;</div>
-<div class="line"><a name="l00691"></a><span class="lineno">  691</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;KeyT&gt;</a>      d_keys(d_keys_in, d_keys_out);</div>
-<div class="line"><a name="l00692"></a><span class="lineno">  692</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;NullType&gt;</a>  d_values;</div>
-<div class="line"><a name="l00693"></a><span class="lineno">  693</span>&#160;</div>
-<div class="line"><a name="l00694"></a><span class="lineno">  694</span>&#160;        <span class="keywordflow">return</span> DispatchRadixSort&lt;true, KeyT, NullType, OffsetT&gt;::Dispatch(</div>
-<div class="line"><a name="l00695"></a><span class="lineno">  695</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00696"></a><span class="lineno">  696</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00697"></a><span class="lineno">  697</span>&#160;            d_keys,</div>
-<div class="line"><a name="l00698"></a><span class="lineno">  698</span>&#160;            d_values,</div>
-<div class="line"><a name="l00699"></a><span class="lineno">  699</span>&#160;            num_items,</div>
-<div class="line"><a name="l00700"></a><span class="lineno">  700</span>&#160;            begin_bit,</div>
-<div class="line"><a name="l00701"></a><span class="lineno">  701</span>&#160;            end_bit,</div>
-<div class="line"><a name="l00702"></a><span class="lineno">  702</span>&#160;            <span class="keyword">false</span>,</div>
-<div class="line"><a name="l00703"></a><span class="lineno">  703</span>&#160;            stream,</div>
-<div class="line"><a name="l00704"></a><span class="lineno">  704</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00705"></a><span class="lineno">  705</span>&#160;    }</div>
-<div class="line"><a name="l00706"></a><span class="lineno">  706</span>&#160;</div>
-<div class="line"><a name="l00707"></a><span class="lineno">  707</span>&#160;</div>
-<div class="line"><a name="l00760"></a><span class="lineno">  760</span>&#160;    <span class="keyword">template</span> &lt;<span class="keyword">typename</span> KeyT&gt;</div>
-<div class="line"><a name="l00761"></a><span class="lineno">  761</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00762"></a><span class="lineno"><a class="code" href="structcub_1_1_device_radix_sort.html#a95a8939332405efec4527442c26c2628">  762</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_radix_sort.html#a95a8939332405efec4527442c26c2628" title="Sorts keys into descending order. (~N auxiliary storage required). ">SortKeysDescending</a>(</div>
-<div class="line"><a name="l00763"></a><span class="lineno">  763</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                        </div>
-<div class="line"><a name="l00764"></a><span class="lineno">  764</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                    </div>
-<div class="line"><a name="l00765"></a><span class="lineno">  765</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;KeyT&gt;</a>  &amp;d_keys,                                </div>
-<div class="line"><a name="l00766"></a><span class="lineno">  766</span>&#160;        <span class="keywordtype">int</span>                 num_items,                              </div>
-<div class="line"><a name="l00767"></a><span class="lineno">  767</span>&#160;        <span class="keywordtype">int</span>                 begin_bit           = 0,                </div>
-<div class="line"><a name="l00768"></a><span class="lineno">  768</span>&#160;        <span class="keywordtype">int</span>                 end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
-<div class="line"><a name="l00769"></a><span class="lineno">  769</span>&#160;        cudaStream_t        stream              = 0,                </div>
-<div class="line"><a name="l00770"></a><span class="lineno">  770</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
-<div class="line"><a name="l00771"></a><span class="lineno">  771</span>&#160;    {</div>
-<div class="line"><a name="l00772"></a><span class="lineno">  772</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00773"></a><span class="lineno">  773</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00774"></a><span class="lineno">  774</span>&#160;</div>
-<div class="line"><a name="l00775"></a><span class="lineno">  775</span>&#160;        <span class="comment">// Null value type</span></div>
-<div class="line"><a name="l00776"></a><span class="lineno">  776</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;NullType&gt;</a> d_values;</div>
-<div class="line"><a name="l00777"></a><span class="lineno">  777</span>&#160;</div>
-<div class="line"><a name="l00778"></a><span class="lineno">  778</span>&#160;        <span class="keywordflow">return</span> DispatchRadixSort&lt;true, KeyT, NullType, OffsetT&gt;::Dispatch(</div>
-<div class="line"><a name="l00779"></a><span class="lineno">  779</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00780"></a><span class="lineno">  780</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00781"></a><span class="lineno">  781</span>&#160;            d_keys,</div>
-<div class="line"><a name="l00782"></a><span class="lineno">  782</span>&#160;            d_values,</div>
-<div class="line"><a name="l00783"></a><span class="lineno">  783</span>&#160;            num_items,</div>
-<div class="line"><a name="l00784"></a><span class="lineno">  784</span>&#160;            begin_bit,</div>
-<div class="line"><a name="l00785"></a><span class="lineno">  785</span>&#160;            end_bit,</div>
-<div class="line"><a name="l00786"></a><span class="lineno">  786</span>&#160;            <span class="keyword">true</span>,</div>
-<div class="line"><a name="l00787"></a><span class="lineno">  787</span>&#160;            stream,</div>
-<div class="line"><a name="l00788"></a><span class="lineno">  788</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00789"></a><span class="lineno">  789</span>&#160;    }</div>
-<div class="line"><a name="l00790"></a><span class="lineno">  790</span>&#160;</div>
-<div class="line"><a name="l00791"></a><span class="lineno">  791</span>&#160;</div>
-<div class="line"><a name="l00793"></a><span class="lineno">  793</span>&#160;</div>
-<div class="line"><a name="l00794"></a><span class="lineno">  794</span>&#160;</div>
-<div class="line"><a name="l00795"></a><span class="lineno">  795</span>&#160;};</div>
+<div class="line"><a name="l00347"></a><span class="lineno">  347</span>&#160;        <span class="keywordflow">return</span> DispatchRadixSort&lt;true, KeyT, ValueT, OffsetT&gt;::Dispatch(</div>
+<div class="line"><a name="l00348"></a><span class="lineno">  348</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00349"></a><span class="lineno">  349</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00350"></a><span class="lineno">  350</span>&#160;            d_keys,</div>
+<div class="line"><a name="l00351"></a><span class="lineno">  351</span>&#160;            d_values,</div>
+<div class="line"><a name="l00352"></a><span class="lineno">  352</span>&#160;            num_items,</div>
+<div class="line"><a name="l00353"></a><span class="lineno">  353</span>&#160;            begin_bit,</div>
+<div class="line"><a name="l00354"></a><span class="lineno">  354</span>&#160;            end_bit,</div>
+<div class="line"><a name="l00355"></a><span class="lineno">  355</span>&#160;            <span class="keyword">false</span>,</div>
+<div class="line"><a name="l00356"></a><span class="lineno">  356</span>&#160;            stream,</div>
+<div class="line"><a name="l00357"></a><span class="lineno">  357</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00358"></a><span class="lineno">  358</span>&#160;    }</div>
+<div class="line"><a name="l00359"></a><span class="lineno">  359</span>&#160;</div>
+<div class="line"><a name="l00360"></a><span class="lineno">  360</span>&#160;</div>
+<div class="line"><a name="l00420"></a><span class="lineno">  420</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00421"></a><span class="lineno">  421</span>&#160;        <span class="keyword">typename</span>            KeyT,</div>
+<div class="line"><a name="l00422"></a><span class="lineno">  422</span>&#160;        <span class="keyword">typename</span>            ValueT&gt;</div>
+<div class="line"><a name="l00423"></a><span class="lineno">  423</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00424"></a><span class="lineno"><a class="code" href="structcub_1_1_device_radix_sort.html#aea53a40e665f2d5ed683a6655a3d188e">  424</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_radix_sort.html#aea53a40e665f2d5ed683a6655a3d188e" title="Sorts key-value pairs into descending order. (~N auxiliary storage required). ">SortPairsDescending</a>(</div>
+<div class="line"><a name="l00425"></a><span class="lineno">  425</span>&#160;        <span class="keywordtype">void</span>                    *d_temp_storage,                        </div>
+<div class="line"><a name="l00426"></a><span class="lineno">  426</span>&#160;        <span class="keywordtype">size_t</span>                  &amp;temp_storage_bytes,                    </div>
+<div class="line"><a name="l00427"></a><span class="lineno">  427</span>&#160;        DoubleBuffer&lt;KeyT&gt;      &amp;d_keys,                                </div>
+<div class="line"><a name="l00428"></a><span class="lineno">  428</span>&#160;        DoubleBuffer&lt;ValueT&gt;    &amp;d_values,                              </div>
+<div class="line"><a name="l00429"></a><span class="lineno">  429</span>&#160;        <span class="keywordtype">int</span>                     num_items,                              </div>
+<div class="line"><a name="l00430"></a><span class="lineno">  430</span>&#160;        <span class="keywordtype">int</span>                     begin_bit           = 0,                </div>
+<div class="line"><a name="l00431"></a><span class="lineno">  431</span>&#160;        <span class="keywordtype">int</span>                     end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
+<div class="line"><a name="l00432"></a><span class="lineno">  432</span>&#160;        cudaStream_t            stream              = 0,                </div>
+<div class="line"><a name="l00433"></a><span class="lineno">  433</span>&#160;        <span class="keywordtype">bool</span>                    debug_synchronous   = <span class="keyword">false</span>)            </div>
+<div class="line"><a name="l00434"></a><span class="lineno">  434</span>&#160;    {</div>
+<div class="line"><a name="l00435"></a><span class="lineno">  435</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00436"></a><span class="lineno">  436</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
+<div class="line"><a name="l00437"></a><span class="lineno">  437</span>&#160;</div>
+<div class="line"><a name="l00438"></a><span class="lineno">  438</span>&#160;        <span class="keywordflow">return</span> DispatchRadixSort&lt;true, KeyT, ValueT, OffsetT&gt;::Dispatch(</div>
+<div class="line"><a name="l00439"></a><span class="lineno">  439</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00440"></a><span class="lineno">  440</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00441"></a><span class="lineno">  441</span>&#160;            d_keys,</div>
+<div class="line"><a name="l00442"></a><span class="lineno">  442</span>&#160;            d_values,</div>
+<div class="line"><a name="l00443"></a><span class="lineno">  443</span>&#160;            num_items,</div>
+<div class="line"><a name="l00444"></a><span class="lineno">  444</span>&#160;            begin_bit,</div>
+<div class="line"><a name="l00445"></a><span class="lineno">  445</span>&#160;            end_bit,</div>
+<div class="line"><a name="l00446"></a><span class="lineno">  446</span>&#160;            <span class="keyword">true</span>,</div>
+<div class="line"><a name="l00447"></a><span class="lineno">  447</span>&#160;            stream,</div>
+<div class="line"><a name="l00448"></a><span class="lineno">  448</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00449"></a><span class="lineno">  449</span>&#160;    }</div>
+<div class="line"><a name="l00450"></a><span class="lineno">  450</span>&#160;</div>
+<div class="line"><a name="l00451"></a><span class="lineno">  451</span>&#160;</div>
+<div class="line"><a name="l00453"></a><span class="lineno">  453</span>&#160;    <span class="comment">/******************************************************************/</span></div>
+<div class="line"><a name="l00457"></a><span class="lineno">  457</span>&#160;</div>
+<div class="line"><a name="l00458"></a><span class="lineno">  458</span>&#160;</div>
+<div class="line"><a name="l00504"></a><span class="lineno">  504</span>&#160;    <span class="keyword">template</span> &lt;<span class="keyword">typename</span> KeyT&gt;</div>
+<div class="line"><a name="l00505"></a><span class="lineno">  505</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00506"></a><span class="lineno"><a class="code" href="structcub_1_1_device_radix_sort.html#a4f555afa8ac2949d9fef49fad52a50d6">  506</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_radix_sort.html#a4f555afa8ac2949d9fef49fad52a50d6" title="Sorts keys into ascending order. (~2N auxiliary storage required) ">SortKeys</a>(</div>
+<div class="line"><a name="l00507"></a><span class="lineno">  507</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                        </div>
+<div class="line"><a name="l00508"></a><span class="lineno">  508</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                    </div>
+<div class="line"><a name="l00509"></a><span class="lineno">  509</span>&#160;        KeyT                *d_keys_in,                             </div>
+<div class="line"><a name="l00510"></a><span class="lineno">  510</span>&#160;        KeyT                *d_keys_out,                            </div>
+<div class="line"><a name="l00511"></a><span class="lineno">  511</span>&#160;        <span class="keywordtype">int</span>                 num_items,                              </div>
+<div class="line"><a name="l00512"></a><span class="lineno">  512</span>&#160;        <span class="keywordtype">int</span>                 begin_bit           = 0,                </div>
+<div class="line"><a name="l00513"></a><span class="lineno">  513</span>&#160;        <span class="keywordtype">int</span>                 end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
+<div class="line"><a name="l00514"></a><span class="lineno">  514</span>&#160;        cudaStream_t        stream              = 0,                </div>
+<div class="line"><a name="l00515"></a><span class="lineno">  515</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
+<div class="line"><a name="l00516"></a><span class="lineno">  516</span>&#160;    {</div>
+<div class="line"><a name="l00517"></a><span class="lineno">  517</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00518"></a><span class="lineno">  518</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
+<div class="line"><a name="l00519"></a><span class="lineno">  519</span>&#160;</div>
+<div class="line"><a name="l00520"></a><span class="lineno">  520</span>&#160;        <span class="comment">// Null value type</span></div>
+<div class="line"><a name="l00521"></a><span class="lineno">  521</span>&#160;        DoubleBuffer&lt;KeyT&gt;       d_keys(d_keys_in, d_keys_out);</div>
+<div class="line"><a name="l00522"></a><span class="lineno">  522</span>&#160;        DoubleBuffer&lt;NullType&gt;  d_values;</div>
+<div class="line"><a name="l00523"></a><span class="lineno">  523</span>&#160;</div>
+<div class="line"><a name="l00524"></a><span class="lineno">  524</span>&#160;        <span class="keywordflow">return</span> DispatchRadixSort&lt;false, KeyT, NullType, OffsetT&gt;::Dispatch(</div>
+<div class="line"><a name="l00525"></a><span class="lineno">  525</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00526"></a><span class="lineno">  526</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00527"></a><span class="lineno">  527</span>&#160;            d_keys,</div>
+<div class="line"><a name="l00528"></a><span class="lineno">  528</span>&#160;            d_values,</div>
+<div class="line"><a name="l00529"></a><span class="lineno">  529</span>&#160;            num_items,</div>
+<div class="line"><a name="l00530"></a><span class="lineno">  530</span>&#160;            begin_bit,</div>
+<div class="line"><a name="l00531"></a><span class="lineno">  531</span>&#160;            end_bit,</div>
+<div class="line"><a name="l00532"></a><span class="lineno">  532</span>&#160;            <span class="keyword">false</span>,</div>
+<div class="line"><a name="l00533"></a><span class="lineno">  533</span>&#160;            stream,</div>
+<div class="line"><a name="l00534"></a><span class="lineno">  534</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00535"></a><span class="lineno">  535</span>&#160;    }</div>
+<div class="line"><a name="l00536"></a><span class="lineno">  536</span>&#160;</div>
+<div class="line"><a name="l00537"></a><span class="lineno">  537</span>&#160;</div>
+<div class="line"><a name="l00593"></a><span class="lineno">  593</span>&#160;    <span class="keyword">template</span> &lt;<span class="keyword">typename</span> KeyT&gt;</div>
+<div class="line"><a name="l00594"></a><span class="lineno">  594</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00595"></a><span class="lineno"><a class="code" href="structcub_1_1_device_radix_sort.html#ad7b5dbc2e3fd44c21193f2b792706191">  595</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_radix_sort.html#ad7b5dbc2e3fd44c21193f2b792706191" title="Sorts keys into ascending order. (~N auxiliary storage required). ">SortKeys</a>(</div>
+<div class="line"><a name="l00596"></a><span class="lineno">  596</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                        </div>
+<div class="line"><a name="l00597"></a><span class="lineno">  597</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                    </div>
+<div class="line"><a name="l00598"></a><span class="lineno">  598</span>&#160;        DoubleBuffer&lt;KeyT&gt;  &amp;d_keys,                                </div>
+<div class="line"><a name="l00599"></a><span class="lineno">  599</span>&#160;        <span class="keywordtype">int</span>                 num_items,                              </div>
+<div class="line"><a name="l00600"></a><span class="lineno">  600</span>&#160;        <span class="keywordtype">int</span>                 begin_bit           = 0,                </div>
+<div class="line"><a name="l00601"></a><span class="lineno">  601</span>&#160;        <span class="keywordtype">int</span>                 end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
+<div class="line"><a name="l00602"></a><span class="lineno">  602</span>&#160;        cudaStream_t        stream              = 0,                </div>
+<div class="line"><a name="l00603"></a><span class="lineno">  603</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
+<div class="line"><a name="l00604"></a><span class="lineno">  604</span>&#160;    {</div>
+<div class="line"><a name="l00605"></a><span class="lineno">  605</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00606"></a><span class="lineno">  606</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
+<div class="line"><a name="l00607"></a><span class="lineno">  607</span>&#160;</div>
+<div class="line"><a name="l00608"></a><span class="lineno">  608</span>&#160;        <span class="comment">// Null value type</span></div>
+<div class="line"><a name="l00609"></a><span class="lineno">  609</span>&#160;        DoubleBuffer&lt;NullType&gt; d_values;</div>
+<div class="line"><a name="l00610"></a><span class="lineno">  610</span>&#160;</div>
+<div class="line"><a name="l00611"></a><span class="lineno">  611</span>&#160;        <span class="keywordflow">return</span> DispatchRadixSort&lt;false, KeyT, NullType, OffsetT&gt;::Dispatch(</div>
+<div class="line"><a name="l00612"></a><span class="lineno">  612</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00613"></a><span class="lineno">  613</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00614"></a><span class="lineno">  614</span>&#160;            d_keys,</div>
+<div class="line"><a name="l00615"></a><span class="lineno">  615</span>&#160;            d_values,</div>
+<div class="line"><a name="l00616"></a><span class="lineno">  616</span>&#160;            num_items,</div>
+<div class="line"><a name="l00617"></a><span class="lineno">  617</span>&#160;            begin_bit,</div>
+<div class="line"><a name="l00618"></a><span class="lineno">  618</span>&#160;            end_bit,</div>
+<div class="line"><a name="l00619"></a><span class="lineno">  619</span>&#160;            <span class="keyword">true</span>,</div>
+<div class="line"><a name="l00620"></a><span class="lineno">  620</span>&#160;            stream,</div>
+<div class="line"><a name="l00621"></a><span class="lineno">  621</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00622"></a><span class="lineno">  622</span>&#160;    }</div>
+<div class="line"><a name="l00623"></a><span class="lineno">  623</span>&#160;</div>
+<div class="line"><a name="l00668"></a><span class="lineno">  668</span>&#160;    <span class="keyword">template</span> &lt;<span class="keyword">typename</span> KeyT&gt;</div>
+<div class="line"><a name="l00669"></a><span class="lineno">  669</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00670"></a><span class="lineno"><a class="code" href="structcub_1_1_device_radix_sort.html#a24761009c4cc15fd2e54cb72663af0ef">  670</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_radix_sort.html#a24761009c4cc15fd2e54cb72663af0ef" title="Sorts keys into descending order. (~2N auxiliary storage required). ">SortKeysDescending</a>(</div>
+<div class="line"><a name="l00671"></a><span class="lineno">  671</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                        </div>
+<div class="line"><a name="l00672"></a><span class="lineno">  672</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                    </div>
+<div class="line"><a name="l00673"></a><span class="lineno">  673</span>&#160;        KeyT                *d_keys_in,                             </div>
+<div class="line"><a name="l00674"></a><span class="lineno">  674</span>&#160;        KeyT                *d_keys_out,                            </div>
+<div class="line"><a name="l00675"></a><span class="lineno">  675</span>&#160;        <span class="keywordtype">int</span>                 num_items,                              </div>
+<div class="line"><a name="l00676"></a><span class="lineno">  676</span>&#160;        <span class="keywordtype">int</span>                 begin_bit           = 0,                </div>
+<div class="line"><a name="l00677"></a><span class="lineno">  677</span>&#160;        <span class="keywordtype">int</span>                 end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
+<div class="line"><a name="l00678"></a><span class="lineno">  678</span>&#160;        cudaStream_t        stream              = 0,                </div>
+<div class="line"><a name="l00679"></a><span class="lineno">  679</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
+<div class="line"><a name="l00680"></a><span class="lineno">  680</span>&#160;    {</div>
+<div class="line"><a name="l00681"></a><span class="lineno">  681</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00682"></a><span class="lineno">  682</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
+<div class="line"><a name="l00683"></a><span class="lineno">  683</span>&#160;</div>
+<div class="line"><a name="l00684"></a><span class="lineno">  684</span>&#160;        DoubleBuffer&lt;KeyT&gt;      d_keys(d_keys_in, d_keys_out);</div>
+<div class="line"><a name="l00685"></a><span class="lineno">  685</span>&#160;        DoubleBuffer&lt;NullType&gt;  d_values;</div>
+<div class="line"><a name="l00686"></a><span class="lineno">  686</span>&#160;</div>
+<div class="line"><a name="l00687"></a><span class="lineno">  687</span>&#160;        <span class="keywordflow">return</span> DispatchRadixSort&lt;true, KeyT, NullType, OffsetT&gt;::Dispatch(</div>
+<div class="line"><a name="l00688"></a><span class="lineno">  688</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00689"></a><span class="lineno">  689</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00690"></a><span class="lineno">  690</span>&#160;            d_keys,</div>
+<div class="line"><a name="l00691"></a><span class="lineno">  691</span>&#160;            d_values,</div>
+<div class="line"><a name="l00692"></a><span class="lineno">  692</span>&#160;            num_items,</div>
+<div class="line"><a name="l00693"></a><span class="lineno">  693</span>&#160;            begin_bit,</div>
+<div class="line"><a name="l00694"></a><span class="lineno">  694</span>&#160;            end_bit,</div>
+<div class="line"><a name="l00695"></a><span class="lineno">  695</span>&#160;            <span class="keyword">false</span>,</div>
+<div class="line"><a name="l00696"></a><span class="lineno">  696</span>&#160;            stream,</div>
+<div class="line"><a name="l00697"></a><span class="lineno">  697</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00698"></a><span class="lineno">  698</span>&#160;    }</div>
+<div class="line"><a name="l00699"></a><span class="lineno">  699</span>&#160;</div>
+<div class="line"><a name="l00700"></a><span class="lineno">  700</span>&#160;</div>
+<div class="line"><a name="l00752"></a><span class="lineno">  752</span>&#160;    <span class="keyword">template</span> &lt;<span class="keyword">typename</span> KeyT&gt;</div>
+<div class="line"><a name="l00753"></a><span class="lineno">  753</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00754"></a><span class="lineno"><a class="code" href="structcub_1_1_device_radix_sort.html#a95a8939332405efec4527442c26c2628">  754</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_radix_sort.html#a95a8939332405efec4527442c26c2628" title="Sorts keys into descending order. (~N auxiliary storage required). ">SortKeysDescending</a>(</div>
+<div class="line"><a name="l00755"></a><span class="lineno">  755</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                        </div>
+<div class="line"><a name="l00756"></a><span class="lineno">  756</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                    </div>
+<div class="line"><a name="l00757"></a><span class="lineno">  757</span>&#160;        DoubleBuffer&lt;KeyT&gt;  &amp;d_keys,                                </div>
+<div class="line"><a name="l00758"></a><span class="lineno">  758</span>&#160;        <span class="keywordtype">int</span>                 num_items,                              </div>
+<div class="line"><a name="l00759"></a><span class="lineno">  759</span>&#160;        <span class="keywordtype">int</span>                 begin_bit           = 0,                </div>
+<div class="line"><a name="l00760"></a><span class="lineno">  760</span>&#160;        <span class="keywordtype">int</span>                 end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
+<div class="line"><a name="l00761"></a><span class="lineno">  761</span>&#160;        cudaStream_t        stream              = 0,                </div>
+<div class="line"><a name="l00762"></a><span class="lineno">  762</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
+<div class="line"><a name="l00763"></a><span class="lineno">  763</span>&#160;    {</div>
+<div class="line"><a name="l00764"></a><span class="lineno">  764</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00765"></a><span class="lineno">  765</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
+<div class="line"><a name="l00766"></a><span class="lineno">  766</span>&#160;</div>
+<div class="line"><a name="l00767"></a><span class="lineno">  767</span>&#160;        <span class="comment">// Null value type</span></div>
+<div class="line"><a name="l00768"></a><span class="lineno">  768</span>&#160;        DoubleBuffer&lt;NullType&gt; d_values;</div>
+<div class="line"><a name="l00769"></a><span class="lineno">  769</span>&#160;</div>
+<div class="line"><a name="l00770"></a><span class="lineno">  770</span>&#160;        <span class="keywordflow">return</span> DispatchRadixSort&lt;true, KeyT, NullType, OffsetT&gt;::Dispatch(</div>
+<div class="line"><a name="l00771"></a><span class="lineno">  771</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00772"></a><span class="lineno">  772</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00773"></a><span class="lineno">  773</span>&#160;            d_keys,</div>
+<div class="line"><a name="l00774"></a><span class="lineno">  774</span>&#160;            d_values,</div>
+<div class="line"><a name="l00775"></a><span class="lineno">  775</span>&#160;            num_items,</div>
+<div class="line"><a name="l00776"></a><span class="lineno">  776</span>&#160;            begin_bit,</div>
+<div class="line"><a name="l00777"></a><span class="lineno">  777</span>&#160;            end_bit,</div>
+<div class="line"><a name="l00778"></a><span class="lineno">  778</span>&#160;            <span class="keyword">true</span>,</div>
+<div class="line"><a name="l00779"></a><span class="lineno">  779</span>&#160;            stream,</div>
+<div class="line"><a name="l00780"></a><span class="lineno">  780</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00781"></a><span class="lineno">  781</span>&#160;    }</div>
+<div class="line"><a name="l00782"></a><span class="lineno">  782</span>&#160;</div>
+<div class="line"><a name="l00783"></a><span class="lineno">  783</span>&#160;</div>
+<div class="line"><a name="l00785"></a><span class="lineno">  785</span>&#160;</div>
+<div class="line"><a name="l00786"></a><span class="lineno">  786</span>&#160;</div>
+<div class="line"><a name="l00787"></a><span class="lineno">  787</span>&#160;};</div>
+<div class="line"><a name="l00788"></a><span class="lineno">  788</span>&#160;</div>
+<div class="line"><a name="l00793"></a><span class="lineno">  793</span>&#160;}               <span class="comment">// CUB namespace</span></div>
+<div class="line"><a name="l00794"></a><span class="lineno">  794</span>&#160;CUB_NS_POSTFIX  <span class="comment">// Optional outer namespace(s)</span></div>
+<div class="line"><a name="l00795"></a><span class="lineno">  795</span>&#160;</div>
 <div class="line"><a name="l00796"></a><span class="lineno">  796</span>&#160;</div>
-<div class="line"><a name="l00801"></a><span class="lineno">  801</span>&#160;}               <span class="comment">// CUB namespace</span></div>
-<div class="line"><a name="l00802"></a><span class="lineno">  802</span>&#160;CUB_NS_POSTFIX  <span class="comment">// Optional outer namespace(s)</span></div>
-<div class="line"><a name="l00803"></a><span class="lineno">  803</span>&#160;</div>
-<div class="line"><a name="l00804"></a><span class="lineno">  804</span>&#160;</div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/device__reduce_8cuh.html b/docs/html/device__reduce_8cuh.html
index f3f384a9e6..621d856088 100644
--- a/docs/html/device__reduce_8cuh.html
+++ b/docs/html/device__reduce_8cuh.html
@@ -106,6 +106,7 @@
 <div class="contents">
 <div class="textblock"><code>#include &lt;stdio.h&gt;</code><br/>
 <code>#include &lt;iterator&gt;</code><br/>
+<code>#include &lt;limits&gt;</code><br/>
 <code>#include &quot;dispatch/dispatch_reduce.cuh&quot;</code><br/>
 <code>#include &quot;dispatch/dispatch_reduce_by_key.cuh&quot;</code><br/>
 <code>#include &quot;../util_namespace.cuh&quot;</code><br/>
@@ -137,7 +138,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/device__reduce_8cuh_source.html b/docs/html/device__reduce_8cuh_source.html
index 94f5ef601f..e6c2a32cb7 100644
--- a/docs/html/device__reduce_8cuh_source.html
+++ b/docs/html/device__reduce_8cuh_source.html
@@ -134,266 +134,256 @@
 <div class="line"><a name="l00036"></a><span class="lineno">   36</span>&#160;<span class="preprocessor"></span></div>
 <div class="line"><a name="l00037"></a><span class="lineno">   37</span>&#160;<span class="preprocessor">#include &lt;stdio.h&gt;</span></div>
 <div class="line"><a name="l00038"></a><span class="lineno">   38</span>&#160;<span class="preprocessor">#include &lt;iterator&gt;</span></div>
-<div class="line"><a name="l00039"></a><span class="lineno">   39</span>&#160;</div>
-<div class="line"><a name="l00040"></a><span class="lineno">   40</span>&#160;<span class="preprocessor">#include &quot;dispatch/dispatch_reduce.cuh&quot;</span></div>
-<div class="line"><a name="l00041"></a><span class="lineno">   41</span>&#160;<span class="preprocessor">#include &quot;dispatch/dispatch_reduce_by_key.cuh&quot;</span></div>
-<div class="line"><a name="l00042"></a><span class="lineno">   42</span>&#160;<span class="preprocessor">#include &quot;../util_namespace.cuh&quot;</span></div>
-<div class="line"><a name="l00043"></a><span class="lineno">   43</span>&#160;</div>
-<div class="line"><a name="l00045"></a><span class="lineno">   45</span>&#160;CUB_NS_PREFIX</div>
-<div class="line"><a name="l00046"></a><span class="lineno">   46</span>&#160;</div>
-<div class="line"><a name="l00048"></a><span class="lineno">   48</span>&#160;<span class="keyword">namespace </span>cub {</div>
-<div class="line"><a name="l00049"></a><span class="lineno">   49</span>&#160;</div>
+<div class="line"><a name="l00039"></a><span class="lineno">   39</span>&#160;<span class="preprocessor">#include &lt;limits&gt;</span></div>
+<div class="line"><a name="l00040"></a><span class="lineno">   40</span>&#160;</div>
+<div class="line"><a name="l00041"></a><span class="lineno">   41</span>&#160;<span class="preprocessor">#include &quot;dispatch/dispatch_reduce.cuh&quot;</span></div>
+<div class="line"><a name="l00042"></a><span class="lineno">   42</span>&#160;<span class="preprocessor">#include &quot;dispatch/dispatch_reduce_by_key.cuh&quot;</span></div>
+<div class="line"><a name="l00043"></a><span class="lineno">   43</span>&#160;<span class="preprocessor">#include &quot;../util_namespace.cuh&quot;</span></div>
+<div class="line"><a name="l00044"></a><span class="lineno">   44</span>&#160;</div>
+<div class="line"><a name="l00046"></a><span class="lineno">   46</span>&#160;CUB_NS_PREFIX</div>
+<div class="line"><a name="l00047"></a><span class="lineno">   47</span>&#160;</div>
+<div class="line"><a name="l00049"></a><span class="lineno">   49</span>&#160;<span class="keyword">namespace </span>cub {</div>
 <div class="line"><a name="l00050"></a><span class="lineno">   50</span>&#160;</div>
-<div class="line"><a name="l00082"></a><span class="lineno"><a class="code" href="structcub_1_1_device_reduce.html">   82</a></span>&#160;<span class="keyword">struct </span><a class="code" href="structcub_1_1_device_reduce.html" title="DeviceReduce provides device-wide, parallel operations for computing a reduction across a sequence of...">DeviceReduce</a></div>
-<div class="line"><a name="l00083"></a><span class="lineno">   83</span>&#160;{</div>
-<div class="line"><a name="l00137"></a><span class="lineno">  137</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00138"></a><span class="lineno">  138</span>&#160;        <span class="keyword">typename</span>                    InputIteratorT,</div>
-<div class="line"><a name="l00139"></a><span class="lineno">  139</span>&#160;        <span class="keyword">typename</span>                    OutputIteratorT,</div>
-<div class="line"><a name="l00140"></a><span class="lineno">  140</span>&#160;        <span class="keyword">typename</span>                    ReductionOp&gt;</div>
+<div class="line"><a name="l00051"></a><span class="lineno">   51</span>&#160;</div>
+<div class="line"><a name="l00083"></a><span class="lineno"><a class="code" href="structcub_1_1_device_reduce.html">   83</a></span>&#160;<span class="keyword">struct </span><a class="code" href="structcub_1_1_device_reduce.html" title="DeviceReduce provides device-wide, parallel operations for computing a reduction across a sequence of...">DeviceReduce</a></div>
+<div class="line"><a name="l00084"></a><span class="lineno">   84</span>&#160;{</div>
+<div class="line"><a name="l00136"></a><span class="lineno">  136</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00137"></a><span class="lineno">  137</span>&#160;        <span class="keyword">typename</span>                    InputIteratorT,</div>
+<div class="line"><a name="l00138"></a><span class="lineno">  138</span>&#160;        <span class="keyword">typename</span>                    OutputIteratorT,</div>
+<div class="line"><a name="l00139"></a><span class="lineno">  139</span>&#160;        <span class="keyword">typename</span>                    ReductionOpT,</div>
+<div class="line"><a name="l00140"></a><span class="lineno">  140</span>&#160;        <span class="keyword">typename</span>                    T&gt;</div>
 <div class="line"><a name="l00141"></a><span class="lineno">  141</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00142"></a><span class="lineno"><a class="code" href="structcub_1_1_device_reduce.html#a326ef5ae888b92442295c26190a6c39c">  142</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_reduce.html#a326ef5ae888b92442295c26190a6c39c" title="Computes a device-wide reduction using the specified binary reduction_op functor. ...">Reduce</a>(</div>
-<div class="line"><a name="l00143"></a><span class="lineno">  143</span>&#160;        <span class="keywordtype">void</span>*                       d_temp_storage,                     </div>
+<div class="line"><a name="l00142"></a><span class="lineno"><a class="code" href="structcub_1_1_device_reduce.html#aa4adabeb841b852a7a5ecf4f99a2daeb">  142</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_reduce.html#aa4adabeb841b852a7a5ecf4f99a2daeb" title="Computes a device-wide reduction using the specified binary reduction_op functor and initial value in...">Reduce</a>(</div>
+<div class="line"><a name="l00143"></a><span class="lineno">  143</span>&#160;        <span class="keywordtype">void</span>                        *d_temp_storage,                    </div>
 <div class="line"><a name="l00144"></a><span class="lineno">  144</span>&#160;        <span class="keywordtype">size_t</span>                      &amp;temp_storage_bytes,                </div>
 <div class="line"><a name="l00145"></a><span class="lineno">  145</span>&#160;        InputIteratorT              d_in,                               </div>
 <div class="line"><a name="l00146"></a><span class="lineno">  146</span>&#160;        OutputIteratorT             d_out,                              </div>
 <div class="line"><a name="l00147"></a><span class="lineno">  147</span>&#160;        <span class="keywordtype">int</span>                         num_items,                          </div>
-<div class="line"><a name="l00148"></a><span class="lineno">  148</span>&#160;        ReductionOp                 reduction_op,                       </div>
-<div class="line"><a name="l00149"></a><span class="lineno">  149</span>&#160;        cudaStream_t                stream              = 0,            </div>
-<div class="line"><a name="l00150"></a><span class="lineno">  150</span>&#160;        <span class="keywordtype">bool</span>                        debug_synchronous   = <span class="keyword">false</span>)        </div>
-<div class="line"><a name="l00151"></a><span class="lineno">  151</span>&#160;    {</div>
-<div class="line"><a name="l00152"></a><span class="lineno">  152</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00153"></a><span class="lineno">  153</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
+<div class="line"><a name="l00148"></a><span class="lineno">  148</span>&#160;        ReductionOpT                reduction_op,                       </div>
+<div class="line"><a name="l00149"></a><span class="lineno">  149</span>&#160;        T                           init,                               </div>
+<div class="line"><a name="l00150"></a><span class="lineno">  150</span>&#160;        cudaStream_t                stream              = 0,            </div>
+<div class="line"><a name="l00151"></a><span class="lineno">  151</span>&#160;        <span class="keywordtype">bool</span>                        debug_synchronous   = <span class="keyword">false</span>)        </div>
+<div class="line"><a name="l00152"></a><span class="lineno">  152</span>&#160;    {</div>
+<div class="line"><a name="l00153"></a><span class="lineno">  153</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;  <span class="comment">// Signed integer type for global offsets</span></div>
 <div class="line"><a name="l00154"></a><span class="lineno">  154</span>&#160;</div>
-<div class="line"><a name="l00155"></a><span class="lineno">  155</span>&#160;        <span class="comment">// Dispatch type</span></div>
-<div class="line"><a name="l00156"></a><span class="lineno">  156</span>&#160;        <span class="keyword">typedef</span> DispatchReduce&lt;InputIteratorT, OutputIteratorT, OffsetT, ReductionOp&gt; DispatchReduce;</div>
-<div class="line"><a name="l00157"></a><span class="lineno">  157</span>&#160;</div>
-<div class="line"><a name="l00158"></a><span class="lineno">  158</span>&#160;        <span class="keywordflow">return</span> DispatchReduce::Dispatch(</div>
-<div class="line"><a name="l00159"></a><span class="lineno">  159</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00160"></a><span class="lineno">  160</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00161"></a><span class="lineno">  161</span>&#160;            d_in,</div>
-<div class="line"><a name="l00162"></a><span class="lineno">  162</span>&#160;            d_out,</div>
-<div class="line"><a name="l00163"></a><span class="lineno">  163</span>&#160;            num_items,</div>
-<div class="line"><a name="l00164"></a><span class="lineno">  164</span>&#160;            reduction_op,</div>
-<div class="line"><a name="l00165"></a><span class="lineno">  165</span>&#160;            stream,</div>
-<div class="line"><a name="l00166"></a><span class="lineno">  166</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00167"></a><span class="lineno">  167</span>&#160;    }</div>
-<div class="line"><a name="l00168"></a><span class="lineno">  168</span>&#160;</div>
-<div class="line"><a name="l00169"></a><span class="lineno">  169</span>&#160;</div>
-<div class="line"><a name="l00215"></a><span class="lineno">  215</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00216"></a><span class="lineno">  216</span>&#160;        <span class="keyword">typename</span>                    InputIteratorT,</div>
-<div class="line"><a name="l00217"></a><span class="lineno">  217</span>&#160;        <span class="keyword">typename</span>                    OutputIteratorT&gt;</div>
-<div class="line"><a name="l00218"></a><span class="lineno">  218</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00219"></a><span class="lineno"><a class="code" href="structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f">  219</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f" title="Computes a device-wide sum using the addition (&#39;+&#39;) operator. ">Sum</a>(</div>
-<div class="line"><a name="l00220"></a><span class="lineno">  220</span>&#160;        <span class="keywordtype">void</span>*                       d_temp_storage,                    </div>
-<div class="line"><a name="l00221"></a><span class="lineno">  221</span>&#160;        <span class="keywordtype">size_t</span>                      &amp;temp_storage_bytes,                </div>
-<div class="line"><a name="l00222"></a><span class="lineno">  222</span>&#160;        InputIteratorT              d_in,                               </div>
-<div class="line"><a name="l00223"></a><span class="lineno">  223</span>&#160;        OutputIteratorT             d_out,                              </div>
-<div class="line"><a name="l00224"></a><span class="lineno">  224</span>&#160;        <span class="keywordtype">int</span>                         num_items,                          </div>
-<div class="line"><a name="l00225"></a><span class="lineno">  225</span>&#160;        cudaStream_t                stream              = 0,            </div>
-<div class="line"><a name="l00226"></a><span class="lineno">  226</span>&#160;        <span class="keywordtype">bool</span>                        debug_synchronous   = <span class="keyword">false</span>)        </div>
-<div class="line"><a name="l00227"></a><span class="lineno">  227</span>&#160;    {</div>
-<div class="line"><a name="l00228"></a><span class="lineno">  228</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00229"></a><span class="lineno">  229</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00230"></a><span class="lineno">  230</span>&#160;</div>
-<div class="line"><a name="l00231"></a><span class="lineno">  231</span>&#160;        <span class="comment">// Dispatch type</span></div>
-<div class="line"><a name="l00232"></a><span class="lineno">  232</span>&#160;        <span class="keyword">typedef</span> DispatchReduce&lt;InputIteratorT, OutputIteratorT, OffsetT, cub::Sum&gt; DispatchReduce;</div>
-<div class="line"><a name="l00233"></a><span class="lineno">  233</span>&#160;</div>
-<div class="line"><a name="l00234"></a><span class="lineno">  234</span>&#160;        <span class="keywordflow">return</span> DispatchReduce::Dispatch(</div>
-<div class="line"><a name="l00235"></a><span class="lineno">  235</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00236"></a><span class="lineno">  236</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00237"></a><span class="lineno">  237</span>&#160;            d_in,</div>
-<div class="line"><a name="l00238"></a><span class="lineno">  238</span>&#160;            d_out,</div>
-<div class="line"><a name="l00239"></a><span class="lineno">  239</span>&#160;            num_items,</div>
-<div class="line"><a name="l00240"></a><span class="lineno">  240</span>&#160;            <a class="code" href="structcub_1_1_sum.html" title="Default sum functor. ">cub::Sum</a>(),</div>
-<div class="line"><a name="l00241"></a><span class="lineno">  241</span>&#160;            stream,</div>
-<div class="line"><a name="l00242"></a><span class="lineno">  242</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00243"></a><span class="lineno">  243</span>&#160;    }</div>
-<div class="line"><a name="l00244"></a><span class="lineno">  244</span>&#160;</div>
-<div class="line"><a name="l00245"></a><span class="lineno">  245</span>&#160;</div>
-<div class="line"><a name="l00287"></a><span class="lineno">  287</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00288"></a><span class="lineno">  288</span>&#160;        <span class="keyword">typename</span>                    InputIteratorT,</div>
-<div class="line"><a name="l00289"></a><span class="lineno">  289</span>&#160;        <span class="keyword">typename</span>                    OutputIteratorT&gt;</div>
-<div class="line"><a name="l00290"></a><span class="lineno">  290</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00291"></a><span class="lineno"><a class="code" href="structcub_1_1_device_reduce.html#a14d9c12a1beb9a04f77e903d07fa596a">  291</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_reduce.html#a14d9c12a1beb9a04f77e903d07fa596a" title="Computes a device-wide minimum using the less-than (&#39;&lt;&#39;) operator. ">Min</a>(</div>
-<div class="line"><a name="l00292"></a><span class="lineno">  292</span>&#160;        <span class="keywordtype">void</span>*                       d_temp_storage,                    </div>
-<div class="line"><a name="l00293"></a><span class="lineno">  293</span>&#160;        <span class="keywordtype">size_t</span>                      &amp;temp_storage_bytes,                </div>
-<div class="line"><a name="l00294"></a><span class="lineno">  294</span>&#160;        InputIteratorT              d_in,                               </div>
-<div class="line"><a name="l00295"></a><span class="lineno">  295</span>&#160;        OutputIteratorT             d_out,                              </div>
-<div class="line"><a name="l00296"></a><span class="lineno">  296</span>&#160;        <span class="keywordtype">int</span>                         num_items,                          </div>
-<div class="line"><a name="l00297"></a><span class="lineno">  297</span>&#160;        cudaStream_t                stream              = 0,            </div>
-<div class="line"><a name="l00298"></a><span class="lineno">  298</span>&#160;        <span class="keywordtype">bool</span>                        debug_synchronous   = <span class="keyword">false</span>)        </div>
-<div class="line"><a name="l00299"></a><span class="lineno">  299</span>&#160;    {</div>
-<div class="line"><a name="l00300"></a><span class="lineno">  300</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00301"></a><span class="lineno">  301</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00302"></a><span class="lineno">  302</span>&#160;</div>
-<div class="line"><a name="l00303"></a><span class="lineno">  303</span>&#160;        <span class="comment">// Dispatch type</span></div>
-<div class="line"><a name="l00304"></a><span class="lineno">  304</span>&#160;        <span class="keyword">typedef</span> DispatchReduce&lt;InputIteratorT, OutputIteratorT, OffsetT, cub::Min&gt; DispatchReduce;</div>
-<div class="line"><a name="l00305"></a><span class="lineno">  305</span>&#160;</div>
-<div class="line"><a name="l00306"></a><span class="lineno">  306</span>&#160;        <span class="keywordflow">return</span> DispatchReduce::Dispatch(</div>
-<div class="line"><a name="l00307"></a><span class="lineno">  307</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00308"></a><span class="lineno">  308</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00309"></a><span class="lineno">  309</span>&#160;            d_in,</div>
-<div class="line"><a name="l00310"></a><span class="lineno">  310</span>&#160;            d_out,</div>
-<div class="line"><a name="l00311"></a><span class="lineno">  311</span>&#160;            num_items,</div>
-<div class="line"><a name="l00312"></a><span class="lineno">  312</span>&#160;            <a class="code" href="structcub_1_1_min.html" title="Default min functor. ">cub::Min</a>(),</div>
-<div class="line"><a name="l00313"></a><span class="lineno">  313</span>&#160;            stream,</div>
-<div class="line"><a name="l00314"></a><span class="lineno">  314</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00315"></a><span class="lineno">  315</span>&#160;    }</div>
-<div class="line"><a name="l00316"></a><span class="lineno">  316</span>&#160;</div>
-<div class="line"><a name="l00317"></a><span class="lineno">  317</span>&#160;</div>
-<div class="line"><a name="l00364"></a><span class="lineno">  364</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00365"></a><span class="lineno">  365</span>&#160;        <span class="keyword">typename</span>                    InputIteratorT,</div>
-<div class="line"><a name="l00366"></a><span class="lineno">  366</span>&#160;        <span class="keyword">typename</span>                    OutputIteratorT&gt;</div>
-<div class="line"><a name="l00367"></a><span class="lineno">  367</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00368"></a><span class="lineno"><a class="code" href="structcub_1_1_device_reduce.html#a6b35963e90120b6d2c76a6068b0340a9">  368</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_reduce.html#a6b35963e90120b6d2c76a6068b0340a9" title="Finds the first device-wide minimum using the less-than (&#39;&lt;&#39;) operator, also returning the index o...">ArgMin</a>(</div>
-<div class="line"><a name="l00369"></a><span class="lineno">  369</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                    </div>
-<div class="line"><a name="l00370"></a><span class="lineno">  370</span>&#160;        <span class="keywordtype">size_t</span>                      &amp;temp_storage_bytes,                </div>
-<div class="line"><a name="l00371"></a><span class="lineno">  371</span>&#160;        InputIteratorT              d_in,                               </div>
-<div class="line"><a name="l00372"></a><span class="lineno">  372</span>&#160;        OutputIteratorT             d_out,                              </div>
-<div class="line"><a name="l00373"></a><span class="lineno">  373</span>&#160;        <span class="keywordtype">int</span>                         num_items,                          </div>
-<div class="line"><a name="l00374"></a><span class="lineno">  374</span>&#160;        cudaStream_t                stream              = 0,            </div>
-<div class="line"><a name="l00375"></a><span class="lineno">  375</span>&#160;        <span class="keywordtype">bool</span>                        debug_synchronous   = <span class="keyword">false</span>)        </div>
-<div class="line"><a name="l00376"></a><span class="lineno">  376</span>&#160;    {</div>
-<div class="line"><a name="l00377"></a><span class="lineno">  377</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00378"></a><span class="lineno">  378</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00379"></a><span class="lineno">  379</span>&#160;</div>
-<div class="line"><a name="l00380"></a><span class="lineno">  380</span>&#160;        <span class="comment">// Wrapped input iterator</span></div>
-<div class="line"><a name="l00381"></a><span class="lineno">  381</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="classcub_1_1_arg_index_input_iterator.html" title="A random-access input wrapper for pairing dereferenced values with their corresponding indices (formi...">ArgIndexInputIterator&lt;InputIteratorT, int&gt;</a> ArgIndexInputIteratorT;</div>
-<div class="line"><a name="l00382"></a><span class="lineno">  382</span>&#160;        ArgIndexInputIteratorT d_argmin_in(d_in, 0);</div>
-<div class="line"><a name="l00383"></a><span class="lineno">  383</span>&#160;</div>
-<div class="line"><a name="l00384"></a><span class="lineno">  384</span>&#160;        <span class="comment">// Dispatch type</span></div>
-<div class="line"><a name="l00385"></a><span class="lineno">  385</span>&#160;        <span class="keyword">typedef</span> DispatchReduce&lt;ArgIndexInputIteratorT, OutputIteratorT, OffsetT, cub::ArgMin&gt; DispatchReduce;</div>
-<div class="line"><a name="l00386"></a><span class="lineno">  386</span>&#160;</div>
-<div class="line"><a name="l00387"></a><span class="lineno">  387</span>&#160;        <span class="keywordflow">return</span> DispatchReduce::Dispatch(</div>
-<div class="line"><a name="l00388"></a><span class="lineno">  388</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00389"></a><span class="lineno">  389</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00390"></a><span class="lineno">  390</span>&#160;            d_argmin_in,</div>
-<div class="line"><a name="l00391"></a><span class="lineno">  391</span>&#160;            d_out,</div>
-<div class="line"><a name="l00392"></a><span class="lineno">  392</span>&#160;            num_items,</div>
-<div class="line"><a name="l00393"></a><span class="lineno">  393</span>&#160;            <a class="code" href="structcub_1_1_arg_min.html" title="Arg min functor (keeps the value and offset of the first occurrence of the smallest item) ...">cub::ArgMin</a>(),</div>
-<div class="line"><a name="l00394"></a><span class="lineno">  394</span>&#160;            stream,</div>
-<div class="line"><a name="l00395"></a><span class="lineno">  395</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00396"></a><span class="lineno">  396</span>&#160;    }</div>
-<div class="line"><a name="l00397"></a><span class="lineno">  397</span>&#160;</div>
-<div class="line"><a name="l00398"></a><span class="lineno">  398</span>&#160;</div>
-<div class="line"><a name="l00440"></a><span class="lineno">  440</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00441"></a><span class="lineno">  441</span>&#160;        <span class="keyword">typename</span>                    InputIteratorT,</div>
-<div class="line"><a name="l00442"></a><span class="lineno">  442</span>&#160;        <span class="keyword">typename</span>                    OutputIteratorT&gt;</div>
-<div class="line"><a name="l00443"></a><span class="lineno">  443</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00444"></a><span class="lineno"><a class="code" href="structcub_1_1_device_reduce.html#a974a241463ca892c8f5e73b879065e48">  444</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_reduce.html#a974a241463ca892c8f5e73b879065e48" title="Computes a device-wide maximum using the greater-than (&#39;&gt;&#39;) operator. ">Max</a>(</div>
-<div class="line"><a name="l00445"></a><span class="lineno">  445</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                    </div>
-<div class="line"><a name="l00446"></a><span class="lineno">  446</span>&#160;        <span class="keywordtype">size_t</span>                      &amp;temp_storage_bytes,                </div>
-<div class="line"><a name="l00447"></a><span class="lineno">  447</span>&#160;        InputIteratorT              d_in,                               </div>
-<div class="line"><a name="l00448"></a><span class="lineno">  448</span>&#160;        OutputIteratorT             d_out,                              </div>
-<div class="line"><a name="l00449"></a><span class="lineno">  449</span>&#160;        <span class="keywordtype">int</span>                         num_items,                          </div>
-<div class="line"><a name="l00450"></a><span class="lineno">  450</span>&#160;        cudaStream_t                stream              = 0,            </div>
-<div class="line"><a name="l00451"></a><span class="lineno">  451</span>&#160;        <span class="keywordtype">bool</span>                        debug_synchronous   = <span class="keyword">false</span>)        </div>
-<div class="line"><a name="l00452"></a><span class="lineno">  452</span>&#160;    {</div>
-<div class="line"><a name="l00453"></a><span class="lineno">  453</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00454"></a><span class="lineno">  454</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00455"></a><span class="lineno">  455</span>&#160;</div>
-<div class="line"><a name="l00456"></a><span class="lineno">  456</span>&#160;        <span class="comment">// Dispatch type</span></div>
-<div class="line"><a name="l00457"></a><span class="lineno">  457</span>&#160;        <span class="keyword">typedef</span> DispatchReduce&lt;InputIteratorT, OutputIteratorT, OffsetT, cub::Max&gt; DispatchReduce;</div>
-<div class="line"><a name="l00458"></a><span class="lineno">  458</span>&#160;</div>
-<div class="line"><a name="l00459"></a><span class="lineno">  459</span>&#160;        <span class="keywordflow">return</span> DispatchReduce::Dispatch(</div>
-<div class="line"><a name="l00460"></a><span class="lineno">  460</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00461"></a><span class="lineno">  461</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00462"></a><span class="lineno">  462</span>&#160;            d_in,</div>
-<div class="line"><a name="l00463"></a><span class="lineno">  463</span>&#160;            d_out,</div>
-<div class="line"><a name="l00464"></a><span class="lineno">  464</span>&#160;            num_items,</div>
-<div class="line"><a name="l00465"></a><span class="lineno">  465</span>&#160;            <a class="code" href="structcub_1_1_max.html" title="Default max functor. ">cub::Max</a>(),</div>
-<div class="line"><a name="l00466"></a><span class="lineno">  466</span>&#160;            stream,</div>
-<div class="line"><a name="l00467"></a><span class="lineno">  467</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00468"></a><span class="lineno">  468</span>&#160;    }</div>
-<div class="line"><a name="l00469"></a><span class="lineno">  469</span>&#160;</div>
-<div class="line"><a name="l00470"></a><span class="lineno">  470</span>&#160;</div>
-<div class="line"><a name="l00517"></a><span class="lineno">  517</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00518"></a><span class="lineno">  518</span>&#160;        <span class="keyword">typename</span>                    InputIteratorT,</div>
-<div class="line"><a name="l00519"></a><span class="lineno">  519</span>&#160;        <span class="keyword">typename</span>                    OutputIteratorT&gt;</div>
-<div class="line"><a name="l00520"></a><span class="lineno">  520</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00521"></a><span class="lineno"><a class="code" href="structcub_1_1_device_reduce.html#a07a9ecbf0b6db1882107f6adee1c4276">  521</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_reduce.html#a07a9ecbf0b6db1882107f6adee1c4276" title="Finds the first device-wide maximum using the greater-than (&#39;&gt;&#39;) operator, also returning the inde...">ArgMax</a>(</div>
-<div class="line"><a name="l00522"></a><span class="lineno">  522</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                    </div>
-<div class="line"><a name="l00523"></a><span class="lineno">  523</span>&#160;        <span class="keywordtype">size_t</span>                      &amp;temp_storage_bytes,                </div>
-<div class="line"><a name="l00524"></a><span class="lineno">  524</span>&#160;        InputIteratorT              d_in,                               </div>
-<div class="line"><a name="l00525"></a><span class="lineno">  525</span>&#160;        OutputIteratorT             d_out,                              </div>
-<div class="line"><a name="l00526"></a><span class="lineno">  526</span>&#160;        <span class="keywordtype">int</span>                         num_items,                          </div>
-<div class="line"><a name="l00527"></a><span class="lineno">  527</span>&#160;        cudaStream_t                stream              = 0,            </div>
-<div class="line"><a name="l00528"></a><span class="lineno">  528</span>&#160;        <span class="keywordtype">bool</span>                        debug_synchronous   = <span class="keyword">false</span>)        </div>
-<div class="line"><a name="l00529"></a><span class="lineno">  529</span>&#160;    {</div>
-<div class="line"><a name="l00530"></a><span class="lineno">  530</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00531"></a><span class="lineno">  531</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00532"></a><span class="lineno">  532</span>&#160;</div>
-<div class="line"><a name="l00533"></a><span class="lineno">  533</span>&#160;        <span class="comment">// Wrapped input iterator</span></div>
-<div class="line"><a name="l00534"></a><span class="lineno">  534</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="classcub_1_1_arg_index_input_iterator.html" title="A random-access input wrapper for pairing dereferenced values with their corresponding indices (formi...">ArgIndexInputIterator&lt;InputIteratorT, int&gt;</a> ArgIndexInputIteratorT;</div>
-<div class="line"><a name="l00535"></a><span class="lineno">  535</span>&#160;        ArgIndexInputIteratorT d_argmax_in(d_in, 0);</div>
-<div class="line"><a name="l00536"></a><span class="lineno">  536</span>&#160;</div>
-<div class="line"><a name="l00537"></a><span class="lineno">  537</span>&#160;        <span class="comment">// Dispatch type</span></div>
-<div class="line"><a name="l00538"></a><span class="lineno">  538</span>&#160;        <span class="keyword">typedef</span> DispatchReduce&lt;ArgIndexInputIteratorT, OutputIteratorT, OffsetT, cub::ArgMax&gt; DispatchReduce;</div>
-<div class="line"><a name="l00539"></a><span class="lineno">  539</span>&#160;</div>
-<div class="line"><a name="l00540"></a><span class="lineno">  540</span>&#160;        <span class="keywordflow">return</span> DispatchReduce::Dispatch(</div>
-<div class="line"><a name="l00541"></a><span class="lineno">  541</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00542"></a><span class="lineno">  542</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00543"></a><span class="lineno">  543</span>&#160;            d_argmax_in,</div>
-<div class="line"><a name="l00544"></a><span class="lineno">  544</span>&#160;            d_out,</div>
-<div class="line"><a name="l00545"></a><span class="lineno">  545</span>&#160;            num_items,</div>
-<div class="line"><a name="l00546"></a><span class="lineno">  546</span>&#160;            <a class="code" href="structcub_1_1_arg_max.html" title="Arg max functor (keeps the value and offset of the first occurrence of the larger item) ...">cub::ArgMax</a>(),</div>
-<div class="line"><a name="l00547"></a><span class="lineno">  547</span>&#160;            stream,</div>
-<div class="line"><a name="l00548"></a><span class="lineno">  548</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00549"></a><span class="lineno">  549</span>&#160;    }</div>
-<div class="line"><a name="l00550"></a><span class="lineno">  550</span>&#160;</div>
-<div class="line"><a name="l00551"></a><span class="lineno">  551</span>&#160;</div>
-<div class="line"><a name="l00634"></a><span class="lineno">  634</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00635"></a><span class="lineno">  635</span>&#160;        <span class="keyword">typename</span>                    KeysInputIteratorT,</div>
-<div class="line"><a name="l00636"></a><span class="lineno">  636</span>&#160;        <span class="keyword">typename</span>                    UniqueOutputIteratorT,</div>
-<div class="line"><a name="l00637"></a><span class="lineno">  637</span>&#160;        <span class="keyword">typename</span>                    ValuesInputIteratorT,</div>
-<div class="line"><a name="l00638"></a><span class="lineno">  638</span>&#160;        <span class="keyword">typename</span>                    AggregatesOutputIteratorT,</div>
-<div class="line"><a name="l00639"></a><span class="lineno">  639</span>&#160;        <span class="keyword">typename</span>                    NumRunsOutputIteratorT,</div>
-<div class="line"><a name="l00640"></a><span class="lineno">  640</span>&#160;        <span class="keyword">typename</span>                    ReductionOp&gt;</div>
-<div class="line"><a name="l00641"></a><span class="lineno">  641</span>&#160;    CUB_RUNTIME_FUNCTION __forceinline__</div>
-<div class="line"><a name="l00642"></a><span class="lineno"><a class="code" href="structcub_1_1_device_reduce.html#a3206c7c11b4d894ca176482e86215b02">  642</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_reduce.html#a3206c7c11b4d894ca176482e86215b02" title="Reduces segments of values, where segments are demarcated by corresponding runs of identical keys...">ReduceByKey</a>(</div>
-<div class="line"><a name="l00643"></a><span class="lineno">  643</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                </div>
-<div class="line"><a name="l00644"></a><span class="lineno">  644</span>&#160;        <span class="keywordtype">size_t</span>                      &amp;temp_storage_bytes,            </div>
-<div class="line"><a name="l00645"></a><span class="lineno">  645</span>&#160;        KeysInputIteratorT          d_keys_in,                      </div>
-<div class="line"><a name="l00646"></a><span class="lineno">  646</span>&#160;        UniqueOutputIteratorT       d_unique_out,                   </div>
-<div class="line"><a name="l00647"></a><span class="lineno">  647</span>&#160;        ValuesInputIteratorT        d_values_in,                    </div>
-<div class="line"><a name="l00648"></a><span class="lineno">  648</span>&#160;        AggregatesOutputIteratorT   d_aggregates_out,               </div>
-<div class="line"><a name="l00649"></a><span class="lineno">  649</span>&#160;        NumRunsOutputIteratorT      d_num_runs_out,                     </div>
-<div class="line"><a name="l00650"></a><span class="lineno">  650</span>&#160;        ReductionOp                 reduction_op,                   </div>
-<div class="line"><a name="l00651"></a><span class="lineno">  651</span>&#160;        <span class="keywordtype">int</span>                         num_items,                      </div>
-<div class="line"><a name="l00652"></a><span class="lineno">  652</span>&#160;        cudaStream_t                stream             = 0,         </div>
-<div class="line"><a name="l00653"></a><span class="lineno">  653</span>&#160;        <span class="keywordtype">bool</span>                        debug_synchronous  = <span class="keyword">false</span>)     </div>
-<div class="line"><a name="l00654"></a><span class="lineno">  654</span>&#160;    {</div>
-<div class="line"><a name="l00655"></a><span class="lineno">  655</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span>                 OffsetT;         <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00656"></a><span class="lineno">  656</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_null_type.html" title="A simple &quot;NULL&quot; marker type. ">NullType</a>*           FlagIterator;   <span class="comment">// FlagT iterator type (not used)</span></div>
-<div class="line"><a name="l00657"></a><span class="lineno">  657</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_null_type.html" title="A simple &quot;NULL&quot; marker type. ">NullType</a>            SelectOp;       <span class="comment">// Selection op (not used)</span></div>
-<div class="line"><a name="l00658"></a><span class="lineno">  658</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_equality.html" title="Default equality functor. ">Equality</a>            EqualityOp;     <span class="comment">// Default == operator</span></div>
-<div class="line"><a name="l00659"></a><span class="lineno">  659</span>&#160;</div>
-<div class="line"><a name="l00660"></a><span class="lineno">  660</span>&#160;        <span class="keywordflow">return</span> DispatchReduceByKey&lt;KeysInputIteratorT, UniqueOutputIteratorT, ValuesInputIteratorT, AggregatesOutputIteratorT, NumRunsOutputIteratorT, EqualityOp, ReductionOp, OffsetT&gt;::Dispatch(</div>
-<div class="line"><a name="l00661"></a><span class="lineno">  661</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00662"></a><span class="lineno">  662</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00663"></a><span class="lineno">  663</span>&#160;            d_keys_in,</div>
-<div class="line"><a name="l00664"></a><span class="lineno">  664</span>&#160;            d_unique_out,</div>
-<div class="line"><a name="l00665"></a><span class="lineno">  665</span>&#160;            d_values_in,</div>
-<div class="line"><a name="l00666"></a><span class="lineno">  666</span>&#160;            d_aggregates_out,</div>
-<div class="line"><a name="l00667"></a><span class="lineno">  667</span>&#160;            d_num_runs_out,</div>
-<div class="line"><a name="l00668"></a><span class="lineno">  668</span>&#160;            EqualityOp(),</div>
-<div class="line"><a name="l00669"></a><span class="lineno">  669</span>&#160;            reduction_op,</div>
-<div class="line"><a name="l00670"></a><span class="lineno">  670</span>&#160;            num_items,</div>
-<div class="line"><a name="l00671"></a><span class="lineno">  671</span>&#160;            stream,</div>
-<div class="line"><a name="l00672"></a><span class="lineno">  672</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00673"></a><span class="lineno">  673</span>&#160;    }</div>
-<div class="line"><a name="l00674"></a><span class="lineno">  674</span>&#160;</div>
-<div class="line"><a name="l00675"></a><span class="lineno">  675</span>&#160;};</div>
-<div class="line"><a name="l00676"></a><span class="lineno">  676</span>&#160;</div>
-<div class="line"><a name="l00681"></a><span class="lineno">  681</span>&#160;}               <span class="comment">// CUB namespace</span></div>
-<div class="line"><a name="l00682"></a><span class="lineno">  682</span>&#160;CUB_NS_POSTFIX  <span class="comment">// Optional outer namespace(s)</span></div>
-<div class="line"><a name="l00683"></a><span class="lineno">  683</span>&#160;</div>
-<div class="line"><a name="l00684"></a><span class="lineno">  684</span>&#160;</div>
+<div class="line"><a name="l00155"></a><span class="lineno">  155</span>&#160;        <span class="keywordflow">return</span> DispatchReduce&lt;InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT&gt;::Dispatch(</div>
+<div class="line"><a name="l00156"></a><span class="lineno">  156</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00157"></a><span class="lineno">  157</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00158"></a><span class="lineno">  158</span>&#160;            d_in,</div>
+<div class="line"><a name="l00159"></a><span class="lineno">  159</span>&#160;            d_out,</div>
+<div class="line"><a name="l00160"></a><span class="lineno">  160</span>&#160;            num_items,</div>
+<div class="line"><a name="l00161"></a><span class="lineno">  161</span>&#160;            reduction_op,</div>
+<div class="line"><a name="l00162"></a><span class="lineno">  162</span>&#160;            init,</div>
+<div class="line"><a name="l00163"></a><span class="lineno">  163</span>&#160;            stream,</div>
+<div class="line"><a name="l00164"></a><span class="lineno">  164</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00165"></a><span class="lineno">  165</span>&#160;    }</div>
+<div class="line"><a name="l00166"></a><span class="lineno">  166</span>&#160;</div>
+<div class="line"><a name="l00167"></a><span class="lineno">  167</span>&#160;</div>
+<div class="line"><a name="l00213"></a><span class="lineno">  213</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00214"></a><span class="lineno">  214</span>&#160;        <span class="keyword">typename</span>                    InputIteratorT,</div>
+<div class="line"><a name="l00215"></a><span class="lineno">  215</span>&#160;        <span class="keyword">typename</span>                    OutputIteratorT&gt;</div>
+<div class="line"><a name="l00216"></a><span class="lineno">  216</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00217"></a><span class="lineno"><a class="code" href="structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f">  217</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f" title="Computes a device-wide sum using the addition (+) operator. ">Sum</a>(</div>
+<div class="line"><a name="l00218"></a><span class="lineno">  218</span>&#160;        <span class="keywordtype">void</span>                        *d_temp_storage,                    </div>
+<div class="line"><a name="l00219"></a><span class="lineno">  219</span>&#160;        <span class="keywordtype">size_t</span>                      &amp;temp_storage_bytes,                </div>
+<div class="line"><a name="l00220"></a><span class="lineno">  220</span>&#160;        InputIteratorT              d_in,                               </div>
+<div class="line"><a name="l00221"></a><span class="lineno">  221</span>&#160;        OutputIteratorT             d_out,                              </div>
+<div class="line"><a name="l00222"></a><span class="lineno">  222</span>&#160;        <span class="keywordtype">int</span>                         num_items,                          </div>
+<div class="line"><a name="l00223"></a><span class="lineno">  223</span>&#160;        cudaStream_t                stream              = 0,            </div>
+<div class="line"><a name="l00224"></a><span class="lineno">  224</span>&#160;        <span class="keywordtype">bool</span>                        debug_synchronous   = <span class="keyword">false</span>)        </div>
+<div class="line"><a name="l00225"></a><span class="lineno">  225</span>&#160;    {</div>
+<div class="line"><a name="l00226"></a><span class="lineno">  226</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;                                                    <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00227"></a><span class="lineno">  227</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> std::iterator_traits&lt;InputIteratorT&gt;::value_type T;    <span class="comment">// Data element type</span></div>
+<div class="line"><a name="l00228"></a><span class="lineno">  228</span>&#160;</div>
+<div class="line"><a name="l00229"></a><span class="lineno">  229</span>&#160;        <span class="keywordflow">return</span> DispatchReduce&lt;InputIteratorT, OutputIteratorT, OffsetT, cub::Sum&gt;::Dispatch(</div>
+<div class="line"><a name="l00230"></a><span class="lineno">  230</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00231"></a><span class="lineno">  231</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00232"></a><span class="lineno">  232</span>&#160;            d_in,</div>
+<div class="line"><a name="l00233"></a><span class="lineno">  233</span>&#160;            d_out,</div>
+<div class="line"><a name="l00234"></a><span class="lineno">  234</span>&#160;            num_items,</div>
+<div class="line"><a name="l00235"></a><span class="lineno">  235</span>&#160;            <a class="code" href="structcub_1_1_sum.html" title="Default sum functor. ">cub::Sum</a>(),</div>
+<div class="line"><a name="l00236"></a><span class="lineno">  236</span>&#160;            T(),            <span class="comment">// zero-initialize</span></div>
+<div class="line"><a name="l00237"></a><span class="lineno">  237</span>&#160;            stream,</div>
+<div class="line"><a name="l00238"></a><span class="lineno">  238</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00239"></a><span class="lineno">  239</span>&#160;    }</div>
+<div class="line"><a name="l00240"></a><span class="lineno">  240</span>&#160;</div>
+<div class="line"><a name="l00241"></a><span class="lineno">  241</span>&#160;</div>
+<div class="line"><a name="l00280"></a><span class="lineno">  280</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00281"></a><span class="lineno">  281</span>&#160;        <span class="keyword">typename</span>                    InputIteratorT,</div>
+<div class="line"><a name="l00282"></a><span class="lineno">  282</span>&#160;        <span class="keyword">typename</span>                    OutputIteratorT&gt;</div>
+<div class="line"><a name="l00283"></a><span class="lineno">  283</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00284"></a><span class="lineno"><a class="code" href="structcub_1_1_device_reduce.html#a14d9c12a1beb9a04f77e903d07fa596a">  284</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_reduce.html#a14d9c12a1beb9a04f77e903d07fa596a" title="Computes a device-wide minimum using the less-than (&#39;&lt;&#39;) operator. ">Min</a>(</div>
+<div class="line"><a name="l00285"></a><span class="lineno">  285</span>&#160;        <span class="keywordtype">void</span>                        *d_temp_storage,                    </div>
+<div class="line"><a name="l00286"></a><span class="lineno">  286</span>&#160;        <span class="keywordtype">size_t</span>                      &amp;temp_storage_bytes,                </div>
+<div class="line"><a name="l00287"></a><span class="lineno">  287</span>&#160;        InputIteratorT              d_in,                               </div>
+<div class="line"><a name="l00288"></a><span class="lineno">  288</span>&#160;        OutputIteratorT             d_out,                              </div>
+<div class="line"><a name="l00289"></a><span class="lineno">  289</span>&#160;        <span class="keywordtype">int</span>                         num_items,                          </div>
+<div class="line"><a name="l00290"></a><span class="lineno">  290</span>&#160;        cudaStream_t                stream              = 0,            </div>
+<div class="line"><a name="l00291"></a><span class="lineno">  291</span>&#160;        <span class="keywordtype">bool</span>                        debug_synchronous   = <span class="keyword">false</span>)        </div>
+<div class="line"><a name="l00292"></a><span class="lineno">  292</span>&#160;    {</div>
+<div class="line"><a name="l00293"></a><span class="lineno">  293</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;                                                    <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00294"></a><span class="lineno">  294</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> std::iterator_traits&lt;InputIteratorT&gt;::value_type T;    <span class="comment">// Data element type</span></div>
+<div class="line"><a name="l00295"></a><span class="lineno">  295</span>&#160;</div>
+<div class="line"><a name="l00296"></a><span class="lineno">  296</span>&#160;        <span class="keywordflow">return</span> DispatchReduce&lt;InputIteratorT, OutputIteratorT, OffsetT, cub::Min&gt;::Dispatch(</div>
+<div class="line"><a name="l00297"></a><span class="lineno">  297</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00298"></a><span class="lineno">  298</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00299"></a><span class="lineno">  299</span>&#160;            d_in,</div>
+<div class="line"><a name="l00300"></a><span class="lineno">  300</span>&#160;            d_out,</div>
+<div class="line"><a name="l00301"></a><span class="lineno">  301</span>&#160;            num_items,</div>
+<div class="line"><a name="l00302"></a><span class="lineno">  302</span>&#160;            <a class="code" href="structcub_1_1_min.html" title="Default min functor. ">cub::Min</a>(),</div>
+<div class="line"><a name="l00303"></a><span class="lineno">  303</span>&#160;            Traits&lt;T&gt;::Max(),    <span class="comment">// replace with std::numeric_limits&lt;T&gt;::max() when C++11 support is more prevalent</span></div>
+<div class="line"><a name="l00304"></a><span class="lineno">  304</span>&#160;            stream,</div>
+<div class="line"><a name="l00305"></a><span class="lineno">  305</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00306"></a><span class="lineno">  306</span>&#160;    }</div>
+<div class="line"><a name="l00307"></a><span class="lineno">  307</span>&#160;</div>
+<div class="line"><a name="l00308"></a><span class="lineno">  308</span>&#160;</div>
+<div class="line"><a name="l00349"></a><span class="lineno">  349</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00350"></a><span class="lineno">  350</span>&#160;        <span class="keyword">typename</span>                    InputIteratorT,</div>
+<div class="line"><a name="l00351"></a><span class="lineno">  351</span>&#160;        <span class="keyword">typename</span>                    OutputIteratorT&gt;</div>
+<div class="line"><a name="l00352"></a><span class="lineno">  352</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00353"></a><span class="lineno"><a class="code" href="structcub_1_1_device_reduce.html#a6b35963e90120b6d2c76a6068b0340a9">  353</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_reduce.html#a6b35963e90120b6d2c76a6068b0340a9" title="Finds the first device-wide minimum using the less-than (&#39;&lt;&#39;) operator, also returning the index o...">ArgMin</a>(</div>
+<div class="line"><a name="l00354"></a><span class="lineno">  354</span>&#160;        <span class="keywordtype">void</span>                        *d_temp_storage,                    </div>
+<div class="line"><a name="l00355"></a><span class="lineno">  355</span>&#160;        <span class="keywordtype">size_t</span>                      &amp;temp_storage_bytes,                </div>
+<div class="line"><a name="l00356"></a><span class="lineno">  356</span>&#160;        InputIteratorT              d_in,                               </div>
+<div class="line"><a name="l00357"></a><span class="lineno">  357</span>&#160;        OutputIteratorT             d_out,                              </div>
+<div class="line"><a name="l00358"></a><span class="lineno">  358</span>&#160;        <span class="keywordtype">int</span>                         num_items,                          </div>
+<div class="line"><a name="l00359"></a><span class="lineno">  359</span>&#160;        cudaStream_t                stream              = 0,            </div>
+<div class="line"><a name="l00360"></a><span class="lineno">  360</span>&#160;        <span class="keywordtype">bool</span>                        debug_synchronous   = <span class="keyword">false</span>)        </div>
+<div class="line"><a name="l00361"></a><span class="lineno">  361</span>&#160;    {</div>
+<div class="line"><a name="l00362"></a><span class="lineno">  362</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;                                                        <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00363"></a><span class="lineno">  363</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> std::iterator_traits&lt;InputIteratorT&gt;::value_type T;        <span class="comment">// Data element type</span></div>
+<div class="line"><a name="l00364"></a><span class="lineno">  364</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="classcub_1_1_arg_index_input_iterator.html" title="A random-access input wrapper for pairing dereferenced values with their corresponding indices (formi...">ArgIndexInputIterator&lt;InputIteratorT, int&gt;</a> ArgIndexInputIteratorT;  <span class="comment">// Wrapped input iterator type</span></div>
+<div class="line"><a name="l00365"></a><span class="lineno">  365</span>&#160;</div>
+<div class="line"><a name="l00366"></a><span class="lineno">  366</span>&#160;        ArgIndexInputIteratorT      d_argmin_in(d_in);</div>
+<div class="line"><a name="l00367"></a><span class="lineno">  367</span>&#160;        KeyValuePair&lt;OffsetT, T&gt;    init = {1, Traits&lt;T&gt;::Max()};   <span class="comment">// replace with std::numeric_limits&lt;T&gt;::max() when C++11 support is more prevalent</span></div>
+<div class="line"><a name="l00368"></a><span class="lineno">  368</span>&#160;</div>
+<div class="line"><a name="l00369"></a><span class="lineno">  369</span>&#160;        <span class="keywordflow">return</span> DispatchReduce&lt;ArgIndexInputIteratorT, OutputIteratorT, OffsetT, cub::ArgMin&gt;::Dispatch(</div>
+<div class="line"><a name="l00370"></a><span class="lineno">  370</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00371"></a><span class="lineno">  371</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00372"></a><span class="lineno">  372</span>&#160;            d_argmin_in,</div>
+<div class="line"><a name="l00373"></a><span class="lineno">  373</span>&#160;            d_out,</div>
+<div class="line"><a name="l00374"></a><span class="lineno">  374</span>&#160;            num_items,</div>
+<div class="line"><a name="l00375"></a><span class="lineno">  375</span>&#160;            <a class="code" href="structcub_1_1_arg_min.html" title="Arg min functor (keeps the value and offset of the first occurrence of the smallest item) ...">cub::ArgMin</a>(),</div>
+<div class="line"><a name="l00376"></a><span class="lineno">  376</span>&#160;            init,</div>
+<div class="line"><a name="l00377"></a><span class="lineno">  377</span>&#160;            stream,</div>
+<div class="line"><a name="l00378"></a><span class="lineno">  378</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00379"></a><span class="lineno">  379</span>&#160;    }</div>
+<div class="line"><a name="l00380"></a><span class="lineno">  380</span>&#160;</div>
+<div class="line"><a name="l00381"></a><span class="lineno">  381</span>&#160;</div>
+<div class="line"><a name="l00420"></a><span class="lineno">  420</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00421"></a><span class="lineno">  421</span>&#160;        <span class="keyword">typename</span>                    InputIteratorT,</div>
+<div class="line"><a name="l00422"></a><span class="lineno">  422</span>&#160;        <span class="keyword">typename</span>                    OutputIteratorT&gt;</div>
+<div class="line"><a name="l00423"></a><span class="lineno">  423</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00424"></a><span class="lineno"><a class="code" href="structcub_1_1_device_reduce.html#a974a241463ca892c8f5e73b879065e48">  424</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_reduce.html#a974a241463ca892c8f5e73b879065e48" title="Computes a device-wide maximum using the greater-than (&#39;&gt;&#39;) operator. ">Max</a>(</div>
+<div class="line"><a name="l00425"></a><span class="lineno">  425</span>&#160;        <span class="keywordtype">void</span>                        *d_temp_storage,                    </div>
+<div class="line"><a name="l00426"></a><span class="lineno">  426</span>&#160;        <span class="keywordtype">size_t</span>                      &amp;temp_storage_bytes,                </div>
+<div class="line"><a name="l00427"></a><span class="lineno">  427</span>&#160;        InputIteratorT              d_in,                               </div>
+<div class="line"><a name="l00428"></a><span class="lineno">  428</span>&#160;        OutputIteratorT             d_out,                              </div>
+<div class="line"><a name="l00429"></a><span class="lineno">  429</span>&#160;        <span class="keywordtype">int</span>                         num_items,                          </div>
+<div class="line"><a name="l00430"></a><span class="lineno">  430</span>&#160;        cudaStream_t                stream              = 0,            </div>
+<div class="line"><a name="l00431"></a><span class="lineno">  431</span>&#160;        <span class="keywordtype">bool</span>                        debug_synchronous   = <span class="keyword">false</span>)        </div>
+<div class="line"><a name="l00432"></a><span class="lineno">  432</span>&#160;    {</div>
+<div class="line"><a name="l00433"></a><span class="lineno">  433</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;                                                    <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00434"></a><span class="lineno">  434</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> std::iterator_traits&lt;InputIteratorT&gt;::value_type T;    <span class="comment">// Data element type</span></div>
+<div class="line"><a name="l00435"></a><span class="lineno">  435</span>&#160;</div>
+<div class="line"><a name="l00436"></a><span class="lineno">  436</span>&#160;        <span class="keywordflow">return</span> DispatchReduce&lt;InputIteratorT, OutputIteratorT, OffsetT, cub::Max&gt;::Dispatch(</div>
+<div class="line"><a name="l00437"></a><span class="lineno">  437</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00438"></a><span class="lineno">  438</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00439"></a><span class="lineno">  439</span>&#160;            d_in,</div>
+<div class="line"><a name="l00440"></a><span class="lineno">  440</span>&#160;            d_out,</div>
+<div class="line"><a name="l00441"></a><span class="lineno">  441</span>&#160;            num_items,</div>
+<div class="line"><a name="l00442"></a><span class="lineno">  442</span>&#160;            <a class="code" href="structcub_1_1_max.html" title="Default max functor. ">cub::Max</a>(),</div>
+<div class="line"><a name="l00443"></a><span class="lineno">  443</span>&#160;            Traits&lt;T&gt;::Lowest(),    <span class="comment">// replace with std::numeric_limits&lt;T&gt;::lowest() when C++11 support is more prevalent</span></div>
+<div class="line"><a name="l00444"></a><span class="lineno">  444</span>&#160;            stream,</div>
+<div class="line"><a name="l00445"></a><span class="lineno">  445</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00446"></a><span class="lineno">  446</span>&#160;    }</div>
+<div class="line"><a name="l00447"></a><span class="lineno">  447</span>&#160;</div>
+<div class="line"><a name="l00448"></a><span class="lineno">  448</span>&#160;</div>
+<div class="line"><a name="l00489"></a><span class="lineno">  489</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00490"></a><span class="lineno">  490</span>&#160;        <span class="keyword">typename</span>                    InputIteratorT,</div>
+<div class="line"><a name="l00491"></a><span class="lineno">  491</span>&#160;        <span class="keyword">typename</span>                    OutputIteratorT&gt;</div>
+<div class="line"><a name="l00492"></a><span class="lineno">  492</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00493"></a><span class="lineno"><a class="code" href="structcub_1_1_device_reduce.html#a07a9ecbf0b6db1882107f6adee1c4276">  493</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_reduce.html#a07a9ecbf0b6db1882107f6adee1c4276" title="Finds the first device-wide maximum using the greater-than (&#39;&gt;&#39;) operator, also returning the inde...">ArgMax</a>(</div>
+<div class="line"><a name="l00494"></a><span class="lineno">  494</span>&#160;        <span class="keywordtype">void</span>                        *d_temp_storage,                    </div>
+<div class="line"><a name="l00495"></a><span class="lineno">  495</span>&#160;        <span class="keywordtype">size_t</span>                      &amp;temp_storage_bytes,                </div>
+<div class="line"><a name="l00496"></a><span class="lineno">  496</span>&#160;        InputIteratorT              d_in,                               </div>
+<div class="line"><a name="l00497"></a><span class="lineno">  497</span>&#160;        OutputIteratorT             d_out,                              </div>
+<div class="line"><a name="l00498"></a><span class="lineno">  498</span>&#160;        <span class="keywordtype">int</span>                         num_items,                          </div>
+<div class="line"><a name="l00499"></a><span class="lineno">  499</span>&#160;        cudaStream_t                stream              = 0,            </div>
+<div class="line"><a name="l00500"></a><span class="lineno">  500</span>&#160;        <span class="keywordtype">bool</span>                        debug_synchronous   = <span class="keyword">false</span>)        </div>
+<div class="line"><a name="l00501"></a><span class="lineno">  501</span>&#160;    {</div>
+<div class="line"><a name="l00502"></a><span class="lineno">  502</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;                                                            <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00503"></a><span class="lineno">  503</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> std::iterator_traits&lt;InputIteratorT&gt;::value_type T;            <span class="comment">// Data element type</span></div>
+<div class="line"><a name="l00504"></a><span class="lineno">  504</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="classcub_1_1_arg_index_input_iterator.html" title="A random-access input wrapper for pairing dereferenced values with their corresponding indices (formi...">ArgIndexInputIterator&lt;InputIteratorT, int&gt;</a> ArgIndexInputIteratorT;      <span class="comment">// Wrapped input iterator</span></div>
+<div class="line"><a name="l00505"></a><span class="lineno">  505</span>&#160;</div>
+<div class="line"><a name="l00506"></a><span class="lineno">  506</span>&#160;        ArgIndexInputIteratorT      d_argmax_in(d_in);</div>
+<div class="line"><a name="l00507"></a><span class="lineno">  507</span>&#160;        KeyValuePair&lt;OffsetT, T&gt;    init = {1, Traits&lt;T&gt;::Lowest()};                    <span class="comment">// replace with std::numeric_limits&lt;T&gt;::lowest() when C++11 support is more prevalent</span></div>
+<div class="line"><a name="l00508"></a><span class="lineno">  508</span>&#160;</div>
+<div class="line"><a name="l00509"></a><span class="lineno">  509</span>&#160;        <span class="keywordflow">return</span> DispatchReduce&lt;ArgIndexInputIteratorT, OutputIteratorT, OffsetT, cub::ArgMax&gt;::Dispatch(</div>
+<div class="line"><a name="l00510"></a><span class="lineno">  510</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00511"></a><span class="lineno">  511</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00512"></a><span class="lineno">  512</span>&#160;            d_argmax_in,</div>
+<div class="line"><a name="l00513"></a><span class="lineno">  513</span>&#160;            d_out,</div>
+<div class="line"><a name="l00514"></a><span class="lineno">  514</span>&#160;            num_items,</div>
+<div class="line"><a name="l00515"></a><span class="lineno">  515</span>&#160;            <a class="code" href="structcub_1_1_arg_max.html" title="Arg max functor (keeps the value and offset of the first occurrence of the larger item) ...">cub::ArgMax</a>(),</div>
+<div class="line"><a name="l00516"></a><span class="lineno">  516</span>&#160;            init,</div>
+<div class="line"><a name="l00517"></a><span class="lineno">  517</span>&#160;            stream,</div>
+<div class="line"><a name="l00518"></a><span class="lineno">  518</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00519"></a><span class="lineno">  519</span>&#160;    }</div>
+<div class="line"><a name="l00520"></a><span class="lineno">  520</span>&#160;</div>
+<div class="line"><a name="l00521"></a><span class="lineno">  521</span>&#160;</div>
+<div class="line"><a name="l00603"></a><span class="lineno">  603</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00604"></a><span class="lineno">  604</span>&#160;        <span class="keyword">typename</span>                    KeysInputIteratorT,</div>
+<div class="line"><a name="l00605"></a><span class="lineno">  605</span>&#160;        <span class="keyword">typename</span>                    UniqueOutputIteratorT,</div>
+<div class="line"><a name="l00606"></a><span class="lineno">  606</span>&#160;        <span class="keyword">typename</span>                    ValuesInputIteratorT,</div>
+<div class="line"><a name="l00607"></a><span class="lineno">  607</span>&#160;        <span class="keyword">typename</span>                    AggregatesOutputIteratorT,</div>
+<div class="line"><a name="l00608"></a><span class="lineno">  608</span>&#160;        <span class="keyword">typename</span>                    NumRunsOutputIteratorT,</div>
+<div class="line"><a name="l00609"></a><span class="lineno">  609</span>&#160;        <span class="keyword">typename</span>                    ReductionOpT&gt;</div>
+<div class="line"><a name="l00610"></a><span class="lineno">  610</span>&#160;    CUB_RUNTIME_FUNCTION __forceinline__</div>
+<div class="line"><a name="l00611"></a><span class="lineno"><a class="code" href="structcub_1_1_device_reduce.html#a303ae673ac32825f95912b4bfff8bef1">  611</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_reduce.html#a303ae673ac32825f95912b4bfff8bef1" title="Reduces segments of values, where segments are demarcated by corresponding runs of identical keys...">ReduceByKey</a>(</div>
+<div class="line"><a name="l00612"></a><span class="lineno">  612</span>&#160;        <span class="keywordtype">void</span>                        *d_temp_storage,                </div>
+<div class="line"><a name="l00613"></a><span class="lineno">  613</span>&#160;        <span class="keywordtype">size_t</span>                      &amp;temp_storage_bytes,            </div>
+<div class="line"><a name="l00614"></a><span class="lineno">  614</span>&#160;        KeysInputIteratorT          d_keys_in,                      </div>
+<div class="line"><a name="l00615"></a><span class="lineno">  615</span>&#160;        UniqueOutputIteratorT       d_unique_out,                   </div>
+<div class="line"><a name="l00616"></a><span class="lineno">  616</span>&#160;        ValuesInputIteratorT        d_values_in,                    </div>
+<div class="line"><a name="l00617"></a><span class="lineno">  617</span>&#160;        AggregatesOutputIteratorT   d_aggregates_out,               </div>
+<div class="line"><a name="l00618"></a><span class="lineno">  618</span>&#160;        NumRunsOutputIteratorT      d_num_runs_out,                 </div>
+<div class="line"><a name="l00619"></a><span class="lineno">  619</span>&#160;        ReductionOpT                reduction_op,                   </div>
+<div class="line"><a name="l00620"></a><span class="lineno">  620</span>&#160;        <span class="keywordtype">int</span>                         num_items,                      </div>
+<div class="line"><a name="l00621"></a><span class="lineno">  621</span>&#160;        cudaStream_t                stream             = 0,         </div>
+<div class="line"><a name="l00622"></a><span class="lineno">  622</span>&#160;        <span class="keywordtype">bool</span>                        debug_synchronous  = <span class="keyword">false</span>)     </div>
+<div class="line"><a name="l00623"></a><span class="lineno">  623</span>&#160;    {</div>
+<div class="line"><a name="l00624"></a><span class="lineno">  624</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span>                 OffsetT;        <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00625"></a><span class="lineno">  625</span>&#160;        <span class="keyword">typedef</span> NullType*           FlagIterator;   <span class="comment">// FlagT iterator type (not used)</span></div>
+<div class="line"><a name="l00626"></a><span class="lineno">  626</span>&#160;        <span class="keyword">typedef</span> NullType            SelectOp;       <span class="comment">// Selection op (not used)</span></div>
+<div class="line"><a name="l00627"></a><span class="lineno">  627</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_equality.html" title="Default equality functor. ">Equality</a>            EqualityOp;     <span class="comment">// Default == operator</span></div>
+<div class="line"><a name="l00628"></a><span class="lineno">  628</span>&#160;</div>
+<div class="line"><a name="l00629"></a><span class="lineno">  629</span>&#160;        <span class="keywordflow">return</span> DispatchReduceByKey&lt;KeysInputIteratorT, UniqueOutputIteratorT, ValuesInputIteratorT, AggregatesOutputIteratorT, NumRunsOutputIteratorT, EqualityOp, ReductionOpT, OffsetT&gt;::Dispatch(</div>
+<div class="line"><a name="l00630"></a><span class="lineno">  630</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00631"></a><span class="lineno">  631</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00632"></a><span class="lineno">  632</span>&#160;            d_keys_in,</div>
+<div class="line"><a name="l00633"></a><span class="lineno">  633</span>&#160;            d_unique_out,</div>
+<div class="line"><a name="l00634"></a><span class="lineno">  634</span>&#160;            d_values_in,</div>
+<div class="line"><a name="l00635"></a><span class="lineno">  635</span>&#160;            d_aggregates_out,</div>
+<div class="line"><a name="l00636"></a><span class="lineno">  636</span>&#160;            d_num_runs_out,</div>
+<div class="line"><a name="l00637"></a><span class="lineno">  637</span>&#160;            EqualityOp(),</div>
+<div class="line"><a name="l00638"></a><span class="lineno">  638</span>&#160;            reduction_op,</div>
+<div class="line"><a name="l00639"></a><span class="lineno">  639</span>&#160;            num_items,</div>
+<div class="line"><a name="l00640"></a><span class="lineno">  640</span>&#160;            stream,</div>
+<div class="line"><a name="l00641"></a><span class="lineno">  641</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00642"></a><span class="lineno">  642</span>&#160;    }</div>
+<div class="line"><a name="l00643"></a><span class="lineno">  643</span>&#160;</div>
+<div class="line"><a name="l00644"></a><span class="lineno">  644</span>&#160;};</div>
+<div class="line"><a name="l00645"></a><span class="lineno">  645</span>&#160;</div>
+<div class="line"><a name="l00650"></a><span class="lineno">  650</span>&#160;}               <span class="comment">// CUB namespace</span></div>
+<div class="line"><a name="l00651"></a><span class="lineno">  651</span>&#160;CUB_NS_POSTFIX  <span class="comment">// Optional outer namespace(s)</span></div>
+<div class="line"><a name="l00652"></a><span class="lineno">  652</span>&#160;</div>
+<div class="line"><a name="l00653"></a><span class="lineno">  653</span>&#160;</div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/device__run__length__encode_8cuh.html b/docs/html/device__run__length__encode_8cuh.html
index 1bd7ef3344..b45c9335cf 100644
--- a/docs/html/device__run__length__encode_8cuh.html
+++ b/docs/html/device__run__length__encode_8cuh.html
@@ -137,7 +137,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/device__run__length__encode_8cuh_source.html b/docs/html/device__run__length__encode_8cuh_source.html
index 3e79613958..1cd4f8eda7 100644
--- a/docs/html/device__run__length__encode_8cuh_source.html
+++ b/docs/html/device__run__length__encode_8cuh_source.html
@@ -147,100 +147,100 @@
 <div class="line"><a name="l00078"></a><span class="lineno"><a class="code" href="structcub_1_1_device_run_length_encode.html">   78</a></span>&#160;<span class="keyword">struct </span><a class="code" href="structcub_1_1_device_run_length_encode.html" title="DeviceRunLengthEncode provides device-wide, parallel operations for demarcating &quot;runs&quot; of same-valued...">DeviceRunLengthEncode</a></div>
 <div class="line"><a name="l00079"></a><span class="lineno">   79</span>&#160;{</div>
 <div class="line"><a name="l00080"></a><span class="lineno">   80</span>&#160;</div>
-<div class="line"><a name="l00143"></a><span class="lineno">  143</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00144"></a><span class="lineno">  144</span>&#160;        <span class="keyword">typename</span>                    InputIteratorT,</div>
-<div class="line"><a name="l00145"></a><span class="lineno">  145</span>&#160;        <span class="keyword">typename</span>                    UniqueOutputIteratorT,</div>
-<div class="line"><a name="l00146"></a><span class="lineno">  146</span>&#160;        <span class="keyword">typename</span>                    LengthsOutputIteratorT,</div>
-<div class="line"><a name="l00147"></a><span class="lineno">  147</span>&#160;        <span class="keyword">typename</span>                    NumRunsOutputIteratorT&gt;</div>
-<div class="line"><a name="l00148"></a><span class="lineno">  148</span>&#160;    CUB_RUNTIME_FUNCTION __forceinline__</div>
-<div class="line"><a name="l00149"></a><span class="lineno"><a class="code" href="structcub_1_1_device_run_length_encode.html#ab25e5e8289fe198b8fea68ac5f010118">  149</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_run_length_encode.html#ab25e5e8289fe198b8fea68ac5f010118" title="Computes a run-length encoding of the sequence d_in. ">Encode</a>(</div>
-<div class="line"><a name="l00150"></a><span class="lineno">  150</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                </div>
-<div class="line"><a name="l00151"></a><span class="lineno">  151</span>&#160;        <span class="keywordtype">size_t</span>                      &amp;temp_storage_bytes,            </div>
-<div class="line"><a name="l00152"></a><span class="lineno">  152</span>&#160;        InputIteratorT              d_in,                           </div>
-<div class="line"><a name="l00153"></a><span class="lineno">  153</span>&#160;        UniqueOutputIteratorT       d_unique_out,                   </div>
-<div class="line"><a name="l00154"></a><span class="lineno">  154</span>&#160;        LengthsOutputIteratorT      d_counts_out,                   </div>
-<div class="line"><a name="l00155"></a><span class="lineno">  155</span>&#160;        NumRunsOutputIteratorT      d_num_runs_out,                     </div>
-<div class="line"><a name="l00156"></a><span class="lineno">  156</span>&#160;        <span class="keywordtype">int</span>                         num_items,                      </div>
-<div class="line"><a name="l00157"></a><span class="lineno">  157</span>&#160;        cudaStream_t                stream             = 0,         </div>
-<div class="line"><a name="l00158"></a><span class="lineno">  158</span>&#160;        <span class="keywordtype">bool</span>                        debug_synchronous  = <span class="keyword">false</span>)     </div>
-<div class="line"><a name="l00159"></a><span class="lineno">  159</span>&#160;    {</div>
-<div class="line"><a name="l00160"></a><span class="lineno">  160</span>&#160;        <span class="comment">// Data type of value iterator</span></div>
-<div class="line"><a name="l00161"></a><span class="lineno">  161</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> std::iterator_traits&lt;LengthsOutputIteratorT&gt;::value_type Value;</div>
-<div class="line"><a name="l00162"></a><span class="lineno">  162</span>&#160;</div>
-<div class="line"><a name="l00163"></a><span class="lineno">  163</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span>         OffsetT;                     <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00164"></a><span class="lineno">  164</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_null_type.html" title="A simple &quot;NULL&quot; marker type. ">NullType</a>*   FlagIterator;               <span class="comment">// FlagT iterator type (not used)</span></div>
-<div class="line"><a name="l00165"></a><span class="lineno">  165</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_null_type.html" title="A simple &quot;NULL&quot; marker type. ">NullType</a>    SelectOp;                   <span class="comment">// Selection op (not used)</span></div>
-<div class="line"><a name="l00166"></a><span class="lineno">  166</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_equality.html" title="Default equality functor. ">Equality</a>    EqualityOp;                 <span class="comment">// Default == operator</span></div>
-<div class="line"><a name="l00167"></a><span class="lineno">  167</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_sum.html" title="Default sum functor. ">cub::Sum</a>    ReductionOp;                <span class="comment">// Value reduction operator</span></div>
-<div class="line"><a name="l00168"></a><span class="lineno">  168</span>&#160;</div>
-<div class="line"><a name="l00169"></a><span class="lineno">  169</span>&#160;        <span class="comment">// Generator type for providing 1s values for run-length reduction</span></div>
-<div class="line"><a name="l00170"></a><span class="lineno">  170</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="classcub_1_1_constant_input_iterator.html" title="A random-access input generator for dereferencing a sequence of homogeneous values. ">ConstantInputIterator&lt;Value, OffsetT&gt;</a> LengthsInputIteratorT;</div>
-<div class="line"><a name="l00171"></a><span class="lineno">  171</span>&#160;</div>
-<div class="line"><a name="l00172"></a><span class="lineno">  172</span>&#160;        Value one_val;</div>
-<div class="line"><a name="l00173"></a><span class="lineno">  173</span>&#160;        one_val = 1;</div>
-<div class="line"><a name="l00174"></a><span class="lineno">  174</span>&#160;</div>
-<div class="line"><a name="l00175"></a><span class="lineno">  175</span>&#160;        <span class="keywordflow">return</span> DispatchReduceByKey&lt;InputIteratorT, UniqueOutputIteratorT, LengthsInputIteratorT, LengthsOutputIteratorT, NumRunsOutputIteratorT, EqualityOp, ReductionOp, OffsetT&gt;::Dispatch(</div>
-<div class="line"><a name="l00176"></a><span class="lineno">  176</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00177"></a><span class="lineno">  177</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00178"></a><span class="lineno">  178</span>&#160;            d_in,</div>
-<div class="line"><a name="l00179"></a><span class="lineno">  179</span>&#160;            d_unique_out,</div>
-<div class="line"><a name="l00180"></a><span class="lineno">  180</span>&#160;            LengthsInputIteratorT(one_val),</div>
-<div class="line"><a name="l00181"></a><span class="lineno">  181</span>&#160;            d_counts_out,</div>
-<div class="line"><a name="l00182"></a><span class="lineno">  182</span>&#160;            d_num_runs_out,</div>
-<div class="line"><a name="l00183"></a><span class="lineno">  183</span>&#160;            EqualityOp(),</div>
-<div class="line"><a name="l00184"></a><span class="lineno">  184</span>&#160;            ReductionOp(),</div>
-<div class="line"><a name="l00185"></a><span class="lineno">  185</span>&#160;            num_items,</div>
-<div class="line"><a name="l00186"></a><span class="lineno">  186</span>&#160;            stream,</div>
-<div class="line"><a name="l00187"></a><span class="lineno">  187</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00188"></a><span class="lineno">  188</span>&#160;    }</div>
+<div class="line"><a name="l00142"></a><span class="lineno">  142</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00143"></a><span class="lineno">  143</span>&#160;        <span class="keyword">typename</span>                    InputIteratorT,</div>
+<div class="line"><a name="l00144"></a><span class="lineno">  144</span>&#160;        <span class="keyword">typename</span>                    UniqueOutputIteratorT,</div>
+<div class="line"><a name="l00145"></a><span class="lineno">  145</span>&#160;        <span class="keyword">typename</span>                    LengthsOutputIteratorT,</div>
+<div class="line"><a name="l00146"></a><span class="lineno">  146</span>&#160;        <span class="keyword">typename</span>                    NumRunsOutputIteratorT&gt;</div>
+<div class="line"><a name="l00147"></a><span class="lineno">  147</span>&#160;    CUB_RUNTIME_FUNCTION __forceinline__</div>
+<div class="line"><a name="l00148"></a><span class="lineno"><a class="code" href="structcub_1_1_device_run_length_encode.html#ab25e5e8289fe198b8fea68ac5f010118">  148</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_run_length_encode.html#ab25e5e8289fe198b8fea68ac5f010118" title="Computes a run-length encoding of the sequence d_in. ">Encode</a>(</div>
+<div class="line"><a name="l00149"></a><span class="lineno">  149</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                </div>
+<div class="line"><a name="l00150"></a><span class="lineno">  150</span>&#160;        <span class="keywordtype">size_t</span>                      &amp;temp_storage_bytes,            </div>
+<div class="line"><a name="l00151"></a><span class="lineno">  151</span>&#160;        InputIteratorT              d_in,                           </div>
+<div class="line"><a name="l00152"></a><span class="lineno">  152</span>&#160;        UniqueOutputIteratorT       d_unique_out,                   </div>
+<div class="line"><a name="l00153"></a><span class="lineno">  153</span>&#160;        LengthsOutputIteratorT      d_counts_out,                   </div>
+<div class="line"><a name="l00154"></a><span class="lineno">  154</span>&#160;        NumRunsOutputIteratorT      d_num_runs_out,                     </div>
+<div class="line"><a name="l00155"></a><span class="lineno">  155</span>&#160;        <span class="keywordtype">int</span>                         num_items,                      </div>
+<div class="line"><a name="l00156"></a><span class="lineno">  156</span>&#160;        cudaStream_t                stream             = 0,         </div>
+<div class="line"><a name="l00157"></a><span class="lineno">  157</span>&#160;        <span class="keywordtype">bool</span>                        debug_synchronous  = <span class="keyword">false</span>)     </div>
+<div class="line"><a name="l00158"></a><span class="lineno">  158</span>&#160;    {</div>
+<div class="line"><a name="l00159"></a><span class="lineno">  159</span>&#160;        <span class="comment">// Data type of value iterator</span></div>
+<div class="line"><a name="l00160"></a><span class="lineno">  160</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> std::iterator_traits&lt;LengthsOutputIteratorT&gt;::value_type Value;</div>
+<div class="line"><a name="l00161"></a><span class="lineno">  161</span>&#160;</div>
+<div class="line"><a name="l00162"></a><span class="lineno">  162</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span>         OffsetT;                     <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00163"></a><span class="lineno">  163</span>&#160;        <span class="keyword">typedef</span> NullType*   FlagIterator;               <span class="comment">// FlagT iterator type (not used)</span></div>
+<div class="line"><a name="l00164"></a><span class="lineno">  164</span>&#160;        <span class="keyword">typedef</span> NullType    SelectOp;                   <span class="comment">// Selection op (not used)</span></div>
+<div class="line"><a name="l00165"></a><span class="lineno">  165</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_equality.html" title="Default equality functor. ">Equality</a>    EqualityOp;                 <span class="comment">// Default == operator</span></div>
+<div class="line"><a name="l00166"></a><span class="lineno">  166</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_sum.html" title="Default sum functor. ">cub::Sum</a>    ReductionOp;                <span class="comment">// Value reduction operator</span></div>
+<div class="line"><a name="l00167"></a><span class="lineno">  167</span>&#160;</div>
+<div class="line"><a name="l00168"></a><span class="lineno">  168</span>&#160;        <span class="comment">// Generator type for providing 1s values for run-length reduction</span></div>
+<div class="line"><a name="l00169"></a><span class="lineno">  169</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="classcub_1_1_constant_input_iterator.html" title="A random-access input generator for dereferencing a sequence of homogeneous values. ">ConstantInputIterator&lt;Value, OffsetT&gt;</a> LengthsInputIteratorT;</div>
+<div class="line"><a name="l00170"></a><span class="lineno">  170</span>&#160;</div>
+<div class="line"><a name="l00171"></a><span class="lineno">  171</span>&#160;        Value one_val;</div>
+<div class="line"><a name="l00172"></a><span class="lineno">  172</span>&#160;        one_val = 1;</div>
+<div class="line"><a name="l00173"></a><span class="lineno">  173</span>&#160;</div>
+<div class="line"><a name="l00174"></a><span class="lineno">  174</span>&#160;        <span class="keywordflow">return</span> DispatchReduceByKey&lt;InputIteratorT, UniqueOutputIteratorT, LengthsInputIteratorT, LengthsOutputIteratorT, NumRunsOutputIteratorT, EqualityOp, ReductionOp, OffsetT&gt;::Dispatch(</div>
+<div class="line"><a name="l00175"></a><span class="lineno">  175</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00176"></a><span class="lineno">  176</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00177"></a><span class="lineno">  177</span>&#160;            d_in,</div>
+<div class="line"><a name="l00178"></a><span class="lineno">  178</span>&#160;            d_unique_out,</div>
+<div class="line"><a name="l00179"></a><span class="lineno">  179</span>&#160;            LengthsInputIteratorT(one_val),</div>
+<div class="line"><a name="l00180"></a><span class="lineno">  180</span>&#160;            d_counts_out,</div>
+<div class="line"><a name="l00181"></a><span class="lineno">  181</span>&#160;            d_num_runs_out,</div>
+<div class="line"><a name="l00182"></a><span class="lineno">  182</span>&#160;            EqualityOp(),</div>
+<div class="line"><a name="l00183"></a><span class="lineno">  183</span>&#160;            ReductionOp(),</div>
+<div class="line"><a name="l00184"></a><span class="lineno">  184</span>&#160;            num_items,</div>
+<div class="line"><a name="l00185"></a><span class="lineno">  185</span>&#160;            stream,</div>
+<div class="line"><a name="l00186"></a><span class="lineno">  186</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00187"></a><span class="lineno">  187</span>&#160;    }</div>
+<div class="line"><a name="l00188"></a><span class="lineno">  188</span>&#160;</div>
 <div class="line"><a name="l00189"></a><span class="lineno">  189</span>&#160;</div>
-<div class="line"><a name="l00190"></a><span class="lineno">  190</span>&#160;</div>
-<div class="line"><a name="l00241"></a><span class="lineno">  241</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00242"></a><span class="lineno">  242</span>&#160;        <span class="keyword">typename</span>                InputIteratorT,</div>
-<div class="line"><a name="l00243"></a><span class="lineno">  243</span>&#160;        <span class="keyword">typename</span>                OffsetsOutputIteratorT,</div>
-<div class="line"><a name="l00244"></a><span class="lineno">  244</span>&#160;        <span class="keyword">typename</span>                LengthsOutputIteratorT,</div>
-<div class="line"><a name="l00245"></a><span class="lineno">  245</span>&#160;        <span class="keyword">typename</span>                NumRunsOutputIteratorT&gt;</div>
-<div class="line"><a name="l00246"></a><span class="lineno">  246</span>&#160;    CUB_RUNTIME_FUNCTION __forceinline__</div>
-<div class="line"><a name="l00247"></a><span class="lineno"><a class="code" href="structcub_1_1_device_run_length_encode.html#aa2318dc7a69f28a8c47d417aaf53db3a">  247</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_run_length_encode.html#aa2318dc7a69f28a8c47d417aaf53db3a" title="Enumerates the starting offsets and lengths of all non-trivial runs (of length &gt; 1) of same-valued ke...">NonTrivialRuns</a>(</div>
-<div class="line"><a name="l00248"></a><span class="lineno">  248</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                </div>
-<div class="line"><a name="l00249"></a><span class="lineno">  249</span>&#160;        <span class="keywordtype">size_t</span>                  &amp;temp_storage_bytes,            </div>
-<div class="line"><a name="l00250"></a><span class="lineno">  250</span>&#160;        InputIteratorT          d_in,                           </div>
-<div class="line"><a name="l00251"></a><span class="lineno">  251</span>&#160;        OffsetsOutputIteratorT  d_offsets_out,                  </div>
-<div class="line"><a name="l00252"></a><span class="lineno">  252</span>&#160;        LengthsOutputIteratorT  d_lengths_out,                  </div>
-<div class="line"><a name="l00253"></a><span class="lineno">  253</span>&#160;        NumRunsOutputIteratorT  d_num_runs_out,                 </div>
-<div class="line"><a name="l00254"></a><span class="lineno">  254</span>&#160;        <span class="keywordtype">int</span>                     num_items,                      </div>
-<div class="line"><a name="l00255"></a><span class="lineno">  255</span>&#160;        cudaStream_t            stream             = 0,         </div>
-<div class="line"><a name="l00256"></a><span class="lineno">  256</span>&#160;        <span class="keywordtype">bool</span>                    debug_synchronous  = <span class="keyword">false</span>)     </div>
-<div class="line"><a name="l00257"></a><span class="lineno">  257</span>&#160;    {</div>
-<div class="line"><a name="l00258"></a><span class="lineno">  258</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span>         OffsetT;                     <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00259"></a><span class="lineno">  259</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_equality.html" title="Default equality functor. ">Equality</a>    EqualityOp;                 <span class="comment">// Default == operator</span></div>
-<div class="line"><a name="l00260"></a><span class="lineno">  260</span>&#160;</div>
-<div class="line"><a name="l00261"></a><span class="lineno">  261</span>&#160;        <span class="keywordflow">return</span> DeviceRleDispatch&lt;InputIteratorT, OffsetsOutputIteratorT, LengthsOutputIteratorT, NumRunsOutputIteratorT, EqualityOp, OffsetT&gt;::Dispatch(</div>
-<div class="line"><a name="l00262"></a><span class="lineno">  262</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00263"></a><span class="lineno">  263</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00264"></a><span class="lineno">  264</span>&#160;            d_in,</div>
-<div class="line"><a name="l00265"></a><span class="lineno">  265</span>&#160;            d_offsets_out,</div>
-<div class="line"><a name="l00266"></a><span class="lineno">  266</span>&#160;            d_lengths_out,</div>
-<div class="line"><a name="l00267"></a><span class="lineno">  267</span>&#160;            d_num_runs_out,</div>
-<div class="line"><a name="l00268"></a><span class="lineno">  268</span>&#160;            EqualityOp(),</div>
-<div class="line"><a name="l00269"></a><span class="lineno">  269</span>&#160;            num_items,</div>
-<div class="line"><a name="l00270"></a><span class="lineno">  270</span>&#160;            stream,</div>
-<div class="line"><a name="l00271"></a><span class="lineno">  271</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00272"></a><span class="lineno">  272</span>&#160;    }</div>
-<div class="line"><a name="l00273"></a><span class="lineno">  273</span>&#160;</div>
+<div class="line"><a name="l00239"></a><span class="lineno">  239</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00240"></a><span class="lineno">  240</span>&#160;        <span class="keyword">typename</span>                InputIteratorT,</div>
+<div class="line"><a name="l00241"></a><span class="lineno">  241</span>&#160;        <span class="keyword">typename</span>                OffsetsOutputIteratorT,</div>
+<div class="line"><a name="l00242"></a><span class="lineno">  242</span>&#160;        <span class="keyword">typename</span>                LengthsOutputIteratorT,</div>
+<div class="line"><a name="l00243"></a><span class="lineno">  243</span>&#160;        <span class="keyword">typename</span>                NumRunsOutputIteratorT&gt;</div>
+<div class="line"><a name="l00244"></a><span class="lineno">  244</span>&#160;    CUB_RUNTIME_FUNCTION __forceinline__</div>
+<div class="line"><a name="l00245"></a><span class="lineno"><a class="code" href="structcub_1_1_device_run_length_encode.html#aa2318dc7a69f28a8c47d417aaf53db3a">  245</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_run_length_encode.html#aa2318dc7a69f28a8c47d417aaf53db3a" title="Enumerates the starting offsets and lengths of all non-trivial runs (of length &gt; 1) of same-valued ke...">NonTrivialRuns</a>(</div>
+<div class="line"><a name="l00246"></a><span class="lineno">  246</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                </div>
+<div class="line"><a name="l00247"></a><span class="lineno">  247</span>&#160;        <span class="keywordtype">size_t</span>                  &amp;temp_storage_bytes,            </div>
+<div class="line"><a name="l00248"></a><span class="lineno">  248</span>&#160;        InputIteratorT          d_in,                           </div>
+<div class="line"><a name="l00249"></a><span class="lineno">  249</span>&#160;        OffsetsOutputIteratorT  d_offsets_out,                  </div>
+<div class="line"><a name="l00250"></a><span class="lineno">  250</span>&#160;        LengthsOutputIteratorT  d_lengths_out,                  </div>
+<div class="line"><a name="l00251"></a><span class="lineno">  251</span>&#160;        NumRunsOutputIteratorT  d_num_runs_out,                 </div>
+<div class="line"><a name="l00252"></a><span class="lineno">  252</span>&#160;        <span class="keywordtype">int</span>                     num_items,                      </div>
+<div class="line"><a name="l00253"></a><span class="lineno">  253</span>&#160;        cudaStream_t            stream             = 0,         </div>
+<div class="line"><a name="l00254"></a><span class="lineno">  254</span>&#160;        <span class="keywordtype">bool</span>                    debug_synchronous  = <span class="keyword">false</span>)     </div>
+<div class="line"><a name="l00255"></a><span class="lineno">  255</span>&#160;    {</div>
+<div class="line"><a name="l00256"></a><span class="lineno">  256</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span>         OffsetT;                     <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00257"></a><span class="lineno">  257</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_equality.html" title="Default equality functor. ">Equality</a>    EqualityOp;                 <span class="comment">// Default == operator</span></div>
+<div class="line"><a name="l00258"></a><span class="lineno">  258</span>&#160;</div>
+<div class="line"><a name="l00259"></a><span class="lineno">  259</span>&#160;        <span class="keywordflow">return</span> DeviceRleDispatch&lt;InputIteratorT, OffsetsOutputIteratorT, LengthsOutputIteratorT, NumRunsOutputIteratorT, EqualityOp, OffsetT&gt;::Dispatch(</div>
+<div class="line"><a name="l00260"></a><span class="lineno">  260</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00261"></a><span class="lineno">  261</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00262"></a><span class="lineno">  262</span>&#160;            d_in,</div>
+<div class="line"><a name="l00263"></a><span class="lineno">  263</span>&#160;            d_offsets_out,</div>
+<div class="line"><a name="l00264"></a><span class="lineno">  264</span>&#160;            d_lengths_out,</div>
+<div class="line"><a name="l00265"></a><span class="lineno">  265</span>&#160;            d_num_runs_out,</div>
+<div class="line"><a name="l00266"></a><span class="lineno">  266</span>&#160;            EqualityOp(),</div>
+<div class="line"><a name="l00267"></a><span class="lineno">  267</span>&#160;            num_items,</div>
+<div class="line"><a name="l00268"></a><span class="lineno">  268</span>&#160;            stream,</div>
+<div class="line"><a name="l00269"></a><span class="lineno">  269</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00270"></a><span class="lineno">  270</span>&#160;    }</div>
+<div class="line"><a name="l00271"></a><span class="lineno">  271</span>&#160;</div>
+<div class="line"><a name="l00272"></a><span class="lineno">  272</span>&#160;</div>
+<div class="line"><a name="l00273"></a><span class="lineno">  273</span>&#160;};</div>
 <div class="line"><a name="l00274"></a><span class="lineno">  274</span>&#160;</div>
-<div class="line"><a name="l00275"></a><span class="lineno">  275</span>&#160;};</div>
-<div class="line"><a name="l00276"></a><span class="lineno">  276</span>&#160;</div>
-<div class="line"><a name="l00277"></a><span class="lineno">  277</span>&#160;</div>
-<div class="line"><a name="l00278"></a><span class="lineno">  278</span>&#160;}               <span class="comment">// CUB namespace</span></div>
-<div class="line"><a name="l00279"></a><span class="lineno">  279</span>&#160;CUB_NS_POSTFIX  <span class="comment">// Optional outer namespace(s)</span></div>
-<div class="line"><a name="l00280"></a><span class="lineno">  280</span>&#160;</div>
-<div class="line"><a name="l00281"></a><span class="lineno">  281</span>&#160;</div>
+<div class="line"><a name="l00275"></a><span class="lineno">  275</span>&#160;</div>
+<div class="line"><a name="l00276"></a><span class="lineno">  276</span>&#160;}               <span class="comment">// CUB namespace</span></div>
+<div class="line"><a name="l00277"></a><span class="lineno">  277</span>&#160;CUB_NS_POSTFIX  <span class="comment">// Optional outer namespace(s)</span></div>
+<div class="line"><a name="l00278"></a><span class="lineno">  278</span>&#160;</div>
+<div class="line"><a name="l00279"></a><span class="lineno">  279</span>&#160;</div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/device__scan_8cuh.html b/docs/html/device__scan_8cuh.html
index f9ca7bc126..0aa8676a39 100644
--- a/docs/html/device__scan_8cuh.html
+++ b/docs/html/device__scan_8cuh.html
@@ -136,7 +136,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/device__scan_8cuh_source.html b/docs/html/device__scan_8cuh_source.html
index d661a0f0d5..6ab23988fd 100644
--- a/docs/html/device__scan_8cuh_source.html
+++ b/docs/html/device__scan_8cuh_source.html
@@ -147,145 +147,145 @@
 <div class="line"><a name="l00078"></a><span class="lineno">   78</span>&#160;{</div>
 <div class="line"><a name="l00079"></a><span class="lineno">   79</span>&#160;    <span class="comment">/******************************************************************/</span></div>
 <div class="line"><a name="l00083"></a><span class="lineno">   83</span>&#160;</div>
-<div class="line"><a name="l00129"></a><span class="lineno">  129</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00130"></a><span class="lineno">  130</span>&#160;        <span class="keyword">typename</span>        InputIteratorT,</div>
-<div class="line"><a name="l00131"></a><span class="lineno">  131</span>&#160;        <span class="keyword">typename</span>        OutputIteratorT&gt;</div>
-<div class="line"><a name="l00132"></a><span class="lineno">  132</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00133"></a><span class="lineno"><a class="code" href="structcub_1_1_device_scan.html#a02b2d2e98f89f80813460f6a6ea1692b">  133</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_scan.html#a02b2d2e98f89f80813460f6a6ea1692b" title="Computes a device-wide exclusive prefix sum. ">ExclusiveSum</a>(</div>
-<div class="line"><a name="l00134"></a><span class="lineno">  134</span>&#160;        <span class="keywordtype">void</span>            *d_temp_storage,                    </div>
-<div class="line"><a name="l00135"></a><span class="lineno">  135</span>&#160;        <span class="keywordtype">size_t</span>          &amp;temp_storage_bytes,                </div>
-<div class="line"><a name="l00136"></a><span class="lineno">  136</span>&#160;        InputIteratorT  d_in,                               </div>
-<div class="line"><a name="l00137"></a><span class="lineno">  137</span>&#160;        OutputIteratorT d_out,                              </div>
-<div class="line"><a name="l00138"></a><span class="lineno">  138</span>&#160;        <span class="keywordtype">int</span>             num_items,                          </div>
-<div class="line"><a name="l00139"></a><span class="lineno">  139</span>&#160;        cudaStream_t    stream              = 0,            </div>
-<div class="line"><a name="l00140"></a><span class="lineno">  140</span>&#160;        <span class="keywordtype">bool</span>            debug_synchronous   = <span class="keyword">false</span>)        </div>
-<div class="line"><a name="l00141"></a><span class="lineno">  141</span>&#160;    {</div>
-<div class="line"><a name="l00142"></a><span class="lineno">  142</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00143"></a><span class="lineno">  143</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00144"></a><span class="lineno">  144</span>&#160;</div>
-<div class="line"><a name="l00145"></a><span class="lineno">  145</span>&#160;        <span class="comment">// Scan data type</span></div>
-<div class="line"><a name="l00146"></a><span class="lineno">  146</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> std::iterator_traits&lt;InputIteratorT&gt;::value_type T;</div>
-<div class="line"><a name="l00147"></a><span class="lineno">  147</span>&#160;</div>
-<div class="line"><a name="l00148"></a><span class="lineno">  148</span>&#160;        <span class="keywordflow">return</span> DispatchScan&lt;InputIteratorT, OutputIteratorT, Sum, T, OffsetT&gt;::Dispatch(</div>
-<div class="line"><a name="l00149"></a><span class="lineno">  149</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00150"></a><span class="lineno">  150</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00151"></a><span class="lineno">  151</span>&#160;            d_in,</div>
-<div class="line"><a name="l00152"></a><span class="lineno">  152</span>&#160;            d_out,</div>
-<div class="line"><a name="l00153"></a><span class="lineno">  153</span>&#160;            <a class="code" href="structcub_1_1_sum.html" title="Default sum functor. ">Sum</a>(),</div>
-<div class="line"><a name="l00154"></a><span class="lineno">  154</span>&#160;            T(),</div>
-<div class="line"><a name="l00155"></a><span class="lineno">  155</span>&#160;            num_items,</div>
-<div class="line"><a name="l00156"></a><span class="lineno">  156</span>&#160;            stream,</div>
-<div class="line"><a name="l00157"></a><span class="lineno">  157</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00158"></a><span class="lineno">  158</span>&#160;    }</div>
+<div class="line"><a name="l00128"></a><span class="lineno">  128</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00129"></a><span class="lineno">  129</span>&#160;        <span class="keyword">typename</span>        InputIteratorT,</div>
+<div class="line"><a name="l00130"></a><span class="lineno">  130</span>&#160;        <span class="keyword">typename</span>        OutputIteratorT&gt;</div>
+<div class="line"><a name="l00131"></a><span class="lineno">  131</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00132"></a><span class="lineno"><a class="code" href="structcub_1_1_device_scan.html#a02b2d2e98f89f80813460f6a6ea1692b">  132</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_scan.html#a02b2d2e98f89f80813460f6a6ea1692b" title="Computes a device-wide exclusive prefix sum. ">ExclusiveSum</a>(</div>
+<div class="line"><a name="l00133"></a><span class="lineno">  133</span>&#160;        <span class="keywordtype">void</span>            *d_temp_storage,                    </div>
+<div class="line"><a name="l00134"></a><span class="lineno">  134</span>&#160;        <span class="keywordtype">size_t</span>          &amp;temp_storage_bytes,                </div>
+<div class="line"><a name="l00135"></a><span class="lineno">  135</span>&#160;        InputIteratorT  d_in,                               </div>
+<div class="line"><a name="l00136"></a><span class="lineno">  136</span>&#160;        OutputIteratorT d_out,                              </div>
+<div class="line"><a name="l00137"></a><span class="lineno">  137</span>&#160;        <span class="keywordtype">int</span>             num_items,                          </div>
+<div class="line"><a name="l00138"></a><span class="lineno">  138</span>&#160;        cudaStream_t    stream              = 0,            </div>
+<div class="line"><a name="l00139"></a><span class="lineno">  139</span>&#160;        <span class="keywordtype">bool</span>            debug_synchronous   = <span class="keyword">false</span>)        </div>
+<div class="line"><a name="l00140"></a><span class="lineno">  140</span>&#160;    {</div>
+<div class="line"><a name="l00141"></a><span class="lineno">  141</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00142"></a><span class="lineno">  142</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
+<div class="line"><a name="l00143"></a><span class="lineno">  143</span>&#160;</div>
+<div class="line"><a name="l00144"></a><span class="lineno">  144</span>&#160;        <span class="comment">// Scan data type</span></div>
+<div class="line"><a name="l00145"></a><span class="lineno">  145</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> std::iterator_traits&lt;InputIteratorT&gt;::value_type T;</div>
+<div class="line"><a name="l00146"></a><span class="lineno">  146</span>&#160;</div>
+<div class="line"><a name="l00147"></a><span class="lineno">  147</span>&#160;        <span class="keywordflow">return</span> DispatchScan&lt;InputIteratorT, OutputIteratorT, Sum, T, OffsetT&gt;::Dispatch(</div>
+<div class="line"><a name="l00148"></a><span class="lineno">  148</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00149"></a><span class="lineno">  149</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00150"></a><span class="lineno">  150</span>&#160;            d_in,</div>
+<div class="line"><a name="l00151"></a><span class="lineno">  151</span>&#160;            d_out,</div>
+<div class="line"><a name="l00152"></a><span class="lineno">  152</span>&#160;            <a class="code" href="structcub_1_1_sum.html" title="Default sum functor. ">Sum</a>(),</div>
+<div class="line"><a name="l00153"></a><span class="lineno">  153</span>&#160;            T(),</div>
+<div class="line"><a name="l00154"></a><span class="lineno">  154</span>&#160;            num_items,</div>
+<div class="line"><a name="l00155"></a><span class="lineno">  155</span>&#160;            stream,</div>
+<div class="line"><a name="l00156"></a><span class="lineno">  156</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00157"></a><span class="lineno">  157</span>&#160;    }</div>
+<div class="line"><a name="l00158"></a><span class="lineno">  158</span>&#160;</div>
 <div class="line"><a name="l00159"></a><span class="lineno">  159</span>&#160;</div>
-<div class="line"><a name="l00160"></a><span class="lineno">  160</span>&#160;</div>
-<div class="line"><a name="l00215"></a><span class="lineno">  215</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00216"></a><span class="lineno">  216</span>&#160;        <span class="keyword">typename</span>        InputIteratorT,</div>
-<div class="line"><a name="l00217"></a><span class="lineno">  217</span>&#160;        <span class="keyword">typename</span>        OutputIteratorT,</div>
-<div class="line"><a name="l00218"></a><span class="lineno">  218</span>&#160;        <span class="keyword">typename</span>        ScanOp,</div>
-<div class="line"><a name="l00219"></a><span class="lineno">  219</span>&#160;        <span class="keyword">typename</span>        Identity&gt;</div>
-<div class="line"><a name="l00220"></a><span class="lineno">  220</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00221"></a><span class="lineno"><a class="code" href="structcub_1_1_device_scan.html#a59441119927316bea7c4e3f127d01cbf">  221</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_scan.html#a59441119927316bea7c4e3f127d01cbf" title="Computes a device-wide exclusive prefix scan using the specified binary scan_op functor. ">ExclusiveScan</a>(</div>
-<div class="line"><a name="l00222"></a><span class="lineno">  222</span>&#160;        <span class="keywordtype">void</span>            *d_temp_storage,                    </div>
-<div class="line"><a name="l00223"></a><span class="lineno">  223</span>&#160;        <span class="keywordtype">size_t</span>          &amp;temp_storage_bytes,                </div>
-<div class="line"><a name="l00224"></a><span class="lineno">  224</span>&#160;        InputIteratorT  d_in,                               </div>
-<div class="line"><a name="l00225"></a><span class="lineno">  225</span>&#160;        OutputIteratorT d_out,                              </div>
-<div class="line"><a name="l00226"></a><span class="lineno">  226</span>&#160;        ScanOp          scan_op,                            </div>
-<div class="line"><a name="l00227"></a><span class="lineno">  227</span>&#160;        Identity        identity,                           </div>
-<div class="line"><a name="l00228"></a><span class="lineno">  228</span>&#160;        <span class="keywordtype">int</span>             num_items,                          </div>
-<div class="line"><a name="l00229"></a><span class="lineno">  229</span>&#160;        cudaStream_t    stream              = 0,            </div>
-<div class="line"><a name="l00230"></a><span class="lineno">  230</span>&#160;        <span class="keywordtype">bool</span>            debug_synchronous   = <span class="keyword">false</span>)        </div>
-<div class="line"><a name="l00231"></a><span class="lineno">  231</span>&#160;    {</div>
-<div class="line"><a name="l00232"></a><span class="lineno">  232</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00233"></a><span class="lineno">  233</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00234"></a><span class="lineno">  234</span>&#160;</div>
-<div class="line"><a name="l00235"></a><span class="lineno">  235</span>&#160;        <span class="keywordflow">return</span> DispatchScan&lt;InputIteratorT, OutputIteratorT, ScanOp, Identity, OffsetT&gt;::Dispatch(</div>
-<div class="line"><a name="l00236"></a><span class="lineno">  236</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00237"></a><span class="lineno">  237</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00238"></a><span class="lineno">  238</span>&#160;            d_in,</div>
-<div class="line"><a name="l00239"></a><span class="lineno">  239</span>&#160;            d_out,</div>
-<div class="line"><a name="l00240"></a><span class="lineno">  240</span>&#160;            scan_op,</div>
-<div class="line"><a name="l00241"></a><span class="lineno">  241</span>&#160;            identity,</div>
-<div class="line"><a name="l00242"></a><span class="lineno">  242</span>&#160;            num_items,</div>
-<div class="line"><a name="l00243"></a><span class="lineno">  243</span>&#160;            stream,</div>
-<div class="line"><a name="l00244"></a><span class="lineno">  244</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00245"></a><span class="lineno">  245</span>&#160;    }</div>
-<div class="line"><a name="l00246"></a><span class="lineno">  246</span>&#160;</div>
-<div class="line"><a name="l00247"></a><span class="lineno">  247</span>&#160;</div>
-<div class="line"><a name="l00249"></a><span class="lineno">  249</span>&#160;    <span class="comment">/******************************************************************/</span></div>
-<div class="line"><a name="l00253"></a><span class="lineno">  253</span>&#160;</div>
-<div class="line"><a name="l00254"></a><span class="lineno">  254</span>&#160;</div>
-<div class="line"><a name="l00296"></a><span class="lineno">  296</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00297"></a><span class="lineno">  297</span>&#160;        <span class="keyword">typename</span>            InputIteratorT,</div>
-<div class="line"><a name="l00298"></a><span class="lineno">  298</span>&#160;        <span class="keyword">typename</span>            OutputIteratorT&gt;</div>
-<div class="line"><a name="l00299"></a><span class="lineno">  299</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00300"></a><span class="lineno"><a class="code" href="structcub_1_1_device_scan.html#a9416ac1ea26f9fde669d83ddc883795a">  300</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_scan.html#a9416ac1ea26f9fde669d83ddc883795a" title="Computes a device-wide inclusive prefix sum. ">InclusiveSum</a>(</div>
-<div class="line"><a name="l00301"></a><span class="lineno">  301</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                    </div>
-<div class="line"><a name="l00302"></a><span class="lineno">  302</span>&#160;        <span class="keywordtype">size_t</span>&amp;             temp_storage_bytes,                </div>
-<div class="line"><a name="l00303"></a><span class="lineno">  303</span>&#160;        InputIteratorT      d_in,                               </div>
-<div class="line"><a name="l00304"></a><span class="lineno">  304</span>&#160;        OutputIteratorT     d_out,                              </div>
-<div class="line"><a name="l00305"></a><span class="lineno">  305</span>&#160;        <span class="keywordtype">int</span>                 num_items,                          </div>
-<div class="line"><a name="l00306"></a><span class="lineno">  306</span>&#160;        cudaStream_t        stream             = 0,             </div>
-<div class="line"><a name="l00307"></a><span class="lineno">  307</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous  = <span class="keyword">false</span>)         </div>
-<div class="line"><a name="l00308"></a><span class="lineno">  308</span>&#160;    {</div>
-<div class="line"><a name="l00309"></a><span class="lineno">  309</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00310"></a><span class="lineno">  310</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00311"></a><span class="lineno">  311</span>&#160;</div>
-<div class="line"><a name="l00312"></a><span class="lineno">  312</span>&#160;        <span class="keywordflow">return</span> DispatchScan&lt;InputIteratorT, OutputIteratorT, Sum, NullType, OffsetT&gt;::Dispatch(</div>
-<div class="line"><a name="l00313"></a><span class="lineno">  313</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00314"></a><span class="lineno">  314</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00315"></a><span class="lineno">  315</span>&#160;            d_in,</div>
-<div class="line"><a name="l00316"></a><span class="lineno">  316</span>&#160;            d_out,</div>
-<div class="line"><a name="l00317"></a><span class="lineno">  317</span>&#160;            <a class="code" href="structcub_1_1_sum.html" title="Default sum functor. ">Sum</a>(),</div>
-<div class="line"><a name="l00318"></a><span class="lineno">  318</span>&#160;            <a class="code" href="structcub_1_1_null_type.html" title="A simple &quot;NULL&quot; marker type. ">NullType</a>(),</div>
-<div class="line"><a name="l00319"></a><span class="lineno">  319</span>&#160;            num_items,</div>
-<div class="line"><a name="l00320"></a><span class="lineno">  320</span>&#160;            stream,</div>
-<div class="line"><a name="l00321"></a><span class="lineno">  321</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00322"></a><span class="lineno">  322</span>&#160;    }</div>
-<div class="line"><a name="l00323"></a><span class="lineno">  323</span>&#160;</div>
-<div class="line"><a name="l00324"></a><span class="lineno">  324</span>&#160;</div>
-<div class="line"><a name="l00378"></a><span class="lineno">  378</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00379"></a><span class="lineno">  379</span>&#160;        <span class="keyword">typename</span>        InputIteratorT,</div>
-<div class="line"><a name="l00380"></a><span class="lineno">  380</span>&#160;        <span class="keyword">typename</span>        OutputIteratorT,</div>
-<div class="line"><a name="l00381"></a><span class="lineno">  381</span>&#160;        <span class="keyword">typename</span>        ScanOp&gt;</div>
-<div class="line"><a name="l00382"></a><span class="lineno">  382</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00383"></a><span class="lineno"><a class="code" href="structcub_1_1_device_scan.html#af27a73a9a8daef1b4fe5a16233932e30">  383</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_scan.html#af27a73a9a8daef1b4fe5a16233932e30" title="Computes a device-wide inclusive prefix scan using the specified binary scan_op functor. ">InclusiveScan</a>(</div>
-<div class="line"><a name="l00384"></a><span class="lineno">  384</span>&#160;        <span class="keywordtype">void</span>            *d_temp_storage,                    </div>
-<div class="line"><a name="l00385"></a><span class="lineno">  385</span>&#160;        <span class="keywordtype">size_t</span>          &amp;temp_storage_bytes,                </div>
-<div class="line"><a name="l00386"></a><span class="lineno">  386</span>&#160;        InputIteratorT  d_in,                               </div>
-<div class="line"><a name="l00387"></a><span class="lineno">  387</span>&#160;        OutputIteratorT d_out,                              </div>
-<div class="line"><a name="l00388"></a><span class="lineno">  388</span>&#160;        ScanOp          scan_op,                            </div>
-<div class="line"><a name="l00389"></a><span class="lineno">  389</span>&#160;        <span class="keywordtype">int</span>             num_items,                          </div>
-<div class="line"><a name="l00390"></a><span class="lineno">  390</span>&#160;        cudaStream_t    stream             = 0,             </div>
-<div class="line"><a name="l00391"></a><span class="lineno">  391</span>&#160;        <span class="keywordtype">bool</span>            debug_synchronous  = <span class="keyword">false</span>)         </div>
-<div class="line"><a name="l00392"></a><span class="lineno">  392</span>&#160;    {</div>
-<div class="line"><a name="l00393"></a><span class="lineno">  393</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00394"></a><span class="lineno">  394</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00395"></a><span class="lineno">  395</span>&#160;</div>
-<div class="line"><a name="l00396"></a><span class="lineno">  396</span>&#160;        <span class="keywordflow">return</span> DispatchScan&lt;InputIteratorT, OutputIteratorT, ScanOp, NullType, OffsetT&gt;::Dispatch(</div>
-<div class="line"><a name="l00397"></a><span class="lineno">  397</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00398"></a><span class="lineno">  398</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00399"></a><span class="lineno">  399</span>&#160;            d_in,</div>
-<div class="line"><a name="l00400"></a><span class="lineno">  400</span>&#160;            d_out,</div>
-<div class="line"><a name="l00401"></a><span class="lineno">  401</span>&#160;            scan_op,</div>
-<div class="line"><a name="l00402"></a><span class="lineno">  402</span>&#160;            <a class="code" href="structcub_1_1_null_type.html" title="A simple &quot;NULL&quot; marker type. ">NullType</a>(),</div>
-<div class="line"><a name="l00403"></a><span class="lineno">  403</span>&#160;            num_items,</div>
-<div class="line"><a name="l00404"></a><span class="lineno">  404</span>&#160;            stream,</div>
-<div class="line"><a name="l00405"></a><span class="lineno">  405</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00406"></a><span class="lineno">  406</span>&#160;    }</div>
-<div class="line"><a name="l00407"></a><span class="lineno">  407</span>&#160;</div>
-<div class="line"><a name="l00409"></a><span class="lineno">  409</span>&#160;</div>
-<div class="line"><a name="l00410"></a><span class="lineno">  410</span>&#160;};</div>
-<div class="line"><a name="l00411"></a><span class="lineno">  411</span>&#160;</div>
-<div class="line"><a name="l00416"></a><span class="lineno">  416</span>&#160;}               <span class="comment">// CUB namespace</span></div>
-<div class="line"><a name="l00417"></a><span class="lineno">  417</span>&#160;CUB_NS_POSTFIX  <span class="comment">// Optional outer namespace(s)</span></div>
-<div class="line"><a name="l00418"></a><span class="lineno">  418</span>&#160;</div>
-<div class="line"><a name="l00419"></a><span class="lineno">  419</span>&#160;</div>
+<div class="line"><a name="l00210"></a><span class="lineno">  210</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00211"></a><span class="lineno">  211</span>&#160;        <span class="keyword">typename</span>        InputIteratorT,</div>
+<div class="line"><a name="l00212"></a><span class="lineno">  212</span>&#160;        <span class="keyword">typename</span>        OutputIteratorT,</div>
+<div class="line"><a name="l00213"></a><span class="lineno">  213</span>&#160;        <span class="keyword">typename</span>        ScanOp,</div>
+<div class="line"><a name="l00214"></a><span class="lineno">  214</span>&#160;        <span class="keyword">typename</span>        Identity&gt;</div>
+<div class="line"><a name="l00215"></a><span class="lineno">  215</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00216"></a><span class="lineno"><a class="code" href="structcub_1_1_device_scan.html#a59441119927316bea7c4e3f127d01cbf">  216</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_scan.html#a59441119927316bea7c4e3f127d01cbf" title="Computes a device-wide exclusive prefix scan using the specified binary scan_op functor. ">ExclusiveScan</a>(</div>
+<div class="line"><a name="l00217"></a><span class="lineno">  217</span>&#160;        <span class="keywordtype">void</span>            *d_temp_storage,                    </div>
+<div class="line"><a name="l00218"></a><span class="lineno">  218</span>&#160;        <span class="keywordtype">size_t</span>          &amp;temp_storage_bytes,                </div>
+<div class="line"><a name="l00219"></a><span class="lineno">  219</span>&#160;        InputIteratorT  d_in,                               </div>
+<div class="line"><a name="l00220"></a><span class="lineno">  220</span>&#160;        OutputIteratorT d_out,                              </div>
+<div class="line"><a name="l00221"></a><span class="lineno">  221</span>&#160;        ScanOp          scan_op,                            </div>
+<div class="line"><a name="l00222"></a><span class="lineno">  222</span>&#160;        Identity        identity,                           </div>
+<div class="line"><a name="l00223"></a><span class="lineno">  223</span>&#160;        <span class="keywordtype">int</span>             num_items,                          </div>
+<div class="line"><a name="l00224"></a><span class="lineno">  224</span>&#160;        cudaStream_t    stream              = 0,            </div>
+<div class="line"><a name="l00225"></a><span class="lineno">  225</span>&#160;        <span class="keywordtype">bool</span>            debug_synchronous   = <span class="keyword">false</span>)        </div>
+<div class="line"><a name="l00226"></a><span class="lineno">  226</span>&#160;    {</div>
+<div class="line"><a name="l00227"></a><span class="lineno">  227</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00228"></a><span class="lineno">  228</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
+<div class="line"><a name="l00229"></a><span class="lineno">  229</span>&#160;</div>
+<div class="line"><a name="l00230"></a><span class="lineno">  230</span>&#160;        <span class="keywordflow">return</span> DispatchScan&lt;InputIteratorT, OutputIteratorT, ScanOp, Identity, OffsetT&gt;::Dispatch(</div>
+<div class="line"><a name="l00231"></a><span class="lineno">  231</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00232"></a><span class="lineno">  232</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00233"></a><span class="lineno">  233</span>&#160;            d_in,</div>
+<div class="line"><a name="l00234"></a><span class="lineno">  234</span>&#160;            d_out,</div>
+<div class="line"><a name="l00235"></a><span class="lineno">  235</span>&#160;            scan_op,</div>
+<div class="line"><a name="l00236"></a><span class="lineno">  236</span>&#160;            identity,</div>
+<div class="line"><a name="l00237"></a><span class="lineno">  237</span>&#160;            num_items,</div>
+<div class="line"><a name="l00238"></a><span class="lineno">  238</span>&#160;            stream,</div>
+<div class="line"><a name="l00239"></a><span class="lineno">  239</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00240"></a><span class="lineno">  240</span>&#160;    }</div>
+<div class="line"><a name="l00241"></a><span class="lineno">  241</span>&#160;</div>
+<div class="line"><a name="l00242"></a><span class="lineno">  242</span>&#160;</div>
+<div class="line"><a name="l00244"></a><span class="lineno">  244</span>&#160;    <span class="comment">/******************************************************************/</span></div>
+<div class="line"><a name="l00248"></a><span class="lineno">  248</span>&#160;</div>
+<div class="line"><a name="l00249"></a><span class="lineno">  249</span>&#160;</div>
+<div class="line"><a name="l00287"></a><span class="lineno">  287</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00288"></a><span class="lineno">  288</span>&#160;        <span class="keyword">typename</span>            InputIteratorT,</div>
+<div class="line"><a name="l00289"></a><span class="lineno">  289</span>&#160;        <span class="keyword">typename</span>            OutputIteratorT&gt;</div>
+<div class="line"><a name="l00290"></a><span class="lineno">  290</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00291"></a><span class="lineno"><a class="code" href="structcub_1_1_device_scan.html#a9416ac1ea26f9fde669d83ddc883795a">  291</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_scan.html#a9416ac1ea26f9fde669d83ddc883795a" title="Computes a device-wide inclusive prefix sum. ">InclusiveSum</a>(</div>
+<div class="line"><a name="l00292"></a><span class="lineno">  292</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                    </div>
+<div class="line"><a name="l00293"></a><span class="lineno">  293</span>&#160;        <span class="keywordtype">size_t</span>&amp;             temp_storage_bytes,                </div>
+<div class="line"><a name="l00294"></a><span class="lineno">  294</span>&#160;        InputIteratorT      d_in,                               </div>
+<div class="line"><a name="l00295"></a><span class="lineno">  295</span>&#160;        OutputIteratorT     d_out,                              </div>
+<div class="line"><a name="l00296"></a><span class="lineno">  296</span>&#160;        <span class="keywordtype">int</span>                 num_items,                          </div>
+<div class="line"><a name="l00297"></a><span class="lineno">  297</span>&#160;        cudaStream_t        stream             = 0,             </div>
+<div class="line"><a name="l00298"></a><span class="lineno">  298</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous  = <span class="keyword">false</span>)         </div>
+<div class="line"><a name="l00299"></a><span class="lineno">  299</span>&#160;    {</div>
+<div class="line"><a name="l00300"></a><span class="lineno">  300</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00301"></a><span class="lineno">  301</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
+<div class="line"><a name="l00302"></a><span class="lineno">  302</span>&#160;</div>
+<div class="line"><a name="l00303"></a><span class="lineno">  303</span>&#160;        <span class="keywordflow">return</span> DispatchScan&lt;InputIteratorT, OutputIteratorT, Sum, NullType, OffsetT&gt;::Dispatch(</div>
+<div class="line"><a name="l00304"></a><span class="lineno">  304</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00305"></a><span class="lineno">  305</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00306"></a><span class="lineno">  306</span>&#160;            d_in,</div>
+<div class="line"><a name="l00307"></a><span class="lineno">  307</span>&#160;            d_out,</div>
+<div class="line"><a name="l00308"></a><span class="lineno">  308</span>&#160;            <a class="code" href="structcub_1_1_sum.html" title="Default sum functor. ">Sum</a>(),</div>
+<div class="line"><a name="l00309"></a><span class="lineno">  309</span>&#160;            NullType(),</div>
+<div class="line"><a name="l00310"></a><span class="lineno">  310</span>&#160;            num_items,</div>
+<div class="line"><a name="l00311"></a><span class="lineno">  311</span>&#160;            stream,</div>
+<div class="line"><a name="l00312"></a><span class="lineno">  312</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00313"></a><span class="lineno">  313</span>&#160;    }</div>
+<div class="line"><a name="l00314"></a><span class="lineno">  314</span>&#160;</div>
+<div class="line"><a name="l00315"></a><span class="lineno">  315</span>&#160;</div>
+<div class="line"><a name="l00365"></a><span class="lineno">  365</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00366"></a><span class="lineno">  366</span>&#160;        <span class="keyword">typename</span>        InputIteratorT,</div>
+<div class="line"><a name="l00367"></a><span class="lineno">  367</span>&#160;        <span class="keyword">typename</span>        OutputIteratorT,</div>
+<div class="line"><a name="l00368"></a><span class="lineno">  368</span>&#160;        <span class="keyword">typename</span>        ScanOp&gt;</div>
+<div class="line"><a name="l00369"></a><span class="lineno">  369</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00370"></a><span class="lineno"><a class="code" href="structcub_1_1_device_scan.html#af27a73a9a8daef1b4fe5a16233932e30">  370</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_scan.html#af27a73a9a8daef1b4fe5a16233932e30" title="Computes a device-wide inclusive prefix scan using the specified binary scan_op functor. ">InclusiveScan</a>(</div>
+<div class="line"><a name="l00371"></a><span class="lineno">  371</span>&#160;        <span class="keywordtype">void</span>            *d_temp_storage,                    </div>
+<div class="line"><a name="l00372"></a><span class="lineno">  372</span>&#160;        <span class="keywordtype">size_t</span>          &amp;temp_storage_bytes,                </div>
+<div class="line"><a name="l00373"></a><span class="lineno">  373</span>&#160;        InputIteratorT  d_in,                               </div>
+<div class="line"><a name="l00374"></a><span class="lineno">  374</span>&#160;        OutputIteratorT d_out,                              </div>
+<div class="line"><a name="l00375"></a><span class="lineno">  375</span>&#160;        ScanOp          scan_op,                            </div>
+<div class="line"><a name="l00376"></a><span class="lineno">  376</span>&#160;        <span class="keywordtype">int</span>             num_items,                          </div>
+<div class="line"><a name="l00377"></a><span class="lineno">  377</span>&#160;        cudaStream_t    stream             = 0,             </div>
+<div class="line"><a name="l00378"></a><span class="lineno">  378</span>&#160;        <span class="keywordtype">bool</span>            debug_synchronous  = <span class="keyword">false</span>)         </div>
+<div class="line"><a name="l00379"></a><span class="lineno">  379</span>&#160;    {</div>
+<div class="line"><a name="l00380"></a><span class="lineno">  380</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00381"></a><span class="lineno">  381</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
+<div class="line"><a name="l00382"></a><span class="lineno">  382</span>&#160;</div>
+<div class="line"><a name="l00383"></a><span class="lineno">  383</span>&#160;        <span class="keywordflow">return</span> DispatchScan&lt;InputIteratorT, OutputIteratorT, ScanOp, NullType, OffsetT&gt;::Dispatch(</div>
+<div class="line"><a name="l00384"></a><span class="lineno">  384</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00385"></a><span class="lineno">  385</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00386"></a><span class="lineno">  386</span>&#160;            d_in,</div>
+<div class="line"><a name="l00387"></a><span class="lineno">  387</span>&#160;            d_out,</div>
+<div class="line"><a name="l00388"></a><span class="lineno">  388</span>&#160;            scan_op,</div>
+<div class="line"><a name="l00389"></a><span class="lineno">  389</span>&#160;            NullType(),</div>
+<div class="line"><a name="l00390"></a><span class="lineno">  390</span>&#160;            num_items,</div>
+<div class="line"><a name="l00391"></a><span class="lineno">  391</span>&#160;            stream,</div>
+<div class="line"><a name="l00392"></a><span class="lineno">  392</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00393"></a><span class="lineno">  393</span>&#160;    }</div>
+<div class="line"><a name="l00394"></a><span class="lineno">  394</span>&#160;</div>
+<div class="line"><a name="l00396"></a><span class="lineno">  396</span>&#160;</div>
+<div class="line"><a name="l00397"></a><span class="lineno">  397</span>&#160;};</div>
+<div class="line"><a name="l00398"></a><span class="lineno">  398</span>&#160;</div>
+<div class="line"><a name="l00403"></a><span class="lineno">  403</span>&#160;}               <span class="comment">// CUB namespace</span></div>
+<div class="line"><a name="l00404"></a><span class="lineno">  404</span>&#160;CUB_NS_POSTFIX  <span class="comment">// Optional outer namespace(s)</span></div>
+<div class="line"><a name="l00405"></a><span class="lineno">  405</span>&#160;</div>
+<div class="line"><a name="l00406"></a><span class="lineno">  406</span>&#160;</div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/device__segmented__radix__sort_8cuh.html b/docs/html/device__segmented__radix__sort_8cuh.html
index f1d9bb0a04..c3d8fecb38 100644
--- a/docs/html/device__segmented__radix__sort_8cuh.html
+++ b/docs/html/device__segmented__radix__sort_8cuh.html
@@ -137,7 +137,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/device__segmented__radix__sort_8cuh_source.html b/docs/html/device__segmented__radix__sort_8cuh_source.html
index be1c36dff3..af9d3e3d98 100644
--- a/docs/html/device__segmented__radix__sort_8cuh_source.html
+++ b/docs/html/device__segmented__radix__sort_8cuh_source.html
@@ -151,337 +151,336 @@
 <div class="line"><a name="l00082"></a><span class="lineno">   82</span>&#160;</div>
 <div class="line"><a name="l00136"></a><span class="lineno">  136</span>&#160;    <span class="keyword">template</span> &lt;</div>
 <div class="line"><a name="l00137"></a><span class="lineno">  137</span>&#160;        <span class="keyword">typename</span>            KeyT,</div>
-<div class="line"><a name="l00138"></a><span class="lineno">  138</span>&#160;        <span class="keyword">typename</span>            ValueT,</div>
-<div class="line"><a name="l00139"></a><span class="lineno">  139</span>&#160;        <span class="keyword">typename</span>            OffsetT&gt;</div>
-<div class="line"><a name="l00140"></a><span class="lineno">  140</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00141"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_radix_sort.html#a4c291958575f14acc6a9e6d3e2ea6597">  141</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_radix_sort.html#a4c291958575f14acc6a9e6d3e2ea6597" title="Sorts segments of key-value pairs into ascending order. (~2N auxiliary storage required) ...">SortPairs</a>(</div>
-<div class="line"><a name="l00142"></a><span class="lineno">  142</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                        </div>
-<div class="line"><a name="l00143"></a><span class="lineno">  143</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                    </div>
-<div class="line"><a name="l00144"></a><span class="lineno">  144</span>&#160;        KeyT                *d_keys_in,                             </div>
-<div class="line"><a name="l00145"></a><span class="lineno">  145</span>&#160;        KeyT                *d_keys_out,                            </div>
-<div class="line"><a name="l00146"></a><span class="lineno">  146</span>&#160;        ValueT              *d_values_in,                           </div>
-<div class="line"><a name="l00147"></a><span class="lineno">  147</span>&#160;        ValueT              *d_values_out,                          </div>
-<div class="line"><a name="l00148"></a><span class="lineno">  148</span>&#160;        <span class="keywordtype">int</span>                 num_items,                              </div>
-<div class="line"><a name="l00149"></a><span class="lineno">  149</span>&#160;        OffsetT             num_segments,                           </div>
-<div class="line"><a name="l00150"></a><span class="lineno">  150</span>&#160;        OffsetT             *d_begin_offsets,                       </div>
-<div class="line"><a name="l00151"></a><span class="lineno">  151</span>&#160;        OffsetT             *d_end_offsets,                         </div>
-<div class="line"><a name="l00152"></a><span class="lineno">  152</span>&#160;        <span class="keywordtype">int</span>                 begin_bit           = 0,                </div>
-<div class="line"><a name="l00153"></a><span class="lineno">  153</span>&#160;        <span class="keywordtype">int</span>                 end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
-<div class="line"><a name="l00154"></a><span class="lineno">  154</span>&#160;        cudaStream_t        stream              = 0,                </div>
-<div class="line"><a name="l00155"></a><span class="lineno">  155</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
-<div class="line"><a name="l00156"></a><span class="lineno">  156</span>&#160;    {</div>
-<div class="line"><a name="l00157"></a><span class="lineno">  157</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00158"></a><span class="lineno">  158</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00159"></a><span class="lineno">  159</span>&#160;</div>
-<div class="line"><a name="l00160"></a><span class="lineno">  160</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;KeyT&gt;</a>       d_keys(d_keys_in, d_keys_out);</div>
-<div class="line"><a name="l00161"></a><span class="lineno">  161</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;ValueT&gt;</a>     d_values(d_values_in, d_values_out);</div>
-<div class="line"><a name="l00162"></a><span class="lineno">  162</span>&#160;</div>
-<div class="line"><a name="l00163"></a><span class="lineno">  163</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedRadixSort&lt;false, KeyT, ValueT, OffsetT&gt;::Dispatch(</div>
-<div class="line"><a name="l00164"></a><span class="lineno">  164</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00165"></a><span class="lineno">  165</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00166"></a><span class="lineno">  166</span>&#160;            d_keys,</div>
-<div class="line"><a name="l00167"></a><span class="lineno">  167</span>&#160;            d_values,</div>
-<div class="line"><a name="l00168"></a><span class="lineno">  168</span>&#160;            num_items,</div>
-<div class="line"><a name="l00169"></a><span class="lineno">  169</span>&#160;            num_segments,</div>
-<div class="line"><a name="l00170"></a><span class="lineno">  170</span>&#160;            d_begin_offsets,</div>
-<div class="line"><a name="l00171"></a><span class="lineno">  171</span>&#160;            d_end_offsets,</div>
-<div class="line"><a name="l00172"></a><span class="lineno">  172</span>&#160;            begin_bit,</div>
-<div class="line"><a name="l00173"></a><span class="lineno">  173</span>&#160;            end_bit,</div>
-<div class="line"><a name="l00174"></a><span class="lineno">  174</span>&#160;            <span class="keyword">false</span>,</div>
-<div class="line"><a name="l00175"></a><span class="lineno">  175</span>&#160;            stream,</div>
-<div class="line"><a name="l00176"></a><span class="lineno">  176</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00177"></a><span class="lineno">  177</span>&#160;    }</div>
+<div class="line"><a name="l00138"></a><span class="lineno">  138</span>&#160;        <span class="keyword">typename</span>            ValueT&gt;</div>
+<div class="line"><a name="l00139"></a><span class="lineno">  139</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00140"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_radix_sort.html#a6770adfa8e5a99c8015d9e6ab5ed8ca0">  140</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_radix_sort.html#a6770adfa8e5a99c8015d9e6ab5ed8ca0" title="Sorts segments of key-value pairs into ascending order. (~2N auxiliary storage required) ...">SortPairs</a>(</div>
+<div class="line"><a name="l00141"></a><span class="lineno">  141</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                        </div>
+<div class="line"><a name="l00142"></a><span class="lineno">  142</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                    </div>
+<div class="line"><a name="l00143"></a><span class="lineno">  143</span>&#160;        KeyT                *d_keys_in,                             </div>
+<div class="line"><a name="l00144"></a><span class="lineno">  144</span>&#160;        KeyT                *d_keys_out,                            </div>
+<div class="line"><a name="l00145"></a><span class="lineno">  145</span>&#160;        ValueT              *d_values_in,                           </div>
+<div class="line"><a name="l00146"></a><span class="lineno">  146</span>&#160;        ValueT              *d_values_out,                          </div>
+<div class="line"><a name="l00147"></a><span class="lineno">  147</span>&#160;        <span class="keywordtype">int</span>                 num_items,                              </div>
+<div class="line"><a name="l00148"></a><span class="lineno">  148</span>&#160;        <span class="keywordtype">int</span>                 num_segments,                           </div>
+<div class="line"><a name="l00149"></a><span class="lineno">  149</span>&#160;        <span class="keywordtype">int</span>                 *d_begin_offsets,                       </div>
+<div class="line"><a name="l00150"></a><span class="lineno">  150</span>&#160;        <span class="keywordtype">int</span>                 *d_end_offsets,                         </div>
+<div class="line"><a name="l00151"></a><span class="lineno">  151</span>&#160;        <span class="keywordtype">int</span>                 begin_bit           = 0,                </div>
+<div class="line"><a name="l00152"></a><span class="lineno">  152</span>&#160;        <span class="keywordtype">int</span>                 end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
+<div class="line"><a name="l00153"></a><span class="lineno">  153</span>&#160;        cudaStream_t        stream              = 0,                </div>
+<div class="line"><a name="l00154"></a><span class="lineno">  154</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
+<div class="line"><a name="l00155"></a><span class="lineno">  155</span>&#160;    {</div>
+<div class="line"><a name="l00156"></a><span class="lineno">  156</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00157"></a><span class="lineno">  157</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
+<div class="line"><a name="l00158"></a><span class="lineno">  158</span>&#160;</div>
+<div class="line"><a name="l00159"></a><span class="lineno">  159</span>&#160;        DoubleBuffer&lt;KeyT&gt;       d_keys(d_keys_in, d_keys_out);</div>
+<div class="line"><a name="l00160"></a><span class="lineno">  160</span>&#160;        DoubleBuffer&lt;ValueT&gt;     d_values(d_values_in, d_values_out);</div>
+<div class="line"><a name="l00161"></a><span class="lineno">  161</span>&#160;</div>
+<div class="line"><a name="l00162"></a><span class="lineno">  162</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedRadixSort&lt;false, KeyT, ValueT, OffsetT&gt;::Dispatch(</div>
+<div class="line"><a name="l00163"></a><span class="lineno">  163</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00164"></a><span class="lineno">  164</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00165"></a><span class="lineno">  165</span>&#160;            d_keys,</div>
+<div class="line"><a name="l00166"></a><span class="lineno">  166</span>&#160;            d_values,</div>
+<div class="line"><a name="l00167"></a><span class="lineno">  167</span>&#160;            num_items,</div>
+<div class="line"><a name="l00168"></a><span class="lineno">  168</span>&#160;            num_segments,</div>
+<div class="line"><a name="l00169"></a><span class="lineno">  169</span>&#160;            d_begin_offsets,</div>
+<div class="line"><a name="l00170"></a><span class="lineno">  170</span>&#160;            d_end_offsets,</div>
+<div class="line"><a name="l00171"></a><span class="lineno">  171</span>&#160;            begin_bit,</div>
+<div class="line"><a name="l00172"></a><span class="lineno">  172</span>&#160;            end_bit,</div>
+<div class="line"><a name="l00173"></a><span class="lineno">  173</span>&#160;            <span class="keyword">false</span>,</div>
+<div class="line"><a name="l00174"></a><span class="lineno">  174</span>&#160;            stream,</div>
+<div class="line"><a name="l00175"></a><span class="lineno">  175</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00176"></a><span class="lineno">  176</span>&#160;    }</div>
+<div class="line"><a name="l00177"></a><span class="lineno">  177</span>&#160;</div>
 <div class="line"><a name="l00178"></a><span class="lineno">  178</span>&#160;</div>
-<div class="line"><a name="l00179"></a><span class="lineno">  179</span>&#160;</div>
-<div class="line"><a name="l00244"></a><span class="lineno">  244</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00245"></a><span class="lineno">  245</span>&#160;        <span class="keyword">typename</span>                KeyT,</div>
-<div class="line"><a name="l00246"></a><span class="lineno">  246</span>&#160;        <span class="keyword">typename</span>                ValueT&gt;</div>
-<div class="line"><a name="l00247"></a><span class="lineno">  247</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00248"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_radix_sort.html#a4cffada9bba553f9871a34d926bbb148">  248</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_radix_sort.html#a4cffada9bba553f9871a34d926bbb148" title="Sorts segments of key-value pairs into ascending order. (~N auxiliary storage required) ...">SortPairs</a>(</div>
-<div class="line"><a name="l00249"></a><span class="lineno">  249</span>&#160;        <span class="keywordtype">void</span>                    *d_temp_storage,                        </div>
-<div class="line"><a name="l00250"></a><span class="lineno">  250</span>&#160;        <span class="keywordtype">size_t</span>                  &amp;temp_storage_bytes,                    </div>
-<div class="line"><a name="l00251"></a><span class="lineno">  251</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;KeyT&gt;</a>      &amp;d_keys,                                </div>
-<div class="line"><a name="l00252"></a><span class="lineno">  252</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;ValueT&gt;</a>    &amp;d_values,                              </div>
-<div class="line"><a name="l00253"></a><span class="lineno">  253</span>&#160;        <span class="keywordtype">int</span>                     num_items,                              </div>
-<div class="line"><a name="l00254"></a><span class="lineno">  254</span>&#160;        <span class="keywordtype">int</span>                     num_segments,                           </div>
-<div class="line"><a name="l00255"></a><span class="lineno">  255</span>&#160;        <span class="keywordtype">int</span>                     *d_begin_offsets,                       </div>
-<div class="line"><a name="l00256"></a><span class="lineno">  256</span>&#160;        <span class="keywordtype">int</span>                     *d_end_offsets,                         </div>
-<div class="line"><a name="l00257"></a><span class="lineno">  257</span>&#160;        <span class="keywordtype">int</span>                     begin_bit           = 0,                </div>
-<div class="line"><a name="l00258"></a><span class="lineno">  258</span>&#160;        <span class="keywordtype">int</span>                     end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
-<div class="line"><a name="l00259"></a><span class="lineno">  259</span>&#160;        cudaStream_t            stream              = 0,                </div>
-<div class="line"><a name="l00260"></a><span class="lineno">  260</span>&#160;        <span class="keywordtype">bool</span>                    debug_synchronous   = <span class="keyword">false</span>)            </div>
-<div class="line"><a name="l00261"></a><span class="lineno">  261</span>&#160;    {</div>
-<div class="line"><a name="l00262"></a><span class="lineno">  262</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00263"></a><span class="lineno">  263</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00264"></a><span class="lineno">  264</span>&#160;</div>
-<div class="line"><a name="l00265"></a><span class="lineno">  265</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedRadixSort&lt;false, KeyT, ValueT, OffsetT&gt;::Dispatch(</div>
-<div class="line"><a name="l00266"></a><span class="lineno">  266</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00267"></a><span class="lineno">  267</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00268"></a><span class="lineno">  268</span>&#160;            d_keys,</div>
-<div class="line"><a name="l00269"></a><span class="lineno">  269</span>&#160;            d_values,</div>
-<div class="line"><a name="l00270"></a><span class="lineno">  270</span>&#160;            num_items,</div>
-<div class="line"><a name="l00271"></a><span class="lineno">  271</span>&#160;            num_segments,</div>
-<div class="line"><a name="l00272"></a><span class="lineno">  272</span>&#160;            d_begin_offsets,</div>
-<div class="line"><a name="l00273"></a><span class="lineno">  273</span>&#160;            d_end_offsets,</div>
-<div class="line"><a name="l00274"></a><span class="lineno">  274</span>&#160;            begin_bit,</div>
-<div class="line"><a name="l00275"></a><span class="lineno">  275</span>&#160;            end_bit,</div>
-<div class="line"><a name="l00276"></a><span class="lineno">  276</span>&#160;            <span class="keyword">true</span>,</div>
-<div class="line"><a name="l00277"></a><span class="lineno">  277</span>&#160;            stream,</div>
-<div class="line"><a name="l00278"></a><span class="lineno">  278</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00279"></a><span class="lineno">  279</span>&#160;    }</div>
+<div class="line"><a name="l00243"></a><span class="lineno">  243</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00244"></a><span class="lineno">  244</span>&#160;        <span class="keyword">typename</span>                KeyT,</div>
+<div class="line"><a name="l00245"></a><span class="lineno">  245</span>&#160;        <span class="keyword">typename</span>                ValueT&gt;</div>
+<div class="line"><a name="l00246"></a><span class="lineno">  246</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00247"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_radix_sort.html#a4cffada9bba553f9871a34d926bbb148">  247</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_radix_sort.html#a4cffada9bba553f9871a34d926bbb148" title="Sorts segments of key-value pairs into ascending order. (~N auxiliary storage required) ...">SortPairs</a>(</div>
+<div class="line"><a name="l00248"></a><span class="lineno">  248</span>&#160;        <span class="keywordtype">void</span>                    *d_temp_storage,                        </div>
+<div class="line"><a name="l00249"></a><span class="lineno">  249</span>&#160;        <span class="keywordtype">size_t</span>                  &amp;temp_storage_bytes,                    </div>
+<div class="line"><a name="l00250"></a><span class="lineno">  250</span>&#160;        DoubleBuffer&lt;KeyT&gt;      &amp;d_keys,                                </div>
+<div class="line"><a name="l00251"></a><span class="lineno">  251</span>&#160;        DoubleBuffer&lt;ValueT&gt;    &amp;d_values,                              </div>
+<div class="line"><a name="l00252"></a><span class="lineno">  252</span>&#160;        <span class="keywordtype">int</span>                     num_items,                              </div>
+<div class="line"><a name="l00253"></a><span class="lineno">  253</span>&#160;        <span class="keywordtype">int</span>                     num_segments,                           </div>
+<div class="line"><a name="l00254"></a><span class="lineno">  254</span>&#160;        <span class="keywordtype">int</span>                     *d_begin_offsets,                       </div>
+<div class="line"><a name="l00255"></a><span class="lineno">  255</span>&#160;        <span class="keywordtype">int</span>                     *d_end_offsets,                         </div>
+<div class="line"><a name="l00256"></a><span class="lineno">  256</span>&#160;        <span class="keywordtype">int</span>                     begin_bit           = 0,                </div>
+<div class="line"><a name="l00257"></a><span class="lineno">  257</span>&#160;        <span class="keywordtype">int</span>                     end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
+<div class="line"><a name="l00258"></a><span class="lineno">  258</span>&#160;        cudaStream_t            stream              = 0,                </div>
+<div class="line"><a name="l00259"></a><span class="lineno">  259</span>&#160;        <span class="keywordtype">bool</span>                    debug_synchronous   = <span class="keyword">false</span>)            </div>
+<div class="line"><a name="l00260"></a><span class="lineno">  260</span>&#160;    {</div>
+<div class="line"><a name="l00261"></a><span class="lineno">  261</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00262"></a><span class="lineno">  262</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
+<div class="line"><a name="l00263"></a><span class="lineno">  263</span>&#160;</div>
+<div class="line"><a name="l00264"></a><span class="lineno">  264</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedRadixSort&lt;false, KeyT, ValueT, OffsetT&gt;::Dispatch(</div>
+<div class="line"><a name="l00265"></a><span class="lineno">  265</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00266"></a><span class="lineno">  266</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00267"></a><span class="lineno">  267</span>&#160;            d_keys,</div>
+<div class="line"><a name="l00268"></a><span class="lineno">  268</span>&#160;            d_values,</div>
+<div class="line"><a name="l00269"></a><span class="lineno">  269</span>&#160;            num_items,</div>
+<div class="line"><a name="l00270"></a><span class="lineno">  270</span>&#160;            num_segments,</div>
+<div class="line"><a name="l00271"></a><span class="lineno">  271</span>&#160;            d_begin_offsets,</div>
+<div class="line"><a name="l00272"></a><span class="lineno">  272</span>&#160;            d_end_offsets,</div>
+<div class="line"><a name="l00273"></a><span class="lineno">  273</span>&#160;            begin_bit,</div>
+<div class="line"><a name="l00274"></a><span class="lineno">  274</span>&#160;            end_bit,</div>
+<div class="line"><a name="l00275"></a><span class="lineno">  275</span>&#160;            <span class="keyword">true</span>,</div>
+<div class="line"><a name="l00276"></a><span class="lineno">  276</span>&#160;            stream,</div>
+<div class="line"><a name="l00277"></a><span class="lineno">  277</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00278"></a><span class="lineno">  278</span>&#160;    }</div>
+<div class="line"><a name="l00279"></a><span class="lineno">  279</span>&#160;</div>
 <div class="line"><a name="l00280"></a><span class="lineno">  280</span>&#160;</div>
-<div class="line"><a name="l00281"></a><span class="lineno">  281</span>&#160;</div>
-<div class="line"><a name="l00335"></a><span class="lineno">  335</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00336"></a><span class="lineno">  336</span>&#160;        <span class="keyword">typename</span>            KeyT,</div>
-<div class="line"><a name="l00337"></a><span class="lineno">  337</span>&#160;        <span class="keyword">typename</span>            ValueT&gt;</div>
-<div class="line"><a name="l00338"></a><span class="lineno">  338</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00339"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_radix_sort.html#a39de3d48166582a802cc7df1481d3ff4">  339</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_radix_sort.html#a39de3d48166582a802cc7df1481d3ff4" title="Sorts segments of key-value pairs into descending order. (~2N auxiliary storage required). ">SortPairsDescending</a>(</div>
-<div class="line"><a name="l00340"></a><span class="lineno">  340</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                        </div>
-<div class="line"><a name="l00341"></a><span class="lineno">  341</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                    </div>
-<div class="line"><a name="l00342"></a><span class="lineno">  342</span>&#160;        KeyT                *d_keys_in,                             </div>
-<div class="line"><a name="l00343"></a><span class="lineno">  343</span>&#160;        KeyT                *d_keys_out,                            </div>
-<div class="line"><a name="l00344"></a><span class="lineno">  344</span>&#160;        ValueT              *d_values_in,                           </div>
-<div class="line"><a name="l00345"></a><span class="lineno">  345</span>&#160;        ValueT              *d_values_out,                          </div>
-<div class="line"><a name="l00346"></a><span class="lineno">  346</span>&#160;        <span class="keywordtype">int</span>                 num_items,                              </div>
-<div class="line"><a name="l00347"></a><span class="lineno">  347</span>&#160;        <span class="keywordtype">int</span>                 num_segments,                           </div>
-<div class="line"><a name="l00348"></a><span class="lineno">  348</span>&#160;        <span class="keywordtype">int</span>                 *d_begin_offsets,                       </div>
-<div class="line"><a name="l00349"></a><span class="lineno">  349</span>&#160;        <span class="keywordtype">int</span>                 *d_end_offsets,                         </div>
-<div class="line"><a name="l00350"></a><span class="lineno">  350</span>&#160;        <span class="keywordtype">int</span>                 begin_bit           = 0,                </div>
-<div class="line"><a name="l00351"></a><span class="lineno">  351</span>&#160;        <span class="keywordtype">int</span>                 end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
-<div class="line"><a name="l00352"></a><span class="lineno">  352</span>&#160;        cudaStream_t        stream              = 0,                </div>
-<div class="line"><a name="l00353"></a><span class="lineno">  353</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
-<div class="line"><a name="l00354"></a><span class="lineno">  354</span>&#160;    {</div>
-<div class="line"><a name="l00355"></a><span class="lineno">  355</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00356"></a><span class="lineno">  356</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00357"></a><span class="lineno">  357</span>&#160;</div>
-<div class="line"><a name="l00358"></a><span class="lineno">  358</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;KeyT&gt;</a>       d_keys(d_keys_in, d_keys_out);</div>
-<div class="line"><a name="l00359"></a><span class="lineno">  359</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;ValueT&gt;</a>     d_values(d_values_in, d_values_out);</div>
-<div class="line"><a name="l00360"></a><span class="lineno">  360</span>&#160;</div>
-<div class="line"><a name="l00361"></a><span class="lineno">  361</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedRadixSort&lt;true, KeyT, ValueT, OffsetT&gt;::Dispatch(</div>
-<div class="line"><a name="l00362"></a><span class="lineno">  362</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00363"></a><span class="lineno">  363</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00364"></a><span class="lineno">  364</span>&#160;            d_keys,</div>
-<div class="line"><a name="l00365"></a><span class="lineno">  365</span>&#160;            d_values,</div>
-<div class="line"><a name="l00366"></a><span class="lineno">  366</span>&#160;            num_items,</div>
-<div class="line"><a name="l00367"></a><span class="lineno">  367</span>&#160;            num_segments,</div>
-<div class="line"><a name="l00368"></a><span class="lineno">  368</span>&#160;            d_begin_offsets,</div>
-<div class="line"><a name="l00369"></a><span class="lineno">  369</span>&#160;            d_end_offsets,</div>
-<div class="line"><a name="l00370"></a><span class="lineno">  370</span>&#160;            begin_bit,</div>
-<div class="line"><a name="l00371"></a><span class="lineno">  371</span>&#160;            end_bit,</div>
-<div class="line"><a name="l00372"></a><span class="lineno">  372</span>&#160;            <span class="keyword">false</span>,</div>
-<div class="line"><a name="l00373"></a><span class="lineno">  373</span>&#160;            stream,</div>
-<div class="line"><a name="l00374"></a><span class="lineno">  374</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00375"></a><span class="lineno">  375</span>&#160;    }</div>
+<div class="line"><a name="l00334"></a><span class="lineno">  334</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00335"></a><span class="lineno">  335</span>&#160;        <span class="keyword">typename</span>            KeyT,</div>
+<div class="line"><a name="l00336"></a><span class="lineno">  336</span>&#160;        <span class="keyword">typename</span>            ValueT&gt;</div>
+<div class="line"><a name="l00337"></a><span class="lineno">  337</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00338"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_radix_sort.html#a39de3d48166582a802cc7df1481d3ff4">  338</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_radix_sort.html#a39de3d48166582a802cc7df1481d3ff4" title="Sorts segments of key-value pairs into descending order. (~2N auxiliary storage required). ">SortPairsDescending</a>(</div>
+<div class="line"><a name="l00339"></a><span class="lineno">  339</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                        </div>
+<div class="line"><a name="l00340"></a><span class="lineno">  340</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                    </div>
+<div class="line"><a name="l00341"></a><span class="lineno">  341</span>&#160;        KeyT                *d_keys_in,                             </div>
+<div class="line"><a name="l00342"></a><span class="lineno">  342</span>&#160;        KeyT                *d_keys_out,                            </div>
+<div class="line"><a name="l00343"></a><span class="lineno">  343</span>&#160;        ValueT              *d_values_in,                           </div>
+<div class="line"><a name="l00344"></a><span class="lineno">  344</span>&#160;        ValueT              *d_values_out,                          </div>
+<div class="line"><a name="l00345"></a><span class="lineno">  345</span>&#160;        <span class="keywordtype">int</span>                 num_items,                              </div>
+<div class="line"><a name="l00346"></a><span class="lineno">  346</span>&#160;        <span class="keywordtype">int</span>                 num_segments,                           </div>
+<div class="line"><a name="l00347"></a><span class="lineno">  347</span>&#160;        <span class="keywordtype">int</span>                 *d_begin_offsets,                       </div>
+<div class="line"><a name="l00348"></a><span class="lineno">  348</span>&#160;        <span class="keywordtype">int</span>                 *d_end_offsets,                         </div>
+<div class="line"><a name="l00349"></a><span class="lineno">  349</span>&#160;        <span class="keywordtype">int</span>                 begin_bit           = 0,                </div>
+<div class="line"><a name="l00350"></a><span class="lineno">  350</span>&#160;        <span class="keywordtype">int</span>                 end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
+<div class="line"><a name="l00351"></a><span class="lineno">  351</span>&#160;        cudaStream_t        stream              = 0,                </div>
+<div class="line"><a name="l00352"></a><span class="lineno">  352</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
+<div class="line"><a name="l00353"></a><span class="lineno">  353</span>&#160;    {</div>
+<div class="line"><a name="l00354"></a><span class="lineno">  354</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00355"></a><span class="lineno">  355</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
+<div class="line"><a name="l00356"></a><span class="lineno">  356</span>&#160;</div>
+<div class="line"><a name="l00357"></a><span class="lineno">  357</span>&#160;        DoubleBuffer&lt;KeyT&gt;       d_keys(d_keys_in, d_keys_out);</div>
+<div class="line"><a name="l00358"></a><span class="lineno">  358</span>&#160;        DoubleBuffer&lt;ValueT&gt;     d_values(d_values_in, d_values_out);</div>
+<div class="line"><a name="l00359"></a><span class="lineno">  359</span>&#160;</div>
+<div class="line"><a name="l00360"></a><span class="lineno">  360</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedRadixSort&lt;true, KeyT, ValueT, OffsetT&gt;::Dispatch(</div>
+<div class="line"><a name="l00361"></a><span class="lineno">  361</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00362"></a><span class="lineno">  362</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00363"></a><span class="lineno">  363</span>&#160;            d_keys,</div>
+<div class="line"><a name="l00364"></a><span class="lineno">  364</span>&#160;            d_values,</div>
+<div class="line"><a name="l00365"></a><span class="lineno">  365</span>&#160;            num_items,</div>
+<div class="line"><a name="l00366"></a><span class="lineno">  366</span>&#160;            num_segments,</div>
+<div class="line"><a name="l00367"></a><span class="lineno">  367</span>&#160;            d_begin_offsets,</div>
+<div class="line"><a name="l00368"></a><span class="lineno">  368</span>&#160;            d_end_offsets,</div>
+<div class="line"><a name="l00369"></a><span class="lineno">  369</span>&#160;            begin_bit,</div>
+<div class="line"><a name="l00370"></a><span class="lineno">  370</span>&#160;            end_bit,</div>
+<div class="line"><a name="l00371"></a><span class="lineno">  371</span>&#160;            <span class="keyword">false</span>,</div>
+<div class="line"><a name="l00372"></a><span class="lineno">  372</span>&#160;            stream,</div>
+<div class="line"><a name="l00373"></a><span class="lineno">  373</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00374"></a><span class="lineno">  374</span>&#160;    }</div>
+<div class="line"><a name="l00375"></a><span class="lineno">  375</span>&#160;</div>
 <div class="line"><a name="l00376"></a><span class="lineno">  376</span>&#160;</div>
-<div class="line"><a name="l00377"></a><span class="lineno">  377</span>&#160;</div>
-<div class="line"><a name="l00442"></a><span class="lineno">  442</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00443"></a><span class="lineno">  443</span>&#160;        <span class="keyword">typename</span>            KeyT,</div>
-<div class="line"><a name="l00444"></a><span class="lineno">  444</span>&#160;        <span class="keyword">typename</span>            ValueT&gt;</div>
-<div class="line"><a name="l00445"></a><span class="lineno">  445</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00446"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_radix_sort.html#aafce5852dfbfbeef027493c3791d6347">  446</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_radix_sort.html#aafce5852dfbfbeef027493c3791d6347" title="Sorts segments of key-value pairs into descending order. (~N auxiliary storage required). ">SortPairsDescending</a>(</div>
-<div class="line"><a name="l00447"></a><span class="lineno">  447</span>&#160;        <span class="keywordtype">void</span>                    *d_temp_storage,                        </div>
-<div class="line"><a name="l00448"></a><span class="lineno">  448</span>&#160;        <span class="keywordtype">size_t</span>                  &amp;temp_storage_bytes,                    </div>
-<div class="line"><a name="l00449"></a><span class="lineno">  449</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;KeyT&gt;</a>      &amp;d_keys,                                </div>
-<div class="line"><a name="l00450"></a><span class="lineno">  450</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;ValueT&gt;</a>    &amp;d_values,                              </div>
-<div class="line"><a name="l00451"></a><span class="lineno">  451</span>&#160;        <span class="keywordtype">int</span>                     num_items,                              </div>
-<div class="line"><a name="l00452"></a><span class="lineno">  452</span>&#160;        <span class="keywordtype">int</span>                     num_segments,                           </div>
-<div class="line"><a name="l00453"></a><span class="lineno">  453</span>&#160;        <span class="keywordtype">int</span>                     *d_begin_offsets,                       </div>
-<div class="line"><a name="l00454"></a><span class="lineno">  454</span>&#160;        <span class="keywordtype">int</span>                     *d_end_offsets,                         </div>
-<div class="line"><a name="l00455"></a><span class="lineno">  455</span>&#160;        <span class="keywordtype">int</span>                     begin_bit           = 0,                </div>
-<div class="line"><a name="l00456"></a><span class="lineno">  456</span>&#160;        <span class="keywordtype">int</span>                     end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
-<div class="line"><a name="l00457"></a><span class="lineno">  457</span>&#160;        cudaStream_t            stream              = 0,                </div>
-<div class="line"><a name="l00458"></a><span class="lineno">  458</span>&#160;        <span class="keywordtype">bool</span>                    debug_synchronous   = <span class="keyword">false</span>)            </div>
-<div class="line"><a name="l00459"></a><span class="lineno">  459</span>&#160;    {</div>
-<div class="line"><a name="l00460"></a><span class="lineno">  460</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00461"></a><span class="lineno">  461</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00462"></a><span class="lineno">  462</span>&#160;</div>
-<div class="line"><a name="l00463"></a><span class="lineno">  463</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedRadixSort&lt;true, KeyT, ValueT, OffsetT&gt;::Dispatch(</div>
-<div class="line"><a name="l00464"></a><span class="lineno">  464</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00465"></a><span class="lineno">  465</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00466"></a><span class="lineno">  466</span>&#160;            d_keys,</div>
-<div class="line"><a name="l00467"></a><span class="lineno">  467</span>&#160;            d_values,</div>
-<div class="line"><a name="l00468"></a><span class="lineno">  468</span>&#160;            num_items,</div>
-<div class="line"><a name="l00469"></a><span class="lineno">  469</span>&#160;            num_segments,</div>
-<div class="line"><a name="l00470"></a><span class="lineno">  470</span>&#160;            d_begin_offsets,</div>
-<div class="line"><a name="l00471"></a><span class="lineno">  471</span>&#160;            d_end_offsets,</div>
-<div class="line"><a name="l00472"></a><span class="lineno">  472</span>&#160;            begin_bit,</div>
-<div class="line"><a name="l00473"></a><span class="lineno">  473</span>&#160;            end_bit,</div>
-<div class="line"><a name="l00474"></a><span class="lineno">  474</span>&#160;            <span class="keyword">true</span>,</div>
-<div class="line"><a name="l00475"></a><span class="lineno">  475</span>&#160;            stream,</div>
-<div class="line"><a name="l00476"></a><span class="lineno">  476</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00477"></a><span class="lineno">  477</span>&#160;    }</div>
+<div class="line"><a name="l00441"></a><span class="lineno">  441</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00442"></a><span class="lineno">  442</span>&#160;        <span class="keyword">typename</span>            KeyT,</div>
+<div class="line"><a name="l00443"></a><span class="lineno">  443</span>&#160;        <span class="keyword">typename</span>            ValueT&gt;</div>
+<div class="line"><a name="l00444"></a><span class="lineno">  444</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00445"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_radix_sort.html#aafce5852dfbfbeef027493c3791d6347">  445</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_radix_sort.html#aafce5852dfbfbeef027493c3791d6347" title="Sorts segments of key-value pairs into descending order. (~N auxiliary storage required). ">SortPairsDescending</a>(</div>
+<div class="line"><a name="l00446"></a><span class="lineno">  446</span>&#160;        <span class="keywordtype">void</span>                    *d_temp_storage,                        </div>
+<div class="line"><a name="l00447"></a><span class="lineno">  447</span>&#160;        <span class="keywordtype">size_t</span>                  &amp;temp_storage_bytes,                    </div>
+<div class="line"><a name="l00448"></a><span class="lineno">  448</span>&#160;        DoubleBuffer&lt;KeyT&gt;      &amp;d_keys,                                </div>
+<div class="line"><a name="l00449"></a><span class="lineno">  449</span>&#160;        DoubleBuffer&lt;ValueT&gt;    &amp;d_values,                              </div>
+<div class="line"><a name="l00450"></a><span class="lineno">  450</span>&#160;        <span class="keywordtype">int</span>                     num_items,                              </div>
+<div class="line"><a name="l00451"></a><span class="lineno">  451</span>&#160;        <span class="keywordtype">int</span>                     num_segments,                           </div>
+<div class="line"><a name="l00452"></a><span class="lineno">  452</span>&#160;        <span class="keywordtype">int</span>                     *d_begin_offsets,                       </div>
+<div class="line"><a name="l00453"></a><span class="lineno">  453</span>&#160;        <span class="keywordtype">int</span>                     *d_end_offsets,                         </div>
+<div class="line"><a name="l00454"></a><span class="lineno">  454</span>&#160;        <span class="keywordtype">int</span>                     begin_bit           = 0,                </div>
+<div class="line"><a name="l00455"></a><span class="lineno">  455</span>&#160;        <span class="keywordtype">int</span>                     end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
+<div class="line"><a name="l00456"></a><span class="lineno">  456</span>&#160;        cudaStream_t            stream              = 0,                </div>
+<div class="line"><a name="l00457"></a><span class="lineno">  457</span>&#160;        <span class="keywordtype">bool</span>                    debug_synchronous   = <span class="keyword">false</span>)            </div>
+<div class="line"><a name="l00458"></a><span class="lineno">  458</span>&#160;    {</div>
+<div class="line"><a name="l00459"></a><span class="lineno">  459</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00460"></a><span class="lineno">  460</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
+<div class="line"><a name="l00461"></a><span class="lineno">  461</span>&#160;</div>
+<div class="line"><a name="l00462"></a><span class="lineno">  462</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedRadixSort&lt;true, KeyT, ValueT, OffsetT&gt;::Dispatch(</div>
+<div class="line"><a name="l00463"></a><span class="lineno">  463</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00464"></a><span class="lineno">  464</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00465"></a><span class="lineno">  465</span>&#160;            d_keys,</div>
+<div class="line"><a name="l00466"></a><span class="lineno">  466</span>&#160;            d_values,</div>
+<div class="line"><a name="l00467"></a><span class="lineno">  467</span>&#160;            num_items,</div>
+<div class="line"><a name="l00468"></a><span class="lineno">  468</span>&#160;            num_segments,</div>
+<div class="line"><a name="l00469"></a><span class="lineno">  469</span>&#160;            d_begin_offsets,</div>
+<div class="line"><a name="l00470"></a><span class="lineno">  470</span>&#160;            d_end_offsets,</div>
+<div class="line"><a name="l00471"></a><span class="lineno">  471</span>&#160;            begin_bit,</div>
+<div class="line"><a name="l00472"></a><span class="lineno">  472</span>&#160;            end_bit,</div>
+<div class="line"><a name="l00473"></a><span class="lineno">  473</span>&#160;            <span class="keyword">true</span>,</div>
+<div class="line"><a name="l00474"></a><span class="lineno">  474</span>&#160;            stream,</div>
+<div class="line"><a name="l00475"></a><span class="lineno">  475</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00476"></a><span class="lineno">  476</span>&#160;    }</div>
+<div class="line"><a name="l00477"></a><span class="lineno">  477</span>&#160;</div>
 <div class="line"><a name="l00478"></a><span class="lineno">  478</span>&#160;</div>
-<div class="line"><a name="l00479"></a><span class="lineno">  479</span>&#160;</div>
-<div class="line"><a name="l00481"></a><span class="lineno">  481</span>&#160;    <span class="comment">/******************************************************************/</span></div>
+<div class="line"><a name="l00480"></a><span class="lineno">  480</span>&#160;    <span class="comment">/******************************************************************/</span></div>
+<div class="line"><a name="l00484"></a><span class="lineno">  484</span>&#160;</div>
 <div class="line"><a name="l00485"></a><span class="lineno">  485</span>&#160;</div>
-<div class="line"><a name="l00486"></a><span class="lineno">  486</span>&#160;</div>
-<div class="line"><a name="l00533"></a><span class="lineno">  533</span>&#160;    <span class="keyword">template</span> &lt;<span class="keyword">typename</span> KeyT&gt;</div>
-<div class="line"><a name="l00534"></a><span class="lineno">  534</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00535"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_radix_sort.html#ab6a917a29d441021949e197d3a639fba">  535</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_radix_sort.html#ab6a917a29d441021949e197d3a639fba" title="Sorts segments of keys into ascending order. (~2N auxiliary storage required) ">SortKeys</a>(</div>
-<div class="line"><a name="l00536"></a><span class="lineno">  536</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                        </div>
-<div class="line"><a name="l00537"></a><span class="lineno">  537</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                    </div>
-<div class="line"><a name="l00538"></a><span class="lineno">  538</span>&#160;        KeyT                *d_keys_in,                             </div>
-<div class="line"><a name="l00539"></a><span class="lineno">  539</span>&#160;        KeyT                *d_keys_out,                            </div>
-<div class="line"><a name="l00540"></a><span class="lineno">  540</span>&#160;        <span class="keywordtype">int</span>                 num_items,                              </div>
-<div class="line"><a name="l00541"></a><span class="lineno">  541</span>&#160;        <span class="keywordtype">int</span>                 num_segments,                           </div>
-<div class="line"><a name="l00542"></a><span class="lineno">  542</span>&#160;        <span class="keywordtype">int</span>                 *d_begin_offsets,                       </div>
-<div class="line"><a name="l00543"></a><span class="lineno">  543</span>&#160;        <span class="keywordtype">int</span>                 *d_end_offsets,                         </div>
-<div class="line"><a name="l00544"></a><span class="lineno">  544</span>&#160;        <span class="keywordtype">int</span>                 begin_bit           = 0,                </div>
-<div class="line"><a name="l00545"></a><span class="lineno">  545</span>&#160;        <span class="keywordtype">int</span>                 end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
-<div class="line"><a name="l00546"></a><span class="lineno">  546</span>&#160;        cudaStream_t        stream              = 0,                </div>
-<div class="line"><a name="l00547"></a><span class="lineno">  547</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
-<div class="line"><a name="l00548"></a><span class="lineno">  548</span>&#160;    {</div>
-<div class="line"><a name="l00549"></a><span class="lineno">  549</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00550"></a><span class="lineno">  550</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00551"></a><span class="lineno">  551</span>&#160;</div>
-<div class="line"><a name="l00552"></a><span class="lineno">  552</span>&#160;        <span class="comment">// Null value type</span></div>
-<div class="line"><a name="l00553"></a><span class="lineno">  553</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;KeyT&gt;</a>      d_keys(d_keys_in, d_keys_out);</div>
-<div class="line"><a name="l00554"></a><span class="lineno">  554</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;NullType&gt;</a>  d_values;</div>
-<div class="line"><a name="l00555"></a><span class="lineno">  555</span>&#160;</div>
-<div class="line"><a name="l00556"></a><span class="lineno">  556</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedRadixSort&lt;false, KeyT, NullType, OffsetT&gt;::Dispatch(</div>
-<div class="line"><a name="l00557"></a><span class="lineno">  557</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00558"></a><span class="lineno">  558</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00559"></a><span class="lineno">  559</span>&#160;            d_keys,</div>
-<div class="line"><a name="l00560"></a><span class="lineno">  560</span>&#160;            d_values,</div>
-<div class="line"><a name="l00561"></a><span class="lineno">  561</span>&#160;            num_items,</div>
-<div class="line"><a name="l00562"></a><span class="lineno">  562</span>&#160;            num_segments,</div>
-<div class="line"><a name="l00563"></a><span class="lineno">  563</span>&#160;            d_begin_offsets,</div>
-<div class="line"><a name="l00564"></a><span class="lineno">  564</span>&#160;            d_end_offsets,</div>
-<div class="line"><a name="l00565"></a><span class="lineno">  565</span>&#160;            begin_bit,</div>
-<div class="line"><a name="l00566"></a><span class="lineno">  566</span>&#160;            end_bit,</div>
-<div class="line"><a name="l00567"></a><span class="lineno">  567</span>&#160;            <span class="keyword">false</span>,</div>
-<div class="line"><a name="l00568"></a><span class="lineno">  568</span>&#160;            stream,</div>
-<div class="line"><a name="l00569"></a><span class="lineno">  569</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00570"></a><span class="lineno">  570</span>&#160;    }</div>
+<div class="line"><a name="l00532"></a><span class="lineno">  532</span>&#160;    <span class="keyword">template</span> &lt;<span class="keyword">typename</span> KeyT&gt;</div>
+<div class="line"><a name="l00533"></a><span class="lineno">  533</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00534"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_radix_sort.html#ab6a917a29d441021949e197d3a639fba">  534</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_radix_sort.html#ab6a917a29d441021949e197d3a639fba" title="Sorts segments of keys into ascending order. (~2N auxiliary storage required) ">SortKeys</a>(</div>
+<div class="line"><a name="l00535"></a><span class="lineno">  535</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                        </div>
+<div class="line"><a name="l00536"></a><span class="lineno">  536</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                    </div>
+<div class="line"><a name="l00537"></a><span class="lineno">  537</span>&#160;        KeyT                *d_keys_in,                             </div>
+<div class="line"><a name="l00538"></a><span class="lineno">  538</span>&#160;        KeyT                *d_keys_out,                            </div>
+<div class="line"><a name="l00539"></a><span class="lineno">  539</span>&#160;        <span class="keywordtype">int</span>                 num_items,                              </div>
+<div class="line"><a name="l00540"></a><span class="lineno">  540</span>&#160;        <span class="keywordtype">int</span>                 num_segments,                           </div>
+<div class="line"><a name="l00541"></a><span class="lineno">  541</span>&#160;        <span class="keywordtype">int</span>                 *d_begin_offsets,                       </div>
+<div class="line"><a name="l00542"></a><span class="lineno">  542</span>&#160;        <span class="keywordtype">int</span>                 *d_end_offsets,                         </div>
+<div class="line"><a name="l00543"></a><span class="lineno">  543</span>&#160;        <span class="keywordtype">int</span>                 begin_bit           = 0,                </div>
+<div class="line"><a name="l00544"></a><span class="lineno">  544</span>&#160;        <span class="keywordtype">int</span>                 end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
+<div class="line"><a name="l00545"></a><span class="lineno">  545</span>&#160;        cudaStream_t        stream              = 0,                </div>
+<div class="line"><a name="l00546"></a><span class="lineno">  546</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
+<div class="line"><a name="l00547"></a><span class="lineno">  547</span>&#160;    {</div>
+<div class="line"><a name="l00548"></a><span class="lineno">  548</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00549"></a><span class="lineno">  549</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
+<div class="line"><a name="l00550"></a><span class="lineno">  550</span>&#160;</div>
+<div class="line"><a name="l00551"></a><span class="lineno">  551</span>&#160;        <span class="comment">// Null value type</span></div>
+<div class="line"><a name="l00552"></a><span class="lineno">  552</span>&#160;        DoubleBuffer&lt;KeyT&gt;      d_keys(d_keys_in, d_keys_out);</div>
+<div class="line"><a name="l00553"></a><span class="lineno">  553</span>&#160;        DoubleBuffer&lt;NullType&gt;  d_values;</div>
+<div class="line"><a name="l00554"></a><span class="lineno">  554</span>&#160;</div>
+<div class="line"><a name="l00555"></a><span class="lineno">  555</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedRadixSort&lt;false, KeyT, NullType, OffsetT&gt;::Dispatch(</div>
+<div class="line"><a name="l00556"></a><span class="lineno">  556</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00557"></a><span class="lineno">  557</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00558"></a><span class="lineno">  558</span>&#160;            d_keys,</div>
+<div class="line"><a name="l00559"></a><span class="lineno">  559</span>&#160;            d_values,</div>
+<div class="line"><a name="l00560"></a><span class="lineno">  560</span>&#160;            num_items,</div>
+<div class="line"><a name="l00561"></a><span class="lineno">  561</span>&#160;            num_segments,</div>
+<div class="line"><a name="l00562"></a><span class="lineno">  562</span>&#160;            d_begin_offsets,</div>
+<div class="line"><a name="l00563"></a><span class="lineno">  563</span>&#160;            d_end_offsets,</div>
+<div class="line"><a name="l00564"></a><span class="lineno">  564</span>&#160;            begin_bit,</div>
+<div class="line"><a name="l00565"></a><span class="lineno">  565</span>&#160;            end_bit,</div>
+<div class="line"><a name="l00566"></a><span class="lineno">  566</span>&#160;            <span class="keyword">false</span>,</div>
+<div class="line"><a name="l00567"></a><span class="lineno">  567</span>&#160;            stream,</div>
+<div class="line"><a name="l00568"></a><span class="lineno">  568</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00569"></a><span class="lineno">  569</span>&#160;    }</div>
+<div class="line"><a name="l00570"></a><span class="lineno">  570</span>&#160;</div>
 <div class="line"><a name="l00571"></a><span class="lineno">  571</span>&#160;</div>
-<div class="line"><a name="l00572"></a><span class="lineno">  572</span>&#160;</div>
-<div class="line"><a name="l00629"></a><span class="lineno">  629</span>&#160;    <span class="keyword">template</span> &lt;<span class="keyword">typename</span> KeyT&gt;</div>
-<div class="line"><a name="l00630"></a><span class="lineno">  630</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00631"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_radix_sort.html#ab8b433b55358ac507a7fcbba933cdbac">  631</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_radix_sort.html#ab8b433b55358ac507a7fcbba933cdbac" title="Sorts segments of keys into ascending order. (~N auxiliary storage required). ">SortKeys</a>(</div>
-<div class="line"><a name="l00632"></a><span class="lineno">  632</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                        </div>
-<div class="line"><a name="l00633"></a><span class="lineno">  633</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                    </div>
-<div class="line"><a name="l00634"></a><span class="lineno">  634</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;KeyT&gt;</a>  &amp;d_keys,                                </div>
-<div class="line"><a name="l00635"></a><span class="lineno">  635</span>&#160;        <span class="keywordtype">int</span>                 num_items,                              </div>
-<div class="line"><a name="l00636"></a><span class="lineno">  636</span>&#160;        <span class="keywordtype">int</span>                 num_segments,                           </div>
-<div class="line"><a name="l00637"></a><span class="lineno">  637</span>&#160;        <span class="keywordtype">int</span>                 *d_begin_offsets,                       </div>
-<div class="line"><a name="l00638"></a><span class="lineno">  638</span>&#160;        <span class="keywordtype">int</span>                 *d_end_offsets,                         </div>
-<div class="line"><a name="l00639"></a><span class="lineno">  639</span>&#160;        <span class="keywordtype">int</span>                 begin_bit           = 0,                </div>
-<div class="line"><a name="l00640"></a><span class="lineno">  640</span>&#160;        <span class="keywordtype">int</span>                 end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
-<div class="line"><a name="l00641"></a><span class="lineno">  641</span>&#160;        cudaStream_t        stream              = 0,                </div>
-<div class="line"><a name="l00642"></a><span class="lineno">  642</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
-<div class="line"><a name="l00643"></a><span class="lineno">  643</span>&#160;    {</div>
-<div class="line"><a name="l00644"></a><span class="lineno">  644</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00645"></a><span class="lineno">  645</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00646"></a><span class="lineno">  646</span>&#160;</div>
-<div class="line"><a name="l00647"></a><span class="lineno">  647</span>&#160;        <span class="comment">// Null value type</span></div>
-<div class="line"><a name="l00648"></a><span class="lineno">  648</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;NullType&gt;</a> d_values;</div>
-<div class="line"><a name="l00649"></a><span class="lineno">  649</span>&#160;</div>
-<div class="line"><a name="l00650"></a><span class="lineno">  650</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedRadixSort&lt;false, KeyT, NullType, OffsetT&gt;::Dispatch(</div>
-<div class="line"><a name="l00651"></a><span class="lineno">  651</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00652"></a><span class="lineno">  652</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00653"></a><span class="lineno">  653</span>&#160;            d_keys,</div>
-<div class="line"><a name="l00654"></a><span class="lineno">  654</span>&#160;            d_values,</div>
-<div class="line"><a name="l00655"></a><span class="lineno">  655</span>&#160;            num_items,</div>
-<div class="line"><a name="l00656"></a><span class="lineno">  656</span>&#160;            num_segments,</div>
-<div class="line"><a name="l00657"></a><span class="lineno">  657</span>&#160;            d_begin_offsets,</div>
-<div class="line"><a name="l00658"></a><span class="lineno">  658</span>&#160;            d_end_offsets,</div>
-<div class="line"><a name="l00659"></a><span class="lineno">  659</span>&#160;            begin_bit,</div>
-<div class="line"><a name="l00660"></a><span class="lineno">  660</span>&#160;            end_bit,</div>
-<div class="line"><a name="l00661"></a><span class="lineno">  661</span>&#160;            <span class="keyword">true</span>,</div>
-<div class="line"><a name="l00662"></a><span class="lineno">  662</span>&#160;            stream,</div>
-<div class="line"><a name="l00663"></a><span class="lineno">  663</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00664"></a><span class="lineno">  664</span>&#160;    }</div>
-<div class="line"><a name="l00665"></a><span class="lineno">  665</span>&#160;</div>
-<div class="line"><a name="l00715"></a><span class="lineno">  715</span>&#160;    <span class="keyword">template</span> &lt;<span class="keyword">typename</span> KeyT&gt;</div>
-<div class="line"><a name="l00716"></a><span class="lineno">  716</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00717"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_radix_sort.html#a49ccbb6ca7a1d26e4b01e68e8da8a701">  717</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_radix_sort.html#a49ccbb6ca7a1d26e4b01e68e8da8a701" title="Sorts segments of keys into descending order. (~2N auxiliary storage required). ">SortKeysDescending</a>(</div>
-<div class="line"><a name="l00718"></a><span class="lineno">  718</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                        </div>
-<div class="line"><a name="l00719"></a><span class="lineno">  719</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                    </div>
-<div class="line"><a name="l00720"></a><span class="lineno">  720</span>&#160;        KeyT                *d_keys_in,                             </div>
-<div class="line"><a name="l00721"></a><span class="lineno">  721</span>&#160;        KeyT                *d_keys_out,                            </div>
-<div class="line"><a name="l00722"></a><span class="lineno">  722</span>&#160;        <span class="keywordtype">int</span>                 num_items,                              </div>
-<div class="line"><a name="l00723"></a><span class="lineno">  723</span>&#160;        <span class="keywordtype">int</span>                 num_segments,                           </div>
-<div class="line"><a name="l00724"></a><span class="lineno">  724</span>&#160;        <span class="keywordtype">int</span>                 *d_begin_offsets,                       </div>
-<div class="line"><a name="l00725"></a><span class="lineno">  725</span>&#160;        <span class="keywordtype">int</span>                 *d_end_offsets,                         </div>
-<div class="line"><a name="l00726"></a><span class="lineno">  726</span>&#160;        <span class="keywordtype">int</span>                 begin_bit           = 0,                </div>
-<div class="line"><a name="l00727"></a><span class="lineno">  727</span>&#160;        <span class="keywordtype">int</span>                 end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
-<div class="line"><a name="l00728"></a><span class="lineno">  728</span>&#160;        cudaStream_t        stream              = 0,                </div>
-<div class="line"><a name="l00729"></a><span class="lineno">  729</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
-<div class="line"><a name="l00730"></a><span class="lineno">  730</span>&#160;    {</div>
-<div class="line"><a name="l00731"></a><span class="lineno">  731</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00732"></a><span class="lineno">  732</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00733"></a><span class="lineno">  733</span>&#160;</div>
-<div class="line"><a name="l00734"></a><span class="lineno">  734</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;KeyT&gt;</a>       d_keys(d_keys_in, d_keys_out);</div>
-<div class="line"><a name="l00735"></a><span class="lineno">  735</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;NullType&gt;</a>  d_values;</div>
-<div class="line"><a name="l00736"></a><span class="lineno">  736</span>&#160;</div>
-<div class="line"><a name="l00737"></a><span class="lineno">  737</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedRadixSort&lt;true, KeyT, NullType, OffsetT&gt;::Dispatch(</div>
-<div class="line"><a name="l00738"></a><span class="lineno">  738</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00739"></a><span class="lineno">  739</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00740"></a><span class="lineno">  740</span>&#160;            d_keys,</div>
-<div class="line"><a name="l00741"></a><span class="lineno">  741</span>&#160;            d_values,</div>
-<div class="line"><a name="l00742"></a><span class="lineno">  742</span>&#160;            num_items,</div>
-<div class="line"><a name="l00743"></a><span class="lineno">  743</span>&#160;            num_segments,</div>
-<div class="line"><a name="l00744"></a><span class="lineno">  744</span>&#160;            d_begin_offsets,</div>
-<div class="line"><a name="l00745"></a><span class="lineno">  745</span>&#160;            d_end_offsets,</div>
-<div class="line"><a name="l00746"></a><span class="lineno">  746</span>&#160;            begin_bit,</div>
-<div class="line"><a name="l00747"></a><span class="lineno">  747</span>&#160;            end_bit,</div>
-<div class="line"><a name="l00748"></a><span class="lineno">  748</span>&#160;            <span class="keyword">false</span>,</div>
-<div class="line"><a name="l00749"></a><span class="lineno">  749</span>&#160;            stream,</div>
-<div class="line"><a name="l00750"></a><span class="lineno">  750</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00751"></a><span class="lineno">  751</span>&#160;    }</div>
+<div class="line"><a name="l00628"></a><span class="lineno">  628</span>&#160;    <span class="keyword">template</span> &lt;<span class="keyword">typename</span> KeyT&gt;</div>
+<div class="line"><a name="l00629"></a><span class="lineno">  629</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00630"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_radix_sort.html#ab8b433b55358ac507a7fcbba933cdbac">  630</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_radix_sort.html#ab8b433b55358ac507a7fcbba933cdbac" title="Sorts segments of keys into ascending order. (~N auxiliary storage required). ">SortKeys</a>(</div>
+<div class="line"><a name="l00631"></a><span class="lineno">  631</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                        </div>
+<div class="line"><a name="l00632"></a><span class="lineno">  632</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                    </div>
+<div class="line"><a name="l00633"></a><span class="lineno">  633</span>&#160;        DoubleBuffer&lt;KeyT&gt;  &amp;d_keys,                                </div>
+<div class="line"><a name="l00634"></a><span class="lineno">  634</span>&#160;        <span class="keywordtype">int</span>                 num_items,                              </div>
+<div class="line"><a name="l00635"></a><span class="lineno">  635</span>&#160;        <span class="keywordtype">int</span>                 num_segments,                           </div>
+<div class="line"><a name="l00636"></a><span class="lineno">  636</span>&#160;        <span class="keywordtype">int</span>                 *d_begin_offsets,                       </div>
+<div class="line"><a name="l00637"></a><span class="lineno">  637</span>&#160;        <span class="keywordtype">int</span>                 *d_end_offsets,                         </div>
+<div class="line"><a name="l00638"></a><span class="lineno">  638</span>&#160;        <span class="keywordtype">int</span>                 begin_bit           = 0,                </div>
+<div class="line"><a name="l00639"></a><span class="lineno">  639</span>&#160;        <span class="keywordtype">int</span>                 end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
+<div class="line"><a name="l00640"></a><span class="lineno">  640</span>&#160;        cudaStream_t        stream              = 0,                </div>
+<div class="line"><a name="l00641"></a><span class="lineno">  641</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
+<div class="line"><a name="l00642"></a><span class="lineno">  642</span>&#160;    {</div>
+<div class="line"><a name="l00643"></a><span class="lineno">  643</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00644"></a><span class="lineno">  644</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
+<div class="line"><a name="l00645"></a><span class="lineno">  645</span>&#160;</div>
+<div class="line"><a name="l00646"></a><span class="lineno">  646</span>&#160;        <span class="comment">// Null value type</span></div>
+<div class="line"><a name="l00647"></a><span class="lineno">  647</span>&#160;        DoubleBuffer&lt;NullType&gt; d_values;</div>
+<div class="line"><a name="l00648"></a><span class="lineno">  648</span>&#160;</div>
+<div class="line"><a name="l00649"></a><span class="lineno">  649</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedRadixSort&lt;false, KeyT, NullType, OffsetT&gt;::Dispatch(</div>
+<div class="line"><a name="l00650"></a><span class="lineno">  650</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00651"></a><span class="lineno">  651</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00652"></a><span class="lineno">  652</span>&#160;            d_keys,</div>
+<div class="line"><a name="l00653"></a><span class="lineno">  653</span>&#160;            d_values,</div>
+<div class="line"><a name="l00654"></a><span class="lineno">  654</span>&#160;            num_items,</div>
+<div class="line"><a name="l00655"></a><span class="lineno">  655</span>&#160;            num_segments,</div>
+<div class="line"><a name="l00656"></a><span class="lineno">  656</span>&#160;            d_begin_offsets,</div>
+<div class="line"><a name="l00657"></a><span class="lineno">  657</span>&#160;            d_end_offsets,</div>
+<div class="line"><a name="l00658"></a><span class="lineno">  658</span>&#160;            begin_bit,</div>
+<div class="line"><a name="l00659"></a><span class="lineno">  659</span>&#160;            end_bit,</div>
+<div class="line"><a name="l00660"></a><span class="lineno">  660</span>&#160;            <span class="keyword">true</span>,</div>
+<div class="line"><a name="l00661"></a><span class="lineno">  661</span>&#160;            stream,</div>
+<div class="line"><a name="l00662"></a><span class="lineno">  662</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00663"></a><span class="lineno">  663</span>&#160;    }</div>
+<div class="line"><a name="l00664"></a><span class="lineno">  664</span>&#160;</div>
+<div class="line"><a name="l00714"></a><span class="lineno">  714</span>&#160;    <span class="keyword">template</span> &lt;<span class="keyword">typename</span> KeyT&gt;</div>
+<div class="line"><a name="l00715"></a><span class="lineno">  715</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00716"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_radix_sort.html#a49ccbb6ca7a1d26e4b01e68e8da8a701">  716</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_radix_sort.html#a49ccbb6ca7a1d26e4b01e68e8da8a701" title="Sorts segments of keys into descending order. (~2N auxiliary storage required). ">SortKeysDescending</a>(</div>
+<div class="line"><a name="l00717"></a><span class="lineno">  717</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                        </div>
+<div class="line"><a name="l00718"></a><span class="lineno">  718</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                    </div>
+<div class="line"><a name="l00719"></a><span class="lineno">  719</span>&#160;        KeyT                *d_keys_in,                             </div>
+<div class="line"><a name="l00720"></a><span class="lineno">  720</span>&#160;        KeyT                *d_keys_out,                            </div>
+<div class="line"><a name="l00721"></a><span class="lineno">  721</span>&#160;        <span class="keywordtype">int</span>                 num_items,                              </div>
+<div class="line"><a name="l00722"></a><span class="lineno">  722</span>&#160;        <span class="keywordtype">int</span>                 num_segments,                           </div>
+<div class="line"><a name="l00723"></a><span class="lineno">  723</span>&#160;        <span class="keywordtype">int</span>                 *d_begin_offsets,                       </div>
+<div class="line"><a name="l00724"></a><span class="lineno">  724</span>&#160;        <span class="keywordtype">int</span>                 *d_end_offsets,                         </div>
+<div class="line"><a name="l00725"></a><span class="lineno">  725</span>&#160;        <span class="keywordtype">int</span>                 begin_bit           = 0,                </div>
+<div class="line"><a name="l00726"></a><span class="lineno">  726</span>&#160;        <span class="keywordtype">int</span>                 end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
+<div class="line"><a name="l00727"></a><span class="lineno">  727</span>&#160;        cudaStream_t        stream              = 0,                </div>
+<div class="line"><a name="l00728"></a><span class="lineno">  728</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
+<div class="line"><a name="l00729"></a><span class="lineno">  729</span>&#160;    {</div>
+<div class="line"><a name="l00730"></a><span class="lineno">  730</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00731"></a><span class="lineno">  731</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
+<div class="line"><a name="l00732"></a><span class="lineno">  732</span>&#160;</div>
+<div class="line"><a name="l00733"></a><span class="lineno">  733</span>&#160;        DoubleBuffer&lt;KeyT&gt;       d_keys(d_keys_in, d_keys_out);</div>
+<div class="line"><a name="l00734"></a><span class="lineno">  734</span>&#160;        DoubleBuffer&lt;NullType&gt;  d_values;</div>
+<div class="line"><a name="l00735"></a><span class="lineno">  735</span>&#160;</div>
+<div class="line"><a name="l00736"></a><span class="lineno">  736</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedRadixSort&lt;true, KeyT, NullType, OffsetT&gt;::Dispatch(</div>
+<div class="line"><a name="l00737"></a><span class="lineno">  737</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00738"></a><span class="lineno">  738</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00739"></a><span class="lineno">  739</span>&#160;            d_keys,</div>
+<div class="line"><a name="l00740"></a><span class="lineno">  740</span>&#160;            d_values,</div>
+<div class="line"><a name="l00741"></a><span class="lineno">  741</span>&#160;            num_items,</div>
+<div class="line"><a name="l00742"></a><span class="lineno">  742</span>&#160;            num_segments,</div>
+<div class="line"><a name="l00743"></a><span class="lineno">  743</span>&#160;            d_begin_offsets,</div>
+<div class="line"><a name="l00744"></a><span class="lineno">  744</span>&#160;            d_end_offsets,</div>
+<div class="line"><a name="l00745"></a><span class="lineno">  745</span>&#160;            begin_bit,</div>
+<div class="line"><a name="l00746"></a><span class="lineno">  746</span>&#160;            end_bit,</div>
+<div class="line"><a name="l00747"></a><span class="lineno">  747</span>&#160;            <span class="keyword">false</span>,</div>
+<div class="line"><a name="l00748"></a><span class="lineno">  748</span>&#160;            stream,</div>
+<div class="line"><a name="l00749"></a><span class="lineno">  749</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00750"></a><span class="lineno">  750</span>&#160;    }</div>
+<div class="line"><a name="l00751"></a><span class="lineno">  751</span>&#160;</div>
 <div class="line"><a name="l00752"></a><span class="lineno">  752</span>&#160;</div>
-<div class="line"><a name="l00753"></a><span class="lineno">  753</span>&#160;</div>
-<div class="line"><a name="l00810"></a><span class="lineno">  810</span>&#160;    <span class="keyword">template</span> &lt;<span class="keyword">typename</span> KeyT&gt;</div>
-<div class="line"><a name="l00811"></a><span class="lineno">  811</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00812"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_radix_sort.html#a5c21fba171c718aaf9b3b3c27bda6a94">  812</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_radix_sort.html#a5c21fba171c718aaf9b3b3c27bda6a94" title="Sorts segments of keys into descending order. (~N auxiliary storage required). ">SortKeysDescending</a>(</div>
-<div class="line"><a name="l00813"></a><span class="lineno">  813</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                        </div>
-<div class="line"><a name="l00814"></a><span class="lineno">  814</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                    </div>
-<div class="line"><a name="l00815"></a><span class="lineno">  815</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;KeyT&gt;</a>  &amp;d_keys,                                </div>
-<div class="line"><a name="l00816"></a><span class="lineno">  816</span>&#160;        <span class="keywordtype">int</span>                 num_items,                              </div>
-<div class="line"><a name="l00817"></a><span class="lineno">  817</span>&#160;        <span class="keywordtype">int</span>                 num_segments,                           </div>
-<div class="line"><a name="l00818"></a><span class="lineno">  818</span>&#160;        <span class="keywordtype">int</span>                 *d_begin_offsets,                       </div>
-<div class="line"><a name="l00819"></a><span class="lineno">  819</span>&#160;        <span class="keywordtype">int</span>                 *d_end_offsets,                         </div>
-<div class="line"><a name="l00820"></a><span class="lineno">  820</span>&#160;        <span class="keywordtype">int</span>                 begin_bit           = 0,                </div>
-<div class="line"><a name="l00821"></a><span class="lineno">  821</span>&#160;        <span class="keywordtype">int</span>                 end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
-<div class="line"><a name="l00822"></a><span class="lineno">  822</span>&#160;        cudaStream_t        stream              = 0,                </div>
-<div class="line"><a name="l00823"></a><span class="lineno">  823</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
-<div class="line"><a name="l00824"></a><span class="lineno">  824</span>&#160;    {</div>
-<div class="line"><a name="l00825"></a><span class="lineno">  825</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00826"></a><span class="lineno">  826</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00827"></a><span class="lineno">  827</span>&#160;</div>
-<div class="line"><a name="l00828"></a><span class="lineno">  828</span>&#160;        <span class="comment">// Null value type</span></div>
-<div class="line"><a name="l00829"></a><span class="lineno">  829</span>&#160;        <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;NullType&gt;</a> d_values;</div>
-<div class="line"><a name="l00830"></a><span class="lineno">  830</span>&#160;</div>
-<div class="line"><a name="l00831"></a><span class="lineno">  831</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedRadixSort&lt;true, KeyT, NullType, OffsetT&gt;::Dispatch(</div>
-<div class="line"><a name="l00832"></a><span class="lineno">  832</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00833"></a><span class="lineno">  833</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00834"></a><span class="lineno">  834</span>&#160;            d_keys,</div>
-<div class="line"><a name="l00835"></a><span class="lineno">  835</span>&#160;            d_values,</div>
-<div class="line"><a name="l00836"></a><span class="lineno">  836</span>&#160;            num_items,</div>
-<div class="line"><a name="l00837"></a><span class="lineno">  837</span>&#160;            num_segments,</div>
-<div class="line"><a name="l00838"></a><span class="lineno">  838</span>&#160;            d_begin_offsets,</div>
-<div class="line"><a name="l00839"></a><span class="lineno">  839</span>&#160;            d_end_offsets,</div>
-<div class="line"><a name="l00840"></a><span class="lineno">  840</span>&#160;            begin_bit,</div>
-<div class="line"><a name="l00841"></a><span class="lineno">  841</span>&#160;            end_bit,</div>
-<div class="line"><a name="l00842"></a><span class="lineno">  842</span>&#160;            <span class="keyword">true</span>,</div>
-<div class="line"><a name="l00843"></a><span class="lineno">  843</span>&#160;            stream,</div>
-<div class="line"><a name="l00844"></a><span class="lineno">  844</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00845"></a><span class="lineno">  845</span>&#160;    }</div>
+<div class="line"><a name="l00809"></a><span class="lineno">  809</span>&#160;    <span class="keyword">template</span> &lt;<span class="keyword">typename</span> KeyT&gt;</div>
+<div class="line"><a name="l00810"></a><span class="lineno">  810</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00811"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_radix_sort.html#a5c21fba171c718aaf9b3b3c27bda6a94">  811</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_radix_sort.html#a5c21fba171c718aaf9b3b3c27bda6a94" title="Sorts segments of keys into descending order. (~N auxiliary storage required). ">SortKeysDescending</a>(</div>
+<div class="line"><a name="l00812"></a><span class="lineno">  812</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                        </div>
+<div class="line"><a name="l00813"></a><span class="lineno">  813</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                    </div>
+<div class="line"><a name="l00814"></a><span class="lineno">  814</span>&#160;        DoubleBuffer&lt;KeyT&gt;  &amp;d_keys,                                </div>
+<div class="line"><a name="l00815"></a><span class="lineno">  815</span>&#160;        <span class="keywordtype">int</span>                 num_items,                              </div>
+<div class="line"><a name="l00816"></a><span class="lineno">  816</span>&#160;        <span class="keywordtype">int</span>                 num_segments,                           </div>
+<div class="line"><a name="l00817"></a><span class="lineno">  817</span>&#160;        <span class="keywordtype">int</span>                 *d_begin_offsets,                       </div>
+<div class="line"><a name="l00818"></a><span class="lineno">  818</span>&#160;        <span class="keywordtype">int</span>                 *d_end_offsets,                         </div>
+<div class="line"><a name="l00819"></a><span class="lineno">  819</span>&#160;        <span class="keywordtype">int</span>                 begin_bit           = 0,                </div>
+<div class="line"><a name="l00820"></a><span class="lineno">  820</span>&#160;        <span class="keywordtype">int</span>                 end_bit             = <span class="keyword">sizeof</span>(KeyT) * 8, </div>
+<div class="line"><a name="l00821"></a><span class="lineno">  821</span>&#160;        cudaStream_t        stream              = 0,                </div>
+<div class="line"><a name="l00822"></a><span class="lineno">  822</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)            </div>
+<div class="line"><a name="l00823"></a><span class="lineno">  823</span>&#160;    {</div>
+<div class="line"><a name="l00824"></a><span class="lineno">  824</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00825"></a><span class="lineno">  825</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
+<div class="line"><a name="l00826"></a><span class="lineno">  826</span>&#160;</div>
+<div class="line"><a name="l00827"></a><span class="lineno">  827</span>&#160;        <span class="comment">// Null value type</span></div>
+<div class="line"><a name="l00828"></a><span class="lineno">  828</span>&#160;        DoubleBuffer&lt;NullType&gt; d_values;</div>
+<div class="line"><a name="l00829"></a><span class="lineno">  829</span>&#160;</div>
+<div class="line"><a name="l00830"></a><span class="lineno">  830</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedRadixSort&lt;true, KeyT, NullType, OffsetT&gt;::Dispatch(</div>
+<div class="line"><a name="l00831"></a><span class="lineno">  831</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00832"></a><span class="lineno">  832</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00833"></a><span class="lineno">  833</span>&#160;            d_keys,</div>
+<div class="line"><a name="l00834"></a><span class="lineno">  834</span>&#160;            d_values,</div>
+<div class="line"><a name="l00835"></a><span class="lineno">  835</span>&#160;            num_items,</div>
+<div class="line"><a name="l00836"></a><span class="lineno">  836</span>&#160;            num_segments,</div>
+<div class="line"><a name="l00837"></a><span class="lineno">  837</span>&#160;            d_begin_offsets,</div>
+<div class="line"><a name="l00838"></a><span class="lineno">  838</span>&#160;            d_end_offsets,</div>
+<div class="line"><a name="l00839"></a><span class="lineno">  839</span>&#160;            begin_bit,</div>
+<div class="line"><a name="l00840"></a><span class="lineno">  840</span>&#160;            end_bit,</div>
+<div class="line"><a name="l00841"></a><span class="lineno">  841</span>&#160;            <span class="keyword">true</span>,</div>
+<div class="line"><a name="l00842"></a><span class="lineno">  842</span>&#160;            stream,</div>
+<div class="line"><a name="l00843"></a><span class="lineno">  843</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00844"></a><span class="lineno">  844</span>&#160;    }</div>
+<div class="line"><a name="l00845"></a><span class="lineno">  845</span>&#160;</div>
 <div class="line"><a name="l00846"></a><span class="lineno">  846</span>&#160;</div>
-<div class="line"><a name="l00847"></a><span class="lineno">  847</span>&#160;</div>
+<div class="line"><a name="l00848"></a><span class="lineno">  848</span>&#160;</div>
 <div class="line"><a name="l00849"></a><span class="lineno">  849</span>&#160;</div>
-<div class="line"><a name="l00850"></a><span class="lineno">  850</span>&#160;</div>
-<div class="line"><a name="l00851"></a><span class="lineno">  851</span>&#160;};</div>
-<div class="line"><a name="l00852"></a><span class="lineno">  852</span>&#160;</div>
-<div class="line"><a name="l00853"></a><span class="lineno">  853</span>&#160;}               <span class="comment">// CUB namespace</span></div>
-<div class="line"><a name="l00854"></a><span class="lineno">  854</span>&#160;CUB_NS_POSTFIX  <span class="comment">// Optional outer namespace(s)</span></div>
+<div class="line"><a name="l00850"></a><span class="lineno">  850</span>&#160;};</div>
+<div class="line"><a name="l00851"></a><span class="lineno">  851</span>&#160;</div>
+<div class="line"><a name="l00852"></a><span class="lineno">  852</span>&#160;}               <span class="comment">// CUB namespace</span></div>
+<div class="line"><a name="l00853"></a><span class="lineno">  853</span>&#160;CUB_NS_POSTFIX  <span class="comment">// Optional outer namespace(s)</span></div>
+<div class="line"><a name="l00854"></a><span class="lineno">  854</span>&#160;</div>
 <div class="line"><a name="l00855"></a><span class="lineno">  855</span>&#160;</div>
-<div class="line"><a name="l00856"></a><span class="lineno">  856</span>&#160;</div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/device__segmented__reduce_8cuh.html b/docs/html/device__segmented__reduce_8cuh.html
index 40c32eb081..fa843eb667 100644
--- a/docs/html/device__segmented__reduce_8cuh.html
+++ b/docs/html/device__segmented__reduce_8cuh.html
@@ -108,6 +108,7 @@
 <code>#include &lt;iterator&gt;</code><br/>
 <code>#include &quot;dispatch/dispatch_reduce.cuh&quot;</code><br/>
 <code>#include &quot;dispatch/dispatch_reduce_by_key.cuh&quot;</code><br/>
+<code>#include &quot;<a class="el" href="util__type_8cuh_source.html">../util_type.cuh</a>&quot;</code><br/>
 <code>#include &quot;../util_namespace.cuh&quot;</code><br/>
 </div>
 <p><a href="device__segmented__reduce_8cuh_source.html">Go to the source code of this file.</a></p>
@@ -137,7 +138,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Wed Nov 18 2015 10:24:43 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/device__segmented__reduce_8cuh_source.html b/docs/html/device__segmented__reduce_8cuh_source.html
index 734ebc1f66..712c987b24 100644
--- a/docs/html/device__segmented__reduce_8cuh_source.html
+++ b/docs/html/device__segmented__reduce_8cuh_source.html
@@ -137,249 +137,236 @@
 <div class="line"><a name="l00039"></a><span class="lineno">   39</span>&#160;</div>
 <div class="line"><a name="l00040"></a><span class="lineno">   40</span>&#160;<span class="preprocessor">#include &quot;dispatch/dispatch_reduce.cuh&quot;</span></div>
 <div class="line"><a name="l00041"></a><span class="lineno">   41</span>&#160;<span class="preprocessor">#include &quot;dispatch/dispatch_reduce_by_key.cuh&quot;</span></div>
-<div class="line"><a name="l00042"></a><span class="lineno">   42</span>&#160;<span class="preprocessor">#include &quot;../util_namespace.cuh&quot;</span></div>
-<div class="line"><a name="l00043"></a><span class="lineno">   43</span>&#160;</div>
-<div class="line"><a name="l00045"></a><span class="lineno">   45</span>&#160;CUB_NS_PREFIX</div>
-<div class="line"><a name="l00046"></a><span class="lineno">   46</span>&#160;</div>
-<div class="line"><a name="l00048"></a><span class="lineno">   48</span>&#160;<span class="keyword">namespace </span>cub {</div>
-<div class="line"><a name="l00049"></a><span class="lineno">   49</span>&#160;</div>
+<div class="line"><a name="l00042"></a><span class="lineno">   42</span>&#160;<span class="preprocessor">#include &quot;../util_type.cuh&quot;</span></div>
+<div class="line"><a name="l00043"></a><span class="lineno">   43</span>&#160;<span class="preprocessor">#include &quot;../util_namespace.cuh&quot;</span></div>
+<div class="line"><a name="l00044"></a><span class="lineno">   44</span>&#160;</div>
+<div class="line"><a name="l00046"></a><span class="lineno">   46</span>&#160;CUB_NS_PREFIX</div>
+<div class="line"><a name="l00047"></a><span class="lineno">   47</span>&#160;</div>
+<div class="line"><a name="l00049"></a><span class="lineno">   49</span>&#160;<span class="keyword">namespace </span>cub {</div>
 <div class="line"><a name="l00050"></a><span class="lineno">   50</span>&#160;</div>
-<div class="line"><a name="l00063"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_reduce.html">   63</a></span>&#160;<span class="keyword">struct </span><a class="code" href="structcub_1_1_device_segmented_reduce.html" title="DeviceSegmentedReduce provides device-wide, parallel operations for computing a reduction across mult...">DeviceSegmentedReduce</a></div>
-<div class="line"><a name="l00064"></a><span class="lineno">   64</span>&#160;{</div>
-<div class="line"><a name="l00126"></a><span class="lineno">  126</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00127"></a><span class="lineno">  127</span>&#160;        <span class="keyword">typename</span>            InputIteratorT,</div>
-<div class="line"><a name="l00128"></a><span class="lineno">  128</span>&#160;        <span class="keyword">typename</span>            OutputIteratorT,</div>
-<div class="line"><a name="l00129"></a><span class="lineno">  129</span>&#160;        <span class="keyword">typename</span>            ReductionOp,</div>
-<div class="line"><a name="l00130"></a><span class="lineno">  130</span>&#160;        <span class="keyword">typename</span>            T&gt;</div>
-<div class="line"><a name="l00131"></a><span class="lineno">  131</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00132"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_reduce.html#abbaebbc469e9603774e1d1fc60435baf">  132</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_reduce.html#abbaebbc469e9603774e1d1fc60435baf" title="Computes a device-wide segmented reduction using the specified binary reduction_op functor...">Reduce</a>(</div>
-<div class="line"><a name="l00133"></a><span class="lineno">  133</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                    </div>
-<div class="line"><a name="l00134"></a><span class="lineno">  134</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                </div>
-<div class="line"><a name="l00135"></a><span class="lineno">  135</span>&#160;        InputIteratorT      d_in,                               </div>
-<div class="line"><a name="l00136"></a><span class="lineno">  136</span>&#160;        OutputIteratorT     d_out,                              </div>
-<div class="line"><a name="l00137"></a><span class="lineno">  137</span>&#160;        <span class="keywordtype">int</span>                 num_segments,                       </div>
-<div class="line"><a name="l00138"></a><span class="lineno">  138</span>&#160;        <span class="keywordtype">int</span>                 *d_begin_offsets,                   </div>
-<div class="line"><a name="l00139"></a><span class="lineno">  139</span>&#160;        <span class="keywordtype">int</span>                 *d_end_offsets,                     </div>
-<div class="line"><a name="l00140"></a><span class="lineno">  140</span>&#160;        ReductionOp         reduction_op,                       </div>
-<div class="line"><a name="l00141"></a><span class="lineno">  141</span>&#160;        T                   identity,                           </div>
-<div class="line"><a name="l00142"></a><span class="lineno">  142</span>&#160;        cudaStream_t        stream              = 0,            </div>
-<div class="line"><a name="l00143"></a><span class="lineno">  143</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)        </div>
-<div class="line"><a name="l00144"></a><span class="lineno">  144</span>&#160;    {</div>
-<div class="line"><a name="l00145"></a><span class="lineno">  145</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00146"></a><span class="lineno">  146</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00147"></a><span class="lineno">  147</span>&#160;</div>
-<div class="line"><a name="l00148"></a><span class="lineno">  148</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedReduce&lt;InputIteratorT, OutputIteratorT, OffsetT, ReductionOp&gt;::Dispatch(</div>
-<div class="line"><a name="l00149"></a><span class="lineno">  149</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00150"></a><span class="lineno">  150</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00151"></a><span class="lineno">  151</span>&#160;            d_in,</div>
-<div class="line"><a name="l00152"></a><span class="lineno">  152</span>&#160;            d_out,</div>
-<div class="line"><a name="l00153"></a><span class="lineno">  153</span>&#160;            num_segments,</div>
-<div class="line"><a name="l00154"></a><span class="lineno">  154</span>&#160;            d_begin_offsets,</div>
-<div class="line"><a name="l00155"></a><span class="lineno">  155</span>&#160;            d_end_offsets,</div>
-<div class="line"><a name="l00156"></a><span class="lineno">  156</span>&#160;            reduction_op,</div>
-<div class="line"><a name="l00157"></a><span class="lineno">  157</span>&#160;            identity,</div>
-<div class="line"><a name="l00158"></a><span class="lineno">  158</span>&#160;            stream,</div>
-<div class="line"><a name="l00159"></a><span class="lineno">  159</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00160"></a><span class="lineno">  160</span>&#160;    }</div>
-<div class="line"><a name="l00161"></a><span class="lineno">  161</span>&#160;</div>
-<div class="line"><a name="l00162"></a><span class="lineno">  162</span>&#160;</div>
-<div class="line"><a name="l00212"></a><span class="lineno">  212</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00213"></a><span class="lineno">  213</span>&#160;        <span class="keyword">typename</span>            InputIteratorT,</div>
-<div class="line"><a name="l00214"></a><span class="lineno">  214</span>&#160;        <span class="keyword">typename</span>            OutputIteratorT,</div>
-<div class="line"><a name="l00215"></a><span class="lineno">  215</span>&#160;        <span class="keyword">typename</span>            T&gt;</div>
-<div class="line"><a name="l00216"></a><span class="lineno">  216</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00217"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_reduce.html#a62f13e427316b0caec6168ebd9ba85e3">  217</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_reduce.html#a62f13e427316b0caec6168ebd9ba85e3" title="Computes a device-wide segmented sum using the addition (&#39;+&#39;) operator. ">Sum</a>(</div>
-<div class="line"><a name="l00218"></a><span class="lineno">  218</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                    </div>
-<div class="line"><a name="l00219"></a><span class="lineno">  219</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                </div>
-<div class="line"><a name="l00220"></a><span class="lineno">  220</span>&#160;        InputIteratorT      d_in,                               </div>
-<div class="line"><a name="l00221"></a><span class="lineno">  221</span>&#160;        OutputIteratorT     d_out,                              </div>
-<div class="line"><a name="l00222"></a><span class="lineno">  222</span>&#160;        <span class="keywordtype">int</span>                 num_segments,                       </div>
-<div class="line"><a name="l00223"></a><span class="lineno">  223</span>&#160;        <span class="keywordtype">int</span>                 *d_begin_offsets,                   </div>
-<div class="line"><a name="l00224"></a><span class="lineno">  224</span>&#160;        <span class="keywordtype">int</span>                 *d_end_offsets,                     </div>
-<div class="line"><a name="l00225"></a><span class="lineno">  225</span>&#160;        T                   identity,                           </div>
-<div class="line"><a name="l00226"></a><span class="lineno">  226</span>&#160;        cudaStream_t        stream              = 0,            </div>
-<div class="line"><a name="l00227"></a><span class="lineno">  227</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)        </div>
-<div class="line"><a name="l00228"></a><span class="lineno">  228</span>&#160;    {</div>
-<div class="line"><a name="l00229"></a><span class="lineno">  229</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00230"></a><span class="lineno">  230</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00231"></a><span class="lineno">  231</span>&#160;</div>
-<div class="line"><a name="l00232"></a><span class="lineno">  232</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedReduce&lt;InputIteratorT, OutputIteratorT, OffsetT, cub::Sum&gt;::Dispatch(</div>
-<div class="line"><a name="l00233"></a><span class="lineno">  233</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00234"></a><span class="lineno">  234</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00235"></a><span class="lineno">  235</span>&#160;            d_in,</div>
-<div class="line"><a name="l00236"></a><span class="lineno">  236</span>&#160;            d_out,</div>
-<div class="line"><a name="l00237"></a><span class="lineno">  237</span>&#160;            num_segments,</div>
-<div class="line"><a name="l00238"></a><span class="lineno">  238</span>&#160;            d_begin_offsets,</div>
-<div class="line"><a name="l00239"></a><span class="lineno">  239</span>&#160;            d_end_offsets,</div>
-<div class="line"><a name="l00240"></a><span class="lineno">  240</span>&#160;            <a class="code" href="structcub_1_1_sum.html" title="Default sum functor. ">cub::Sum</a>(),</div>
-<div class="line"><a name="l00241"></a><span class="lineno">  241</span>&#160;            identity,</div>
-<div class="line"><a name="l00242"></a><span class="lineno">  242</span>&#160;            stream,</div>
-<div class="line"><a name="l00243"></a><span class="lineno">  243</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00244"></a><span class="lineno">  244</span>&#160;    }</div>
-<div class="line"><a name="l00245"></a><span class="lineno">  245</span>&#160;</div>
-<div class="line"><a name="l00246"></a><span class="lineno">  246</span>&#160;</div>
-<div class="line"><a name="l00296"></a><span class="lineno">  296</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00297"></a><span class="lineno">  297</span>&#160;        <span class="keyword">typename</span>            InputIteratorT,</div>
-<div class="line"><a name="l00298"></a><span class="lineno">  298</span>&#160;        <span class="keyword">typename</span>            OutputIteratorT,</div>
-<div class="line"><a name="l00299"></a><span class="lineno">  299</span>&#160;        <span class="keyword">typename</span>            T&gt;</div>
-<div class="line"><a name="l00300"></a><span class="lineno">  300</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00301"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_reduce.html#a687d16fb62173a4f2630db8fe4db15fe">  301</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_reduce.html#a687d16fb62173a4f2630db8fe4db15fe" title="Computes a device-wide segmented minimum using the less-than (&#39;&lt;&#39;) operator. ">Min</a>(</div>
-<div class="line"><a name="l00302"></a><span class="lineno">  302</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                    </div>
-<div class="line"><a name="l00303"></a><span class="lineno">  303</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                </div>
-<div class="line"><a name="l00304"></a><span class="lineno">  304</span>&#160;        InputIteratorT      d_in,                               </div>
-<div class="line"><a name="l00305"></a><span class="lineno">  305</span>&#160;        OutputIteratorT     d_out,                              </div>
-<div class="line"><a name="l00306"></a><span class="lineno">  306</span>&#160;        <span class="keywordtype">int</span>                 num_segments,                       </div>
-<div class="line"><a name="l00307"></a><span class="lineno">  307</span>&#160;        <span class="keywordtype">int</span>                 *d_begin_offsets,                   </div>
-<div class="line"><a name="l00308"></a><span class="lineno">  308</span>&#160;        <span class="keywordtype">int</span>                 *d_end_offsets,                     </div>
-<div class="line"><a name="l00309"></a><span class="lineno">  309</span>&#160;        T                   identity,                           </div>
-<div class="line"><a name="l00310"></a><span class="lineno">  310</span>&#160;        cudaStream_t        stream              = 0,            </div>
-<div class="line"><a name="l00311"></a><span class="lineno">  311</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)        </div>
-<div class="line"><a name="l00312"></a><span class="lineno">  312</span>&#160;    {</div>
-<div class="line"><a name="l00313"></a><span class="lineno">  313</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00314"></a><span class="lineno">  314</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
+<div class="line"><a name="l00051"></a><span class="lineno">   51</span>&#160;</div>
+<div class="line"><a name="l00064"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_reduce.html">   64</a></span>&#160;<span class="keyword">struct </span><a class="code" href="structcub_1_1_device_segmented_reduce.html" title="DeviceSegmentedReduce provides device-wide, parallel operations for computing a reduction across mult...">DeviceSegmentedReduce</a></div>
+<div class="line"><a name="l00065"></a><span class="lineno">   65</span>&#160;{</div>
+<div class="line"><a name="l00124"></a><span class="lineno">  124</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00125"></a><span class="lineno">  125</span>&#160;        <span class="keyword">typename</span>            InputIteratorT,</div>
+<div class="line"><a name="l00126"></a><span class="lineno">  126</span>&#160;        <span class="keyword">typename</span>            OutputIteratorT,</div>
+<div class="line"><a name="l00127"></a><span class="lineno">  127</span>&#160;        <span class="keyword">typename</span>            ReductionOp,</div>
+<div class="line"><a name="l00128"></a><span class="lineno">  128</span>&#160;        <span class="keyword">typename</span>            T&gt;</div>
+<div class="line"><a name="l00129"></a><span class="lineno">  129</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00130"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_reduce.html#ad9b73f245930740c4d8786fc1a812364">  130</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_reduce.html#ad9b73f245930740c4d8786fc1a812364" title="Computes a device-wide segmented reduction using the specified binary reduction_op functor...">Reduce</a>(</div>
+<div class="line"><a name="l00131"></a><span class="lineno">  131</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                    </div>
+<div class="line"><a name="l00132"></a><span class="lineno">  132</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                </div>
+<div class="line"><a name="l00133"></a><span class="lineno">  133</span>&#160;        InputIteratorT      d_in,                               </div>
+<div class="line"><a name="l00134"></a><span class="lineno">  134</span>&#160;        OutputIteratorT     d_out,                              </div>
+<div class="line"><a name="l00135"></a><span class="lineno">  135</span>&#160;        <span class="keywordtype">int</span>                 num_segments,                       </div>
+<div class="line"><a name="l00136"></a><span class="lineno">  136</span>&#160;        <span class="keywordtype">int</span>                 *d_begin_offsets,                   </div>
+<div class="line"><a name="l00137"></a><span class="lineno">  137</span>&#160;        <span class="keywordtype">int</span>                 *d_end_offsets,                     </div>
+<div class="line"><a name="l00138"></a><span class="lineno">  138</span>&#160;        ReductionOp         reduction_op,                       </div>
+<div class="line"><a name="l00139"></a><span class="lineno">  139</span>&#160;        T                   init,                               </div>
+<div class="line"><a name="l00140"></a><span class="lineno">  140</span>&#160;        cudaStream_t        stream              = 0,            </div>
+<div class="line"><a name="l00141"></a><span class="lineno">  141</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)        </div>
+<div class="line"><a name="l00142"></a><span class="lineno">  142</span>&#160;    {</div>
+<div class="line"><a name="l00143"></a><span class="lineno">  143</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00144"></a><span class="lineno">  144</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
+<div class="line"><a name="l00145"></a><span class="lineno">  145</span>&#160;</div>
+<div class="line"><a name="l00146"></a><span class="lineno">  146</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedReduce&lt;InputIteratorT, OutputIteratorT, OffsetT, ReductionOp&gt;::Dispatch(</div>
+<div class="line"><a name="l00147"></a><span class="lineno">  147</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00148"></a><span class="lineno">  148</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00149"></a><span class="lineno">  149</span>&#160;            d_in,</div>
+<div class="line"><a name="l00150"></a><span class="lineno">  150</span>&#160;            d_out,</div>
+<div class="line"><a name="l00151"></a><span class="lineno">  151</span>&#160;            num_segments,</div>
+<div class="line"><a name="l00152"></a><span class="lineno">  152</span>&#160;            d_begin_offsets,</div>
+<div class="line"><a name="l00153"></a><span class="lineno">  153</span>&#160;            d_end_offsets,</div>
+<div class="line"><a name="l00154"></a><span class="lineno">  154</span>&#160;            reduction_op,</div>
+<div class="line"><a name="l00155"></a><span class="lineno">  155</span>&#160;            init,</div>
+<div class="line"><a name="l00156"></a><span class="lineno">  156</span>&#160;            stream,</div>
+<div class="line"><a name="l00157"></a><span class="lineno">  157</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00158"></a><span class="lineno">  158</span>&#160;    }</div>
+<div class="line"><a name="l00159"></a><span class="lineno">  159</span>&#160;</div>
+<div class="line"><a name="l00160"></a><span class="lineno">  160</span>&#160;</div>
+<div class="line"><a name="l00206"></a><span class="lineno">  206</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00207"></a><span class="lineno">  207</span>&#160;        <span class="keyword">typename</span>            InputIteratorT,</div>
+<div class="line"><a name="l00208"></a><span class="lineno">  208</span>&#160;        <span class="keyword">typename</span>            OutputIteratorT&gt;</div>
+<div class="line"><a name="l00209"></a><span class="lineno">  209</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00210"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_reduce.html#aefdf8fcdfb5e5d76459ee222360924eb">  210</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_reduce.html#aefdf8fcdfb5e5d76459ee222360924eb" title="Computes a device-wide segmented sum using the addition (&#39;+&#39;) operator. ">Sum</a>(</div>
+<div class="line"><a name="l00211"></a><span class="lineno">  211</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                    </div>
+<div class="line"><a name="l00212"></a><span class="lineno">  212</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                </div>
+<div class="line"><a name="l00213"></a><span class="lineno">  213</span>&#160;        InputIteratorT      d_in,                               </div>
+<div class="line"><a name="l00214"></a><span class="lineno">  214</span>&#160;        OutputIteratorT     d_out,                              </div>
+<div class="line"><a name="l00215"></a><span class="lineno">  215</span>&#160;        <span class="keywordtype">int</span>                 num_segments,                       </div>
+<div class="line"><a name="l00216"></a><span class="lineno">  216</span>&#160;        <span class="keywordtype">int</span>                 *d_begin_offsets,                   </div>
+<div class="line"><a name="l00217"></a><span class="lineno">  217</span>&#160;        <span class="keywordtype">int</span>                 *d_end_offsets,                     </div>
+<div class="line"><a name="l00218"></a><span class="lineno">  218</span>&#160;        cudaStream_t        stream              = 0,            </div>
+<div class="line"><a name="l00219"></a><span class="lineno">  219</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)        </div>
+<div class="line"><a name="l00220"></a><span class="lineno">  220</span>&#160;    {</div>
+<div class="line"><a name="l00221"></a><span class="lineno">  221</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;                                                    <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00222"></a><span class="lineno">  222</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> std::iterator_traits&lt;InputIteratorT&gt;::value_type T;    <span class="comment">// Data element type</span></div>
+<div class="line"><a name="l00223"></a><span class="lineno">  223</span>&#160;</div>
+<div class="line"><a name="l00224"></a><span class="lineno">  224</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedReduce&lt;InputIteratorT, OutputIteratorT, OffsetT, cub::Sum&gt;::Dispatch(</div>
+<div class="line"><a name="l00225"></a><span class="lineno">  225</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00226"></a><span class="lineno">  226</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00227"></a><span class="lineno">  227</span>&#160;            d_in,</div>
+<div class="line"><a name="l00228"></a><span class="lineno">  228</span>&#160;            d_out,</div>
+<div class="line"><a name="l00229"></a><span class="lineno">  229</span>&#160;            num_segments,</div>
+<div class="line"><a name="l00230"></a><span class="lineno">  230</span>&#160;            d_begin_offsets,</div>
+<div class="line"><a name="l00231"></a><span class="lineno">  231</span>&#160;            d_end_offsets,</div>
+<div class="line"><a name="l00232"></a><span class="lineno">  232</span>&#160;            <a class="code" href="structcub_1_1_sum.html" title="Default sum functor. ">cub::Sum</a>(),</div>
+<div class="line"><a name="l00233"></a><span class="lineno">  233</span>&#160;            T(),            <span class="comment">// zero-initialize</span></div>
+<div class="line"><a name="l00234"></a><span class="lineno">  234</span>&#160;            stream,</div>
+<div class="line"><a name="l00235"></a><span class="lineno">  235</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00236"></a><span class="lineno">  236</span>&#160;    }</div>
+<div class="line"><a name="l00237"></a><span class="lineno">  237</span>&#160;</div>
+<div class="line"><a name="l00238"></a><span class="lineno">  238</span>&#160;</div>
+<div class="line"><a name="l00284"></a><span class="lineno">  284</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00285"></a><span class="lineno">  285</span>&#160;        <span class="keyword">typename</span>            InputIteratorT,</div>
+<div class="line"><a name="l00286"></a><span class="lineno">  286</span>&#160;        <span class="keyword">typename</span>            OutputIteratorT&gt;</div>
+<div class="line"><a name="l00287"></a><span class="lineno">  287</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00288"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_reduce.html#a2fb8a073bb504afd0e05cd06d008ec29">  288</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_reduce.html#a2fb8a073bb504afd0e05cd06d008ec29" title="Computes a device-wide segmented minimum using the less-than (&#39;&lt;&#39;) operator. ">Min</a>(</div>
+<div class="line"><a name="l00289"></a><span class="lineno">  289</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                    </div>
+<div class="line"><a name="l00290"></a><span class="lineno">  290</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                </div>
+<div class="line"><a name="l00291"></a><span class="lineno">  291</span>&#160;        InputIteratorT      d_in,                               </div>
+<div class="line"><a name="l00292"></a><span class="lineno">  292</span>&#160;        OutputIteratorT     d_out,                              </div>
+<div class="line"><a name="l00293"></a><span class="lineno">  293</span>&#160;        <span class="keywordtype">int</span>                 num_segments,                       </div>
+<div class="line"><a name="l00294"></a><span class="lineno">  294</span>&#160;        <span class="keywordtype">int</span>                 *d_begin_offsets,                   </div>
+<div class="line"><a name="l00295"></a><span class="lineno">  295</span>&#160;        <span class="keywordtype">int</span>                 *d_end_offsets,                     </div>
+<div class="line"><a name="l00296"></a><span class="lineno">  296</span>&#160;        cudaStream_t        stream              = 0,            </div>
+<div class="line"><a name="l00297"></a><span class="lineno">  297</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)        </div>
+<div class="line"><a name="l00298"></a><span class="lineno">  298</span>&#160;    {</div>
+<div class="line"><a name="l00299"></a><span class="lineno">  299</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;                                                    <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00300"></a><span class="lineno">  300</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> std::iterator_traits&lt;InputIteratorT&gt;::value_type T;    <span class="comment">// Data element type</span></div>
+<div class="line"><a name="l00301"></a><span class="lineno">  301</span>&#160;</div>
+<div class="line"><a name="l00302"></a><span class="lineno">  302</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedReduce&lt;InputIteratorT, OutputIteratorT, OffsetT, cub::Min&gt;::Dispatch(</div>
+<div class="line"><a name="l00303"></a><span class="lineno">  303</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00304"></a><span class="lineno">  304</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00305"></a><span class="lineno">  305</span>&#160;            d_in,</div>
+<div class="line"><a name="l00306"></a><span class="lineno">  306</span>&#160;            d_out,</div>
+<div class="line"><a name="l00307"></a><span class="lineno">  307</span>&#160;            num_segments,</div>
+<div class="line"><a name="l00308"></a><span class="lineno">  308</span>&#160;            d_begin_offsets,</div>
+<div class="line"><a name="l00309"></a><span class="lineno">  309</span>&#160;            d_end_offsets,</div>
+<div class="line"><a name="l00310"></a><span class="lineno">  310</span>&#160;            <a class="code" href="structcub_1_1_min.html" title="Default min functor. ">cub::Min</a>(),</div>
+<div class="line"><a name="l00311"></a><span class="lineno">  311</span>&#160;            Traits&lt;T&gt;::Max(),    <span class="comment">// replace with std::numeric_limits&lt;T&gt;::max() when C++11 support is more prevalent</span></div>
+<div class="line"><a name="l00312"></a><span class="lineno">  312</span>&#160;            stream,</div>
+<div class="line"><a name="l00313"></a><span class="lineno">  313</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00314"></a><span class="lineno">  314</span>&#160;    }</div>
 <div class="line"><a name="l00315"></a><span class="lineno">  315</span>&#160;</div>
-<div class="line"><a name="l00316"></a><span class="lineno">  316</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedReduce&lt;InputIteratorT, OutputIteratorT, OffsetT, cub::Min&gt;::Dispatch(</div>
-<div class="line"><a name="l00317"></a><span class="lineno">  317</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00318"></a><span class="lineno">  318</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00319"></a><span class="lineno">  319</span>&#160;            d_in,</div>
-<div class="line"><a name="l00320"></a><span class="lineno">  320</span>&#160;            d_out,</div>
-<div class="line"><a name="l00321"></a><span class="lineno">  321</span>&#160;            num_segments,</div>
-<div class="line"><a name="l00322"></a><span class="lineno">  322</span>&#160;            d_begin_offsets,</div>
-<div class="line"><a name="l00323"></a><span class="lineno">  323</span>&#160;            d_end_offsets,</div>
-<div class="line"><a name="l00324"></a><span class="lineno">  324</span>&#160;            <a class="code" href="structcub_1_1_min.html" title="Default min functor. ">cub::Min</a>(),</div>
-<div class="line"><a name="l00325"></a><span class="lineno">  325</span>&#160;            identity,</div>
-<div class="line"><a name="l00326"></a><span class="lineno">  326</span>&#160;            stream,</div>
-<div class="line"><a name="l00327"></a><span class="lineno">  327</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00328"></a><span class="lineno">  328</span>&#160;    }</div>
-<div class="line"><a name="l00329"></a><span class="lineno">  329</span>&#160;</div>
-<div class="line"><a name="l00330"></a><span class="lineno">  330</span>&#160;</div>
-<div class="line"><a name="l00383"></a><span class="lineno">  383</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00384"></a><span class="lineno">  384</span>&#160;        <span class="keyword">typename</span>            InputIteratorT,</div>
-<div class="line"><a name="l00385"></a><span class="lineno">  385</span>&#160;        <span class="keyword">typename</span>            OutputIteratorT,</div>
-<div class="line"><a name="l00386"></a><span class="lineno">  386</span>&#160;        <span class="keyword">typename</span>            T&gt;</div>
-<div class="line"><a name="l00387"></a><span class="lineno">  387</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00388"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_reduce.html#a1adbde01527b1df8fd326569dce6aa03">  388</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_reduce.html#a1adbde01527b1df8fd326569dce6aa03" title="Finds the first device-wide minimum in each segment using the less-than (&#39;&lt;&#39;) operator, also returning the in-segment index of that item. ">ArgMin</a>(</div>
-<div class="line"><a name="l00389"></a><span class="lineno">  389</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                    </div>
-<div class="line"><a name="l00390"></a><span class="lineno">  390</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                </div>
-<div class="line"><a name="l00391"></a><span class="lineno">  391</span>&#160;        InputIteratorT      d_in,                               </div>
-<div class="line"><a name="l00392"></a><span class="lineno">  392</span>&#160;        OutputIteratorT     d_out,                              </div>
-<div class="line"><a name="l00393"></a><span class="lineno">  393</span>&#160;        <span class="keywordtype">int</span>                 num_segments,                       </div>
-<div class="line"><a name="l00394"></a><span class="lineno">  394</span>&#160;        <span class="keywordtype">int</span>                 *d_begin_offsets,                   </div>
-<div class="line"><a name="l00395"></a><span class="lineno">  395</span>&#160;        <span class="keywordtype">int</span>                 *d_end_offsets,                     </div>
-<div class="line"><a name="l00396"></a><span class="lineno">  396</span>&#160;        T                   identity,                           </div>
-<div class="line"><a name="l00397"></a><span class="lineno">  397</span>&#160;        cudaStream_t        stream              = 0,            </div>
-<div class="line"><a name="l00398"></a><span class="lineno">  398</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)        </div>
-<div class="line"><a name="l00399"></a><span class="lineno">  399</span>&#160;    {</div>
-<div class="line"><a name="l00400"></a><span class="lineno">  400</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00401"></a><span class="lineno">  401</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00402"></a><span class="lineno">  402</span>&#160;</div>
-<div class="line"><a name="l00403"></a><span class="lineno">  403</span>&#160;        <span class="comment">// Wrapped input iterator</span></div>
-<div class="line"><a name="l00404"></a><span class="lineno">  404</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="classcub_1_1_arg_index_input_iterator.html" title="A random-access input wrapper for pairing dereferenced values with their corresponding indices (formi...">ArgIndexInputIterator&lt;InputIteratorT, int&gt;</a> ArgIndexInputIteratorT;</div>
-<div class="line"><a name="l00405"></a><span class="lineno">  405</span>&#160;        ArgIndexInputIteratorT d_argmin_in(d_in, 0);</div>
-<div class="line"><a name="l00406"></a><span class="lineno">  406</span>&#160;</div>
-<div class="line"><a name="l00407"></a><span class="lineno">  407</span>&#160;        <a class="code" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">KeyValuePair&lt;OffsetT, T&gt;</a> identity_pair = {-1, identity};</div>
-<div class="line"><a name="l00408"></a><span class="lineno">  408</span>&#160;</div>
-<div class="line"><a name="l00409"></a><span class="lineno">  409</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedReduce&lt;ArgIndexInputIteratorT, OutputIteratorT, OffsetT, cub::ArgMin&gt;::Dispatch(</div>
-<div class="line"><a name="l00410"></a><span class="lineno">  410</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00411"></a><span class="lineno">  411</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00412"></a><span class="lineno">  412</span>&#160;            d_argmin_in,</div>
-<div class="line"><a name="l00413"></a><span class="lineno">  413</span>&#160;            d_out,</div>
-<div class="line"><a name="l00414"></a><span class="lineno">  414</span>&#160;            num_segments,</div>
-<div class="line"><a name="l00415"></a><span class="lineno">  415</span>&#160;            d_begin_offsets,</div>
-<div class="line"><a name="l00416"></a><span class="lineno">  416</span>&#160;            d_end_offsets,</div>
-<div class="line"><a name="l00417"></a><span class="lineno">  417</span>&#160;            <a class="code" href="structcub_1_1_arg_min.html" title="Arg min functor (keeps the value and offset of the first occurrence of the smallest item) ...">cub::ArgMin</a>(),</div>
-<div class="line"><a name="l00418"></a><span class="lineno">  418</span>&#160;            identity_pair,</div>
-<div class="line"><a name="l00419"></a><span class="lineno">  419</span>&#160;            stream,</div>
-<div class="line"><a name="l00420"></a><span class="lineno">  420</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00421"></a><span class="lineno">  421</span>&#160;    }</div>
-<div class="line"><a name="l00422"></a><span class="lineno">  422</span>&#160;</div>
-<div class="line"><a name="l00423"></a><span class="lineno">  423</span>&#160;</div>
-<div class="line"><a name="l00473"></a><span class="lineno">  473</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00474"></a><span class="lineno">  474</span>&#160;        <span class="keyword">typename</span>            InputIteratorT,</div>
-<div class="line"><a name="l00475"></a><span class="lineno">  475</span>&#160;        <span class="keyword">typename</span>            OutputIteratorT,</div>
-<div class="line"><a name="l00476"></a><span class="lineno">  476</span>&#160;        <span class="keyword">typename</span>            T&gt;</div>
-<div class="line"><a name="l00477"></a><span class="lineno">  477</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00478"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_reduce.html#a81cc146a6aa537728469ca76f5d91c16">  478</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_reduce.html#a81cc146a6aa537728469ca76f5d91c16" title="Computes a device-wide segmented maximum using the greater-than (&#39;&gt;&#39;) operator. ">Max</a>(</div>
-<div class="line"><a name="l00479"></a><span class="lineno">  479</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                    </div>
-<div class="line"><a name="l00480"></a><span class="lineno">  480</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                </div>
-<div class="line"><a name="l00481"></a><span class="lineno">  481</span>&#160;        InputIteratorT      d_in,                               </div>
-<div class="line"><a name="l00482"></a><span class="lineno">  482</span>&#160;        OutputIteratorT     d_out,                              </div>
-<div class="line"><a name="l00483"></a><span class="lineno">  483</span>&#160;        <span class="keywordtype">int</span>                 num_segments,                       </div>
-<div class="line"><a name="l00484"></a><span class="lineno">  484</span>&#160;        <span class="keywordtype">int</span>                 *d_begin_offsets,                   </div>
-<div class="line"><a name="l00485"></a><span class="lineno">  485</span>&#160;        <span class="keywordtype">int</span>                 *d_end_offsets,                     </div>
-<div class="line"><a name="l00486"></a><span class="lineno">  486</span>&#160;        T                   identity,                           </div>
-<div class="line"><a name="l00487"></a><span class="lineno">  487</span>&#160;        cudaStream_t        stream              = 0,            </div>
-<div class="line"><a name="l00488"></a><span class="lineno">  488</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)        </div>
-<div class="line"><a name="l00489"></a><span class="lineno">  489</span>&#160;    {</div>
-<div class="line"><a name="l00490"></a><span class="lineno">  490</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00491"></a><span class="lineno">  491</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00492"></a><span class="lineno">  492</span>&#160;</div>
-<div class="line"><a name="l00493"></a><span class="lineno">  493</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedReduce&lt;InputIteratorT, OutputIteratorT, OffsetT, cub::Max&gt;::Dispatch(</div>
-<div class="line"><a name="l00494"></a><span class="lineno">  494</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00495"></a><span class="lineno">  495</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00496"></a><span class="lineno">  496</span>&#160;            d_in,</div>
-<div class="line"><a name="l00497"></a><span class="lineno">  497</span>&#160;            d_out,</div>
-<div class="line"><a name="l00498"></a><span class="lineno">  498</span>&#160;            num_segments,</div>
-<div class="line"><a name="l00499"></a><span class="lineno">  499</span>&#160;            d_begin_offsets,</div>
-<div class="line"><a name="l00500"></a><span class="lineno">  500</span>&#160;            d_end_offsets,</div>
-<div class="line"><a name="l00501"></a><span class="lineno">  501</span>&#160;            <a class="code" href="structcub_1_1_max.html" title="Default max functor. ">cub::Max</a>(),</div>
-<div class="line"><a name="l00502"></a><span class="lineno">  502</span>&#160;            identity,</div>
-<div class="line"><a name="l00503"></a><span class="lineno">  503</span>&#160;            stream,</div>
-<div class="line"><a name="l00504"></a><span class="lineno">  504</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00505"></a><span class="lineno">  505</span>&#160;    }</div>
-<div class="line"><a name="l00506"></a><span class="lineno">  506</span>&#160;</div>
-<div class="line"><a name="l00507"></a><span class="lineno">  507</span>&#160;</div>
-<div class="line"><a name="l00560"></a><span class="lineno">  560</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00561"></a><span class="lineno">  561</span>&#160;        <span class="keyword">typename</span>            InputIteratorT,</div>
-<div class="line"><a name="l00562"></a><span class="lineno">  562</span>&#160;        <span class="keyword">typename</span>            OutputIteratorT,</div>
-<div class="line"><a name="l00563"></a><span class="lineno">  563</span>&#160;        <span class="keyword">typename</span>            T&gt;</div>
-<div class="line"><a name="l00564"></a><span class="lineno">  564</span>&#160;    CUB_RUNTIME_FUNCTION</div>
-<div class="line"><a name="l00565"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_reduce.html#abf36bf795ef905c5f3736f46e7f5d0da">  565</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_reduce.html#abf36bf795ef905c5f3736f46e7f5d0da" title="Finds the first device-wide maximum in each segment using the greater-than (&#39;&gt;&#39;) operator...">ArgMax</a>(</div>
-<div class="line"><a name="l00566"></a><span class="lineno">  566</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                    </div>
-<div class="line"><a name="l00567"></a><span class="lineno">  567</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                </div>
-<div class="line"><a name="l00568"></a><span class="lineno">  568</span>&#160;        InputIteratorT      d_in,                               </div>
-<div class="line"><a name="l00569"></a><span class="lineno">  569</span>&#160;        OutputIteratorT     d_out,                              </div>
-<div class="line"><a name="l00570"></a><span class="lineno">  570</span>&#160;        <span class="keywordtype">int</span>                 num_segments,                       </div>
-<div class="line"><a name="l00571"></a><span class="lineno">  571</span>&#160;        <span class="keywordtype">int</span>                 *d_begin_offsets,                   </div>
-<div class="line"><a name="l00572"></a><span class="lineno">  572</span>&#160;        <span class="keywordtype">int</span>                 *d_end_offsets,                     </div>
-<div class="line"><a name="l00573"></a><span class="lineno">  573</span>&#160;        T                   identity,                           </div>
-<div class="line"><a name="l00574"></a><span class="lineno">  574</span>&#160;        cudaStream_t        stream              = 0,            </div>
-<div class="line"><a name="l00575"></a><span class="lineno">  575</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)        </div>
-<div class="line"><a name="l00576"></a><span class="lineno">  576</span>&#160;    {</div>
-<div class="line"><a name="l00577"></a><span class="lineno">  577</span>&#160;        <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00578"></a><span class="lineno">  578</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;</div>
-<div class="line"><a name="l00579"></a><span class="lineno">  579</span>&#160;</div>
-<div class="line"><a name="l00580"></a><span class="lineno">  580</span>&#160;        <span class="comment">// Wrapped input iterator</span></div>
-<div class="line"><a name="l00581"></a><span class="lineno">  581</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="classcub_1_1_arg_index_input_iterator.html" title="A random-access input wrapper for pairing dereferenced values with their corresponding indices (formi...">ArgIndexInputIterator&lt;InputIteratorT, OffsetT&gt;</a> ArgIndexInputIteratorT;</div>
-<div class="line"><a name="l00582"></a><span class="lineno">  582</span>&#160;        ArgIndexInputIteratorT d_argmax_in(d_in, 0);</div>
-<div class="line"><a name="l00583"></a><span class="lineno">  583</span>&#160;</div>
-<div class="line"><a name="l00584"></a><span class="lineno">  584</span>&#160;        <a class="code" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">KeyValuePair&lt;OffsetT, T&gt;</a> identity_pair = {-1, identity};</div>
-<div class="line"><a name="l00585"></a><span class="lineno">  585</span>&#160;</div>
-<div class="line"><a name="l00586"></a><span class="lineno">  586</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedReduce&lt;ArgIndexInputIteratorT, OutputIteratorT, OffsetT, cub::ArgMax&gt;::Dispatch(</div>
-<div class="line"><a name="l00587"></a><span class="lineno">  587</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00588"></a><span class="lineno">  588</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00589"></a><span class="lineno">  589</span>&#160;            d_argmax_in,</div>
-<div class="line"><a name="l00590"></a><span class="lineno">  590</span>&#160;            d_out,</div>
-<div class="line"><a name="l00591"></a><span class="lineno">  591</span>&#160;            num_segments,</div>
-<div class="line"><a name="l00592"></a><span class="lineno">  592</span>&#160;            d_begin_offsets,</div>
-<div class="line"><a name="l00593"></a><span class="lineno">  593</span>&#160;            d_end_offsets,</div>
-<div class="line"><a name="l00594"></a><span class="lineno">  594</span>&#160;            <a class="code" href="structcub_1_1_arg_max.html" title="Arg max functor (keeps the value and offset of the first occurrence of the larger item) ...">cub::ArgMax</a>(),</div>
-<div class="line"><a name="l00595"></a><span class="lineno">  595</span>&#160;            identity_pair,</div>
-<div class="line"><a name="l00596"></a><span class="lineno">  596</span>&#160;            stream,</div>
-<div class="line"><a name="l00597"></a><span class="lineno">  597</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00598"></a><span class="lineno">  598</span>&#160;    }</div>
-<div class="line"><a name="l00599"></a><span class="lineno">  599</span>&#160;</div>
-<div class="line"><a name="l00600"></a><span class="lineno">  600</span>&#160;};</div>
-<div class="line"><a name="l00601"></a><span class="lineno">  601</span>&#160;</div>
-<div class="line"><a name="l00602"></a><span class="lineno">  602</span>&#160;}               <span class="comment">// CUB namespace</span></div>
-<div class="line"><a name="l00603"></a><span class="lineno">  603</span>&#160;CUB_NS_POSTFIX  <span class="comment">// Optional outer namespace(s)</span></div>
-<div class="line"><a name="l00604"></a><span class="lineno">  604</span>&#160;</div>
-<div class="line"><a name="l00605"></a><span class="lineno">  605</span>&#160;</div>
+<div class="line"><a name="l00316"></a><span class="lineno">  316</span>&#160;</div>
+<div class="line"><a name="l00364"></a><span class="lineno">  364</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00365"></a><span class="lineno">  365</span>&#160;        <span class="keyword">typename</span>            InputIteratorT,</div>
+<div class="line"><a name="l00366"></a><span class="lineno">  366</span>&#160;        <span class="keyword">typename</span>            OutputIteratorT&gt;</div>
+<div class="line"><a name="l00367"></a><span class="lineno">  367</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00368"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_reduce.html#a085150ad8d55de8665b45a3f69f38bce">  368</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_reduce.html#a085150ad8d55de8665b45a3f69f38bce" title="Finds the first device-wide minimum in each segment using the less-than (&#39;&lt;&#39;) operator, also returning the in-segment index of that item. ">ArgMin</a>(</div>
+<div class="line"><a name="l00369"></a><span class="lineno">  369</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                    </div>
+<div class="line"><a name="l00370"></a><span class="lineno">  370</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                </div>
+<div class="line"><a name="l00371"></a><span class="lineno">  371</span>&#160;        InputIteratorT      d_in,                               </div>
+<div class="line"><a name="l00372"></a><span class="lineno">  372</span>&#160;        OutputIteratorT     d_out,                              </div>
+<div class="line"><a name="l00373"></a><span class="lineno">  373</span>&#160;        <span class="keywordtype">int</span>                 num_segments,                       </div>
+<div class="line"><a name="l00374"></a><span class="lineno">  374</span>&#160;        <span class="keywordtype">int</span>                 *d_begin_offsets,                   </div>
+<div class="line"><a name="l00375"></a><span class="lineno">  375</span>&#160;        <span class="keywordtype">int</span>                 *d_end_offsets,                     </div>
+<div class="line"><a name="l00376"></a><span class="lineno">  376</span>&#160;        cudaStream_t        stream              = 0,            </div>
+<div class="line"><a name="l00377"></a><span class="lineno">  377</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)        </div>
+<div class="line"><a name="l00378"></a><span class="lineno">  378</span>&#160;    {</div>
+<div class="line"><a name="l00379"></a><span class="lineno">  379</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;                                                        <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00380"></a><span class="lineno">  380</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> std::iterator_traits&lt;InputIteratorT&gt;::value_type T;        <span class="comment">// Data element type</span></div>
+<div class="line"><a name="l00381"></a><span class="lineno">  381</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="classcub_1_1_arg_index_input_iterator.html" title="A random-access input wrapper for pairing dereferenced values with their corresponding indices (formi...">ArgIndexInputIterator&lt;InputIteratorT, int&gt;</a> ArgIndexInputIteratorT;  <span class="comment">// Wrapped input iterator type</span></div>
+<div class="line"><a name="l00382"></a><span class="lineno">  382</span>&#160;</div>
+<div class="line"><a name="l00383"></a><span class="lineno">  383</span>&#160;        ArgIndexInputIteratorT      d_argmin_in(d_in);</div>
+<div class="line"><a name="l00384"></a><span class="lineno">  384</span>&#160;        KeyValuePair&lt;OffsetT, T&gt;    init = {1, Traits&lt;T&gt;::Max()};   <span class="comment">// replace with std::numeric_limits&lt;T&gt;::max() when C++11 support is more prevalent</span></div>
+<div class="line"><a name="l00385"></a><span class="lineno">  385</span>&#160;</div>
+<div class="line"><a name="l00386"></a><span class="lineno">  386</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedReduce&lt;ArgIndexInputIteratorT, OutputIteratorT, OffsetT, cub::ArgMin&gt;::Dispatch(</div>
+<div class="line"><a name="l00387"></a><span class="lineno">  387</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00388"></a><span class="lineno">  388</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00389"></a><span class="lineno">  389</span>&#160;            d_argmin_in,</div>
+<div class="line"><a name="l00390"></a><span class="lineno">  390</span>&#160;            d_out,</div>
+<div class="line"><a name="l00391"></a><span class="lineno">  391</span>&#160;            num_segments,</div>
+<div class="line"><a name="l00392"></a><span class="lineno">  392</span>&#160;            d_begin_offsets,</div>
+<div class="line"><a name="l00393"></a><span class="lineno">  393</span>&#160;            d_end_offsets,</div>
+<div class="line"><a name="l00394"></a><span class="lineno">  394</span>&#160;            <a class="code" href="structcub_1_1_arg_min.html" title="Arg min functor (keeps the value and offset of the first occurrence of the smallest item) ...">cub::ArgMin</a>(),</div>
+<div class="line"><a name="l00395"></a><span class="lineno">  395</span>&#160;            init,</div>
+<div class="line"><a name="l00396"></a><span class="lineno">  396</span>&#160;            stream,</div>
+<div class="line"><a name="l00397"></a><span class="lineno">  397</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00398"></a><span class="lineno">  398</span>&#160;    }</div>
+<div class="line"><a name="l00399"></a><span class="lineno">  399</span>&#160;</div>
+<div class="line"><a name="l00400"></a><span class="lineno">  400</span>&#160;</div>
+<div class="line"><a name="l00446"></a><span class="lineno">  446</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00447"></a><span class="lineno">  447</span>&#160;        <span class="keyword">typename</span>            InputIteratorT,</div>
+<div class="line"><a name="l00448"></a><span class="lineno">  448</span>&#160;        <span class="keyword">typename</span>            OutputIteratorT&gt;</div>
+<div class="line"><a name="l00449"></a><span class="lineno">  449</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00450"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_reduce.html#aa1f982f913c95d9974412b8fc0995183">  450</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_reduce.html#aa1f982f913c95d9974412b8fc0995183" title="Computes a device-wide segmented maximum using the greater-than (&#39;&gt;&#39;) operator. ">Max</a>(</div>
+<div class="line"><a name="l00451"></a><span class="lineno">  451</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                    </div>
+<div class="line"><a name="l00452"></a><span class="lineno">  452</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                </div>
+<div class="line"><a name="l00453"></a><span class="lineno">  453</span>&#160;        InputIteratorT      d_in,                               </div>
+<div class="line"><a name="l00454"></a><span class="lineno">  454</span>&#160;        OutputIteratorT     d_out,                              </div>
+<div class="line"><a name="l00455"></a><span class="lineno">  455</span>&#160;        <span class="keywordtype">int</span>                 num_segments,                       </div>
+<div class="line"><a name="l00456"></a><span class="lineno">  456</span>&#160;        <span class="keywordtype">int</span>                 *d_begin_offsets,                   </div>
+<div class="line"><a name="l00457"></a><span class="lineno">  457</span>&#160;        <span class="keywordtype">int</span>                 *d_end_offsets,                     </div>
+<div class="line"><a name="l00458"></a><span class="lineno">  458</span>&#160;        cudaStream_t        stream              = 0,            </div>
+<div class="line"><a name="l00459"></a><span class="lineno">  459</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)        </div>
+<div class="line"><a name="l00460"></a><span class="lineno">  460</span>&#160;    {</div>
+<div class="line"><a name="l00461"></a><span class="lineno">  461</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;                                                    <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00462"></a><span class="lineno">  462</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> std::iterator_traits&lt;InputIteratorT&gt;::value_type T;    <span class="comment">// Data element type</span></div>
+<div class="line"><a name="l00463"></a><span class="lineno">  463</span>&#160;</div>
+<div class="line"><a name="l00464"></a><span class="lineno">  464</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedReduce&lt;InputIteratorT, OutputIteratorT, OffsetT, cub::Max&gt;::Dispatch(</div>
+<div class="line"><a name="l00465"></a><span class="lineno">  465</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00466"></a><span class="lineno">  466</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00467"></a><span class="lineno">  467</span>&#160;            d_in,</div>
+<div class="line"><a name="l00468"></a><span class="lineno">  468</span>&#160;            d_out,</div>
+<div class="line"><a name="l00469"></a><span class="lineno">  469</span>&#160;            num_segments,</div>
+<div class="line"><a name="l00470"></a><span class="lineno">  470</span>&#160;            d_begin_offsets,</div>
+<div class="line"><a name="l00471"></a><span class="lineno">  471</span>&#160;            d_end_offsets,</div>
+<div class="line"><a name="l00472"></a><span class="lineno">  472</span>&#160;            <a class="code" href="structcub_1_1_max.html" title="Default max functor. ">cub::Max</a>(),</div>
+<div class="line"><a name="l00473"></a><span class="lineno">  473</span>&#160;            Traits&lt;T&gt;::Lowest(),    <span class="comment">// replace with std::numeric_limits&lt;T&gt;::lowest() when C++11 support is more prevalent</span></div>
+<div class="line"><a name="l00474"></a><span class="lineno">  474</span>&#160;            stream,</div>
+<div class="line"><a name="l00475"></a><span class="lineno">  475</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00476"></a><span class="lineno">  476</span>&#160;    }</div>
+<div class="line"><a name="l00477"></a><span class="lineno">  477</span>&#160;</div>
+<div class="line"><a name="l00478"></a><span class="lineno">  478</span>&#160;</div>
+<div class="line"><a name="l00526"></a><span class="lineno">  526</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00527"></a><span class="lineno">  527</span>&#160;        <span class="keyword">typename</span>            InputIteratorT,</div>
+<div class="line"><a name="l00528"></a><span class="lineno">  528</span>&#160;        <span class="keyword">typename</span>            OutputIteratorT&gt;</div>
+<div class="line"><a name="l00529"></a><span class="lineno">  529</span>&#160;    CUB_RUNTIME_FUNCTION</div>
+<div class="line"><a name="l00530"></a><span class="lineno"><a class="code" href="structcub_1_1_device_segmented_reduce.html#ad0e1f7eede9a0f93a379950eac7d8329">  530</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_segmented_reduce.html#ad0e1f7eede9a0f93a379950eac7d8329" title="Finds the first device-wide maximum in each segment using the greater-than (&#39;&gt;&#39;) operator...">ArgMax</a>(</div>
+<div class="line"><a name="l00531"></a><span class="lineno">  531</span>&#160;        <span class="keywordtype">void</span>                *d_temp_storage,                    </div>
+<div class="line"><a name="l00532"></a><span class="lineno">  532</span>&#160;        <span class="keywordtype">size_t</span>              &amp;temp_storage_bytes,                </div>
+<div class="line"><a name="l00533"></a><span class="lineno">  533</span>&#160;        InputIteratorT      d_in,                               </div>
+<div class="line"><a name="l00534"></a><span class="lineno">  534</span>&#160;        OutputIteratorT     d_out,                              </div>
+<div class="line"><a name="l00535"></a><span class="lineno">  535</span>&#160;        <span class="keywordtype">int</span>                 num_segments,                       </div>
+<div class="line"><a name="l00536"></a><span class="lineno">  536</span>&#160;        <span class="keywordtype">int</span>                 *d_begin_offsets,                   </div>
+<div class="line"><a name="l00537"></a><span class="lineno">  537</span>&#160;        <span class="keywordtype">int</span>                 *d_end_offsets,                     </div>
+<div class="line"><a name="l00538"></a><span class="lineno">  538</span>&#160;        cudaStream_t        stream              = 0,            </div>
+<div class="line"><a name="l00539"></a><span class="lineno">  539</span>&#160;        <span class="keywordtype">bool</span>                debug_synchronous   = <span class="keyword">false</span>)        </div>
+<div class="line"><a name="l00540"></a><span class="lineno">  540</span>&#160;    {</div>
+<div class="line"><a name="l00541"></a><span class="lineno">  541</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span> OffsetT;                                                            <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00542"></a><span class="lineno">  542</span>&#160;        <span class="keyword">typedef</span> <span class="keyword">typename</span> std::iterator_traits&lt;InputIteratorT&gt;::value_type T;            <span class="comment">// Data element type</span></div>
+<div class="line"><a name="l00543"></a><span class="lineno">  543</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="classcub_1_1_arg_index_input_iterator.html" title="A random-access input wrapper for pairing dereferenced values with their corresponding indices (formi...">ArgIndexInputIterator&lt;InputIteratorT, int&gt;</a> ArgIndexInputIteratorT;      <span class="comment">// Wrapped input iterator</span></div>
+<div class="line"><a name="l00544"></a><span class="lineno">  544</span>&#160;</div>
+<div class="line"><a name="l00545"></a><span class="lineno">  545</span>&#160;        ArgIndexInputIteratorT      d_argmax_in(d_in);</div>
+<div class="line"><a name="l00546"></a><span class="lineno">  546</span>&#160;        KeyValuePair&lt;OffsetT, T&gt;    init = {1, Traits&lt;T&gt;::Lowest()};     <span class="comment">// replace with std::numeric_limits&lt;T&gt;::lowest() when C++11 support is more prevalent</span></div>
+<div class="line"><a name="l00547"></a><span class="lineno">  547</span>&#160;</div>
+<div class="line"><a name="l00548"></a><span class="lineno">  548</span>&#160;        <span class="keywordflow">return</span> DispatchSegmentedReduce&lt;ArgIndexInputIteratorT, OutputIteratorT, OffsetT, cub::ArgMax&gt;::Dispatch(</div>
+<div class="line"><a name="l00549"></a><span class="lineno">  549</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00550"></a><span class="lineno">  550</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00551"></a><span class="lineno">  551</span>&#160;            d_argmax_in,</div>
+<div class="line"><a name="l00552"></a><span class="lineno">  552</span>&#160;            d_out,</div>
+<div class="line"><a name="l00553"></a><span class="lineno">  553</span>&#160;            num_segments,</div>
+<div class="line"><a name="l00554"></a><span class="lineno">  554</span>&#160;            d_begin_offsets,</div>
+<div class="line"><a name="l00555"></a><span class="lineno">  555</span>&#160;            d_end_offsets,</div>
+<div class="line"><a name="l00556"></a><span class="lineno">  556</span>&#160;            <a class="code" href="structcub_1_1_arg_max.html" title="Arg max functor (keeps the value and offset of the first occurrence of the larger item) ...">cub::ArgMax</a>(),</div>
+<div class="line"><a name="l00557"></a><span class="lineno">  557</span>&#160;            init,</div>
+<div class="line"><a name="l00558"></a><span class="lineno">  558</span>&#160;            stream,</div>
+<div class="line"><a name="l00559"></a><span class="lineno">  559</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00560"></a><span class="lineno">  560</span>&#160;    }</div>
+<div class="line"><a name="l00561"></a><span class="lineno">  561</span>&#160;</div>
+<div class="line"><a name="l00562"></a><span class="lineno">  562</span>&#160;};</div>
+<div class="line"><a name="l00563"></a><span class="lineno">  563</span>&#160;</div>
+<div class="line"><a name="l00564"></a><span class="lineno">  564</span>&#160;}               <span class="comment">// CUB namespace</span></div>
+<div class="line"><a name="l00565"></a><span class="lineno">  565</span>&#160;CUB_NS_POSTFIX  <span class="comment">// Optional outer namespace(s)</span></div>
+<div class="line"><a name="l00566"></a><span class="lineno">  566</span>&#160;</div>
+<div class="line"><a name="l00567"></a><span class="lineno">  567</span>&#160;</div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Wed Nov 18 2015 10:24:42 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/device__select_8cuh.html b/docs/html/device__select_8cuh.html
index 49a594d978..9fb417afaa 100644
--- a/docs/html/device__select_8cuh.html
+++ b/docs/html/device__select_8cuh.html
@@ -129,14 +129,14 @@
 <tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table>
 <a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
-<div class="textblock"><p><a class="el" href="structcub_1_1_device_select.html" title="DeviceSelect provides device-wide, parallel operations for compacting selected items from sequences o...">cub::DeviceSelect</a> provides device-wide, parallel operations for selecting items from sequences of data items residing within device-accessible memory. </p>
+<div class="textblock"><p><a class="el" href="structcub_1_1_device_select.html" title="DeviceSelect provides device-wide, parallel operations for compacting selected items from sequences o...">cub::DeviceSelect</a> provides device-wide, parallel operations for compacting selected items from sequences of data items residing within device-accessible memory. </p>
 
 <p>Definition in file <a class="el" href="device__select_8cuh_source.html">device_select.cuh</a>.</p>
 </div></div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/device__select_8cuh_source.html b/docs/html/device__select_8cuh_source.html
index f7eee08b35..bd3a25a25f 100644
--- a/docs/html/device__select_8cuh_source.html
+++ b/docs/html/device__select_8cuh_source.html
@@ -145,123 +145,123 @@
 <div class="line"><a name="l00049"></a><span class="lineno">   49</span>&#160;</div>
 <div class="line"><a name="l00082"></a><span class="lineno"><a class="code" href="structcub_1_1_device_select.html">   82</a></span>&#160;<span class="keyword">struct </span><a class="code" href="structcub_1_1_device_select.html" title="DeviceSelect provides device-wide, parallel operations for compacting selected items from sequences o...">DeviceSelect</a></div>
 <div class="line"><a name="l00083"></a><span class="lineno">   83</span>&#160;{</div>
-<div class="line"><a name="l00128"></a><span class="lineno">  128</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00129"></a><span class="lineno">  129</span>&#160;        <span class="keyword">typename</span>                    InputIteratorT,</div>
-<div class="line"><a name="l00130"></a><span class="lineno">  130</span>&#160;        <span class="keyword">typename</span>                    FlagIterator,</div>
-<div class="line"><a name="l00131"></a><span class="lineno">  131</span>&#160;        <span class="keyword">typename</span>                    OutputIteratorT,</div>
-<div class="line"><a name="l00132"></a><span class="lineno">  132</span>&#160;        <span class="keyword">typename</span>                    NumSelectedIteratorT&gt;</div>
-<div class="line"><a name="l00133"></a><span class="lineno">  133</span>&#160;    CUB_RUNTIME_FUNCTION __forceinline__</div>
-<div class="line"><a name="l00134"></a><span class="lineno"><a class="code" href="structcub_1_1_device_select.html#ad1273ce1f20e442c5a045e0fa17fd5fb">  134</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_select.html#ad1273ce1f20e442c5a045e0fa17fd5fb" title="Uses the d_flags sequence to selectively copy the corresponding items from d_in into d_out...">Flagged</a>(</div>
-<div class="line"><a name="l00135"></a><span class="lineno">  135</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                </div>
-<div class="line"><a name="l00136"></a><span class="lineno">  136</span>&#160;        <span class="keywordtype">size_t</span>                      &amp;temp_storage_bytes,            </div>
-<div class="line"><a name="l00137"></a><span class="lineno">  137</span>&#160;        InputIteratorT              d_in,                           </div>
-<div class="line"><a name="l00138"></a><span class="lineno">  138</span>&#160;        FlagIterator                d_flags,                        </div>
-<div class="line"><a name="l00139"></a><span class="lineno">  139</span>&#160;        OutputIteratorT             d_out,                          </div>
-<div class="line"><a name="l00140"></a><span class="lineno">  140</span>&#160;        NumSelectedIteratorT         d_num_selected_out,                 </div>
-<div class="line"><a name="l00141"></a><span class="lineno">  141</span>&#160;        <span class="keywordtype">int</span>                         num_items,                      </div>
-<div class="line"><a name="l00142"></a><span class="lineno">  142</span>&#160;        cudaStream_t                stream             = 0,         </div>
-<div class="line"><a name="l00143"></a><span class="lineno">  143</span>&#160;        <span class="keywordtype">bool</span>                        debug_synchronous  = <span class="keyword">false</span>)     </div>
-<div class="line"><a name="l00144"></a><span class="lineno">  144</span>&#160;    {</div>
-<div class="line"><a name="l00145"></a><span class="lineno">  145</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span>                     OffsetT;         <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00146"></a><span class="lineno">  146</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_null_type.html" title="A simple &quot;NULL&quot; marker type. ">NullType</a>                SelectOp;       <span class="comment">// Selection op (not used)</span></div>
-<div class="line"><a name="l00147"></a><span class="lineno">  147</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_null_type.html" title="A simple &quot;NULL&quot; marker type. ">NullType</a>                EqualityOp;     <span class="comment">// Equality operator (not used)</span></div>
-<div class="line"><a name="l00148"></a><span class="lineno">  148</span>&#160;</div>
-<div class="line"><a name="l00149"></a><span class="lineno">  149</span>&#160;        <span class="keywordflow">return</span> DispatchSelectIf&lt;InputIteratorT, FlagIterator, OutputIteratorT, NumSelectedIteratorT, SelectOp, EqualityOp, OffsetT, false&gt;::Dispatch(</div>
-<div class="line"><a name="l00150"></a><span class="lineno">  150</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00151"></a><span class="lineno">  151</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00152"></a><span class="lineno">  152</span>&#160;            d_in,</div>
-<div class="line"><a name="l00153"></a><span class="lineno">  153</span>&#160;            d_flags,</div>
-<div class="line"><a name="l00154"></a><span class="lineno">  154</span>&#160;            d_out,</div>
-<div class="line"><a name="l00155"></a><span class="lineno">  155</span>&#160;            d_num_selected_out,</div>
-<div class="line"><a name="l00156"></a><span class="lineno">  156</span>&#160;            SelectOp(),</div>
-<div class="line"><a name="l00157"></a><span class="lineno">  157</span>&#160;            EqualityOp(),</div>
-<div class="line"><a name="l00158"></a><span class="lineno">  158</span>&#160;            num_items,</div>
-<div class="line"><a name="l00159"></a><span class="lineno">  159</span>&#160;            stream,</div>
-<div class="line"><a name="l00160"></a><span class="lineno">  160</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00161"></a><span class="lineno">  161</span>&#160;    }</div>
+<div class="line"><a name="l00127"></a><span class="lineno">  127</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00128"></a><span class="lineno">  128</span>&#160;        <span class="keyword">typename</span>                    InputIteratorT,</div>
+<div class="line"><a name="l00129"></a><span class="lineno">  129</span>&#160;        <span class="keyword">typename</span>                    FlagIterator,</div>
+<div class="line"><a name="l00130"></a><span class="lineno">  130</span>&#160;        <span class="keyword">typename</span>                    OutputIteratorT,</div>
+<div class="line"><a name="l00131"></a><span class="lineno">  131</span>&#160;        <span class="keyword">typename</span>                    NumSelectedIteratorT&gt;</div>
+<div class="line"><a name="l00132"></a><span class="lineno">  132</span>&#160;    CUB_RUNTIME_FUNCTION __forceinline__</div>
+<div class="line"><a name="l00133"></a><span class="lineno"><a class="code" href="structcub_1_1_device_select.html#ad1273ce1f20e442c5a045e0fa17fd5fb">  133</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_select.html#ad1273ce1f20e442c5a045e0fa17fd5fb" title="Uses the d_flags sequence to selectively copy the corresponding items from d_in into d_out...">Flagged</a>(</div>
+<div class="line"><a name="l00134"></a><span class="lineno">  134</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                </div>
+<div class="line"><a name="l00135"></a><span class="lineno">  135</span>&#160;        <span class="keywordtype">size_t</span>                      &amp;temp_storage_bytes,            </div>
+<div class="line"><a name="l00136"></a><span class="lineno">  136</span>&#160;        InputIteratorT              d_in,                           </div>
+<div class="line"><a name="l00137"></a><span class="lineno">  137</span>&#160;        FlagIterator                d_flags,                        </div>
+<div class="line"><a name="l00138"></a><span class="lineno">  138</span>&#160;        OutputIteratorT             d_out,                          </div>
+<div class="line"><a name="l00139"></a><span class="lineno">  139</span>&#160;        NumSelectedIteratorT         d_num_selected_out,                 </div>
+<div class="line"><a name="l00140"></a><span class="lineno">  140</span>&#160;        <span class="keywordtype">int</span>                         num_items,                      </div>
+<div class="line"><a name="l00141"></a><span class="lineno">  141</span>&#160;        cudaStream_t                stream             = 0,         </div>
+<div class="line"><a name="l00142"></a><span class="lineno">  142</span>&#160;        <span class="keywordtype">bool</span>                        debug_synchronous  = <span class="keyword">false</span>)     </div>
+<div class="line"><a name="l00143"></a><span class="lineno">  143</span>&#160;    {</div>
+<div class="line"><a name="l00144"></a><span class="lineno">  144</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span>                     OffsetT;         <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00145"></a><span class="lineno">  145</span>&#160;        <span class="keyword">typedef</span> NullType                SelectOp;       <span class="comment">// Selection op (not used)</span></div>
+<div class="line"><a name="l00146"></a><span class="lineno">  146</span>&#160;        <span class="keyword">typedef</span> NullType                EqualityOp;     <span class="comment">// Equality operator (not used)</span></div>
+<div class="line"><a name="l00147"></a><span class="lineno">  147</span>&#160;</div>
+<div class="line"><a name="l00148"></a><span class="lineno">  148</span>&#160;        <span class="keywordflow">return</span> DispatchSelectIf&lt;InputIteratorT, FlagIterator, OutputIteratorT, NumSelectedIteratorT, SelectOp, EqualityOp, OffsetT, false&gt;::Dispatch(</div>
+<div class="line"><a name="l00149"></a><span class="lineno">  149</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00150"></a><span class="lineno">  150</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00151"></a><span class="lineno">  151</span>&#160;            d_in,</div>
+<div class="line"><a name="l00152"></a><span class="lineno">  152</span>&#160;            d_flags,</div>
+<div class="line"><a name="l00153"></a><span class="lineno">  153</span>&#160;            d_out,</div>
+<div class="line"><a name="l00154"></a><span class="lineno">  154</span>&#160;            d_num_selected_out,</div>
+<div class="line"><a name="l00155"></a><span class="lineno">  155</span>&#160;            SelectOp(),</div>
+<div class="line"><a name="l00156"></a><span class="lineno">  156</span>&#160;            EqualityOp(),</div>
+<div class="line"><a name="l00157"></a><span class="lineno">  157</span>&#160;            num_items,</div>
+<div class="line"><a name="l00158"></a><span class="lineno">  158</span>&#160;            stream,</div>
+<div class="line"><a name="l00159"></a><span class="lineno">  159</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00160"></a><span class="lineno">  160</span>&#160;    }</div>
+<div class="line"><a name="l00161"></a><span class="lineno">  161</span>&#160;</div>
 <div class="line"><a name="l00162"></a><span class="lineno">  162</span>&#160;</div>
-<div class="line"><a name="l00163"></a><span class="lineno">  163</span>&#160;</div>
-<div class="line"><a name="l00235"></a><span class="lineno">  235</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00236"></a><span class="lineno">  236</span>&#160;        <span class="keyword">typename</span>                    InputIteratorT,</div>
-<div class="line"><a name="l00237"></a><span class="lineno">  237</span>&#160;        <span class="keyword">typename</span>                    OutputIteratorT,</div>
-<div class="line"><a name="l00238"></a><span class="lineno">  238</span>&#160;        <span class="keyword">typename</span>                    NumSelectedIteratorT,</div>
-<div class="line"><a name="l00239"></a><span class="lineno">  239</span>&#160;        <span class="keyword">typename</span>                    SelectOp&gt;</div>
-<div class="line"><a name="l00240"></a><span class="lineno">  240</span>&#160;    CUB_RUNTIME_FUNCTION __forceinline__</div>
-<div class="line"><a name="l00241"></a><span class="lineno"><a class="code" href="structcub_1_1_device_select.html#abec3c5afa3f1ca08e7b4fe3fd8990c39">  241</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_select.html#abec3c5afa3f1ca08e7b4fe3fd8990c39" title="Uses the select_op functor to selectively copy items from d_in into d_out. The total number of items ...">If</a>(</div>
-<div class="line"><a name="l00242"></a><span class="lineno">  242</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                </div>
-<div class="line"><a name="l00243"></a><span class="lineno">  243</span>&#160;        <span class="keywordtype">size_t</span>                      &amp;temp_storage_bytes,            </div>
-<div class="line"><a name="l00244"></a><span class="lineno">  244</span>&#160;        InputIteratorT              d_in,                           </div>
-<div class="line"><a name="l00245"></a><span class="lineno">  245</span>&#160;        OutputIteratorT             d_out,                          </div>
-<div class="line"><a name="l00246"></a><span class="lineno">  246</span>&#160;        NumSelectedIteratorT         d_num_selected_out,                 </div>
-<div class="line"><a name="l00247"></a><span class="lineno">  247</span>&#160;        <span class="keywordtype">int</span>                         num_items,                      </div>
-<div class="line"><a name="l00248"></a><span class="lineno">  248</span>&#160;        SelectOp                    select_op,                      </div>
-<div class="line"><a name="l00249"></a><span class="lineno">  249</span>&#160;        cudaStream_t                stream             = 0,         </div>
-<div class="line"><a name="l00250"></a><span class="lineno">  250</span>&#160;        <span class="keywordtype">bool</span>                        debug_synchronous  = <span class="keyword">false</span>)     </div>
-<div class="line"><a name="l00251"></a><span class="lineno">  251</span>&#160;    {</div>
-<div class="line"><a name="l00252"></a><span class="lineno">  252</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span>                     OffsetT;         <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00253"></a><span class="lineno">  253</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_null_type.html" title="A simple &quot;NULL&quot; marker type. ">NullType</a>*               FlagIterator;   <span class="comment">// FlagT iterator type (not used)</span></div>
-<div class="line"><a name="l00254"></a><span class="lineno">  254</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_null_type.html" title="A simple &quot;NULL&quot; marker type. ">NullType</a>                EqualityOp;     <span class="comment">// Equality operator (not used)</span></div>
-<div class="line"><a name="l00255"></a><span class="lineno">  255</span>&#160;</div>
-<div class="line"><a name="l00256"></a><span class="lineno">  256</span>&#160;        <span class="keywordflow">return</span> DispatchSelectIf&lt;InputIteratorT, FlagIterator, OutputIteratorT, NumSelectedIteratorT, SelectOp, EqualityOp, OffsetT, false&gt;::Dispatch(</div>
-<div class="line"><a name="l00257"></a><span class="lineno">  257</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00258"></a><span class="lineno">  258</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00259"></a><span class="lineno">  259</span>&#160;            d_in,</div>
-<div class="line"><a name="l00260"></a><span class="lineno">  260</span>&#160;            NULL,</div>
-<div class="line"><a name="l00261"></a><span class="lineno">  261</span>&#160;            d_out,</div>
-<div class="line"><a name="l00262"></a><span class="lineno">  262</span>&#160;            d_num_selected_out,</div>
-<div class="line"><a name="l00263"></a><span class="lineno">  263</span>&#160;            select_op,</div>
-<div class="line"><a name="l00264"></a><span class="lineno">  264</span>&#160;            EqualityOp(),</div>
-<div class="line"><a name="l00265"></a><span class="lineno">  265</span>&#160;            num_items,</div>
-<div class="line"><a name="l00266"></a><span class="lineno">  266</span>&#160;            stream,</div>
-<div class="line"><a name="l00267"></a><span class="lineno">  267</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00268"></a><span class="lineno">  268</span>&#160;    }</div>
-<div class="line"><a name="l00269"></a><span class="lineno">  269</span>&#160;</div>
-<div class="line"><a name="l00270"></a><span class="lineno">  270</span>&#160;</div>
-<div class="line"><a name="l00327"></a><span class="lineno">  327</span>&#160;    <span class="keyword">template</span> &lt;</div>
-<div class="line"><a name="l00328"></a><span class="lineno">  328</span>&#160;        <span class="keyword">typename</span>                    InputIteratorT,</div>
-<div class="line"><a name="l00329"></a><span class="lineno">  329</span>&#160;        <span class="keyword">typename</span>                    OutputIteratorT,</div>
-<div class="line"><a name="l00330"></a><span class="lineno">  330</span>&#160;        <span class="keyword">typename</span>                    NumSelectedIteratorT&gt;</div>
-<div class="line"><a name="l00331"></a><span class="lineno">  331</span>&#160;    CUB_RUNTIME_FUNCTION __forceinline__</div>
-<div class="line"><a name="l00332"></a><span class="lineno"><a class="code" href="structcub_1_1_device_select.html#a5e9ea8f2581326391ea63e1808ba514d">  332</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_select.html#a5e9ea8f2581326391ea63e1808ba514d" title="Given an input sequence d_in having runs of consecutive equal-valued keys, only the first key from ea...">Unique</a>(</div>
-<div class="line"><a name="l00333"></a><span class="lineno">  333</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                </div>
-<div class="line"><a name="l00334"></a><span class="lineno">  334</span>&#160;        <span class="keywordtype">size_t</span>                      &amp;temp_storage_bytes,            </div>
-<div class="line"><a name="l00335"></a><span class="lineno">  335</span>&#160;        InputIteratorT              d_in,                           </div>
-<div class="line"><a name="l00336"></a><span class="lineno">  336</span>&#160;        OutputIteratorT             d_out,                          </div>
-<div class="line"><a name="l00337"></a><span class="lineno">  337</span>&#160;        NumSelectedIteratorT         d_num_selected_out,             </div>
-<div class="line"><a name="l00338"></a><span class="lineno">  338</span>&#160;        <span class="keywordtype">int</span>                         num_items,                      </div>
-<div class="line"><a name="l00339"></a><span class="lineno">  339</span>&#160;        cudaStream_t                stream             = 0,         </div>
-<div class="line"><a name="l00340"></a><span class="lineno">  340</span>&#160;        <span class="keywordtype">bool</span>                        debug_synchronous  = <span class="keyword">false</span>)     </div>
-<div class="line"><a name="l00341"></a><span class="lineno">  341</span>&#160;    {</div>
-<div class="line"><a name="l00342"></a><span class="lineno">  342</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span>                     OffsetT;         <span class="comment">// Signed integer type for global offsets</span></div>
-<div class="line"><a name="l00343"></a><span class="lineno">  343</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_null_type.html" title="A simple &quot;NULL&quot; marker type. ">NullType</a>*               FlagIterator;   <span class="comment">// FlagT iterator type (not used)</span></div>
-<div class="line"><a name="l00344"></a><span class="lineno">  344</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_null_type.html" title="A simple &quot;NULL&quot; marker type. ">NullType</a>                SelectOp;       <span class="comment">// Selection op (not used)</span></div>
-<div class="line"><a name="l00345"></a><span class="lineno">  345</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_equality.html" title="Default equality functor. ">Equality</a>                EqualityOp;     <span class="comment">// Default == operator</span></div>
-<div class="line"><a name="l00346"></a><span class="lineno">  346</span>&#160;</div>
-<div class="line"><a name="l00347"></a><span class="lineno">  347</span>&#160;        <span class="keywordflow">return</span> DispatchSelectIf&lt;InputIteratorT, FlagIterator, OutputIteratorT, NumSelectedIteratorT, SelectOp, EqualityOp, OffsetT, false&gt;::Dispatch(</div>
-<div class="line"><a name="l00348"></a><span class="lineno">  348</span>&#160;            d_temp_storage,</div>
-<div class="line"><a name="l00349"></a><span class="lineno">  349</span>&#160;            temp_storage_bytes,</div>
-<div class="line"><a name="l00350"></a><span class="lineno">  350</span>&#160;            d_in,</div>
-<div class="line"><a name="l00351"></a><span class="lineno">  351</span>&#160;            NULL,</div>
-<div class="line"><a name="l00352"></a><span class="lineno">  352</span>&#160;            d_out,</div>
-<div class="line"><a name="l00353"></a><span class="lineno">  353</span>&#160;            d_num_selected_out,</div>
-<div class="line"><a name="l00354"></a><span class="lineno">  354</span>&#160;            SelectOp(),</div>
-<div class="line"><a name="l00355"></a><span class="lineno">  355</span>&#160;            EqualityOp(),</div>
-<div class="line"><a name="l00356"></a><span class="lineno">  356</span>&#160;            num_items,</div>
-<div class="line"><a name="l00357"></a><span class="lineno">  357</span>&#160;            stream,</div>
-<div class="line"><a name="l00358"></a><span class="lineno">  358</span>&#160;            debug_synchronous);</div>
-<div class="line"><a name="l00359"></a><span class="lineno">  359</span>&#160;    }</div>
-<div class="line"><a name="l00360"></a><span class="lineno">  360</span>&#160;</div>
-<div class="line"><a name="l00361"></a><span class="lineno">  361</span>&#160;};</div>
-<div class="line"><a name="l00362"></a><span class="lineno">  362</span>&#160;</div>
-<div class="line"><a name="l00369"></a><span class="lineno">  369</span>&#160;}               <span class="comment">// CUB namespace</span></div>
-<div class="line"><a name="l00370"></a><span class="lineno">  370</span>&#160;CUB_NS_POSTFIX  <span class="comment">// Optional outer namespace(s)</span></div>
-<div class="line"><a name="l00371"></a><span class="lineno">  371</span>&#160;</div>
-<div class="line"><a name="l00372"></a><span class="lineno">  372</span>&#160;</div>
+<div class="line"><a name="l00233"></a><span class="lineno">  233</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00234"></a><span class="lineno">  234</span>&#160;        <span class="keyword">typename</span>                    InputIteratorT,</div>
+<div class="line"><a name="l00235"></a><span class="lineno">  235</span>&#160;        <span class="keyword">typename</span>                    OutputIteratorT,</div>
+<div class="line"><a name="l00236"></a><span class="lineno">  236</span>&#160;        <span class="keyword">typename</span>                    NumSelectedIteratorT,</div>
+<div class="line"><a name="l00237"></a><span class="lineno">  237</span>&#160;        <span class="keyword">typename</span>                    SelectOp&gt;</div>
+<div class="line"><a name="l00238"></a><span class="lineno">  238</span>&#160;    CUB_RUNTIME_FUNCTION __forceinline__</div>
+<div class="line"><a name="l00239"></a><span class="lineno"><a class="code" href="structcub_1_1_device_select.html#abec3c5afa3f1ca08e7b4fe3fd8990c39">  239</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_select.html#abec3c5afa3f1ca08e7b4fe3fd8990c39" title="Uses the select_op functor to selectively copy items from d_in into d_out. The total number of items ...">If</a>(</div>
+<div class="line"><a name="l00240"></a><span class="lineno">  240</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                </div>
+<div class="line"><a name="l00241"></a><span class="lineno">  241</span>&#160;        <span class="keywordtype">size_t</span>                      &amp;temp_storage_bytes,            </div>
+<div class="line"><a name="l00242"></a><span class="lineno">  242</span>&#160;        InputIteratorT              d_in,                           </div>
+<div class="line"><a name="l00243"></a><span class="lineno">  243</span>&#160;        OutputIteratorT             d_out,                          </div>
+<div class="line"><a name="l00244"></a><span class="lineno">  244</span>&#160;        NumSelectedIteratorT         d_num_selected_out,                 </div>
+<div class="line"><a name="l00245"></a><span class="lineno">  245</span>&#160;        <span class="keywordtype">int</span>                         num_items,                      </div>
+<div class="line"><a name="l00246"></a><span class="lineno">  246</span>&#160;        SelectOp                    select_op,                      </div>
+<div class="line"><a name="l00247"></a><span class="lineno">  247</span>&#160;        cudaStream_t                stream             = 0,         </div>
+<div class="line"><a name="l00248"></a><span class="lineno">  248</span>&#160;        <span class="keywordtype">bool</span>                        debug_synchronous  = <span class="keyword">false</span>)     </div>
+<div class="line"><a name="l00249"></a><span class="lineno">  249</span>&#160;    {</div>
+<div class="line"><a name="l00250"></a><span class="lineno">  250</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span>                     OffsetT;         <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00251"></a><span class="lineno">  251</span>&#160;        <span class="keyword">typedef</span> NullType*               FlagIterator;   <span class="comment">// FlagT iterator type (not used)</span></div>
+<div class="line"><a name="l00252"></a><span class="lineno">  252</span>&#160;        <span class="keyword">typedef</span> NullType                EqualityOp;     <span class="comment">// Equality operator (not used)</span></div>
+<div class="line"><a name="l00253"></a><span class="lineno">  253</span>&#160;</div>
+<div class="line"><a name="l00254"></a><span class="lineno">  254</span>&#160;        <span class="keywordflow">return</span> DispatchSelectIf&lt;InputIteratorT, FlagIterator, OutputIteratorT, NumSelectedIteratorT, SelectOp, EqualityOp, OffsetT, false&gt;::Dispatch(</div>
+<div class="line"><a name="l00255"></a><span class="lineno">  255</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00256"></a><span class="lineno">  256</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00257"></a><span class="lineno">  257</span>&#160;            d_in,</div>
+<div class="line"><a name="l00258"></a><span class="lineno">  258</span>&#160;            NULL,</div>
+<div class="line"><a name="l00259"></a><span class="lineno">  259</span>&#160;            d_out,</div>
+<div class="line"><a name="l00260"></a><span class="lineno">  260</span>&#160;            d_num_selected_out,</div>
+<div class="line"><a name="l00261"></a><span class="lineno">  261</span>&#160;            select_op,</div>
+<div class="line"><a name="l00262"></a><span class="lineno">  262</span>&#160;            EqualityOp(),</div>
+<div class="line"><a name="l00263"></a><span class="lineno">  263</span>&#160;            num_items,</div>
+<div class="line"><a name="l00264"></a><span class="lineno">  264</span>&#160;            stream,</div>
+<div class="line"><a name="l00265"></a><span class="lineno">  265</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00266"></a><span class="lineno">  266</span>&#160;    }</div>
+<div class="line"><a name="l00267"></a><span class="lineno">  267</span>&#160;</div>
+<div class="line"><a name="l00268"></a><span class="lineno">  268</span>&#160;</div>
+<div class="line"><a name="l00324"></a><span class="lineno">  324</span>&#160;    <span class="keyword">template</span> &lt;</div>
+<div class="line"><a name="l00325"></a><span class="lineno">  325</span>&#160;        <span class="keyword">typename</span>                    InputIteratorT,</div>
+<div class="line"><a name="l00326"></a><span class="lineno">  326</span>&#160;        <span class="keyword">typename</span>                    OutputIteratorT,</div>
+<div class="line"><a name="l00327"></a><span class="lineno">  327</span>&#160;        <span class="keyword">typename</span>                    NumSelectedIteratorT&gt;</div>
+<div class="line"><a name="l00328"></a><span class="lineno">  328</span>&#160;    CUB_RUNTIME_FUNCTION __forceinline__</div>
+<div class="line"><a name="l00329"></a><span class="lineno"><a class="code" href="structcub_1_1_device_select.html#a5e9ea8f2581326391ea63e1808ba514d">  329</a></span>&#160;    <span class="keyword">static</span> cudaError_t <a class="code" href="structcub_1_1_device_select.html#a5e9ea8f2581326391ea63e1808ba514d" title="Given an input sequence d_in having runs of consecutive equal-valued keys, only the first key from ea...">Unique</a>(</div>
+<div class="line"><a name="l00330"></a><span class="lineno">  330</span>&#160;        <span class="keywordtype">void</span>*               d_temp_storage,                </div>
+<div class="line"><a name="l00331"></a><span class="lineno">  331</span>&#160;        <span class="keywordtype">size_t</span>                      &amp;temp_storage_bytes,            </div>
+<div class="line"><a name="l00332"></a><span class="lineno">  332</span>&#160;        InputIteratorT              d_in,                           </div>
+<div class="line"><a name="l00333"></a><span class="lineno">  333</span>&#160;        OutputIteratorT             d_out,                          </div>
+<div class="line"><a name="l00334"></a><span class="lineno">  334</span>&#160;        NumSelectedIteratorT         d_num_selected_out,             </div>
+<div class="line"><a name="l00335"></a><span class="lineno">  335</span>&#160;        <span class="keywordtype">int</span>                         num_items,                      </div>
+<div class="line"><a name="l00336"></a><span class="lineno">  336</span>&#160;        cudaStream_t                stream             = 0,         </div>
+<div class="line"><a name="l00337"></a><span class="lineno">  337</span>&#160;        <span class="keywordtype">bool</span>                        debug_synchronous  = <span class="keyword">false</span>)     </div>
+<div class="line"><a name="l00338"></a><span class="lineno">  338</span>&#160;    {</div>
+<div class="line"><a name="l00339"></a><span class="lineno">  339</span>&#160;        <span class="keyword">typedef</span> <span class="keywordtype">int</span>                     OffsetT;         <span class="comment">// Signed integer type for global offsets</span></div>
+<div class="line"><a name="l00340"></a><span class="lineno">  340</span>&#160;        <span class="keyword">typedef</span> NullType*               FlagIterator;   <span class="comment">// FlagT iterator type (not used)</span></div>
+<div class="line"><a name="l00341"></a><span class="lineno">  341</span>&#160;        <span class="keyword">typedef</span> NullType                SelectOp;       <span class="comment">// Selection op (not used)</span></div>
+<div class="line"><a name="l00342"></a><span class="lineno">  342</span>&#160;        <span class="keyword">typedef</span> <a class="code" href="structcub_1_1_equality.html" title="Default equality functor. ">Equality</a>                EqualityOp;     <span class="comment">// Default == operator</span></div>
+<div class="line"><a name="l00343"></a><span class="lineno">  343</span>&#160;</div>
+<div class="line"><a name="l00344"></a><span class="lineno">  344</span>&#160;        <span class="keywordflow">return</span> DispatchSelectIf&lt;InputIteratorT, FlagIterator, OutputIteratorT, NumSelectedIteratorT, SelectOp, EqualityOp, OffsetT, false&gt;::Dispatch(</div>
+<div class="line"><a name="l00345"></a><span class="lineno">  345</span>&#160;            d_temp_storage,</div>
+<div class="line"><a name="l00346"></a><span class="lineno">  346</span>&#160;            temp_storage_bytes,</div>
+<div class="line"><a name="l00347"></a><span class="lineno">  347</span>&#160;            d_in,</div>
+<div class="line"><a name="l00348"></a><span class="lineno">  348</span>&#160;            NULL,</div>
+<div class="line"><a name="l00349"></a><span class="lineno">  349</span>&#160;            d_out,</div>
+<div class="line"><a name="l00350"></a><span class="lineno">  350</span>&#160;            d_num_selected_out,</div>
+<div class="line"><a name="l00351"></a><span class="lineno">  351</span>&#160;            SelectOp(),</div>
+<div class="line"><a name="l00352"></a><span class="lineno">  352</span>&#160;            EqualityOp(),</div>
+<div class="line"><a name="l00353"></a><span class="lineno">  353</span>&#160;            num_items,</div>
+<div class="line"><a name="l00354"></a><span class="lineno">  354</span>&#160;            stream,</div>
+<div class="line"><a name="l00355"></a><span class="lineno">  355</span>&#160;            debug_synchronous);</div>
+<div class="line"><a name="l00356"></a><span class="lineno">  356</span>&#160;    }</div>
+<div class="line"><a name="l00357"></a><span class="lineno">  357</span>&#160;</div>
+<div class="line"><a name="l00358"></a><span class="lineno">  358</span>&#160;};</div>
+<div class="line"><a name="l00359"></a><span class="lineno">  359</span>&#160;</div>
+<div class="line"><a name="l00366"></a><span class="lineno">  366</span>&#160;}               <span class="comment">// CUB namespace</span></div>
+<div class="line"><a name="l00367"></a><span class="lineno">  367</span>&#160;CUB_NS_POSTFIX  <span class="comment">// Optional outer namespace(s)</span></div>
+<div class="line"><a name="l00368"></a><span class="lineno">  368</span>&#160;</div>
+<div class="line"><a name="l00369"></a><span class="lineno">  369</span>&#160;</div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/device__spmv_8cuh.html b/docs/html/device__spmv_8cuh.html
index 394ed1584e..5c7cabbd56 100644
--- a/docs/html/device__spmv_8cuh.html
+++ b/docs/html/device__spmv_8cuh.html
@@ -132,7 +132,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/device__spmv_8cuh_source.html b/docs/html/device__spmv_8cuh_source.html
index 14953addeb..4a5eec6214 100644
--- a/docs/html/device__spmv_8cuh_source.html
+++ b/docs/html/device__spmv_8cuh_source.html
@@ -197,7 +197,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/dir_011e1c944d88f71be72e1e24a5fda7cf.html b/docs/html/dir_011e1c944d88f71be72e1e24a5fda7cf.html
index e94cd91c96..a9a93a9d0a 100644
--- a/docs/html/dir_011e1c944d88f71be72e1e24a5fda7cf.html
+++ b/docs/html/dir_011e1c944d88f71be72e1e24a5fda7cf.html
@@ -119,7 +119,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/dir_18fc672d63781b5a743137aee24ff656.html b/docs/html/dir_18fc672d63781b5a743137aee24ff656.html
index 8e70f77bfa..7ab18175db 100644
--- a/docs/html/dir_18fc672d63781b5a743137aee24ff656.html
+++ b/docs/html/dir_18fc672d63781b5a743137aee24ff656.html
@@ -112,6 +112,8 @@
 <tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:device__segmented__radix__sort_8cuh"><td class="memItemLeft" align="right" valign="top">file &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="device__segmented__radix__sort_8cuh.html">device_segmented_radix_sort.cuh</a> <a href="device__segmented__radix__sort_8cuh_source.html">[code]</a></td></tr>
 <tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:device__segmented__reduce_8cuh"><td class="memItemLeft" align="right" valign="top">file &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="device__segmented__reduce_8cuh.html">device_segmented_reduce.cuh</a> <a href="device__segmented__reduce_8cuh_source.html">[code]</a></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:device__select_8cuh"><td class="memItemLeft" align="right" valign="top">file &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="device__select_8cuh.html">device_select.cuh</a> <a href="device__select_8cuh_source.html">[code]</a></td></tr>
 <tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:device__spmv_8cuh"><td class="memItemLeft" align="right" valign="top">file &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="device__spmv_8cuh.html">device_spmv.cuh</a> <a href="device__spmv_8cuh_source.html">[code]</a></td></tr>
@@ -121,7 +123,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/dir_80932b4cec52750ff92b1a1912314cf5.html b/docs/html/dir_80932b4cec52750ff92b1a1912314cf5.html
index b870667d22..04e8ebf9bf 100644
--- a/docs/html/dir_80932b4cec52750ff92b1a1912314cf5.html
+++ b/docs/html/dir_80932b4cec52750ff92b1a1912314cf5.html
@@ -119,7 +119,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/dir_bb50a5ef59f19d030d06415663184d05.html b/docs/html/dir_bb50a5ef59f19d030d06415663184d05.html
index 50ae279f23..bbc264bb96 100644
--- a/docs/html/dir_bb50a5ef59f19d030d06415663184d05.html
+++ b/docs/html/dir_bb50a5ef59f19d030d06415663184d05.html
@@ -109,7 +109,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/dir_cb3a671affffe7eeb3fdf5ae58e42cc8.html b/docs/html/dir_cb3a671affffe7eeb3fdf5ae58e42cc8.html
index d7fb1baeac..cc4380b5fe 100644
--- a/docs/html/dir_cb3a671affffe7eeb3fdf5ae58e42cc8.html
+++ b/docs/html/dir_cb3a671affffe7eeb3fdf5ae58e42cc8.html
@@ -107,7 +107,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/dir_d583f216f1aafe19404e836b0c097ad2.html b/docs/html/dir_d583f216f1aafe19404e836b0c097ad2.html
index bd0641455a..e8692a183f 100644
--- a/docs/html/dir_d583f216f1aafe19404e836b0c097ad2.html
+++ b/docs/html/dir_d583f216f1aafe19404e836b0c097ad2.html
@@ -130,7 +130,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/download_cub.html b/docs/html/download_cub.html
index f2d78ad3e0..a9f8ae9530 100644
--- a/docs/html/download_cub.html
+++ b/docs/html/download_cub.html
@@ -37,14 +37,14 @@
 </head>
 
 <body 
-	onload="downloadURL('https://github.com/NVlabs/cub/archive/1.4.1.zip');" 
+	onload="downloadURL('https://github.com/NVlabs/cub/archive/1.5.0.zip');" 
 	style="color: rgb(102, 102, 102); font-family: Helvetica, arial, freesans, clean, sans-serif; font-size: 13px; font-style: normal; font-variant: normal; font-weight: 300; height: 18px;">
 
 <center>
 If your download doesn't start in 3s:
 <br><br>
-<a href="https://github.com/NVlabs/cub/archive/1.4.1.zip"><img src="download-icon.png" style="position:relative; bottom:-10px; border:0px;"/></a>
-<a href="https://github.com/NVlabs/cub/archive/1.4.1.zip"><em>Download CUB!</em></a>
+<a href="https://github.com/NVlabs/cub/archive/1.5.0.zip"><img src="download-icon.png" style="position:relative; bottom:-10px; border:0px;"/></a>
+<a href="https://github.com/NVlabs/cub/archive/1.5.0.zip"><em>Download CUB!</em></a>
 </center>
 
 </body>
diff --git a/docs/html/example_block_radix_sort_8cu-example.html b/docs/html/example_block_radix_sort_8cu-example.html
index 12f8eda1cb..f9bdc51bdd 100644
--- a/docs/html/example_block_radix_sort_8cu-example.html
+++ b/docs/html/example_block_radix_sort_8cu-example.html
@@ -398,7 +398,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:02 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:13 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/example_block_reduce_8cu-example.html b/docs/html/example_block_reduce_8cu-example.html
index 0bef27ff33..4ff2a1dbe7 100644
--- a/docs/html/example_block_reduce_8cu-example.html
+++ b/docs/html/example_block_reduce_8cu-example.html
@@ -365,7 +365,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:02 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:13 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/example_block_scan_8cu-example.html b/docs/html/example_block_scan_8cu-example.html
index f971046324..690a02a2e7 100644
--- a/docs/html/example_block_scan_8cu-example.html
+++ b/docs/html/example_block_scan_8cu-example.html
@@ -414,7 +414,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:02 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:13 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/example_device_histogram_8cu-example.html b/docs/html/example_device_histogram_8cu-example.html
index 71d3e4b786..773eebcb1c 100644
--- a/docs/html/example_device_histogram_8cu-example.html
+++ b/docs/html/example_device_histogram_8cu-example.html
@@ -312,7 +312,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:02 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:13 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/example_device_partition_flagged_8cu-example.html b/docs/html/example_device_partition_flagged_8cu-example.html
index 98d3b78d1a..8f6e6ce981 100644
--- a/docs/html/example_device_partition_flagged_8cu-example.html
+++ b/docs/html/example_device_partition_flagged_8cu-example.html
@@ -318,7 +318,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:02 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:13 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/example_device_partition_if_8cu-example.html b/docs/html/example_device_partition_if_8cu-example.html
index 8be1c5003e..f8fadfbd58 100644
--- a/docs/html/example_device_partition_if_8cu-example.html
+++ b/docs/html/example_device_partition_if_8cu-example.html
@@ -329,7 +329,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:02 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:13 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/example_device_radix_sort_8cu-example.html b/docs/html/example_device_radix_sort_8cu-example.html
index 6ab4c37e57..fa0df1c0c7 100644
--- a/docs/html/example_device_radix_sort_8cu-example.html
+++ b/docs/html/example_device_radix_sort_8cu-example.html
@@ -259,33 +259,33 @@
 <div class="line">    Initialize(h_keys, h_values, h_reference_keys, h_reference_values, num_items);</div>
 <div class="line"></div>
 <div class="line">    <span class="comment">// Allocate device arrays</span></div>
-<div class="line">    <a name="_a2"></a><a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;float&gt;</a> d_keys;</div>
-<div class="line">    <a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer&lt;int&gt;</a>   d_values;</div>
-<div class="line">    <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(g_allocator.DeviceAllocate((<span class="keywordtype">void</span>**)&amp;d_keys.<a name="a3"></a><a class="code" href="structcub_1_1_double_buffer.html#a38a2d8a9d5a36e9e4b9132166717a0b4" title="Pair of device buffer pointers. ">d_buffers</a>[0], <span class="keyword">sizeof</span>(<span class="keywordtype">float</span>) * num_items));</div>
-<div class="line">    <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(g_allocator.DeviceAllocate((<span class="keywordtype">void</span>**)&amp;d_keys.<a class="code" href="structcub_1_1_double_buffer.html#a38a2d8a9d5a36e9e4b9132166717a0b4" title="Pair of device buffer pointers. ">d_buffers</a>[1], <span class="keyword">sizeof</span>(<span class="keywordtype">float</span>) * num_items));</div>
-<div class="line">    <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(g_allocator.DeviceAllocate((<span class="keywordtype">void</span>**)&amp;d_values.<a name="a4"></a><a class="code" href="structcub_1_1_double_buffer.html#a38a2d8a9d5a36e9e4b9132166717a0b4" title="Pair of device buffer pointers. ">d_buffers</a>[0], <span class="keyword">sizeof</span>(<span class="keywordtype">int</span>) * num_items));</div>
-<div class="line">    <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(g_allocator.DeviceAllocate((<span class="keywordtype">void</span>**)&amp;d_values.<a class="code" href="structcub_1_1_double_buffer.html#a38a2d8a9d5a36e9e4b9132166717a0b4" title="Pair of device buffer pointers. ">d_buffers</a>[1], <span class="keyword">sizeof</span>(<span class="keywordtype">int</span>) * num_items));</div>
+<div class="line">    DoubleBuffer&lt;float&gt; d_keys;</div>
+<div class="line">    DoubleBuffer&lt;int&gt;   d_values;</div>
+<div class="line">    <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(g_allocator.DeviceAllocate((<span class="keywordtype">void</span>**)&amp;d_keys.d_buffers[0], <span class="keyword">sizeof</span>(<span class="keywordtype">float</span>) * num_items));</div>
+<div class="line">    <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(g_allocator.DeviceAllocate((<span class="keywordtype">void</span>**)&amp;d_keys.d_buffers[1], <span class="keyword">sizeof</span>(<span class="keywordtype">float</span>) * num_items));</div>
+<div class="line">    <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(g_allocator.DeviceAllocate((<span class="keywordtype">void</span>**)&amp;d_values.d_buffers[0], <span class="keyword">sizeof</span>(<span class="keywordtype">int</span>) * num_items));</div>
+<div class="line">    <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(g_allocator.DeviceAllocate((<span class="keywordtype">void</span>**)&amp;d_values.d_buffers[1], <span class="keyword">sizeof</span>(<span class="keywordtype">int</span>) * num_items));</div>
 <div class="line"></div>
 <div class="line">    <span class="comment">// Allocate temporary storage</span></div>
 <div class="line">    <span class="keywordtype">size_t</span>  temp_storage_bytes  = 0;</div>
 <div class="line">    <span class="keywordtype">void</span>    *d_temp_storage     = NULL;</div>
 <div class="line"></div>
-<div class="line">    <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(<a name="a5"></a><a class="code" href="structcub_1_1_device_radix_sort.html#a31d7224d3f6c6a9309a0b84571f25eb9" title="Sorts key-value pairs into ascending order. (~2N auxiliary storage required) ">DeviceRadixSort::SortPairs</a>(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items));</div>
+<div class="line">    <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(<a name="a2"></a><a class="code" href="structcub_1_1_device_radix_sort.html#a31d7224d3f6c6a9309a0b84571f25eb9" title="Sorts key-value pairs into ascending order. (~2N auxiliary storage required) ">DeviceRadixSort::SortPairs</a>(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items));</div>
 <div class="line">    <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(g_allocator.DeviceAllocate(&amp;d_temp_storage, temp_storage_bytes));</div>
 <div class="line"></div>
 <div class="line">    <span class="comment">// Initialize device arrays</span></div>
-<div class="line">    <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(cudaMemcpy(d_keys.<a class="code" href="structcub_1_1_double_buffer.html#a38a2d8a9d5a36e9e4b9132166717a0b4" title="Pair of device buffer pointers. ">d_buffers</a>[d_keys.<a name="a6"></a><a class="code" href="structcub_1_1_double_buffer.html#a9641172c847169904c4054856d7c26f4" title="Selector into d_buffers (i.e., the active/valid buffer) ">selector</a>], h_keys, <span class="keyword">sizeof</span>(<span class="keywordtype">float</span>) * num_items, cudaMemcpyHostToDevice));</div>
-<div class="line">    <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(cudaMemcpy(d_values.<a class="code" href="structcub_1_1_double_buffer.html#a38a2d8a9d5a36e9e4b9132166717a0b4" title="Pair of device buffer pointers. ">d_buffers</a>[d_values.<a name="a7"></a><a class="code" href="structcub_1_1_double_buffer.html#a9641172c847169904c4054856d7c26f4" title="Selector into d_buffers (i.e., the active/valid buffer) ">selector</a>], h_values, <span class="keyword">sizeof</span>(<span class="keywordtype">int</span>) * num_items, cudaMemcpyHostToDevice));</div>
+<div class="line">    <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(cudaMemcpy(d_keys.d_buffers[d_keys.selector], h_keys, <span class="keyword">sizeof</span>(<span class="keywordtype">float</span>) * num_items, cudaMemcpyHostToDevice));</div>
+<div class="line">    <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(cudaMemcpy(d_values.d_buffers[d_values.selector], h_values, <span class="keyword">sizeof</span>(<span class="keywordtype">int</span>) * num_items, cudaMemcpyHostToDevice));</div>
 <div class="line"></div>
 <div class="line">    <span class="comment">// Run</span></div>
 <div class="line">    <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(<a class="code" href="structcub_1_1_device_radix_sort.html#a31d7224d3f6c6a9309a0b84571f25eb9" title="Sorts key-value pairs into ascending order. (~2N auxiliary storage required) ">DeviceRadixSort::SortPairs</a>(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items));</div>
 <div class="line"></div>
 <div class="line">    <span class="comment">// Check for correctness (and display results, if specified)</span></div>
-<div class="line">    <span class="keywordtype">int</span> compare = CompareDeviceResults(h_reference_keys, d_keys.<a name="a8"></a><a class="code" href="structcub_1_1_double_buffer.html#a861d3dff1a70d5e5926057a44d9b8724" title="Return pointer to the currently valid buffer. ">Current</a>(), num_items, <span class="keyword">true</span>, g_verbose);</div>
-<div class="line">    printf(<span class="stringliteral">&quot;\t Compare keys (selector %d): %s\n&quot;</span>, d_keys.<a class="code" href="structcub_1_1_double_buffer.html#a9641172c847169904c4054856d7c26f4" title="Selector into d_buffers (i.e., the active/valid buffer) ">selector</a>, compare ? <span class="stringliteral">&quot;FAIL&quot;</span> : <span class="stringliteral">&quot;PASS&quot;</span>);</div>
+<div class="line">    <span class="keywordtype">int</span> compare = CompareDeviceResults(h_reference_keys, d_keys.Current(), num_items, <span class="keyword">true</span>, g_verbose);</div>
+<div class="line">    printf(<span class="stringliteral">&quot;\t Compare keys (selector %d): %s\n&quot;</span>, d_keys.selector, compare ? <span class="stringliteral">&quot;FAIL&quot;</span> : <span class="stringliteral">&quot;PASS&quot;</span>);</div>
 <div class="line">    AssertEquals(0, compare);</div>
-<div class="line">    compare = CompareDeviceResults(h_reference_values, d_values.<a name="a9"></a><a class="code" href="structcub_1_1_double_buffer.html#a861d3dff1a70d5e5926057a44d9b8724" title="Return pointer to the currently valid buffer. ">Current</a>(), num_items, <span class="keyword">true</span>, g_verbose);</div>
-<div class="line">    printf(<span class="stringliteral">&quot;\t Compare values (selector %d): %s\n&quot;</span>, d_values.<a class="code" href="structcub_1_1_double_buffer.html#a9641172c847169904c4054856d7c26f4" title="Selector into d_buffers (i.e., the active/valid buffer) ">selector</a>, compare ? <span class="stringliteral">&quot;FAIL&quot;</span> : <span class="stringliteral">&quot;PASS&quot;</span>);</div>
+<div class="line">    compare = CompareDeviceResults(h_reference_values, d_values.Current(), num_items, <span class="keyword">true</span>, g_verbose);</div>
+<div class="line">    printf(<span class="stringliteral">&quot;\t Compare values (selector %d): %s\n&quot;</span>, d_values.selector, compare ? <span class="stringliteral">&quot;FAIL&quot;</span> : <span class="stringliteral">&quot;PASS&quot;</span>);</div>
 <div class="line">    AssertEquals(0, compare);</div>
 <div class="line"></div>
 <div class="line">    <span class="comment">// Cleanup</span></div>
@@ -294,10 +294,10 @@
 <div class="line">    <span class="keywordflow">if</span> (h_values) <span class="keyword">delete</span>[] h_values;</div>
 <div class="line">    <span class="keywordflow">if</span> (h_reference_values) <span class="keyword">delete</span>[] h_reference_values;</div>
 <div class="line"></div>
-<div class="line">    <span class="keywordflow">if</span> (d_keys.<a class="code" href="structcub_1_1_double_buffer.html#a38a2d8a9d5a36e9e4b9132166717a0b4" title="Pair of device buffer pointers. ">d_buffers</a>[0]) <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(g_allocator.DeviceFree(d_keys.<a class="code" href="structcub_1_1_double_buffer.html#a38a2d8a9d5a36e9e4b9132166717a0b4" title="Pair of device buffer pointers. ">d_buffers</a>[0]));</div>
-<div class="line">    <span class="keywordflow">if</span> (d_keys.<a class="code" href="structcub_1_1_double_buffer.html#a38a2d8a9d5a36e9e4b9132166717a0b4" title="Pair of device buffer pointers. ">d_buffers</a>[1]) <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(g_allocator.DeviceFree(d_keys.<a class="code" href="structcub_1_1_double_buffer.html#a38a2d8a9d5a36e9e4b9132166717a0b4" title="Pair of device buffer pointers. ">d_buffers</a>[1]));</div>
-<div class="line">    <span class="keywordflow">if</span> (d_values.<a class="code" href="structcub_1_1_double_buffer.html#a38a2d8a9d5a36e9e4b9132166717a0b4" title="Pair of device buffer pointers. ">d_buffers</a>[0]) <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(g_allocator.DeviceFree(d_values.<a class="code" href="structcub_1_1_double_buffer.html#a38a2d8a9d5a36e9e4b9132166717a0b4" title="Pair of device buffer pointers. ">d_buffers</a>[0]));</div>
-<div class="line">    <span class="keywordflow">if</span> (d_values.<a class="code" href="structcub_1_1_double_buffer.html#a38a2d8a9d5a36e9e4b9132166717a0b4" title="Pair of device buffer pointers. ">d_buffers</a>[1]) <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(g_allocator.DeviceFree(d_values.<a class="code" href="structcub_1_1_double_buffer.html#a38a2d8a9d5a36e9e4b9132166717a0b4" title="Pair of device buffer pointers. ">d_buffers</a>[1]));</div>
+<div class="line">    <span class="keywordflow">if</span> (d_keys.d_buffers[0]) <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(g_allocator.DeviceFree(d_keys.d_buffers[0]));</div>
+<div class="line">    <span class="keywordflow">if</span> (d_keys.d_buffers[1]) <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(g_allocator.DeviceFree(d_keys.d_buffers[1]));</div>
+<div class="line">    <span class="keywordflow">if</span> (d_values.d_buffers[0]) <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(g_allocator.DeviceFree(d_values.d_buffers[0]));</div>
+<div class="line">    <span class="keywordflow">if</span> (d_values.d_buffers[1]) <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(g_allocator.DeviceFree(d_values.d_buffers[1]));</div>
 <div class="line">    <span class="keywordflow">if</span> (d_temp_storage) <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(g_allocator.DeviceFree(d_temp_storage));</div>
 <div class="line"></div>
 <div class="line">    printf(<span class="stringliteral">&quot;\n\n&quot;</span>);</div>
@@ -311,7 +311,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:02 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:13 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/example_device_reduce_8cu-example.html b/docs/html/example_device_reduce_8cu-example.html
index a655cd646a..2b6b709725 100644
--- a/docs/html/example_device_reduce_8cu-example.html
+++ b/docs/html/example_device_reduce_8cu-example.html
@@ -238,11 +238,11 @@
 <div class="line">    <span class="comment">// Request and allocate temporary storage</span></div>
 <div class="line">    <span class="keywordtype">void</span>            *d_temp_storage = NULL;</div>
 <div class="line">    <span class="keywordtype">size_t</span>          temp_storage_bytes = 0;</div>
-<div class="line">    <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(<a name="a2"></a><a class="code" href="structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f" title="Computes a device-wide sum using the addition (&#39;+&#39;) operator. ">DeviceReduce::Sum</a>(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items));</div>
+<div class="line">    <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(<a name="a2"></a><a class="code" href="structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f" title="Computes a device-wide sum using the addition (+) operator. ">DeviceReduce::Sum</a>(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items));</div>
 <div class="line">    <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(g_allocator.DeviceAllocate(&amp;d_temp_storage, temp_storage_bytes));</div>
 <div class="line"></div>
 <div class="line">    <span class="comment">// Run</span></div>
-<div class="line">    <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(<a class="code" href="structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f" title="Computes a device-wide sum using the addition (&#39;+&#39;) operator. ">DeviceReduce::Sum</a>(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items));</div>
+<div class="line">    <a class="code" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8" title="Debug macro with exit. ">CubDebugExit</a>(<a class="code" href="structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f" title="Computes a device-wide sum using the addition (+) operator. ">DeviceReduce::Sum</a>(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items));</div>
 <div class="line"></div>
 <div class="line">    <span class="comment">// Check for correctness (and display results, if specified)</span></div>
 <div class="line">    <span class="keywordtype">int</span> compare = CompareDeviceResults(&amp;h_reference, d_out, 1, g_verbose, g_verbose);</div>
@@ -266,7 +266,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:02 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:13 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/example_device_scan_8cu-example.html b/docs/html/example_device_scan_8cu-example.html
index 360f8ee7b9..2ec49fed77 100644
--- a/docs/html/example_device_scan_8cu-example.html
+++ b/docs/html/example_device_scan_8cu-example.html
@@ -272,7 +272,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:02 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:13 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/example_device_select_flagged_8cu-example.html b/docs/html/example_device_select_flagged_8cu-example.html
index 7821efa0c4..85cdb1ba8d 100644
--- a/docs/html/example_device_select_flagged_8cu-example.html
+++ b/docs/html/example_device_select_flagged_8cu-example.html
@@ -318,7 +318,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:02 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:13 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/example_device_select_if_8cu-example.html b/docs/html/example_device_select_if_8cu-example.html
index 6225038057..9881612c35 100644
--- a/docs/html/example_device_select_if_8cu-example.html
+++ b/docs/html/example_device_select_if_8cu-example.html
@@ -327,7 +327,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:02 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:13 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/example_device_select_unique_8cu-example.html b/docs/html/example_device_select_unique_8cu-example.html
index 3da3edbc17..f883582e40 100644
--- a/docs/html/example_device_select_unique_8cu-example.html
+++ b/docs/html/example_device_select_unique_8cu-example.html
@@ -307,7 +307,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:02 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:13 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/examples.html b/docs/html/examples.html
index a85843055d..7605af1b12 100644
--- a/docs/html/examples.html
+++ b/docs/html/examples.html
@@ -121,7 +121,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:09 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:20 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/files.html b/docs/html/files.html
index ed789536df..0d610a43de 100644
--- a/docs/html/files.html
+++ b/docs/html/files.html
@@ -120,29 +120,30 @@
 <tr id="row_18_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="device__run__length__encode_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="device__run__length__encode_8cuh.html" target="_self">device_run_length_encode.cuh</a></td><td class="desc"></td></tr>
 <tr id="row_19_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="device__scan_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="device__scan_8cuh.html" target="_self">device_scan.cuh</a></td><td class="desc"></td></tr>
 <tr id="row_20_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="device__segmented__radix__sort_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="device__segmented__radix__sort_8cuh.html" target="_self">device_segmented_radix_sort.cuh</a></td><td class="desc"></td></tr>
-<tr id="row_21_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="device__select_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="device__select_8cuh.html" target="_self">device_select.cuh</a></td><td class="desc"></td></tr>
-<tr id="row_22_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="device__spmv_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="device__spmv_8cuh.html" target="_self">device_spmv.cuh</a></td><td class="desc"></td></tr>
-<tr id="row_23_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="tex__obj__input__iterator_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="tex__obj__input__iterator_8cuh.html" target="_self">tex_obj_input_iterator.cuh</a></td><td class="desc"></td></tr>
-<tr id="row_24_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="tex__ref__input__iterator_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="tex__ref__input__iterator_8cuh.html" target="_self">tex_ref_input_iterator.cuh</a></td><td class="desc"></td></tr>
-<tr id="row_25_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="thread__load_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="thread__load_8cuh.html" target="_self">thread_load.cuh</a></td><td class="desc"></td></tr>
-<tr id="row_26_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="thread__operators_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="thread__operators_8cuh.html" target="_self">thread_operators.cuh</a></td><td class="desc"></td></tr>
-<tr id="row_27_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="thread__store_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="thread__store_8cuh.html" target="_self">thread_store.cuh</a></td><td class="desc"></td></tr>
-<tr id="row_28_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="transform__input__iterator_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="transform__input__iterator_8cuh.html" target="_self">transform_input_iterator.cuh</a></td><td class="desc"></td></tr>
-<tr id="row_29_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="util__allocator_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><b>util_allocator.cuh</b></td><td class="desc"></td></tr>
-<tr id="row_30_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="util__arch_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="util__arch_8cuh.html" target="_self">util_arch.cuh</a></td><td class="desc"></td></tr>
-<tr id="row_31_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="util__debug_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="util__debug_8cuh.html" target="_self">util_debug.cuh</a></td><td class="desc"></td></tr>
-<tr id="row_32_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="util__device_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="util__device_8cuh.html" target="_self">util_device.cuh</a></td><td class="desc"></td></tr>
-<tr id="row_33_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="util__ptx_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="util__ptx_8cuh.html" target="_self">util_ptx.cuh</a></td><td class="desc"></td></tr>
-<tr id="row_34_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="util__type_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="util__type_8cuh.html" target="_self">util_type.cuh</a></td><td class="desc"></td></tr>
-<tr id="row_35_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="warp__reduce_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="warp__reduce_8cuh.html" target="_self">warp_reduce.cuh</a></td><td class="desc"></td></tr>
-<tr id="row_36_" class="even"><td class="entry"><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><a href="warp__scan_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="warp__scan_8cuh.html" target="_self">warp_scan.cuh</a></td><td class="desc"></td></tr>
+<tr id="row_21_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="device__segmented__reduce_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="device__segmented__reduce_8cuh.html" target="_self">device_segmented_reduce.cuh</a></td><td class="desc"></td></tr>
+<tr id="row_22_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="device__select_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="device__select_8cuh.html" target="_self">device_select.cuh</a></td><td class="desc"></td></tr>
+<tr id="row_23_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="device__spmv_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="device__spmv_8cuh.html" target="_self">device_spmv.cuh</a></td><td class="desc"></td></tr>
+<tr id="row_24_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="tex__obj__input__iterator_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="tex__obj__input__iterator_8cuh.html" target="_self">tex_obj_input_iterator.cuh</a></td><td class="desc"></td></tr>
+<tr id="row_25_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="tex__ref__input__iterator_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="tex__ref__input__iterator_8cuh.html" target="_self">tex_ref_input_iterator.cuh</a></td><td class="desc"></td></tr>
+<tr id="row_26_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="thread__load_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="thread__load_8cuh.html" target="_self">thread_load.cuh</a></td><td class="desc"></td></tr>
+<tr id="row_27_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="thread__operators_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="thread__operators_8cuh.html" target="_self">thread_operators.cuh</a></td><td class="desc"></td></tr>
+<tr id="row_28_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="thread__store_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="thread__store_8cuh.html" target="_self">thread_store.cuh</a></td><td class="desc"></td></tr>
+<tr id="row_29_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="transform__input__iterator_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="transform__input__iterator_8cuh.html" target="_self">transform_input_iterator.cuh</a></td><td class="desc"></td></tr>
+<tr id="row_30_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="util__allocator_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><b>util_allocator.cuh</b></td><td class="desc"></td></tr>
+<tr id="row_31_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="util__arch_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="util__arch_8cuh.html" target="_self">util_arch.cuh</a></td><td class="desc"></td></tr>
+<tr id="row_32_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="util__debug_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="util__debug_8cuh.html" target="_self">util_debug.cuh</a></td><td class="desc"></td></tr>
+<tr id="row_33_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="util__device_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="util__device_8cuh.html" target="_self">util_device.cuh</a></td><td class="desc"></td></tr>
+<tr id="row_34_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="util__ptx_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="util__ptx_8cuh.html" target="_self">util_ptx.cuh</a></td><td class="desc"></td></tr>
+<tr id="row_35_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="util__type_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="util__type_8cuh.html" target="_self">util_type.cuh</a></td><td class="desc"></td></tr>
+<tr id="row_36_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a href="warp__reduce_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="warp__reduce_8cuh.html" target="_self">warp_reduce.cuh</a></td><td class="desc"></td></tr>
+<tr id="row_37_"><td class="entry"><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><a href="warp__scan_8cuh_source.html"><img src="ftv2doc.png" alt="*" width="24" height="22" /></a><a class="el" href="warp__scan_8cuh.html" target="_self">warp_scan.cuh</a></td><td class="desc"></td></tr>
 </table>
 </div><!-- directory -->
 </div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:09 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:20 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions.html b/docs/html/functions.html
index 2ad58e18b8..85b3a9f84b 100644
--- a/docs/html/functions.html
+++ b/docs/html/functions.html
@@ -96,7 +96,6 @@
       <li><a href="functions_0x66.html#index_f"><span>f</span></a></li>
       <li><a href="functions_0x68.html#index_h"><span>h</span></a></li>
       <li><a href="functions_0x69.html#index_i"><span>i</span></a></li>
-      <li><a href="functions_0x6b.html#index_k"><span>k</span></a></li>
       <li><a href="functions_0x6c.html#index_l"><span>l</span></a></li>
       <li><a href="functions_0x6d.html#index_m"><span>m</span></a></li>
       <li><a href="functions_0x6e.html#index_n"><span>n</span></a></li>
@@ -130,30 +129,23 @@
 <div class="textblock">Here is a list of all documented class members with links to the class documentation for each member:</div>
 
 <h3><a class="anchor" id="index_a"></a>- a -</h3><ul>
-<li>Alias()
-: <a class="el" href="structcub_1_1_uninitialized.html#a790b865325f19ac45cc84d3fed0d3038">cub::Uninitialized&lt; T &gt;</a>
-</li>
-<li>align0
-: <a class="el" href="structcub_1_1_key_value_pair.html#a6babb4cdcde74f159b5bff9e4a569d83">cub::KeyValuePair&lt; _Key, _Value &gt;</a>
-</li>
-<li>Alternate()
-: <a class="el" href="structcub_1_1_double_buffer.html#a3895f1d57aeb379bee79de56ace8e35a">cub::DoubleBuffer&lt; T &gt;</a>
-</li>
 <li>ArgIndexInputIterator()
 : <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a97a6c3755ab132c099e90616e5f67cb6">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
 </li>
 <li>ArgMax()
 : <a class="el" href="structcub_1_1_device_reduce.html#a07a9ecbf0b6db1882107f6adee1c4276">cub::DeviceReduce</a>
+, <a class="el" href="structcub_1_1_device_segmented_reduce.html#ad0e1f7eede9a0f93a379950eac7d8329">cub::DeviceSegmentedReduce</a>
 </li>
 <li>ArgMin()
 : <a class="el" href="structcub_1_1_device_reduce.html#a6b35963e90120b6d2c76a6068b0340a9">cub::DeviceReduce</a>
+, <a class="el" href="structcub_1_1_device_segmented_reduce.html#a085150ad8d55de8665b45a3f69f38bce">cub::DeviceSegmentedReduce</a>
 </li>
 </ul>
 </div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_0x62.html b/docs/html/functions_0x62.html
index 4d01f0b978..417149c5ed 100644
--- a/docs/html/functions_0x62.html
+++ b/docs/html/functions_0x62.html
@@ -96,7 +96,6 @@
       <li><a href="functions_0x66.html#index_f"><span>f</span></a></li>
       <li><a href="functions_0x68.html#index_h"><span>h</span></a></li>
       <li><a href="functions_0x69.html#index_i"><span>i</span></a></li>
-      <li><a href="functions_0x6b.html#index_k"><span>k</span></a></li>
       <li><a href="functions_0x6c.html#index_l"><span>l</span></a></li>
       <li><a href="functions_0x6d.html#index_m"><span>m</span></a></li>
       <li><a href="functions_0x6e.html#index_n"><span>n</span></a></li>
@@ -172,7 +171,7 @@ <h3><a class="anchor" id="index_b"></a>- b -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_0x63.html b/docs/html/functions_0x63.html
index d72718ade8..7ed5269906 100644
--- a/docs/html/functions_0x63.html
+++ b/docs/html/functions_0x63.html
@@ -96,7 +96,6 @@
       <li><a href="functions_0x66.html#index_f"><span>f</span></a></li>
       <li><a href="functions_0x68.html#index_h"><span>h</span></a></li>
       <li><a href="functions_0x69.html#index_i"><span>i</span></a></li>
-      <li><a href="functions_0x6b.html#index_k"><span>k</span></a></li>
       <li><a href="functions_0x6c.html#index_l"><span>l</span></a></li>
       <li><a href="functions_0x6d.html#index_m"><span>m</span></a></li>
       <li><a href="functions_0x6e.html#index_n"><span>n</span></a></li>
@@ -151,15 +150,12 @@ <h3><a class="anchor" id="index_c"></a>- c -</h3><ul>
 <li>CsrMV()
 : <a class="el" href="structcub_1_1_device_spmv.html#abbcd4c04d8bbbcdfabc1eb62f860b8b2">cub::DeviceSpmv</a>
 </li>
-<li>Current()
-: <a class="el" href="structcub_1_1_double_buffer.html#a861d3dff1a70d5e5926057a44d9b8724">cub::DoubleBuffer&lt; T &gt;</a>
-</li>
 </ul>
 </div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_0x64.html b/docs/html/functions_0x64.html
index d4b6568d82..fd7becdb2b 100644
--- a/docs/html/functions_0x64.html
+++ b/docs/html/functions_0x64.html
@@ -96,7 +96,6 @@
       <li><a href="functions_0x66.html#index_f"><span>f</span></a></li>
       <li><a href="functions_0x68.html#index_h"><span>h</span></a></li>
       <li><a href="functions_0x69.html#index_i"><span>i</span></a></li>
-      <li><a href="functions_0x6b.html#index_k"><span>k</span></a></li>
       <li><a href="functions_0x6c.html#index_l"><span>l</span></a></li>
       <li><a href="functions_0x6d.html#index_m"><span>m</span></a></li>
       <li><a href="functions_0x6e.html#index_n"><span>n</span></a></li>
@@ -130,37 +129,28 @@
 <div class="textblock">Here is a list of all documented class members with links to the class documentation for each member:</div>
 
 <h3><a class="anchor" id="index_d"></a>- d -</h3><ul>
-<li>d_buffers
-: <a class="el" href="structcub_1_1_double_buffer.html#a38a2d8a9d5a36e9e4b9132166717a0b4">cub::DoubleBuffer&lt; T &gt;</a>
-</li>
 <li>DeviceAllocate()
 : <a class="el" href="structcub_1_1_caching_device_allocator.html#ae1088ac6ba6e5d55832ffbc0b2a5d714">cub::CachingDeviceAllocator</a>
 </li>
 <li>DeviceFree()
-: <a class="el" href="structcub_1_1_caching_device_allocator.html#a66e72cc3cc7d0dbd59148ac7c4ba0de6">cub::CachingDeviceAllocator</a>
-</li>
-<li>DeviceWord
-: <a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">cub::Uninitialized&lt; T &gt;</a>
+: <a class="el" href="structcub_1_1_caching_device_allocator.html#adbf65c59172b140420636e150325deeb">cub::CachingDeviceAllocator</a>
 </li>
 <li>difference_type
 : <a class="el" href="classcub_1_1_counting_input_iterator.html#ac66dea2a687f0ad6ed0cbcd6217a3029">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_constant_input_iterator.html#ab45cc48afbfda7eaa3b4ea643e719c33">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#ac8cdd3a29db7e398f14e44a7aa054750">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#aff1afce146f69adb655e2ff7366b869f">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_arg_index_input_iterator.html#acd5e39570dd51f4883547ef33f9ca8c6">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a268f1e7a4b42a05c5b4d0bca2775c26e">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_arg_index_input_iterator.html#acd5e39570dd51f4883547ef33f9ca8c6">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_transform_input_iterator.html#ac05064e9ad33dc032452e3d09e8768f6">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a710012acbff3dd0c35822951e28148ea">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
-</li>
-<li>DoubleBuffer()
-: <a class="el" href="structcub_1_1_double_buffer.html#a100c51f0e1aefdff4cdfe6480c89f59e">cub::DoubleBuffer&lt; T &gt;</a>
+, <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#aff1afce146f69adb655e2ff7366b869f">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
 </li>
 </ul>
 </div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_0x65.html b/docs/html/functions_0x65.html
index e40f41ef7e..e339aded47 100644
--- a/docs/html/functions_0x65.html
+++ b/docs/html/functions_0x65.html
@@ -96,7 +96,6 @@
       <li><a href="functions_0x66.html#index_f"><span>f</span></a></li>
       <li><a href="functions_0x68.html#index_h"><span>h</span></a></li>
       <li><a href="functions_0x69.html#index_i"><span>i</span></a></li>
-      <li><a href="functions_0x6b.html#index_k"><span>k</span></a></li>
       <li><a href="functions_0x6c.html#index_l"><span>l</span></a></li>
       <li><a href="functions_0x6d.html#index_m"><span>m</span></a></li>
       <li><a href="functions_0x6e.html#index_n"><span>n</span></a></li>
@@ -155,7 +154,7 @@ <h3><a class="anchor" id="index_e"></a>- e -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_0x66.html b/docs/html/functions_0x66.html
index 1fe52f0ede..5a5eaf1d74 100644
--- a/docs/html/functions_0x66.html
+++ b/docs/html/functions_0x66.html
@@ -96,7 +96,6 @@
       <li class="current"><a href="functions_0x66.html#index_f"><span>f</span></a></li>
       <li><a href="functions_0x68.html#index_h"><span>h</span></a></li>
       <li><a href="functions_0x69.html#index_i"><span>i</span></a></li>
-      <li><a href="functions_0x6b.html#index_k"><span>k</span></a></li>
       <li><a href="functions_0x6c.html#index_l"><span>l</span></a></li>
       <li><a href="functions_0x6d.html#index_m"><span>m</span></a></li>
       <li><a href="functions_0x6e.html#index_n"><span>n</span></a></li>
@@ -151,7 +150,7 @@ <h3><a class="anchor" id="index_f"></a>- f -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_0x68.html b/docs/html/functions_0x68.html
index 384cc162a0..b7785cf970 100644
--- a/docs/html/functions_0x68.html
+++ b/docs/html/functions_0x68.html
@@ -96,7 +96,6 @@
       <li><a href="functions_0x66.html#index_f"><span>f</span></a></li>
       <li class="current"><a href="functions_0x68.html#index_h"><span>h</span></a></li>
       <li><a href="functions_0x69.html#index_i"><span>i</span></a></li>
-      <li><a href="functions_0x6b.html#index_k"><span>k</span></a></li>
       <li><a href="functions_0x6c.html#index_l"><span>l</span></a></li>
       <li><a href="functions_0x6d.html#index_m"><span>m</span></a></li>
       <li><a href="functions_0x6e.html#index_n"><span>n</span></a></li>
@@ -150,7 +149,7 @@ <h3><a class="anchor" id="index_h"></a>- h -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_0x69.html b/docs/html/functions_0x69.html
index 5beae4c66a..90285ae71d 100644
--- a/docs/html/functions_0x69.html
+++ b/docs/html/functions_0x69.html
@@ -96,7 +96,6 @@
       <li><a href="functions_0x66.html#index_f"><span>f</span></a></li>
       <li><a href="functions_0x68.html#index_h"><span>h</span></a></li>
       <li class="current"><a href="functions_0x69.html#index_i"><span>i</span></a></li>
-      <li><a href="functions_0x6b.html#index_k"><span>k</span></a></li>
       <li><a href="functions_0x6c.html#index_l"><span>l</span></a></li>
       <li><a href="functions_0x6d.html#index_m"><span>m</span></a></li>
       <li><a href="functions_0x6e.html#index_n"><span>n</span></a></li>
@@ -168,7 +167,7 @@ <h3><a class="anchor" id="index_i"></a>- i -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_0x6c.html b/docs/html/functions_0x6c.html
index 06885a5984..5a3c14d3ea 100644
--- a/docs/html/functions_0x6c.html
+++ b/docs/html/functions_0x6c.html
@@ -96,7 +96,6 @@
       <li><a href="functions_0x66.html#index_f"><span>f</span></a></li>
       <li><a href="functions_0x68.html#index_h"><span>h</span></a></li>
       <li><a href="functions_0x69.html#index_i"><span>i</span></a></li>
-      <li><a href="functions_0x6b.html#index_k"><span>k</span></a></li>
       <li class="current"><a href="functions_0x6c.html#index_l"><span>l</span></a></li>
       <li><a href="functions_0x6d.html#index_m"><span>m</span></a></li>
       <li><a href="functions_0x6e.html#index_n"><span>n</span></a></li>
@@ -138,7 +137,7 @@ <h3><a class="anchor" id="index_l"></a>- l -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_0x6d.html b/docs/html/functions_0x6d.html
index 67d2dbd3c9..f0ffe77c05 100644
--- a/docs/html/functions_0x6d.html
+++ b/docs/html/functions_0x6d.html
@@ -96,7 +96,6 @@
       <li><a href="functions_0x66.html#index_f"><span>f</span></a></li>
       <li><a href="functions_0x68.html#index_h"><span>h</span></a></li>
       <li><a href="functions_0x69.html#index_i"><span>i</span></a></li>
-      <li><a href="functions_0x6b.html#index_k"><span>k</span></a></li>
       <li><a href="functions_0x6c.html#index_l"><span>l</span></a></li>
       <li class="current"><a href="functions_0x6d.html#index_m"><span>m</span></a></li>
       <li><a href="functions_0x6e.html#index_n"><span>n</span></a></li>
@@ -132,12 +131,14 @@
 <h3><a class="anchor" id="index_m"></a>- m -</h3><ul>
 <li>Max()
 : <a class="el" href="structcub_1_1_device_reduce.html#a974a241463ca892c8f5e73b879065e48">cub::DeviceReduce</a>
+, <a class="el" href="structcub_1_1_device_segmented_reduce.html#aa1f982f913c95d9974412b8fc0995183">cub::DeviceSegmentedReduce</a>
 </li>
 <li>Min()
-: <a class="el" href="structcub_1_1_device_reduce.html#a14d9c12a1beb9a04f77e903d07fa596a">cub::DeviceReduce</a>
+: <a class="el" href="structcub_1_1_device_segmented_reduce.html#a2fb8a073bb504afd0e05cd06d008ec29">cub::DeviceSegmentedReduce</a>
+, <a class="el" href="structcub_1_1_device_reduce.html#a14d9c12a1beb9a04f77e903d07fa596a">cub::DeviceReduce</a>
 </li>
 <li>MultiHistogramEven()
-: <a class="el" href="structcub_1_1_device_histogram.html#a309c2e30bd62935e1cfcd623fb04b668">cub::DeviceHistogram</a>
+: <a class="el" href="structcub_1_1_device_histogram.html#a917e507d773ef1b11424ed8bc49e4d95">cub::DeviceHistogram</a>
 </li>
 <li>MultiHistogramRange()
 : <a class="el" href="structcub_1_1_device_histogram.html#a5926ff12b29d9e8e67ecf2684071542f">cub::DeviceHistogram</a>
@@ -147,7 +148,7 @@ <h3><a class="anchor" id="index_m"></a>- m -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_0x6e.html b/docs/html/functions_0x6e.html
index d78bd3adc4..dc70da7bf9 100644
--- a/docs/html/functions_0x6e.html
+++ b/docs/html/functions_0x6e.html
@@ -96,7 +96,6 @@
       <li><a href="functions_0x66.html#index_f"><span>f</span></a></li>
       <li><a href="functions_0x68.html#index_h"><span>h</span></a></li>
       <li><a href="functions_0x69.html#index_i"><span>i</span></a></li>
-      <li><a href="functions_0x6b.html#index_k"><span>k</span></a></li>
       <li><a href="functions_0x6c.html#index_l"><span>l</span></a></li>
       <li><a href="functions_0x6d.html#index_m"><span>m</span></a></li>
       <li class="current"><a href="functions_0x6e.html#index_n"><span>n</span></a></li>
@@ -133,12 +132,15 @@ <h3><a class="anchor" id="index_n"></a>- n -</h3><ul>
 <li>NonTrivialRuns()
 : <a class="el" href="structcub_1_1_device_run_length_encode.html#aa2318dc7a69f28a8c47d417aaf53db3a">cub::DeviceRunLengthEncode</a>
 </li>
+<li>normalize()
+: <a class="el" href="classcub_1_1_arg_index_input_iterator.html#aa3dd1dfb19d87d8e0b5fc3c8773fd1dc">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
+</li>
 </ul>
 </div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_0x6f.html b/docs/html/functions_0x6f.html
index 5d293e229d..1d2a2a8944 100644
--- a/docs/html/functions_0x6f.html
+++ b/docs/html/functions_0x6f.html
@@ -96,7 +96,6 @@
       <li><a href="functions_0x66.html#index_f"><span>f</span></a></li>
       <li><a href="functions_0x68.html#index_h"><span>h</span></a></li>
       <li><a href="functions_0x69.html#index_i"><span>i</span></a></li>
-      <li><a href="functions_0x6b.html#index_k"><span>k</span></a></li>
       <li><a href="functions_0x6c.html#index_l"><span>l</span></a></li>
       <li><a href="functions_0x6d.html#index_m"><span>m</span></a></li>
       <li><a href="functions_0x6e.html#index_n"><span>n</span></a></li>
@@ -136,19 +135,17 @@ <h3><a class="anchor" id="index_o"></a>- o -</h3><ul>
 , <a class="el" href="structcub_1_1_reduce_by_key_op.html#a4c6624999354ccf78a94226b9762bdbf">cub::ReduceByKeyOp&lt; ReductionOpT &gt;</a>
 </li>
 <li>operator!=()
-: <a class="el" href="structcub_1_1_key_value_pair.html#aa7bcc39b09d285d41c9c0226a49790f2">cub::KeyValuePair&lt; _Key, _Value &gt;</a>
-, <a class="el" href="classcub_1_1_constant_input_iterator.html#af6345ecd7617d8eb9913d63be52d4a7f">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
+: <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a17b0be0179d38b73e8a612d4d0202772">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_counting_input_iterator.html#af51f1a3ae34158f4f67964e278649240">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a17b0be0179d38b73e8a612d4d0202772">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#af500ff312438db4a22ce466968fd4c28">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a3be490820de8d3cc7968601fc0fe34ab">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#acbe543a259aa55230c8a7be052fe311c">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a3be490820de8d3cc7968601fc0fe34ab">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_transform_input_iterator.html#a5d3245a6a5829ed10ff39af7cfecb3be">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#a9bb801bcb260204ff8312cbb21e77bad">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_constant_input_iterator.html#af6345ecd7617d8eb9913d63be52d4a7f">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
 </li>
 <li>operator()()
-: <a class="el" href="structcub_1_1_sum.html#a1edd85dbc039f93c8e45eb2096704a86">cub::Sum</a>
-, <a class="el" href="structcub_1_1_max.html#ab06fa8091c6aa396fe127f37e0a545d3">cub::Max</a>
+: <a class="el" href="structcub_1_1_max.html#ab06fa8091c6aa396fe127f37e0a545d3">cub::Max</a>
 , <a class="el" href="structcub_1_1_arg_max.html#abc8619b45e188b364d96c0bdf0b29c2d">cub::ArgMax</a>
 , <a class="el" href="structcub_1_1_min.html#ade40f60337afc51da556ed65d5708136">cub::Min</a>
 , <a class="el" href="structcub_1_1_arg_min.html#a57bab80de70f6401ea6899ca2488f710">cub::ArgMin</a>
@@ -159,16 +156,17 @@ <h3><a class="anchor" id="index_o"></a>- o -</h3><ul>
 , <a class="el" href="structcub_1_1_equality.html#a9db81c4cbcf79dbb8087b3c59593cae0">cub::Equality</a>
 , <a class="el" href="structcub_1_1_inequality.html#ac2b51f35b929dc74ea766b012e89b552">cub::Inequality</a>
 , <a class="el" href="structcub_1_1_inequality_wrapper.html#a97067089c24f8d6a09c914ab6a163c65">cub::InequalityWrapper&lt; EqualityOp &gt;</a>
+, <a class="el" href="structcub_1_1_sum.html#a1edd85dbc039f93c8e45eb2096704a86">cub::Sum</a>
 </li>
 <li>operator*()
-: <a class="el" href="classcub_1_1_transform_input_iterator.html#ab84a2e02eeec401af33903dcb183d5ca">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a04e18ece1d7438dd929665e92091241f">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
+: <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a04e18ece1d7438dd929665e92091241f">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a803d29cb5212f1eaa5f4a34415987768">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#a4548b15114d6a8a3f714dcbf0f22e854">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_constant_input_iterator.html#afd9a8d22abd868d9fb13c235bd800eae">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_counting_input_iterator.html#a7acf3db367c8485a5145d9dbf47be1f7">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a8052b0744d233b7f9647d0eb6ba71583">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a78417e7a9f78ded354cc152997c2d7d1">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_transform_input_iterator.html#ab84a2e02eeec401af33903dcb183d5ca">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
 </li>
 <li>operator+()
 : <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a1bf6ce760cfa6764c43dd13699601030">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
@@ -191,82 +189,82 @@ <h3><a class="anchor" id="index_o"></a>- o -</h3><ul>
 , <a class="el" href="classcub_1_1_transform_input_iterator.html#a40c7129f8a4c002b8fedeb141580fae3">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
 </li>
 <li>operator+=()
-: <a class="el" href="classcub_1_1_transform_input_iterator.html#ac5e49ffffc8181a63c29b25054d97e36">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a379e3ea1d52e95b96c25bed66e12ce0e">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a9ecc94fabf03d35374955388414609e2">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
+: <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a9ecc94fabf03d35374955388414609e2">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_transform_input_iterator.html#ac5e49ffffc8181a63c29b25054d97e36">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a41bc8a80a8eea119098e75d8ed8c6d57">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#af90706b0c0ed61a4215e53fbc46373ff">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_constant_input_iterator.html#ae6e17c95c16a82c524b2093e8738ce66">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_counting_input_iterator.html#a3e78162dd96fa3dbe2c5796da7bbcdbe">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a88249ddcb876c6a729bd93c66c675dca">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a379e3ea1d52e95b96c25bed66e12ce0e">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
 </li>
 <li>operator-()
 : <a class="el" href="classcub_1_1_arg_index_input_iterator.html#ac47c90b1bda91b67e7a6aab76b69bf7a">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a2b2d556e2e9483245aaec4b9da110b60">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#a4d5e8cdf6e9f7b575c4bf9206489a6e4">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_constant_input_iterator.html#a38f0679cfa12d3092c44223927ba8780">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_constant_input_iterator.html#a94882cefa58d9800c4ece240d6633b5b">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_counting_input_iterator.html#a1f524d491834020f1a5d6b275e6b2911">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a9c9d7051350cec95f15eea1da35bd081">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a64841968b926b829e8eef247e111b57f">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a103d724220bc19e5910d17ef24b2a6f8">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_transform_input_iterator.html#a708bd1141c9318e3a1dc5f21bfc5a0d9">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_constant_input_iterator.html#a94882cefa58d9800c4ece240d6633b5b">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a64841968b926b829e8eef247e111b57f">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_transform_input_iterator.html#a2edd714aa2c90a86c104b205ecfabc7a">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a9c9d7051350cec95f15eea1da35bd081">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_constant_input_iterator.html#a38f0679cfa12d3092c44223927ba8780">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
 </li>
 <li>operator-=()
-: <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#a3b019be13224e35146324ac69d8bef5e">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a8ee93d1301f7690db893595169405e09">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
+: <a class="el" href="classcub_1_1_transform_input_iterator.html#a7e37ef48f34630c9ecc1bbf41c25b9c5">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a509d3555cb5bad94484da7fde61a3147">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_constant_input_iterator.html#ac464d5e300da6718a1dfe5e4c302b2d6">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a223d782b6abddd640dbcb99a0ea5cca1">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a8b91b2e6d55844ebcd3c8cd93e886880">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_constant_input_iterator.html#ac464d5e300da6718a1dfe5e4c302b2d6">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_transform_input_iterator.html#a7e37ef48f34630c9ecc1bbf41c25b9c5">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#a3b019be13224e35146324ac69d8bef5e">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_counting_input_iterator.html#a1c33f1465e4bb84a10bf580c3d5e4f1a">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a8ee93d1301f7690db893595169405e09">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
 </li>
 <li>operator-&gt;()
-: <a class="el" href="classcub_1_1_counting_input_iterator.html#a2e9c03abaf7baead2ba40bc4d01bc411">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a505ed4041dde85c030c56538974f1aa6">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a6dc70a54c23f5d20c3b04a34b01309f9">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
+: <a class="el" href="classcub_1_1_constant_input_iterator.html#a349352574eae5a46831ca2cc27942c0f">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_arg_index_input_iterator.html#ade9ae7920b62507e07895b052caa325b">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_transform_input_iterator.html#afa7d636e59396d459ea3efbe66ff8e8f">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a909602025800c1f44525d95a65ad1bd4">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_constant_input_iterator.html#a349352574eae5a46831ca2cc27942c0f">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_transform_input_iterator.html#afa7d636e59396d459ea3efbe66ff8e8f">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a505ed4041dde85c030c56538974f1aa6">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a6dc70a54c23f5d20c3b04a34b01309f9">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_counting_input_iterator.html#a2e9c03abaf7baead2ba40bc4d01bc411">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
 </li>
 <li>operator&lt;&lt;
-: <a class="el" href="classcub_1_1_constant_input_iterator.html#a11b2073215294343bdc32f09c0bc5e80">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a11b2073215294343bdc32f09c0bc5e80">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a11b2073215294343bdc32f09c0bc5e80">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
+: <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a11b2073215294343bdc32f09c0bc5e80">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#a11b2073215294343bdc32f09c0bc5e80">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a11b2073215294343bdc32f09c0bc5e80">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_counting_input_iterator.html#a11b2073215294343bdc32f09c0bc5e80">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a11b2073215294343bdc32f09c0bc5e80">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a11b2073215294343bdc32f09c0bc5e80">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_transform_input_iterator.html#a11b2073215294343bdc32f09c0bc5e80">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a11b2073215294343bdc32f09c0bc5e80">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a11b2073215294343bdc32f09c0bc5e80">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_constant_input_iterator.html#a11b2073215294343bdc32f09c0bc5e80">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
 </li>
 <li>operator==()
 : <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#ac37eaae76e6aee349fd39933838df70d">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a8ec66485cd92af668d62850e34f09402">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_constant_input_iterator.html#a9519d47086ca7e8501d29a4f73f4b85f">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_transform_input_iterator.html#ac7a802ef64ec191fdfecbadf0434e00f">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a265939c1f71b392896a5fd07d1d189b9">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a38095c9cb635d8ba5ded2090ab671e20">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a9925c001d0a1dd40b35b8ce831e85f13">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_counting_input_iterator.html#a96552e3b46084aeccc6e4219a1c93336">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_constant_input_iterator.html#a9519d47086ca7e8501d29a4f73f4b85f">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_transform_input_iterator.html#ac7a802ef64ec191fdfecbadf0434e00f">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a8ec66485cd92af668d62850e34f09402">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
 </li>
 <li>operator[]()
-: <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a60fd926e1e100dee7a22088543a3b647">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a4654e821ae09a83f6e904a8fe57ed515">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a23be223e3feb98da6c2828cb94d45ed6">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a626335c517f746941c188f0e71784b2e">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
+: <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a23be223e3feb98da6c2828cb94d45ed6">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#aaef072ce96b8fe70786c6742ddaf2b89">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_transform_input_iterator.html#a05925d7ab0f9aaf16d420d41ad6c301c">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_counting_input_iterator.html#ad59d5567ddf4aa4ac6989581d5c65e43">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_constant_input_iterator.html#af90df1390eb32cdf420dfb3185a79de9">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a4654e821ae09a83f6e904a8fe57ed515">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a60fd926e1e100dee7a22088543a3b647">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_counting_input_iterator.html#ad59d5567ddf4aa4ac6989581d5c65e43">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_transform_input_iterator.html#a05925d7ab0f9aaf16d420d41ad6c301c">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a626335c517f746941c188f0e71784b2e">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
 </li>
 </ul>
 </div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_0x70.html b/docs/html/functions_0x70.html
index fc620174f0..3d18d6b7d0 100644
--- a/docs/html/functions_0x70.html
+++ b/docs/html/functions_0x70.html
@@ -96,7 +96,6 @@
       <li><a href="functions_0x66.html#index_f"><span>f</span></a></li>
       <li><a href="functions_0x68.html#index_h"><span>h</span></a></li>
       <li><a href="functions_0x69.html#index_i"><span>i</span></a></li>
-      <li><a href="functions_0x6b.html#index_k"><span>k</span></a></li>
       <li><a href="functions_0x6c.html#index_l"><span>l</span></a></li>
       <li><a href="functions_0x6d.html#index_m"><span>m</span></a></li>
       <li><a href="functions_0x6e.html#index_n"><span>n</span></a></li>
@@ -148,7 +147,7 @@ <h3><a class="anchor" id="index_p"></a>- p -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_0x72.html b/docs/html/functions_0x72.html
index 1f95ed9f82..e17bab021b 100644
--- a/docs/html/functions_0x72.html
+++ b/docs/html/functions_0x72.html
@@ -96,7 +96,6 @@
       <li><a href="functions_0x66.html#index_f"><span>f</span></a></li>
       <li><a href="functions_0x68.html#index_h"><span>h</span></a></li>
       <li><a href="functions_0x69.html#index_i"><span>i</span></a></li>
-      <li><a href="functions_0x6b.html#index_k"><span>k</span></a></li>
       <li><a href="functions_0x6c.html#index_l"><span>l</span></a></li>
       <li><a href="functions_0x6d.html#index_m"><span>m</span></a></li>
       <li><a href="functions_0x6e.html#index_n"><span>n</span></a></li>
@@ -132,13 +131,13 @@
 <h3><a class="anchor" id="index_r"></a>- r -</h3><ul>
 <li>Reduce()
 : <a class="el" href="classcub_1_1_block_reduce.html#a089953b3bdfe7c48208632d0cc2ac1fb">cub::BlockReduce&lt; T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;</a>
-, <a class="el" href="structcub_1_1_device_reduce.html#a326ef5ae888b92442295c26190a6c39c">cub::DeviceReduce</a>
-, <a class="el" href="classcub_1_1_warp_reduce.html#a0dd72fc4cf7e1ecf59e8b15bd6819185">cub::WarpReduce&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;</a>
+, <a class="el" href="structcub_1_1_device_reduce.html#aa4adabeb841b852a7a5ecf4f99a2daeb">cub::DeviceReduce</a>
+, <a class="el" href="structcub_1_1_device_segmented_reduce.html#ad9b73f245930740c4d8786fc1a812364">cub::DeviceSegmentedReduce</a>
 , <a class="el" href="classcub_1_1_block_reduce.html#a0e947f6a1d812d21839632b87aaf32e5">cub::BlockReduce&lt; T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;</a>
-, <a class="el" href="classcub_1_1_warp_reduce.html#ad1ecfeddf0e7fb3f359cf61b60f4745a">cub::WarpReduce&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;</a>
+, <a class="el" href="classcub_1_1_warp_reduce.html#a0dd72fc4cf7e1ecf59e8b15bd6819185">cub::WarpReduce&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;</a>
 </li>
 <li>ReduceByKey()
-: <a class="el" href="structcub_1_1_device_reduce.html#a3206c7c11b4d894ca176482e86215b02">cub::DeviceReduce</a>
+: <a class="el" href="structcub_1_1_device_reduce.html#a303ae673ac32825f95912b4bfff8bef1">cub::DeviceReduce</a>
 </li>
 <li>ReduceByKeyOp()
 : <a class="el" href="structcub_1_1_reduce_by_key_op.html#a88feb445a30081d205a7e1560c07bc6e">cub::ReduceByKeyOp&lt; ReductionOpT &gt;</a>
@@ -150,18 +149,18 @@ <h3><a class="anchor" id="index_r"></a>- r -</h3><ul>
 : <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a56ce41371ce3bd489f594bf4aa3eb470">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_transform_input_iterator.html#a4a1de212f06263f64d3c155d3c62afb6">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_constant_input_iterator.html#a3a4e0d5ce69dd3c41176dc581145d75a">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#a92e8225d597b4942f46eddc327668aca">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_counting_input_iterator.html#ad8f9fade98b7ecc1b9ac36a64050aa3a">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a8498b60c0f5f6b04caf6e3b18cfd825d">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#afeb390b1ca6d494d9295beb9a65705fd">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#a92e8225d597b4942f46eddc327668aca">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a3a83e2e07ed33bf1449526e0fcc1d579">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a8498b60c0f5f6b04caf6e3b18cfd825d">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
 </li>
 </ul>
 </div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_0x73.html b/docs/html/functions_0x73.html
index 8c74596874..208087ec84 100644
--- a/docs/html/functions_0x73.html
+++ b/docs/html/functions_0x73.html
@@ -96,7 +96,6 @@
       <li><a href="functions_0x66.html#index_f"><span>f</span></a></li>
       <li><a href="functions_0x68.html#index_h"><span>h</span></a></li>
       <li><a href="functions_0x69.html#index_i"><span>i</span></a></li>
-      <li><a href="functions_0x6b.html#index_k"><span>k</span></a></li>
       <li><a href="functions_0x6c.html#index_l"><span>l</span></a></li>
       <li><a href="functions_0x6d.html#index_m"><span>m</span></a></li>
       <li><a href="functions_0x6e.html#index_n"><span>n</span></a></li>
@@ -142,17 +141,14 @@ <h3><a class="anchor" id="index_s"></a>- s -</h3><ul>
 <li>ScatterToStripedGuarded()
 : <a class="el" href="classcub_1_1_block_exchange.html#a83cdc3266e21fb11d5c37ac79dc66598">cub::BlockExchange&lt; T, BLOCK_DIM_X, ITEMS_PER_THREAD, WARP_TIME_SLICING, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;</a>
 </li>
-<li>selector
-: <a class="el" href="structcub_1_1_double_buffer.html#a9641172c847169904c4054856d7c26f4">cub::DoubleBuffer&lt; T &gt;</a>
-</li>
 <li>self_type
-: <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a6ad07b9b511ecbd6219756ebeffd6be6">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#af26facb4f00568a940d5529047bd90d7">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
+: <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#af26facb4f00568a940d5529047bd90d7">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#abca48cb8f0172b355ba465ff3c6308c2">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_constant_input_iterator.html#a0d85b740ebf81e48973e2c6cf9d0e812">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_counting_input_iterator.html#a4e89463441c1c008e7610473ae8d7435">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#aa7e85f2b2f4d580e3ded497028604a5c">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a03d63027c130152d31623eee579f8506">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a6ad07b9b511ecbd6219756ebeffd6be6">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_transform_input_iterator.html#ad9e475cc2aae6601422445fff16de9b4">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
 </li>
 <li>SetMaxCachedBytes()
@@ -175,33 +171,34 @@ <h3><a class="anchor" id="index_s"></a>- s -</h3><ul>
 , <a class="el" href="structcub_1_1_device_segmented_radix_sort.html#ab6a917a29d441021949e197d3a639fba">cub::DeviceSegmentedRadixSort</a>
 </li>
 <li>SortKeysDescending()
-: <a class="el" href="structcub_1_1_device_segmented_radix_sort.html#a49ccbb6ca7a1d26e4b01e68e8da8a701">cub::DeviceSegmentedRadixSort</a>
+: <a class="el" href="structcub_1_1_device_segmented_radix_sort.html#a5c21fba171c718aaf9b3b3c27bda6a94">cub::DeviceSegmentedRadixSort</a>
 , <a class="el" href="structcub_1_1_device_radix_sort.html#a24761009c4cc15fd2e54cb72663af0ef">cub::DeviceRadixSort</a>
+, <a class="el" href="structcub_1_1_device_segmented_radix_sort.html#a49ccbb6ca7a1d26e4b01e68e8da8a701">cub::DeviceSegmentedRadixSort</a>
 </li>
 <li>SortPairs()
-: <a class="el" href="structcub_1_1_device_radix_sort.html#a31d7224d3f6c6a9309a0b84571f25eb9">cub::DeviceRadixSort</a>
-, <a class="el" href="structcub_1_1_device_segmented_radix_sort.html#a4cffada9bba553f9871a34d926bbb148">cub::DeviceSegmentedRadixSort</a>
+: <a class="el" href="structcub_1_1_device_segmented_radix_sort.html#a4cffada9bba553f9871a34d926bbb148">cub::DeviceSegmentedRadixSort</a>
+, <a class="el" href="structcub_1_1_device_radix_sort.html#a31d7224d3f6c6a9309a0b84571f25eb9">cub::DeviceRadixSort</a>
+, <a class="el" href="structcub_1_1_device_segmented_radix_sort.html#a6770adfa8e5a99c8015d9e6ab5ed8ca0">cub::DeviceSegmentedRadixSort</a>
 </li>
 <li>SortPairsDescending()
-: <a class="el" href="structcub_1_1_device_radix_sort.html#add6a87f54c8058edba4b9e875bb0626a">cub::DeviceRadixSort</a>
-, <a class="el" href="structcub_1_1_device_segmented_radix_sort.html#aafce5852dfbfbeef027493c3791d6347">cub::DeviceSegmentedRadixSort</a>
-</li>
-<li>storage
-: <a class="el" href="structcub_1_1_uninitialized.html#a5fa7311d943222333e8c87497ff8e782">cub::Uninitialized&lt; T &gt;</a>
+: <a class="el" href="structcub_1_1_device_radix_sort.html#aea53a40e665f2d5ed683a6655a3d188e">cub::DeviceRadixSort</a>
+, <a class="el" href="structcub_1_1_device_segmented_radix_sort.html#a39de3d48166582a802cc7df1481d3ff4">cub::DeviceSegmentedRadixSort</a>
 </li>
 <li>Store()
-: <a class="el" href="classcub_1_1_block_store.html#a67d0aa8fcf37f92b2074e986581ffdf5">cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;</a>
+: <a class="el" href="classcub_1_1_block_store.html#a86fd777fd9bef8264787d756b16303ed">cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;</a>
 </li>
 <li>StripedToBlocked()
 : <a class="el" href="classcub_1_1_block_exchange.html#a2855471bbbcc4d66ac6a29d35a040e0c">cub::BlockExchange&lt; T, BLOCK_DIM_X, ITEMS_PER_THREAD, WARP_TIME_SLICING, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;</a>
 </li>
 <li>Sum()
 : <a class="el" href="classcub_1_1_block_reduce.html#a33ddffdde07275ab0c4e1bf61b0d9409">cub::BlockReduce&lt; T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;</a>
-, <a class="el" href="structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f">cub::DeviceReduce</a>
-, <a class="el" href="classcub_1_1_block_reduce.html#ac5d4591d9513f08b180d4112cb0c4c51">cub::BlockReduce&lt; T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;</a>
+, <a class="el" href="classcub_1_1_warp_scan.html#a25bd83f795e88b9260ec2bcbf846fb20">cub::WarpScan&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;</a>
 , <a class="el" href="classcub_1_1_warp_reduce.html#abe4aeeabf8859a7582a0b5858b84ee7a">cub::WarpReduce&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;</a>
 , <a class="el" href="classcub_1_1_block_reduce.html#a7632bd9c8950dd6a3528ca99fa3f0890">cub::BlockReduce&lt; T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;</a>
-, <a class="el" href="classcub_1_1_warp_scan.html#a25bd83f795e88b9260ec2bcbf846fb20">cub::WarpScan&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;</a>
+, <a class="el" href="structcub_1_1_device_segmented_reduce.html#aefdf8fcdfb5e5d76459ee222360924eb">cub::DeviceSegmentedReduce</a>
+, <a class="el" href="classcub_1_1_block_reduce.html#ac5d4591d9513f08b180d4112cb0c4c51">cub::BlockReduce&lt; T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;</a>
+, <a class="el" href="structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f">cub::DeviceReduce</a>
+, <a class="el" href="classcub_1_1_warp_reduce.html#ad9c4a8d85a7795cf220713f362c36f30">cub::WarpReduce&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;</a>
 </li>
 <li>SwizzleScanOp()
 : <a class="el" href="classcub_1_1_swizzle_scan_op.html#ae81a38aa9d94025da72544b9e0dc611b">cub::SwizzleScanOp&lt; ScanOp &gt;</a>
@@ -211,7 +208,7 @@ <h3><a class="anchor" id="index_s"></a>- s -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_0x74.html b/docs/html/functions_0x74.html
index 2817378f58..d3beed24fc 100644
--- a/docs/html/functions_0x74.html
+++ b/docs/html/functions_0x74.html
@@ -96,7 +96,6 @@
       <li><a href="functions_0x66.html#index_f"><span>f</span></a></li>
       <li><a href="functions_0x68.html#index_h"><span>h</span></a></li>
       <li><a href="functions_0x69.html#index_i"><span>i</span></a></li>
-      <li><a href="functions_0x6b.html#index_k"><span>k</span></a></li>
       <li><a href="functions_0x6c.html#index_l"><span>l</span></a></li>
       <li><a href="functions_0x6d.html#index_m"><span>m</span></a></li>
       <li><a href="functions_0x6e.html#index_n"><span>n</span></a></li>
@@ -151,7 +150,7 @@ <h3><a class="anchor" id="index_t"></a>- t -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_0x75.html b/docs/html/functions_0x75.html
index 760d6224a5..35814511da 100644
--- a/docs/html/functions_0x75.html
+++ b/docs/html/functions_0x75.html
@@ -96,7 +96,6 @@
       <li><a href="functions_0x66.html#index_f"><span>f</span></a></li>
       <li><a href="functions_0x68.html#index_h"><span>h</span></a></li>
       <li><a href="functions_0x69.html#index_i"><span>i</span></a></li>
-      <li><a href="functions_0x6b.html#index_k"><span>k</span></a></li>
       <li><a href="functions_0x6c.html#index_l"><span>l</span></a></li>
       <li><a href="functions_0x6d.html#index_m"><span>m</span></a></li>
       <li><a href="functions_0x6e.html#index_n"><span>n</span></a></li>
@@ -142,7 +141,7 @@ <h3><a class="anchor" id="index_u"></a>- u -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_0x76.html b/docs/html/functions_0x76.html
index 8966d54c8d..76443ec624 100644
--- a/docs/html/functions_0x76.html
+++ b/docs/html/functions_0x76.html
@@ -96,7 +96,6 @@
       <li><a href="functions_0x66.html#index_f"><span>f</span></a></li>
       <li><a href="functions_0x68.html#index_h"><span>h</span></a></li>
       <li><a href="functions_0x69.html#index_i"><span>i</span></a></li>
-      <li><a href="functions_0x6b.html#index_k"><span>k</span></a></li>
       <li><a href="functions_0x6c.html#index_l"><span>l</span></a></li>
       <li><a href="functions_0x6d.html#index_m"><span>m</span></a></li>
       <li><a href="functions_0x6e.html#index_n"><span>n</span></a></li>
@@ -130,28 +129,22 @@
 <div class="textblock">Here is a list of all documented class members with links to the class documentation for each member:</div>
 
 <h3><a class="anchor" id="index_v"></a>- v -</h3><ul>
-<li>Value
-: <a class="el" href="structcub_1_1_key_value_pair.html#a9fd385872c09fd3757e9ba59b2754955">cub::KeyValuePair&lt; _Key, _Value &gt;</a>
-</li>
-<li>value
-: <a class="el" href="structcub_1_1_key_value_pair.html#a468bef1440a66f45bd9b5193594bf1a4">cub::KeyValuePair&lt; _Key, _Value &gt;</a>
-</li>
 <li>value_type
-: <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a1de28970e3874202646e11984a0d9026">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
+: <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a6c40ea1dc7c0923b9010f1e938e07d22">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_transform_input_iterator.html#a8230414e069b99a279e2afabed83fe52">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a1de28970e3874202646e11984a0d9026">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a4405a9e3d39593b7c468629dff098144">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_counting_input_iterator.html#a3afabb6a47c8cf7526220eb817e2a97f">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_constant_input_iterator.html#a72a3996ca30a1b2eb6f676923a2ee3ce">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#aba997f95620d692cbad05c74ef169fa8">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a60689c564e2ade39722947eeaf40156a">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a6c40ea1dc7c0923b9010f1e938e07d22">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_transform_input_iterator.html#a8230414e069b99a279e2afabed83fe52">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
 </li>
 </ul>
 </div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_0x77.html b/docs/html/functions_0x77.html
index df8dd0c10b..cc899fa060 100644
--- a/docs/html/functions_0x77.html
+++ b/docs/html/functions_0x77.html
@@ -96,7 +96,6 @@
       <li><a href="functions_0x66.html#index_f"><span>f</span></a></li>
       <li><a href="functions_0x68.html#index_h"><span>h</span></a></li>
       <li><a href="functions_0x69.html#index_i"><span>i</span></a></li>
-      <li><a href="functions_0x6b.html#index_k"><span>k</span></a></li>
       <li><a href="functions_0x6c.html#index_l"><span>l</span></a></li>
       <li><a href="functions_0x6d.html#index_m"><span>m</span></a></li>
       <li><a href="functions_0x6e.html#index_n"><span>n</span></a></li>
@@ -144,7 +143,7 @@ <h3><a class="anchor" id="index_w"></a>- w -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_0x7e.html b/docs/html/functions_0x7e.html
index a27155f53b..28a44ce475 100644
--- a/docs/html/functions_0x7e.html
+++ b/docs/html/functions_0x7e.html
@@ -96,7 +96,6 @@
       <li><a href="functions_0x66.html#index_f"><span>f</span></a></li>
       <li><a href="functions_0x68.html#index_h"><span>h</span></a></li>
       <li><a href="functions_0x69.html#index_i"><span>i</span></a></li>
-      <li><a href="functions_0x6b.html#index_k"><span>k</span></a></li>
       <li><a href="functions_0x6c.html#index_l"><span>l</span></a></li>
       <li><a href="functions_0x6d.html#index_m"><span>m</span></a></li>
       <li><a href="functions_0x6e.html#index_n"><span>n</span></a></li>
@@ -138,7 +137,7 @@ <h3><a class="anchor" id="index_0x7e"></a>- ~ -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_func.html b/docs/html/functions_func.html
index 49d2066952..6627c4b52d 100644
--- a/docs/html/functions_func.html
+++ b/docs/html/functions_func.html
@@ -127,27 +127,23 @@
 &#160;
 
 <h3><a class="anchor" id="index_a"></a>- a -</h3><ul>
-<li>Alias()
-: <a class="el" href="structcub_1_1_uninitialized.html#a790b865325f19ac45cc84d3fed0d3038">cub::Uninitialized&lt; T &gt;</a>
-</li>
-<li>Alternate()
-: <a class="el" href="structcub_1_1_double_buffer.html#a3895f1d57aeb379bee79de56ace8e35a">cub::DoubleBuffer&lt; T &gt;</a>
-</li>
 <li>ArgIndexInputIterator()
 : <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a97a6c3755ab132c099e90616e5f67cb6">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
 </li>
 <li>ArgMax()
 : <a class="el" href="structcub_1_1_device_reduce.html#a07a9ecbf0b6db1882107f6adee1c4276">cub::DeviceReduce</a>
+, <a class="el" href="structcub_1_1_device_segmented_reduce.html#ad0e1f7eede9a0f93a379950eac7d8329">cub::DeviceSegmentedReduce</a>
 </li>
 <li>ArgMin()
 : <a class="el" href="structcub_1_1_device_reduce.html#a6b35963e90120b6d2c76a6068b0340a9">cub::DeviceReduce</a>
+, <a class="el" href="structcub_1_1_device_segmented_reduce.html#a085150ad8d55de8665b45a3f69f38bce">cub::DeviceSegmentedReduce</a>
 </li>
 </ul>
 </div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_func_0x62.html b/docs/html/functions_func_0x62.html
index 1263d98e81..c3682436b0 100644
--- a/docs/html/functions_func_0x62.html
+++ b/docs/html/functions_func_0x62.html
@@ -169,7 +169,7 @@ <h3><a class="anchor" id="index_b"></a>- b -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_func_0x63.html b/docs/html/functions_func_0x63.html
index 2a4d6ee634..6b7bfa20ef 100644
--- a/docs/html/functions_func_0x63.html
+++ b/docs/html/functions_func_0x63.html
@@ -148,15 +148,12 @@ <h3><a class="anchor" id="index_c"></a>- c -</h3><ul>
 <li>CsrMV()
 : <a class="el" href="structcub_1_1_device_spmv.html#abbcd4c04d8bbbcdfabc1eb62f860b8b2">cub::DeviceSpmv</a>
 </li>
-<li>Current()
-: <a class="el" href="structcub_1_1_double_buffer.html#a861d3dff1a70d5e5926057a44d9b8724">cub::DoubleBuffer&lt; T &gt;</a>
-</li>
 </ul>
 </div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_func_0x64.html b/docs/html/functions_func_0x64.html
index 19bd3da1a1..f8f9c7e1f0 100644
--- a/docs/html/functions_func_0x64.html
+++ b/docs/html/functions_func_0x64.html
@@ -131,17 +131,14 @@ <h3><a class="anchor" id="index_d"></a>- d -</h3><ul>
 : <a class="el" href="structcub_1_1_caching_device_allocator.html#ae1088ac6ba6e5d55832ffbc0b2a5d714">cub::CachingDeviceAllocator</a>
 </li>
 <li>DeviceFree()
-: <a class="el" href="structcub_1_1_caching_device_allocator.html#adbf65c59172b140420636e150325deeb">cub::CachingDeviceAllocator</a>
-</li>
-<li>DoubleBuffer()
-: <a class="el" href="structcub_1_1_double_buffer.html#a100c51f0e1aefdff4cdfe6480c89f59e">cub::DoubleBuffer&lt; T &gt;</a>
+: <a class="el" href="structcub_1_1_caching_device_allocator.html#a66e72cc3cc7d0dbd59148ac7c4ba0de6">cub::CachingDeviceAllocator</a>
 </li>
 </ul>
 </div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_func_0x65.html b/docs/html/functions_func_0x65.html
index ec6bc0d00a..992b396498 100644
--- a/docs/html/functions_func_0x65.html
+++ b/docs/html/functions_func_0x65.html
@@ -152,7 +152,7 @@ <h3><a class="anchor" id="index_e"></a>- e -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_func_0x66.html b/docs/html/functions_func_0x66.html
index 6430df6c68..c1b5cf40e0 100644
--- a/docs/html/functions_func_0x66.html
+++ b/docs/html/functions_func_0x66.html
@@ -148,7 +148,7 @@ <h3><a class="anchor" id="index_f"></a>- f -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_func_0x68.html b/docs/html/functions_func_0x68.html
index 532955e36a..d95c9aa175 100644
--- a/docs/html/functions_func_0x68.html
+++ b/docs/html/functions_func_0x68.html
@@ -147,7 +147,7 @@ <h3><a class="anchor" id="index_h"></a>- h -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_func_0x69.html b/docs/html/functions_func_0x69.html
index 32452ad43b..24d6d4258f 100644
--- a/docs/html/functions_func_0x69.html
+++ b/docs/html/functions_func_0x69.html
@@ -156,7 +156,7 @@ <h3><a class="anchor" id="index_i"></a>- i -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_func_0x6c.html b/docs/html/functions_func_0x6c.html
index 0a9e770a0c..b13e7596bc 100644
--- a/docs/html/functions_func_0x6c.html
+++ b/docs/html/functions_func_0x6c.html
@@ -135,7 +135,7 @@ <h3><a class="anchor" id="index_l"></a>- l -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_func_0x6d.html b/docs/html/functions_func_0x6d.html
index 6a6a181065..ad4380232f 100644
--- a/docs/html/functions_func_0x6d.html
+++ b/docs/html/functions_func_0x6d.html
@@ -129,12 +129,14 @@
 <h3><a class="anchor" id="index_m"></a>- m -</h3><ul>
 <li>Max()
 : <a class="el" href="structcub_1_1_device_reduce.html#a974a241463ca892c8f5e73b879065e48">cub::DeviceReduce</a>
+, <a class="el" href="structcub_1_1_device_segmented_reduce.html#aa1f982f913c95d9974412b8fc0995183">cub::DeviceSegmentedReduce</a>
 </li>
 <li>Min()
-: <a class="el" href="structcub_1_1_device_reduce.html#a14d9c12a1beb9a04f77e903d07fa596a">cub::DeviceReduce</a>
+: <a class="el" href="structcub_1_1_device_segmented_reduce.html#a2fb8a073bb504afd0e05cd06d008ec29">cub::DeviceSegmentedReduce</a>
+, <a class="el" href="structcub_1_1_device_reduce.html#a14d9c12a1beb9a04f77e903d07fa596a">cub::DeviceReduce</a>
 </li>
 <li>MultiHistogramEven()
-: <a class="el" href="structcub_1_1_device_histogram.html#a309c2e30bd62935e1cfcd623fb04b668">cub::DeviceHistogram</a>
+: <a class="el" href="structcub_1_1_device_histogram.html#a917e507d773ef1b11424ed8bc49e4d95">cub::DeviceHistogram</a>
 </li>
 <li>MultiHistogramRange()
 : <a class="el" href="structcub_1_1_device_histogram.html#a5926ff12b29d9e8e67ecf2684071542f">cub::DeviceHistogram</a>
@@ -144,7 +146,7 @@ <h3><a class="anchor" id="index_m"></a>- m -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_func_0x6e.html b/docs/html/functions_func_0x6e.html
index 41ce4ed1ea..55dae7ad2c 100644
--- a/docs/html/functions_func_0x6e.html
+++ b/docs/html/functions_func_0x6e.html
@@ -130,12 +130,15 @@ <h3><a class="anchor" id="index_n"></a>- n -</h3><ul>
 <li>NonTrivialRuns()
 : <a class="el" href="structcub_1_1_device_run_length_encode.html#aa2318dc7a69f28a8c47d417aaf53db3a">cub::DeviceRunLengthEncode</a>
 </li>
+<li>normalize()
+: <a class="el" href="classcub_1_1_arg_index_input_iterator.html#aa3dd1dfb19d87d8e0b5fc3c8773fd1dc">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
+</li>
 </ul>
 </div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_func_0x6f.html b/docs/html/functions_func_0x6f.html
index b7ca2484e7..abc2e5e14c 100644
--- a/docs/html/functions_func_0x6f.html
+++ b/docs/html/functions_func_0x6f.html
@@ -128,29 +128,28 @@
 
 <h3><a class="anchor" id="index_o"></a>- o -</h3><ul>
 <li>operator!=()
-: <a class="el" href="structcub_1_1_key_value_pair.html#aa7bcc39b09d285d41c9c0226a49790f2">cub::KeyValuePair&lt; _Key, _Value &gt;</a>
-, <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a17b0be0179d38b73e8a612d4d0202772">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#a9bb801bcb260204ff8312cbb21e77bad">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a3be490820de8d3cc7968601fc0fe34ab">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_transform_input_iterator.html#a5d3245a6a5829ed10ff39af7cfecb3be">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_constant_input_iterator.html#af6345ecd7617d8eb9913d63be52d4a7f">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
+: <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a17b0be0179d38b73e8a612d4d0202772">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#acbe543a259aa55230c8a7be052fe311c">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_constant_input_iterator.html#af6345ecd7617d8eb9913d63be52d4a7f">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_transform_input_iterator.html#a5d3245a6a5829ed10ff39af7cfecb3be">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_counting_input_iterator.html#af51f1a3ae34158f4f67964e278649240">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#a9bb801bcb260204ff8312cbb21e77bad">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#af500ff312438db4a22ce466968fd4c28">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a3be490820de8d3cc7968601fc0fe34ab">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
 </li>
 <li>operator()()
-: <a class="el" href="structcub_1_1_min.html#ade40f60337afc51da556ed65d5708136">cub::Min</a>
-, <a class="el" href="structcub_1_1_arg_min.html#a57bab80de70f6401ea6899ca2488f710">cub::ArgMin</a>
+: <a class="el" href="structcub_1_1_arg_min.html#a57bab80de70f6401ea6899ca2488f710">cub::ArgMin</a>
 , <a class="el" href="structcub_1_1_cast.html#a649573c7f5d02ab09f176b5cc7cf1e5c">cub::Cast&lt; B &gt;</a>
-, <a class="el" href="classcub_1_1_swizzle_scan_op.html#a9548d484ccfb5161d56d3db1003693e2">cub::SwizzleScanOp&lt; ScanOp &gt;</a>
 , <a class="el" href="structcub_1_1_equality.html#a9db81c4cbcf79dbb8087b3c59593cae0">cub::Equality</a>
+, <a class="el" href="classcub_1_1_swizzle_scan_op.html#a9548d484ccfb5161d56d3db1003693e2">cub::SwizzleScanOp&lt; ScanOp &gt;</a>
 , <a class="el" href="structcub_1_1_reduce_by_segment_op.html#aff56752999857ef68029cfa150d88d5e">cub::ReduceBySegmentOp&lt; ReductionOpT &gt;</a>
-, <a class="el" href="structcub_1_1_reduce_by_key_op.html#ad5e0d929c9cc2447df47c0f2c6409c9a">cub::ReduceByKeyOp&lt; ReductionOpT &gt;</a>
 , <a class="el" href="structcub_1_1_inequality.html#ac2b51f35b929dc74ea766b012e89b552">cub::Inequality</a>
+, <a class="el" href="structcub_1_1_reduce_by_key_op.html#ad5e0d929c9cc2447df47c0f2c6409c9a">cub::ReduceByKeyOp&lt; ReductionOpT &gt;</a>
 , <a class="el" href="structcub_1_1_inequality_wrapper.html#a97067089c24f8d6a09c914ab6a163c65">cub::InequalityWrapper&lt; EqualityOp &gt;</a>
 , <a class="el" href="structcub_1_1_sum.html#a1edd85dbc039f93c8e45eb2096704a86">cub::Sum</a>
 , <a class="el" href="structcub_1_1_max.html#ab06fa8091c6aa396fe127f37e0a545d3">cub::Max</a>
 , <a class="el" href="structcub_1_1_arg_max.html#abc8619b45e188b364d96c0bdf0b29c2d">cub::ArgMax</a>
+, <a class="el" href="structcub_1_1_min.html#ade40f60337afc51da556ed65d5708136">cub::Min</a>
 </li>
 <li>operator*()
 : <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a04e18ece1d7438dd929665e92091241f">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
@@ -163,14 +162,14 @@ <h3><a class="anchor" id="index_o"></a>- o -</h3><ul>
 , <a class="el" href="classcub_1_1_transform_input_iterator.html#ab84a2e02eeec401af33903dcb183d5ca">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
 </li>
 <li>operator+()
-: <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#a487a5a434527081b425e5a36ea0d4dd3">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_constant_input_iterator.html#a1e37cefa0808ae8b8b0853e30013bfba">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
+: <a class="el" href="classcub_1_1_constant_input_iterator.html#a1e37cefa0808ae8b8b0853e30013bfba">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_counting_input_iterator.html#a4491f4d5635571964f4d98cf1c3cc753">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a1284c246857d30e96e5bc1ea10d68116">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a85fcecba8f3d7b4941b3fa446cddb50a">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_transform_input_iterator.html#a36a3eb7ede1ef22b2316fc281aed52ab">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a1bf6ce760cfa6764c43dd13699601030">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a455c2df363449c1b92628de54fb2bad5">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#a487a5a434527081b425e5a36ea0d4dd3">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
 </li>
 <li>operator++()
 : <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a293aa0bf1aa6706505cb85d4d475c9fd">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
@@ -183,74 +182,75 @@ <h3><a class="anchor" id="index_o"></a>- o -</h3><ul>
 , <a class="el" href="classcub_1_1_transform_input_iterator.html#a40c7129f8a4c002b8fedeb141580fae3">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
 </li>
 <li>operator+=()
-: <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a88249ddcb876c6a729bd93c66c675dca">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
+: <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a379e3ea1d52e95b96c25bed66e12ce0e">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a9ecc94fabf03d35374955388414609e2">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a41bc8a80a8eea119098e75d8ed8c6d57">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#af90706b0c0ed61a4215e53fbc46373ff">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_constant_input_iterator.html#ae6e17c95c16a82c524b2093e8738ce66">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_counting_input_iterator.html#a3e78162dd96fa3dbe2c5796da7bbcdbe">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a379e3ea1d52e95b96c25bed66e12ce0e">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a88249ddcb876c6a729bd93c66c675dca">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_transform_input_iterator.html#ac5e49ffffc8181a63c29b25054d97e36">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
 </li>
 <li>operator-()
-: <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a4edc9b6609fac1ac62303d32ca21169a">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a2b2d556e2e9483245aaec4b9da110b60">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
+: <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a103d724220bc19e5910d17ef24b2a6f8">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#acd474cb5beebddd501b901554d501958">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#a4d5e8cdf6e9f7b575c4bf9206489a6e4">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_constant_input_iterator.html#a94882cefa58d9800c4ece240d6633b5b">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_constant_input_iterator.html#a38f0679cfa12d3092c44223927ba8780">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_counting_input_iterator.html#a1f524d491834020f1a5d6b275e6b2911">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a8081970b5bc9de1a13cd0e9a75915f48">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_transform_input_iterator.html#a708bd1141c9318e3a1dc5f21bfc5a0d9">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a103d724220bc19e5910d17ef24b2a6f8">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a9c9d7051350cec95f15eea1da35bd081">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a64841968b926b829e8eef247e111b57f">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_transform_input_iterator.html#a2edd714aa2c90a86c104b205ecfabc7a">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_counting_input_iterator.html#a1875e7aad8a49756a2a439fc0605bc8a">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_transform_input_iterator.html#a708bd1141c9318e3a1dc5f21bfc5a0d9">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a2b2d556e2e9483245aaec4b9da110b60">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_constant_input_iterator.html#a94882cefa58d9800c4ece240d6633b5b">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_arg_index_input_iterator.html#ac47c90b1bda91b67e7a6aab76b69bf7a">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_transform_input_iterator.html#a2edd714aa2c90a86c104b205ecfabc7a">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a4edc9b6609fac1ac62303d32ca21169a">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
 </li>
 <li>operator-=()
-: <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a8b91b2e6d55844ebcd3c8cd93e886880">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_counting_input_iterator.html#a1c33f1465e4bb84a10bf580c3d5e4f1a">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#a3b019be13224e35146324ac69d8bef5e">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_constant_input_iterator.html#ac464d5e300da6718a1dfe5e4c302b2d6">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a509d3555cb5bad94484da7fde61a3147">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
+: <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#a3b019be13224e35146324ac69d8bef5e">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a8ee93d1301f7690db893595169405e09">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a223d782b6abddd640dbcb99a0ea5cca1">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_constant_input_iterator.html#ac464d5e300da6718a1dfe5e4c302b2d6">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_counting_input_iterator.html#a1c33f1465e4bb84a10bf580c3d5e4f1a">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_transform_input_iterator.html#a7e37ef48f34630c9ecc1bbf41c25b9c5">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a8ee93d1301f7690db893595169405e09">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a509d3555cb5bad94484da7fde61a3147">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a8b91b2e6d55844ebcd3c8cd93e886880">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
 </li>
 <li>operator-&gt;()
-: <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a505ed4041dde85c030c56538974f1aa6">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
+: <a class="el" href="classcub_1_1_arg_index_input_iterator.html#ade9ae7920b62507e07895b052caa325b">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_constant_input_iterator.html#a349352574eae5a46831ca2cc27942c0f">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a909602025800c1f44525d95a65ad1bd4">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_transform_input_iterator.html#afa7d636e59396d459ea3efbe66ff8e8f">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_arg_index_input_iterator.html#ade9ae7920b62507e07895b052caa325b">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a505ed4041dde85c030c56538974f1aa6">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_counting_input_iterator.html#a2e9c03abaf7baead2ba40bc4d01bc411">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a909602025800c1f44525d95a65ad1bd4">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a6dc70a54c23f5d20c3b04a34b01309f9">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_transform_input_iterator.html#afa7d636e59396d459ea3efbe66ff8e8f">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
 </li>
 <li>operator==()
 : <a class="el" href="classcub_1_1_constant_input_iterator.html#a9519d47086ca7e8501d29a4f73f4b85f">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a38095c9cb635d8ba5ded2090ab671e20">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a8ec66485cd92af668d62850e34f09402">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_transform_input_iterator.html#ac7a802ef64ec191fdfecbadf0434e00f">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a265939c1f71b392896a5fd07d1d189b9">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a9925c001d0a1dd40b35b8ce831e85f13">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#ac37eaae76e6aee349fd39933838df70d">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_counting_input_iterator.html#a96552e3b46084aeccc6e4219a1c93336">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_transform_input_iterator.html#ac7a802ef64ec191fdfecbadf0434e00f">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#ac37eaae76e6aee349fd39933838df70d">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a265939c1f71b392896a5fd07d1d189b9">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
 </li>
 <li>operator[]()
-: <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a4654e821ae09a83f6e904a8fe57ed515">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#aaef072ce96b8fe70786c6742ddaf2b89">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
+: <a class="el" href="classcub_1_1_transform_input_iterator.html#a05925d7ab0f9aaf16d420d41ad6c301c">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_counting_input_iterator.html#ad59d5567ddf4aa4ac6989581d5c65e43">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_transform_input_iterator.html#a05925d7ab0f9aaf16d420d41ad6c301c">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a60fd926e1e100dee7a22088543a3b647">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a4654e821ae09a83f6e904a8fe57ed515">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a626335c517f746941c188f0e71784b2e">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#aaef072ce96b8fe70786c6742ddaf2b89">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a23be223e3feb98da6c2828cb94d45ed6">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_constant_input_iterator.html#af90df1390eb32cdf420dfb3185a79de9">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a60fd926e1e100dee7a22088543a3b647">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
 </li>
 </ul>
 </div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_func_0x72.html b/docs/html/functions_func_0x72.html
index d12e02d791..babf2a92eb 100644
--- a/docs/html/functions_func_0x72.html
+++ b/docs/html/functions_func_0x72.html
@@ -129,13 +129,13 @@
 <h3><a class="anchor" id="index_r"></a>- r -</h3><ul>
 <li>Reduce()
 : <a class="el" href="classcub_1_1_block_reduce.html#a089953b3bdfe7c48208632d0cc2ac1fb">cub::BlockReduce&lt; T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;</a>
-, <a class="el" href="structcub_1_1_device_reduce.html#a326ef5ae888b92442295c26190a6c39c">cub::DeviceReduce</a>
-, <a class="el" href="classcub_1_1_warp_reduce.html#a0dd72fc4cf7e1ecf59e8b15bd6819185">cub::WarpReduce&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;</a>
+, <a class="el" href="structcub_1_1_device_reduce.html#aa4adabeb841b852a7a5ecf4f99a2daeb">cub::DeviceReduce</a>
+, <a class="el" href="structcub_1_1_device_segmented_reduce.html#ad9b73f245930740c4d8786fc1a812364">cub::DeviceSegmentedReduce</a>
 , <a class="el" href="classcub_1_1_block_reduce.html#a0e947f6a1d812d21839632b87aaf32e5">cub::BlockReduce&lt; T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;</a>
-, <a class="el" href="classcub_1_1_warp_reduce.html#ad1ecfeddf0e7fb3f359cf61b60f4745a">cub::WarpReduce&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;</a>
+, <a class="el" href="classcub_1_1_warp_reduce.html#a0dd72fc4cf7e1ecf59e8b15bd6819185">cub::WarpReduce&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;</a>
 </li>
 <li>ReduceByKey()
-: <a class="el" href="structcub_1_1_device_reduce.html#a3206c7c11b4d894ca176482e86215b02">cub::DeviceReduce</a>
+: <a class="el" href="structcub_1_1_device_reduce.html#a303ae673ac32825f95912b4bfff8bef1">cub::DeviceReduce</a>
 </li>
 <li>ReduceByKeyOp()
 : <a class="el" href="structcub_1_1_reduce_by_key_op.html#aa9e777450d365effbfc54e4e7700dd24">cub::ReduceByKeyOp&lt; ReductionOpT &gt;</a>
@@ -148,7 +148,7 @@ <h3><a class="anchor" id="index_r"></a>- r -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:20 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_func_0x73.html b/docs/html/functions_func_0x73.html
index fdb4fb3b35..1bf566aa93 100644
--- a/docs/html/functions_func_0x73.html
+++ b/docs/html/functions_func_0x73.html
@@ -165,27 +165,28 @@ <h3><a class="anchor" id="index_s"></a>- s -</h3><ul>
 <li>SortPairs()
 : <a class="el" href="structcub_1_1_device_segmented_radix_sort.html#a4cffada9bba553f9871a34d926bbb148">cub::DeviceSegmentedRadixSort</a>
 , <a class="el" href="structcub_1_1_device_radix_sort.html#a31d7224d3f6c6a9309a0b84571f25eb9">cub::DeviceRadixSort</a>
-, <a class="el" href="structcub_1_1_device_segmented_radix_sort.html#a4c291958575f14acc6a9e6d3e2ea6597">cub::DeviceSegmentedRadixSort</a>
+, <a class="el" href="structcub_1_1_device_segmented_radix_sort.html#a6770adfa8e5a99c8015d9e6ab5ed8ca0">cub::DeviceSegmentedRadixSort</a>
 </li>
 <li>SortPairsDescending()
 : <a class="el" href="structcub_1_1_device_radix_sort.html#aea53a40e665f2d5ed683a6655a3d188e">cub::DeviceRadixSort</a>
 , <a class="el" href="structcub_1_1_device_segmented_radix_sort.html#aafce5852dfbfbeef027493c3791d6347">cub::DeviceSegmentedRadixSort</a>
 , <a class="el" href="structcub_1_1_device_radix_sort.html#add6a87f54c8058edba4b9e875bb0626a">cub::DeviceRadixSort</a>
+, <a class="el" href="structcub_1_1_device_segmented_radix_sort.html#a39de3d48166582a802cc7df1481d3ff4">cub::DeviceSegmentedRadixSort</a>
 </li>
 <li>Store()
-: <a class="el" href="classcub_1_1_block_store.html#a67d0aa8fcf37f92b2074e986581ffdf5">cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;</a>
+: <a class="el" href="classcub_1_1_block_store.html#a86fd777fd9bef8264787d756b16303ed">cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;</a>
 </li>
 <li>StripedToBlocked()
 : <a class="el" href="classcub_1_1_block_exchange.html#a2855471bbbcc4d66ac6a29d35a040e0c">cub::BlockExchange&lt; T, BLOCK_DIM_X, ITEMS_PER_THREAD, WARP_TIME_SLICING, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;</a>
 </li>
 <li>Sum()
-: <a class="el" href="classcub_1_1_block_reduce.html#a33ddffdde07275ab0c4e1bf61b0d9409">cub::BlockReduce&lt; T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;</a>
-, <a class="el" href="classcub_1_1_warp_reduce.html#abe4aeeabf8859a7582a0b5858b84ee7a">cub::WarpReduce&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;</a>
-, <a class="el" href="classcub_1_1_block_reduce.html#ac5d4591d9513f08b180d4112cb0c4c51">cub::BlockReduce&lt; T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;</a>
+: <a class="el" href="structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f">cub::DeviceReduce</a>
 , <a class="el" href="classcub_1_1_warp_scan.html#a25bd83f795e88b9260ec2bcbf846fb20">cub::WarpScan&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;</a>
-, <a class="el" href="classcub_1_1_block_reduce.html#a7632bd9c8950dd6a3528ca99fa3f0890">cub::BlockReduce&lt; T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;</a>
+, <a class="el" href="classcub_1_1_block_reduce.html#ac5d4591d9513f08b180d4112cb0c4c51">cub::BlockReduce&lt; T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;</a>
 , <a class="el" href="classcub_1_1_warp_reduce.html#ad9c4a8d85a7795cf220713f362c36f30">cub::WarpReduce&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;</a>
-, <a class="el" href="structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f">cub::DeviceReduce</a>
+, <a class="el" href="classcub_1_1_block_reduce.html#a33ddffdde07275ab0c4e1bf61b0d9409">cub::BlockReduce&lt; T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;</a>
+, <a class="el" href="classcub_1_1_warp_reduce.html#abe4aeeabf8859a7582a0b5858b84ee7a">cub::WarpReduce&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;</a>
+, <a class="el" href="structcub_1_1_device_segmented_reduce.html#aefdf8fcdfb5e5d76459ee222360924eb">cub::DeviceSegmentedReduce</a>
 </li>
 <li>SwizzleScanOp()
 : <a class="el" href="classcub_1_1_swizzle_scan_op.html#ae81a38aa9d94025da72544b9e0dc611b">cub::SwizzleScanOp&lt; ScanOp &gt;</a>
@@ -195,7 +196,7 @@ <h3><a class="anchor" id="index_s"></a>- s -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:20 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_func_0x74.html b/docs/html/functions_func_0x74.html
index 12b765153f..a870f9f500 100644
--- a/docs/html/functions_func_0x74.html
+++ b/docs/html/functions_func_0x74.html
@@ -144,7 +144,7 @@ <h3><a class="anchor" id="index_t"></a>- t -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:20 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_func_0x75.html b/docs/html/functions_func_0x75.html
index 76f02ebe73..78a048bb78 100644
--- a/docs/html/functions_func_0x75.html
+++ b/docs/html/functions_func_0x75.html
@@ -139,7 +139,7 @@ <h3><a class="anchor" id="index_u"></a>- u -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:20 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_func_0x77.html b/docs/html/functions_func_0x77.html
index 158899bf37..1f0c2074f7 100644
--- a/docs/html/functions_func_0x77.html
+++ b/docs/html/functions_func_0x77.html
@@ -141,7 +141,7 @@ <h3><a class="anchor" id="index_w"></a>- w -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:20 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_func_0x7e.html b/docs/html/functions_func_0x7e.html
index 4debbddaa1..9d08b15783 100644
--- a/docs/html/functions_func_0x7e.html
+++ b/docs/html/functions_func_0x7e.html
@@ -135,7 +135,7 @@ <h3><a class="anchor" id="index_0x7e"></a>- ~ -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:20 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_rela.html b/docs/html/functions_rela.html
index 5e75dda9e7..7fa735a2d8 100644
--- a/docs/html/functions_rela.html
+++ b/docs/html/functions_rela.html
@@ -118,7 +118,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:09 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:20 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_type.html b/docs/html/functions_type.html
index ab6d41f765..6d9c07e24f 100644
--- a/docs/html/functions_type.html
+++ b/docs/html/functions_type.html
@@ -90,7 +90,6 @@
     <ul class="tablist">
       <li><a href="#index_d"><span>d</span></a></li>
       <li><a href="#index_i"><span>i</span></a></li>
-      <li><a href="#index_k"><span>k</span></a></li>
       <li><a href="#index_p"><span>p</span></a></li>
       <li><a href="#index_r"><span>r</span></a></li>
       <li><a href="#index_s"><span>s</span></a></li>
@@ -117,18 +116,15 @@
 &#160;
 
 <h3><a class="anchor" id="index_d"></a>- d -</h3><ul>
-<li>DeviceWord
-: <a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">cub::Uninitialized&lt; T &gt;</a>
-</li>
 <li>difference_type
-: <a class="el" href="classcub_1_1_transform_input_iterator.html#ac05064e9ad33dc032452e3d09e8768f6">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
+: <a class="el" href="classcub_1_1_arg_index_input_iterator.html#acd5e39570dd51f4883547ef33f9ca8c6">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_transform_input_iterator.html#ac05064e9ad33dc032452e3d09e8768f6">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#aff1afce146f69adb655e2ff7366b869f">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a710012acbff3dd0c35822951e28148ea">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_counting_input_iterator.html#ac66dea2a687f0ad6ed0cbcd6217a3029">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_constant_input_iterator.html#ab45cc48afbfda7eaa3b4ea643e719c33">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#ac8cdd3a29db7e398f14e44a7aa054750">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a268f1e7a4b42a05c5b4d0bca2775c26e">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_arg_index_input_iterator.html#acd5e39570dd51f4883547ef33f9ca8c6">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
 </li>
 </ul>
 
@@ -147,13 +143,6 @@ <h3><a class="anchor" id="index_i"></a>- i -</h3><ul>
 </ul>
 
 
-<h3><a class="anchor" id="index_k"></a>- k -</h3><ul>
-<li>Key
-: <a class="el" href="structcub_1_1_key_value_pair.html#a39a9e0163c21635a508a6f9b3a681e4b">cub::KeyValuePair&lt; _Key, _Value &gt;</a>
-</li>
-</ul>
-
-
 <h3><a class="anchor" id="index_p"></a>- p -</h3><ul>
 <li>pointer
 : <a class="el" href="classcub_1_1_arg_index_input_iterator.html#afbc0675053718d968d81b1a16e28dc5c">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
@@ -205,25 +194,22 @@ <h3><a class="anchor" id="index_t"></a>- t -</h3><ul>
 
 
 <h3><a class="anchor" id="index_v"></a>- v -</h3><ul>
-<li>Value
-: <a class="el" href="structcub_1_1_key_value_pair.html#a9fd385872c09fd3757e9ba59b2754955">cub::KeyValuePair&lt; _Key, _Value &gt;</a>
-</li>
 <li>value_type
-: <a class="el" href="classcub_1_1_transform_input_iterator.html#a8230414e069b99a279e2afabed83fe52">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
+: <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a6c40ea1dc7c0923b9010f1e938e07d22">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
+, <a class="el" href="classcub_1_1_transform_input_iterator.html#a8230414e069b99a279e2afabed83fe52">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_tex_ref_input_iterator.html#a1de28970e3874202646e11984a0d9026">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_tex_obj_input_iterator.html#a4405a9e3d39593b7c468629dff098144">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_counting_input_iterator.html#a3afabb6a47c8cf7526220eb817e2a97f">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_constant_input_iterator.html#a72a3996ca30a1b2eb6f676923a2ee3ce">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_output_iterator.html#aba997f95620d692cbad05c74ef169fa8">cub::CacheModifiedOutputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
 , <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#a60689c564e2ade39722947eeaf40156a">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
-, <a class="el" href="classcub_1_1_arg_index_input_iterator.html#a6c40ea1dc7c0923b9010f1e938e07d22">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a>
 </li>
 </ul>
 </div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:20 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/functions_vars.html b/docs/html/functions_vars.html
index ee83ee44d5..5b69f70c08 100644
--- a/docs/html/functions_vars.html
+++ b/docs/html/functions_vars.html
@@ -103,15 +103,6 @@
 
 <div class="contents">
 &#160;<ul>
-<li>align0
-: <a class="el" href="structcub_1_1_key_value_pair.html#a6babb4cdcde74f159b5bff9e4a569d83">cub::KeyValuePair&lt; _Key, _Value &gt;</a>
-</li>
-<li>d_buffers
-: <a class="el" href="structcub_1_1_double_buffer.html#a38a2d8a9d5a36e9e4b9132166717a0b4">cub::DoubleBuffer&lt; T &gt;</a>
-</li>
-<li>key
-: <a class="el" href="structcub_1_1_key_value_pair.html#a648308be997ae9c79f47f6006ec7b494">cub::KeyValuePair&lt; _Key, _Value &gt;</a>
-</li>
 <li>op
 : <a class="el" href="structcub_1_1_inequality_wrapper.html#a4143eec319a0c231f0f5159fba11371d">cub::InequalityWrapper&lt; EqualityOp &gt;</a>
 , <a class="el" href="structcub_1_1_reduce_by_key_op.html#a4c6624999354ccf78a94226b9762bdbf">cub::ReduceByKeyOp&lt; ReductionOpT &gt;</a>
@@ -120,21 +111,12 @@
 <li>ptr
 : <a class="el" href="classcub_1_1_cache_modified_input_iterator.html#abcd5d6e49d039ebbcec5402738f21047">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a>
 </li>
-<li>selector
-: <a class="el" href="structcub_1_1_double_buffer.html#a9641172c847169904c4054856d7c26f4">cub::DoubleBuffer&lt; T &gt;</a>
-</li>
-<li>storage
-: <a class="el" href="structcub_1_1_uninitialized.html#a5fa7311d943222333e8c87497ff8e782">cub::Uninitialized&lt; T &gt;</a>
-</li>
-<li>value
-: <a class="el" href="structcub_1_1_key_value_pair.html#a468bef1440a66f45bd9b5193594bf1a4">cub::KeyValuePair&lt; _Key, _Value &gt;</a>
-</li>
 </ul>
 </div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:20 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/globals.html b/docs/html/globals.html
index 38e154c66b..cccfebde44 100644
--- a/docs/html/globals.html
+++ b/docs/html/globals.html
@@ -100,21 +100,21 @@
 
 <div class="contents">
 <div class="textblock">Here is a list of all documented file members with links to the documentation:</div><ul>
+<li>_CubLog
+: <a class="el" href="group___util_mgmt.html#ga25f361894440b53e637cb7ead2a4c0b7">util_debug.cuh</a>
+</li>
 <li>CubDebug
 : <a class="el" href="group___util_mgmt.html#ga84c3a4c178bf6593e0fad2b763606236">util_debug.cuh</a>
 </li>
 <li>CubDebugExit
 : <a class="el" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8">util_debug.cuh</a>
 </li>
-<li>CubLog
-: <a class="el" href="group___util_mgmt.html#ga6788287a780dc10c443aa1ab5ae9f0db">util_debug.cuh</a>
-</li>
 </ul>
 </div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:09 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:20 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/globals_defs.html b/docs/html/globals_defs.html
index 1dbdc00501..c5a94ba6ef 100644
--- a/docs/html/globals_defs.html
+++ b/docs/html/globals_defs.html
@@ -100,21 +100,21 @@
 
 <div class="contents">
 &#160;<ul>
+<li>_CubLog
+: <a class="el" href="group___util_mgmt.html#ga25f361894440b53e637cb7ead2a4c0b7">util_debug.cuh</a>
+</li>
 <li>CubDebug
 : <a class="el" href="group___util_mgmt.html#ga84c3a4c178bf6593e0fad2b763606236">util_debug.cuh</a>
 </li>
 <li>CubDebugExit
 : <a class="el" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8">util_debug.cuh</a>
 </li>
-<li>CubLog
-: <a class="el" href="group___util_mgmt.html#ga6788287a780dc10c443aa1ab5ae9f0db">util_debug.cuh</a>
-</li>
 </ul>
 </div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:09 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:20 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/group___block_module.html b/docs/html/group___block_module.html
index ecd6288059..2cfa7beae5 100644
--- a/docs/html/group___block_module.html
+++ b/docs/html/group___block_module.html
@@ -165,7 +165,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/group___collective_module.html b/docs/html/group___collective_module.html
index 1c945f8693..af6c9f00b0 100644
--- a/docs/html/group___collective_module.html
+++ b/docs/html/group___collective_module.html
@@ -105,7 +105,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/group___device_module.html b/docs/html/group___device_module.html
index 98d6b499f3..ac667d39a9 100644
--- a/docs/html/group___device_module.html
+++ b/docs/html/group___device_module.html
@@ -105,7 +105,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/group___segmented_module.html b/docs/html/group___segmented_module.html
index 0372c86ac8..4a0ea1e227 100644
--- a/docs/html/group___segmented_module.html
+++ b/docs/html/group___segmented_module.html
@@ -104,12 +104,20 @@
 .</div></div>
   <a href="structcub_1_1_device_segmented_radix_sort.html#details">More...</a><br/></td></tr>
 <tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_segmented_reduce.html">cub::DeviceSegmentedReduce</a></td></tr>
+<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight"><a class="el" href="structcub_1_1_device_segmented_reduce.html" title="DeviceSegmentedReduce provides device-wide, parallel operations for computing a reduction across mult...">DeviceSegmentedReduce</a> provides device-wide, parallel operations for computing a reduction across multiple sequences of data items residing within device-accessible memory. </p>
+<div class="image">
+<img src="reduce_logo.png" alt="reduce_logo.png"/>
+<div class="caption">
+.</div></div>
+  <a href="structcub_1_1_device_segmented_reduce.html#details">More...</a><br/></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table>
 </div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/group___single_module.html b/docs/html/group___single_module.html
index 91e0e05f2d..a2daf904d8 100644
--- a/docs/html/group___single_module.html
+++ b/docs/html/group___single_module.html
@@ -160,7 +160,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/group___util_io.html b/docs/html/group___util_io.html
index 3278479a69..a48549d553 100644
--- a/docs/html/group___util_io.html
+++ b/docs/html/group___util_io.html
@@ -529,7 +529,7 @@ <h2 class="groupheader">Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l00103">103</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
+<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l00104">104</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
 
 </div>
 </div>
@@ -598,7 +598,7 @@ <h2 class="groupheader">Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l00132">132</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
+<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l00134">134</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
 
 </div>
 </div>
@@ -658,7 +658,7 @@ <h2 class="groupheader">Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l00222">222</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
+<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l00228">228</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
 
 </div>
 </div>
@@ -714,7 +714,7 @@ <h2 class="groupheader">Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l00280">280</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
+<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l00286">286</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
 
 </div>
 </div>
@@ -777,7 +777,7 @@ <h2 class="groupheader">Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l00312">312</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
+<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l00317">317</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
 
 </div>
 </div>
@@ -847,7 +847,7 @@ <h2 class="groupheader">Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l00343">343</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
+<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l00349">349</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
 
 </div>
 </div>
@@ -903,7 +903,7 @@ <h2 class="groupheader">Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l00383">383</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
+<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l00392">392</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
 
 </div>
 </div>
@@ -966,7 +966,7 @@ <h2 class="groupheader">Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l00417">417</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
+<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l00427">427</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
 
 </div>
 </div>
@@ -1036,7 +1036,7 @@ <h2 class="groupheader">Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l00454">454</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
+<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l00465">465</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
 
 </div>
 </div>
@@ -1459,7 +1459,7 @@ <h2 class="groupheader">Function Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/group___util_iterator.html b/docs/html/group___util_iterator.html
index d5ee9f220a..ca7c0b5081 100644
--- a/docs/html/group___util_iterator.html
+++ b/docs/html/group___util_iterator.html
@@ -97,7 +97,7 @@
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="nested-classes"></a>
 Classes</h2></td></tr>
 <tr class="memitem:"><td class="memItemLeft" align="right" valign="top">class &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcub_1_1_arg_index_input_iterator.html">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td></tr>
-<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">A random-access input wrapper for pairing dereferenced values with their corresponding indices (forming <code><a class="el" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">KeyValuePair</a></code> tuples).  <a href="classcub_1_1_arg_index_input_iterator.html#details">More...</a><br/></td></tr>
+<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">A random-access input wrapper for pairing dereferenced values with their corresponding indices (forming <code>KeyValuePair</code> tuples).  <a href="classcub_1_1_arg_index_input_iterator.html#details">More...</a><br/></td></tr>
 <tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:"><td class="memItemLeft" align="right" valign="top">class &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcub_1_1_cache_modified_input_iterator.html">cub::CacheModifiedInputIterator&lt; MODIFIER, ValueType, OffsetT &gt;</a></td></tr>
 <tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">A random-access input wrapper for dereferencing array values using a PTX cache load modifier.  <a href="classcub_1_1_cache_modified_input_iterator.html#details">More...</a><br/></td></tr>
@@ -125,7 +125,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/group___util_mgmt.html b/docs/html/group___util_mgmt.html
index 9b38f3c2c6..863486dab3 100644
--- a/docs/html/group___util_mgmt.html
+++ b/docs/html/group___util_mgmt.html
@@ -112,10 +112,10 @@
 #define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8">CubDebugExit</a>(e)&#160;&#160;&#160;if (<a class="el" href="group___util_mgmt.html#ga5a175d2a88f63f7f1ab30e8b4f2cfa95">cub::Debug</a>((e), __FILE__, __LINE__)) { exit(1); }</td></tr>
 <tr class="memdesc:ga26211db894893b3cec946e4e537536f8"><td class="mdescLeft">&#160;</td><td class="mdescRight">Debug macro with exit. <br/></td></tr>
 <tr class="separator:ga26211db894893b3cec946e4e537536f8"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:ga6788287a780dc10c443aa1ab5ae9f0db"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="ga6788287a780dc10c443aa1ab5ae9f0db"></a>
-#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___util_mgmt.html#ga6788287a780dc10c443aa1ab5ae9f0db">CubLog</a>(format,...)&#160;&#160;&#160;printf(format,__VA_ARGS__);</td></tr>
-<tr class="memdesc:ga6788287a780dc10c443aa1ab5ae9f0db"><td class="mdescLeft">&#160;</td><td class="mdescRight">Log macro for printf statements. <br/></td></tr>
-<tr class="separator:ga6788287a780dc10c443aa1ab5ae9f0db"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga25f361894440b53e637cb7ead2a4c0b7"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="ga25f361894440b53e637cb7ead2a4c0b7"></a>
+#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___util_mgmt.html#ga25f361894440b53e637cb7ead2a4c0b7">_CubLog</a>(format,...)&#160;&#160;&#160;printf(format,__VA_ARGS__);</td></tr>
+<tr class="memdesc:ga25f361894440b53e637cb7ead2a4c0b7"><td class="mdescLeft">&#160;</td><td class="mdescRight">Log macro for printf statements. <br/></td></tr>
+<tr class="separator:ga25f361894440b53e637cb7ead2a4c0b7"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table><table class="memberdecls">
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="func-members"></a>
 Functions</h2></td></tr>
@@ -190,7 +190,7 @@ <h2 class="groupheader">Function Documentation</h2>
 <p>Type definition of the EmptyKernel kernel entry point</p>
 <p>Force EmptyKernel&lt;void&gt; to be generated if this class is used </p>
 
-<p>Definition at line <a class="el" href="util__device_8cuh_source.html#l00115">115</a> of file <a class="el" href="util__device_8cuh_source.html">util_device.cuh</a>.</p>
+<p>Definition at line <a class="el" href="util__device_8cuh_source.html#l00116">116</a> of file <a class="el" href="util__device_8cuh_source.html">util_device.cuh</a>.</p>
 
 </div>
 </div>
@@ -198,7 +198,7 @@ <h2 class="groupheader">Function Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/group___util_module.html b/docs/html/group___util_module.html
index d867982cc9..8837412284 100644
--- a/docs/html/group___util_module.html
+++ b/docs/html/group___util_module.html
@@ -114,24 +114,6 @@
 <tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_equals.html">cub::Equals&lt; A, B &gt;</a></td></tr>
 <tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">Type equality test.  <a href="structcub_1_1_equals.html#details">More...</a><br/></td></tr>
 <tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_null_type.html">cub::NullType</a></td></tr>
-<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">A simple "NULL" marker type.  <a href="structcub_1_1_null_type.html#details">More...</a><br/></td></tr>
-<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_int2_type.html">cub::Int2Type&lt; A &gt;</a></td></tr>
-<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static call dispatch based on constant integral values)  <a href="structcub_1_1_int2_type.html#details">More...</a><br/></td></tr>
-<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_cub_vector.html">cub::CubVector&lt; T, vec_elements &gt;</a></td></tr>
-<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">Exposes a member typedef <code>Type</code> that names the corresponding CUDA vector type if one exists. Otherwise <code>Type</code> refers to the <a class="el" href="structcub_1_1_cub_vector.html" title="Exposes a member typedef Type that names the corresponding CUDA vector type if one exists...">CubVector</a> structure itself, which will wrap the corresponding <code>x</code>, <code>y</code>, etc. vector fields.  <a href="structcub_1_1_cub_vector.html#details">More...</a><br/></td></tr>
-<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; T &gt;</a></td></tr>
-<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions.  <a href="structcub_1_1_uninitialized.html#details">More...</a><br/></td></tr>
-<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_key_value_pair.html">cub::KeyValuePair&lt; _Key, _Value &gt;</a></td></tr>
-<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">A key identifier paired with a corresponding value.  <a href="structcub_1_1_key_value_pair.html#details">More...</a><br/></td></tr>
-<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_double_buffer.html">cub::DoubleBuffer&lt; T &gt;</a></td></tr>
-<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">Double-buffer storage wrapper for multi-pass stream transformations that require more than one storage array for streaming intermediate results back and forth.  <a href="structcub_1_1_double_buffer.html#details">More...</a><br/></td></tr>
-<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_log2.html">cub::Log2&lt; N, CURRENT_VAL, COUNT &gt;</a></td></tr>
 <tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">Statically determine log2(N), rounded up.  <a href="structcub_1_1_log2.html#details">More...</a><br/></td></tr>
 <tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
@@ -188,7 +170,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/group___util_ptx.html b/docs/html/group___util_ptx.html
index 6196b73420..971b6951b8 100644
--- a/docs/html/group___util_ptx.html
+++ b/docs/html/group___util_ptx.html
@@ -216,7 +216,7 @@ <h2 class="groupheader">Function Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/group___warp_module.html b/docs/html/group___warp_module.html
index 5844e780dc..9247457ef7 100644
--- a/docs/html/group___warp_module.html
+++ b/docs/html/group___warp_module.html
@@ -351,7 +351,7 @@ <h2 class="groupheader">Function Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/hierarchy.html b/docs/html/hierarchy.html
index 210ad9ef40..4f8eda17e5 100644
--- a/docs/html/hierarchy.html
+++ b/docs/html/hierarchy.html
@@ -99,7 +99,7 @@
 <div class="contents">
 <div class="textblock">This inheritance list is sorted roughly, but not completely, alphabetically:</div><div class="directory">
 <div class="levels">[detail level <span onclick="javascript:toggleLevel(1);">1</span><span onclick="javascript:toggleLevel(2);">2</span>]</div><table class="directory">
-<tr id="row_0_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_arg_index_input_iterator.html" target="_self">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="desc">A random-access input wrapper for pairing dereferenced values with their corresponding indices (forming <code><a class="el" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">KeyValuePair</a></code> tuples) </td></tr>
+<tr id="row_0_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_arg_index_input_iterator.html" target="_self">cub::ArgIndexInputIterator&lt; InputIteratorT, OffsetT &gt;</a></td><td class="desc">A random-access input wrapper for pairing dereferenced values with their corresponding indices (forming <code>KeyValuePair</code> tuples) </td></tr>
 <tr id="row_1_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_arg_max.html" target="_self">cub::ArgMax</a></td><td class="desc">Arg max functor (keeps the value and offset of the first occurrence of the larger item) </td></tr>
 <tr id="row_2_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_arg_min.html" target="_self">cub::ArgMin</a></td><td class="desc">Arg min functor (keeps the value and offset of the first occurrence of the smallest item) </td></tr>
 <tr id="row_3_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_block_discontinuity.html" target="_self">cub::BlockDiscontinuity&lt; T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;</a></td><td class="desc">The <a class="el" href="classcub_1_1_block_discontinuity.html" title="The BlockDiscontinuity class provides collective methods for flagging discontinuities within an order...">BlockDiscontinuity</a> class provides <a href="index.html#sec0"><em>collective</em></a> methods for flagging discontinuities within an ordered set of items partitioned across a CUDA thread block. </p>
@@ -148,40 +148,44 @@
 <tr id="row_14_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_cast.html" target="_self">cub::Cast&lt; B &gt;</a></td><td class="desc">Default cast functor </td></tr>
 <tr id="row_15_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_constant_input_iterator.html" target="_self">cub::ConstantInputIterator&lt; ValueType, OffsetT &gt;</a></td><td class="desc">A random-access input generator for dereferencing a sequence of homogeneous values </td></tr>
 <tr id="row_16_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_counting_input_iterator.html" target="_self">cub::CountingInputIterator&lt; ValueType, OffsetT &gt;</a></td><td class="desc">A random-access input generator for dereferencing a sequence of incrementing integer values </td></tr>
-<tr id="row_17_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_cub_vector.html" target="_self">cub::CubVector&lt; T, vec_elements &gt;</a></td><td class="desc">Exposes a member typedef <code>Type</code> that names the corresponding CUDA vector type if one exists. Otherwise <code>Type</code> refers to the <a class="el" href="structcub_1_1_cub_vector.html" title="Exposes a member typedef Type that names the corresponding CUDA vector type if one exists...">CubVector</a> structure itself, which will wrap the corresponding <code>x</code>, <code>y</code>, etc. vector fields </td></tr>
-<tr id="row_18_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_histogram.html" target="_self">cub::DeviceHistogram</a></td><td class="desc"><a class="el" href="structcub_1_1_device_histogram.html" title="DeviceHistogram provides device-wide parallel operations for constructing histogram(s) from a sequenc...">DeviceHistogram</a> provides device-wide parallel operations for constructing histogram(s) from a sequence of samples data residing within device-accessible memory. </p>
+<tr id="row_17_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_histogram.html" target="_self">cub::DeviceHistogram</a></td><td class="desc"><a class="el" href="structcub_1_1_device_histogram.html" title="DeviceHistogram provides device-wide parallel operations for constructing histogram(s) from a sequenc...">DeviceHistogram</a> provides device-wide parallel operations for constructing histogram(s) from a sequence of samples data residing within device-accessible memory. </p>
 <div class="image">
 <img src="histogram_logo.png" alt="histogram_logo.png"/>
 </div>
  </td></tr>
-<tr id="row_19_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_partition.html" target="_self">cub::DevicePartition</a></td><td class="desc"><a class="el" href="structcub_1_1_device_partition.html" title="DevicePartition provides device-wide, parallel operations for partitioning sequences of data items re...">DevicePartition</a> provides device-wide, parallel operations for partitioning sequences of data items residing within device-accessible memory. </p>
+<tr id="row_18_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_partition.html" target="_self">cub::DevicePartition</a></td><td class="desc"><a class="el" href="structcub_1_1_device_partition.html" title="DevicePartition provides device-wide, parallel operations for partitioning sequences of data items re...">DevicePartition</a> provides device-wide, parallel operations for partitioning sequences of data items residing within device-accessible memory. </p>
 <div class="image">
 <img src="partition_logo.png" alt="partition_logo.png"/>
 </div>
  </td></tr>
-<tr id="row_20_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_radix_sort.html" target="_self">cub::DeviceRadixSort</a></td><td class="desc"><a class="el" href="structcub_1_1_device_radix_sort.html" title="DeviceRadixSort provides device-wide, parallel operations for computing a radix sort across a sequenc...">DeviceRadixSort</a> provides device-wide, parallel operations for computing a radix sort across a sequence of data items residing within device-accessible memory. </p>
+<tr id="row_19_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_radix_sort.html" target="_self">cub::DeviceRadixSort</a></td><td class="desc"><a class="el" href="structcub_1_1_device_radix_sort.html" title="DeviceRadixSort provides device-wide, parallel operations for computing a radix sort across a sequenc...">DeviceRadixSort</a> provides device-wide, parallel operations for computing a radix sort across a sequence of data items residing within device-accessible memory. </p>
 <div class="image">
 <img src="sorting_logo.png" alt="sorting_logo.png"/>
 </div>
  </td></tr>
-<tr id="row_21_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_reduce.html" target="_self">cub::DeviceReduce</a></td><td class="desc"><a class="el" href="structcub_1_1_device_reduce.html" title="DeviceReduce provides device-wide, parallel operations for computing a reduction across a sequence of...">DeviceReduce</a> provides device-wide, parallel operations for computing a reduction across a sequence of data items residing within device-accessible memory. </p>
+<tr id="row_20_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_reduce.html" target="_self">cub::DeviceReduce</a></td><td class="desc"><a class="el" href="structcub_1_1_device_reduce.html" title="DeviceReduce provides device-wide, parallel operations for computing a reduction across a sequence of...">DeviceReduce</a> provides device-wide, parallel operations for computing a reduction across a sequence of data items residing within device-accessible memory. </p>
 <div class="image">
 <img src="reduce_logo.png" alt="reduce_logo.png"/>
 </div>
  </td></tr>
-<tr id="row_22_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_run_length_encode.html" target="_self">cub::DeviceRunLengthEncode</a></td><td class="desc"><a class="el" href="structcub_1_1_device_run_length_encode.html" title="DeviceRunLengthEncode provides device-wide, parallel operations for demarcating &quot;runs&quot; of same-valued...">DeviceRunLengthEncode</a> provides device-wide, parallel operations for demarcating "runs" of same-valued items within a sequence residing within device-accessible memory. </p>
+<tr id="row_21_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_run_length_encode.html" target="_self">cub::DeviceRunLengthEncode</a></td><td class="desc"><a class="el" href="structcub_1_1_device_run_length_encode.html" title="DeviceRunLengthEncode provides device-wide, parallel operations for demarcating &quot;runs&quot; of same-valued...">DeviceRunLengthEncode</a> provides device-wide, parallel operations for demarcating "runs" of same-valued items within a sequence residing within device-accessible memory. </p>
 <div class="image">
 <img src="run_length_encode_logo.png" alt="run_length_encode_logo.png"/>
 </div>
  </td></tr>
-<tr id="row_23_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_scan.html" target="_self">cub::DeviceScan</a></td><td class="desc"><a class="el" href="structcub_1_1_device_scan.html" title="DeviceScan provides device-wide, parallel operations for computing a prefix scan across a sequence of...">DeviceScan</a> provides device-wide, parallel operations for computing a prefix scan across a sequence of data items residing within device-accessible memory. </p>
+<tr id="row_22_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_scan.html" target="_self">cub::DeviceScan</a></td><td class="desc"><a class="el" href="structcub_1_1_device_scan.html" title="DeviceScan provides device-wide, parallel operations for computing a prefix scan across a sequence of...">DeviceScan</a> provides device-wide, parallel operations for computing a prefix scan across a sequence of data items residing within device-accessible memory. </p>
 <div class="image">
 <img src="device_scan.png" alt="device_scan.png"/>
 </div>
  </td></tr>
-<tr id="row_24_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_segmented_radix_sort.html" target="_self">cub::DeviceSegmentedRadixSort</a></td><td class="desc"><a class="el" href="structcub_1_1_device_segmented_radix_sort.html" title="DeviceSegmentedRadixSort provides device-wide, parallel operations for computing a batched radix sort...">DeviceSegmentedRadixSort</a> provides device-wide, parallel operations for computing a batched radix sort across multiple, non-overlapping sequences of data items residing within device-accessible memory. </p>
+<tr id="row_23_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_segmented_radix_sort.html" target="_self">cub::DeviceSegmentedRadixSort</a></td><td class="desc"><a class="el" href="structcub_1_1_device_segmented_radix_sort.html" title="DeviceSegmentedRadixSort provides device-wide, parallel operations for computing a batched radix sort...">DeviceSegmentedRadixSort</a> provides device-wide, parallel operations for computing a batched radix sort across multiple, non-overlapping sequences of data items residing within device-accessible memory. </p>
 <div class="image">
 <img src="segmented_sorting_logo.png" alt="segmented_sorting_logo.png"/>
+</div>
+ </td></tr>
+<tr id="row_24_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_segmented_reduce.html" target="_self">cub::DeviceSegmentedReduce</a></td><td class="desc"><a class="el" href="structcub_1_1_device_segmented_reduce.html" title="DeviceSegmentedReduce provides device-wide, parallel operations for computing a reduction across mult...">DeviceSegmentedReduce</a> provides device-wide, parallel operations for computing a reduction across multiple sequences of data items residing within device-accessible memory. </p>
+<div class="image">
+<img src="reduce_logo.png" alt="reduce_logo.png"/>
 </div>
  </td></tr>
 <tr id="row_25_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_select.html" target="_self">cub::DeviceSelect</a></td><td class="desc"><a class="el" href="structcub_1_1_device_select.html" title="DeviceSelect provides device-wide, parallel operations for compacting selected items from sequences o...">DeviceSelect</a> provides device-wide, parallel operations for compacting selected items from sequences of data items residing within device-accessible memory. </p>
@@ -190,53 +194,48 @@
 </div>
  </td></tr>
 <tr id="row_26_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_device_spmv.html" target="_self">cub::DeviceSpmv</a></td><td class="desc"><a class="el" href="structcub_1_1_device_spmv.html" title="DeviceSpmv provides device-wide parallel operations for performing sparse-matrix * dense-vector multi...">DeviceSpmv</a> provides device-wide parallel operations for performing sparse-matrix * dense-vector multiplication (SpMV) </td></tr>
-<tr id="row_27_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_double_buffer.html" target="_self">cub::DoubleBuffer&lt; T &gt;</a></td><td class="desc">Double-buffer storage wrapper for multi-pass stream transformations that require more than one storage array for streaming intermediate results back and forth </td></tr>
-<tr id="row_28_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_equality.html" target="_self">cub::Equality</a></td><td class="desc">Default equality functor </td></tr>
-<tr id="row_29_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_equals.html" target="_self">cub::Equals&lt; A, B &gt;</a></td><td class="desc">Type equality test </td></tr>
-<tr id="row_30_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_if.html" target="_self">cub::If&lt; IF, ThenType, ElseType &gt;</a></td><td class="desc">Type selection (<code>IF ? ThenType : ElseType</code>) </td></tr>
-<tr id="row_31_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_inequality.html" target="_self">cub::Inequality</a></td><td class="desc">Default inequality functor </td></tr>
-<tr id="row_32_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_inequality_wrapper.html" target="_self">cub::InequalityWrapper&lt; EqualityOp &gt;</a></td><td class="desc"><a class="el" href="structcub_1_1_inequality.html" title="Default inequality functor. ">Inequality</a> functor (wraps equality functor) </td></tr>
-<tr id="row_33_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_int2_type.html" target="_self">cub::Int2Type&lt; A &gt;</a></td><td class="desc">Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static call dispatch based on constant integral values) </td></tr>
-<tr id="row_34_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_is_pointer.html" target="_self">cub::IsPointer&lt; Tp &gt;</a></td><td class="desc">Pointer vs. iterator </td></tr>
-<tr id="row_35_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_is_volatile.html" target="_self">cub::IsVolatile&lt; Tp &gt;</a></td><td class="desc">Volatile modifier test </td></tr>
-<tr id="row_36_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_key_value_pair.html" target="_self">cub::KeyValuePair&lt; _Key, _Value &gt;</a></td><td class="desc">A key identifier paired with a corresponding value </td></tr>
-<tr id="row_37_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_log2.html" target="_self">cub::Log2&lt; N, CURRENT_VAL, COUNT &gt;</a></td><td class="desc">Statically determine log2(N), rounded up </td></tr>
-<tr id="row_38_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_max.html" target="_self">cub::Max</a></td><td class="desc">Default max functor </td></tr>
-<tr id="row_39_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_min.html" target="_self">cub::Min</a></td><td class="desc">Default min functor </td></tr>
-<tr id="row_40_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_null_type.html" target="_self">cub::NullType</a></td><td class="desc">A simple "NULL" marker type </td></tr>
-<tr id="row_41_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_power_of_two.html" target="_self">cub::PowerOfTwo&lt; N &gt;</a></td><td class="desc">Statically determine if N is a power-of-two </td></tr>
-<tr id="row_42_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_reduce_by_key_op.html" target="_self">cub::ReduceByKeyOp&lt; ReductionOpT &gt;</a></td><td class="desc">&lt; Binary reduction operator to apply to values </td></tr>
-<tr id="row_43_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_reduce_by_segment_op.html" target="_self">cub::ReduceBySegmentOp&lt; ReductionOpT &gt;</a></td><td class="desc">Reduce-by-segment functor </td></tr>
-<tr id="row_44_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_remove_qualifiers.html" target="_self">cub::RemoveQualifiers&lt; Tp, Up &gt;</a></td><td class="desc">Removes <code>const</code> and <code>volatile</code> qualifiers from type <code>Tp</code> </td></tr>
-<tr id="row_45_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_sum.html" target="_self">cub::Sum</a></td><td class="desc">Default sum functor </td></tr>
-<tr id="row_46_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_swizzle_scan_op.html" target="_self">cub::SwizzleScanOp&lt; ScanOp &gt;</a></td><td class="desc">Binary operator wrapper for switching non-commutative scan arguments </td></tr>
-<tr id="row_47_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_tex_obj_input_iterator.html" target="_self">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a></td><td class="desc">A random-access input wrapper for dereferencing array values through texture cache. Uses newer Kepler-style texture objects </td></tr>
-<tr id="row_48_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_tex_ref_input_iterator.html" target="_self">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a></td><td class="desc">A random-access input wrapper for dereferencing array values through texture cache. Uses older Tesla/Fermi-style texture references </td></tr>
-<tr id="row_49_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_transform_input_iterator.html" target="_self">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a></td><td class="desc">A random-access input wrapper for transforming dereferenced values </td></tr>
-<tr id="row_50_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_uninitialized.html" target="_self">cub::Uninitialized&lt; T &gt;</a></td><td class="desc">A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions </td></tr>
-<tr id="row_51_"><td class="entry"><img id="arr_51_" src="ftv2mnode.png" alt="o" width="16" height="22" onclick="toggleFolder('51_')"/><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_uninitialized.html" target="_self">cub::Uninitialized&lt; _TempStorage &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_51_0_" class="even"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_discontinuity_1_1_temp_storage.html" target="_self">cub::BlockDiscontinuity&lt; T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_discontinuity.html" title="The BlockDiscontinuity class provides collective methods for flagging discontinuities within an order...">BlockDiscontinuity</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
-<tr id="row_51_1_"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_exchange_1_1_temp_storage.html" target="_self">cub::BlockExchange&lt; T, BLOCK_DIM_X, ITEMS_PER_THREAD, WARP_TIME_SLICING, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_exchange.html" title="The BlockExchange class provides collective methods for rearranging data partitioned across a CUDA th...">BlockExchange</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
-<tr id="row_51_2_" class="even"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_histogram_1_1_temp_storage.html" target="_self">cub::BlockHistogram&lt; T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_histogram.html" title="The BlockHistogram class provides collective methods for constructing block-wide histograms from data...">BlockHistogram</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
-<tr id="row_51_3_"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___t_r_a_n_s_p_o_s_e_00_01_d_u_m_m_y_01_4_1_1_temp_storage.html" target="_self">cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::LoadInternal&lt; BLOCK_LOAD_TRANSPOSE, DUMMY &gt;::TempStorage</a></td><td class="desc">Alias wrapper allowing storage to be unioned </td></tr>
-<tr id="row_51_4_" class="even"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_402c3164d23f1ec647db5dad06a54584.html" target="_self">cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::LoadInternal&lt; BLOCK_LOAD_WARP_TRANSPOSE, DUMMY &gt;::TempStorage</a></td><td class="desc">Alias wrapper allowing storage to be unioned </td></tr>
-<tr id="row_51_5_"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_e4c36dfe8f549604998f6c46cc8fbd1d.html" target="_self">cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::LoadInternal&lt; BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED, DUMMY &gt;::TempStorage</a></td><td class="desc">Alias wrapper allowing storage to be unioned </td></tr>
-<tr id="row_51_6_" class="even"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_load_1_1_temp_storage.html" target="_self">cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_load.html" title="The BlockLoad class provides collective data movement methods for loading a linear segment of items f...">BlockLoad</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
-<tr id="row_51_7_"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_radix_sort_1_1_temp_storage.html" target="_self">cub::BlockRadixSort&lt; KeyT, BLOCK_DIM_X, ITEMS_PER_THREAD, ValueT, RADIX_BITS, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_scan.html" title="The BlockScan class provides collective methods for computing a parallel prefix sum/scan of items par...">BlockScan</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
-<tr id="row_51_8_" class="even"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_reduce_1_1_temp_storage.html" target="_self">cub::BlockReduce&lt; T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_reduce.html" title="The BlockReduce class provides collective methods for computing a parallel reduction of items partiti...">BlockReduce</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
-<tr id="row_51_9_"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_scan_1_1_temp_storage.html" target="_self">cub::BlockScan&lt; T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_scan.html" title="The BlockScan class provides collective methods for computing a parallel prefix sum/scan of items par...">BlockScan</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
-<tr id="row_51_10_" class="even"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___t_r_a_n_s_p_o_s_e_00_09dfae03f13932c7dbdb41be30a5767ba.html" target="_self">cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::StoreInternal&lt; BLOCK_STORE_TRANSPOSE, DUMMY &gt;::TempStorage</a></td><td class="desc">Alias wrapper allowing storage to be unioned </td></tr>
-<tr id="row_51_11_"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_8d170856b7ed1df0ed565731a681b449.html" target="_self">cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::StoreInternal&lt; BLOCK_STORE_WARP_TRANSPOSE, DUMMY &gt;::TempStorage</a></td><td class="desc">Alias wrapper allowing storage to be unioned </td></tr>
-<tr id="row_51_12_" class="even"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_263becc1ca5b47586740c2f7bb0d0145.html" target="_self">cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::StoreInternal&lt; BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED, DUMMY &gt;::TempStorage</a></td><td class="desc">Alias wrapper allowing storage to be unioned </td></tr>
-<tr id="row_51_13_"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_store_1_1_temp_storage.html" target="_self">cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_store.html" title="The BlockStore class provides collective data movement methods for writing a blocked arrangement of i...">BlockStore</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
-<tr id="row_51_14_" class="even"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_warp_reduce_1_1_temp_storage.html" target="_self">cub::WarpReduce&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;::TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_warp_reduce.html" title="The WarpReduce class provides collective methods for computing a parallel reduction of items partitio...">WarpReduce</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
-<tr id="row_51_15_"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_warp_scan_1_1_temp_storage.html" target="_self">cub::WarpScan&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;::TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_warp_scan.html" title="The WarpScan class provides collective methods for computing a parallel prefix scan of items partitio...">WarpScan</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
-<tr id="row_52_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_warp_reduce.html" target="_self">cub::WarpReduce&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;</a></td><td class="desc">The <a class="el" href="classcub_1_1_warp_reduce.html" title="The WarpReduce class provides collective methods for computing a parallel reduction of items partitio...">WarpReduce</a> class provides <a href="index.html#sec0"><em>collective</em></a> methods for computing a parallel reduction of items partitioned across a CUDA thread warp. </p>
+<tr id="row_27_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_equality.html" target="_self">cub::Equality</a></td><td class="desc">Default equality functor </td></tr>
+<tr id="row_28_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_equals.html" target="_self">cub::Equals&lt; A, B &gt;</a></td><td class="desc">Type equality test </td></tr>
+<tr id="row_29_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_if.html" target="_self">cub::If&lt; IF, ThenType, ElseType &gt;</a></td><td class="desc">Type selection (<code>IF ? ThenType : ElseType</code>) </td></tr>
+<tr id="row_30_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_inequality.html" target="_self">cub::Inequality</a></td><td class="desc">Default inequality functor </td></tr>
+<tr id="row_31_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_inequality_wrapper.html" target="_self">cub::InequalityWrapper&lt; EqualityOp &gt;</a></td><td class="desc"><a class="el" href="structcub_1_1_inequality.html" title="Default inequality functor. ">Inequality</a> functor (wraps equality functor) </td></tr>
+<tr id="row_32_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_is_pointer.html" target="_self">cub::IsPointer&lt; Tp &gt;</a></td><td class="desc">Pointer vs. iterator </td></tr>
+<tr id="row_33_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_is_volatile.html" target="_self">cub::IsVolatile&lt; Tp &gt;</a></td><td class="desc">Volatile modifier test </td></tr>
+<tr id="row_34_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_log2.html" target="_self">cub::Log2&lt; N, CURRENT_VAL, COUNT &gt;</a></td><td class="desc">Statically determine log2(N), rounded up </td></tr>
+<tr id="row_35_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_max.html" target="_self">cub::Max</a></td><td class="desc">Default max functor </td></tr>
+<tr id="row_36_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_min.html" target="_self">cub::Min</a></td><td class="desc">Default min functor </td></tr>
+<tr id="row_37_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_power_of_two.html" target="_self">cub::PowerOfTwo&lt; N &gt;</a></td><td class="desc">Statically determine if N is a power-of-two </td></tr>
+<tr id="row_38_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_reduce_by_key_op.html" target="_self">cub::ReduceByKeyOp&lt; ReductionOpT &gt;</a></td><td class="desc">&lt; Binary reduction operator to apply to values </td></tr>
+<tr id="row_39_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_reduce_by_segment_op.html" target="_self">cub::ReduceBySegmentOp&lt; ReductionOpT &gt;</a></td><td class="desc">Reduce-by-segment functor </td></tr>
+<tr id="row_40_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_remove_qualifiers.html" target="_self">cub::RemoveQualifiers&lt; Tp, Up &gt;</a></td><td class="desc">Removes <code>const</code> and <code>volatile</code> qualifiers from type <code>Tp</code> </td></tr>
+<tr id="row_41_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_sum.html" target="_self">cub::Sum</a></td><td class="desc">Default sum functor </td></tr>
+<tr id="row_42_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_swizzle_scan_op.html" target="_self">cub::SwizzleScanOp&lt; ScanOp &gt;</a></td><td class="desc">Binary operator wrapper for switching non-commutative scan arguments </td></tr>
+<tr id="row_43_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_tex_obj_input_iterator.html" target="_self">cub::TexObjInputIterator&lt; T, OffsetT &gt;</a></td><td class="desc">A random-access input wrapper for dereferencing array values through texture cache. Uses newer Kepler-style texture objects </td></tr>
+<tr id="row_44_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_tex_ref_input_iterator.html" target="_self">cub::TexRefInputIterator&lt; T, UNIQUE_ID, OffsetT &gt;</a></td><td class="desc">A random-access input wrapper for dereferencing array values through texture cache. Uses older Tesla/Fermi-style texture references </td></tr>
+<tr id="row_45_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_transform_input_iterator.html" target="_self">cub::TransformInputIterator&lt; ValueType, ConversionOp, InputIteratorT, OffsetT &gt;</a></td><td class="desc">A random-access input wrapper for transforming dereferenced values </td></tr>
+<tr id="row_46_" class="even"><td class="entry"><img id="arr_46_" src="ftv2mnode.png" alt="o" width="16" height="22" onclick="toggleFolder('46_')"/><img src="ftv2cl.png" alt="C" width="24" height="22" /><b>Uninitialized</b></td><td class="desc"></td></tr>
+<tr id="row_46_0_"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_discontinuity_1_1_temp_storage.html" target="_self">cub::BlockDiscontinuity&lt; T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_discontinuity.html" title="The BlockDiscontinuity class provides collective methods for flagging discontinuities within an order...">BlockDiscontinuity</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
+<tr id="row_46_1_" class="even"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_exchange_1_1_temp_storage.html" target="_self">cub::BlockExchange&lt; T, BLOCK_DIM_X, ITEMS_PER_THREAD, WARP_TIME_SLICING, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_exchange.html" title="The BlockExchange class provides collective methods for rearranging data partitioned across a CUDA th...">BlockExchange</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
+<tr id="row_46_2_"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_histogram_1_1_temp_storage.html" target="_self">cub::BlockHistogram&lt; T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_histogram.html" title="The BlockHistogram class provides collective methods for constructing block-wide histograms from data...">BlockHistogram</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
+<tr id="row_46_3_" class="even"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___t_r_a_n_s_p_o_s_e_00_01_d_u_m_m_y_01_4_1_1_temp_storage.html" target="_self">cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::LoadInternal&lt; BLOCK_LOAD_TRANSPOSE, DUMMY &gt;::TempStorage</a></td><td class="desc">Alias wrapper allowing storage to be unioned </td></tr>
+<tr id="row_46_4_"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_402c3164d23f1ec647db5dad06a54584.html" target="_self">cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::LoadInternal&lt; BLOCK_LOAD_WARP_TRANSPOSE, DUMMY &gt;::TempStorage</a></td><td class="desc">Alias wrapper allowing storage to be unioned </td></tr>
+<tr id="row_46_5_" class="even"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_e4c36dfe8f549604998f6c46cc8fbd1d.html" target="_self">cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::LoadInternal&lt; BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED, DUMMY &gt;::TempStorage</a></td><td class="desc">Alias wrapper allowing storage to be unioned </td></tr>
+<tr id="row_46_6_"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_load_1_1_temp_storage.html" target="_self">cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_load.html" title="The BlockLoad class provides collective data movement methods for loading a linear segment of items f...">BlockLoad</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
+<tr id="row_46_7_" class="even"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_radix_sort_1_1_temp_storage.html" target="_self">cub::BlockRadixSort&lt; KeyT, BLOCK_DIM_X, ITEMS_PER_THREAD, ValueT, RADIX_BITS, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_scan.html" title="The BlockScan class provides collective methods for computing a parallel prefix sum/scan of items par...">BlockScan</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
+<tr id="row_46_8_"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_reduce_1_1_temp_storage.html" target="_self">cub::BlockReduce&lt; T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_reduce.html" title="The BlockReduce class provides collective methods for computing a parallel reduction of items partiti...">BlockReduce</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
+<tr id="row_46_9_" class="even"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_scan_1_1_temp_storage.html" target="_self">cub::BlockScan&lt; T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_scan.html" title="The BlockScan class provides collective methods for computing a parallel prefix sum/scan of items par...">BlockScan</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
+<tr id="row_46_10_"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___t_r_a_n_s_p_o_s_e_00_09dfae03f13932c7dbdb41be30a5767ba.html" target="_self">cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::StoreInternal&lt; BLOCK_STORE_TRANSPOSE, DUMMY &gt;::TempStorage</a></td><td class="desc">Alias wrapper allowing storage to be unioned </td></tr>
+<tr id="row_46_11_" class="even"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_8d170856b7ed1df0ed565731a681b449.html" target="_self">cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::StoreInternal&lt; BLOCK_STORE_WARP_TRANSPOSE, DUMMY &gt;::TempStorage</a></td><td class="desc">Alias wrapper allowing storage to be unioned </td></tr>
+<tr id="row_46_12_"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_263becc1ca5b47586740c2f7bb0d0145.html" target="_self">cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::StoreInternal&lt; BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED, DUMMY &gt;::TempStorage</a></td><td class="desc">Alias wrapper allowing storage to be unioned </td></tr>
+<tr id="row_46_13_" class="even"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_block_store_1_1_temp_storage.html" target="_self">cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_block_store.html" title="The BlockStore class provides collective data movement methods for writing a blocked arrangement of i...">BlockStore</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
+<tr id="row_46_14_"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_warp_reduce_1_1_temp_storage.html" target="_self">cub::WarpReduce&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;::TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_warp_reduce.html" title="The WarpReduce class provides collective methods for computing a parallel reduction of items partitio...">WarpReduce</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
+<tr id="row_46_15_" class="even"><td class="entry"><img src="ftv2vertline.png" alt="|" width="16" height="22" /><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structcub_1_1_warp_scan_1_1_temp_storage.html" target="_self">cub::WarpScan&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;::TempStorage</a></td><td class="desc">The operations exposed by <a class="el" href="classcub_1_1_warp_scan.html" title="The WarpScan class provides collective methods for computing a parallel prefix scan of items partitio...">WarpScan</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse </td></tr>
+<tr id="row_47_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_warp_reduce.html" target="_self">cub::WarpReduce&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;</a></td><td class="desc">The <a class="el" href="classcub_1_1_warp_reduce.html" title="The WarpReduce class provides collective methods for computing a parallel reduction of items partitio...">WarpReduce</a> class provides <a href="index.html#sec0"><em>collective</em></a> methods for computing a parallel reduction of items partitioned across a CUDA thread warp. </p>
 <div class="image">
 <img src="warp_reduce_logo.png" alt="warp_reduce_logo.png"/>
 </div>
  </td></tr>
-<tr id="row_53_"><td class="entry"><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_warp_scan.html" target="_self">cub::WarpScan&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;</a></td><td class="desc">The <a class="el" href="classcub_1_1_warp_scan.html" title="The WarpScan class provides collective methods for computing a parallel prefix scan of items partitio...">WarpScan</a> class provides <a href="index.html#sec0"><em>collective</em></a> methods for computing a parallel prefix scan of items partitioned across a CUDA thread warp. </p>
+<tr id="row_48_" class="even"><td class="entry"><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="classcub_1_1_warp_scan.html" target="_self">cub::WarpScan&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;</a></td><td class="desc">The <a class="el" href="classcub_1_1_warp_scan.html" title="The WarpScan class provides collective methods for computing a parallel prefix scan of items partitio...">WarpScan</a> class provides <a href="index.html#sec0"><em>collective</em></a> methods for computing a parallel prefix scan of items partitioned across a CUDA thread warp. </p>
 <div class="image">
 <img src="warp_scan_logo.png" alt="warp_scan_logo.png"/>
 </div>
@@ -247,7 +246,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/index.html b/docs/html/index.html
index c9b01439aa..67cb8b4a3e 100644
--- a/docs/html/index.html
+++ b/docs/html/index.html
@@ -115,7 +115,7 @@
 
 <a href="download_cub.html"><img src="download-icon.png" style="position:relative; bottom:-10px; border:0px;"/></a>
 &nbsp;&nbsp;
-<a href="download_cub.html"><em><b>Download CUB v1.5.1</b></em></a>
+<a href="download_cub.html"><em><b>Download CUB v1.5.0</b></em></a>
 
 </td><td>
 
@@ -394,10 +394,10 @@ <h1><a class="anchor" id="sec8"></a>
 <dl class="section user"><dt></dt><dd>CUB releases are labeled using version identifiers having three fields: <code>&lt;epoch&gt;.&lt;feature&gt;.&lt;update&gt;</code>. The <em>epoch</em> field corresponds to support for a major change or update to the CUDA programming model. The <em>feature</em> field corresponds to a stable set of features, functionality, and interface. The <em>update</em> field corresponds to a bug-fix or performance update for that feature set. At the moment, we do not publicly provide non-stable releases such as development snapshots, beta releases or rolling releases. (Feel free to contact us if you would like access to such things.)</dd></dl>
 <dl class="section user"><dt></dt><dd>The following table enumerates prior feature releases as well as update versions for the current feature release. <table class="doxtable">
 <tr>
-<td style="white-space: nowrap; vertical-align:text-top;">11/13/2015<br/>
+<td style="white-space: nowrap; vertical-align:text-top;">12/15/2015<br/>
  <a href="download_cub.html"><b>CUB v1.5.0</b></a> </td><td style="vertical-align:text-top;"><ul>
 <li>New Features:<ul>
-<li>Added new <a href="group___segmented_module.html">segmented device-wide operations</a> for sort, scan, and reduction primitives.</li>
+<li>Added new <a href="group___segmented_module.html">segmented device-wide operations</a> for sort and reduction primitives.</li>
 </ul>
 </li>
 <li>See the <a href="CHANGE_LOG.TXT">change-log</a> for further details, including bug-fixes </li>
@@ -410,7 +410,7 @@ <h1><a class="anchor" id="sec8"></a>
 <li>New Features:<ul>
 <li>Updated <a class="el" href="structcub_1_1_device_histogram.html" title="DeviceHistogram provides device-wide parallel operations for constructing histogram(s) from a sequenc...">cub::DeviceHistogram</a> implementation that provides the same "histogram-even" and "histogram-range" functionality as IPP/NPP. Provides extremely fast and, perhaps more importantly, very uniform performance response across diverse real-world datasets, including pathological (homogeneous) sample distributions (resilience)</li>
 <li>New <a class="el" href="structcub_1_1_device_spmv.html" title="DeviceSpmv provides device-wide parallel operations for performing sparse-matrix * dense-vector multi...">cub::DeviceSpmv</a> methods for multiplying sparse matrices by dense vectors, load-balanced using a merge-based parallel decomposition.</li>
-<li>New <a class="el" href="structcub_1_1_device_radix_sort.html" title="DeviceRadixSort provides device-wide, parallel operations for computing a radix sort across a sequenc...">cub::DeviceRadixSort</a> sorting entry-points that always return the sorted output into the specified buffer (as opposed to the <a class="el" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">cub::DoubleBuffer</a> in which it could end up in either buffer)</li>
+<li>New <a class="el" href="structcub_1_1_device_radix_sort.html" title="DeviceRadixSort provides device-wide, parallel operations for computing a radix sort across a sequenc...">cub::DeviceRadixSort</a> sorting entry-points that always return the sorted output into the specified buffer (as opposed to the cub::DoubleBuffer in which it could end up in either buffer)</li>
 <li>New <a class="el" href="structcub_1_1_device_run_length_encode.html#aa2318dc7a69f28a8c47d417aaf53db3a" title="Enumerates the starting offsets and lengths of all non-trivial runs (of length &gt; 1) of same-valued ke...">cub::DeviceRunLengthEncode::NonTrivialRuns</a> for finding the starting offsets and lengths of all non-trivial runs (i.e., length &gt; 1) of keys in a given sequence. (Useful for top-down partitioning algorithms like MSD sorting of very-large keys.)</li>
 </ul>
 </li>
@@ -435,7 +435,7 @@ <h1><a class="anchor" id="sec8"></a>
 <tr>
 <td style="white-space: nowrap; vertical-align:text-top;">04/01/2014<br/>
  <b>CUB v1.2.3</b> </td><td style="vertical-align:text-top;"><ul>
-<li>Added device-wide reduce-by-key (<a class="el" href="structcub_1_1_device_reduce.html#a3206c7c11b4d894ca176482e86215b02" title="Reduces segments of values, where segments are demarcated by corresponding runs of identical keys...">cub::DeviceReduce::ReduceByKey</a>, cub::DeviceReduce::RunLengthEncode)</li>
+<li>Added device-wide reduce-by-key (<a class="el" href="structcub_1_1_device_reduce.html#a303ae673ac32825f95912b4bfff8bef1" title="Reduces segments of values, where segments are demarcated by corresponding runs of identical keys...">cub::DeviceReduce::ReduceByKey</a>, cub::DeviceReduce::RunLengthEncode)</li>
 <li>Added MS VC++ project solutions for device-wide and block-wide examples</li>
 <li>See the <a href="CHANGE_LOG.TXT">change-log</a> for further details, including bug-fixes </li>
 </ul>
@@ -515,7 +515,7 @@ <h1><a class="anchor" id="sec10"></a>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/modules.html b/docs/html/modules.html
index 22dbfeb450..c63eae0437 100644
--- a/docs/html/modules.html
+++ b/docs/html/modules.html
@@ -110,7 +110,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/namespacecub.html.REMOVED.git-id b/docs/html/namespacecub.html.REMOVED.git-id
index c1e9fdd955..808ec4c14b 100644
--- a/docs/html/namespacecub.html.REMOVED.git-id
+++ b/docs/html/namespacecub.html.REMOVED.git-id
@@ -1 +1 @@
-0d06c26bfa4facfa64eff829bb8b41956185e96e
\ No newline at end of file
+d46891c8728626f5a8154a296afcfab28abe9920
\ No newline at end of file
diff --git a/docs/html/namespacemembers.html b/docs/html/namespacemembers.html
index 19cd422a27..8448ca3356 100644
--- a/docs/html/namespacemembers.html
+++ b/docs/html/namespacemembers.html
@@ -369,7 +369,7 @@ <h3><a class="anchor" id="index_w"></a>- w -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/namespacemembers_enum.html b/docs/html/namespacemembers_enum.html
index 624f8ea53b..41a86e1d93 100644
--- a/docs/html/namespacemembers_enum.html
+++ b/docs/html/namespacemembers_enum.html
@@ -128,7 +128,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/namespacemembers_eval.html b/docs/html/namespacemembers_eval.html
index c4114b43cd..d1ecfbff65 100644
--- a/docs/html/namespacemembers_eval.html
+++ b/docs/html/namespacemembers_eval.html
@@ -217,7 +217,7 @@ <h3><a class="anchor" id="index_s"></a>- s -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/namespacemembers_func.html b/docs/html/namespacemembers_func.html
index 81adfe8668..6edd277373 100644
--- a/docs/html/namespacemembers_func.html
+++ b/docs/html/namespacemembers_func.html
@@ -250,7 +250,7 @@ <h3><a class="anchor" id="index_w"></a>- w -</h3><ul>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/namespaces.html b/docs/html/namespaces.html
index a111806707..53e4c66ae3 100644
--- a/docs/html/namespaces.html
+++ b/docs/html/namespaces.html
@@ -106,7 +106,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/search/all_61.js b/docs/html/search/all_61.js
index 86015b2e45..28c4256365 100644
--- a/docs/html/search/all_61.js
+++ b/docs/html/search/all_61.js
@@ -1,13 +1,10 @@
 var searchData=
 [
-  ['alias',['Alias',['../structcub_1_1_uninitialized.html#a790b865325f19ac45cc84d3fed0d3038',1,'cub::Uninitialized']]],
-  ['align0',['align0',['../structcub_1_1_key_value_pair.html#a6babb4cdcde74f159b5bff9e4a569d83',1,'cub::KeyValuePair']]],
-  ['alternate',['Alternate',['../structcub_1_1_double_buffer.html#a3895f1d57aeb379bee79de56ace8e35a',1,'cub::DoubleBuffer']]],
   ['arg_5findex_5finput_5fiterator_2ecuh',['arg_index_input_iterator.cuh',['../arg__index__input__iterator_8cuh.html',1,'']]],
-  ['argindexinputiterator',['ArgIndexInputIterator',['../classcub_1_1_arg_index_input_iterator.html#a97a6c3755ab132c099e90616e5f67cb6',1,'cub::ArgIndexInputIterator']]],
   ['argindexinputiterator',['ArgIndexInputIterator',['../classcub_1_1_arg_index_input_iterator.html',1,'cub']]],
+  ['argindexinputiterator',['ArgIndexInputIterator',['../classcub_1_1_arg_index_input_iterator.html#a97a6c3755ab132c099e90616e5f67cb6',1,'cub::ArgIndexInputIterator']]],
   ['argmax',['ArgMax',['../structcub_1_1_arg_max.html',1,'cub']]],
-  ['argmax',['ArgMax',['../structcub_1_1_device_reduce.html#a07a9ecbf0b6db1882107f6adee1c4276',1,'cub::DeviceReduce']]],
+  ['argmax',['ArgMax',['../structcub_1_1_device_reduce.html#a07a9ecbf0b6db1882107f6adee1c4276',1,'cub::DeviceReduce::ArgMax()'],['../structcub_1_1_device_segmented_reduce.html#ad0e1f7eede9a0f93a379950eac7d8329',1,'cub::DeviceSegmentedReduce::ArgMax()']]],
   ['argmin',['ArgMin',['../structcub_1_1_arg_min.html',1,'cub']]],
-  ['argmin',['ArgMin',['../structcub_1_1_device_reduce.html#a6b35963e90120b6d2c76a6068b0340a9',1,'cub::DeviceReduce']]]
+  ['argmin',['ArgMin',['../structcub_1_1_device_reduce.html#a6b35963e90120b6d2c76a6068b0340a9',1,'cub::DeviceReduce::ArgMin()'],['../structcub_1_1_device_segmented_reduce.html#a085150ad8d55de8665b45a3f69f38bce',1,'cub::DeviceSegmentedReduce::ArgMin()']]]
 ];
diff --git a/docs/html/search/all_63.js b/docs/html/search/all_63.js
index fd0c551ba7..39cdb9f58f 100644
--- a/docs/html/search/all_63.js
+++ b/docs/html/search/all_63.js
@@ -8,8 +8,8 @@ var searchData=
   ['cachemodifiedoutputiterator',['CacheModifiedOutputIterator',['../classcub_1_1_cache_modified_output_iterator.html',1,'cub']]],
   ['cachemodifiedoutputiterator',['CacheModifiedOutputIterator',['../classcub_1_1_cache_modified_output_iterator.html#a8d19a08f1fdc19bf71521beb5ccfd5c7',1,'cub::CacheModifiedOutputIterator']]],
   ['cachestoremodifier',['CacheStoreModifier',['../group___util_io.html#ga648d25a92a50ca41cf73e93a35f21f37',1,'cub']]],
-  ['cachingdeviceallocator',['CachingDeviceAllocator',['../structcub_1_1_caching_device_allocator.html#a8819cc293615f15d1f08f41140349b90',1,'cub::CachingDeviceAllocator::CachingDeviceAllocator(unsigned int bin_growth, unsigned int min_bin, unsigned int max_bin, size_t max_cached_bytes, bool skip_cleanup=false)'],['../structcub_1_1_caching_device_allocator.html#ab08a4c3d066ec2303d07363a25466bff',1,'cub::CachingDeviceAllocator::CachingDeviceAllocator(bool skip_cleanup=false)']]],
   ['cachingdeviceallocator',['CachingDeviceAllocator',['../structcub_1_1_caching_device_allocator.html',1,'cub']]],
+  ['cachingdeviceallocator',['CachingDeviceAllocator',['../structcub_1_1_caching_device_allocator.html#a8819cc293615f15d1f08f41140349b90',1,'cub::CachingDeviceAllocator::CachingDeviceAllocator(unsigned int bin_growth, unsigned int min_bin, unsigned int max_bin, size_t max_cached_bytes, bool skip_cleanup=false)'],['../structcub_1_1_caching_device_allocator.html#ab08a4c3d066ec2303d07363a25466bff',1,'cub::CachingDeviceAllocator::CachingDeviceAllocator(bool skip_cleanup=false)']]],
   ['cast',['Cast',['../structcub_1_1_cast.html',1,'cub']]],
   ['composite',['Composite',['../classcub_1_1_block_histogram.html#a90ef25b7af7ce819367eca662a47dd2d',1,'cub::BlockHistogram']]],
   ['constant_5finput_5fiterator_2ecuh',['constant_input_iterator.cuh',['../constant__input__iterator_8cuh.html',1,'']]],
@@ -22,8 +22,5 @@ var searchData=
   ['cub',['cub',['../namespacecub.html',1,'']]],
   ['cub_2ecuh',['cub.cuh',['../cub_8cuh.html',1,'']]],
   ['cubdebug',['CubDebug',['../group___util_mgmt.html#ga84c3a4c178bf6593e0fad2b763606236',1,'util_debug.cuh']]],
-  ['cubdebugexit',['CubDebugExit',['../group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8',1,'util_debug.cuh']]],
-  ['cublog',['CubLog',['../group___util_mgmt.html#ga6788287a780dc10c443aa1ab5ae9f0db',1,'util_debug.cuh']]],
-  ['cubvector',['CubVector',['../structcub_1_1_cub_vector.html',1,'cub']]],
-  ['current',['Current',['../structcub_1_1_double_buffer.html#a861d3dff1a70d5e5926057a44d9b8724',1,'cub::DoubleBuffer']]]
+  ['cubdebugexit',['CubDebugExit',['../group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8',1,'util_debug.cuh']]]
 ];
diff --git a/docs/html/search/all_64.js b/docs/html/search/all_64.js
index 6265c56487..bc22ef1f2c 100644
--- a/docs/html/search/all_64.js
+++ b/docs/html/search/all_64.js
@@ -1,6 +1,5 @@
 var searchData=
 [
-  ['d_5fbuffers',['d_buffers',['../structcub_1_1_double_buffer.html#a38a2d8a9d5a36e9e4b9132166717a0b4',1,'cub::DoubleBuffer']]],
   ['debug',['Debug',['../group___util_mgmt.html#ga5a175d2a88f63f7f1ab30e8b4f2cfa95',1,'cub']]],
   ['device_5fhistogram_2ecuh',['device_histogram.cuh',['../device__histogram_8cuh.html',1,'']]],
   ['device_5fpartition_2ecuh',['device_partition.cuh',['../device__partition_8cuh.html',1,'']]],
@@ -9,6 +8,7 @@ var searchData=
   ['device_5frun_5flength_5fencode_2ecuh',['device_run_length_encode.cuh',['../device__run__length__encode_8cuh.html',1,'']]],
   ['device_5fscan_2ecuh',['device_scan.cuh',['../device__scan_8cuh.html',1,'']]],
   ['device_5fsegmented_5fradix_5fsort_2ecuh',['device_segmented_radix_sort.cuh',['../device__segmented__radix__sort_8cuh.html',1,'']]],
+  ['device_5fsegmented_5freduce_2ecuh',['device_segmented_reduce.cuh',['../device__segmented__reduce_8cuh.html',1,'']]],
   ['device_5fselect_2ecuh',['device_select.cuh',['../device__select_8cuh.html',1,'']]],
   ['device_5fspmv_2ecuh',['device_spmv.cuh',['../device__spmv_8cuh.html',1,'']]],
   ['deviceallocate',['DeviceAllocate',['../structcub_1_1_caching_device_allocator.html#ae1088ac6ba6e5d55832ffbc0b2a5d714',1,'cub::CachingDeviceAllocator::DeviceAllocate(int device, void **d_ptr, size_t bytes, cudaStream_t active_stream=0)'],['../structcub_1_1_caching_device_allocator.html#ab2fe4020cf8bc86ad886c797525ac8ea',1,'cub::CachingDeviceAllocator::DeviceAllocate(void **d_ptr, size_t bytes, cudaStream_t active_stream=0)']]],
@@ -21,11 +21,9 @@ var searchData=
   ['devicerunlengthencode',['DeviceRunLengthEncode',['../structcub_1_1_device_run_length_encode.html',1,'cub']]],
   ['devicescan',['DeviceScan',['../structcub_1_1_device_scan.html',1,'cub']]],
   ['devicesegmentedradixsort',['DeviceSegmentedRadixSort',['../structcub_1_1_device_segmented_radix_sort.html',1,'cub']]],
+  ['devicesegmentedreduce',['DeviceSegmentedReduce',['../structcub_1_1_device_segmented_reduce.html',1,'cub']]],
   ['deviceselect',['DeviceSelect',['../structcub_1_1_device_select.html',1,'cub']]],
   ['devicespmv',['DeviceSpmv',['../structcub_1_1_device_spmv.html',1,'cub']]],
-  ['deviceword',['DeviceWord',['../structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6',1,'cub::Uninitialized']]],
   ['difference_5ftype',['difference_type',['../classcub_1_1_arg_index_input_iterator.html#acd5e39570dd51f4883547ef33f9ca8c6',1,'cub::ArgIndexInputIterator::difference_type()'],['../classcub_1_1_cache_modified_input_iterator.html#a268f1e7a4b42a05c5b4d0bca2775c26e',1,'cub::CacheModifiedInputIterator::difference_type()'],['../classcub_1_1_cache_modified_output_iterator.html#ac8cdd3a29db7e398f14e44a7aa054750',1,'cub::CacheModifiedOutputIterator::difference_type()'],['../classcub_1_1_constant_input_iterator.html#ab45cc48afbfda7eaa3b4ea643e719c33',1,'cub::ConstantInputIterator::difference_type()'],['../classcub_1_1_counting_input_iterator.html#ac66dea2a687f0ad6ed0cbcd6217a3029',1,'cub::CountingInputIterator::difference_type()'],['../classcub_1_1_tex_obj_input_iterator.html#a710012acbff3dd0c35822951e28148ea',1,'cub::TexObjInputIterator::difference_type()'],['../classcub_1_1_tex_ref_input_iterator.html#aff1afce146f69adb655e2ff7366b869f',1,'cub::TexRefInputIterator::difference_type()'],['../classcub_1_1_transform_input_iterator.html#ac05064e9ad33dc032452e3d09e8768f6',1,'cub::TransformInputIterator::difference_type()']]],
-  ['doublebuffer',['DoubleBuffer',['../structcub_1_1_double_buffer.html',1,'cub']]],
-  ['doublebuffer',['DoubleBuffer',['../structcub_1_1_double_buffer.html#a8d51dcd30484a1f186e056eb7ab09979',1,'cub::DoubleBuffer::DoubleBuffer()'],['../structcub_1_1_double_buffer.html#a100c51f0e1aefdff4cdfe6480c89f59e',1,'cub::DoubleBuffer::DoubleBuffer(T *d_current, T *d_alternate)']]],
   ['device_2c_20kernel_2c_20and_20storage_20management',['Device, kernel, and storage management',['../group___util_mgmt.html',1,'']]]
 ];
diff --git a/docs/html/search/all_69.js b/docs/html/search/all_69.js
index b0aaecc6f4..03cdac8f56 100644
--- a/docs/html/search/all_69.js
+++ b/docs/html/search/all_69.js
@@ -9,7 +9,6 @@ var searchData=
   ['inequalitywrapper',['InequalityWrapper',['../structcub_1_1_inequality_wrapper.html#a933c170bd76eef23017c5ff6ea010f49',1,'cub::InequalityWrapper']]],
   ['inequalitywrapper',['InequalityWrapper',['../structcub_1_1_inequality_wrapper.html',1,'cub']]],
   ['inithistogram',['InitHistogram',['../classcub_1_1_block_histogram.html#a080faf88a47e73e8bfa9964cd8f26feb',1,'cub::BlockHistogram']]],
-  ['int2type',['Int2Type',['../structcub_1_1_int2_type.html',1,'cub']]],
   ['ispointer',['IsPointer',['../structcub_1_1_is_pointer.html',1,'cub']]],
   ['isvolatile',['IsVolatile',['../structcub_1_1_is_volatile.html',1,'cub']]],
   ['iterator_5fcategory',['iterator_category',['../classcub_1_1_arg_index_input_iterator.html#a2cff9aacc1ba59ae9f74735c257261e5',1,'cub::ArgIndexInputIterator::iterator_category()'],['../classcub_1_1_cache_modified_input_iterator.html#ada1fa84e98ba648e275d11e6e8fd4970',1,'cub::CacheModifiedInputIterator::iterator_category()'],['../classcub_1_1_cache_modified_output_iterator.html#af8ebc2633b875aa62423c20f24143938',1,'cub::CacheModifiedOutputIterator::iterator_category()'],['../classcub_1_1_constant_input_iterator.html#ae0ec37dfa33ad77189ffb5153d923364',1,'cub::ConstantInputIterator::iterator_category()'],['../classcub_1_1_counting_input_iterator.html#a984b01f872ab978ad5db527083a4c254',1,'cub::CountingInputIterator::iterator_category()'],['../classcub_1_1_tex_obj_input_iterator.html#a53ae83bf8355764e5ef91a11d9a3c714',1,'cub::TexObjInputIterator::iterator_category()'],['../classcub_1_1_tex_ref_input_iterator.html#a1c95fea4acb0f52261acc21a0daa9056',1,'cub::TexRefInputIterator::iterator_category()'],['../classcub_1_1_transform_input_iterator.html#a6166ef08c614347c39fcba5f0fb776f6',1,'cub::TransformInputIterator::iterator_category()']]]
diff --git a/docs/html/search/all_6d.js b/docs/html/search/all_6d.js
index b2ef9e1339..4deaba7c06 100644
--- a/docs/html/search/all_6d.js
+++ b/docs/html/search/all_6d.js
@@ -1,9 +1,9 @@
 var searchData=
 [
   ['max',['Max',['../structcub_1_1_max.html',1,'cub']]],
-  ['max',['Max',['../structcub_1_1_device_reduce.html#a974a241463ca892c8f5e73b879065e48',1,'cub::DeviceReduce']]],
+  ['max',['Max',['../structcub_1_1_device_reduce.html#a974a241463ca892c8f5e73b879065e48',1,'cub::DeviceReduce::Max()'],['../structcub_1_1_device_segmented_reduce.html#aa1f982f913c95d9974412b8fc0995183',1,'cub::DeviceSegmentedReduce::Max()']]],
   ['min',['Min',['../structcub_1_1_min.html',1,'cub']]],
-  ['min',['Min',['../structcub_1_1_device_reduce.html#a14d9c12a1beb9a04f77e903d07fa596a',1,'cub::DeviceReduce']]],
+  ['min',['Min',['../structcub_1_1_device_reduce.html#a14d9c12a1beb9a04f77e903d07fa596a',1,'cub::DeviceReduce::Min()'],['../structcub_1_1_device_segmented_reduce.html#a2fb8a073bb504afd0e05cd06d008ec29',1,'cub::DeviceSegmentedReduce::Min()']]],
   ['multihistogrameven',['MultiHistogramEven',['../structcub_1_1_device_histogram.html#a917e507d773ef1b11424ed8bc49e4d95',1,'cub::DeviceHistogram::MultiHistogramEven(void *d_temp_storage, size_t &amp;temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram[NUM_ACTIVE_CHANNELS], int num_levels[NUM_ACTIVE_CHANNELS], LevelT lower_level[NUM_ACTIVE_CHANNELS], LevelT upper_level[NUM_ACTIVE_CHANNELS], OffsetT num_pixels, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_histogram.html#a309c2e30bd62935e1cfcd623fb04b668',1,'cub::DeviceHistogram::MultiHistogramEven(void *d_temp_storage, size_t &amp;temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram[NUM_ACTIVE_CHANNELS], int num_levels[NUM_ACTIVE_CHANNELS], LevelT lower_level[NUM_ACTIVE_CHANNELS], LevelT upper_level[NUM_ACTIVE_CHANNELS], OffsetT num_row_pixels, OffsetT num_rows, size_t row_stride_bytes, cudaStream_t stream=0, bool debug_synchronous=false)']]],
   ['multihistogramrange',['MultiHistogramRange',['../structcub_1_1_device_histogram.html#a77511d5ae7cc53f0b5c984fd32ad29fe',1,'cub::DeviceHistogram::MultiHistogramRange(void *d_temp_storage, size_t &amp;temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram[NUM_ACTIVE_CHANNELS], int num_levels[NUM_ACTIVE_CHANNELS], LevelT *d_levels[NUM_ACTIVE_CHANNELS], OffsetT num_pixels, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_histogram.html#a5926ff12b29d9e8e67ecf2684071542f',1,'cub::DeviceHistogram::MultiHistogramRange(void *d_temp_storage, size_t &amp;temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram[NUM_ACTIVE_CHANNELS], int num_levels[NUM_ACTIVE_CHANNELS], LevelT *d_levels[NUM_ACTIVE_CHANNELS], OffsetT num_row_pixels, OffsetT num_rows, size_t row_stride_bytes, cudaStream_t stream=0, bool debug_synchronous=false)']]]
 ];
diff --git a/docs/html/search/all_6e.js b/docs/html/search/all_6e.js
index fffd0a9194..4312eb4584 100644
--- a/docs/html/search/all_6e.js
+++ b/docs/html/search/all_6e.js
@@ -1,5 +1,5 @@
 var searchData=
 [
   ['nontrivialruns',['NonTrivialRuns',['../structcub_1_1_device_run_length_encode.html#aa2318dc7a69f28a8c47d417aaf53db3a',1,'cub::DeviceRunLengthEncode']]],
-  ['nulltype',['NullType',['../structcub_1_1_null_type.html',1,'cub']]]
+  ['normalize',['normalize',['../classcub_1_1_arg_index_input_iterator.html#aa3dd1dfb19d87d8e0b5fc3c8773fd1dc',1,'cub::ArgIndexInputIterator']]]
 ];
diff --git a/docs/html/search/all_6f.js b/docs/html/search/all_6f.js
index 1f8f9b5687..243751e651 100644
--- a/docs/html/search/all_6f.js
+++ b/docs/html/search/all_6f.js
@@ -1,7 +1,7 @@
 var searchData=
 [
   ['op',['op',['../structcub_1_1_inequality_wrapper.html#a4143eec319a0c231f0f5159fba11371d',1,'cub::InequalityWrapper::op()'],['../structcub_1_1_reduce_by_segment_op.html#a562b1870ca14ca9886ad22535ca673f3',1,'cub::ReduceBySegmentOp::op()'],['../structcub_1_1_reduce_by_key_op.html#a4c6624999354ccf78a94226b9762bdbf',1,'cub::ReduceByKeyOp::op()']]],
-  ['operator_21_3d',['operator!=',['../structcub_1_1_key_value_pair.html#aa7bcc39b09d285d41c9c0226a49790f2',1,'cub::KeyValuePair::operator!=()'],['../classcub_1_1_arg_index_input_iterator.html#a17b0be0179d38b73e8a612d4d0202772',1,'cub::ArgIndexInputIterator::operator!=()'],['../classcub_1_1_cache_modified_input_iterator.html#acbe543a259aa55230c8a7be052fe311c',1,'cub::CacheModifiedInputIterator::operator!=()'],['../classcub_1_1_cache_modified_output_iterator.html#a9bb801bcb260204ff8312cbb21e77bad',1,'cub::CacheModifiedOutputIterator::operator!=()'],['../classcub_1_1_constant_input_iterator.html#af6345ecd7617d8eb9913d63be52d4a7f',1,'cub::ConstantInputIterator::operator!=()'],['../classcub_1_1_counting_input_iterator.html#af51f1a3ae34158f4f67964e278649240',1,'cub::CountingInputIterator::operator!=()'],['../classcub_1_1_tex_obj_input_iterator.html#af500ff312438db4a22ce466968fd4c28',1,'cub::TexObjInputIterator::operator!=()'],['../classcub_1_1_tex_ref_input_iterator.html#a3be490820de8d3cc7968601fc0fe34ab',1,'cub::TexRefInputIterator::operator!=()'],['../classcub_1_1_transform_input_iterator.html#a5d3245a6a5829ed10ff39af7cfecb3be',1,'cub::TransformInputIterator::operator!=()']]],
+  ['operator_21_3d',['operator!=',['../classcub_1_1_arg_index_input_iterator.html#a17b0be0179d38b73e8a612d4d0202772',1,'cub::ArgIndexInputIterator::operator!=()'],['../classcub_1_1_cache_modified_input_iterator.html#acbe543a259aa55230c8a7be052fe311c',1,'cub::CacheModifiedInputIterator::operator!=()'],['../classcub_1_1_cache_modified_output_iterator.html#a9bb801bcb260204ff8312cbb21e77bad',1,'cub::CacheModifiedOutputIterator::operator!=()'],['../classcub_1_1_constant_input_iterator.html#af6345ecd7617d8eb9913d63be52d4a7f',1,'cub::ConstantInputIterator::operator!=()'],['../classcub_1_1_counting_input_iterator.html#af51f1a3ae34158f4f67964e278649240',1,'cub::CountingInputIterator::operator!=()'],['../classcub_1_1_tex_obj_input_iterator.html#af500ff312438db4a22ce466968fd4c28',1,'cub::TexObjInputIterator::operator!=()'],['../classcub_1_1_tex_ref_input_iterator.html#a3be490820de8d3cc7968601fc0fe34ab',1,'cub::TexRefInputIterator::operator!=()'],['../classcub_1_1_transform_input_iterator.html#a5d3245a6a5829ed10ff39af7cfecb3be',1,'cub::TransformInputIterator::operator!=()']]],
   ['operator_28_29',['operator()',['../structcub_1_1_equality.html#a9db81c4cbcf79dbb8087b3c59593cae0',1,'cub::Equality::operator()()'],['../structcub_1_1_inequality.html#ac2b51f35b929dc74ea766b012e89b552',1,'cub::Inequality::operator()()'],['../structcub_1_1_inequality_wrapper.html#a97067089c24f8d6a09c914ab6a163c65',1,'cub::InequalityWrapper::operator()()'],['../structcub_1_1_sum.html#a1edd85dbc039f93c8e45eb2096704a86',1,'cub::Sum::operator()()'],['../structcub_1_1_max.html#ab06fa8091c6aa396fe127f37e0a545d3',1,'cub::Max::operator()()'],['../structcub_1_1_arg_max.html#abc8619b45e188b364d96c0bdf0b29c2d',1,'cub::ArgMax::operator()()'],['../structcub_1_1_min.html#ade40f60337afc51da556ed65d5708136',1,'cub::Min::operator()()'],['../structcub_1_1_arg_min.html#a57bab80de70f6401ea6899ca2488f710',1,'cub::ArgMin::operator()()'],['../structcub_1_1_cast.html#a649573c7f5d02ab09f176b5cc7cf1e5c',1,'cub::Cast::operator()()'],['../classcub_1_1_swizzle_scan_op.html#a9548d484ccfb5161d56d3db1003693e2',1,'cub::SwizzleScanOp::operator()()'],['../structcub_1_1_reduce_by_segment_op.html#aff56752999857ef68029cfa150d88d5e',1,'cub::ReduceBySegmentOp::operator()()'],['../structcub_1_1_reduce_by_key_op.html#ad5e0d929c9cc2447df47c0f2c6409c9a',1,'cub::ReduceByKeyOp::operator()()']]],
   ['operator_2a',['operator*',['../classcub_1_1_arg_index_input_iterator.html#a04e18ece1d7438dd929665e92091241f',1,'cub::ArgIndexInputIterator::operator*()'],['../classcub_1_1_cache_modified_input_iterator.html#a803d29cb5212f1eaa5f4a34415987768',1,'cub::CacheModifiedInputIterator::operator*()'],['../classcub_1_1_cache_modified_output_iterator.html#a4548b15114d6a8a3f714dcbf0f22e854',1,'cub::CacheModifiedOutputIterator::operator*()'],['../classcub_1_1_constant_input_iterator.html#afd9a8d22abd868d9fb13c235bd800eae',1,'cub::ConstantInputIterator::operator*()'],['../classcub_1_1_counting_input_iterator.html#a7acf3db367c8485a5145d9dbf47be1f7',1,'cub::CountingInputIterator::operator*()'],['../classcub_1_1_tex_obj_input_iterator.html#a8052b0744d233b7f9647d0eb6ba71583',1,'cub::TexObjInputIterator::operator*()'],['../classcub_1_1_tex_ref_input_iterator.html#a78417e7a9f78ded354cc152997c2d7d1',1,'cub::TexRefInputIterator::operator*()'],['../classcub_1_1_transform_input_iterator.html#ab84a2e02eeec401af33903dcb183d5ca',1,'cub::TransformInputIterator::operator*()']]],
   ['operator_2b',['operator+',['../classcub_1_1_arg_index_input_iterator.html#a1bf6ce760cfa6764c43dd13699601030',1,'cub::ArgIndexInputIterator::operator+()'],['../classcub_1_1_cache_modified_input_iterator.html#a455c2df363449c1b92628de54fb2bad5',1,'cub::CacheModifiedInputIterator::operator+()'],['../classcub_1_1_cache_modified_output_iterator.html#a487a5a434527081b425e5a36ea0d4dd3',1,'cub::CacheModifiedOutputIterator::operator+()'],['../classcub_1_1_constant_input_iterator.html#a1e37cefa0808ae8b8b0853e30013bfba',1,'cub::ConstantInputIterator::operator+()'],['../classcub_1_1_counting_input_iterator.html#a4491f4d5635571964f4d98cf1c3cc753',1,'cub::CountingInputIterator::operator+()'],['../classcub_1_1_tex_obj_input_iterator.html#a1284c246857d30e96e5bc1ea10d68116',1,'cub::TexObjInputIterator::operator+()'],['../classcub_1_1_tex_ref_input_iterator.html#a85fcecba8f3d7b4941b3fa446cddb50a',1,'cub::TexRefInputIterator::operator+()'],['../classcub_1_1_transform_input_iterator.html#a36a3eb7ede1ef22b2316fc281aed52ab',1,'cub::TransformInputIterator::operator+()']]],
diff --git a/docs/html/search/all_72.js b/docs/html/search/all_72.js
index be8bbdf5b2..9680b9e491 100644
--- a/docs/html/search/all_72.js
+++ b/docs/html/search/all_72.js
@@ -1,7 +1,7 @@
 var searchData=
 [
-  ['reduce',['Reduce',['../classcub_1_1_block_reduce.html#a089953b3bdfe7c48208632d0cc2ac1fb',1,'cub::BlockReduce::Reduce(T input, ReductionOp reduction_op)'],['../classcub_1_1_block_reduce.html#a81878a614ef3b39de654918fc1f6144d',1,'cub::BlockReduce::Reduce(T(&amp;inputs)[ITEMS_PER_THREAD], ReductionOp reduction_op)'],['../classcub_1_1_block_reduce.html#a0e947f6a1d812d21839632b87aaf32e5',1,'cub::BlockReduce::Reduce(T input, ReductionOp reduction_op, int num_valid)'],['../structcub_1_1_device_reduce.html#a326ef5ae888b92442295c26190a6c39c',1,'cub::DeviceReduce::Reduce()'],['../classcub_1_1_warp_reduce.html#a0dd72fc4cf7e1ecf59e8b15bd6819185',1,'cub::WarpReduce::Reduce(T input, ReductionOp reduction_op)'],['../classcub_1_1_warp_reduce.html#ad1ecfeddf0e7fb3f359cf61b60f4745a',1,'cub::WarpReduce::Reduce(T input, ReductionOp reduction_op, int valid_items)']]],
-  ['reducebykey',['ReduceByKey',['../structcub_1_1_device_reduce.html#a3206c7c11b4d894ca176482e86215b02',1,'cub::DeviceReduce']]],
+  ['reduce',['Reduce',['../classcub_1_1_block_reduce.html#a089953b3bdfe7c48208632d0cc2ac1fb',1,'cub::BlockReduce::Reduce(T input, ReductionOp reduction_op)'],['../classcub_1_1_block_reduce.html#a81878a614ef3b39de654918fc1f6144d',1,'cub::BlockReduce::Reduce(T(&amp;inputs)[ITEMS_PER_THREAD], ReductionOp reduction_op)'],['../classcub_1_1_block_reduce.html#a0e947f6a1d812d21839632b87aaf32e5',1,'cub::BlockReduce::Reduce(T input, ReductionOp reduction_op, int num_valid)'],['../structcub_1_1_device_reduce.html#aa4adabeb841b852a7a5ecf4f99a2daeb',1,'cub::DeviceReduce::Reduce()'],['../structcub_1_1_device_segmented_reduce.html#ad9b73f245930740c4d8786fc1a812364',1,'cub::DeviceSegmentedReduce::Reduce()'],['../classcub_1_1_warp_reduce.html#a0dd72fc4cf7e1ecf59e8b15bd6819185',1,'cub::WarpReduce::Reduce(T input, ReductionOp reduction_op)'],['../classcub_1_1_warp_reduce.html#ad1ecfeddf0e7fb3f359cf61b60f4745a',1,'cub::WarpReduce::Reduce(T input, ReductionOp reduction_op, int valid_items)']]],
+  ['reducebykey',['ReduceByKey',['../structcub_1_1_device_reduce.html#a303ae673ac32825f95912b4bfff8bef1',1,'cub::DeviceReduce']]],
   ['reducebykeyop',['ReduceByKeyOp',['../structcub_1_1_reduce_by_key_op.html',1,'cub']]],
   ['reducebykeyop',['ReduceByKeyOp',['../structcub_1_1_reduce_by_key_op.html#a88feb445a30081d205a7e1560c07bc6e',1,'cub::ReduceByKeyOp::ReduceByKeyOp()'],['../structcub_1_1_reduce_by_key_op.html#aa9e777450d365effbfc54e4e7700dd24',1,'cub::ReduceByKeyOp::ReduceByKeyOp(ReductionOpT op)']]],
   ['reducebysegmentop',['ReduceBySegmentOp',['../structcub_1_1_reduce_by_segment_op.html',1,'cub']]],
diff --git a/docs/html/search/all_73.js b/docs/html/search/all_73.js
index 36fb5b74f9..fbfd9969f9 100644
--- a/docs/html/search/all_73.js
+++ b/docs/html/search/all_73.js
@@ -6,7 +6,6 @@ var searchData=
   ['scattertostriped',['ScatterToStriped',['../classcub_1_1_block_exchange.html#a024c9172cda50cd0e2ab364bbe9be63d',1,'cub::BlockExchange::ScatterToStriped(T items[ITEMS_PER_THREAD], OffsetT ranks[ITEMS_PER_THREAD])'],['../classcub_1_1_block_exchange.html#a46a32397168592af2c1050c4061690c7',1,'cub::BlockExchange::ScatterToStriped(T items[ITEMS_PER_THREAD], OffsetT ranks[ITEMS_PER_THREAD], ValidFlag is_valid[ITEMS_PER_THREAD])']]],
   ['scattertostripedguarded',['ScatterToStripedGuarded',['../classcub_1_1_block_exchange.html#a83cdc3266e21fb11d5c37ac79dc66598',1,'cub::BlockExchange']]],
   ['segmented_2dproblem_20_28batch_29',['Segmented-problem (batch)',['../group___segmented_module.html',1,'']]],
-  ['selector',['selector',['../structcub_1_1_double_buffer.html#a9641172c847169904c4054856d7c26f4',1,'cub::DoubleBuffer']]],
   ['self_5ftype',['self_type',['../classcub_1_1_arg_index_input_iterator.html#a6ad07b9b511ecbd6219756ebeffd6be6',1,'cub::ArgIndexInputIterator::self_type()'],['../classcub_1_1_cache_modified_input_iterator.html#af26facb4f00568a940d5529047bd90d7',1,'cub::CacheModifiedInputIterator::self_type()'],['../classcub_1_1_cache_modified_output_iterator.html#abca48cb8f0172b355ba465ff3c6308c2',1,'cub::CacheModifiedOutputIterator::self_type()'],['../classcub_1_1_constant_input_iterator.html#a0d85b740ebf81e48973e2c6cf9d0e812',1,'cub::ConstantInputIterator::self_type()'],['../classcub_1_1_counting_input_iterator.html#a4e89463441c1c008e7610473ae8d7435',1,'cub::CountingInputIterator::self_type()'],['../classcub_1_1_tex_obj_input_iterator.html#aa7e85f2b2f4d580e3ded497028604a5c',1,'cub::TexObjInputIterator::self_type()'],['../classcub_1_1_tex_ref_input_iterator.html#a03d63027c130152d31623eee579f8506',1,'cub::TexRefInputIterator::self_type()'],['../classcub_1_1_transform_input_iterator.html#ad9e475cc2aae6601422445fff16de9b4',1,'cub::TransformInputIterator::self_type()']]],
   ['setmaxcachedbytes',['SetMaxCachedBytes',['../structcub_1_1_caching_device_allocator.html#a2216ab13fdeb11ce61f04f69899fff33',1,'cub::CachingDeviceAllocator']]],
   ['shl_5fadd',['SHL_ADD',['../group___util_ptx.html#ga79b9963a4b033d545e42159e5f0b1621',1,'cub']]],
@@ -22,9 +21,8 @@ var searchData=
   ['sortdescendingblockedtostriped',['SortDescendingBlockedToStriped',['../classcub_1_1_block_radix_sort.html#ae89db4d439f0996b0dd7ad4b105e4f1d',1,'cub::BlockRadixSort::SortDescendingBlockedToStriped(KeyT(&amp;keys)[ITEMS_PER_THREAD], int begin_bit=0, int end_bit=sizeof(KeyT)*8)'],['../classcub_1_1_block_radix_sort.html#ab56d4d4c3a8da9ff718906bfc5d815c2',1,'cub::BlockRadixSort::SortDescendingBlockedToStriped(KeyT(&amp;keys)[ITEMS_PER_THREAD], ValueT(&amp;values)[ITEMS_PER_THREAD], int begin_bit=0, int end_bit=sizeof(KeyT)*8)']]],
   ['sortkeys',['SortKeys',['../structcub_1_1_device_radix_sort.html#a4f555afa8ac2949d9fef49fad52a50d6',1,'cub::DeviceRadixSort::SortKeys(void *d_temp_storage, size_t &amp;temp_storage_bytes, KeyT *d_keys_in, KeyT *d_keys_out, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_radix_sort.html#ad7b5dbc2e3fd44c21193f2b792706191',1,'cub::DeviceRadixSort::SortKeys(void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_segmented_radix_sort.html#ab6a917a29d441021949e197d3a639fba',1,'cub::DeviceSegmentedRadixSort::SortKeys(void *d_temp_storage, size_t &amp;temp_storage_bytes, KeyT *d_keys_in, KeyT *d_keys_out, int num_items, int num_segments, int *d_begin_offsets, int *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_segmented_radix_sort.html#ab8b433b55358ac507a7fcbba933cdbac',1,'cub::DeviceSegmentedRadixSort::SortKeys(void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, int num_items, int num_segments, int *d_begin_offsets, int *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)']]],
   ['sortkeysdescending',['SortKeysDescending',['../structcub_1_1_device_radix_sort.html#a24761009c4cc15fd2e54cb72663af0ef',1,'cub::DeviceRadixSort::SortKeysDescending(void *d_temp_storage, size_t &amp;temp_storage_bytes, KeyT *d_keys_in, KeyT *d_keys_out, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_radix_sort.html#a95a8939332405efec4527442c26c2628',1,'cub::DeviceRadixSort::SortKeysDescending(void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_segmented_radix_sort.html#a49ccbb6ca7a1d26e4b01e68e8da8a701',1,'cub::DeviceSegmentedRadixSort::SortKeysDescending(void *d_temp_storage, size_t &amp;temp_storage_bytes, KeyT *d_keys_in, KeyT *d_keys_out, int num_items, int num_segments, int *d_begin_offsets, int *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_segmented_radix_sort.html#a5c21fba171c718aaf9b3b3c27bda6a94',1,'cub::DeviceSegmentedRadixSort::SortKeysDescending(void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, int num_items, int num_segments, int *d_begin_offsets, int *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)']]],
-  ['sortpairs',['SortPairs',['../structcub_1_1_device_radix_sort.html#a31d7224d3f6c6a9309a0b84571f25eb9',1,'cub::DeviceRadixSort::SortPairs(void *d_temp_storage, size_t &amp;temp_storage_bytes, KeyT *d_keys_in, KeyT *d_keys_out, ValueT *d_values_in, ValueT *d_values_out, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_radix_sort.html#a0e0f38bafdc30403a68b3ff5c7b80027',1,'cub::DeviceRadixSort::SortPairs(void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, DoubleBuffer&lt; ValueT &gt; &amp;d_values, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_segmented_radix_sort.html#a4c291958575f14acc6a9e6d3e2ea6597',1,'cub::DeviceSegmentedRadixSort::SortPairs(void *d_temp_storage, size_t &amp;temp_storage_bytes, KeyT *d_keys_in, KeyT *d_keys_out, ValueT *d_values_in, ValueT *d_values_out, int num_items, OffsetT num_segments, OffsetT *d_begin_offsets, OffsetT *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_segmented_radix_sort.html#a4cffada9bba553f9871a34d926bbb148',1,'cub::DeviceSegmentedRadixSort::SortPairs(void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, DoubleBuffer&lt; ValueT &gt; &amp;d_values, int num_items, int num_segments, int *d_begin_offsets, int *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)']]],
+  ['sortpairs',['SortPairs',['../structcub_1_1_device_radix_sort.html#a31d7224d3f6c6a9309a0b84571f25eb9',1,'cub::DeviceRadixSort::SortPairs(void *d_temp_storage, size_t &amp;temp_storage_bytes, KeyT *d_keys_in, KeyT *d_keys_out, ValueT *d_values_in, ValueT *d_values_out, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_radix_sort.html#a0e0f38bafdc30403a68b3ff5c7b80027',1,'cub::DeviceRadixSort::SortPairs(void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, DoubleBuffer&lt; ValueT &gt; &amp;d_values, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_segmented_radix_sort.html#a6770adfa8e5a99c8015d9e6ab5ed8ca0',1,'cub::DeviceSegmentedRadixSort::SortPairs(void *d_temp_storage, size_t &amp;temp_storage_bytes, KeyT *d_keys_in, KeyT *d_keys_out, ValueT *d_values_in, ValueT *d_values_out, int num_items, int num_segments, int *d_begin_offsets, int *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_segmented_radix_sort.html#a4cffada9bba553f9871a34d926bbb148',1,'cub::DeviceSegmentedRadixSort::SortPairs(void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, DoubleBuffer&lt; ValueT &gt; &amp;d_values, int num_items, int num_segments, int *d_begin_offsets, int *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)']]],
   ['sortpairsdescending',['SortPairsDescending',['../structcub_1_1_device_radix_sort.html#add6a87f54c8058edba4b9e875bb0626a',1,'cub::DeviceRadixSort::SortPairsDescending(void *d_temp_storage, size_t &amp;temp_storage_bytes, KeyT *d_keys_in, KeyT *d_keys_out, ValueT *d_values_in, ValueT *d_values_out, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_radix_sort.html#aea53a40e665f2d5ed683a6655a3d188e',1,'cub::DeviceRadixSort::SortPairsDescending(void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, DoubleBuffer&lt; ValueT &gt; &amp;d_values, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_segmented_radix_sort.html#a39de3d48166582a802cc7df1481d3ff4',1,'cub::DeviceSegmentedRadixSort::SortPairsDescending(void *d_temp_storage, size_t &amp;temp_storage_bytes, KeyT *d_keys_in, KeyT *d_keys_out, ValueT *d_values_in, ValueT *d_values_out, int num_items, int num_segments, int *d_begin_offsets, int *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_segmented_radix_sort.html#aafce5852dfbfbeef027493c3791d6347',1,'cub::DeviceSegmentedRadixSort::SortPairsDescending(void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, DoubleBuffer&lt; ValueT &gt; &amp;d_values, int num_items, int num_segments, int *d_begin_offsets, int *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)']]],
-  ['storage',['storage',['../structcub_1_1_uninitialized.html#a5fa7311d943222333e8c87497ff8e782',1,'cub::Uninitialized']]],
   ['store',['Store',['../classcub_1_1_block_store.html#a86fd777fd9bef8264787d756b16303ed',1,'cub::BlockStore::Store(OutputIteratorT block_itr, T(&amp;items)[ITEMS_PER_THREAD])'],['../classcub_1_1_block_store.html#a67d0aa8fcf37f92b2074e986581ffdf5',1,'cub::BlockStore::Store(OutputIteratorT block_itr, T(&amp;items)[ITEMS_PER_THREAD], int valid_items)']]],
   ['store_5fcg',['STORE_CG',['../group___util_io.html#gga648d25a92a50ca41cf73e93a35f21f37aacea07ea298b89dd1962a40b4823652d',1,'cub']]],
   ['store_5fcs',['STORE_CS',['../group___util_io.html#gga648d25a92a50ca41cf73e93a35f21f37a00ae8891d1acad179d134fdd60d7839b',1,'cub']]],
@@ -37,8 +35,8 @@ var searchData=
   ['storedirectstriped',['StoreDirectStriped',['../group___util_io.html#gac688ffdb1ecacc06375295947144d233',1,'cub::StoreDirectStriped(int linear_tid, OutputIteratorT block_itr, T(&amp;items)[ITEMS_PER_THREAD])'],['../group___util_io.html#ga5af198f11043a66ebfaab51c8f4f6fc9',1,'cub::StoreDirectStriped(int linear_tid, OutputIteratorT block_itr, T(&amp;items)[ITEMS_PER_THREAD], int valid_items)']]],
   ['storedirectwarpstriped',['StoreDirectWarpStriped',['../group___util_io.html#ga9b2bb8f452cd26b5e297c9d5924fcf03',1,'cub::StoreDirectWarpStriped(int linear_tid, OutputIteratorT block_itr, T(&amp;items)[ITEMS_PER_THREAD])'],['../group___util_io.html#ga3978707feae6ad3e1b6467a6e525bc09',1,'cub::StoreDirectWarpStriped(int linear_tid, OutputIteratorT block_itr, T(&amp;items)[ITEMS_PER_THREAD], int valid_items)']]],
   ['stripedtoblocked',['StripedToBlocked',['../classcub_1_1_block_exchange.html#a2855471bbbcc4d66ac6a29d35a040e0c',1,'cub::BlockExchange']]],
-  ['sum',['Sum',['../classcub_1_1_block_reduce.html#a7632bd9c8950dd6a3528ca99fa3f0890',1,'cub::BlockReduce::Sum(T input)'],['../classcub_1_1_block_reduce.html#ac5d4591d9513f08b180d4112cb0c4c51',1,'cub::BlockReduce::Sum(T(&amp;inputs)[ITEMS_PER_THREAD])'],['../classcub_1_1_block_reduce.html#a33ddffdde07275ab0c4e1bf61b0d9409',1,'cub::BlockReduce::Sum(T input, int num_valid)'],['../structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f',1,'cub::DeviceReduce::Sum()'],['../classcub_1_1_warp_scan.html#a25bd83f795e88b9260ec2bcbf846fb20',1,'cub::WarpScan::Sum()'],['../classcub_1_1_warp_reduce.html#abe4aeeabf8859a7582a0b5858b84ee7a',1,'cub::WarpReduce::Sum(T input)'],['../classcub_1_1_warp_reduce.html#ad9c4a8d85a7795cf220713f362c36f30',1,'cub::WarpReduce::Sum(T input, int valid_items)']]],
   ['sum',['Sum',['../structcub_1_1_sum.html',1,'cub']]],
-  ['swizzlescanop',['SwizzleScanOp',['../classcub_1_1_swizzle_scan_op.html',1,'cub']]],
-  ['swizzlescanop',['SwizzleScanOp',['../classcub_1_1_swizzle_scan_op.html#ae81a38aa9d94025da72544b9e0dc611b',1,'cub::SwizzleScanOp']]]
+  ['sum',['Sum',['../classcub_1_1_block_reduce.html#a7632bd9c8950dd6a3528ca99fa3f0890',1,'cub::BlockReduce::Sum(T input)'],['../classcub_1_1_block_reduce.html#ac5d4591d9513f08b180d4112cb0c4c51',1,'cub::BlockReduce::Sum(T(&amp;inputs)[ITEMS_PER_THREAD])'],['../classcub_1_1_block_reduce.html#a33ddffdde07275ab0c4e1bf61b0d9409',1,'cub::BlockReduce::Sum(T input, int num_valid)'],['../structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f',1,'cub::DeviceReduce::Sum()'],['../structcub_1_1_device_segmented_reduce.html#aefdf8fcdfb5e5d76459ee222360924eb',1,'cub::DeviceSegmentedReduce::Sum()'],['../classcub_1_1_warp_scan.html#a25bd83f795e88b9260ec2bcbf846fb20',1,'cub::WarpScan::Sum()'],['../classcub_1_1_warp_reduce.html#abe4aeeabf8859a7582a0b5858b84ee7a',1,'cub::WarpReduce::Sum(T input)'],['../classcub_1_1_warp_reduce.html#ad9c4a8d85a7795cf220713f362c36f30',1,'cub::WarpReduce::Sum(T input, int valid_items)']]],
+  ['swizzlescanop',['SwizzleScanOp',['../classcub_1_1_swizzle_scan_op.html#ae81a38aa9d94025da72544b9e0dc611b',1,'cub::SwizzleScanOp']]],
+  ['swizzlescanop',['SwizzleScanOp',['../classcub_1_1_swizzle_scan_op.html',1,'cub']]]
 ];
diff --git a/docs/html/search/all_74.js b/docs/html/search/all_74.js
index b6d6e32a8b..10ec064243 100644
--- a/docs/html/search/all_74.js
+++ b/docs/html/search/all_74.js
@@ -2,22 +2,22 @@ var searchData=
 [
   ['tailsegmentedreduce',['TailSegmentedReduce',['../classcub_1_1_warp_reduce.html#ab89e501348f1ae4ade9a21c4e88d2d6c',1,'cub::WarpReduce']]],
   ['tailsegmentedsum',['TailSegmentedSum',['../classcub_1_1_warp_reduce.html#a4c73c5bb3636224abb3fe3e1e5e05705',1,'cub::WarpReduce']]],
-  ['tempstorage',['TempStorage',['../structcub_1_1_block_histogram_1_1_temp_storage.html',1,'cub::BlockHistogram']]],
-  ['tempstorage',['TempStorage',['../structcub_1_1_block_scan_1_1_temp_storage.html',1,'cub::BlockScan']]],
-  ['tempstorage',['TempStorage',['../structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___t_r_a_n_s_p_o_s_e_00_01_d_u_m_m_y_01_4_1_1_temp_storage.html',1,'cub::BlockLoad::LoadInternal&lt; BLOCK_LOAD_TRANSPOSE, DUMMY &gt;']]],
+  ['tempstorage',['TempStorage',['../structcub_1_1_block_load_1_1_temp_storage.html',1,'cub::BlockLoad']]],
   ['tempstorage',['TempStorage',['../structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_e4c36dfe8f549604998f6c46cc8fbd1d.html',1,'cub::BlockLoad::LoadInternal&lt; BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED, DUMMY &gt;']]],
-  ['tempstorage',['TempStorage',['../structcub_1_1_block_radix_sort_1_1_temp_storage.html',1,'cub::BlockRadixSort']]],
+  ['tempstorage',['TempStorage',['../structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___t_r_a_n_s_p_o_s_e_00_01_d_u_m_m_y_01_4_1_1_temp_storage.html',1,'cub::BlockLoad::LoadInternal&lt; BLOCK_LOAD_TRANSPOSE, DUMMY &gt;']]],
   ['tempstorage',['TempStorage',['../structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___t_r_a_n_s_p_o_s_e_00_09dfae03f13932c7dbdb41be30a5767ba.html',1,'cub::BlockStore::StoreInternal&lt; BLOCK_STORE_TRANSPOSE, DUMMY &gt;']]],
-  ['tempstorage',['TempStorage',['../structcub_1_1_warp_reduce_1_1_temp_storage.html',1,'cub::WarpReduce']]],
-  ['tempstorage',['TempStorage',['../structcub_1_1_block_discontinuity_1_1_temp_storage.html',1,'cub::BlockDiscontinuity']]],
-  ['tempstorage',['TempStorage',['../structcub_1_1_block_load_1_1_temp_storage.html',1,'cub::BlockLoad']]],
   ['tempstorage',['TempStorage',['../structcub_1_1_block_store_1_1_temp_storage.html',1,'cub::BlockStore']]],
+  ['tempstorage',['TempStorage',['../structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_8d170856b7ed1df0ed565731a681b449.html',1,'cub::BlockStore::StoreInternal&lt; BLOCK_STORE_WARP_TRANSPOSE, DUMMY &gt;']]],
+  ['tempstorage',['TempStorage',['../structcub_1_1_block_scan_1_1_temp_storage.html',1,'cub::BlockScan']]],
   ['tempstorage',['TempStorage',['../structcub_1_1_block_reduce_1_1_temp_storage.html',1,'cub::BlockReduce']]],
   ['tempstorage',['TempStorage',['../structcub_1_1_block_exchange_1_1_temp_storage.html',1,'cub::BlockExchange']]],
   ['tempstorage',['TempStorage',['../structcub_1_1_warp_scan_1_1_temp_storage.html',1,'cub::WarpScan']]],
-  ['tempstorage',['TempStorage',['../structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_8d170856b7ed1df0ed565731a681b449.html',1,'cub::BlockStore::StoreInternal&lt; BLOCK_STORE_WARP_TRANSPOSE, DUMMY &gt;']]],
-  ['tempstorage',['TempStorage',['../structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_402c3164d23f1ec647db5dad06a54584.html',1,'cub::BlockLoad::LoadInternal&lt; BLOCK_LOAD_WARP_TRANSPOSE, DUMMY &gt;']]],
   ['tempstorage',['TempStorage',['../structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_263becc1ca5b47586740c2f7bb0d0145.html',1,'cub::BlockStore::StoreInternal&lt; BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED, DUMMY &gt;']]],
+  ['tempstorage',['TempStorage',['../structcub_1_1_block_histogram_1_1_temp_storage.html',1,'cub::BlockHistogram']]],
+  ['tempstorage',['TempStorage',['../structcub_1_1_warp_reduce_1_1_temp_storage.html',1,'cub::WarpReduce']]],
+  ['tempstorage',['TempStorage',['../structcub_1_1_block_radix_sort_1_1_temp_storage.html',1,'cub::BlockRadixSort']]],
+  ['tempstorage',['TempStorage',['../structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_402c3164d23f1ec647db5dad06a54584.html',1,'cub::BlockLoad::LoadInternal&lt; BLOCK_LOAD_WARP_TRANSPOSE, DUMMY &gt;']]],
+  ['tempstorage',['TempStorage',['../structcub_1_1_block_discontinuity_1_1_temp_storage.html',1,'cub::BlockDiscontinuity']]],
   ['tex_5fobj_5finput_5fiterator_2ecuh',['tex_obj_input_iterator.cuh',['../tex__obj__input__iterator_8cuh.html',1,'']]],
   ['tex_5fref_5finput_5fiterator_2ecuh',['tex_ref_input_iterator.cuh',['../tex__ref__input__iterator_8cuh.html',1,'']]],
   ['texobjinputiterator',['TexObjInputIterator',['../classcub_1_1_tex_obj_input_iterator.html',1,'cub']]],
diff --git a/docs/html/search/all_75.js b/docs/html/search/all_75.js
index ad7083c3d9..786e910f7a 100644
--- a/docs/html/search/all_75.js
+++ b/docs/html/search/all_75.js
@@ -1,8 +1,6 @@
 var searchData=
 [
   ['unbindtexture',['UnbindTexture',['../classcub_1_1_tex_obj_input_iterator.html#a35b98dd35b014a052f996ee37afe1b67',1,'cub::TexObjInputIterator::UnbindTexture()'],['../classcub_1_1_tex_ref_input_iterator.html#a5f451683e1f40e5f702251b707e87956',1,'cub::TexRefInputIterator::UnbindTexture()']]],
-  ['uninitialized',['Uninitialized',['../structcub_1_1_uninitialized.html',1,'cub']]],
-  ['uninitialized_3c_20_5ftempstorage_20_3e',['Uninitialized&lt; _TempStorage &gt;',['../structcub_1_1_uninitialized.html',1,'cub']]],
   ['unique',['Unique',['../structcub_1_1_device_select.html#a5e9ea8f2581326391ea63e1808ba514d',1,'cub::DeviceSelect']]],
   ['util_5farch_2ecuh',['util_arch.cuh',['../util__arch_8cuh.html',1,'']]],
   ['util_5fdebug_2ecuh',['util_debug.cuh',['../util__debug_8cuh.html',1,'']]],
diff --git a/docs/html/search/all_76.js b/docs/html/search/all_76.js
index 8cf51fc0d2..bce599db9a 100644
--- a/docs/html/search/all_76.js
+++ b/docs/html/search/all_76.js
@@ -1,5 +1,4 @@
 var searchData=
 [
-  ['value',['Value',['../structcub_1_1_key_value_pair.html#a9fd385872c09fd3757e9ba59b2754955',1,'cub::KeyValuePair::Value()'],['../structcub_1_1_key_value_pair.html#a468bef1440a66f45bd9b5193594bf1a4',1,'cub::KeyValuePair::value()']]],
   ['value_5ftype',['value_type',['../classcub_1_1_arg_index_input_iterator.html#a6c40ea1dc7c0923b9010f1e938e07d22',1,'cub::ArgIndexInputIterator::value_type()'],['../classcub_1_1_cache_modified_input_iterator.html#a60689c564e2ade39722947eeaf40156a',1,'cub::CacheModifiedInputIterator::value_type()'],['../classcub_1_1_cache_modified_output_iterator.html#aba997f95620d692cbad05c74ef169fa8',1,'cub::CacheModifiedOutputIterator::value_type()'],['../classcub_1_1_constant_input_iterator.html#a72a3996ca30a1b2eb6f676923a2ee3ce',1,'cub::ConstantInputIterator::value_type()'],['../classcub_1_1_counting_input_iterator.html#a3afabb6a47c8cf7526220eb817e2a97f',1,'cub::CountingInputIterator::value_type()'],['../classcub_1_1_tex_obj_input_iterator.html#a4405a9e3d39593b7c468629dff098144',1,'cub::TexObjInputIterator::value_type()'],['../classcub_1_1_tex_ref_input_iterator.html#a1de28970e3874202646e11984a0d9026',1,'cub::TexRefInputIterator::value_type()'],['../classcub_1_1_transform_input_iterator.html#a8230414e069b99a279e2afabed83fe52',1,'cub::TransformInputIterator::value_type()']]]
 ];
diff --git a/docs/html/search/classes_63.js b/docs/html/search/classes_63.js
index 31c37749ac..fdd21ccef6 100644
--- a/docs/html/search/classes_63.js
+++ b/docs/html/search/classes_63.js
@@ -5,6 +5,5 @@ var searchData=
   ['cachingdeviceallocator',['CachingDeviceAllocator',['../structcub_1_1_caching_device_allocator.html',1,'cub']]],
   ['cast',['Cast',['../structcub_1_1_cast.html',1,'cub']]],
   ['constantinputiterator',['ConstantInputIterator',['../classcub_1_1_constant_input_iterator.html',1,'cub']]],
-  ['countinginputiterator',['CountingInputIterator',['../classcub_1_1_counting_input_iterator.html',1,'cub']]],
-  ['cubvector',['CubVector',['../structcub_1_1_cub_vector.html',1,'cub']]]
+  ['countinginputiterator',['CountingInputIterator',['../classcub_1_1_counting_input_iterator.html',1,'cub']]]
 ];
diff --git a/docs/html/search/classes_64.js b/docs/html/search/classes_64.js
index 0bf30727e5..7bd3da01db 100644
--- a/docs/html/search/classes_64.js
+++ b/docs/html/search/classes_64.js
@@ -7,7 +7,7 @@ var searchData=
   ['devicerunlengthencode',['DeviceRunLengthEncode',['../structcub_1_1_device_run_length_encode.html',1,'cub']]],
   ['devicescan',['DeviceScan',['../structcub_1_1_device_scan.html',1,'cub']]],
   ['devicesegmentedradixsort',['DeviceSegmentedRadixSort',['../structcub_1_1_device_segmented_radix_sort.html',1,'cub']]],
+  ['devicesegmentedreduce',['DeviceSegmentedReduce',['../structcub_1_1_device_segmented_reduce.html',1,'cub']]],
   ['deviceselect',['DeviceSelect',['../structcub_1_1_device_select.html',1,'cub']]],
-  ['devicespmv',['DeviceSpmv',['../structcub_1_1_device_spmv.html',1,'cub']]],
-  ['doublebuffer',['DoubleBuffer',['../structcub_1_1_double_buffer.html',1,'cub']]]
+  ['devicespmv',['DeviceSpmv',['../structcub_1_1_device_spmv.html',1,'cub']]]
 ];
diff --git a/docs/html/search/classes_69.js b/docs/html/search/classes_69.js
index 446b93123f..53e48188e6 100644
--- a/docs/html/search/classes_69.js
+++ b/docs/html/search/classes_69.js
@@ -3,7 +3,6 @@ var searchData=
   ['if',['If',['../structcub_1_1_if.html',1,'cub']]],
   ['inequality',['Inequality',['../structcub_1_1_inequality.html',1,'cub']]],
   ['inequalitywrapper',['InequalityWrapper',['../structcub_1_1_inequality_wrapper.html',1,'cub']]],
-  ['int2type',['Int2Type',['../structcub_1_1_int2_type.html',1,'cub']]],
   ['ispointer',['IsPointer',['../structcub_1_1_is_pointer.html',1,'cub']]],
   ['isvolatile',['IsVolatile',['../structcub_1_1_is_volatile.html',1,'cub']]]
 ];
diff --git a/docs/html/search/classes_74.js b/docs/html/search/classes_74.js
index cc574a1735..9cc662147d 100644
--- a/docs/html/search/classes_74.js
+++ b/docs/html/search/classes_74.js
@@ -1,21 +1,21 @@
 var searchData=
 [
-  ['tempstorage',['TempStorage',['../structcub_1_1_block_reduce_1_1_temp_storage.html',1,'cub::BlockReduce']]],
-  ['tempstorage',['TempStorage',['../structcub_1_1_block_histogram_1_1_temp_storage.html',1,'cub::BlockHistogram']]],
-  ['tempstorage',['TempStorage',['../structcub_1_1_block_scan_1_1_temp_storage.html',1,'cub::BlockScan']]],
-  ['tempstorage',['TempStorage',['../structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___t_r_a_n_s_p_o_s_e_00_01_d_u_m_m_y_01_4_1_1_temp_storage.html',1,'cub::BlockLoad::LoadInternal&lt; BLOCK_LOAD_TRANSPOSE, DUMMY &gt;']]],
-  ['tempstorage',['TempStorage',['../structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_e4c36dfe8f549604998f6c46cc8fbd1d.html',1,'cub::BlockLoad::LoadInternal&lt; BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED, DUMMY &gt;']]],
   ['tempstorage',['TempStorage',['../structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_263becc1ca5b47586740c2f7bb0d0145.html',1,'cub::BlockStore::StoreInternal&lt; BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED, DUMMY &gt;']]],
-  ['tempstorage',['TempStorage',['../structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_402c3164d23f1ec647db5dad06a54584.html',1,'cub::BlockLoad::LoadInternal&lt; BLOCK_LOAD_WARP_TRANSPOSE, DUMMY &gt;']]],
+  ['tempstorage',['TempStorage',['../structcub_1_1_block_load_1_1_temp_storage.html',1,'cub::BlockLoad']]],
+  ['tempstorage',['TempStorage',['../structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_e4c36dfe8f549604998f6c46cc8fbd1d.html',1,'cub::BlockLoad::LoadInternal&lt; BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED, DUMMY &gt;']]],
+  ['tempstorage',['TempStorage',['../structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___t_r_a_n_s_p_o_s_e_00_01_d_u_m_m_y_01_4_1_1_temp_storage.html',1,'cub::BlockLoad::LoadInternal&lt; BLOCK_LOAD_TRANSPOSE, DUMMY &gt;']]],
+  ['tempstorage',['TempStorage',['../structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___t_r_a_n_s_p_o_s_e_00_09dfae03f13932c7dbdb41be30a5767ba.html',1,'cub::BlockStore::StoreInternal&lt; BLOCK_STORE_TRANSPOSE, DUMMY &gt;']]],
   ['tempstorage',['TempStorage',['../structcub_1_1_block_discontinuity_1_1_temp_storage.html',1,'cub::BlockDiscontinuity']]],
+  ['tempstorage',['TempStorage',['../structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_402c3164d23f1ec647db5dad06a54584.html',1,'cub::BlockLoad::LoadInternal&lt; BLOCK_LOAD_WARP_TRANSPOSE, DUMMY &gt;']]],
+  ['tempstorage',['TempStorage',['../structcub_1_1_block_reduce_1_1_temp_storage.html',1,'cub::BlockReduce']]],
+  ['tempstorage',['TempStorage',['../structcub_1_1_block_histogram_1_1_temp_storage.html',1,'cub::BlockHistogram']]],
+  ['tempstorage',['TempStorage',['../structcub_1_1_warp_scan_1_1_temp_storage.html',1,'cub::WarpScan']]],
   ['tempstorage',['TempStorage',['../structcub_1_1_block_exchange_1_1_temp_storage.html',1,'cub::BlockExchange']]],
   ['tempstorage',['TempStorage',['../structcub_1_1_block_store_1_1_temp_storage.html',1,'cub::BlockStore']]],
-  ['tempstorage',['TempStorage',['../structcub_1_1_block_load_1_1_temp_storage.html',1,'cub::BlockLoad']]],
-  ['tempstorage',['TempStorage',['../structcub_1_1_block_radix_sort_1_1_temp_storage.html',1,'cub::BlockRadixSort']]],
-  ['tempstorage',['TempStorage',['../structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___t_r_a_n_s_p_o_s_e_00_09dfae03f13932c7dbdb41be30a5767ba.html',1,'cub::BlockStore::StoreInternal&lt; BLOCK_STORE_TRANSPOSE, DUMMY &gt;']]],
   ['tempstorage',['TempStorage',['../structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_8d170856b7ed1df0ed565731a681b449.html',1,'cub::BlockStore::StoreInternal&lt; BLOCK_STORE_WARP_TRANSPOSE, DUMMY &gt;']]],
+  ['tempstorage',['TempStorage',['../structcub_1_1_block_radix_sort_1_1_temp_storage.html',1,'cub::BlockRadixSort']]],
+  ['tempstorage',['TempStorage',['../structcub_1_1_block_scan_1_1_temp_storage.html',1,'cub::BlockScan']]],
   ['tempstorage',['TempStorage',['../structcub_1_1_warp_reduce_1_1_temp_storage.html',1,'cub::WarpReduce']]],
-  ['tempstorage',['TempStorage',['../structcub_1_1_warp_scan_1_1_temp_storage.html',1,'cub::WarpScan']]],
   ['texobjinputiterator',['TexObjInputIterator',['../classcub_1_1_tex_obj_input_iterator.html',1,'cub']]],
   ['texrefinputiterator',['TexRefInputIterator',['../classcub_1_1_tex_ref_input_iterator.html',1,'cub']]],
   ['transforminputiterator',['TransformInputIterator',['../classcub_1_1_transform_input_iterator.html',1,'cub']]]
diff --git a/docs/html/search/files_64.js b/docs/html/search/files_64.js
index 7ca198e4a7..31d4e1a095 100644
--- a/docs/html/search/files_64.js
+++ b/docs/html/search/files_64.js
@@ -7,6 +7,7 @@ var searchData=
   ['device_5frun_5flength_5fencode_2ecuh',['device_run_length_encode.cuh',['../device__run__length__encode_8cuh.html',1,'']]],
   ['device_5fscan_2ecuh',['device_scan.cuh',['../device__scan_8cuh.html',1,'']]],
   ['device_5fsegmented_5fradix_5fsort_2ecuh',['device_segmented_radix_sort.cuh',['../device__segmented__radix__sort_8cuh.html',1,'']]],
+  ['device_5fsegmented_5freduce_2ecuh',['device_segmented_reduce.cuh',['../device__segmented__reduce_8cuh.html',1,'']]],
   ['device_5fselect_2ecuh',['device_select.cuh',['../device__select_8cuh.html',1,'']]],
   ['device_5fspmv_2ecuh',['device_spmv.cuh',['../device__spmv_8cuh.html',1,'']]]
 ];
diff --git a/docs/html/search/functions_61.js b/docs/html/search/functions_61.js
index 79643ee95d..5e92c1d5da 100644
--- a/docs/html/search/functions_61.js
+++ b/docs/html/search/functions_61.js
@@ -1,8 +1,6 @@
 var searchData=
 [
-  ['alias',['Alias',['../structcub_1_1_uninitialized.html#a790b865325f19ac45cc84d3fed0d3038',1,'cub::Uninitialized']]],
-  ['alternate',['Alternate',['../structcub_1_1_double_buffer.html#a3895f1d57aeb379bee79de56ace8e35a',1,'cub::DoubleBuffer']]],
   ['argindexinputiterator',['ArgIndexInputIterator',['../classcub_1_1_arg_index_input_iterator.html#a97a6c3755ab132c099e90616e5f67cb6',1,'cub::ArgIndexInputIterator']]],
-  ['argmax',['ArgMax',['../structcub_1_1_device_reduce.html#a07a9ecbf0b6db1882107f6adee1c4276',1,'cub::DeviceReduce']]],
-  ['argmin',['ArgMin',['../structcub_1_1_device_reduce.html#a6b35963e90120b6d2c76a6068b0340a9',1,'cub::DeviceReduce']]]
+  ['argmax',['ArgMax',['../structcub_1_1_device_reduce.html#a07a9ecbf0b6db1882107f6adee1c4276',1,'cub::DeviceReduce::ArgMax()'],['../structcub_1_1_device_segmented_reduce.html#ad0e1f7eede9a0f93a379950eac7d8329',1,'cub::DeviceSegmentedReduce::ArgMax()']]],
+  ['argmin',['ArgMin',['../structcub_1_1_device_reduce.html#a6b35963e90120b6d2c76a6068b0340a9',1,'cub::DeviceReduce::ArgMin()'],['../structcub_1_1_device_segmented_reduce.html#a085150ad8d55de8665b45a3f69f38bce',1,'cub::DeviceSegmentedReduce::ArgMin()']]]
 ];
diff --git a/docs/html/search/functions_63.js b/docs/html/search/functions_63.js
index 66860b1520..9232b52ce1 100644
--- a/docs/html/search/functions_63.js
+++ b/docs/html/search/functions_63.js
@@ -6,6 +6,5 @@ var searchData=
   ['composite',['Composite',['../classcub_1_1_block_histogram.html#a90ef25b7af7ce819367eca662a47dd2d',1,'cub::BlockHistogram']]],
   ['constantinputiterator',['ConstantInputIterator',['../classcub_1_1_constant_input_iterator.html#a1237efdec602999d589afd3c898b7494',1,'cub::ConstantInputIterator']]],
   ['countinginputiterator',['CountingInputIterator',['../classcub_1_1_counting_input_iterator.html#ac51f0117505e3b13f5cf8771d99ea623',1,'cub::CountingInputIterator']]],
-  ['csrmv',['CsrMV',['../structcub_1_1_device_spmv.html#abbcd4c04d8bbbcdfabc1eb62f860b8b2',1,'cub::DeviceSpmv']]],
-  ['current',['Current',['../structcub_1_1_double_buffer.html#a861d3dff1a70d5e5926057a44d9b8724',1,'cub::DoubleBuffer']]]
+  ['csrmv',['CsrMV',['../structcub_1_1_device_spmv.html#abbcd4c04d8bbbcdfabc1eb62f860b8b2',1,'cub::DeviceSpmv']]]
 ];
diff --git a/docs/html/search/functions_64.js b/docs/html/search/functions_64.js
index 1c026b445d..c9f9ea3d6e 100644
--- a/docs/html/search/functions_64.js
+++ b/docs/html/search/functions_64.js
@@ -2,6 +2,5 @@ var searchData=
 [
   ['debug',['Debug',['../group___util_mgmt.html#ga5a175d2a88f63f7f1ab30e8b4f2cfa95',1,'cub']]],
   ['deviceallocate',['DeviceAllocate',['../structcub_1_1_caching_device_allocator.html#ae1088ac6ba6e5d55832ffbc0b2a5d714',1,'cub::CachingDeviceAllocator::DeviceAllocate(int device, void **d_ptr, size_t bytes, cudaStream_t active_stream=0)'],['../structcub_1_1_caching_device_allocator.html#ab2fe4020cf8bc86ad886c797525ac8ea',1,'cub::CachingDeviceAllocator::DeviceAllocate(void **d_ptr, size_t bytes, cudaStream_t active_stream=0)']]],
-  ['devicefree',['DeviceFree',['../structcub_1_1_caching_device_allocator.html#a66e72cc3cc7d0dbd59148ac7c4ba0de6',1,'cub::CachingDeviceAllocator::DeviceFree(int device, void *d_ptr)'],['../structcub_1_1_caching_device_allocator.html#adbf65c59172b140420636e150325deeb',1,'cub::CachingDeviceAllocator::DeviceFree(void *d_ptr)']]],
-  ['doublebuffer',['DoubleBuffer',['../structcub_1_1_double_buffer.html#a8d51dcd30484a1f186e056eb7ab09979',1,'cub::DoubleBuffer::DoubleBuffer()'],['../structcub_1_1_double_buffer.html#a100c51f0e1aefdff4cdfe6480c89f59e',1,'cub::DoubleBuffer::DoubleBuffer(T *d_current, T *d_alternate)']]]
+  ['devicefree',['DeviceFree',['../structcub_1_1_caching_device_allocator.html#a66e72cc3cc7d0dbd59148ac7c4ba0de6',1,'cub::CachingDeviceAllocator::DeviceFree(int device, void *d_ptr)'],['../structcub_1_1_caching_device_allocator.html#adbf65c59172b140420636e150325deeb',1,'cub::CachingDeviceAllocator::DeviceFree(void *d_ptr)']]]
 ];
diff --git a/docs/html/search/functions_6d.js b/docs/html/search/functions_6d.js
index 3a0a2471bd..967c2ce048 100644
--- a/docs/html/search/functions_6d.js
+++ b/docs/html/search/functions_6d.js
@@ -1,7 +1,7 @@
 var searchData=
 [
-  ['max',['Max',['../structcub_1_1_device_reduce.html#a974a241463ca892c8f5e73b879065e48',1,'cub::DeviceReduce']]],
-  ['min',['Min',['../structcub_1_1_device_reduce.html#a14d9c12a1beb9a04f77e903d07fa596a',1,'cub::DeviceReduce']]],
+  ['max',['Max',['../structcub_1_1_device_reduce.html#a974a241463ca892c8f5e73b879065e48',1,'cub::DeviceReduce::Max()'],['../structcub_1_1_device_segmented_reduce.html#aa1f982f913c95d9974412b8fc0995183',1,'cub::DeviceSegmentedReduce::Max()']]],
+  ['min',['Min',['../structcub_1_1_device_reduce.html#a14d9c12a1beb9a04f77e903d07fa596a',1,'cub::DeviceReduce::Min()'],['../structcub_1_1_device_segmented_reduce.html#a2fb8a073bb504afd0e05cd06d008ec29',1,'cub::DeviceSegmentedReduce::Min()']]],
   ['multihistogrameven',['MultiHistogramEven',['../structcub_1_1_device_histogram.html#a917e507d773ef1b11424ed8bc49e4d95',1,'cub::DeviceHistogram::MultiHistogramEven(void *d_temp_storage, size_t &amp;temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram[NUM_ACTIVE_CHANNELS], int num_levels[NUM_ACTIVE_CHANNELS], LevelT lower_level[NUM_ACTIVE_CHANNELS], LevelT upper_level[NUM_ACTIVE_CHANNELS], OffsetT num_pixels, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_histogram.html#a309c2e30bd62935e1cfcd623fb04b668',1,'cub::DeviceHistogram::MultiHistogramEven(void *d_temp_storage, size_t &amp;temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram[NUM_ACTIVE_CHANNELS], int num_levels[NUM_ACTIVE_CHANNELS], LevelT lower_level[NUM_ACTIVE_CHANNELS], LevelT upper_level[NUM_ACTIVE_CHANNELS], OffsetT num_row_pixels, OffsetT num_rows, size_t row_stride_bytes, cudaStream_t stream=0, bool debug_synchronous=false)']]],
   ['multihistogramrange',['MultiHistogramRange',['../structcub_1_1_device_histogram.html#a77511d5ae7cc53f0b5c984fd32ad29fe',1,'cub::DeviceHistogram::MultiHistogramRange(void *d_temp_storage, size_t &amp;temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram[NUM_ACTIVE_CHANNELS], int num_levels[NUM_ACTIVE_CHANNELS], LevelT *d_levels[NUM_ACTIVE_CHANNELS], OffsetT num_pixels, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_histogram.html#a5926ff12b29d9e8e67ecf2684071542f',1,'cub::DeviceHistogram::MultiHistogramRange(void *d_temp_storage, size_t &amp;temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram[NUM_ACTIVE_CHANNELS], int num_levels[NUM_ACTIVE_CHANNELS], LevelT *d_levels[NUM_ACTIVE_CHANNELS], OffsetT num_row_pixels, OffsetT num_rows, size_t row_stride_bytes, cudaStream_t stream=0, bool debug_synchronous=false)']]]
 ];
diff --git a/docs/html/search/functions_6e.js b/docs/html/search/functions_6e.js
index 159a543681..4312eb4584 100644
--- a/docs/html/search/functions_6e.js
+++ b/docs/html/search/functions_6e.js
@@ -1,4 +1,5 @@
 var searchData=
 [
-  ['nontrivialruns',['NonTrivialRuns',['../structcub_1_1_device_run_length_encode.html#aa2318dc7a69f28a8c47d417aaf53db3a',1,'cub::DeviceRunLengthEncode']]]
+  ['nontrivialruns',['NonTrivialRuns',['../structcub_1_1_device_run_length_encode.html#aa2318dc7a69f28a8c47d417aaf53db3a',1,'cub::DeviceRunLengthEncode']]],
+  ['normalize',['normalize',['../classcub_1_1_arg_index_input_iterator.html#aa3dd1dfb19d87d8e0b5fc3c8773fd1dc',1,'cub::ArgIndexInputIterator']]]
 ];
diff --git a/docs/html/search/functions_6f.js b/docs/html/search/functions_6f.js
index 41bc282ea6..c1e9c8366c 100644
--- a/docs/html/search/functions_6f.js
+++ b/docs/html/search/functions_6f.js
@@ -1,6 +1,6 @@
 var searchData=
 [
-  ['operator_21_3d',['operator!=',['../structcub_1_1_key_value_pair.html#aa7bcc39b09d285d41c9c0226a49790f2',1,'cub::KeyValuePair::operator!=()'],['../classcub_1_1_arg_index_input_iterator.html#a17b0be0179d38b73e8a612d4d0202772',1,'cub::ArgIndexInputIterator::operator!=()'],['../classcub_1_1_cache_modified_input_iterator.html#acbe543a259aa55230c8a7be052fe311c',1,'cub::CacheModifiedInputIterator::operator!=()'],['../classcub_1_1_cache_modified_output_iterator.html#a9bb801bcb260204ff8312cbb21e77bad',1,'cub::CacheModifiedOutputIterator::operator!=()'],['../classcub_1_1_constant_input_iterator.html#af6345ecd7617d8eb9913d63be52d4a7f',1,'cub::ConstantInputIterator::operator!=()'],['../classcub_1_1_counting_input_iterator.html#af51f1a3ae34158f4f67964e278649240',1,'cub::CountingInputIterator::operator!=()'],['../classcub_1_1_tex_obj_input_iterator.html#af500ff312438db4a22ce466968fd4c28',1,'cub::TexObjInputIterator::operator!=()'],['../classcub_1_1_tex_ref_input_iterator.html#a3be490820de8d3cc7968601fc0fe34ab',1,'cub::TexRefInputIterator::operator!=()'],['../classcub_1_1_transform_input_iterator.html#a5d3245a6a5829ed10ff39af7cfecb3be',1,'cub::TransformInputIterator::operator!=()']]],
+  ['operator_21_3d',['operator!=',['../classcub_1_1_arg_index_input_iterator.html#a17b0be0179d38b73e8a612d4d0202772',1,'cub::ArgIndexInputIterator::operator!=()'],['../classcub_1_1_cache_modified_input_iterator.html#acbe543a259aa55230c8a7be052fe311c',1,'cub::CacheModifiedInputIterator::operator!=()'],['../classcub_1_1_cache_modified_output_iterator.html#a9bb801bcb260204ff8312cbb21e77bad',1,'cub::CacheModifiedOutputIterator::operator!=()'],['../classcub_1_1_constant_input_iterator.html#af6345ecd7617d8eb9913d63be52d4a7f',1,'cub::ConstantInputIterator::operator!=()'],['../classcub_1_1_counting_input_iterator.html#af51f1a3ae34158f4f67964e278649240',1,'cub::CountingInputIterator::operator!=()'],['../classcub_1_1_tex_obj_input_iterator.html#af500ff312438db4a22ce466968fd4c28',1,'cub::TexObjInputIterator::operator!=()'],['../classcub_1_1_tex_ref_input_iterator.html#a3be490820de8d3cc7968601fc0fe34ab',1,'cub::TexRefInputIterator::operator!=()'],['../classcub_1_1_transform_input_iterator.html#a5d3245a6a5829ed10ff39af7cfecb3be',1,'cub::TransformInputIterator::operator!=()']]],
   ['operator_28_29',['operator()',['../structcub_1_1_equality.html#a9db81c4cbcf79dbb8087b3c59593cae0',1,'cub::Equality::operator()()'],['../structcub_1_1_inequality.html#ac2b51f35b929dc74ea766b012e89b552',1,'cub::Inequality::operator()()'],['../structcub_1_1_inequality_wrapper.html#a97067089c24f8d6a09c914ab6a163c65',1,'cub::InequalityWrapper::operator()()'],['../structcub_1_1_sum.html#a1edd85dbc039f93c8e45eb2096704a86',1,'cub::Sum::operator()()'],['../structcub_1_1_max.html#ab06fa8091c6aa396fe127f37e0a545d3',1,'cub::Max::operator()()'],['../structcub_1_1_arg_max.html#abc8619b45e188b364d96c0bdf0b29c2d',1,'cub::ArgMax::operator()()'],['../structcub_1_1_min.html#ade40f60337afc51da556ed65d5708136',1,'cub::Min::operator()()'],['../structcub_1_1_arg_min.html#a57bab80de70f6401ea6899ca2488f710',1,'cub::ArgMin::operator()()'],['../structcub_1_1_cast.html#a649573c7f5d02ab09f176b5cc7cf1e5c',1,'cub::Cast::operator()()'],['../classcub_1_1_swizzle_scan_op.html#a9548d484ccfb5161d56d3db1003693e2',1,'cub::SwizzleScanOp::operator()()'],['../structcub_1_1_reduce_by_segment_op.html#aff56752999857ef68029cfa150d88d5e',1,'cub::ReduceBySegmentOp::operator()()'],['../structcub_1_1_reduce_by_key_op.html#ad5e0d929c9cc2447df47c0f2c6409c9a',1,'cub::ReduceByKeyOp::operator()()']]],
   ['operator_2a',['operator*',['../classcub_1_1_arg_index_input_iterator.html#a04e18ece1d7438dd929665e92091241f',1,'cub::ArgIndexInputIterator::operator*()'],['../classcub_1_1_cache_modified_input_iterator.html#a803d29cb5212f1eaa5f4a34415987768',1,'cub::CacheModifiedInputIterator::operator*()'],['../classcub_1_1_cache_modified_output_iterator.html#a4548b15114d6a8a3f714dcbf0f22e854',1,'cub::CacheModifiedOutputIterator::operator*()'],['../classcub_1_1_constant_input_iterator.html#afd9a8d22abd868d9fb13c235bd800eae',1,'cub::ConstantInputIterator::operator*()'],['../classcub_1_1_counting_input_iterator.html#a7acf3db367c8485a5145d9dbf47be1f7',1,'cub::CountingInputIterator::operator*()'],['../classcub_1_1_tex_obj_input_iterator.html#a8052b0744d233b7f9647d0eb6ba71583',1,'cub::TexObjInputIterator::operator*()'],['../classcub_1_1_tex_ref_input_iterator.html#a78417e7a9f78ded354cc152997c2d7d1',1,'cub::TexRefInputIterator::operator*()'],['../classcub_1_1_transform_input_iterator.html#ab84a2e02eeec401af33903dcb183d5ca',1,'cub::TransformInputIterator::operator*()']]],
   ['operator_2b',['operator+',['../classcub_1_1_arg_index_input_iterator.html#a1bf6ce760cfa6764c43dd13699601030',1,'cub::ArgIndexInputIterator::operator+()'],['../classcub_1_1_cache_modified_input_iterator.html#a455c2df363449c1b92628de54fb2bad5',1,'cub::CacheModifiedInputIterator::operator+()'],['../classcub_1_1_cache_modified_output_iterator.html#a487a5a434527081b425e5a36ea0d4dd3',1,'cub::CacheModifiedOutputIterator::operator+()'],['../classcub_1_1_constant_input_iterator.html#a1e37cefa0808ae8b8b0853e30013bfba',1,'cub::ConstantInputIterator::operator+()'],['../classcub_1_1_counting_input_iterator.html#a4491f4d5635571964f4d98cf1c3cc753',1,'cub::CountingInputIterator::operator+()'],['../classcub_1_1_tex_obj_input_iterator.html#a1284c246857d30e96e5bc1ea10d68116',1,'cub::TexObjInputIterator::operator+()'],['../classcub_1_1_tex_ref_input_iterator.html#a85fcecba8f3d7b4941b3fa446cddb50a',1,'cub::TexRefInputIterator::operator+()'],['../classcub_1_1_transform_input_iterator.html#a36a3eb7ede1ef22b2316fc281aed52ab',1,'cub::TransformInputIterator::operator+()']]],
diff --git a/docs/html/search/functions_72.js b/docs/html/search/functions_72.js
index 5859a95e43..6cf024ba18 100644
--- a/docs/html/search/functions_72.js
+++ b/docs/html/search/functions_72.js
@@ -1,7 +1,7 @@
 var searchData=
 [
-  ['reduce',['Reduce',['../classcub_1_1_block_reduce.html#a089953b3bdfe7c48208632d0cc2ac1fb',1,'cub::BlockReduce::Reduce(T input, ReductionOp reduction_op)'],['../classcub_1_1_block_reduce.html#a81878a614ef3b39de654918fc1f6144d',1,'cub::BlockReduce::Reduce(T(&amp;inputs)[ITEMS_PER_THREAD], ReductionOp reduction_op)'],['../classcub_1_1_block_reduce.html#a0e947f6a1d812d21839632b87aaf32e5',1,'cub::BlockReduce::Reduce(T input, ReductionOp reduction_op, int num_valid)'],['../structcub_1_1_device_reduce.html#a326ef5ae888b92442295c26190a6c39c',1,'cub::DeviceReduce::Reduce()'],['../classcub_1_1_warp_reduce.html#a0dd72fc4cf7e1ecf59e8b15bd6819185',1,'cub::WarpReduce::Reduce(T input, ReductionOp reduction_op)'],['../classcub_1_1_warp_reduce.html#ad1ecfeddf0e7fb3f359cf61b60f4745a',1,'cub::WarpReduce::Reduce(T input, ReductionOp reduction_op, int valid_items)']]],
-  ['reducebykey',['ReduceByKey',['../structcub_1_1_device_reduce.html#a3206c7c11b4d894ca176482e86215b02',1,'cub::DeviceReduce']]],
+  ['reduce',['Reduce',['../classcub_1_1_block_reduce.html#a089953b3bdfe7c48208632d0cc2ac1fb',1,'cub::BlockReduce::Reduce(T input, ReductionOp reduction_op)'],['../classcub_1_1_block_reduce.html#a81878a614ef3b39de654918fc1f6144d',1,'cub::BlockReduce::Reduce(T(&amp;inputs)[ITEMS_PER_THREAD], ReductionOp reduction_op)'],['../classcub_1_1_block_reduce.html#a0e947f6a1d812d21839632b87aaf32e5',1,'cub::BlockReduce::Reduce(T input, ReductionOp reduction_op, int num_valid)'],['../structcub_1_1_device_reduce.html#aa4adabeb841b852a7a5ecf4f99a2daeb',1,'cub::DeviceReduce::Reduce()'],['../structcub_1_1_device_segmented_reduce.html#ad9b73f245930740c4d8786fc1a812364',1,'cub::DeviceSegmentedReduce::Reduce()'],['../classcub_1_1_warp_reduce.html#a0dd72fc4cf7e1ecf59e8b15bd6819185',1,'cub::WarpReduce::Reduce(T input, ReductionOp reduction_op)'],['../classcub_1_1_warp_reduce.html#ad1ecfeddf0e7fb3f359cf61b60f4745a',1,'cub::WarpReduce::Reduce(T input, ReductionOp reduction_op, int valid_items)']]],
+  ['reducebykey',['ReduceByKey',['../structcub_1_1_device_reduce.html#a303ae673ac32825f95912b4bfff8bef1',1,'cub::DeviceReduce']]],
   ['reducebykeyop',['ReduceByKeyOp',['../structcub_1_1_reduce_by_key_op.html#a88feb445a30081d205a7e1560c07bc6e',1,'cub::ReduceByKeyOp::ReduceByKeyOp()'],['../structcub_1_1_reduce_by_key_op.html#aa9e777450d365effbfc54e4e7700dd24',1,'cub::ReduceByKeyOp::ReduceByKeyOp(ReductionOpT op)']]],
   ['reducebysegmentop',['ReduceBySegmentOp',['../structcub_1_1_reduce_by_segment_op.html#a6f6fe38f76f8f50eff19ea20105cb1b3',1,'cub::ReduceBySegmentOp::ReduceBySegmentOp()'],['../structcub_1_1_reduce_by_segment_op.html#aa01e5bafddc8cf5b6df1601ba783b17e',1,'cub::ReduceBySegmentOp::ReduceBySegmentOp(ReductionOpT op)']]],
   ['rowmajortid',['RowMajorTid',['../group___util_ptx.html#gaa3f839b109cc6dc9d9ece4f1acf7d2ce',1,'cub']]]
diff --git a/docs/html/search/functions_73.js b/docs/html/search/functions_73.js
index 9a364f3591..b63a6587cf 100644
--- a/docs/html/search/functions_73.js
+++ b/docs/html/search/functions_73.js
@@ -17,7 +17,7 @@ var searchData=
   ['sortdescendingblockedtostriped',['SortDescendingBlockedToStriped',['../classcub_1_1_block_radix_sort.html#ae89db4d439f0996b0dd7ad4b105e4f1d',1,'cub::BlockRadixSort::SortDescendingBlockedToStriped(KeyT(&amp;keys)[ITEMS_PER_THREAD], int begin_bit=0, int end_bit=sizeof(KeyT)*8)'],['../classcub_1_1_block_radix_sort.html#ab56d4d4c3a8da9ff718906bfc5d815c2',1,'cub::BlockRadixSort::SortDescendingBlockedToStriped(KeyT(&amp;keys)[ITEMS_PER_THREAD], ValueT(&amp;values)[ITEMS_PER_THREAD], int begin_bit=0, int end_bit=sizeof(KeyT)*8)']]],
   ['sortkeys',['SortKeys',['../structcub_1_1_device_radix_sort.html#a4f555afa8ac2949d9fef49fad52a50d6',1,'cub::DeviceRadixSort::SortKeys(void *d_temp_storage, size_t &amp;temp_storage_bytes, KeyT *d_keys_in, KeyT *d_keys_out, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_radix_sort.html#ad7b5dbc2e3fd44c21193f2b792706191',1,'cub::DeviceRadixSort::SortKeys(void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_segmented_radix_sort.html#ab6a917a29d441021949e197d3a639fba',1,'cub::DeviceSegmentedRadixSort::SortKeys(void *d_temp_storage, size_t &amp;temp_storage_bytes, KeyT *d_keys_in, KeyT *d_keys_out, int num_items, int num_segments, int *d_begin_offsets, int *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_segmented_radix_sort.html#ab8b433b55358ac507a7fcbba933cdbac',1,'cub::DeviceSegmentedRadixSort::SortKeys(void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, int num_items, int num_segments, int *d_begin_offsets, int *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)']]],
   ['sortkeysdescending',['SortKeysDescending',['../structcub_1_1_device_radix_sort.html#a24761009c4cc15fd2e54cb72663af0ef',1,'cub::DeviceRadixSort::SortKeysDescending(void *d_temp_storage, size_t &amp;temp_storage_bytes, KeyT *d_keys_in, KeyT *d_keys_out, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_radix_sort.html#a95a8939332405efec4527442c26c2628',1,'cub::DeviceRadixSort::SortKeysDescending(void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_segmented_radix_sort.html#a49ccbb6ca7a1d26e4b01e68e8da8a701',1,'cub::DeviceSegmentedRadixSort::SortKeysDescending(void *d_temp_storage, size_t &amp;temp_storage_bytes, KeyT *d_keys_in, KeyT *d_keys_out, int num_items, int num_segments, int *d_begin_offsets, int *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_segmented_radix_sort.html#a5c21fba171c718aaf9b3b3c27bda6a94',1,'cub::DeviceSegmentedRadixSort::SortKeysDescending(void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, int num_items, int num_segments, int *d_begin_offsets, int *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)']]],
-  ['sortpairs',['SortPairs',['../structcub_1_1_device_radix_sort.html#a31d7224d3f6c6a9309a0b84571f25eb9',1,'cub::DeviceRadixSort::SortPairs(void *d_temp_storage, size_t &amp;temp_storage_bytes, KeyT *d_keys_in, KeyT *d_keys_out, ValueT *d_values_in, ValueT *d_values_out, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_radix_sort.html#a0e0f38bafdc30403a68b3ff5c7b80027',1,'cub::DeviceRadixSort::SortPairs(void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, DoubleBuffer&lt; ValueT &gt; &amp;d_values, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_segmented_radix_sort.html#a4c291958575f14acc6a9e6d3e2ea6597',1,'cub::DeviceSegmentedRadixSort::SortPairs(void *d_temp_storage, size_t &amp;temp_storage_bytes, KeyT *d_keys_in, KeyT *d_keys_out, ValueT *d_values_in, ValueT *d_values_out, int num_items, OffsetT num_segments, OffsetT *d_begin_offsets, OffsetT *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_segmented_radix_sort.html#a4cffada9bba553f9871a34d926bbb148',1,'cub::DeviceSegmentedRadixSort::SortPairs(void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, DoubleBuffer&lt; ValueT &gt; &amp;d_values, int num_items, int num_segments, int *d_begin_offsets, int *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)']]],
+  ['sortpairs',['SortPairs',['../structcub_1_1_device_radix_sort.html#a31d7224d3f6c6a9309a0b84571f25eb9',1,'cub::DeviceRadixSort::SortPairs(void *d_temp_storage, size_t &amp;temp_storage_bytes, KeyT *d_keys_in, KeyT *d_keys_out, ValueT *d_values_in, ValueT *d_values_out, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_radix_sort.html#a0e0f38bafdc30403a68b3ff5c7b80027',1,'cub::DeviceRadixSort::SortPairs(void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, DoubleBuffer&lt; ValueT &gt; &amp;d_values, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_segmented_radix_sort.html#a6770adfa8e5a99c8015d9e6ab5ed8ca0',1,'cub::DeviceSegmentedRadixSort::SortPairs(void *d_temp_storage, size_t &amp;temp_storage_bytes, KeyT *d_keys_in, KeyT *d_keys_out, ValueT *d_values_in, ValueT *d_values_out, int num_items, int num_segments, int *d_begin_offsets, int *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_segmented_radix_sort.html#a4cffada9bba553f9871a34d926bbb148',1,'cub::DeviceSegmentedRadixSort::SortPairs(void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, DoubleBuffer&lt; ValueT &gt; &amp;d_values, int num_items, int num_segments, int *d_begin_offsets, int *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)']]],
   ['sortpairsdescending',['SortPairsDescending',['../structcub_1_1_device_radix_sort.html#add6a87f54c8058edba4b9e875bb0626a',1,'cub::DeviceRadixSort::SortPairsDescending(void *d_temp_storage, size_t &amp;temp_storage_bytes, KeyT *d_keys_in, KeyT *d_keys_out, ValueT *d_values_in, ValueT *d_values_out, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_radix_sort.html#aea53a40e665f2d5ed683a6655a3d188e',1,'cub::DeviceRadixSort::SortPairsDescending(void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, DoubleBuffer&lt; ValueT &gt; &amp;d_values, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_segmented_radix_sort.html#a39de3d48166582a802cc7df1481d3ff4',1,'cub::DeviceSegmentedRadixSort::SortPairsDescending(void *d_temp_storage, size_t &amp;temp_storage_bytes, KeyT *d_keys_in, KeyT *d_keys_out, ValueT *d_values_in, ValueT *d_values_out, int num_items, int num_segments, int *d_begin_offsets, int *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)'],['../structcub_1_1_device_segmented_radix_sort.html#aafce5852dfbfbeef027493c3791d6347',1,'cub::DeviceSegmentedRadixSort::SortPairsDescending(void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, DoubleBuffer&lt; ValueT &gt; &amp;d_values, int num_items, int num_segments, int *d_begin_offsets, int *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)']]],
   ['store',['Store',['../classcub_1_1_block_store.html#a86fd777fd9bef8264787d756b16303ed',1,'cub::BlockStore::Store(OutputIteratorT block_itr, T(&amp;items)[ITEMS_PER_THREAD])'],['../classcub_1_1_block_store.html#a67d0aa8fcf37f92b2074e986581ffdf5',1,'cub::BlockStore::Store(OutputIteratorT block_itr, T(&amp;items)[ITEMS_PER_THREAD], int valid_items)']]],
   ['storedirectblocked',['StoreDirectBlocked',['../group___util_io.html#ga58460e08f9c9c2560ab311c30ab58aca',1,'cub::StoreDirectBlocked(int linear_tid, OutputIteratorT block_itr, T(&amp;items)[ITEMS_PER_THREAD])'],['../group___util_io.html#ga732f5998fb8698032d3c598944eb28a8',1,'cub::StoreDirectBlocked(int linear_tid, OutputIteratorT block_itr, T(&amp;items)[ITEMS_PER_THREAD], int valid_items)']]],
@@ -25,6 +25,6 @@ var searchData=
   ['storedirectstriped',['StoreDirectStriped',['../group___util_io.html#gac688ffdb1ecacc06375295947144d233',1,'cub::StoreDirectStriped(int linear_tid, OutputIteratorT block_itr, T(&amp;items)[ITEMS_PER_THREAD])'],['../group___util_io.html#ga5af198f11043a66ebfaab51c8f4f6fc9',1,'cub::StoreDirectStriped(int linear_tid, OutputIteratorT block_itr, T(&amp;items)[ITEMS_PER_THREAD], int valid_items)']]],
   ['storedirectwarpstriped',['StoreDirectWarpStriped',['../group___util_io.html#ga9b2bb8f452cd26b5e297c9d5924fcf03',1,'cub::StoreDirectWarpStriped(int linear_tid, OutputIteratorT block_itr, T(&amp;items)[ITEMS_PER_THREAD])'],['../group___util_io.html#ga3978707feae6ad3e1b6467a6e525bc09',1,'cub::StoreDirectWarpStriped(int linear_tid, OutputIteratorT block_itr, T(&amp;items)[ITEMS_PER_THREAD], int valid_items)']]],
   ['stripedtoblocked',['StripedToBlocked',['../classcub_1_1_block_exchange.html#a2855471bbbcc4d66ac6a29d35a040e0c',1,'cub::BlockExchange']]],
-  ['sum',['Sum',['../classcub_1_1_block_reduce.html#a7632bd9c8950dd6a3528ca99fa3f0890',1,'cub::BlockReduce::Sum(T input)'],['../classcub_1_1_block_reduce.html#ac5d4591d9513f08b180d4112cb0c4c51',1,'cub::BlockReduce::Sum(T(&amp;inputs)[ITEMS_PER_THREAD])'],['../classcub_1_1_block_reduce.html#a33ddffdde07275ab0c4e1bf61b0d9409',1,'cub::BlockReduce::Sum(T input, int num_valid)'],['../structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f',1,'cub::DeviceReduce::Sum()'],['../classcub_1_1_warp_scan.html#a25bd83f795e88b9260ec2bcbf846fb20',1,'cub::WarpScan::Sum()'],['../classcub_1_1_warp_reduce.html#abe4aeeabf8859a7582a0b5858b84ee7a',1,'cub::WarpReduce::Sum(T input)'],['../classcub_1_1_warp_reduce.html#ad9c4a8d85a7795cf220713f362c36f30',1,'cub::WarpReduce::Sum(T input, int valid_items)']]],
+  ['sum',['Sum',['../classcub_1_1_block_reduce.html#a7632bd9c8950dd6a3528ca99fa3f0890',1,'cub::BlockReduce::Sum(T input)'],['../classcub_1_1_block_reduce.html#ac5d4591d9513f08b180d4112cb0c4c51',1,'cub::BlockReduce::Sum(T(&amp;inputs)[ITEMS_PER_THREAD])'],['../classcub_1_1_block_reduce.html#a33ddffdde07275ab0c4e1bf61b0d9409',1,'cub::BlockReduce::Sum(T input, int num_valid)'],['../structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f',1,'cub::DeviceReduce::Sum()'],['../structcub_1_1_device_segmented_reduce.html#aefdf8fcdfb5e5d76459ee222360924eb',1,'cub::DeviceSegmentedReduce::Sum()'],['../classcub_1_1_warp_scan.html#a25bd83f795e88b9260ec2bcbf846fb20',1,'cub::WarpScan::Sum()'],['../classcub_1_1_warp_reduce.html#abe4aeeabf8859a7582a0b5858b84ee7a',1,'cub::WarpReduce::Sum(T input)'],['../classcub_1_1_warp_reduce.html#ad9c4a8d85a7795cf220713f362c36f30',1,'cub::WarpReduce::Sum(T input, int valid_items)']]],
   ['swizzlescanop',['SwizzleScanOp',['../classcub_1_1_swizzle_scan_op.html#ae81a38aa9d94025da72544b9e0dc611b',1,'cub::SwizzleScanOp']]]
 ];
diff --git a/docs/html/search/search.js b/docs/html/search/search.js
index ccd325d797..13a3875f4a 100644
--- a/docs/html/search/search.js
+++ b/docs/html/search/search.js
@@ -7,13 +7,13 @@
 
 var indexSectionsWithContent =
 {
-  0: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111111011011111101111110000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
-  1: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111110001011110101111010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
+  0: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010111111011001111101111110000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
+  1: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111110001001100101110010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
   2: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
   3: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111100000000000000011010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
   4: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111111011001111101111010000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
-  5: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100100000010001100100100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
-  6: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100001010000101110100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
+  5: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
+  6: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100001000000101110100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
   7: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
   8: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000001000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
   9: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
diff --git a/docs/html/search/typedefs_64.js b/docs/html/search/typedefs_64.js
index ca6b90d973..6c9c1cb7ba 100644
--- a/docs/html/search/typedefs_64.js
+++ b/docs/html/search/typedefs_64.js
@@ -1,5 +1,4 @@
 var searchData=
 [
-  ['deviceword',['DeviceWord',['../structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6',1,'cub::Uninitialized']]],
   ['difference_5ftype',['difference_type',['../classcub_1_1_arg_index_input_iterator.html#acd5e39570dd51f4883547ef33f9ca8c6',1,'cub::ArgIndexInputIterator::difference_type()'],['../classcub_1_1_cache_modified_input_iterator.html#a268f1e7a4b42a05c5b4d0bca2775c26e',1,'cub::CacheModifiedInputIterator::difference_type()'],['../classcub_1_1_cache_modified_output_iterator.html#ac8cdd3a29db7e398f14e44a7aa054750',1,'cub::CacheModifiedOutputIterator::difference_type()'],['../classcub_1_1_constant_input_iterator.html#ab45cc48afbfda7eaa3b4ea643e719c33',1,'cub::ConstantInputIterator::difference_type()'],['../classcub_1_1_counting_input_iterator.html#ac66dea2a687f0ad6ed0cbcd6217a3029',1,'cub::CountingInputIterator::difference_type()'],['../classcub_1_1_tex_obj_input_iterator.html#a710012acbff3dd0c35822951e28148ea',1,'cub::TexObjInputIterator::difference_type()'],['../classcub_1_1_tex_ref_input_iterator.html#aff1afce146f69adb655e2ff7366b869f',1,'cub::TexRefInputIterator::difference_type()'],['../classcub_1_1_transform_input_iterator.html#ac05064e9ad33dc032452e3d09e8768f6',1,'cub::TransformInputIterator::difference_type()']]]
 ];
diff --git a/docs/html/search/typedefs_76.js b/docs/html/search/typedefs_76.js
index 2967e89efe..bce599db9a 100644
--- a/docs/html/search/typedefs_76.js
+++ b/docs/html/search/typedefs_76.js
@@ -1,5 +1,4 @@
 var searchData=
 [
-  ['value',['Value',['../structcub_1_1_key_value_pair.html#a9fd385872c09fd3757e9ba59b2754955',1,'cub::KeyValuePair']]],
   ['value_5ftype',['value_type',['../classcub_1_1_arg_index_input_iterator.html#a6c40ea1dc7c0923b9010f1e938e07d22',1,'cub::ArgIndexInputIterator::value_type()'],['../classcub_1_1_cache_modified_input_iterator.html#a60689c564e2ade39722947eeaf40156a',1,'cub::CacheModifiedInputIterator::value_type()'],['../classcub_1_1_cache_modified_output_iterator.html#aba997f95620d692cbad05c74ef169fa8',1,'cub::CacheModifiedOutputIterator::value_type()'],['../classcub_1_1_constant_input_iterator.html#a72a3996ca30a1b2eb6f676923a2ee3ce',1,'cub::ConstantInputIterator::value_type()'],['../classcub_1_1_counting_input_iterator.html#a3afabb6a47c8cf7526220eb817e2a97f',1,'cub::CountingInputIterator::value_type()'],['../classcub_1_1_tex_obj_input_iterator.html#a4405a9e3d39593b7c468629dff098144',1,'cub::TexObjInputIterator::value_type()'],['../classcub_1_1_tex_ref_input_iterator.html#a1de28970e3874202646e11984a0d9026',1,'cub::TexRefInputIterator::value_type()'],['../classcub_1_1_transform_input_iterator.html#a8230414e069b99a279e2afabed83fe52',1,'cub::TransformInputIterator::value_type()']]]
 ];
diff --git a/docs/html/structcub_1_1_arg_max-members.html b/docs/html/structcub_1_1_arg_max-members.html
index 259c9060fa..9e74bac7ae 100644
--- a/docs/html/structcub_1_1_arg_max-members.html
+++ b/docs/html/structcub_1_1_arg_max-members.html
@@ -109,7 +109,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_arg_max.html b/docs/html/structcub_1_1_arg_max.html
index e102fefb54..246fe90e57 100644
--- a/docs/html/structcub_1_1_arg_max.html
+++ b/docs/html/structcub_1_1_arg_max.html
@@ -114,8 +114,8 @@
 <tr class="memitem:abc8619b45e188b364d96c0bdf0b29c2d"><td class="memTemplParams" colspan="2"><a class="anchor" id="abc8619b45e188b364d96c0bdf0b29c2d"></a>
 template&lt;typename T , typename OffsetT &gt; </td></tr>
 <tr class="memitem:abc8619b45e188b364d96c0bdf0b29c2d"><td class="memTemplItemLeft" align="right" valign="top">__host__ __device__ <br class="typebreak"/>
-__forceinline__ <a class="el" href="structcub_1_1_key_value_pair.html">KeyValuePair</a><br class="typebreak"/>
-&lt; OffsetT, T &gt;&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_arg_max.html#abc8619b45e188b364d96c0bdf0b29c2d">operator()</a> (const <a class="el" href="structcub_1_1_key_value_pair.html">KeyValuePair</a>&lt; OffsetT, T &gt; &amp;a, const <a class="el" href="structcub_1_1_key_value_pair.html">KeyValuePair</a>&lt; OffsetT, T &gt; &amp;b) const </td></tr>
+__forceinline__ KeyValuePair<br class="typebreak"/>
+&lt; OffsetT, T &gt;&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_arg_max.html#abc8619b45e188b364d96c0bdf0b29c2d">operator()</a> (const KeyValuePair&lt; OffsetT, T &gt; &amp;a, const KeyValuePair&lt; OffsetT, T &gt; &amp;b) const </td></tr>
 <tr class="memdesc:abc8619b45e188b364d96c0bdf0b29c2d"><td class="mdescLeft">&#160;</td><td class="mdescRight">Boolean max operator, preferring the item having the smaller offset in case of ties. <br/></td></tr>
 <tr class="separator:abc8619b45e188b364d96c0bdf0b29c2d"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table>
@@ -126,7 +126,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_arg_min-members.html b/docs/html/structcub_1_1_arg_min-members.html
index 6b59d53c38..ef1a744330 100644
--- a/docs/html/structcub_1_1_arg_min-members.html
+++ b/docs/html/structcub_1_1_arg_min-members.html
@@ -109,7 +109,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_arg_min.html b/docs/html/structcub_1_1_arg_min.html
index 38c731d8bf..4bf81fe630 100644
--- a/docs/html/structcub_1_1_arg_min.html
+++ b/docs/html/structcub_1_1_arg_min.html
@@ -114,8 +114,8 @@
 <tr class="memitem:a57bab80de70f6401ea6899ca2488f710"><td class="memTemplParams" colspan="2"><a class="anchor" id="a57bab80de70f6401ea6899ca2488f710"></a>
 template&lt;typename T , typename OffsetT &gt; </td></tr>
 <tr class="memitem:a57bab80de70f6401ea6899ca2488f710"><td class="memTemplItemLeft" align="right" valign="top">__host__ __device__ <br class="typebreak"/>
-__forceinline__ <a class="el" href="structcub_1_1_key_value_pair.html">KeyValuePair</a><br class="typebreak"/>
-&lt; OffsetT, T &gt;&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_arg_min.html#a57bab80de70f6401ea6899ca2488f710">operator()</a> (const <a class="el" href="structcub_1_1_key_value_pair.html">KeyValuePair</a>&lt; OffsetT, T &gt; &amp;a, const <a class="el" href="structcub_1_1_key_value_pair.html">KeyValuePair</a>&lt; OffsetT, T &gt; &amp;b) const </td></tr>
+__forceinline__ KeyValuePair<br class="typebreak"/>
+&lt; OffsetT, T &gt;&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_arg_min.html#a57bab80de70f6401ea6899ca2488f710">operator()</a> (const KeyValuePair&lt; OffsetT, T &gt; &amp;a, const KeyValuePair&lt; OffsetT, T &gt; &amp;b) const </td></tr>
 <tr class="memdesc:a57bab80de70f6401ea6899ca2488f710"><td class="mdescLeft">&#160;</td><td class="mdescRight">Boolean min operator, preferring the item having the smaller offset in case of ties. <br/></td></tr>
 <tr class="separator:a57bab80de70f6401ea6899ca2488f710"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table>
@@ -126,7 +126,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_block_discontinuity_1_1_temp_storage.html b/docs/html/structcub_1_1_block_discontinuity_1_1_temp_storage.html
index 854cdb4501..0614937371 100644
--- a/docs/html/structcub_1_1_block_discontinuity_1_1_temp_storage.html
+++ b/docs/html/structcub_1_1_block_discontinuity_1_1_temp_storage.html
@@ -97,8 +97,6 @@
 </div>
 </div><!-- top -->
 <div class="header">
-  <div class="summary">
-<a href="structcub_1_1_block_discontinuity_1_1_temp_storage-members.html">List of all members</a>  </div>
   <div class="headertitle">
 <div class="title">cub::BlockDiscontinuity&lt; T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage Struct Reference</div>  </div>
 </div><!--header-->
@@ -121,32 +119,8 @@
  <div class="center">
   <img src="structcub_1_1_block_discontinuity_1_1_temp_storage.png" usemap="#cub::BlockDiscontinuity&lt; T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage_map" alt=""/>
   <map id="cub::BlockDiscontinuity&lt; T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage_map" name="cub::BlockDiscontinuity&lt; T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage_map">
-<area href="structcub_1_1_uninitialized.html" alt="cub::Uninitialized&lt; _TempStorage &gt;" shape="rect" coords="0,0,619,24"/>
 </map>
  </div></div>
-<table class="memberdecls">
-<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="inherited"></a>
-Additional Inherited Members</h2></td></tr>
-<tr class="inherit_header pub_types_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_types_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Types inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom"></td></tr>
-<tr class="separator:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a848f6233dbde22d8a42c022d3f0aa6f6"></a>
-typedef UnitWord&lt; _TempStorage &gt;<br class="typebreak"/>
-::<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a></td></tr>
-<tr class="memdesc:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T. <br/></td></tr>
-<tr class="separator:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_methods_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_methods_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Methods inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a790b865325f19ac45cc84d3fed0d3038"></a>
-__host__ __device__ <br class="typebreak"/>
-__forceinline__ _TempStorage &amp;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a790b865325f19ac45cc84d3fed0d3038">Alias</a> ()</td></tr>
-<tr class="memdesc:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Alias. <br/></td></tr>
-<tr class="separator:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_attribs_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_attribs_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Members inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a5fa7311d943222333e8c87497ff8e782"></a>
-<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a5fa7311d943222333e8c87497ff8e782">storage</a> [WORDS]</td></tr>
-<tr class="memdesc:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Backing storage. <br/></td></tr>
-<tr class="separator:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-</table>
 <hr/>The documentation for this struct was generated from the following file:<ul>
 <li><a class="el" href="block__discontinuity_8cuh_source.html">block_discontinuity.cuh</a></li>
 </ul>
@@ -154,7 +128,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_block_discontinuity_1_1_temp_storage.png b/docs/html/structcub_1_1_block_discontinuity_1_1_temp_storage.png
index 850b02cc145ac04dee4b352fc22f1154e1e0ce85..9bea73c53d6e81e67fd1d3f5d54794ff18e32e0d 100644
GIT binary patch
delta 1337
zcmchWTUgQu7{>p`f^BA-Iv*`dI)KfmJfw-DG-ruQWiB&O1a;<<DHu2pkW$#mY}liQ
znvdpF&I%9XPZ;1KP0CHD6D>z5c$}3}RLld25;(ovE_S=`=KH<x&G%lsU%zRcX+F`-
z7w6|2$}pKNA0$j;EH>9twHa$3Mu<2qotAaZrg@=pmmp5G^}RZB&z=*0*Kg$nUe7Nm
z+|AO1EquOHonA~Drs7oL>R<{SN2v2yJ^WlmzN30G{Jih4mj={h(lai3J`mMbU;l2=
z$P*Xl3X!~)Ylz#F<-7p8AThs>kt_USBzN_|!DD^VZ~eidQU!Crg6mji;I%p6hR-a#
zmnSU7!<#T?GGxW+pJU%ehSRyt;#AgjpsyxCP-%?~p{IdnN8x$Gqq2_d;=zG){o+_u
z)j~SgnSDogaLJZrJ6EU<a-CsM-mjN}jszuTIXSAVdp0)v;T17wu@qsjib_r5XKBJt
zTOi+%Nk{brri#EBQO7EG5(iEDm7;(vLmACQT_Fl>WEmu0pW^8`na;x0=Y<4#Wh{S>
zoN^kHxZ_qGoTw8!>BAtdFKxs^&S9n|xDAdw;OH;bo%-Y?x4AIPY$;+JZguy}#_DPZ
zad!$HQvN0EK&)R$&Zm~<1+Xj=#tjw#l>ms?eqhVDu4~2!6adL*lOQfYCnl8jlmb9^
zSv`Zs01JYIb+p~UIsy5ALWlo4_rcZN+<wnXKR?6~z-B#gHm7xy6-#=HJS=7Cb3MrX
z$`uG1&Z{+%|5@M5aga@jAAR%sq^e<1c#{3G)zn4(>h;?Bl6V(8yfyve)|4+FH1;$D
z;j~WCzMAgLPo9iF>`+`Y7~Vgov$C;w#mlp!^vBf8P3_Lm@CL1Pzo*$ND7<)pJ{7+-
zK9A9Cit$b3d94~*x`yPAb2D?w*Nx@V@~E=oe8VL&^ukMuo9DbPE<zI>WXUe~vDY<`
zTYV%6{tYUE?egCKE?>Jg@ybjf7wL~S$V=oRRga)So7gH|YuYh%n1+%MRAyFiyFYZ{
zcq*)i)QCudFuO!|!)Rixbl?w0idPxYTWALx>bbxQwjsqP<UJ7zZypXhUrjtMhIN)+
zGy}_Ti;C0kO)C854)@AK*ZNqDkh4GP)BwlEM|8E~L@%hZR8WP~l{#0L5MQ=KxpyXI
zX&*6iLiC7y>4z3IfoHkRU5HzY?j$>JE(z5ctpKTe(e+c(cEJimUvX2Q4X_4-rp6~(
zBZ#)~`F>(q;-v6Cp6G2qe}GRyR|~RzWwk!l!9@-DP0SvVH6D<WQ)fGAy@`_%QQqiZ
zZjgVs@NATaDiU0KkB`BQn9FLNCrHUQ(N#}5iQ)(3ZKEA&S6j!FcOlGoN<8F5P0NhZ
zrP7tJX}3ItXMNy;?o+&AVP;Jx>*MgqPuc=|y<Ll{3VFPEdz0!6)X7lVhn^b}LiFLt
zFlo2@G5zI3953%`@GB-Y>-l$aKgZ~oex>wa*+`Ge&TqA!(W#WAuhR=nPyx#1=__!@
zlv6MroaS|NsS^!I*u<^qiQl5flpZx<qx7tTYRw#s@y!m&G;w<ph!1LgAsWq(Fxev6
z4DK9=)6G>?0MIe#vm2j<N;i`LasR+Yn&U=)OLB5BMhH4L0~)r)t{Ye%9QLFicqHcP
E-+IH3$p8QV

literal 1495
zcmc(fX;hL~7{^~TLmaijCR_?jDYQ~^$vlF`(hA-db(Abi)GTcj%@s*(Q_EaRMROWg
z#?0J`v<A#Imvl-4x0D3Qh0=-CH%w%5W2ipVhxtDDoaf%(@0|PLIp;qAo8;nz*HF_{
z0{}n+B<ynqfE8o~yQnHF_OQwpy5i7wIZWIym&+CS_oS4P){?HEuhrDlq>?nGuTa%U
zS0^_>ap}wI?;u(LfQmo3Z!ht@(p0`6Tx6@hGPkIFQI26et6fu1sLJ$lcE_4hd{fW_
zy2(hEpU3*gJ34)T^YcKQqN5GL0~x)Q<cu?DW4*eq3VBrZqkM*WZf{Mam3(G<aYDer
z--T`-_8C@wWE`{!luhVUkGM9pr)IngAMeb<FXTl{r84_*Gd5%C;o=&Ilp3NRj~Wi_
zq|*mfPr^X42Lw6pt`ej1B2>Uns5@R;4uJCuHO1uqEGeNF!CdyI*H##pPCXDBgB;4+
zPFA0|YyG@7Wwz8L41WFjZq_uLsnKy$8Z_p<QKx=VnM3^5^MeEA=$mL8#O{dU`}0=u
z25NO+mxcm5@1S=@F41`NSiCG<U0^_S6io6#i$G8E7-@plJmk+cC^L)-_sYEcH2IHC
zE|MhJ=A#u--cp~yX8rSumHuX$%s)a}Fd_)D?VKc%in8<w9!jVmUmc5ww4F0nMZvJb
zxv-PBbORuv({@nyu7C;lnvDcRPoK*sW!{A(^h(p29yhhNF=J5H#spmvUB;2}u>c<W
z1)*2Jg$ZWR?*>igpmo5G_8(<j$ol>~Gr&&Z;m1`zfchSTD2*ln{b|mBP$8z@gqh=~
zyb~yUy$euIPdlQ(!LN9e^grX3+s4J~+zuMwyXA@~1hf_1CH!YgoFp=bGb4E)GT;^S
z2{X`G&0X=b#M>TusFYJxoSK-zhY|)xH_L0`^BegKkcLQYUKBW<U(jj;U&FLC1GeJc
z@vm(Yp_E?QybrUkY_J#3jyub`@q*I})%gLrW(-NL)?v?uQH)LgCZ{)czJg*>Ps(2r
z?^T+vice5x&FhDsT(o3UgP{$)S~hRIh&R`Gt?5(_MFoRm;M|vv^*uW?Aya9b=0l7T
zD?{9e6L~UAUTXYMYx@OuN#E&I(QJ`<4@=Bl4wWQ(i=J+U?N;XcO6_uxwCCy%^IqNe
zvg!<SwxGtD)y(y3<puweX!lI7Z*YXqr09p*)tQ{KO4@=N?Ln-LIo?gIabaU$wA9-2
zxV{{}DI|?er@+r*`Wt5%1w@9yC0g9x8}`zbq-+$4jC>?a4Eg-01v2s2VNHpuV8fCM
z?_PAU_(ZNJgZf|~>4vUuubeu=KcuC;ZE;Zx7}Y)K@X$Xr2=~T15=qvx4GSh9V-%TK
zo%VEs@8WoU=|Vf};@V@;Q`wnfvi2<+3nMK?$U0VfM@+S00C9g^)Y|Ftgo1ahOvk(@
z)+Vv><iZZklp51mhU)%8uVC|#WJkF}em?Pa+`Zz<`V;VFIIq#J`7HAyHEu8KHtB<V
z<b86ZWL--?YO##Gxq+1wb%+98=2aVM!Uw6)(IS+`Oljd`FZ@W6w^6=`tAVX-vmGzb
zuHhnv(<)0JX9gZ@?sDp0pL_#z+&<V`FQVryJ$|?^T5z>)*8x3EY${M;XnxM_V}sug
zF(6kiArrjwFu-|wNy%iyTeHw9*wG3IyQiz0CWkXymFl@}J?%89p67x__RmcKt~ml`
zt-Xv5G`gbn#PUn3&Yb2c0%rXKLd-1J<ZlBVy$FldiSwqS!ZC_p7y$P>?PEIl#Qh6<
CjK@C!

diff --git a/docs/html/structcub_1_1_block_exchange_1_1_temp_storage.html b/docs/html/structcub_1_1_block_exchange_1_1_temp_storage.html
index 6f6d1dd63a..4ecca4c0f8 100644
--- a/docs/html/structcub_1_1_block_exchange_1_1_temp_storage.html
+++ b/docs/html/structcub_1_1_block_exchange_1_1_temp_storage.html
@@ -97,8 +97,6 @@
 </div>
 </div><!-- top -->
 <div class="header">
-  <div class="summary">
-<a href="structcub_1_1_block_exchange_1_1_temp_storage-members.html">List of all members</a>  </div>
   <div class="headertitle">
 <div class="title">cub::BlockExchange&lt; T, BLOCK_DIM_X, ITEMS_PER_THREAD, WARP_TIME_SLICING, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage Struct Reference</div>  </div>
 </div><!--header-->
@@ -123,32 +121,8 @@
  <div class="center">
   <img src="structcub_1_1_block_exchange_1_1_temp_storage.png" usemap="#cub::BlockExchange&lt; T, BLOCK_DIM_X, ITEMS_PER_THREAD, WARP_TIME_SLICING, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage_map" alt=""/>
   <map id="cub::BlockExchange&lt; T, BLOCK_DIM_X, ITEMS_PER_THREAD, WARP_TIME_SLICING, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage_map" name="cub::BlockExchange&lt; T, BLOCK_DIM_X, ITEMS_PER_THREAD, WARP_TIME_SLICING, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage_map">
-<area href="structcub_1_1_uninitialized.html" alt="cub::Uninitialized&lt; _TempStorage &gt;" shape="rect" coords="0,0,873,24"/>
 </map>
  </div></div>
-<table class="memberdecls">
-<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="inherited"></a>
-Additional Inherited Members</h2></td></tr>
-<tr class="inherit_header pub_types_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_types_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Types inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom"></td></tr>
-<tr class="separator:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a848f6233dbde22d8a42c022d3f0aa6f6"></a>
-typedef UnitWord&lt; _TempStorage &gt;<br class="typebreak"/>
-::<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a></td></tr>
-<tr class="memdesc:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T. <br/></td></tr>
-<tr class="separator:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_methods_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_methods_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Methods inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a790b865325f19ac45cc84d3fed0d3038"></a>
-__host__ __device__ <br class="typebreak"/>
-__forceinline__ _TempStorage &amp;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a790b865325f19ac45cc84d3fed0d3038">Alias</a> ()</td></tr>
-<tr class="memdesc:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Alias. <br/></td></tr>
-<tr class="separator:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_attribs_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_attribs_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Members inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a5fa7311d943222333e8c87497ff8e782"></a>
-<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a5fa7311d943222333e8c87497ff8e782">storage</a> [WORDS]</td></tr>
-<tr class="memdesc:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Backing storage. <br/></td></tr>
-<tr class="separator:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-</table>
 <hr/>The documentation for this struct was generated from the following file:<ul>
 <li><a class="el" href="block__exchange_8cuh_source.html">block_exchange.cuh</a></li>
 </ul>
@@ -156,7 +130,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_block_exchange_1_1_temp_storage.png b/docs/html/structcub_1_1_block_exchange_1_1_temp_storage.png
index c32b5f7dcd9851bfe2b62eb4d0cd6b08b18e6a8b..f0d7eabdcdca0e1ae92f383137edc3fc4e8dbc2b 100644
GIT binary patch
literal 1931
zcmc&#eN@s}8vYrk9+Vs#b;_J&j**@$lSK_v25Z9)6v7-GR~!6TO@~BbGVl|%cC4)N
z3&&84awjD1NLB*o2g2Hv<5-#E$BLS3On#7n<|u=R>`!O^+CS#sz31F}pZlKYJ@0eh
z_n!Bbgr5qAL7kug0Ko7exCj7%P(U1R1p!xK|4S;k?FlDD2AWJJ5Y8{IGRk`@K(sw&
zGTF|ONjP}4x)5>d3jic-jgg-tQ2=1+gvXtT%r;YVV=hgE?lOl2yfRE@cN}0B$2K+)
z=f`hTy$+X%b~k=jP;wP%XXonDgTvV$4`Ej#_Tz)tM|%1AZ5Z`0Z#;^fjA(-1bOr6-
zA)*rHh(3_UbEnoJ9L`(Qu79Ib7cis;nv!a7KQZBl*n+{PYF>aj$S=0x<IhEB8LF|K
zlLKjfBKEIW66A=c3Z_bj%FH($!L-o??;3-2mqd&TUZugvn&(HI`Q_`$RD~<473+Qt
zjJtr^4pSqp9cKBvlMF}SC`#GG<X}NpR>wpuk@cNluJA@z@yR5AH@pbc>dqSxHpJxV
zDFM6CxkLmbI#q#O#Ue<9mth`EB3t!{!jAmOn*VGpzVJ7x)>%5X+r}6qf9@Zwi?pIi
z7wZr#U`sd#oqn#bT9ZS(*`*$}!9&`sKCj}HZ;MA0J5%>hDKZE#egx|#GKC$?Zx~7$
zKrBGO@b?kjn?VN^7p0tL?SOGqqBAbiK1EQH(e#RRc-zaN6h)q{KNQT~dGK1sLtVGR
z+{kR~HDVP{dRhCo)p|$C_x+NdEy7l?h4>lKSJ?0<Fzw_rV%2tK?mwh}Y3U1za5zc?
z?R)X}d;Ygn*F5DeE(F}S+l3v2BmkQ(1!j8y5I`SN01Fvlzx<vP1wjH9^%gfR5&)ay
zcJV*N_-qkEe<utl4gfYqW{o}kEx+($;x-xjJK5nVOa|;Ja;G{KnIQr2fowT~vH%Mi
z5Sb|d|2b}?<8U=stv<F(ba8=xwxt6JaFYH=kQ<+YRvKts<GE9Hy<**lgjD&Q5GJ0T
z913z)N;fC8{80)ien28<_v8k1sVUlVANb1R-Chl=zKE%|eXL@1gevCUsp=EjW#5d3
zz~`mIR}61=^1V1;R8>8~G-oz{*~^GC4;ia<0IcIs>Dg~B?Tr7R$cxTabeb+-!6-6j
z-njf3zR+HH=7&<J)`E^&eUKB^t%h@xxG$Ic0G08p_c=WPS32HX|3GhvqmIq4T)5cv
z+efPIFzMYY<@&t+omjJn^|hlm5*QrPW|`C<oqL)|dp$awHCgx$U&Q>}Tfh=>-ntA;
zRj!leamQQ(km*j@B8qL#h(ntRF&XVakA=ngc(fQ^2$eQk`E0D1Gn7>Nv|^?a&TlwO
zwdJ&I_!RzPsmi;xp&01x%Uj~Ucu|QJ({ZyxrY9ln?mBPt0fkGFi(4b*{Y>&F9Y1Et
z8DU?gDg0+PD=D;hD~mE-`#t?^#L)u{JL&g}rd3pTb)cJWO3Q-|N2^{5@lQ@pME0DQ
ztvwcrvR2BLZ(-M0Fy;?Da&FWX!^%;a5%&~phSO8~*0Z|>b8C4uRBFaIImrrRyr%j}
z3n|SjUE^)p9lh@BN?vN&rQ-$WV9Sr&6tlbq(;=Z}v<iCYjt|wgc<gHyYx5$dziyP^
z-}hlTIrE<z-Q&S)mOkkPGjm@P)B|a;0q2gi=`$B(ewvd^67fuKds$!iV7nje^uDLn
zx9Q{~o>1NRb#+hvj)l6%KG$*_rvj0e+sn|)$%^c`d93rQHNiiIQI{HH2OkwiqpEr_
zeJ>!#9QmuB6%CaBzt@R<4|ZZ^j-5z-I*m*(Q&jUMcA7i2Ybn@^%+m2(<WyG%dIRyu
zT<w$sO-S%n-K9)C;r8+whuY#u_F>(gaq!gIz0Z6f&G2fZRK=bjp>oIIC6?T=1aoZQ
zq();WU6u)_cj;QWM$L{)kHRrwVsJmlB)qP%3h)sGsG5GtIIDSYGWa8G@zq({kras!
zZDlfVE|BJNP(s;VPgZ_RlP`{-qh`d%3-&pGI^gAH|L;h3X879Kxx=%!TWW4}&iR)4
z8(?z9_hjk^Y&pH5L6`4P+<D0SfFARw{Cw-OK}_L<U`%~o9cODX-Vyxd6Mn{<m#v$P
zXgPqDDc}8!UYWVK^RG*(i*MttbcG-FaX8NJm6kd`iM~jaTj;4)TOd)QZjnsce#ZxL
zYw1-@^q2aDQ=857=)+4Z&SuRy6tTXsP|PPAUvG7q{BOs~mM#njq-}R06X}oI&@&F@
gzdu~f)vNFI@L}cN{caAeg8(@2fv0c}{*rL%KVf8xI{*Lx

literal 1962
zcmd5+do-Kb7XMP0Y2z_>QLR$kUc`7*QDP`jowo5!C5T5mh>D9g6@p5po;9Yby|mge
zC}NFOK7%HTP=W;YsE$g9QWc@0C5_h*Z_54Xy6gTof8Vp#+2?om{_V5ZKEJj0T{k>d
z8L5c`06-aZK1%=qIWmmh_RGPR^ZP3bd}z42c%IwY*@2;CW|@}J_5enEyQQT?{_cn_
zeBB>Sz<U5NX?G30^t1$kuWZ4yPM$XqvrHlVvEAS0FZhPO+PS$1DxF~|w$t4vqgIqY
z4oSYRNJ})BD!}#H<`-N?+fFqB)%r(3OGc3XCpsl>4|0(CDDXTZU7ytkfsBXxdSJy<
zGEC@p?lXodEPZ81DRWXtx8@s^k?5}EO+(OQrBkK?Q^?2AIEHsra9(ljJ#B-sk3HEY
zX)p6&U!oRl!lA(j*yPkGj~ZUIEiXYOHooFG)_OQ(=#(=-K<6uoYRl7(-V25j7R{%<
z+_T`qV=WNa*X-$0cPpDvzmX40*+b77z0w(P{IIMMyRelR58cQ2b!rX5#jl5Y!9LND
z70AsYoE@H-yEgphiJj%8cdk}yMHZ}luF_qPH>4Wkur<g%0=Kui=+NKS#izfYVot33
zC9^M6eQr)kbdchS9Aaj%!QMO6M`+HHjQEz*Dhzq0U#3T|zQW+_-%VBxGHN0ZquETV
zaU02pAyr2;SRT(p$DzOr@J~gbB>z5;A;79QCjLSh9flwWuA{=dT*hc52GsmUfnNPf
z<fbVN>T4u=!=HS7w?%$;zjVj~^hw8M7W1<Nfi(BDLe<|qFTuj>Rop=d4n!A~!}4$u
ze<{wcl>gIj{5fY&WJ*-;yAK7j&wxe}|ITGVp#=u=Lso!X0B~qwI>$@S98d^QMl0|7
z&&A&f832@$5SNm1dw%T|rCq<*7rr;Ce=nHwC8+)->Hjg9#J`tCurw+OB7EOnoYl9_
z`AgrzD)KIP(gq<k0J*2ge1!l&sl}M0nS?M0D!#Q~H9W%XEZc-uVy};_<ne_ENj*=4
z!tNiZwOedYJRVVIJuk!*#$B#4)y6U@&=MstBj%-q7pL{jul^sMLYnax4`?w{SQyEn
zj0x{c#bYlvBF36p9$a6zI!<!Qq@@p-61&bNMKFIrn2U}2xrV$ul6**64sr^|a8vDv
zb;P537SPfV-^?AmT$;#zv~XTS+fXA%3pX9hWcN{*tgc!7lo6Tvgy%3)b6D6s6~jI4
z*Q7`jmWciUx&FZmW$J)hIeFl9*PoaO1C7m?l*7R?1r{!>YtaP1@6B`{XQPyN(C}e6
zLD~p8WTL3KXr(40CoRIdWA@ptO}0F~qO}I<pz$qdUx+Qko&=X<>4YA=9bJ}~dk9au
zS%0IWa=|tJ*SfCmN`|iCeCtJgNcvJ$ik@X@ZCs#_#(<9WM_g#UW}SA4rhlD``}%17
zNn%ZVGt{g)_sk{klf6I{)#K1(lU*%#y9T5p*UEa?1S7jDYJ-}wn{Sy`zk*mL?7IVL
z2$SGYDs^&p!gh-+Yf$tfF3tp@(0<GwkXbIIVy&r?iS?Dj&H3<ZUPI@{mz>#~GwP{=
zn%X=xT~lt%IH4F++iHC7aJ6d7KI3$BO0x}1<os>jV6E8&)pipU=pw1Z@JYiQ$g3ZZ
zU%ydZ6_7Ye(c3(a)4IH7q50kBTWglKb+&NrM`?@woaj?w9(p-SC$svGDm$9{^mFyX
z2!Aswa`j!>Y^UsRZPo?eCnkT0Hf)tl+KsmVO#aM#9G4@t$)#6I%>|r)`VVrh;8FM2
z$Aip1<mL|YPD2n*&6*PFM$Z#3B=z3r2MlY}p2Ec)8<~H!G9I&US?W3+$c+?*taZ0C
ze!FPnvCod{Igb-|sk3pR-t1<co9P-RCZT}oa-#FDvSG&-xqssyZ1XTia%Dc+_C;p=
zTyJ>V+RuvpJngwk`ie-t(`UP!EV>V!*dZOpA#aj>@})&D=7Q%YuE(XeA1hkS_NNVE
zWf;NWDYK2M2fDOS2<d?10tQ#cD?+}N9~Zut+(@(}B2)%23MHne3Ik`>)5%qF>Hp%_
zP;<RYDS>LMF}F-iiM^rj_4#L>ez4E@YJMy^!AdYjm(hfQzEUScU)g?N(zQvEL#Rh{
zp2DAUekY#ApLE>f0BYOm#4Sbfw(`UH*lr$X`@MgvjqYo|%J~wW2#CEsm~*JUaI+#L
zNzMO=tjBnM?t=ii`fvGBN4+Fl*H>t0Lomo`)YaUbPP*cTgF1xISN5jT`<+{VHJv(A
i$^V@S4A7`#bN%a|O6#4bzee~1@B+`_&$7=1B>x9JoRUZY

diff --git a/docs/html/structcub_1_1_block_histogram_1_1_temp_storage.html b/docs/html/structcub_1_1_block_histogram_1_1_temp_storage.html
index 061a05f1b8..fae635575d 100644
--- a/docs/html/structcub_1_1_block_histogram_1_1_temp_storage.html
+++ b/docs/html/structcub_1_1_block_histogram_1_1_temp_storage.html
@@ -97,8 +97,6 @@
 </div>
 </div><!-- top -->
 <div class="header">
-  <div class="summary">
-<a href="structcub_1_1_block_histogram_1_1_temp_storage-members.html">List of all members</a>  </div>
   <div class="headertitle">
 <div class="title">cub::BlockHistogram&lt; T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage Struct Reference</div>  </div>
 </div><!--header-->
@@ -124,32 +122,8 @@
  <div class="center">
   <img src="structcub_1_1_block_histogram_1_1_temp_storage.png" usemap="#cub::BlockHistogram&lt; T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage_map" alt=""/>
   <map id="cub::BlockHistogram&lt; T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage_map" name="cub::BlockHistogram&lt; T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage_map">
-<area href="structcub_1_1_uninitialized.html" alt="cub::Uninitialized&lt; _TempStorage &gt;" shape="rect" coords="0,0,856,24"/>
 </map>
  </div></div>
-<table class="memberdecls">
-<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="inherited"></a>
-Additional Inherited Members</h2></td></tr>
-<tr class="inherit_header pub_types_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_types_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Types inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom"></td></tr>
-<tr class="separator:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a848f6233dbde22d8a42c022d3f0aa6f6"></a>
-typedef UnitWord&lt; _TempStorage &gt;<br class="typebreak"/>
-::<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a></td></tr>
-<tr class="memdesc:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T. <br/></td></tr>
-<tr class="separator:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_methods_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_methods_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Methods inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a790b865325f19ac45cc84d3fed0d3038"></a>
-__host__ __device__ <br class="typebreak"/>
-__forceinline__ _TempStorage &amp;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a790b865325f19ac45cc84d3fed0d3038">Alias</a> ()</td></tr>
-<tr class="memdesc:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Alias. <br/></td></tr>
-<tr class="separator:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_attribs_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_attribs_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Members inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a5fa7311d943222333e8c87497ff8e782"></a>
-<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a5fa7311d943222333e8c87497ff8e782">storage</a> [WORDS]</td></tr>
-<tr class="memdesc:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Backing storage. <br/></td></tr>
-<tr class="separator:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-</table>
 <hr/>The documentation for this struct was generated from the following file:<ul>
 <li><a class="el" href="block__histogram_8cuh_source.html">block_histogram.cuh</a></li>
 </ul>
@@ -157,7 +131,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_block_histogram_1_1_temp_storage.png b/docs/html/structcub_1_1_block_histogram_1_1_temp_storage.png
index 31b02c03353d566f7a6b84097bf829fcedff1598..0a65457754e6b5bb8c99e862a28123439ba41323 100644
GIT binary patch
literal 1926
zcmd5+`#YQ27XH*oO$npw(u*<?RV8#N8XbC*CLikFG}@yj#f-Y8lcY#RJZ(>lsE{c}
z+<GeN7SXiAh_*5soK}@eW2O_Anp6=L({@lHoR6O8oL|l#aQ5@;z2CjpyZ2t}UC-KC
zf&N(drfr)50Dyxy?_dDXCO|k)R~s5#)X4MDWFF`jidkJ<h2Z47IV!6o4?^o*EEelz
z4Ov1<T}rTj2mq1Rn(S$)GXQKf0lg20rfAKS`Ce3b=<DeHIHZ~VWFzptY-#s{AGM{g
z&8to_c%2Ju`Njs(;qCoXrmnHkdylKgt)MeI3ORMcp>Q2G=`wA=W9%Fxs)Go!^O4pd
z^DY4*Ji!ehAj+^fnGhFWlEkW9M1gQ=lSE++hBPz%Ymu7pHS;0cLN0Azb>k3uEUjE>
zSHNv&cCxwKYcP-8^FPegH>5G@Px>^bHdW5RKxJy_sO7Lj-H$h!OE~%K*q8SUZK0>n
zbP~Y<_!fcN1x`{`<I5yXZMYTK8{{#jxO3e>-alUZ1NI@iD3B*4vMG9|zU;(X7kRa~
zOXS*v*_#?n*qlnE`t39Z(lE$!nomH1A~+O~JLxd*({ht=g?eYX{MWSPvrc%%ouTsI
zhZdabZNP;{0dfkKZW{M9d+E16i@5%#VizR!O$o06Ih?iTeo8P8YdTOnBgrd6;Bj-f
zE2UWl+lqv2?h>R8W#jwBDT|sp&G)#os|z8lHp{9<!&abY8>#H{5?Q{R%GIbYo`O`S
zm_go-l=Yw~_iUyDBA_=bChASfC>&q(&{8#|zQ`DIb@sV%NBK{7kPvvtttU8kJ{Lo9
zSWi-{5p+QgL_Br1lMQ9&7~Awel(cLUqeuk8&--dtX3ADT`%hRJU=C<U0B5?1S`fH<
zjb&J(69MRpH#?YSXzc~!KK=DHX5F%ZxxVkrGg;e*(hb1GfJ4XUW4fP>h|h=qFO5k@
z)`H$^WV8$M2UT}1W_@uz!C?(R`T+VFT7;{e8}<UQ2qRNHF<_p7*^wzL&1no^+#5Nu
zQaByZruKZiumc-1*t6(Ezx{2+U_RfVN1ZmC@SLt(e)$V={y<4jbm``6s(BB_f%qlP
zT-O*sQbQfmRQ2pvoZp3RJt$(@PWGH@XQar#j-uD>O5W+efvdo6$oR5IqU=O}6}Pfa
ztRu=U(P>?jK8$Lew@{%r&x}tUsIpUr&<738T*`3)6IxwU>U_CKS^0wBlv=r5RS~l@
zNA|*VcgEkR4wT+{m6v<O&3m%uP@i%>5W4sTZ_k?r1^xAW#7)$_LT`Ref+gslc(gEt
z)d;|=i84m3eXirgHu?#kee?%CLGC6JOX@&wAfUgvZ&zVD{b-WUc}>EK_TbI#hX_Q@
z!=W|smcffCW$;9_48yxQI5Ykszx^%acryH-*j(uuK5JB%x*#8MDK4h2#DpcheQO*Q
zt_(>CK5Js1KroYVl4R(XzARUG*YA@nKD_FI<WS4lcaaYSL2&eM7OIWUqEGp>Bv^$9
zxwpdvu=eb#+QZ`+GsDvt9``@I#<8mDjxuX#SKh=H7jIb&i!l)OKU!X?jX64Ec7L>0
zqk>I4&9?VR&Ld7M-0?wXIi4d`s4CJNSez;J5M9w}scz)ndNtFPC)5Kk#LAS}2rEl8
z>N;jPmJIS@yX%Oql`Pm`j;*E{wj|A^G>_jXA?0?FG!wZpFIP9|GDCE@Pcag`aQECe
zrulK-(<YO8`7PT5b$mB%j|eLt?D$J-%A75|w0~G?;MVd4_GNyqz{xM$BZ=h1lqE{S
zDQZ{!(;vgSMeO3`XNJY-3y}ysIo)xvDNwq3<56;&e<z3K^~Jfb>gn@TL3yTXRzh>8
zAM#YcDb;s>F8?+?4||zLvDWE-LHj0C=-EPc#JjjiNZhfv#2am$K{4uV^zM}_czPJm
zD>FIMH5}8Lg5inm#*4g+;!>7mFEk+Uq^<b_+9_TlCOZ6Moiz^I_G)J`v)NK+DsmO{
zwX;yx2ia<6`bi66@7PN9Rz9V-*)HcFKFn9g>Wo$PeDZgTJvX|Gr#Q4k4gKe*@sDpC
z_%gQGkCrR;r}oBV%^er*0e`89t0<Zd<(1r)$A4_$CK-<3?Vd}z^q{DB!h)90pBeBh
z(F#te{@Xk18Rea99g<$3-=6^}*z28+4Ma`w=s^+VYkE=jySCAWIE!YtjC18@eTSa`
zv}^SN$4OrE0YzE{Dcc)WIVua|WiK0m>}Wg9$}XT;ul-1~9YVE*ayfCmls}C|{Lhm9
icfG&2!~gd)h+feS+h=gOVH#5iKn=wBd-IP(T>cmJ5_Nk3

literal 1956
zcmc&!dpML?9R93ZC9*`bC1W4Mx+Ym74N{D6rqJXv+1zc`$gNDSgY8kdWf-2wG?h?d
zn7%NRh8^S<hR9`^kY;T$?sh1bW<PrNuYI2VchB>jbKdWq_jk^F-rx89Qk@+!^1Jr$
z0sufBbU1|v07(Ljop(sWqsbbI2u}#-(=K*mu^5IQXGA1QQx=T2dqYEmbn06q{JA3%
z?|2S?Nn358!{rD7$eMtsPPjx%%+mRng~t@6X*kz)v31O|kW?un{0!oliA5Ej*mgTw
zIYrxRm=$8eXN6qkvv`;IEV2R(^&NPG8i1NQXK;HP<|emLQJSDVmHLzbufKA@E2-kB
z8Soyq$q($4&z-_>-jL(GM#I2q!sxk}FJK>ab0S6(J|$8TW`@4Q8i2C}2e9~_J8JHI
zYF9iAz_m7rK+Qc_u1JYjXerkE+*_Qm&YxPWKtHMZ)1qn2hH8Irl~T#E3!$zpAcBy`
zGh$tr;G6sQz!Gw8l?8XW_?<)1RSqvz_zG-=Ur{9YisT-9r}Ha?!OW3|=u}+kpocNo
z0qeu>!lZu8i76%EvORh<Cd^;!&09h@3Mq#oQU*h6u;p4HNH74g6DCl{koWlFJbf*W
zzI>8%ma*s<qTcaVkT*arc7dHJS;!{5$%&{FrkMtf@Xn*;o9zd&RO;3<Y{LcbZu+6u
zMFPX+-_l%*uU<b<*}Us|rw54FgWrEyr>B2oExve0FqB=_6NrP<c}V(dW(`TriK?wI
zA;Zp0Sl0<DE+Sc1Y<b!Yd&*7AI|JdVk;-s*!oEaYuU$#ZtGuEFEH%4002Z@Z9?i>>
zGtq(_k%iT7rAv2bx)z+H43w5H1Um&2QUA){C#B%8S-um(H>(QauNVU|@AfUoHUJ1f
za=}7X4hBhTUpAogfJ_5W>;nP2lO)<FI`wJNKEQ4pnXiN&87fH<hXL3&K+*?*e~K+K
z%6FUW(BvlppOux|^37pDMq_9CcA(PNK$L;%{}!0!ZL3?7kH;F9<oaQCC$H#iiAE$j
zLI0sXRT+9go17nyuBs`!FZ#@{w7MAG6=C*soZ9_r^_oNiCi<A6P*}feL3!fO%xO_G
z<g}6Q?!QqOL*pV@iM+vZqG<*9R$N`=!9XvbpVJ&AbV<rTR&Dk%n{I1%gV?(isibis
zPzf-I1{r7XDno<y5?(&-&t0=PlnE_i73TS>!(WuV`Mr;Rsc-eP_Uww+{3}zk>&-3A
z)fiLzBJZG8`^8Rk-Z8{*FB(j0p0%Vuu*~YVHM9F@aDutC;_^+B^cjZsA4Thk=b8vn
z$|c=zO^DVQ+n40_Sm9@t2Le1}#mKE?crw4}ewqkduwlcfk~;prdZgBifw(r18<tM;
zj;zu)M`wGf1g8v_5Kb1%=~Z9jU*>3$c*`%Q_1u0R+?;%`!fj3TBpE6)5sx)J?{j}z
zRHxKOUA^e~ne(&Al6_BOx-V8lTYEy7Xox+*5S=}^x`KvoH@4rJqcq0`uXitLW}t5!
z-B%X0N8x6hgnz+c|EyelR{tmyWV~V$gLIsYeK4X}v;4B5T;;tZHRg7FrrG3hw*gg5
z40J4e4Q)W~Q5wi}3y(SO&BoTYwt0Q%MLX*g@BEKaf|Mvj-1ip*j|`eSGUoysXZh~z
z2bQycW{Ko>MobOtr#`f)d(XP7EqcJi`5EfFDVHB0n?0S4ejR4?be!(sjv0&J(e}&f
z9xjtWX4dAniaRA^53c$~IqADy2(aGJI}-=pkc`cKGVjJ<Ka9J2Nh=hoSQ&K1eotQD
z$g2sKxkh1faHU*&ai;9sHb0T<q9Q#){!MSZE95nN=l2gZdTqGx_ij;%hJ#1WzEt|*
zq<{RO77vP*w7*jM{CX`R?9^(YUwtjvhSSQcMTFjC3lpAE_KuilxFr2LmJTxN#~W)1
zqg~FA^mnsWgRKm^uU_oR&-uezcfcs!RAx~Dx?Ntn$NSszgV@CqqvnD!E6W$-siG;V
z9QUwvQ$}fJFrBowX#mOkFz}FxsYsr)n-MO@1T-HitW>WUDcw|8M~;Tq5LfJc6<i5j
zR(RS1A%bSv;eFuVe3x^-{}l42YamUPRIpY>fMPNP?@c*ctfSvOLrE1?km;2;C^RiJ
zs%!ii8Kd*qBs8ACTy+UO@3QP4Iy5~juXx#VrX~A>X;=XPU@vq_iO;1=JinRemd))S
zM~;e{R_c#W5;j9Ug^|?~cbC6-jQGkNK7u9(UlT&yxZUAR%*E;Mk7LG-J!~l-UMEdN
zNmC(eZdDfU^`WJxj%Chv@p&hf`LF8pNvS*+fBK)Auw*Qjs57lDLs?Ld0&oNE98Xo+
H`ds@Px7dhJ

diff --git a/docs/html/structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___t_r_a_n_s_p_o_s_e_00_01_d_u_m_m_y_01_4_1_1_temp_storage.html b/docs/html/structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___t_r_a_n_s_p_o_s_e_00_01_d_u_m_m_y_01_4_1_1_temp_storage.html
index 94e61c9337..aff70026b4 100644
--- a/docs/html/structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___t_r_a_n_s_p_o_s_e_00_01_d_u_m_m_y_01_4_1_1_temp_storage.html
+++ b/docs/html/structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___t_r_a_n_s_p_o_s_e_00_01_d_u_m_m_y_01_4_1_1_temp_storage.html
@@ -97,8 +97,6 @@
 </div>
 </div><!-- top -->
 <div class="header">
-  <div class="summary">
-<a href="structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___t_r_a_n_s_p_o_s_e_00_01_d_5ea8dad2df262b118ec77ecff8dc9dd3.html">List of all members</a>  </div>
   <div class="headertitle">
 <div class="title">cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::LoadInternal&lt; BLOCK_LOAD_TRANSPOSE, DUMMY &gt;::TempStorage Struct Reference</div>  </div>
 </div><!--header-->
@@ -118,39 +116,15 @@
 
 <p>Alias wrapper allowing storage to be unioned. </p>
 
-<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l00823">823</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
+<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l00838">838</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
 </div><div class="dynheader">
 Inheritance diagram for cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::LoadInternal&lt; BLOCK_LOAD_TRANSPOSE, DUMMY &gt;::TempStorage:</div>
 <div class="dyncontent">
  <div class="center">
   <img src="structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___t_r_a_n_s_p_o_s_e_00_01_d_u_m_m_y_01_4_1_1_temp_storage.png" usemap="#cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::LoadInternal&lt; BLOCK_LOAD_TRANSPOSE, DUMMY &gt;::TempStorage_map" alt=""/>
   <map id="cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::LoadInternal&lt; BLOCK_LOAD_TRANSPOSE, DUMMY &gt;::TempStorage_map" name="cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::LoadInternal&lt; BLOCK_LOAD_TRANSPOSE, DUMMY &gt;::TempStorage_map">
-<area href="structcub_1_1_uninitialized.html" alt="cub::Uninitialized&lt; _TempStorage &gt;" shape="rect" coords="0,0,1181,24"/>
 </map>
  </div></div>
-<table class="memberdecls">
-<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="inherited"></a>
-Additional Inherited Members</h2></td></tr>
-<tr class="inherit_header pub_types_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_types_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Types inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom"></td></tr>
-<tr class="separator:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a848f6233dbde22d8a42c022d3f0aa6f6"></a>
-typedef UnitWord&lt; _TempStorage &gt;<br class="typebreak"/>
-::<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a></td></tr>
-<tr class="memdesc:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T. <br/></td></tr>
-<tr class="separator:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_methods_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_methods_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Methods inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a790b865325f19ac45cc84d3fed0d3038"></a>
-__host__ __device__ <br class="typebreak"/>
-__forceinline__ _TempStorage &amp;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a790b865325f19ac45cc84d3fed0d3038">Alias</a> ()</td></tr>
-<tr class="memdesc:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Alias. <br/></td></tr>
-<tr class="separator:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_attribs_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_attribs_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Members inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a5fa7311d943222333e8c87497ff8e782"></a>
-<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a5fa7311d943222333e8c87497ff8e782">storage</a> [WORDS]</td></tr>
-<tr class="memdesc:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Backing storage. <br/></td></tr>
-<tr class="separator:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-</table>
 <hr/>The documentation for this struct was generated from the following file:<ul>
 <li><a class="el" href="block__load_8cuh_source.html">block_load.cuh</a></li>
 </ul>
@@ -158,7 +132,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:17 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___t_r_a_n_s_p_o_s_e_00_01_d_u_m_m_y_01_4_1_1_temp_storage.png b/docs/html/structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___t_r_a_n_s_p_o_s_e_00_01_d_u_m_m_y_01_4_1_1_temp_storage.png
index febc4db9a1ea47fc76fcfa660f1d8f7d479e05a2..052ce695d4f5f685d73b0efd5f25ae580f7816eb 100644
GIT binary patch
delta 1998
zcmZuydpy(YA78Cs7e@)dN-BzJ(d#6XFqcB=+sUX|4nq4SjO4ocg_+C2>E^O!(rim&
zjSO>Hs3AGBWysuiiQLDQI!3wn8?SSIuiqcP=k<D?=W}_U&-Hn|AHD`dBl9Z$CE$3*
z?cW>ZlyfyB7CSc;m{Yzre0`Vs<;n6^3HOJVug0gJQW{CWZ`D*s>FHf`{O!)~->rUw
z>;+5-evr2FA95-v%8if((FcOB!GLwbiO}1KB`76Mg!_=vL^))g*b8`qMF1h5!24rD
zQpWM7O|c<JRlFvYi6@-!I~w&tdHEUmCKOl%v|#|(Wp0sb75&P%^?wyC!{O!X<=ylS
z9{Xy<D%F>AR7Zpktz#TfBdC0%*6{E7uzi(rN!^uENudB%C_~^xv|+EMAa>@nsevVl
zk@tg248^lhyY^Y0{bibs0Ul_$s|?T%U?IkyTyU<Y5L|+)&lgj<aOb{QBI};`g<%B^
zB3(;U3axF1hcA|-gvS%(Gg`U?!Eyly=$ZZA=Q~e?temJ(E)cd{kPT^uRI9S+j6Up1
z7UEK!JBtjFwyqnv-3jlrHuMQGu3HAMDu#Avq4;xH;go^HfHOf+(hnyn$Zw@(y8zPL
zvDUFjJf;*k*K*|y!4n&NGD#>3c|A&<7tWQhWtg^J_Ycro7%wlK;HNB`DM4WszCwJa
z9(`k=me75PaDTn<wtSoi_Q|O=wS&^ZTGNzuK3V=(+lY5J?g;+>-AX1>{_6N~b@jt~
z!y=Ft2>eA44^mCoa2&LGXu2N`+S~}zN-+37+ZwY`DQX10NRV&3wszpxBx2qRC<v@%
z8aMuZ5Lr$pdV_BhAAkYngzQU*-ud5GrS##O23j|wm0*0G$OJ?1nlPmC$Z{X&eRWYx
zu!{d2e5Oq8w8P50X%^4T-;CFC1GtqX+@rs{Q1XcW<>vLz4mqWVi37n`S{Ssd@uC==
zhR;}wYO$ZMi)W}^=Rp!pO-$Yr7#&|WSX%Rw$G+m^XExkXYy}7`%)?u#;#$Hz-EI%e
z*k@6_H%$?`n1y(cU5H6e`f%rrrmM9X5v2N(2bXNdwZUIAouuLvwz#`0a^if*hLU?}
zzZj>!D|NbaGVkJ#r-9zsPPJ1<f=HL}=vb|?SxMqP^vY|P*?}ik>6Mun^~FJ%!c9w<
z?fl{C^v!y@(>8M#5-Nk?lqGi_?iddHCso=A0<$Qr9{jO(kyr%IWlr&rBD-pF4G1r`
z^m_d?`v*gM@>ov;t|$%5)m>209#z=Ru=}Odi*aw@5_{f9ZcqE=uZLi`V_`GeL%m(E
z(DcLpX;|$W5!{dF&~F8Vj*e;6bK4@cNd>~Txzf_j00-hmX$#R|8uhmG$<&P76)!%a
zT=@9@_-w2Zsmi%9j$1Z%&e9Jd82hy=MC36Cqb#q=&9GCER!}eZeB@Lk{A|hM_g6vm
z7*Rcw>}q$7{>f@^-%Gko(aC>b!<mfLVbxORAt6xFjfmJI5k**N0e>OjP3L<*!fwpg
zyGtA^lLE_!2CZ1G_mw-o2B^Mb!MpK!yp26l(u6X;;0eRPh;D3hS#N~F{YAwI_2R*K
zkuRH}y??(j!oRXzS8H@q_2Oqv$a5xgAolTlpU5RiRPXMY2HmP|^IX}Ao<{p=eb{SQ
z80CU3$IP%Ud)s1WkEY@9WnHFMt~JM`wVKDLsWj_jg?kW!lAkT}94IjvLI$2!)RndE
z2rX!602L~Z%%NBz_Ya`$G;%m(^k6vx^Jf0m4tZ2-k03PAGSEGKB=xf=rq&y`Lnc^=
z&1=K(P)XZElRuqK#&9gd%t{-=0}_V!THMfP*^*n+rVkRyaR;KP%%oFqYim;vqb~uB
zTtwvL60?4LUcaH-@wK4@?`;w_-U!!kI+exe#<jyGDIYPeE=#EtVepI%)<`6_0B^D1
z+s}xxtLe5N3pZ&Q2E-RS%iTJp7?{n@mlHC@5FfOF9*OVtw!766H&lxz>G%gN24+k(
zqb7~`-A{E6Ir}}n*8J#IAU!{mESJ@%Jb6m`C4j#2uYQXf#8G@UC(!Gh{<JuDWqVS3
zt4u&`zjMYj(Kd)~-b*X*9vWrS^#-k*=gg%sf%F`3G!I_3qmxpQCKu?Av^72U`|};5
zy8t=9tcjX5pFj9>DwaDD9hDy1&nPt!KQtALm;tWVb2wW&*;xQhM>KJmB?i31eXQx+
z{-7}*MEEg$U3xe4=2WG2@DEzdJ4sb&z4(Fyz3E%Zm?3H3LnD62=>_KFjzaXVt`nZ8
z7?-S^Wo}oDn{71QEBMx>;Vd<|BEJ!nvnq=A&$t}DrK*HdA8&mN)28be5y>8Zo&zoD
zSeR(;EW*L>!DMbl0S8)wWXGO+KTtBdGouquuK1qfKhki~PCfy9ToPPn|IE~-&F)Fs
z*!gAEV|gXf>v{la^-%9Xb#DVcpP~~=oAF5?F%|h6(w8_$)#IptSmI2>Xj(%Hf#A4|
zrHPfqUGcV8qaTDZ6>9z4l&}c`mz#d0e23ThR%osRY9q+)Z=h7dvl2v!4DfDfQyY8@
jecw1bK3e;PzlN-W7xV@ujBna(1}O({7VgNjzn1hLSXK83

delta 2014
zcmZuydo+~W8=k09o#>)Y5>3_-N=1eg#|V|aFp}HJrQ{M5V^VxVGvhcGCBlrfFkf_1
zpP?p~ammz(u5%I@Glnrt(be2!GLHKhPG_y(AHTiUUi;nee%|MO*M8Q1H+@~ly1XFr
z0C3RB=i(9>)JGZKv1S?7<=M=<sW>(;)OGNssO#&o27JxNLt{s?_Y~`eS?_rf6`I}k
zOmA3Vbd-{B{0nfHl5I@f)DNhaO@I}JiEN-Q8sWw`?HE(FC)}8Ggsb0+T{I~9<u=uj
z@mE^9+?C`_WMTLOGEk}a*t=;C;O5;wchX7C1Fcijb#U?_<B#wIGr$J@)SxI5)eTbA
z={Bh8p^P;okLtA~l8$?gKOiP;4rYI(h!vB3!t*eDnTPF$k50$m4|ew?bxBli3w&A&
ze9jH8eKn4o-Bd;76~p)X9|?CE)00)CM)Fbr;h139kA8Ufb~uk0=?QR?7E0V1lgTRd
z!RHH|>K_GA>kV^0c=gf0CKHZ>`6W4LaE<^Mw+MT51|C)V+s+WFq}=CSyl5GZ4e*tJ
z3r?xjm9ARI+~x+392L%G3`LF92j@tAJRiurV=3izA@%_Ggc*5how6Ka$VTQe#*Tn1
zkCU>n*QCtOa|*s1qg=q?mJ5`#%K7thixyBb=ye9z?9*aBsDD=&L8kC(0;|^Y24f}L
zCYS<7=5_TI!EK8-Mb2d<gjuMgYoG-MHm?vk0{U?`;z{OxSW0}rqBh?rnmkc<TFF#(
z*u3@sSB`o}!^r>LOYDEj-@dZvux?nk82JRDm%4FMGX$|Jb%`b7)Jssq5}|hPY%O){
z(OqyMLzcyuA{GEo*3G=)B>y;1l`UP2`E41CiqTA>#%coUxy|sRmQi|$^YYg8HEAo1
zmg+h$Kb492kNgGIT+^6r0#=^P5Vxoz7&iPsZ_BC)<=hm)nn$qEedr#sw=O8C@Wan^
z;+{}1Va}xPO?*4?OD#X#D&<Y(zHw@g$ft8yFrWh$mPjRlM5d+xY)tukiV`8~P>i$q
zpi#6-LLSq#NVNOskOL*k)tR>*?89E&VKE(S#;v=0AlOP+PqwZKT{$4|GMFN2WJR7?
zm%1dekKSKq(MK2X%^=ojVM;M5w>=*CWMt?)_o~tPDz8v&)9x0tfE$M2pK50nNAOMw
zx4e*CN_=-XfUM|n*Q&TSyfZvFeXba~xzdGI3k1YN_z9j_>nDztbPwOG_AxMX!r)pP
z+E6d#K!bM!S$HR@Bqe2Qx9>pGogGtnv4YowpX!xQ$;fAubKJ%WrC)DBW3KEp<o?H<
z)4RZCQ{B>P@5Z;ajpq~Ud@jAhmi{DSA)6Ac2}1gnzFW}YB3f<Z8_sOS2c<GuuQC@G
z;4Urb9F5WO1yHG0PzI<^d&bC}E&CjHa%c3#UMd;gaOw_}wGHpRy>u*k(o3greZqdf
z_<qoD7Hir<p3a7*XdRp$->B<$)!to|7|a79eNe@anDAEj;+i@pkn-*J4or02eT%7^
znK7}j1JKU_i$q(pX!HS2#f7;dOKWsd`60u!A@&1odz4NDrv^HI!*|FZ=&j#jQ~Qui
z%su9tc&_&<r3X9EVa%6(s;UoKaU0{{kv`Hms}kTVky@Dzw7wXn7IMFxCY43lTp8`F
zx@l4;5Rq)5`sdRMEg93pB&Fr@LG;^KScOgyOvzQaXT?UOVZz4`W;O9LC4MHkg!VXD
ztrBh<y$q?IuOC0_rt|q2Ja`)<kJRGg!!-&#ZAPWlw(^IsvCt0nQ$<~I6Te{+dg*qj
zQ*1NA=Has=woC)2c}tv2;{9e31EU3w%s6y>X#Qx~n$g&LuB_EgY|vEHBW^+;__m5>
zrG29G&G6HNJdOuj$IjEgb@PxY$c4(RDCf1)_kSk}29Q{30r=-&0a?z_v%3%i=f5jC
z5*fih1IwKib`>!3DOOfLvBmy6K9=^o=B-sypU0?v9L)o-kzIgp;ob!+KYwqAwk#K2
z0F%!}rp_?q9KSG7jb|~VB5SwRXd5Eab^N_${D#jsuiQKnE05~a?kAbu$6mbwRRhjt
ze-^q$!n2{h`x?B-=GWO#fv&$uL@Be{8iu)jbcYh%KbRUs9u|Mk@ig4ZA$T5QCuK<R
z-0`<q3oUsebkeZQ-gR|9-SGwW4;#*heE6<wRr{oA+grTUDPWbXpi}Y1KwRw|7}m5s
zqT*#F<-j-I#N<r%#%*^X(n?iA@U9bk9~K68KJzo5D9$2Z=t`hrPJ5T~f194?xybJl
z`#<{W+i9~cc6!!`%6#HT$iEW#mv(zpUB27Z4CHWep>S>TCV75SR>CJ-4wV=MuU^Jr
zeHXLQDFw~;Wx+Dh_;6g6+uT-a3CqUy@vobR9ge7sy|N1dqSLUIgT%})EYV4JR=t_J
zXzFN7POJ2@!AM!?F+0{pjZ4KhGH3p#wX9j6nlaHiG>>Ys0g>-ep-^!2?6f1ZdswC%
z_~|Y8I>!1*oVYP-iOIdKuqRTktNAMmpQ?zPf8<!*)|n|`tR<PxYX0b*_u7f3Y^vN}
z`frD{L~LmI(Mi>RDS}#}skePJ3|`}G`9Ga9ouEWTf14ya6NKdm^#BfG4>FuX(*6Oq
CYx|i1

diff --git a/docs/html/structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_402c3164d23f1ec647db5dad06a54584.html b/docs/html/structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_402c3164d23f1ec647db5dad06a54584.html
index 0a9d06ba7d..cbe30858df 100644
--- a/docs/html/structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_402c3164d23f1ec647db5dad06a54584.html
+++ b/docs/html/structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_402c3164d23f1ec647db5dad06a54584.html
@@ -97,8 +97,6 @@
 </div>
 </div><!-- top -->
 <div class="header">
-  <div class="summary">
-<a href="structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_b58863673477c12a4e46def6747d1835.html">List of all members</a>  </div>
   <div class="headertitle">
 <div class="title">cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::LoadInternal&lt; BLOCK_LOAD_WARP_TRANSPOSE, DUMMY &gt;::TempStorage Struct Reference</div>  </div>
 </div><!--header-->
@@ -118,39 +116,15 @@
 
 <p>Alias wrapper allowing storage to be unioned. </p>
 
-<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l00894">894</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
+<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l00909">909</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
 </div><div class="dynheader">
 Inheritance diagram for cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::LoadInternal&lt; BLOCK_LOAD_WARP_TRANSPOSE, DUMMY &gt;::TempStorage:</div>
 <div class="dyncontent">
  <div class="center">
   <img src="structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_402c3164d23f1ec647db5dad06a54584.png" usemap="#cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::LoadInternal&lt; BLOCK_LOAD_WARP_TRANSPOSE, DUMMY &gt;::TempStorage_map" alt=""/>
   <map id="cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::LoadInternal&lt; BLOCK_LOAD_WARP_TRANSPOSE, DUMMY &gt;::TempStorage_map" name="cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::LoadInternal&lt; BLOCK_LOAD_WARP_TRANSPOSE, DUMMY &gt;::TempStorage_map">
-<area href="structcub_1_1_uninitialized.html" alt="cub::Uninitialized&lt; _TempStorage &gt;" shape="rect" coords="0,0,1224,24"/>
 </map>
  </div></div>
-<table class="memberdecls">
-<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="inherited"></a>
-Additional Inherited Members</h2></td></tr>
-<tr class="inherit_header pub_types_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_types_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Types inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom"></td></tr>
-<tr class="separator:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a848f6233dbde22d8a42c022d3f0aa6f6"></a>
-typedef UnitWord&lt; _TempStorage &gt;<br class="typebreak"/>
-::<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a></td></tr>
-<tr class="memdesc:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T. <br/></td></tr>
-<tr class="separator:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_methods_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_methods_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Methods inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a790b865325f19ac45cc84d3fed0d3038"></a>
-__host__ __device__ <br class="typebreak"/>
-__forceinline__ _TempStorage &amp;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a790b865325f19ac45cc84d3fed0d3038">Alias</a> ()</td></tr>
-<tr class="memdesc:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Alias. <br/></td></tr>
-<tr class="separator:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_attribs_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_attribs_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Members inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a5fa7311d943222333e8c87497ff8e782"></a>
-<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a5fa7311d943222333e8c87497ff8e782">storage</a> [WORDS]</td></tr>
-<tr class="memdesc:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Backing storage. <br/></td></tr>
-<tr class="separator:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-</table>
 <hr/>The documentation for this struct was generated from the following file:<ul>
 <li><a class="el" href="block__load_8cuh_source.html">block_load.cuh</a></li>
 </ul>
@@ -158,7 +132,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:17 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_402c3164d23f1ec647db5dad06a54584.png b/docs/html/structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_402c3164d23f1ec647db5dad06a54584.png
index edd6f65fbe6190252facb2652c49f94ea659c23b..9123be8bc827abc636653d5e8260f61be99008a6 100644
GIT binary patch
literal 2375
zcmd5;X;hQP8vYPOw8RJ~SeB*)gyLFJQ3#7lXbM8uB1&XYNfl5~kbuap0tOTspooMN
zqO5_C8iXKwuLYuPR+bP$h^#@#V#*RBgxp~D&;8}kJLk-tcV?dR%)HM#^UmPhkO<Hg
z-7NqBfE<5@djNn6Lt(qADJxob@N-2A>~wQRImqR5g*n4t#Ag2ISB3Sdwzaiw#En1{
zU9|`gq$i*Nt(U=9D09WokR$vA>ZX!_$i6sUuf6fq0Z+NS({%IjP|sH5*EabzZ>m+R
zq(ffsfAetKT#}>dWVS2E!|8{ToZpJdRR@c7QFmso;283WBZ}VCJqGGdup5ZnrGc;b
zH2Kg7>Iqt(+>d~l>So*>mc!yhxuLG}=V~wHBd3<itfu_(P$&?&PZM8}atL41+20gj
z+^Z~_JD8nDKC$QxH6Q#D@3g#%PS^t}^yg+|^@%zw`_h<DcK#>@t4`Aiwc)_1x0>Z1
zAhO~5S~iUg;p5<z5Z6UYLha^LA-_|&F(rfop|6Otq>FI0N#<m^=Ldh<Xy1O?PiG#q
zqw#{-pnb&_u12)#If^5v{qL$hxL2q<XgiHZ!8d=xOifU+T|9zJGIG$j*$*QlG_M?7
z6P!Vn>JCif7^oLoP(7!}Zw1=<lc+OPQLqV(98tObfx`dXd(KBzQ5ZVE!jI#x?p>Q+
zH6un5eQ<ogYY)8-m+C5fE3$I4UAx88w_RQlFFtQ>7;LGS@;gy`S<8n<qi~uZJ}IL_
z!m1RK1~0CQyx2E?UAW8f^qtuP1$TK%Qn=KUD0UOZc{yc~RR=8<8+Mb;Tj$2;m?$)r
zi@r86>rwN~0OJ)gRz%j#$&8faanh`0ugY;i4Y)Q2Nd&e58;%2Kare}}2UHS)KXm1*
zTG>hpYa5_#1XWf8l)eY5I#N3?nty_}q2ZYIA(h09xDH^$I*Z~+{EN^1g5QgPtNu%y
z@r71j<J)zt(s@hhXQJ>gM2fTZ|4l@9|03JnPdA08x&4c5sbAyYF%9o|TI|~5flPFn
z34crPY+O^T4PYQ(Q*p5_Q&%q`!hFM@4YSV}d6m4#gN!}Td=B!ZTRrObsxcdtvX~>M
z?K-YC5|M&H8s)OT=-6xHToHIAebY=-e5GAgq?$K`oL=1gYBK&QptVwvV5J_=p8afC
zx6gxTyh#4DQJxxUKY$!h*jtn@_<+@>g`G;8wanhyb~hDn!JqQiWChlFPlLxOzj&6k
zwRi@E-(So$%<!twFF;<3b+4_4?Mx+X$94s_F4%N4muK60LKB(7?)N}XnDsZTZ6rGr
z8YG$I&pNzM7mvm6_n=mcGUX=JQquscyTN_o{eoov?yNFpW#?n?NVN}@(S@bwx<%>S
z-An>XC$0{1ba7O$7A9TszR~C>u{1a^OJ_o1wi)RmG!nbXA4)@HEp3A8X`>}xRN>wo
zg_<ns1_@aSd5wv17;5KxU}~I4Yv_G11?BoYp5#KHE%MFy4k#=`%8DRl9rVIZyEs@_
zKC_1qw6+?dH1cj;V0OvQ?4t!cN-73iWwmSNsr<B*h^htw>`J^SYbSRNjtN+SL|dX}
z1TYQnL9$Iy>kjWmfj!f=yl@*Uf=C68WWtvQdJ9701|`vqUWd9}L3S*|Y^i-1X7;-I
z+XA`c&W1<b{f})5CNB0lM85v6niVmZbp>zWYjL^wC6jbIq(J)M>BgE0nE2%BH|bZ0
z|Kg+(jL+RU8X5#f>yVZNd1fJm3uVaOD4UuZ>qm^~+4_Q!atSHL3oDXh>hdLA-^+30
zC8;BxV|v3!<Fs)@(j{mN_MPfnQ{3gH8H+bN?T3)XX{Tc9Te>Efr>v;HQEN|rwEls{
zZe?BU^Q6wzVMP%QA-T4DXoliTWtGQehm{MXN&N%%P`#?~T{OW$>>|fSs)s+48a6lW
zfdTv6oajM;t#T4>h`^MTfY-4|T9BK}`aL_hUxnbO-=AbM7aR7jH#AX4sP1N?B`Zj|
z+C9BI*1(wD?{7*J(L25wAK{#xe^%}gSvUO1d}Wph>F`#MiG$MO`rpd*LW?T=OrZo{
zJx-~9q>RXDm<t?#75SEXud3DA--s4De;~6fImQHfWwg)1qPFu_;1G<+jW2^<;Vs1A
zZ8Yi&ar~D=rHrynsYBF6y~{jK)3s*7+*6XdR&HyMLK+W3`|t*=GaDNh4(qgv#L;+{
zBV57i@V@Q3XfO!Mt|q{k&^g&5Z4UGAXxU7&Of@-)2Ook=Xt5Tm<Ge6&Ra`?F&Ui(6
zU{YMmNJ(zCy3WP%>B{Q7nIQaw&X1GliI{(!gUS$<y~~ih7lw!DpM9)lWIVBj{R3V&
z%ght{H}9FBI7Pa(yVB<tX?KOglKUT&Ig^2gmlS?vtgtr2Pcr^w#-cZm9aGqImMxC~
zBaX`0h9C(^@+Upnz_aRTAoXUv&ummrY)ig{1_z;Ma-^#%ySTEFIrh6{HJW#jviv+R
zjqj3lW!HxcBJ1&CA01h6A2oTo9vkZSOL!Cr;!bt#$eXtV+ZQe&&`n+#tO;G|TB+q<
z3FYEM23m_hB=Q+jIWJ6hIe_>%<+XAX!5_T5WGPQB0wS8?wdXboxla!P^1{^^i+cLX
zZD>r3EbNK$f_jN7=sHId)ee}h>RB)KwlxPN79#ZCiNqh93eS$|-4?GgZ0l(9l+USM
yuJK*=i+8-%&&mHd@X_pR9t5!=?SK9z$sZv}_6u*$weO|_isI;igjb(DfBU~gkXc>;

literal 2406
zcmd5-c{rO{7k`y%2MK+&8b!4kHEp$`s>o=Vu{MZ7$=9fDDz%i-N`lg$3;C>RVpnNR
zhoZJNbOayqRV`_2Us6l79aM>>NGw4!Z%pTz@6Y-3yU+99_jk`d_uO-T=iGA>935;R
zatGu90Dz#+SvmuNOgo4jcT0o)$dy{K2Vjo&E>>c(7=)i@g(T{Wdm!5Wnwpw+CX8x=
z!`(z@hw}iaBxwV$T=YSa5N-LRON7)MV<dC<d!?Or7Xv57X6p*x7i%(i!HkvTP}g@p
zoiE2uXXFtKkyAmTokJ6ORUG70#vwa8P3v2<H9c9Y{En6OHc&;oL6hu3i7Hy3UahW@
zvkmZC0mu{jpaieQIoBI{0Z+%LvJEzmhUA9S6s?~`LwZaKM;>a$1;s7PNpvI%S|yYA
zc>1f;*z|(Toa9ce=hOfinocV)dD^ZuU4@>8r=$pkr|-S;oLmbFAZ}r8b>F=%wQC{J
z<}gNq89%1JXecUQe!`v00zD8B!QDmviT@66yyq+wAkz9#B;Ndh?g?~9)oQ(K9VdV<
z@+b8lJtEu-wQ~!g3#LI21U*3q{mFxJMFXz<TF*$b4bATWjpVU_nbEba^3TNPXpxP<
z^N|hAlyM?apql5HI@_m>D2${DLsDlhZ;l{JK_Wn29(GbK4buf%ajDZYLJuo=U=epE
zAO+p)=R-sH+MKWTpF-1k!_T+k9w03<3zl3Mc?m%eKa7>00hgvq)??~Q-t)z!@CtSq
zZuFHo&7;&yEd)XxCdJSVp2}AchQ@;Vg@S5sFeL+wM=LxH?AaYWU*-#t?&H+$(2QZ~
zw$RQmJ)-MenC*XyqjNSkiI>d5&<JjAv(7mOay$ugECHaM*FQ^G2Z1#`vw7RHB;F?}
zf5$r&L7S8$gkKt=Q+p~8(}_gL#7gM_As-YaKiMXr?5>rpH=qzJbs@zpCP*KU1_w=-
zF5o3|u-JjMrEJXCdHEy-Py|RnJv0d0UhEC#&7b;U*8HgtIw!HE2Y{|gfx6xR=$Q2O
zMkgqLZ3O+xkh+TM|FxmdhR(K}f^<6i*pW16TkXPh>}{ECuJi!D_dhwGGVKhaZ?ZB5
zcLjrayYI~mxq*!WH0DP5mKEo7I?DEPj}*4_G#+?%T%&bUuFLSFDUo+YKc~U>{7nI&
zthUs%HKZO=gw37hBL_{OdtWYX-HKWSwmKJtlmX~rz6FV1?Uyr(P&t&}wN7c)!{!yV
zj2YW9&@--g!o`Yn@x_?my;K}K!qyJN1&uoANBZ_k?Pl5Gn?1kK+pVjta?I}5WMVRQ
zv^k`3P7NX~(y>m^_%u~7mvTO|^~3E)!K!mvjg$yRCDffL{hu(u$I=FE*HxH@&BgcI
zAIMp-nR+L|OGk2Wt}GejinS|Wq+RbXhFXVP`yMt)njm_|#=@EKrod%W%xZQ>(mNgQ
zeV6?n!_@nAq^O&tjh<KLx%^W_OOK+XbeT&|y1#i_tlnT-;#alil;LVBZmZbJ;LJqD
zgjw>fr~#mFMCeH)oif>nOXt|EtRQPOGeV=I55|A=p{rCX;gIuH(}qWU%4XikTdAlI
z|3c+`)3V!D#vGv@ITSqrFU_pR6h_Zf3T;sMe;E|`3|xJOTi&?$a&1MGmNM<N6k6Op
zVx)u{c!CNr+lQ;4v*3CZYbxPbom|=ZV>DFt>8kQd(F015E)#{`;6QUmH52J2%^ZKi
zmmEFf>4d&VsJ!t@Q*fPXicW5@N2o5d_cFmc_ojy1K<iL&GAbLxYm#?GBbYxcE9%VC
zgCJw9LX^M9-QgOa$JJNk8M%22k-k-Dzrhp2+_jBn4EwvE%x`7C@g!{UNQ3(q*qVEX
z40A%QTxg$U6D?O}njnKWHagXNt`?k%FJqkBm$h<@oW)t3Jix6mi8-zio;bULaGO0V
ztP8%l8sB%dO5j9;B<yEY$O!us&Muo5mDF1rUH}XD#&qop53`|0J43m?@X-Yy?jAAT
z;iC9`dJJJfu{_?V`=;!WL9wZW=(o$+-XB`_K=+m{x7@nV0}!LC`H?%e>@5s^2{%~|
z`n9jQ`OD*j5kKbDtLZo-x#6o?tY3X94UdNTothO|;@ciS-!K~e;TKP7nibc&=2%Mw
zSeC;zc2)!xZKbq}UJK5^1C7K2Rvzm+CET*93eKN;SPE{^5yCEMA4YDu;}vD*ku((R
z^ba>gtcOIS)DZ*4bmNZRK6{j@=vUD-SbSn%wQ*O8F}1#)n49!&b&b_--VttM7ABt%
zKWK5MeTDBk?v8JWX?=86gTb|1m@_)3a8{f71@;rZuYSdeCVUo6Y2Ses>?-Dr6KD|m
zDAM~z<)NQ;!8&`I;cC>y32%%((s@|mKY$cl!s1gHMJ-e-Y-%KNV&T)XiaqmQ4LXa9
zN1RX@t_}^38U65$<8Qen<tw}iY?z7Z#qwUhlY3(DcycXSFqHgU93yb7n|=okh)^Ia
zG$P9EpN-}J`uirUe<63>{l4KIl(m}nzRP1;(=#(9*0*i0{1bhNT~<e#MmsAqnoPoc
zslC@c1f>iw`9&5f_`&7Sa7b1$_dB0A>r>6$MPzgmH?gd9!ti~E0cB0`%)AmV@OHC^
z@j4fH>G@XZY_5Oz;CtR?M0-uw`u^KHx)(*bGEElS#}6<~WBc8SDn%S+wvA+|Y2^DI
zCHu!NH?nnPvp*X}%rb68wE<l=YVTKBF=L8lpCSK^h`}#h9AY2-`UaT2Acw*>H+8<2
sdswXBvw!dwfDcsR{}fnz+hZqR#`JXWC`m@Q)c{~aTRB)(n0w#;14yn^`~Uy|

diff --git a/docs/html/structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_e4c36dfe8f549604998f6c46cc8fbd1d.html b/docs/html/structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_e4c36dfe8f549604998f6c46cc8fbd1d.html
index 7134ca36ef..9c4256feef 100644
--- a/docs/html/structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_e4c36dfe8f549604998f6c46cc8fbd1d.html
+++ b/docs/html/structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_e4c36dfe8f549604998f6c46cc8fbd1d.html
@@ -97,8 +97,6 @@
 </div>
 </div><!-- top -->
 <div class="header">
-  <div class="summary">
-<a href="structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_26aa8cf86896512ffa4e7866d1071b26.html">List of all members</a>  </div>
   <div class="headertitle">
 <div class="title">cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::LoadInternal&lt; BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED, DUMMY &gt;::TempStorage Struct Reference</div>  </div>
 </div><!--header-->
@@ -118,39 +116,15 @@
 
 <p>Alias wrapper allowing storage to be unioned. </p>
 
-<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l00965">965</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
+<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l00980">980</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
 </div><div class="dynheader">
 Inheritance diagram for cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::LoadInternal&lt; BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED, DUMMY &gt;::TempStorage:</div>
 <div class="dyncontent">
  <div class="center">
   <img src="structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_e4c36dfe8f549604998f6c46cc8fbd1d.png" usemap="#cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::LoadInternal&lt; BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED, DUMMY &gt;::TempStorage_map" alt=""/>
   <map id="cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::LoadInternal&lt; BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED, DUMMY &gt;::TempStorage_map" name="cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::LoadInternal&lt; BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED, DUMMY &gt;::TempStorage_map">
-<area href="structcub_1_1_uninitialized.html" alt="cub::Uninitialized&lt; _TempStorage &gt;" shape="rect" coords="0,0,1304,24"/>
 </map>
  </div></div>
-<table class="memberdecls">
-<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="inherited"></a>
-Additional Inherited Members</h2></td></tr>
-<tr class="inherit_header pub_types_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_types_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Types inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom"></td></tr>
-<tr class="separator:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a848f6233dbde22d8a42c022d3f0aa6f6"></a>
-typedef UnitWord&lt; _TempStorage &gt;<br class="typebreak"/>
-::<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a></td></tr>
-<tr class="memdesc:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T. <br/></td></tr>
-<tr class="separator:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_methods_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_methods_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Methods inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a790b865325f19ac45cc84d3fed0d3038"></a>
-__host__ __device__ <br class="typebreak"/>
-__forceinline__ _TempStorage &amp;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a790b865325f19ac45cc84d3fed0d3038">Alias</a> ()</td></tr>
-<tr class="memdesc:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Alias. <br/></td></tr>
-<tr class="separator:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_attribs_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_attribs_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Members inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a5fa7311d943222333e8c87497ff8e782"></a>
-<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a5fa7311d943222333e8c87497ff8e782">storage</a> [WORDS]</td></tr>
-<tr class="memdesc:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Backing storage. <br/></td></tr>
-<tr class="separator:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-</table>
 <hr/>The documentation for this struct was generated from the following file:<ul>
 <li><a class="el" href="block__load_8cuh_source.html">block_load.cuh</a></li>
 </ul>
@@ -158,7 +132,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:06 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:17 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_e4c36dfe8f549604998f6c46cc8fbd1d.png b/docs/html/structcub_1_1_block_load_1_1_load_internal_3_01_b_l_o_c_k___l_o_a_d___w_a_r_p___t_r_a_n_s_p_o_s_e4c36dfe8f549604998f6c46cc8fbd1d.png
index 33990c504d935e706f33ba1f58b17ebdc41bc6fb..2ba54b3f39cb58052dc676ba700c14e9df40d0e5 100644
GIT binary patch
delta 2110
zcmZWqdmz*68=sWw{8Ct0QmamfbW)bgUCJ$z<G5>$Y@+58$7SwcT@gjiEexfSd(6yj
zRwpL+Si`JYwIbGx<1(|2Gx`1g`n`X=e>~6U`8@CYJkR@iN5Ld;#wEbo*7A%?lnf8g
zK$Vz~ww%{T^4F+<(%$!*VGrUT@m~$OFs<i@-KMgg?6yD8RI=0hEz`;F$n(tKE&h}*
zKcn|jj?PtrmCMnK)m%>e(f(4+<rc<~Y@%3#ag@3j|FKNl0QG>^LC$f{`?8JuNKBg8
zEhzT|vPlAzIW(ziTyecM-I5HE?pj8$awe@c;aR^dp9{m)NjZ?~9E-d2pLI`;tRRHT
z;+2N1xCFLWyqk#%XJ{e#O;83gWNK<Wn$DHvtf$pX`N`x48}As2LljOU)-a02V<zV*
z0y}*MtHldZrPuH%=9h|yl5=#RIOb9EpL#C~QOf8N3iVzU#664X=A2)uh#pxSdNzX!
z7|yc1FU`OGkTtzH=HAwJ53|*a?(uD&zyO|3=XQ)zJS(nq*a=qapgkKPeuqJ3<zI*@
zco(_ai%qvrbDeV1y67TbKA`xs3I)44+CAm&0#%D8pbB-Y+0<3j2Gu%(_y*CI776$B
zzUtpNHQ3RK(i*6DQBMVemtQ`k>=LWu6B&GIj1|j>xriaXT`zk~;rj7ck^B1x4-+ts
zl0ECCmsoZ4{)nJj7_N9~bycdl0M_fBSv#Ko?fVT+S80bNKcl4K56cf-T4#wiYxl6z
zdXnjMR_pogpRNAO2eio?wCuVnvpEr@0NV6AD+MAWwQK{qPqHbwiU&Yadr0UZ>kThm
zIE(7tCdHY98sUn+uH$45C}`sj$hP8wWRLGl!2jO>$*e;_n-jT8IH>Glfbc7I;)+nr
z^$sRR_Khk3po^k_Sdh^e$r~)p>mKiI_|ohfP}@*rU1^sDJN<VUG;B|EFj4pMb3f<S
zB|>#mMcaLLq%nSM*w=HQc|5e(_81%5sJiPtU+e^U@D%FIT(;WAW_C$XG8zkpfYA<g
z2fp4zZNSN2bFutQ%NmdrY4IWNc*RS1+$C;(s@Hs&AaFs%)_gM?(Xa)54@4(u!V<Ud
zN(p^;-&=`uwVPq9_BW!Z!mcc0veS1j_qs)3m7k$0zIXX428q?#sjoGiJ(w^Rn_Dwa
zzmd;6Qp0-8lF8$cRqGC(UGqo99o-@6esOIuxwaxKBj&<l+bW<Rt%vl)?x(^D!Blmq
zO{LLH=P2W537&!6_WJgPw0uWOp5Pkg$7j>r^WS1>BL@9NFxwecHI!6Sq?Z%xi>EC;
zO^7{4<)K@c8r$o7MZclqzABg;r20IGxV|{O<Rr?3uvHt~<zE3A`*)*N2^O&alhJ+S
zW|}!Ab@-wpAnA#4)Hc;UIn;P2mKJ2!pB#!M+UQ@ybveTv4(lw`8u;nj>pd>>cSEuO
zI;!$)c#)mtgEu!l=3VvGbNq5<8(~7A_H~Y@i&9UgWlvh%8J_^d+^WWKB4=0w(=42I
zXXQ4elp7cHsZfbihG#HW%K4HEUTUcAP(q!B1|Ud6^?dm3TW`2#4{3hfOM7Zsz(EN*
z8VC1RGs3x#gmU_7(>GVZu;G1J!oIC)D$CcJTZAijenh=EI@MutxC6tmYUat)8p}-^
zk?l+pdV&~q7tfxvG9{4lx$n5&mw6wT1yAm}Mew=4*?9ss(m#IgAJH=iAIGRJII0A&
z%ynUHMj^L4#@loQ!kKU3w=41IlyjCs;&Zy@dt$s=X2fMWCP%1D3Iio@uHiOB2*aC$
zviGOdj^SWOU1xzq)PoQGONr)^Q~zPi+n17)+6_FW&!sWDDFrNchz%-mr(me>2?W}h
zfqhG{6wk(y)?5+r{hg~9z;COIfUc!*Pewz5#*c+Vc{@*ePNvF|uqCY_{)y9%zX$63
zqTvI(M$Twpg+^0x<U3vplQVJ!rB>Qhaz}sWpgUiz(~lbEp8197lLTjB1aP0-XjW*}
zuD9}r^fk*gLO>PsYv1Z=FdB<=_{ek+?S(DnNCMhNHE5sbj6)cfX?=jG2~rAIQ@ZTe
zB6-G8MjMwFrOsJ?Dg<-7@G=dh>7p}O0>M+S=t!jg`CYmZlJDu$Q#w18v9vD+r4!K~
zS$!={%Z}+_*n`xBB1q|oV~#DF<aqTe8J-;4w3Dx%S}S09gnQdy`GI5-yI3d0A&smO
z`IX)xs)UF<C>Ch|cxqv5ouO(cN?m1X`gOb}wP>eKK@Sb3PJ9*iD)MCXi+8LN*GVzh
z2Lg{gt(_p9^p^t<#1^O@#FI_(OPmmcFQ=f>$U<)7*+rLZqXm0cr_{NU%PF)A?;~&s
z2G3wTAc$Puffx)bn5(RehoiOEYD@!J3)hqCy^_4PdIKsA7i!06iC<RdD%g7SLd=+D
z_`aar565uWM~&59NwE{X7(BCnVlwb3Rewu$R=07aU&>I3WTApZ@tfcbH$`W5v50L}
zWV1c!{?pLAW;{}TR2774A1pg=Pk<&&)Wi^(7xO-y;7eb(b)0!xmFF;`EDQ~P-{h}?
z3<?0$8<ak=#8K}^147lCo8Hf@c!~wG0{uw210ZoG1*apanuqS@2$x9tm>QO23?`A&
zM)_TC^z(_RIsA>-PSAkO-cN$4xN%Ll<(12=t(^k?2lT_0wSyZ1Z4QD&*~4$~0IReO
z)dkJEC;wN_b2L$#x8b&IhG+>D(Ero_jjz7@ju3i<qba*cYfJ%2m#vk9W%b$1H~$56
CIY`I=

delta 2099
zcmY*Zdpy&7A0N(1H}_8MJVF<jlt_x2TP_i~<q?_7YRV33l3a(M%9Goo=L|J7m!ma_
ziD4M#p;jG<j!QGMDavUxn#C_>+cVDVIj`6A{p0)nygrxj`}6*OUf(xOhoy5r)E(vF
z>J*TyBqR#_r|oE)LWH%T%pyf4_Zo4|P_r99p3$S0n-C6<mR<=iF*56K%2J%q%=)!U
zt^`xI=STag)jA!o34ee)r)&yzyZ+og7hCE}Bd<`{M=+>%X^l_&F8!eOq$Q($Zljf}
z+9+fIh^nh$-!P<EeW{Od!zx1eT-SFkQkv(VJ}cj0{~m!~k$fBfQ!pI9xRL@a1&czl
zF~js9L8sQ<7BXh`d;=J}j_P2H3{b1pV7*8b7nbn9Ab_31@(wf@{Zl2|E`|Ym+Kf-q
zx$tYpmw!#&N=T?aqkw;yb52n=HD5edSl<Jews7Z+*h$C4f%};hL5s)a3+`3x{+jg^
z!;P!I<Dva8eJ8$(2bR<4>nPzCOxZwSx`BN8aI^y1SKG37)2Qm&8LW#3k2ewwgmra4
z#X~2)w4}S`uM^U$j`mAG^{KTuHcvE05ytZCy|Jf3GJnPkE7<-%y!!Ot9sR}`Srmb1
zPa=4ME1cgSBiCwWqu;sLpN4^C*t4zF_-Xb$x1T!<r|ShCuh}$8&Vio4!*}c*ew?qZ
zF#0#%FV?h*lM#Tj`$Qk~+(5>!la=kao2Bbe6^&NHjMho5j|=CgYz)`1H2=SG_BNUI
z|4V5zL}{vU(clzBVeEl~;&Zh2Kz_)CTu+cmEFypS%USlf)1eb;rK|{RLMYzPH)}$a
z6?LxpxJxPsoS90#o}-T`Da+q;3#mxk)6}TQ|M%i5$VRquPeLv(dW#`&QZqu_lqUOT
z0m8E*g<A{w>75LFh@NnwAvewY(=U;jxUeP~iygE-kkt3k;6r<1yLuQ6_V5jyYRX+5
zALE?oy^N(3eXhkeQBWZZCx#457zS-N8bb?faNvSav&kXA!~;|ET8dOM{&rG+1Io-(
zjcT+mC%tvzpTCm?0NIHyBc<j~Yl3hbesex|QT+AlHwn+=KVRcoe~ie3jK&*UW^VD#
zjeB*^(UAGBh>AOwMJov^_MdoL_}Y4mm(2<pxRDCOi%ySr%wOD%_8&Q1Egb@nAZF<*
zHuQ5`x>dHyv6qqqmtC@CDCAlobhX%f_&2SHpjpGEQhvG=P)#`=7M!Hdw5Fn&yA9my
zVDm3eux?X{Cqp;)WQAerevDEuiZTDe{$b3g)W(E&D2b)V{3KeAHgJrgfUm5{($muA
zP^^R@MH38P!4_yaBD~92HB0qv^_ZtYNkwdnToI@YQj`ib{=gyUkckJG!N#%U6SFg^
z0n1`BAU<+TuKveIJtU`4^6n<#*P!RgZEPx``Sr{bsEIao^@1K%_i^c)5i0^^o)>Ky
zxVOWXl)*1tVx+#J{BmZYcA}g5S+a9RBusjKLZ56un7y@Yzyw(bZ!rPM$g8yUKRdQ9
zZ9|q(#9g-}B~Sz#UtQm@>YzT}2z_rTG_eIP-vACQd+GVO5yZSLE;hIncyo4q=v*T-
ziQRXcfDSv#n{smr^8@xVJEqaC)2I5B9leum5RCgOg8@ce(CkMnGrA|O>(W1hT2yKI
z$&$o3ovj1U#H!sjsKjO+ofM)T<BqP}VPvRwKArFeE|zr(>MX(;i~~%`<>f<UAkWoi
z$P2rm&bio#;NcH<2rub6kNG^xJBw-4zp}X2RQ{e~D-VR0@0b=QwX&v11RdAH9EfNF
z35Bd%zKr$!X{{nB?##@1)L}oz0p|j{;iv^uOZ5_%&UE*p1q!s`;Gu-RuEh9*OcV2&
zzOM~hj`loZMRY%Nn#G7hvVbM4@pAJ+6D7l0#BkT)TgHO4_^ASPk<I*eKYJab-rcKj
zKCgU$y`C{gm=I%WL%JV1lbPb?nit8<Ed0?`KGA>~yd7hIrpkJ5=y8bfoiP?!XG>YU
z)V7zrs%R{+K7r6VAx9zSDE+yZi6}?S2w<Gyj=gr2sL#-O;Qbt6bynoZMOW`tifpy@
zDRr@IjbCktA~tit=ifiRg-C{9N*~e3$LmUccVHfp$oLMappf5x33^cf@rx9n0dX6h
zpb;;6qHluSy-SKgmW8{78Q+_9uBuvM?_n;%I^)fmTvWunbiK(gD#*>a-P;p`oNKwh
zr-3`!kk?qa1jOdV?IX%gS<ik9#rE}b_1fH#YhNIJ?P<Z->YeK9V@}3{J5rLH`DJo9
z=-qqvKKTauagKcIet8iTQ}2boGkv10;KV?7>VsKWrJsE<$;bYm#UseI0~e-m>fD^1
z2~4yt0O6R15K;I|U6kFSkcxo|0$zP~7eOYxKQ=K0OiT%SmrciXyU*J>WGvAm9T47x
z9CWoBuJp>HllJ>j!S%ti;QIKAv9&Vq7vIm`iXkoP+EU%3+A>y0^$}L$I0475e`X%e
zs}8TgTFx>I?m7s;=Hv#bzzeOy^`aVNd~Qf>k-$r*8Wt2E+%3%+TOZsS@+3Y&XQ&+&
z2!sh^cDHIz(N;wF84k0KnmaNJz`>6^4Gyo-N}<H$HTx~YJea%^iiid1PYT#-Bn!3c
zT(BCMsUq8CA&Z>Kh{#-U*PR*^t#u+Ue30A`E$agSxjcVFu3`C?GAsA=#5etXRqp;j
zfrbFvwDxZZ{1on_ik_(f(u`E(=V@A1<nO$p*X>8-s=JnTo>pqC3_%pj<Ft<}#rfjx
F{{qDqM<4(I

diff --git a/docs/html/structcub_1_1_block_load_1_1_temp_storage.html b/docs/html/structcub_1_1_block_load_1_1_temp_storage.html
index 63567a29f7..032c05c3c5 100644
--- a/docs/html/structcub_1_1_block_load_1_1_temp_storage.html
+++ b/docs/html/structcub_1_1_block_load_1_1_temp_storage.html
@@ -97,8 +97,6 @@
 </div>
 </div><!-- top -->
 <div class="header">
-  <div class="summary">
-<a href="structcub_1_1_block_load_1_1_temp_storage-members.html">List of all members</a>  </div>
   <div class="headertitle">
 <div class="title">cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage Struct Reference</div>  </div>
 </div><!--header-->
@@ -116,39 +114,15 @@
 
 <p>The operations exposed by <a class="el" href="classcub_1_1_block_load.html" title="The BlockLoad class provides collective data movement methods for loading a linear segment of items f...">BlockLoad</a> require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the <code>__shared__</code> keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or <code>union</code>'d with other storage allocation types to facilitate memory reuse. </p>
 
-<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l01052">1052</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
+<p>Definition at line <a class="el" href="block__load_8cuh_source.html#l01067">1067</a> of file <a class="el" href="block__load_8cuh_source.html">block_load.cuh</a>.</p>
 </div><div class="dynheader">
 Inheritance diagram for cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage:</div>
 <div class="dyncontent">
  <div class="center">
   <img src="structcub_1_1_block_load_1_1_temp_storage.png" usemap="#cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage_map" alt=""/>
   <map id="cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage_map" name="cub::BlockLoad&lt; InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage_map">
-<area href="structcub_1_1_uninitialized.html" alt="cub::Uninitialized&lt; _TempStorage &gt;" shape="rect" coords="0,0,858,24"/>
 </map>
  </div></div>
-<table class="memberdecls">
-<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="inherited"></a>
-Additional Inherited Members</h2></td></tr>
-<tr class="inherit_header pub_types_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_types_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Types inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom"></td></tr>
-<tr class="separator:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a848f6233dbde22d8a42c022d3f0aa6f6"></a>
-typedef UnitWord&lt; _TempStorage &gt;<br class="typebreak"/>
-::<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a></td></tr>
-<tr class="memdesc:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T. <br/></td></tr>
-<tr class="separator:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_methods_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_methods_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Methods inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a790b865325f19ac45cc84d3fed0d3038"></a>
-__host__ __device__ <br class="typebreak"/>
-__forceinline__ _TempStorage &amp;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a790b865325f19ac45cc84d3fed0d3038">Alias</a> ()</td></tr>
-<tr class="memdesc:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Alias. <br/></td></tr>
-<tr class="separator:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_attribs_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_attribs_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Members inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a5fa7311d943222333e8c87497ff8e782"></a>
-<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a5fa7311d943222333e8c87497ff8e782">storage</a> [WORDS]</td></tr>
-<tr class="memdesc:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Backing storage. <br/></td></tr>
-<tr class="separator:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-</table>
 <hr/>The documentation for this struct was generated from the following file:<ul>
 <li><a class="el" href="block__load_8cuh_source.html">block_load.cuh</a></li>
 </ul>
@@ -156,7 +130,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:06 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:17 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_block_load_1_1_temp_storage.png b/docs/html/structcub_1_1_block_load_1_1_temp_storage.png
index 3b3d3f627a67942f01d9d7daf89890fb7dffdcc5..419de89d6e0278a59e7b9f5cf69972911fb11043 100644
GIT binary patch
literal 1890
zcmc&!dpMg}9{yCREvZ$rJH<|wVmehe>dp>3Gf`5SsG#Xbia}jcQYxW>V0KleThWY5
zQR%i~OcKeAOQa}G%Q97$HWQbLOOzpT38jRIOXj2d@6Nw_p68tNp67Yr_xWAU^P~Es
zykUop4+8)IL;4^B06-Iju)mHbbUN#=U4pJ-{(ga8YPA}I^2K!`?Rg=D_P0<d)S`}=
zLQkEP02Bs*NPBCrC-5u)Xs;j<p9Q9AC@Mz--aR?0MNVvA-z7ZB!N$0{z$b)Zx07!n
zVrb&>5OZ@s#6PI{a03IQKhmQtUii@OGM6Ly?}0;YiG>mJEWIdikZy4jd5$jn0o*4A
z7zg{(mnKj0Y#w(Ay6v}1m^q5}&S^Bg=`Y0-7^Er!5{^G(mN1oi|G6XyJ+&-bW1JW1
zL%wrIBzAwLGPB5N8&fHGq0gCCl;TjxEc3oOm!2lJViiX-Bi|;9W8%>Ce_3Rja4eCj
z{)28}qxQiuQG1`4ml?7H)9|M1^%td)atWs?)^X=3TTtRhUwRgg6n)fK%MW5%+*4Nd
zz|+m*(zN;CKJ+ck${$K#*M-tkO!tG%1y_{L4CMx0MW)QxkQ%@n$b^aN7Ft+|FFmny
z&n1I56!4R*exPB<d>$U-Q!Y2Td<NO_r@dI-J_2b8@~;bWqseU0j9+8&OBsk;t;f<+
zPD191W)r(tbtQkGSX6JQtL3j%Nh36Ex&x-DV)67&!9|Hr^|}ies%Q%g@}e+aR>;Ro
zm}*@}ndP}WCr36lH&QKcorhWwQU}u4WX(-f^jHy8`8cc%)P^XFPxgf8pvqK7{LdDp
zVXV!cBM|rUbqo#e0@)gyy4#v?KwAhzh4TTuY>gkMq@Jx>;eg)Xj|S@sfn(WcCVJ~b
z_6WbcC-`)Jsc{U@EX*4`0O?M^IQwMd_hg6KKWA&e0Vp6qGaP{a!=8{CvG45D5&!=a
zZd(60=LmE2vnTcvg#*&=-wOyOHa<MPL$JNZMUCG--7je?_B@_Vty_yUGxc>1xy)I@
zgEYz0iaHK%HbZy7E~czynOJ{Ml>8lUTAa!#7yQ~CA@IfHdbY$HxQ`{BtGhx=G<-QD
z?&%Z7(R$ZF*)3A#K{>-71~`QV;M1o~F?eEv;KPpY4{n1Sb=Ka(u6YF~71Ma--5VEN
zFZgPG=@7PNH1NXrc3EZKBTr*UlcdyurT+V<S7?uf@sX&|*DoS6zU^aa119C*g%`A?
z8<XeZ^Ons%c7bghVS{dYTy;T+a<djk^KDa~vd&}&Zw=eN?8_@6)_g~tamf6PJL9$W
zS|u30qpSxvv~2OYKW*)@K};`$9fV44160@eo~(7Dpz)GcEW4<5;lVCm{r!VvHh4e`
zbJus;=5>>7cD@D|^29lK0R7G8IWBMb@rE>r)A}<>7JGPIH$FjX?|eNhV4=FwbyStO
z_^z;$6Le(mZ<EtS9w{tYQf#X$E7JD37(B>gkdSQsk&NO3gZn6HmtX#hk<~;oY9+zi
zeN@=DLTSqRxJ-U8e$H`hEZVMmW`%5gjc5BbEN}tq*V@?P<7cQ4_Z%6UdZuv4RJn&n
z<A3b>XG)UMOu5qi-xQU?<5Si*E~R40Rjxh1Cu<0ij^n;bD~e#SiEk1^2@6@G>fLv>
zj!I)*!HimIB_#}`=yWk!zj9+c;zSh<s`#PwPp-$L22Aw>;|-mePD$j5gbw#BYW~7G
zu7W5v-Q4xqVSq1#TJ+w=50x*;_4q1}2iGPIO`PIkGh}{~tT|+AD%QruorD$}53gnN
zujq^oi_?>0vX#9_l(S=Q%dd8#ZtF<w?|dNajO}_9j68m5V6glSmeXS4#u~Idoxk++
znN2PJ>{rC(ZFIdVnm>z`J{+1I=`S59WHLDXCs@*?2WFww@XXb=+M&irExer^b2($h
z>QhfvBq}~`%_oZRfaY7ZESeb-vG0JKkikfXqlDNS!CSrT9ox}c?HfKXCa$i2nLypK
zxoE;5goioR)}Xg7k5$~}6K;BO+i#jKRT9U^P0<Sd#b9p50q4+l!J;2kl$_H?AZJ}v
z$Z)G!loq`9N7^F{aq{HoUF<>NaRmW=bE}+k;uK)`D`#^nXREK9*q=@D=MHZPLiEG0
zvNGdxWs#I%*!AZG_(H#M-0s<M73>F@DAw0lU3GbX7Ll7tp&ve^Aoi!rZpH6SnXU@Z
gz`*MLsLMaNd#Kn{zi;-6-VFd+NG}wE_j!2kZx`>0x&QzG

literal 1919
zcmc&!X;4#H7Je9rs00;s8c;zL5mX4M7y%J`0>T5?!X8Y^qSAy0EJ#=qS%y)%WNpJL
zi)priga9J1fkYsULI!D|Wl4k<(CP*xm>`k>4TQ`KR#(lBsrfs%>fZa^I_Il<&Uene
z58PcHP`h<@0|0;m9qn)cfWX7pT@?YZn7tBTc++yf=4CIJ%VGH8qm-P}{R51)Czs1r
zdGJ;bKB|&%uATtwq!?qbyv_o^PIb`kYp-}EQQ1PY@VSOcfV208Oz+QqP2y%m(8Z1Z
zhWu#FUneWW=FygO-$f6O|55s^&vH)nU$mPBeU7w;RjW_&+YqPa<4nsm&<1zEAcC}q
z2FJl5KNIjQY%^A4o&f1bO>FUfpF~S)T_?f<eHoIq+$s}?nFIvvBH2}6V33%48Zv-F
zBaFb~p_J=kp0h1@nncbcg1CL6g0@gnTzSiRw2QE{(RDS7#hAk6?9WEh&MzCG343!U
z44~#rlG;=jg)D;auA&D*2F7WO-sAtQ9<_-HqA_ZstPdW%Z-8>^K<Q=avJYbg-$3@v
z`9k*&=qBwz+8C`$>y?E$W0@2l;Z3x$M$P>r_K|I^1&Y9yU|S+N-gr|my&DciZShHd
zWO4|vk+x2yOFcRMtoEu4iw*1zbpHwPDV%TmdlRUjIN|w1K=^)g(K)dA!y*4OV0y2D
zTXy8WUY+y<CcAZDv(AS$F@lP&_K8bS*n1Rd&Oa<|9ik3TCFe^LM<d4An69|XR(!rJ
zp4Q@?SMu?cEZbwkv08eO1&f^ik_ZlW^^7$5odVYnz{)3S-w?d+iCE4vpHa_;#eWS4
zv)&p%ShNRoqR(;I`af%N6|U3$Am~f|%wX-c;UXwhfO%JG-EB|uV+IZ9N?ismOG;zZ
z0WPqK+yb-!L?Cdi2@fE-fZftu&g2d=0C`lEsS*gNT{(bx_VI5iN@jq$pNYb6$5TIr
z-@PCRP)ku#eL+-$>HE^gwmE8FI9fh?xPVrQS^YK<ZWaZx`^yFYA7auT<La9@r@pE@
z8faXt>ZnLTD+O+F1^DkFWoklc-4ZG0tDk&oB3%!LiXvxnb_NEY=|KO{nfELo5_U4A
zwK9%2XX4giKzz1k|7c=kZcknTmVBh9b9wanfUP=*fM38()pRaey*4bM@4fy>d3msK
zrT;fm{Sqwsam#|imDSQPpy|Ca#K3HY6Il(SmEg6aV2=ecCtCTt`l(I|#(u%#ByT>L
zpV|;-%e9(h5Gh$p{g=A8Y$_40*15rPv!v9d&tVI_hQFt~(w3Y`JW;@f^H4EuRUGrq
z$xNu<?r83nD7`UemBx~1d#<e26LPQ}a--87UQAi|_cOP#uh`g8ieX98kNnRSvTtR-
ziLO%pSwkwO#MF`HEq{nnM5yy_=I8>l2xw@u_>*XYx#PU|tA44Q9Fhy%EWJ&XglT0v
z4<!j&2+qkBe6~LMoep=z`EI3;ApGq{+?deyCxWg^dD7P`5w#a7b|7}NoC^(@&Pvp6
zj)9zLgNVz`l5*z(Ub)E7lgEj758q!!nIPa2xwDm|%y)T-htBC|4Vf)9kkX?9#>bJ7
z!*U|REwUn{u3B&>x)9czw;0yc&fZM&V1<~Aeia@wcWLoFZFMJOr{44<oyqq`0oV(s
zRT=$8f|luB>owEjk!xiy6P#Eg7MbBQK16wVxsm^a?yVwc=<&rvf)O^I^hCbJ*P)^u
zc%PH0w8hp?r_`r&rXCIu153$eHcsKuD&mjVcE8pBky_9;WEi=(J|dp?dspmaEo39!
zxVJbovO0>-9=?_08^xN>%ywrkE)O8TdDG+(ZZyq=2Ie9kWK=@$&WKD$bp(=5JX%>C
zO6+JmcZE9Jc0;Xo(3)gBeoRC3=&BR<<+r^RKJAy(8^(&W{FrQK5{}8KJxU(B{WH4O
z;v^qjr@s}tF)euOn#73V*vog*oP~JSOiu-3wG6-L+EgTM#*d^6*(<y3E2zD>l1rXE
z>nDUto_*}b-NQPGh2lsP)<iAD_O_YzuO#pe&z_=_?_K!iI$N3HN;ru|^9$b4HK6x@
zB3Ev`psZ(fDTl-~t(ed~m{C*TQ$q=>k0bmy?oHN7%eg2AnL{kYF3gfU+iGK5yRvM$
znna(JB99>7+$Mr+1sWLuK%W&E+jh=&?K_b2V9k4Tiz_P}A$KYzm*X2(qiS@yzu-28
zl?5TmzNkCh$v@Y-A;p@So0ld4>SLXV_TJ=qmf|<!h9;O2zABfeZ@=5klK=Iy^t!O}
faA*H|_BcR}aM6JDebF9;06ai@SG$J42d4fTX~%=Y

diff --git a/docs/html/structcub_1_1_block_radix_sort_1_1_temp_storage.html b/docs/html/structcub_1_1_block_radix_sort_1_1_temp_storage.html
index 67f6e36b17..b33ee2e2ed 100644
--- a/docs/html/structcub_1_1_block_radix_sort_1_1_temp_storage.html
+++ b/docs/html/structcub_1_1_block_radix_sort_1_1_temp_storage.html
@@ -97,8 +97,6 @@
 </div>
 </div><!-- top -->
 <div class="header">
-  <div class="summary">
-<a href="structcub_1_1_block_radix_sort_1_1_temp_storage-members.html">List of all members</a>  </div>
   <div class="headertitle">
 <div class="title">cub::BlockRadixSort&lt; KeyT, BLOCK_DIM_X, ITEMS_PER_THREAD, ValueT, RADIX_BITS, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage Struct Reference</div>  </div>
 </div><!--header-->
@@ -127,32 +125,8 @@
  <div class="center">
   <img src="structcub_1_1_block_radix_sort_1_1_temp_storage.png" usemap="#cub::BlockRadixSort&lt; KeyT, BLOCK_DIM_X, ITEMS_PER_THREAD, ValueT, RADIX_BITS, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage_map" alt=""/>
   <map id="cub::BlockRadixSort&lt; KeyT, BLOCK_DIM_X, ITEMS_PER_THREAD, ValueT, RADIX_BITS, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage_map" name="cub::BlockRadixSort&lt; KeyT, BLOCK_DIM_X, ITEMS_PER_THREAD, ValueT, RADIX_BITS, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage_map">
-<area href="structcub_1_1_uninitialized.html" alt="cub::Uninitialized&lt; _TempStorage &gt;" shape="rect" coords="0,0,1315,24"/>
 </map>
  </div></div>
-<table class="memberdecls">
-<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="inherited"></a>
-Additional Inherited Members</h2></td></tr>
-<tr class="inherit_header pub_types_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_types_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Types inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom"></td></tr>
-<tr class="separator:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a848f6233dbde22d8a42c022d3f0aa6f6"></a>
-typedef UnitWord&lt; _TempStorage &gt;<br class="typebreak"/>
-::<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a></td></tr>
-<tr class="memdesc:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T. <br/></td></tr>
-<tr class="separator:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_methods_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_methods_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Methods inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a790b865325f19ac45cc84d3fed0d3038"></a>
-__host__ __device__ <br class="typebreak"/>
-__forceinline__ _TempStorage &amp;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a790b865325f19ac45cc84d3fed0d3038">Alias</a> ()</td></tr>
-<tr class="memdesc:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Alias. <br/></td></tr>
-<tr class="separator:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_attribs_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_attribs_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Members inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a5fa7311d943222333e8c87497ff8e782"></a>
-<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a5fa7311d943222333e8c87497ff8e782">storage</a> [WORDS]</td></tr>
-<tr class="memdesc:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Backing storage. <br/></td></tr>
-<tr class="separator:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-</table>
 <hr/>The documentation for this struct was generated from the following file:<ul>
 <li><a class="el" href="block__radix__sort_8cuh_source.html">block_radix_sort.cuh</a></li>
 </ul>
@@ -160,7 +134,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:06 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:17 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_block_radix_sort_1_1_temp_storage.png b/docs/html/structcub_1_1_block_radix_sort_1_1_temp_storage.png
index e49210f10b6cc6e98f0341edbd203b3388dab1f9..3230402b4b67496c2b42be20d024ca37bcf9a196 100644
GIT binary patch
literal 2538
zcmb_deK?crA0Ijrc}Y$aW7UrMoie2q(o`oc5h|&SNmMk%*cQWz<mhGWROEQ?ke3-1
zv$BSwrlU-;MVOcE7@JuVVq<3uo$Gg9=lcEid#>wwp6~s=Klk_Zxj*;yJl_-#H@LQ@
zktP5D&~`q!|0n?P14I#fXs9Wk?4h&+ikG2>tEZD(E?0!Zqm!7_7io&<+uhL6@Iy)u
zSTWRyI_h=|pa3Z^_Me{C0Kkf9=ly#<FR6%gdeMy~`s!I(;%Nf!c2oK?HFO};E{ihj
zBWzM{Vr|(@tLAtIQK)U+r~)cyABvJxse9O_M4g>)<k`qHCwgj^?X@YnW_xrow<)6V
zf`>BKL{d&DaNa49<B7tF*uPpydm3ro+hkrQCx?*qIM0lpO}A-fdna)V00Vd#bDM!*
zaAd(V@V02OktZQTSrx__AWzLu<s8^NA#T?NSUuP&PaC`ojh}=z1*9?1q7iQN2hm)H
zlGSPzyiA=f=rW1M^?wS**IuLsMHbjQAnK<N?3DD<xqTlIAe%V@tO_$-!D}{nkexs4
zK!+?}t&;#Ud5Q=%$hLP#gt&a@-V_I9qKY=AD|TnK`diJX{moP3YpIi`9?Ov;ay6M)
zZib%-kabs{RA{Vo1C<(i<_Yj{HygSD``l9hVKv#80_tLi-8!QrS-n8zZvQ#`g6})V
z=|j(ZNrzL5MSK!OBF4JEO8YKHRm?kv!IlU`%nx$PFlY801+T{bbEx(HYcT_F1$Re?
zB2Fs3_cK#kPsnab`TeGsc9~?Iawy*+o;u<i@t-4(#%6ygOyBjZ(t5DK`#vnKTxrk=
zrPaz{{o=u?vkxhyD|1T+SK_4<tIQdKy1t(}$6Jd+T?2CjsH_9Xq%t)<fa)HA24FpL
znT9g8EFV8VWB%L1;Hs^@B6As_2T+F@taVoX7VZPBSX`52xHvs{P>E6911M=C0aTPr
z^xz9%)jej5h5xDbbHq<dMwN9-V1P#oL`C(#R9rr^#IghicrLO0x(HUu*I#7$N7CIT
z#Y^D-k0b(d!8UJ{<-%9=X|h0N*}|s3eqGwf|J<}W5v18IC|+YqsS!RJw8$i*-2EJj
zchFdg!60_rTiZ`w)pxPfc30S$2ymovEkP7$jc6%|k(^IG5In!`5(b&Uvz~rhd7Ak8
z5hs{X>r^@Tkk%)nUu%ERCwINFJD4jO8G&N_b2iS#)Ki0EFgm`O%)w3{4Z%ow3+=(%
z*~wqVhg8xt-xs}~dDWp|>7A>i8pX<F0oM4S`GpL-`{c-YXv0g^U*1=Er_|%W9-HkF
z)z$Qy<}CjsT_<%2ehqo=jZYh+7q>+h_nK53ktrQL^%lQ7v3B@T&}dxUDnXl-8l5<F
z#**BCCB{F~W=+y_2fh?loXhcrPcR1@z+u#y_U-ugf|E$+i@PzpvhbBiou+IySp>Gt
zh0pJrT-%V)NJ>W3BP*|TCp_va#h;>)5wpE97CPL@jMwAL1gTvtubJ1^<uBHrG=(Fb
zbATW__W(^axZi~c;z~Iro~3lygXQnn6JGSr2F^*rx|9(|-+#fwkz+K%Q&uV46dA>z
zKGB*uFhqS@JS)dqaBUi_d-Ea!SFnG9hnWXCYB4Pdl@5iTGAg*H)t?vfOfqcoyrMQF
zpwyVlRb%(2nniX0bX^}A&IirKZNOKrnQknF&}px3&6h&pah4`_+<kq_vtw1<qb_i$
z^`(R5m?K@uVEe}`lF`*X+s`TSEpeX<A;2!o%{hx~xh@<QX@NxZj}?!-`;ox$W50Pp
zV!EZ8aHFCoxDRq4g<2Io_iMkJUe=RDpTYIiyrnbH$)>YdZNp*fz>eWeEn;xCnDZha
z6^p_=PBlAxQF~gvBa?C#c(tO+n=w~6*j$x0vKeYlEMHF3I`g_RNl#$g@lXQ9*3U=6
zbCXa{1~M|5`VSiMCV25c;xnr_Fh;ZY5yJ%-65yyQfWW6vB3-uLEgbl|b5MP}aLUpc
zS{ohkDkbWj-`Vm0W;C(3+=5<b9|AOa>Oe_`Nbj7oNB`cM(TVyf@jsb2=Cad!_u+uF
z2eTK$sz)exrBZo|6vqIbWr`eh##bJZoP825><}=<f5shhY!ADslX>G1klcN4q`4}b
z-D@oQ&FS%|)H<{cH3#+GLZR6{7Ut{t3R~X`-;mlr4uwQZ3r>dpg?Be;+kGQz@{3m<
zbvxE~)+qL9`Hu9>GR)WM^F$E(TrB2Z=V){AUY^Vk%TJq1#fh$PT6;cql3MsC$G6sD
zW7fW*^;fr6J?XF_-^-?pPQfcG=TXG+ciAU8vj%G+enQ+p(_IMAC1yCGexTW0=Ov05
z`?*7fS=E_XHRd*j-ocN$!V&~w`TR<>Y4Yl)<DI1Q`aOMh8F_Y(5gk4J1PG8R;a_!V
zdpS)kzcQz-sL$Ej%(zowi;kW|Vge7}DXn61pvRt!HCItLgQ5$A^2k=xh@4A4JP~Y2
zOc|+emL9itSF7>qhLQX14f^ap7pu8mkNrrRK(=1XKDQNg|DuT<sAe7-N;?9Z=dS!v
zk0#SY+Ze~kyS#A51vtK<r2@>gYIj=F*m_CQ65nJ*V*qUKd9QJ<#LJH(md&x($4Z0R
z3%x3KMFjReOBD+%7%wMGE1_R)pzSkOB2%kwsAu<aOo%Inzd<KXiirCVZde=}ZBX&?
znFV?Dy^l08USQzxo?0<|9g_TouGd<1*^<Iw2{Uw@cH<?E82p2kE#9EaWANOYh_JTS
z;GB)gW<9HEuWCRwToQng1P2;7;Af0iU5Z&F$({0Zy((}0GS8ZjNx;_${*G3<b~|LZ
ziL!zXsdUYJYc|ZLfmc^gR<|9<_;F^giA!3}g342-Bt1p65W|K9+(oU1HwvHXw>EU|
z91`A#8PCPR=l7!2zWuD{Q9i3%0*C5{m-^M$olDK+d;j~l&I9|G{wY`M+haLW^8AJv
Pptzi!-1b-OJDu=1Bz&<A

literal 2567
zcmds2c{rQt77wkWy>wAkOK6`It(qEiRAMlto!F@c#b9)5sYq3XXizO3s;y`)r9*Ak
zSR-iCB#5o{j-s`c*ixh|35hMyNXaBd=RWuTb^pHa^L*cT&U=36obUY3`+X<H#R;mg
zS8Xo<08p^Cx!?)_$biJMi`*{pefZ<!AH|QOE)KAZB9TZe&MojF(^?;krR}Swr9~!X
zLQAa5p<JCV1H>T78vX-j1OUiq+Fm#hi<Vl<9Y@aU9hS*)9~Ow5Em9D+6Tf$3jnm7$
z&O~0_H`=l%+aoi;vz4fCe<d?OzK5vq@0qDRVWD4f{=2e0!v$&<7U92#xG0F#cHGY1
zm=d%?VuD#~l&irKC026@DDJM&mr=%92FH2To9xf;LyF3rZXMS2ka+WM&DabVY)A)T
zVe3JaWn)NYow0}z*C~$4o0LT6sTId=%$*l}1}_hL1qk`{IX=Qb@TIAFqcWP)O`f;D
zAEWC%tu}wK;lak|VsTp5MM*?V%49fN`FeTYR=`_MIXkM_U8SOz)ENh>kKrD~z{$g-
z_N?Qf(%5g@{&q;d>g7f`19u$Nd$E39E4ZYl6X!EWwgp}dGf@xF7gtn$P*R_LY(MV9
z?4Ze)Skh)58;>6`BgT{57{X@J18s3I<L#F|#6U7#2D)-sVtaBnm<aS$H%@{nRAk);
zi80!Gy@ZCj?iiZM6wE&d^pNAM%zTYw%Z-#7is@>!k<eFtiIXY#97F0~9AOp4(1&nT
z7PT`i$$Jr!$7*5qIi<sC&<yhdFWREJq#^9>wD~i{wX0AupK21`H+?dM(8x}0UYz>_
zNm}?0CVRu_1YW|F;s4x1NuSSRfBN@H(C;DM3-HWb5=hAbq~3E^wSEAIg=2twriYFJ
zcHaRUxWf`~n{9TK85{8qK>7zj@Mo86C*-$9x$qM^KB)G`04a&r0jTuuzktpf{|#De
z$NfKm{Gg|b@44E^7iS!@JEfiBYF8U~@~^r82gIM$4kce}1-wz(p~UJdApmn(Y^ZN$
zzq1HgO+G@E5#~IvO+yL6Lv`^^NzYYAv=LowJP+XpdBg;%q*;tqvE5}8J>e!D-p{^K
z_u-{8d9(G;vJ5|(BpT`pe@f<+>zMbnxyPTb)>C}A_1X9YzW~!%l8lEHM;@JHhYLNd
z2Mr@9O?A3HT0!>Hawuv?*j@WOa?GA`GD1!R4IJ_dZF}3C?y$fssx9~Iu^KPV1)`$*
z(qv2p;M~(SmV@o*;_FSl=?HmEI(VMEuhxTsp8NIXBZG=}k$0Zc?6Kd!?m0>dG~o#3
zHf*WYV{@~qrX{ztM{k+vM*OBtL9PjmzE?q8v`*Vw<_I`r`25n<S{i*qkxI`|>{zqI
zu*=Ps+oo=zk9GQ()3)wzf>#-Z{Q#W>?8GDnKBarZu`{al2Die^hsrRw`t*JgqpH#D
zT&vlpf}Icnf$zp#xZ%tvgw+I4m;XO(tCV72;T&lOK>8FhC>i4*WG1ood?Kz{z6r5w
z1mO`IWRO7@L4719gp#BJ6!4xz3{09*mtmsk*2`6jCF7f_(5P}qBomWBA=NNe(pr7&
za+@h_?%+yuY??Ac`-(EUXY|Q;1nLF1^<T2;BM;;r^1aD?x`{aEP0?9zD+J-)7p>w)
zaD=83Y-m_v^{P?lLRfh@NY|{Eer45T5)wOz<7v*PF9D$z2dOmYJQVX9dND4)c-7f-
z<u~i)e}eKMkwG3`E;aZXD?vG@C-*e4=QxdS=7>ozyVT|+DMz<Dc{DGK7K%s1KbRb3
zVPt~KqHOSQK`IMvre1`Uu0D5o6mk8O0wG%%1$^yAva?dIH1Li03unIVMP+2|g7Rgl
zc2}v*I#wF1Z%X56HEe;F*~&?uUIoj>6KYV`;QX+zT+<ap9%1Boq;SQ_r-mPw*=G!W
z7lGgJmY#~%dT>72@-~a24L?&K?mu=HM$ccI>BLQfE21t^S){h8x46N<dNbmb(PgM&
zTU|;cgYT{Ou~aya`Yyl3#MWA%a&GKR$ixs&G{0%)_wYuCFnnR=I?=c1=FHIXk3}n?
zTvT|{a4^uQiDmWrMou{>ZD6?CHzv!$q=#~*O}g<2nw8++lnaUhuKAz?pyRA=T-!wF
z<O<)~(C3f!JV*gX=Nx;QU=dQTFxe3jJK#x(GhQCyFt}hM-54<#_;V2`S)M!_PsjCA
zEd2;d1Yq6t1a7c*yy9PosA$zk)c4=^7d5Q;1%km*BP8rIzs(U1dQikOf5ZbeHue;!
zJ;fO6RDxn9Fs8e86+7VA*p(qEp|NLT(M?%6SPfa*(iL$Qf?V=IYvw4`AvxV3`5Oc+
zZkmRtV@M_PB`$pyebdSbwM+&H+qdq4K6$`%m|k`s8sJ7uHH64GX<ZWmM_bo&x{K-@
zH0r|MVyc@osOXv7sr|n^>KP9!ebppBEZ_PM)**^E%B`25fSSi&7Ic>a!BroEyZsOM
z9^V+i_4?zv14!<NS?=V`$n+R`++9sFB%ctlt{DQI${qn~ILuul(+Ju6-(v{URm$tj
zL$YXmbb}X<7V<&MF=q|YsF0Xl$7#Vu@*q?aji1*KZK`0d-%{<1KgZ=039e*cH(~1<
z_8GIVD&C-4&twzj*EcZi!Oa*Z3|!ert(`qGL4{i~PGdtUj9*cj+Mywf2$&`6<OXSd
z>oRpQBhV`y6fxJGc$=pF0_j!!DsXRF>i8#$StuDKb$efsih4_o0H_*`In34MhfT<d
z`j)nqOhud4%PQ|yky(fizxnPkJ!CT;fJquCg_x#ox*4`eSypye%kr^tOABlHWuiVe
zOizN8L-aZk@aRjG^|BJMA<n<biA5UVqL~ISOtuHtd%xN2D|?JZKYpEBEsSH>-TyfS
z0hq6&?><OMdfTnwQycE@zyHbN3>Ujo)2pkNt$IgxJ-t|G4iGQfi%u7+tbR`T6JpT7
Aj{pDw

diff --git a/docs/html/structcub_1_1_block_reduce_1_1_temp_storage.html b/docs/html/structcub_1_1_block_reduce_1_1_temp_storage.html
index 586dbbed1d..1eaab02852 100644
--- a/docs/html/structcub_1_1_block_reduce_1_1_temp_storage.html
+++ b/docs/html/structcub_1_1_block_reduce_1_1_temp_storage.html
@@ -97,8 +97,6 @@
 </div>
 </div><!-- top -->
 <div class="header">
-  <div class="summary">
-<a href="structcub_1_1_block_reduce_1_1_temp_storage-members.html">List of all members</a>  </div>
   <div class="headertitle">
 <div class="title">cub::BlockReduce&lt; T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage Struct Reference</div>  </div>
 </div><!--header-->
@@ -122,32 +120,8 @@
  <div class="center">
   <img src="structcub_1_1_block_reduce_1_1_temp_storage.png" usemap="#cub::BlockReduce&lt; T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage_map" alt=""/>
   <map id="cub::BlockReduce&lt; T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage_map" name="cub::BlockReduce&lt; T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage_map">
-<area href="structcub_1_1_uninitialized.html" alt="cub::Uninitialized&lt; _TempStorage &gt;" shape="rect" coords="0,0,672,24"/>
 </map>
  </div></div>
-<table class="memberdecls">
-<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="inherited"></a>
-Additional Inherited Members</h2></td></tr>
-<tr class="inherit_header pub_types_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_types_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Types inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom"></td></tr>
-<tr class="separator:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a848f6233dbde22d8a42c022d3f0aa6f6"></a>
-typedef UnitWord&lt; _TempStorage &gt;<br class="typebreak"/>
-::<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a></td></tr>
-<tr class="memdesc:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T. <br/></td></tr>
-<tr class="separator:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_methods_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_methods_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Methods inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a790b865325f19ac45cc84d3fed0d3038"></a>
-__host__ __device__ <br class="typebreak"/>
-__forceinline__ _TempStorage &amp;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a790b865325f19ac45cc84d3fed0d3038">Alias</a> ()</td></tr>
-<tr class="memdesc:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Alias. <br/></td></tr>
-<tr class="separator:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_attribs_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_attribs_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Members inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a5fa7311d943222333e8c87497ff8e782"></a>
-<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a5fa7311d943222333e8c87497ff8e782">storage</a> [WORDS]</td></tr>
-<tr class="memdesc:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Backing storage. <br/></td></tr>
-<tr class="separator:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-</table>
 <hr/>The documentation for this struct was generated from the following file:<ul>
 <li><a class="el" href="block__reduce_8cuh_source.html">block_reduce.cuh</a></li>
 </ul>
@@ -155,7 +129,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:06 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:17 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_block_reduce_1_1_temp_storage.png b/docs/html/structcub_1_1_block_reduce_1_1_temp_storage.png
index 4e2cc421920ca52ec510589db0335660d12e1e9c..a02391625872dfae4245b24648cf66189724aacf 100644
GIT binary patch
literal 1538
zcmcIkdpO%y82*K2E<xNLqubCd#!%F4PpU4dP@%<C?=J0%l)1DTaZN&4Q7M&DMQK(#
ztm@JbMH{9zREfn%C5TE9B@IH|O7?5FKes=3p68tNp7XuW`JU(f&iiHidLLF*-lGfv
zfGXAl;|Bn8bP)S0%7g2&<l7!_)Ac>#?<SMUKqy*Vi_7b}0iutyv$GSLJ#GLV74d%F
z#{iJ@VT?TYw*~-cJ{E)aPk@Na#7V+hHMgSO7c7|rsdPg=t+5n8UNboV_E|}Ym7$6m
zCm4Y^g2~9vs{V8@)}2O0j&xBxY5A43Q@&Vhn!hH@6ukBP9&Oa;;fEzP)7d9uBm&~u
zGwWpRo7?(<=46^}C{gsp5PSCm+Gnz3ffV`zwuAZ^T3E?$r2Cv^Rx-)Sm929v??Tu_
zucQD44^A(q<7i-1B>G(@-*aI)eB^<)IkrOqwpCDpGI0~MB?;oA5T0*%<;6wl9Q*=j
zyOxMarnNNiW&OvS=?bb24$0{F`$_G{xQAN!Q%R+?qKm^<jZgeZAt6M5qYRt8a#n<w
zUyoI^Hhys|v#N>D-3YF&hCbC^T|&-{ux&)>b&D{MJ-z9*RZybOP_s)*oqwZIv%TeV
zc)l6>7^1nnBVeexZV<XtE$GiWY$Sk5c+FW?2>8;+=@tAZafM8deH+{iI~xy5BfD0_
z9{UVbXP%&6vy6~F#SgYJy=aYe+Nx!O_)Z|-XeR7$>CCH<dRWtJXd&_|tPvOwLDnF3
z{6W17U%L)vDQan)M<A|2&H>TW@KlHuFw#H=KBXhbyDxnKd0%yy@;ShC<yDm`)CySp
z7%~~v>I4e9DRR20V5vZk<_8W~D2R#+*4!i&Vv-F7$zUq*9i)!-|4knE`0$CWG&DvG
z0hLrp^^gCa{f;6p?ss9#Vq^ug2R97KJ&2o0%rp^SCUL?RrOvx!NyEcq?pFsLyI36k
zaQ>u|Q1_KV<mOvISR5mS$0U<_CUV2pQ&i+C314e<W)yw$<zxck-VTF%&u5l8$J?=a
z!rfg=>{{qw&x4Z)$pf&};Hxi<HQr9HV1`QBuWen_oW_aHm4ZL|9D@U8HAeug68h1_
zxz*(<=ChZ_1}3@o9gh9R+@pYByLJ<?X>~tpJPIDHZd>n=wP}Zz2v#N_-dETUC~7#u
zH=-aaGxW;rxaPWk#ZBY+M=)LnKggoxy&akA{0qTW*vk}msN+whw3V$E*|nCWxJ#4{
zT)!l_)qxb9#l;dt;o@gWJ>7}w`0agEn1w~H-VZ*P6BkEq3g-+(zYb5lE3lHLMfElM
zuo(H_mN;DDEmSMjNiQWykHNFYoFet^^PhB0ZVes647InUG|acNLtr-o<j;7;S;ZjB
zyy``CqDwUDa432^VhM__(rZ%Ub#!Mx&%?Pkw=%k`&cv3Zaoh>m0VK8Wy5QurF>Ek)
zcK`<pYl5p_6IWVImoJ1z9-vZk9k-VX2=SXT@1fIM&I5F*0WY~|zYZpAbgTb1x8F4*
zonB?Mn?qqW&eSVy%@zX>uTz4|<Bw0XtwlF5nT2w?qK7EOwn6@5ou-_Fc!I&r_q!B&
z1+vVZamd<X+zZWZa<0Yi6`k*EZ+Tx#Cx1RQHOHr@Ih4$nF*L~yMyY9&B~JJ2v1{LD
z-<?SGd@kizGb8xkK|c{Im0Ti(UUfOq^N#m714{UkFoOh=o@=0<a<8xgwswy$XqsW=
z5_^ix$(c4Nmbiv)d>L+M;_wJ3jZT|Eth7BS?<?{4O}#tC7YYof$d~$EqMa|zTV{U2
zScb#4NE^2l0CzPXvyDyhSr)2uEf$bSIT4vOKf`p;b+VWs>EaUOqZ$9hUn_er{$q;Y
aHsCH%rB?32V`#8p16Vh2Ox>Y#l)nJY71g!?

literal 1576
zcmcIkdpOit82-78tr@bZFfolXwCuR$Qai=?g&1;a%!U$$$slaw8VzGo?WDrwmYDKj
zDMhY>VNj}-Vpwt+gGvms7<XcJ=+E7M_dL%z-+RvUe&?L$J?H)CP7c;`(7jLq0OW8s
zmUsY=d?dzB+oZ(f?DXL`;-Ktwl3*ngiNsJaxe%84B1w$4dShcFgg$^2Pq&5Q9h?C%
zX|r{|B^&|(2n%Nk5+Wt0vZu&Aj4UMG+K?i$=2Ys?hi?*&{(5F$!l(N3@nkit;<yHM
zYgq=-O|cx^%8*mU9b)*Pw3HMHjN7REW|}P{IWNW4NxYh(0Mo{uVBpCCVC#(Il;1{F
z<!CI<KV#u>T@1?a?dnV<6~*5Ri&87h>wg``=pv)~?qB6)iZ}XWz@D2~%JhgIIZf=l
zF&S&BX)1c+xHzzM>-mnPJf@4tB&@Pjw+uUQ;8~MJSNU!<PGod=R|RxYXs(^JkZm<_
z#(A1oY;TFP=NZlgaP4Z1=gk6;rye%zOfC*NcB?=!k(e|dJt^#BQjNUPT*1x?SiL=c
z|3pQfZg4>%WZj(W;+D7zX&Ts_mUkn_U5_+WE-&xLdN#X+@1k+^1T%Sw5lrw0{sZHL
zyt_yKgcBGW6q>h+=k8D)u6^BO?3@CUmbHDa!nn>tUG}5Is2Y#?xrv7&W5`uqLCA85
z5OW!c_>Ajp)C?LEm;~R!s|V)th#KQ*1NKfh+eY4NzN@`hx}jI<#!Md$t1hAP93zMh
zNzKEyMGx|8tIlqz3;(XJ7pT!<gSSN&qTo1NF$txP(^}CIl%W&c(ufAFW#|?_A5fZ=
zY6O(2mt36ysib5i_R>#O37g*`5Pbl?GF~7jjw{a}0WxY@A9ghYdCfFQWvciM#asm#
z^gMv~0wrY4ZPA-a^vxu#(oM34;_m;O>~5o#pMkeE$WLL&L8%h@K+V=Q|8+7$3VhHe
zTpmCAwQy>-Bb1sv_Vh@wYrffC$BEmNjdsto!I2oAO;1h9bL)PW{wh|xvdgm6Sx!{k
z#yi&ROWwYr%jLO!rilqa4x&X`|Dz7bKFrJ}txeW<9}HOy35*GwuEa(&XI6z%pPSk*
zmKd0?S+f?Kpf15z4hUZPlB><#>)31IHtgJM(Pr!oi~E2k0W9imA75_@yjfb1Szi(q
z6Rqf2d>@Dp*CwqOj(d}<dHXnOq}x3DiZNI+B<zN%jxr-4Q*Dz3ZoP8g1@F-Tg(c1j
zbw@5ysRGKXcfT~-zT6S}c%oxo{cBaM1=nrXFTB&^!-)SUI7L$sdOd*kO#{SY?&Vs@
zm)?x@{-i3+=fEh9>9By;{m$;H?1htG;L?a={6P8bhWmQcRCyyuRp5){(9`?hH4!UX
zhSSpxiURr25<T)c?Z0z40|8u1ZV~HP?>yhPOy@^;DSud)eh@0lu834XiR<;cdy7`p
zJq)8c7?So>Q0P*r$o5+Q5F%Z^a^%Rhwqo|43OnU{Dh<WWj*@NUjaR;s@g^zdgJUKu
zS*gn$qtl2eaE(ccv|+QqH5?S6*O3>wD@s%4tpeDAl>YwKjGopD*Fo+}1(6kq?G^S&
zug{#M_D6w^FS1jYb7lKAI&|?8m@@$~V9ynmF`CKG5!vtGpc)$Xlk{-8@1G{U+p$F8
z8c#(eJ4hprd(?mN+`h_&$;QN>NvvGI!C$mzujxPpq-TE4tC<93>T(~VApTgxohPTh
zhI_@Y?$iWw7W8Ul_?^)<bEQT-)9=k@mJij|cJ4`fV_n|$fEM#t8V|FG@D;AY+kCF_
zEI|1W2!E$gpY~CEYL8WObrG{E^Mn(%p@~%zyDlkusUuaoe%&rMy2VUIL<F&(oKXoX
z^P`^(wt>>Br1#st9m64mw@gw}q~T&eedE=Q_03fN_c#CUMxirz%MxC;S{iVX1akdO
V`f%2ulDMn`I4cLs;^XIO{{WX5+i3s*

diff --git a/docs/html/structcub_1_1_block_scan_1_1_temp_storage.html b/docs/html/structcub_1_1_block_scan_1_1_temp_storage.html
index 24191d9f7c..1321899f8e 100644
--- a/docs/html/structcub_1_1_block_scan_1_1_temp_storage.html
+++ b/docs/html/structcub_1_1_block_scan_1_1_temp_storage.html
@@ -97,8 +97,6 @@
 </div>
 </div><!-- top -->
 <div class="header">
-  <div class="summary">
-<a href="structcub_1_1_block_scan_1_1_temp_storage-members.html">List of all members</a>  </div>
   <div class="headertitle">
 <div class="title">cub::BlockScan&lt; T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage Struct Reference</div>  </div>
 </div><!--header-->
@@ -122,32 +120,8 @@
  <div class="center">
   <img src="structcub_1_1_block_scan_1_1_temp_storage.png" usemap="#cub::BlockScan&lt; T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage_map" alt=""/>
   <map id="cub::BlockScan&lt; T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage_map" name="cub::BlockScan&lt; T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage_map">
-<area href="structcub_1_1_uninitialized.html" alt="cub::Uninitialized&lt; _TempStorage &gt;" shape="rect" coords="0,0,658,24"/>
 </map>
  </div></div>
-<table class="memberdecls">
-<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="inherited"></a>
-Additional Inherited Members</h2></td></tr>
-<tr class="inherit_header pub_types_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_types_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Types inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom"></td></tr>
-<tr class="separator:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a848f6233dbde22d8a42c022d3f0aa6f6"></a>
-typedef UnitWord&lt; _TempStorage &gt;<br class="typebreak"/>
-::<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a></td></tr>
-<tr class="memdesc:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T. <br/></td></tr>
-<tr class="separator:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_methods_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_methods_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Methods inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a790b865325f19ac45cc84d3fed0d3038"></a>
-__host__ __device__ <br class="typebreak"/>
-__forceinline__ _TempStorage &amp;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a790b865325f19ac45cc84d3fed0d3038">Alias</a> ()</td></tr>
-<tr class="memdesc:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Alias. <br/></td></tr>
-<tr class="separator:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_attribs_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_attribs_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Members inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a5fa7311d943222333e8c87497ff8e782"></a>
-<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a5fa7311d943222333e8c87497ff8e782">storage</a> [WORDS]</td></tr>
-<tr class="memdesc:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Backing storage. <br/></td></tr>
-<tr class="separator:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-</table>
 <hr/>The documentation for this struct was generated from the following file:<ul>
 <li><a class="el" href="block__scan_8cuh_source.html">block_scan.cuh</a></li>
 </ul>
@@ -155,7 +129,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:06 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:17 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_block_scan_1_1_temp_storage.png b/docs/html/structcub_1_1_block_scan_1_1_temp_storage.png
index 63cb1a34ccf18be53f3765f70c38517058e1fccf..8cbd7d3bed612b9f82bb67289c2cfae458b83647 100644
GIT binary patch
literal 1531
zcmc&!dpML?9R6}A%(`qynlO{fC67{Bw_K;0hjFjmZ&PleMahhzhc1*`pBZeCWb7mv
zvaE(;*sa|UOJQRev}+JCZjCV+GyCPQ{cHc-^E~Ig?|Fac`905h&+m6~J@FV7rM*f3
z0H|QkIC}$t6bN%q1!;J#n7_~sgSMxeuZu(?f$7IZ{*}V+BAD$=7K<gD%R$0hg%oeR
z4*(zfI{xhOwEzIwJ6LB2-)p;;Dwc@VFI8Pi#eE$T>x^;rOG4>`^KakGHEuPIY36Zr
zZhkJ!QP5BeH!zq)=wn^MJk%>)<HIo7(}lx<_h>GSAV@&Ps6C^hbg%(8NJoVHiceaJ
zcJY|?PL!ov+@A45lzUoG%I+eX#xojiHX3jL(`@d#6`e|<Y4GX^>CFt@P?Yi37vBbo
z_~#bZZ&KI=cK>p#ov04m^V)XM@Y)^X`-|v#IZhB{@nncT*t@2g=&`hbjV%}?x4JyI
zSon5^(eyY33Ov*?<=5Yy3e{$joa!W+xdD|ZOH4IEi`a?2{iOBEIi5)k)y6YP_dOdN
zIJRU`Lc5g^+(5|u(Zr*^rGk@68N~Hd_c6GWLV=l;!X#H2tCx1Korapo)1s#5O%RX#
z5su}TA$XcxH)z--(S#dJO?T^Mf=HjVz*Vct-c#iAF<+~vY({L<OvaQUF2A`UVF)*v
z&#VQ(&w)0<VfKo!Z(1vCF}`iwN48%h5KPE}O+d~trAQHhSgZxutbNuSHrO<aN<kT7
z?}nk#(3wGl^E)90B+k{j?CVpkUImm{oon(eK>NBN61_{AwO1sJG=fq6JQk2{lta3X
zeMPCW>MXz{`=*RJ0H=b(a3)w**paiS|DVh_V9@Oh-z7)j1ZV@&D+$j~*LR(lF;)x%
z;G&Y!z7E$EAQz^lO1Xr8Gv|?MQBMq#J2h?W7Gm{led#)th+#^mM>0+X>!sS2o9N(Z
zV9ZnqGqP`GcVNeN4x#Jh?Unf3Aiw5*iQs@@QGp>DIA~Kt5adUFOEwE_j}3rk&k5Xn
zS{m-#btI6|%$wN3s&6wsCLswgh`k!<qYNlKtNbNtYze%IkslB)t|Awt&8v?1`-$6L
z9p!}N4F>fvMUHcE#e!L1=nLJooRU27MFpMA!M|QUoVH!kfWJH%`p~bbj$I4RK+ECD
zshkJNkXH&6aRFTyu~piO5tm689T0|nZQaH*mH8A(d$4)4=rc%mf?{sPwTev9p-&;N
zcRq{ylcW3O(VvbXi3{{d#N5V-k`Z2%w?`Z)vzq(CZ`IoJbx`BQ?D(4Gjp~Kr7nqsE
z(9O=0%oESsl*fAmd}eGH-7~CD`mHUPBE+H;_YE1y?9qNc(^40EC8lQ48Sy)Yhx5PF
zReX+Geif>c_}SNOOeiyxF4Gmi=b)nyFZ`O~l!~L$Cq6usDd+R=#!1m4wf$dp)e@>@
zC$`w}?KZ*?NbKhSFd**T_7tVOWmA0f)I2yq=|m=f!;kK@ugr3~<pBNCwe5}$9bFSp
z(w`qZZA(OZbXTbGs?FGzJ!am3UcFZ~7}Jb+pwo2x2l`0TNQNU6a@Uo@zE(zEhwUxk
zo-x9y>NiZTO;JQ6GTHZz8nc(46gWQGzuslLsdgFz#z&7Az7ejHNbEJ!+S28^q~Tfa
zNae6Sy`cG+f$V52RX%}yvZs4`Ep_Oz?(XqP@25soQI%=y(7aJ>)3&muPUlh;D4%xQ
zs<{4;Ub|wWZO%S{K0&?COZ`LY=4#%E(I%2(QB*jU**8F6wB|J|^Z7UU!SWw<7>qD(
zVWRMFnYq7!kh$9E>EmQfDcn_(DVwPGEC1bW|7vEjv_Uf%p`^fJz&|yBb-_C`9K*8y
E0sigI@&Et;

literal 1563
zcmc&!dpML?82{Xg%4&;Ptwnq?RBn~9YAu%eTxJZ0A(t4rJ)@G0a;b-8#j-PIhzKoh
zMwD@x<Q^KI>t>>vjG5fa6qyk#GCSITd-mTw&vVZEJMa6R=Q-#1p5L2}c6CtPp}qqE
z07ayu9R>iT$r6l~myxVRZ$q9+j2hb6!(J>FOW>P#A1`G;{au2-%=Y$nxpc0kWGf$o
zadihINn2y2*TWP5<Y-7c8;>}t_eBDX$a1%=@00;gOj7Dntva$YCu<w?0UaezudD2N
zKi=ldI+`C|pl9a7T28v93p<iYWyD|*gdK;qypc##@=<v#hMcLZm`tVU>L63jqd<2N
z>IcNh0k5StU6ti$lvz#WJe|ue)<Uj!L8B1V>t*FehF0n<hY17WAXqVd9Gayq8*3{+
zdcd>{SzoSyvxMMt=EuH2ldpw5Ci?qH%#zVTPE<81_qF!CTRyd{4fdn-Ljx#(Xo9vq
z$HUKF)hG@7QCH<9HgOJFd)Da<VdS_DKi!v|&_*gGV(2Edgu5DYqTZ5o8uvq;OYtgK
z{Px&RhqU<d93aEXzrc++W@>;=h*54?a)f^s-kR>BR9^p%Yfc)5?x5+7?o6v{@^7#0
z2X!kBxJFYcWo?d+B4+HtyawMcfja9PSG$KqF7@U)^{nf!jmYIWtjDpl#Wx$cUP6=4
zHF3Dd16fPD6=yu_>Ct`+JD4=e3GqQbQDwMc$lG4m1rsUHA!v9ef!dZbAKP2Ut^wcr
zONjksJ(wRmltuW$wD;GT4*a(W2?iD}Ed0Ijg8ri}tT8ELfIlPC4yavQ4F;ulwW}p0
zu#td_u`G5^JCND&Cn<KzUZcty1D%1|k_1T+C1Lrk@UG-7cS_*ax&Oy~&Cn46=3pSb
zatcpLVe1u=ulCj3Jo*D~a;r6IzQZ`OQSp42j&mWWYk)CT)Ewk1UW?RFB#srCr}*AD
zJe(Uo&TF_k@L{KKtEElitYBky?6?C~({nYBNCqZev9_0VBEZUXT&w37*Lrc|Ifs%w
z`S_;Q6Ra6QE@Qt=#&|a{lV-5$dZUuTbrVjSaa?xic%hmKF$-aH7Mj+Zeu;o39<1%n
zp842Ol~$vYM>y2_*OLESi-m`&82;9Z*?2b523BKWmX3iB`N9!}SV12StTeOoh>jOV
zvb{B4PBzc5W731{!P+1}UWV6(n)rbYQD>R<)QriGL}(K&N)c-lV7v_+bt96oK_;V$
zU|wk&+oieTghV-^ycaKwnEU2NP4~@J>y+N3*0-VQOA#U~<?gt^?rnWJS)5hZgUoTO
zO<}5dZfVsLOFff>yGVPSR^Kxe><2#NMNbW(is1q+w%oMgDMpaa#Ayg&_<V1dA+}uK
zehRLIVk`0PVtO%dotAc*^Vl|x{gB+U{j>p2C3VoOwfX{4dS(7occ}FB%5YqL$3&0M
zPTFeitCf}MNKil`T+y$(y});;drb;dCrh9^7Mo87Y<j&eZ+t$&#)nb31g??6T32p>
zyAU>$5_%e5-o0@D!lEZkLt{yM&~EA@`rrA)*XHXvks9NTu<!Rmqdv-amtWZ~o=~Fp
zFFIz%l;HOY=MRgW;6=T#%mg1e!l6+`dTri(;aRq<hb*Un-l9A>#25AL%e(H=I(XA9
zqk~zDKGl>(u|6Q{%~07Nm1N$KzRnD|ltDY(AHTE0)nLum_ec{9RS0je3X$ssB9#k|
zE$4@IV{0-1QOd6uRJBvQQ^`PEXLe}w$qw=PzMq<t#3sckoEJBMFAyn+n!Z7^xEdWj
zntWlmLRP2Lk3$t<1oK3PEqPtNeevuuqyJRehYCD|$fsaT5#`IVn=<w`H>p=EdFql*
O4j}Da?W%tcO#TNs!`n&#

diff --git a/docs/html/structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___t_r_a_n_s_p_o_s_e_00_09dfae03f13932c7dbdb41be30a5767ba.html b/docs/html/structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___t_r_a_n_s_p_o_s_e_00_09dfae03f13932c7dbdb41be30a5767ba.html
index 0684e58a1a..f5ba061848 100644
--- a/docs/html/structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___t_r_a_n_s_p_o_s_e_00_09dfae03f13932c7dbdb41be30a5767ba.html
+++ b/docs/html/structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___t_r_a_n_s_p_o_s_e_00_09dfae03f13932c7dbdb41be30a5767ba.html
@@ -97,8 +97,6 @@
 </div>
 </div><!-- top -->
 <div class="header">
-  <div class="summary">
-<a href="structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___t_r_a_n_s_p_o_s_e_00_07dc8a0abd37bbc1ba3abf8440890a6c0.html">List of all members</a>  </div>
   <div class="headertitle">
 <div class="title">cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::StoreInternal&lt; BLOCK_STORE_TRANSPOSE, DUMMY &gt;::TempStorage Struct Reference</div>  </div>
 </div><!--header-->
@@ -125,32 +123,8 @@
  <div class="center">
   <img src="structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___t_r_a_n_s_p_o_s_e_00_09dfae03f13932c7dbdb41be30a5767ba.png" usemap="#cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::StoreInternal&lt; BLOCK_STORE_TRANSPOSE, DUMMY &gt;::TempStorage_map" alt=""/>
   <map id="cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::StoreInternal&lt; BLOCK_STORE_TRANSPOSE, DUMMY &gt;::TempStorage_map" name="cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::StoreInternal&lt; BLOCK_STORE_TRANSPOSE, DUMMY &gt;::TempStorage_map">
-<area href="structcub_1_1_uninitialized.html" alt="cub::Uninitialized&lt; _TempStorage &gt;" shape="rect" coords="0,0,1201,24"/>
 </map>
  </div></div>
-<table class="memberdecls">
-<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="inherited"></a>
-Additional Inherited Members</h2></td></tr>
-<tr class="inherit_header pub_types_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_types_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Types inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom"></td></tr>
-<tr class="separator:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a848f6233dbde22d8a42c022d3f0aa6f6"></a>
-typedef UnitWord&lt; _TempStorage &gt;<br class="typebreak"/>
-::<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a></td></tr>
-<tr class="memdesc:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T. <br/></td></tr>
-<tr class="separator:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_methods_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_methods_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Methods inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a790b865325f19ac45cc84d3fed0d3038"></a>
-__host__ __device__ <br class="typebreak"/>
-__forceinline__ _TempStorage &amp;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a790b865325f19ac45cc84d3fed0d3038">Alias</a> ()</td></tr>
-<tr class="memdesc:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Alias. <br/></td></tr>
-<tr class="separator:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_attribs_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_attribs_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Members inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a5fa7311d943222333e8c87497ff8e782"></a>
-<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a5fa7311d943222333e8c87497ff8e782">storage</a> [WORDS]</td></tr>
-<tr class="memdesc:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Backing storage. <br/></td></tr>
-<tr class="separator:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-</table>
 <hr/>The documentation for this struct was generated from the following file:<ul>
 <li><a class="el" href="block__store_8cuh_source.html">block_store.cuh</a></li>
 </ul>
@@ -158,7 +132,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:06 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:17 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___t_r_a_n_s_p_o_s_e_00_09dfae03f13932c7dbdb41be30a5767ba.png b/docs/html/structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___t_r_a_n_s_p_o_s_e_00_09dfae03f13932c7dbdb41be30a5767ba.png
index e18b9e3ff5ac5a7d37aae30589a2b6acb1435657..a45827361d4fe1a1aa7a174b7f5c013d807c8b90 100644
GIT binary patch
literal 2361
zcmd5-d00|u7st%B(rK|$Bt;E#933T1$x*Q;Qqi!oL^8Lea7)QC)Lf#~T&Khl%@i}Y
zDM%wV6HORPTyYI7m1xFXA)F>xOqnY+-}8LmU-RepKF_`Pyyu+voO^!1bI(mV>h7$u
zL1zO91kymepgcgJwM;-hs-_Iwdu_A;1xWko;S-1CaycMQNM=d5nr;J<;^J^PDk;NI
zV5@e)!~Hl2a9Umb_7j$Xsuhhoa3WUeL%~qo2xF@Xok(~thfKq$_iSCLTjMVgf<}+?
zTGuFrveHf&8XiVnPf3Pv*|OU(5Uqx`q@01Zxll4oRG3VK0NaHY2jbC`Oh9m=2%5PR
zg^&(=l$K!xXwqR|w6_+-jdID>-{k10dv)<XJR&+Yx9q6>3M`~uFfo$_9LRahgiYr3
zgWWuy=e2mrd}3+i7i2)t61Uel%1p<Dx#aGl1TCnXEaX$Wmt+%FBX!I<FN6ah?K*<&
zvx(IqEh&-#DcSjf44w;5c&W60r8i<XnpbnHo)m`*hOinu<6co4uHQdc??y@g>dAG>
zsPM*bZ>eTwzVgXnvWn)t{@OfSQ_<W-f#HF&>H--H3t>ioEtY(yb(Dw)Aat?^YhV$|
zmYAU}ZK$w2rd%EGR8nfh?u)Nx#W(Z7rZv(6iRl*`QCpn3XR>!LptU5BaeFFzovoEX
zP>JhkRK)Hh8Y5TY>P|!^EHB8&<e7=(^R0pdl(^_DQs1Cx$mzkZ56_1}i~zJ1Q1|(d
z{M2Z>2iUjgD3J9-ts=-RO<0n=fB6e+mx({{K47%W&ZnbRb95Cv|CIlxV3i9?O0~Jd
zZ<Waq-*Z423QGG}0McK%B`w0S|6#oeg<_~(R)T{pMmDWJ)^R{)DH$pOAoa^iII1I=
zVFpqLHk^mAajQ1%%V!riQ?tL?w?Q@nDv<JH%en1}!vR3XFaAKjFa9JZb2Szm1oV*-
z;2Qt}x@e6e5hLxNL|Xp@GPl|Be;w#g@BjH^6x9MyZT>eE*ut@&;7Gab=7WJ_R{L87
zq#t;5?;@2hR3BmQ$X115Pu`7kODt|EBpiP9A|F~bI5u3~MbtGTOuyNi6WNZ45sE+i
zatLnpN{Q%rV@!VV){c2qX|v{x9eW<y7_D{^`VifX^ei0xBSa9BQLD^!sPMNx7pa-L
z+p$IHqO6`7e2P%-C7?#1c~)=;KWw4zi<pUggA>XKZ5JoN_xxTYfp{b#uDFR{g1pWu
zOo+!uKMJQM8DNfyroyD>_l#VdiPj6qkLY`q=oH_73oh^SQjI8+4M;nGUyBLRyKyeL
z2d(1M+I)&Di`Hj(#CuQ{3o+-bP<HNllgXvd?Xn8c&-WvDlt5$?>7JPR*|@@M@%z$7
z;@Oh&17Olj=<LeKhn6e3V?t=BlMamrp%;Wbn@;$sgZFLY5w*h>dhESdSjE*1XBqP<
zQcumDdJnuZ_{|bYC$e?Q!!U8lZ(mK`CB0<im9q6w5e)AEXVVaG?*8|-<=ngz@8Mj5
z1z+C3pFD?t+s1V*tp47}f6+#sqfWb({5+}toH&Euh3fHHb`kbZYln4yvOK?j3^jI>
z>q@LKnmvxMF@dWW#K>sOGST87C$R%p8$A{*?tk54U}uBPX$d+h%y9{aPL*`a?XGD1
zKM!KKp0cgVeou|U^9}B<38qBGn$ky47j5kdyP`K2{awX<cHHB=&_M}_X4$X1t@xgM
z27f2g_(s0h#b3VBr04USe_LONv2MtAwn%#uAHlBwO*XbXkHLqhnTWD>^<wr~3FyiQ
zs<1w$3{`6fS-M@b5gj?H^3jYwK0fi-Dse@m@$%Q)OKD3l9}WpeUB%s#uI^3^0Sp5=
zp;YsBCE<_a`_l;*+Hlt_<M5byskp-!O6K7({0~MKLm3-Yab7rHQ|<TrOd60;Nww<`
z2>Fq8>z(GGhNV-%)N@(yzS)TU+@+-|b(yso2J8D~S$%|`!4sfbgQu}EU^{T*&drXg
zGxpPj^;S^_*-zZp`tl3iMLKkeOTFz2Tg%yrm$<6^1NXZPR+!AZrrdm+svG`$v1I#3
zE^e{Fm=tMvCxw|e9)-tP?}5EIe6Yp&Z4q<OdU+U!STpZ{7Y+lxnmBwU2{In;H=QQ=
zmVR@aUR2FzgzhIN{h!lkeTc%8*xcZ5=LRC<!NdA+-J3V=6>aYdU@W)3-iN@N=DaYu
z+dQrrFdwdKtnZ)IEP@Np)COTE)f0zsiNm7Pj1&#o4{gRRns^Q9+!V*bN^rzjJ9O$P
zlS@IGKMAUfE$V51CfbDZn_N)JN%stk!wzXVbvl&Y%{qvz#9Y1L+w1((u`Qm$%H?{!
zvCc~SLA@I0Y<qD8Td$UML-?dN=&Tkr?G2%QQ`s&D*=YH}#t$98h7CJDF-H)wld(Qp
zizlmSm0ouzYzl|WLJ2fw{HWA+#_CS#G@~Q0T>c72+~5<H*LXH)?puxVCY;cDrLg-H
zt6ZZXh(z%iqKQicf*9q@snTVkap!itwZApJmG$&w+}>%V6&BJSD0TP7Y#uwr1{<`U
zN&b!_df(E|PF$8pUVi&DmF##vOrp+_8;B)-Lmvwfg5`NJSfj_P`fc931W6DNq{`4|
z#H^F*Y9=@ig5*XqUMoo!iR9Pjyg%Pe$~(VKeDamBY{%pUN5q#=&H)|%H@|?KP548o
k++;P)EvHvU?90^8n*@eqS25mw$3ehCA96=k{1|ZcZ+_ZK*#H0l

literal 2399
zcmd5-eLRzU8(%Fcr_pniS*!L4NnT@=h{VhmvUz_wOp}+q9Wk<usD|=#2t9?I<fYnh
z<}L3O&52{N+Pux^O)^H|EX(Y<sq^_fpXaah{P+AmpZmVA-*tWO@9+Lz*Y&yXG)D(C
zL~f592n2#)Y_0Ji&=v+@J8lC5_d#PQ-~cH*+Bu`7QYm0g&n}a)p8f(@8&_v%r%W0T
z0c^KL;~ktp0BC)2dYugbmk?uZ>3nh1T+zsdvEw^s;Ib|&(lhN1O$0c;ROctv!AOQ;
zb~p2h-P^pvC}Wckp%+*q?+R<5n|#nzIh~WOri!u2NmsK^%~0PkV3<2l_-Vic!x-cs
z^^Eg341cv?pHOYU<FEGImfbq+p0gw@eO+!AoS@?trH6qG8kW63lXHrDs|T_UC?FCs
z?3SniZOr>Ua4fzro5TvO>klSka<WUo01w#;FRtstiW?!b45N`mN{m?S!jE6m%`%7I
zg@*{5XCB(sbnt9EWvgv|nqMb0Jqr+i#iiG#T4FgI3p;!)C#(fsa9ho2$XGZU#27ff
z#1DzU@n4-DCI!V}zT+86@mf_}?aEu8_m*P>ve&P&m1%IY0Iz^+M$zhO)jmQP47HDv
zFSi-&9^bJ~i_a4feWf8oa9U!`QN2aI*qD5^HXR@XZ3qhk?m{_2FBdaZg*a^B+R_?h
z_Z6GNR{%1_&%z#uPawJM>V0~<dc)~zd9*$6{%{|YdyJdY!jRD$a@qJrgfyXPQV?GI
z^s_FK>$X_U3h8Mhow%z82|Xw%tzY&+YpRA-js>f)rw>pIWNKovCVBQS#$clcWJvJJ
z&C=A<2gITNKot0J09gIAy|qT~O`wJm$^T>h)(Z)o5x1i6$}%bVkIq<32PuQV3&^$g
z2LZIBGk4e>w6znYd^vb!v8Q*l{`%&R>lrdY?DD36<tzAkczlav7>sq7KFE;hyJ_R)
zsb|dZ_#1!Wq3|C7E)ZB>&sb?4yOjVm76`n45YSfNh~oYtT0Beq|0a5&UY2Wz)x4dH
zH_$Be$76%Bwi^-veJX!P-vt7F%1^v1K)6<B`hcV9_FchT**=+pL34?x^NHc#dOtTO
zIFV;1G#Kr1S0Un_=LcSS)unyf0U;x;{GO4HrVyX0`OQi?mC+;Zfl_{C$v^X}6?iu_
zrehOAEaK;O)|fk6_L;jN-oFXv#_C1Lo~3xVUmSjOaF=~TW(y9YZ(TxBoa47QoJUiO
zaY&|Q^CJP(To+FB^35$wxx1;?D0JI^_fUq>VQuUul~<B?f?CSZ=#ZcCBTZ~|`;<J?
z(EI^;2%P>>rn>n>P~wtm_r%I<C+bn6khRxgAa@G)J9S%;bLTbh)^qd-*%GaKS#=L-
zt(-2l?rrIIG|$V~V`6tF4#`T6wn7v!u77NA7MV0&i;9h(zcq&(M(=C$Z0%U`k?%gw
zpR+`=7nXSLbKYI^Ea;0P+&tbYl|fagMAJHihjb!f0#ec??v@nvj0=%;#MHD_p@yuN
zy(V!zR(2<0f2w)X)2k*&T^|bW=OH7QCtpQTnR16OC=<inIMS|@i46~1o!~TaDM@qL
zMM{^_cK9Iv;RT5u;lgP}Y1UE8s-!a=MEa2zq9hSvtAPd5l|$2cSiFa3pX4`9u9$~u
zt+GCB6?!q7>kZLfz8RFgHAEsQE<T?_Ny`;k6)mkKYV0HqU1A=t%Tuf@O^6Do8~8hc
z><#4G&ev-zjdsQ940Q3SZ$_8h<nyPj3fihF=pW^;;~XoP=jDrUo~m+-%au=ZVa>dm
zyy{)Chs?P@e`Cb6N;FQ82<Fzx8Qf>H6He%of<p$}TJX{QaawFZR(P!OpDH0x{IfD>
z_kDWii$T9Y@oEP=6?a6UBuaB>Xz9!27Qe<i*t8PpDzp+d#?iOG8;zP+na*KsJ-5V^
z6A1-<SF$<+=;bpFIc7%OYhbz-J>a!Ksc@H1v58gnlm32b5`)^M&YG*rEb;##*;M<>
zf@$&IMpceQWZ>l)IaYVqFB!Kf-a#oI<L5&4R})UI9%4uGQQH-p?)iFXBv;Lxsffs^
zpSNu>=`#6|jMR7CHH}{)r6z{b%k`6A9`yWzA};s5jY@YB#W_MXa<->)%(60epFe9$
zwYBO&^Y1Z6jU+txqnkx1h~qriuig~i87llkShENflgqJ^%W7(tLNnu3ccPhO`eUKB
znO$9vD*veKgPCxz9zT`A0l7eZZj3*@5KhbMd!5_=MxaPo41;M{Qe<v)_|XZERcX=G
z_At7+uv*`haHZX0=H^>Ku)?L7$^9@vh2jn)(wI9DgMg0Q_il9z`r0_5VBN<41lEPo
zUPU{PL2c+>R|}j{%&P2v<d9H^yc;pNJjrN#$dSPDb8(MfF-ITmC$)Jc<nNB@^C_Nd
zY19rVdYG^?v#yR-EWgsAbDtjh_4gVcx%N+L4+&=x!d>Llr-d&z^_Lu2+^b9r_en9&
z@mMZ8=MgDRVUlr^cU~^3o7SR&k3pF;=ARSI=J~Co1IK7un)UO7<C^u$<0qY$*?iIV
zsAul|Zm8WJT%|JfOHnps)KNGkEahxb(G_#H<=IgmY+fy1y(qp_ExiR^s&QGBBoz<?
zL2H&%BW(?@eIw{!C4Df+fMIK!(UX<oEFbV*{VTuuz#84`79>I7)q}BaYsq>P%CK(#
z!pJnj@Y{@^jF|L%uW(?E^nIM~IGOr){s-~OTK_d(ZPvg%Psu>O-i2-uurMeG>#9?P
G<bMM-{aU90

diff --git a/docs/html/structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_263becc1ca5b47586740c2f7bb0d0145.html b/docs/html/structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_263becc1ca5b47586740c2f7bb0d0145.html
index 8ecd90b3e6..295b3e84e5 100644
--- a/docs/html/structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_263becc1ca5b47586740c2f7bb0d0145.html
+++ b/docs/html/structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_263becc1ca5b47586740c2f7bb0d0145.html
@@ -97,8 +97,6 @@
 </div>
 </div><!-- top -->
 <div class="header">
-  <div class="summary">
-<a href="structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_1336264f25bcb4cda6dbe142b2fec3e9.html">List of all members</a>  </div>
   <div class="headertitle">
 <div class="title">cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::StoreInternal&lt; BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED, DUMMY &gt;::TempStorage Struct Reference</div>  </div>
 </div><!--header-->
@@ -125,32 +123,8 @@
  <div class="center">
   <img src="structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_263becc1ca5b47586740c2f7bb0d0145.png" usemap="#cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::StoreInternal&lt; BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED, DUMMY &gt;::TempStorage_map" alt=""/>
   <map id="cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::StoreInternal&lt; BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED, DUMMY &gt;::TempStorage_map" name="cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::StoreInternal&lt; BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED, DUMMY &gt;::TempStorage_map">
-<area href="structcub_1_1_uninitialized.html" alt="cub::Uninitialized&lt; _TempStorage &gt;" shape="rect" coords="0,0,1324,24"/>
 </map>
  </div></div>
-<table class="memberdecls">
-<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="inherited"></a>
-Additional Inherited Members</h2></td></tr>
-<tr class="inherit_header pub_types_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_types_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Types inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom"></td></tr>
-<tr class="separator:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a848f6233dbde22d8a42c022d3f0aa6f6"></a>
-typedef UnitWord&lt; _TempStorage &gt;<br class="typebreak"/>
-::<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a></td></tr>
-<tr class="memdesc:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T. <br/></td></tr>
-<tr class="separator:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_methods_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_methods_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Methods inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a790b865325f19ac45cc84d3fed0d3038"></a>
-__host__ __device__ <br class="typebreak"/>
-__forceinline__ _TempStorage &amp;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a790b865325f19ac45cc84d3fed0d3038">Alias</a> ()</td></tr>
-<tr class="memdesc:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Alias. <br/></td></tr>
-<tr class="separator:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_attribs_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_attribs_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Members inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a5fa7311d943222333e8c87497ff8e782"></a>
-<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a5fa7311d943222333e8c87497ff8e782">storage</a> [WORDS]</td></tr>
-<tr class="memdesc:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Backing storage. <br/></td></tr>
-<tr class="separator:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-</table>
 <hr/>The documentation for this struct was generated from the following file:<ul>
 <li><a class="el" href="block__store_8cuh_source.html">block_store.cuh</a></li>
 </ul>
@@ -158,7 +132,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:06 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:17 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_263becc1ca5b47586740c2f7bb0d0145.png b/docs/html/structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_263becc1ca5b47586740c2f7bb0d0145.png
index bcc4ffbafff41ff4b256005b2e0b3c08815c7802..17488343334073928b37f4809d0e0497ba9f86db 100644
GIT binary patch
delta 2079
zcmY*a3s}<07RRvMG;`}#rfZtludH2yZl*>ln&CCycE$9cirZeyTPiB8Z-uPP9-EIP
z!Od6sRuozkK}d9aY#I65Lc<rWg{TCiC@2Ih?&JIJ&iBonnKS46&F`F<Ij448-nQZ>
zM6oaOI4Z+PS~-q$IMBYHSPm2FZ>!$SgST}+`W9^m?fi$&`yYN*a}s;LjWKj4{yt;G
zE50p%<cwJxLw3KU%9UWb8gS3)*0m&N+)CTg2LZilnkJ|Ky*D%L%KYFzfR+t>%l&lJ
zVB9`@-2hmmE%Qb5@2M3?Qp@pW@1PV$5lzcfiuGcqqW_tyeYSQw7mx+II#pe<fvKuG
z2RLYEOv`cSCT;+x*%}q|zF4&P@vq~$Z-p0zq28$(N<5R`=W2wFwreZ27MJ5q3vSc~
z8t@O5)L#h6EjdBDvehKCknXnRayMvK)p1h2q}x~cRM7hH2Gl^0c^zUHZ@KN}DaB4|
zN$vZUZe>Jscr4&wI4@pd#ta)kw6=dh(PH7<@T=)%*pmEuwDiIro=l3~--0r4Aqe--
z4Fa}U2T58T*`ByQ)Yg`skcEhLRAIXK$F5q18A3q5v33%r!=rkh@d~K+K<MS(%rV#8
z#O<iBILKQtRazu0E;w7762ZS`jp{kE=B$3b?Z8MK!{CSI8U^k4j&YyVrzZsJQ~S@i
zS)+vBe;AqB@<xtzhM61UX+^}Z+effpqhko!2L2;EpmXHR|NeGwW2+$JOQ4VbD;B%g
zCZB`W4Y`->@dSMaGTIIL?vpAeVWUB}wg&&7Iwkfmb?-+cY0cF566oNTuyqEpW^PY6
z{vE*hk{8j#cpGsFe1OOXpZc?#63vWH6FOUXXFD&CgF|-udpPpbUHTVuCVSwKHd4+O
z4)awR!HaRb%VLtrE_;c-n3215v9$~3@eF1Z87NJl)5H;Sx1oQMJnUGm79r_)nnzoj
zDPYd8ITy&M;TEtXWOA^{H(w#b-4auLTMOB<x3+y~2+;*e8)nU;(}v~B)&aspJBiGy
zIWvTKR?Qx}4iCE|%X}1N!K1%SN`*qVn^X+mG3)*42IUE=D6gFJ*_*kgyMTjS^tJn6
zOecgDX+)Tr%hXiiPi_;_VZhHT^~wimF$c_K$4+sa$ly&r+AlGoS7jBKOmGYP0j|w;
z+-vGN*XMMII>>Z0KVAzXEXH?nanKnT3U(%{+ULVj>VKmWo^)#sB)mzD7?1MNC1Ls^
zOVBc=9RGDT^|acmIZ`t2{rWb?vNh|jpPYxYiC3dh%eDXl2SFE$I)=DRd}{c4-5m%4
znk=_|s}6DB$J^Q-51`L2ry%Dw+P4*rbDF%SjU54H%)Uhes+V6^;5ztx5!bK63eI-<
z15nJt<sbQy*9ws>;q}~|Q6AlF*rilA*Q3<4c}+b#RR>FvG`fvX&A;3jjsH>Kgoqu|
zzRfYdm{Jt=f*$W&H6wq43&?Ah;2W!nB3z~1e8Qieb0Ln}$(3l1Vn*qZ%p4d+vIQ;-
z7T3)X^iFwY<x}KX_ya`^EjIIzEuCMdah2S*eT}-T*##(fkp%<bK(kK|gy$YNDl_~j
z^-dOws?oX6S@!*E`|KCXM>q>qNF#)meA0tQi3UECPL)JOjccUIU&JTyRrr8&v@R{Q
zl0T}EJH~be&O`*;-8eMlN0M)z$eAW&5#kDGN%39R5-LtwlII&}^sL<c;A5c*y}<M3
zi<Zc!trCZz$kqn$n8vStDZ`6AUzkbE(YEn&xXa|;93N7s7cD69iq2{WPE=m=r~m^O
zw7kIm;8Ja0=BT>|DFFAFD5wxSRws=4O_6TWDTbJK!8zLOvEI8ONHwO8D0Lt_^qcZd
zZrm6{RrKd^49|w;<f(#C-|>q3f5^=pg0{0UA8Iu3u<FxwqnS*<vU~__NB1u$+t3xK
zj2HL;9QVkp7k`|g^q0j7(kDbuzs-iyuI59+vvbr9!pS5a?g;VLu{7relUM+v*b68V
zxktWF|I}G53gMDOzl4MyBD8MGsPrV$ZNy8t0AdSk6tnt%03;55hi~NY;nKUKC(=s!
zp{9dfS)U@6tH|9{Oy(R#)5VZIoERs=S3`=-fB}C!9Du@_zCU%5Ika<Uv}+%(XP>K#
zhg%vG6+eEj;U$D?1;JNzK!9!|0Nt9nrvnFfJHWG6RTqoA2*~B@o8#VdA&vRn$C6|E
z7CGR6xTW*Jz)cnc6C;`in@DF>%brV4m6ji|K-DuseO>_u2y16KQfpJvpTwe<INc-b
zZp$*CL{)Am0;-$o&*t_@xqZ?}X#|#`Hq;3wO}vTM!K6FnVTpo&&zq6lC+C;zLtEsp
zj)+-?|5t(SS@n4;aS)Kr6{`tHk?9e#gUzK&(5-CRQ~_QzjI9)Rj5sFk=h>M~B$w!)
z4}3Uvms$rv!ovr%CTg>o_a&{eoFiQMU|(h@+8mjV{#odJb_&9?2T>kpL*LDhNhgv~
z2r?q!5!ywpK0FPk+oUhBXg6U6E418pLSK{OO~-wZzB(s+#jaW(vw5hfe_CiUA%A0K
zNXSrp@bq`v^_c)ds7oV8Gs>qB&b?Z0PnLP|Pf5w5;0-dz4H@oZAObNiPKVU0Ohi(5
z-KRqL9nE@o;LkMNyQj!Ej%}o4LyXq_p3&bjxBRu|Njz+9_ay-%_<IT8acxj<#KbvO
TQ8x9*gAC2rI|xbj`ug&3n0Yp~

delta 2109
zcmYjRdpOhkAD@WhtUA<*$|6b^O*)mdgi?nby2$O!Tuz6bv_c3&^CidA#qU^>&9N~l
z><}_G<}#fmm#qymVlt|^wk^5L?!S4??{}U)f4tw%=k>f_ug~lAyx-3|{re~1KL~QA
zc$_)qi`Eepiel$%>huZzJch>8d4H|--)RuX7<P!Fs<ZU`^;c5|j{w88NHCukd6`eM
zz0Rj)M)GOTBN5W)IRy?tbbpz}mF~}?<#la-2L4DI-#xxtC5&IoE8gbY<`MpvHg@lX
zS4<ra@ZC<|IJVPR9Oj0rIJFdT7wVTWyHLMuS>`C>a55X0Z8{sUZX^NQ!9u_(!QRIr
z=?<-gdB{gJU3yc}GPk7mv?uHH{^4<nlxCO*LOKAvEECSm;Yxz<&wl=-8r%O)4=s^L
z#19-3EczVtf<#vQ%NPwld_3;a2>DfCsoyi|lRTzA99J#fjWNAa$Adoc<Q2=;<dmW=
zVhTvz6qW2h>s@}z7F@&TCHvv1AIYiNfMRp7Rt$e{)izgo7RM!w2>Qc-(hw2Kni-QE
zM)F=|7j<2-Cp({;YA%fK`7#*wDSC!-8h4IUKKC&PD%~n>b<5NKeY4f-b0<V)1Q)=?
zeo=PKv|se@5{|@kF%y`XQx$68Jqh<rLd7C+E73_r?Y!)lp_R^Al~zxoM^gKqqAsd*
zR^6(s6cG*B+56>?74Ll%-}MzPJ659xRung`%h&oXmaoluY71zak%5}PLK`CaFKLm8
zw_o=&!5O5p1EjQ8>KcL8oCJLX(wQ#m+PDLxmj*Hdxv6D)XnI;~^`8C}+<55!@God9
z&(+d3CqemlZFICrTV8AgU!(UAYDv+{Z8n7D(sUhiTMf5;m2GoHy8eLJ58pltj{5H8
zWvGhiD;Pe)T!`09UXQ7lxP2NsAEq|sVOW@H2CpN0xuAXS2`TJNsT`JS_!fn_l8bTb
zOrA<Nsa2|8v?irBEu^TfQhmBmWE1;q6{&ey-_<c%Zo!PC@8deSCiqvg<SfS7zoWht
zN(Q3tkM$x=RZ*N-fTgH4FUlz}u-{r%Q~5`}&)VrCQ`S;Fl&SQedVeYAh0Kc%TE|(S
z5JmLk!9L`<aVcdd`1KI;;LX#-s@a$}A}gquDt64jgJ`i~ZfP4ZADp|DIPTxTN=clI
zO`9NxW?sMFo3ZKhnFEYn%xHrW2(@x}Jq4PT`r!8Ir9cgkr3|dDA!gCvR1jc8Q^w8N
zb6ZT!<|BZRg8@0heZ!nddzG~CL=>%prb;-LT$q>YHajR!FpmEqURzmI*565ibQUm7
z{aW2|97tAlS8QN}0MXV|c?9slwV5_1uD^jH7d=AW8>T|->g30)mQ<-X*xbd1lLLBz
zyIK5jAjxJN)!$AC2v4F<x;K1!K3WPjJ(MqR9GV+g5Sjx6GmnOt6mTELM#Tk0ns~f7
z@PqiW&AkvLEc;9t8g-#`(UC&LzU`YgW_38l?qgQ-&;@`E^M|!%)r!K@b2w%^!{LVD
zq0rMYj=B^q7_w}}A-g}g!-KJ1vee^y0DDcS;j7WbS|~HU&=h0K%$<KwwfkKgJbJOX
zY~v`Yiy~=cC74G9j^9y<kKVe9Ew2wezHm^IRJbACtpAw>r922*XJ*k8+jARyXR)qj
zBrmhqe0z5XwN<d8q4u>uM$)Mr9I2qRPM)k$l|UuAutBbS$-4T7!e(6u(h49L`*UZL
zFtFOvY)^kjY12&QvyjjUE_*-;*c;VRX0>x8zsq&<=a$s?Jb%iIkO`mSGwccnMQ<R4
zs7<L>%zcjo%4w%G>%jCc=e#F;D8%c2NV*q+W5n77T*9@d3o>LEaTkL4ctowcX5!Bp
z4@YW2b>@V|%BMUf67vk`F%GeoST%Bgw;Pg8z6gGID0*~jaf->%JhE$&#c+<m!-Eec
z^r>PTRD1j9S`gMOS6o0=$knI*cI>^kS5Rf|ifmufe_r7I#-e<36o2`($q66u(iK<f
z<c#4;#8@PZ*xj+cxnL!i0>vfeLKR0tA>9V4h_2YZq0HE;WMJP`fBly8#h&o&I5`~}
z`pu{`T=t#~n)3OeE^g+*L<K{q6Y_XI-*V{5O2o3vbw_o0(oAJ1ohoh_<>O&8Uo05T
z_S87R57?0(ddD^NZrKv-&>h>hS8@U;oTf4@?{rJ?!`1RSlD?`gUkRoJ?Hc(hLKusN
z<Lt$4mH;#J?yT{4c2^kuJUAtBobgb2!62VK&7=@F$6&e`Jso>CjPyyQx_z9RAupPd
zN#>e7{i26Rzc=rT8na)w8{?DuB+G%|BTZ9V<KnlS_zhFmV_r_ESL|*`YzRxL;YpJ<
z@rq>jb1y|m!mi%PL(&}iZRCZ9WX#N?S;Y}H0261ODCbA3z31g;myzwaoCzEHw&NVV
zP}Z$kzl$rSk3*B{5mT_LrEWye7yKHBppR{o!zqu-_Fpw&@_2RTQu0{j^zlkZi)@J^
zF1DyjqF>@@_d!x{Fd8Q`O5;s9;bY=O2~o#vNS+o)FWV8~Fx%!0;n~gT(SQ#8X(cek
zpfkw}Z|fg}G$bW@C6uITgA|7nIslg&KqnidmzFp$-NpWS-o=hN#1}+SDigSTygqtV
z8c?HiV8tv*Mo3rup~J9<OapQQuBcsAQUjsLNv+OyGW|>2mH(~b7uFn)D@{7;_<sR`
zTezWiYv=mw?zB<nAg@(r4hDj&s3w8eZ0iRDw>WDIT2f5aZjZe(LE7W-<M}hyuD{&=
EFP1lE<p2Nx

diff --git a/docs/html/structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_8d170856b7ed1df0ed565731a681b449.html b/docs/html/structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_8d170856b7ed1df0ed565731a681b449.html
index 7faaa395f0..40a7f76a20 100644
--- a/docs/html/structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_8d170856b7ed1df0ed565731a681b449.html
+++ b/docs/html/structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_8d170856b7ed1df0ed565731a681b449.html
@@ -97,8 +97,6 @@
 </div>
 </div><!-- top -->
 <div class="header">
-  <div class="summary">
-<a href="structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_2a6ef8c29850a6a6dfbf5e4acbc50c1f.html">List of all members</a>  </div>
   <div class="headertitle">
 <div class="title">cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::StoreInternal&lt; BLOCK_STORE_WARP_TRANSPOSE, DUMMY &gt;::TempStorage Struct Reference</div>  </div>
 </div><!--header-->
@@ -125,32 +123,8 @@
  <div class="center">
   <img src="structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_8d170856b7ed1df0ed565731a681b449.png" usemap="#cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::StoreInternal&lt; BLOCK_STORE_WARP_TRANSPOSE, DUMMY &gt;::TempStorage_map" alt=""/>
   <map id="cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::StoreInternal&lt; BLOCK_STORE_WARP_TRANSPOSE, DUMMY &gt;::TempStorage_map" name="cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::StoreInternal&lt; BLOCK_STORE_WARP_TRANSPOSE, DUMMY &gt;::TempStorage_map">
-<area href="structcub_1_1_uninitialized.html" alt="cub::Uninitialized&lt; _TempStorage &gt;" shape="rect" coords="0,0,1244,24"/>
 </map>
  </div></div>
-<table class="memberdecls">
-<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="inherited"></a>
-Additional Inherited Members</h2></td></tr>
-<tr class="inherit_header pub_types_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_types_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Types inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom"></td></tr>
-<tr class="separator:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a848f6233dbde22d8a42c022d3f0aa6f6"></a>
-typedef UnitWord&lt; _TempStorage &gt;<br class="typebreak"/>
-::<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a></td></tr>
-<tr class="memdesc:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T. <br/></td></tr>
-<tr class="separator:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_methods_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_methods_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Methods inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a790b865325f19ac45cc84d3fed0d3038"></a>
-__host__ __device__ <br class="typebreak"/>
-__forceinline__ _TempStorage &amp;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a790b865325f19ac45cc84d3fed0d3038">Alias</a> ()</td></tr>
-<tr class="memdesc:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Alias. <br/></td></tr>
-<tr class="separator:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_attribs_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_attribs_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Members inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a5fa7311d943222333e8c87497ff8e782"></a>
-<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a5fa7311d943222333e8c87497ff8e782">storage</a> [WORDS]</td></tr>
-<tr class="memdesc:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Backing storage. <br/></td></tr>
-<tr class="separator:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-</table>
 <hr/>The documentation for this struct was generated from the following file:<ul>
 <li><a class="el" href="block__store_8cuh_source.html">block_store.cuh</a></li>
 </ul>
@@ -158,7 +132,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:06 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:17 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_8d170856b7ed1df0ed565731a681b449.png b/docs/html/structcub_1_1_block_store_1_1_store_internal_3_01_b_l_o_c_k___s_t_o_r_e___w_a_r_p___t_r_a_n_s_p_8d170856b7ed1df0ed565731a681b449.png
index 882b42e32ccc2e4f240ae340b7cc01c918d478a5..29e6fe7c925e6a4fb0021dcd219d9d63f0f6eb49 100644
GIT binary patch
delta 2080
zcmZ8i2~d;Q77h(UwFOivD3GA+AYu^#n-t=LbwDFX0fnfjXprQECG1Q1aYfn!+VB7g
z5QMUrkVIt(2@TXz%Vq$DKx4upOMrv`f*~eAXhPq-dGqefoqOk;bH8)W_syJ}X(-$I
z@FJk??R6|5NlRK1oFHHuZTSN6RMGp1Z)6&mXNO{EDH@IUKY5@UD_cBmUwPNxXKQ8N
zP9J;gJpZn!%yN@}I~YGbN-L=Y=~|=1D@)mG;m#{lTgBE7g#Y*o_Ts<Sqa1UsxqMp_
zl#piqXDNg$kVd3a-Od<Ng(@JR+VP&Z53fu~&^&(krV}|#7A-%9*|rRY+1Ll(5zDhQ
zSnW<lONx-zf9Gzrj;#FLD_`#qf3=E7k(_J<y8LZJrTIP7%LSpDO!{7tlw%EHM1ciE
zgd<Rxs(uIOhUCHY0w4FQ2QEd?8ckBYJ??Rf-xOo~AP;~|a2w`#SF`ycEF28O{l(4K
zl1Y~|cn`lCsokG7qHZT|v-aYLD8(l%VOeow)kA*vJfi@$af8~D3#{(qJk{ZS^N57;
zlO{PbpIkQ_Kv#Ju#LJX)G@j}+<a18#@vzVk0rBmU=AYtKPqjS^B?*6+_vHbd9;^d0
zx0fezlw<LmWO+<YFywvng$(o14F)-~A&MRBP_t6|R<hREh)Ut7hG3a(6gAs0OAR7G
z><=!C3)G|JYA6?@?3oDOp6VBib^dPyU28lyvjW7};H35(H8~n;G*X@3!BUs3mg*pI
zY_e)oM~=3p0ku}Lo&Swm+{gSM$$ji9^FsBHf<6aDi3~J{)?UzBf`xkqXglb$-*$ak
zwoL$U8ZTya=<Nk*XMm!&cnXc0wbZxmpmf9|)6GX?7G5}zmd1a2J4=gr8k|Bb16%)R
zn?W`*{x9XI{+3NJHH15H2iO|W#bZ75ds|gufs&ftWnoWY5VRsX6$E?72qjYBrf8P&
zkag;hG=l53K-f=;9h4id<7e2sm)8f2kRJwn8%1=bXS(q-{=kZjB&AW714dp`sD9%l
zWu#9JDcdla4`ok^2Ijt(opT!DD(Q=}M;*!XT?Qzjb<oMynLA(A0#$wM@Osno#AgPp
zhbUar%i*_7nMO6Gwr65PB8S(|$!RrKSAhE*aMARy!tVs!vu)eO>5Ef>Ml9~b;V*gH
zQ>m&X=vnldUjrS!JCpQPeD}>zB~&CyiyuQXf5#?@qBBG{q4)mvps&O*+QOLpvw#VH
z18Jz>yW%BA1luhDNlaMRRk(WWLuHFI;=-rwqUGa?yAd#!)PD^#QY|r2NJTS&^&`$G
zM#XLU{_#O&0yk)aZjScQ3ti}@L9j^+14|~X67Pwq4%~&C!zd2BqCU+ha^VXhRp-dB
zgx)UNk9>|~eJ_=1`L=+e8;(FC47|uk`|l<{7A9-~%#dvRJz+t<Q4Y+`_Yma;aUdnB
z$(9`NaFU0o=GUL8OcxI+TYXk!uj-(3C+AdFgXOuKr7hL-lEg_jXcleaK%wD+c^3`C
zD{Ucn(U7;ENK&fFL_u5$Lr=(@Tp8#w!x4D$n2wyO!Kn|N+WFj$%bvwV&kgd{KmPjI
z0tne)xRdfiBwV_Wgk(Fh4l+62qT0jB@OsLDIKx3BMQrM!UtSKxknH6WO7Kmhk?f)C
zu*`)$M;G3|1V6GYbId&yZc&zcYG!QY(tVQ<9w&+zjCE>l?op1*<U~BmrjAyf<K^CW
zCX%5qOgcWwaY4m3W9toq;g1}gIKXM>4cS^3B%!KzBOL7PP&GR4^xSiz{twKBk1^X4
zB6uOmu-VzM>M1v?vAMimHDvUdB_d7?vu3_@;s~n*YYj`C<Gr1<Z^|UIoc&D0q4A_x
z-W~m^Wi9rDAev6L`-bl!Y*VJFy$p`8yH3(hEL?mK3pH!1K)t^Q2M6>5lge+u<QMYW
zb){cZn|~^aYaO;m{P6MH1@u;zM}jxy;U=@x#GV=Ln8YfYu#t<E$265xb(M7B8f}}+
z#lxST3yWM1v)a&x2z#dLEg@Sdb~DK7gP0>H2h+&iI85HYJ?KASEf9mL!JgS^e5ytC
zC{aL!gXetB<?!L`3BZlJdLki@cVb&Ei@okHUCugc)+y~fD38UCBq#PlMUxXya)IFE
zXETSc%o%Odqmgu2G~+kF6IcyUnAwZRX@uZf%*d*}AA;D4*vIs~_F8+Ob1i};ivco)
zHsdq70mOd!l$+xm?^dc6ldS)B@l!RI(AkL-D|W`dX7vDVbV0Ygbhatud^ZlEa?vZE
zc}{m8Zcj-RwHF?j6+0s<Y21j_LAr$P-581A-wdg!3>ua!Nmkgs&qymvIKiUedj2D^
zL4RqaOw2feq4)na-Q<KPM{<HuuO=)50IX=4SDzJGCq(<ZctfnNT;k}jzfR@B<Pluo
zt^go1YTL!w4=p#s2krFEci7#opkDKw58}M*OAb1(!gNbwUyx}FI_0o~(gn|}mmQfl
zpt`o^gJJpJNWm#QwxVTNw`8;^iM#%+qhhJ&nHFfn8Ua;Fmp8h#QUs3az?dq#MC;Rr
zXp}$GIL;^#EKB7i^RjR|0w!aXfN;<`a%3ztS^x*jbF^B_aT%?hbuK`;%JlY<>T@!R
zFPqd+PvfJYrrpz4&PP@m-TzEt_3-o|9spZ8yv!7C{@<Em?&o9wB+oznn+<T5p;p?w
Sb^0_&y}VDJ^`alYa{F(+axw=1

delta 2069
zcmZvddpy(oAIB#h#W~VdrO=IT%8|?Jw@~2}9VVB;uwTh-n2eDv`>Hf_Ax${MbU34F
zbPQv&D7hWCifxB&QOso*5;L2bGv{}HkKaGPzdn!0>-~AZU!TwW{dqh;(#@$`9{c<0
zJ3F5Bx~U;4oI;JB+^n^wGc9rD{HMBB&F#URIUz&5zBgk1bcXu{Ww39KiCI^5ih6xz
z*0nh&*cS&eBcHV{*J2eb80kE(y9jq(a_%0Kr|zm6cqglPJ8*tyV1{{56}P{!!lwJR
zFA^Yovfs|>tZs;RM3T?eju!C-Mt2@1*FDn~7S-2T><6o>ZOtk`_@F)P9IHK4jc$%q
z??E8RN})t&-%{GyC3!WP(<oDj>Y&HW8=`PYqY#~>w%n`s70&KP(N8&El+GJJh=&Hy
zmVg@PpQDN>!?hn>eIhv63TE3S59n2Js8yN4KDCiipAgTEp``fTtDP1({h44D3*Aqv
z6j7Y{tsf$7x+Yz%)o1*SMjZuILwIBOf~+Bdj$!f?=2&Tvk_($B84Qf7FW%B8u*sfZ
zc4hXnM2!4SN?{;3bbz$#0KESt<Y{LcSP&n<SpcK^j?;c^_e7(hNbV1`z!DqQ#)cPK
zwU|F#^#IlKK-&b@^f7GE3rS?z@2b+34WR+DX2JZwnxZ7^z>K20A;~Ls-&VUOjUKxS
zbBZ9JMV}iZ8F;PO{vm;)+Z=io<y+MSS=#FMxi3fD?DjQC5SOdtRsVN7yZlEodFnB~
ztv0*plM}4nRc&_FH%E}Gxe>Icxj?>d4`^*NXd?&*$=Y=Wv>v1}qNALHinY|yGoX6V
z|HK&BKjPxDBNC*sn)YC0En$ylE<v@%(@^!D%VCpj50^u~WxKl^exB`tgKS-`yXFjN
zkx%%##sqlQL6Q5|sT7&s6za9)j9Ot_p0q`xF&Wl`b0bej<_RwO=k7kXu)s?5p89c|
z=uQbjI&dg%rc_qseHm`SI)Rlr%Si4a#aj)ssZ|NP!!~!x)=jr+&O@45#+M-}FZEcZ
zAw)7<@P+?!Mu)iJokH$2`>HkIn)mzzJLfk5LWkevz}Sm9xOBs;WT^4;gxQTz_8Qh)
zO1M!D4BX^vL_DH>vgqne@_3>@vP2S6c6B}4eo+xMTw8b30FOv3@di#q4_e~N{WyjX
zTU_tcgKO*+M&qJe=#dcG+lYkmxa9G$qI;2P?+dqlJ#*a7gciN-R|q1M8ZpK`rf>sv
zY*<SQ_!wtCMQlyh{8;#u+RT42v4u1>7Jp%unn<jKjmZj1u6&4Z7c(<->?+e>BCa*;
z5hE#2YITcgPK$r}IiW0v(;>;SQsH^{du%4F0&=mdY;56mGZT^A&y`(jKwB^pn8fQ+
zoTK8KWMp^Q)Wq${En)dTniIId^JJ<s*d(Z1IOQs^Io0T39Km$i2A}RcU+8kFmPa<B
zc)lPiLYLttONuXp8Q9!BCT}R~kQH>;QznI=c4fZ%G;7qIE`tg#*CL?9Pyw6N{2tBh
z@9T`B_j0tRVJIvjw72fHaOw2*36$vDV&G-j`LRRBz0SmyzO~mP0Sp|;ziqfZ={W7~
z!s8y(fLvZK_d{^(<@`oNKbAgSJ0epVggsIEW*~-fI8#LS4I}7FA6xUKmUM9?4<7(I
zH77OeI}y6C7<+zl>{CFhfgg)GLl{I<&IE)e%`T`)dF)QH=*9jIx>f{kg(kS@eZow)
z5r5i1;=pz*y$@WRc~Vu4!3j1%`X^SXw|8vokB-UpF^=&IvNltk{*F<Dpqwt8kNcV`
zusJXy&fZo|3>mSsj}u}pAnyiX8eD1eSOKBc-jp_;j4YE?@{H>B+Y2g;;tmn@-aFMG
ztiu}Rz!TpVTPu4lepZ1A_dtD13X~)N(p<>bG3V_BiZP~~iSUlLZae$1GGBhMxO8dx
zxcpM-L?ZZNUOKN=-?sk(oEiJm!8C?tl%Os;kdfIUvGSgFL{e=}Dcxt<G6n2s&pQom
z3Ks98MCN=>2L(CTx`{qolFrMYiEVIwbmBlux6cG^^aXF|)UrPkx^^0i!lFWxN2f+{
zWdI|FM@j@bdaE%PK3iOot&o!JHl<lM6e<KbWDjI!da(bQlHNWcuwjDeKkQ)KJLe!G
zoken;YEW<_CDko8(i9HYFUZft=?wH;{|4=dda}b(1APMULniTt_tMf&D)zI+=93z9
zzsI6SUO(YN*X`&X=vP0SdHLR@BijMvlVRAVVLeCN{uqs_9Bzy<)Zk`uJH0K=Sj7Lu
zI7o*zmll2`O5z5oi-&PstI-75pkxQ7Rv5^kar=Vs{2EU55`Nf}n&AcHEyPt^0SrB2
z3)(E&qq5rLL_I@Ci;SNuV3x!_l51Rnz=n|TEIgng5~3$VKUJ_3e~tif9wZdjb0q(P
z>==aV6kdPsr%lfSw#~)Jge-h7mHzDtERaYTjZ6Y>WF>|VFQp{@&dUdNZ<zW0p|7cR
zRMQjKF6pw}q>}wEVmj5^=xG987qkeL&wh<2t^m7>X+2-mG@AKyXYxw4s^}5&s0!0y
zJFKdgYhdsn%e3l;0oj#S&_Sgx5*s@5$)Z_SqWcy8P25c^U%?kQ+P}@Dh%qQlHGlVj
z{2#U-usFG7pZgym?*S>ZJ9PiX^bPHO;J=~TY?8)XMeBNy`X~M;>yAC@=zrmR?(lCZ
f9xif$QqS5+O9`$wO$U7s0)d>J+#D;<{rk>eR_j4}

diff --git a/docs/html/structcub_1_1_block_store_1_1_temp_storage.html b/docs/html/structcub_1_1_block_store_1_1_temp_storage.html
index 57daaccc5a..0c2fffd3ec 100644
--- a/docs/html/structcub_1_1_block_store_1_1_temp_storage.html
+++ b/docs/html/structcub_1_1_block_store_1_1_temp_storage.html
@@ -97,8 +97,6 @@
 </div>
 </div><!-- top -->
 <div class="header">
-  <div class="summary">
-<a href="structcub_1_1_block_store_1_1_temp_storage-members.html">List of all members</a>  </div>
   <div class="headertitle">
 <div class="title">cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage Struct Reference</div>  </div>
 </div><!--header-->
@@ -123,32 +121,8 @@
  <div class="center">
   <img src="structcub_1_1_block_store_1_1_temp_storage.png" usemap="#cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage_map" alt=""/>
   <map id="cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage_map" name="cub::BlockStore&lt; OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH &gt;::TempStorage_map">
-<area href="structcub_1_1_uninitialized.html" alt="cub::Uninitialized&lt; _TempStorage &gt;" shape="rect" coords="0,0,870,24"/>
 </map>
  </div></div>
-<table class="memberdecls">
-<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="inherited"></a>
-Additional Inherited Members</h2></td></tr>
-<tr class="inherit_header pub_types_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_types_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Types inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom"></td></tr>
-<tr class="separator:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a848f6233dbde22d8a42c022d3f0aa6f6"></a>
-typedef UnitWord&lt; _TempStorage &gt;<br class="typebreak"/>
-::<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a></td></tr>
-<tr class="memdesc:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T. <br/></td></tr>
-<tr class="separator:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_methods_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_methods_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Methods inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a790b865325f19ac45cc84d3fed0d3038"></a>
-__host__ __device__ <br class="typebreak"/>
-__forceinline__ _TempStorage &amp;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a790b865325f19ac45cc84d3fed0d3038">Alias</a> ()</td></tr>
-<tr class="memdesc:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Alias. <br/></td></tr>
-<tr class="separator:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_attribs_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_attribs_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Members inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a5fa7311d943222333e8c87497ff8e782"></a>
-<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a5fa7311d943222333e8c87497ff8e782">storage</a> [WORDS]</td></tr>
-<tr class="memdesc:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Backing storage. <br/></td></tr>
-<tr class="separator:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-</table>
 <hr/>The documentation for this struct was generated from the following file:<ul>
 <li><a class="el" href="block__store_8cuh_source.html">block_store.cuh</a></li>
 </ul>
@@ -156,7 +130,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:06 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:17 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_block_store_1_1_temp_storage.png b/docs/html/structcub_1_1_block_store_1_1_temp_storage.png
index 9c40a5bdf7dd37f952ade3069eb531aaa101fc67..e08c8fc3531653dc35510163da2881442ed2e3f4 100644
GIT binary patch
literal 1899
zcmc&#dpMhC8-FcjEn>wMha%I(w58P|R8?)0G~N<*r5PnwzKv)a+Gy*T6?3(tpI)u<
z&4^P=(9WThV381%evw30hgig+bxwlN7DdTk`hWl3b6wB#{+{Q)@85GDuIGMl`}*ME
z+NRn70Kh>{3;_UOWC;82hCwTqm;DO*82fq?v1+v%g0pkW(f8UjAha`iJf7z5VI;KO
zeVyQQ4uF)l$3QpH9ssn~LCm+rIF0!NS<J+ty_yxlWVISughCiNde#~)3qi>W!OCfT
z$oH*{zW&$y+cB7&w6tbR6yG!RKI$uQ^+(YTWExcFqD(;hOkxT;3pziYd8US(84Ae=
zdnDeOig#%X`s1%;6DEmXy}YZj<4gCMRx&SDC2G?S(j($}y?d8*JH`F+#T;axs|~2u
zf%pcZ%+|6)R<5WH&P>Rb$YPIsMAr8iOkoKJJ(7PyOB&oz*wE8_;Ysg9=xX*J>epcO
z93#>cKDLs7?PmcNZw+?Th4XQxsV7K-^a^g*Qdv>^Ld~r#=9wMx(7IB;!%JrFF~?5h
zP;l{s00kQ@Z$8c_bw=Pb|EPuBdTz+*l!c#MQ(0}OSQ?KnNNj(f8)*Lg-IAafB)IIv
z6;vn37fqZJle6kOax4$N@P8Vb`POJVrswGlH>VPtA?-5!5L!nb$M&gqRAw@)vY1z)
z>%TC#QWWtlY6!_!Y$n+x^e39VuS!9MF%tRplFQ1e*~bGGslP#q>^$!Pu3Met{x*6I
zTu<NL9ORTgPPcvA@7FMV2+H%;Cy*bBd&H4bx5}Y}%gG-t#2qM8huRq6PVuZb6omeN
z_V4$`+`OIEWCFkzuf2nX0J_N<P5Y=?5cozjY3pwX06NpIAp=@GfVfD`b%PxRv|c7w
znj~u+1+eGsx5ZGgx5fVp5p7!^Xk%y?12Eqd^tS4QT_3piNOXCCak9Y&L#y*Uy#J1&
zdwSY#!w-=^8`-<<xp6YR_9G$OG!*^X9;cpcua-!u$<O?vU4(v%ae%>3^5@L;p0WUo
zCmYVVbnXr3A6H{c-y9~&aw2D22Kb?YqF2kK;+ARRrm?$b1e#R>lEf{e@ei=JN?7b0
zAby6lNnxG)|J6qb^x01}*fX~r)b(UaXGL0Jw}#nS?U`7|JT7TouxQ;Gmb6;yL0aq@
z6db+oVOI6}TbBZ1&|j<t*PK)m=ZwI(b<#qj<har*HsqrPF6IxACnHDBhvmQEw{cI1
z?ef={(uXs+H)c(UJDcr7LcLCyD!N5CP~;?>P{_L?H7g3sda0tW+A@_qW2{8#V6D_)
z;C1HdO8-X1e3(e_N$em{9{(~xL=YZ9Q?0qK^&_S}J@2y#pTS~8dXYop>*eYpHMO9O
z^w^y9d2yxmxIx?EWFbNB@0jNtu8R9?no=BmQNl76x5c8BmpKEuir96uC|k*KrUm!T
zl@_?j)~i)-G76|pg%kPbkczpPC53Qx8Nt9e%=g6bsh%O5yEZK8guwhNc7H^Z>WELU
zyP!@K^oYjFwKlo5@6sjo@NA@g)%1#Mbars#nBAoY{IqAl52)U6Y(xus!;_e`p1Buz
zqXa$a@%u-Plw72=!q&^j7W%RnsDv|hVNQ9v-HaQyA%*X<7;s!^L5Y{Bu*qSD#6ez9
zP{%v%BjJs@7n9;Uo-`gZPph3${B@U6DxgPQ4i=H^G#BO)*-_!i3jsls<(7G8W7yXa
z&X$<fpdU8r^IN(}q`Lh7`cu~uoV;RJ%^Ay)r=&t12LuB?MHOX-H|LE{DlgDtcG)@Z
zn|XUfbYb`Kpd%e(o8U*a&R({Ny_onEyX$#Yg7%lk0_`ah1AZ5aT%up@p_hexO>++1
z(rSA~WlVX*6t{!ZNr81{tJ)$2?szc9<273n5%~AWFB{%<D!rotD^JOk)9E*Ba<HeE
zMfv^5{LSUjUbHJoNF&cNrr^&izcJ;Ca)~8w1YmU2kzeE0u6JZxK5lM`<<fH*LymV{
z8n709^H=V1$JfYuls}8axIRmE$JJD@>blE|$|s|#iD4m)JwF91JDU3|570cc4@h$n
zrE+^gg<EUAWa2KR!Ms0osOIJ}IC3{CocGX#=l)XG@w-@!{Pa--!rh&w4hb)`un+G-
zR!f!h_hfs&bl~2gBr99o>2O-Ry6tH<?|a?F<%_V&_&*vCa@8pjJ1y$4*nALNNUGjx
pN3Dh*>oswGwjX|A(BqGCP;2PGjr?A{hx-7~0I@!p>hD5Q{sH;$dw&1`

literal 1929
zcmc&!eOQuL8-J9<G%fe)nwGAbWOHMc8sRjwQl@B`;YVYd7uLl5LM;<bv&@lerZ_Y;
z{GivwF9e(TkwQkcNouUAh?fe1ncqlm8Kx-g!LI%J?%(%Z*YiC0d4BhOpZolN_c^!x
z{k%+#cNhZzV2bhf!~y_>0NVbWA>bR$FB%6wmi{LK(ORt*G-nj=E>pVlL2KjT^Z5q1
zB)h=r<|M3N0027a#!zpdBLEm$Vmyxprsyq{&tK%Ym~9I27+Tg)(*Kin^gE+-g4jtI
z!hZCZW60pK(Z}$<rjiKUb`HGnTu}s0c@s^&4YR^HQj-pdPi&YzPiPqG&oEnz{kKlw
z|L<Y;1~GP+Z$ONJ7sBi*j%&`!B$=40{t9EmN&8}+4m&yWX$3MLj7S9|dMA9GFapHk
z!m0|VAht#QrSF_DtM(0dVbb2ys82_v7Bt(*n1c1O$QnW0L{C_@_@h=R)dKfsm|Yqm
zfT^Fl3>;2znhW~65X>hN&@cn#*0hEZ_?@YrkU19Y*0_g3m#Lz&X?H?*vgTqv-aJxm
z7ZfBLai4DYqY}Gy?xMf?Ym#dT0j#`=Tx#OIy9VWi@R?DD&8wNiAq;duRoX8EYSO+p
zQNeDS)jT>$CA$PU&aK7qILLS(`9v}3VWO+HCxm8t>Q&0{P;7o{DaYHmITwBxHVHB?
z>$fA|3z;Z(VVHt!!4Efh3x-TCGH?MR5RcH|!M(KFU3AK(jJ5hYo?B_O>Vjs5cUcnB
zHh~#9I&`2USRHrY+IC31-VBmEn*lDij;>I3-@st^gA!!yrAN$F8lxgku~<6t4MFm+
z{XDH`Ukn5#ctk-IfW&-5#XcZ_XpcA6_<zbq-X;VUe%=Jn!4*X~^fkee5Im5vO$(S@
z*E<N1&I@(MGr7z?4T=X$j_Qj|_<-eg6E`>~5`gjn2?YT;2taom`EJ0*A7G0HD_u0$
z8vvLMtU~8nh2O~K{e-4x2|)61d<Zm%`TqsVNvD16s|&Fw>=`9kpZ1-`x}u@-uz#z5
z+>@gutgPiAOM@dRdio)QvxmlV-|25&BJIkFHryGUdW1-~MyWKy+rq){-zolam&A2r
zVf%9GzTyhh&T7}}DZ$zvJcnU(c22E$<LY<lRz!xJSg~7~9_P+mFl(}`$Mg+iu1{Ur
zGffx6d4IwyYc2wUkGod#2=_^xtFvy$LsgL!o6#G??iDxxd-v7M$L5ns4imZ6Bv-z(
z^!4eKT#c`FxTb<+NKAH<a-C|-n+vj8^QB8)x?m*C^o%{zS%mBgiqatgVX4lvV$>S;
zy3M6nii$4N-pxc!v5*8?Q&J;hetJh|Htew%C(?0#C(ZQ0Xr8!J8rxiq=-VPWYhEVK
zj6`g4;EEauR|=Yiu4S*+g0b5m$-=F1bA2sm)Gh003{7jWCwXR-wK#WkvhpWeGFR3g
zD=E7wN0ro&Gb(X|%okZCM2sH~=hx!8P9qF1^N<O$0R^M{>#<dW;%$C;jCeY+p^oX2
z<`y=YK1Qq@!H+3p19%<!;{6ZGoAws|)?e={?0BpEOg$P@5jW*YW(tqg`Huy9G%7tC
zLB88$;Sb(F)MT9I#5s8NM1S^?9W5%EB!})BAQ31;=ReLrTO}V#oJDU@pS|~Jbiqw1
z+T9;OJINfu`->ER>Q8jhMGd}pXkNuMR3ag-CdGAjacw?Yyu4#;p;Ei_13f;-`^U~{
zcvPCOYaG=xc7{3Po9XIGCrYgDwc8o4L_O<%zBTgq*RUT3LeF?<Uv>z`+YmX76xDuN
zBVk<8>A`7z@kwcFcsJNknov#@5$S84E{!i^$jO9t+RI@DnSb>Z=MxgtnQgh8Gv1xb
zRqhM%9Dms5ZLoQ%$b>d@yeW}|i~n`DQ2L7NB6+_3@v(3@LU@i^UYmej0ehz#<-x%i
z3$kjz3>yX&1h?KfppY6t`@+QVh6`9TJLZp=RdltgW$yFFkWBUa&~5pxz5$nFn8~7r
z;nfya8(@=WC9PgrQ%tewPrYNfDy3Of$L;gE@AgtATWbU_7t@`%O%nEWe@hN=xkFk>
z=qS4&J{<4i?(`t_#d+M@*X7Yg77y8*suU-32NFttYeTs=PrX$|wAw`<WTlZ)tBFw(
z%Y+NiMF<adb#?Wc^U)~g`}It9W_jwB-fyk0wG$eb-TgeSKU-X47_kMKIS0Ox*09eJ
zQta?#`M&AF>AL=#e`yTsH}0DDBgO12i935^a?SkTZk6|cH2ibs9T<fdowRyr^R_0f
R&uA_H4h-7QlYJb2<8K}Pjx_)P

diff --git a/docs/html/structcub_1_1_caching_device_allocator-members.html b/docs/html/structcub_1_1_caching_device_allocator-members.html
index 3f8cfea11d..4a009e7fcb 100644
--- a/docs/html/structcub_1_1_caching_device_allocator-members.html
+++ b/docs/html/structcub_1_1_caching_device_allocator-members.html
@@ -117,7 +117,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_caching_device_allocator.html b/docs/html/structcub_1_1_caching_device_allocator.html
index 93c2ac1fa8..56722d4440 100644
--- a/docs/html/structcub_1_1_caching_device_allocator.html
+++ b/docs/html/structcub_1_1_caching_device_allocator.html
@@ -460,7 +460,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_cast-members.html b/docs/html/structcub_1_1_cast-members.html
index 4907311cff..0c1b5083af 100644
--- a/docs/html/structcub_1_1_cast-members.html
+++ b/docs/html/structcub_1_1_cast-members.html
@@ -109,7 +109,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_cast.html b/docs/html/structcub_1_1_cast.html
index eafcc6a567..e3d8e193fa 100644
--- a/docs/html/structcub_1_1_cast.html
+++ b/docs/html/structcub_1_1_cast.html
@@ -129,7 +129,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_device_histogram-members.html b/docs/html/structcub_1_1_device_histogram-members.html
index b04d9cfa2b..d2b48e3885 100644
--- a/docs/html/structcub_1_1_device_histogram-members.html
+++ b/docs/html/structcub_1_1_device_histogram-members.html
@@ -116,7 +116,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:07 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:18 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_device_histogram.html.REMOVED.git-id b/docs/html/structcub_1_1_device_histogram.html.REMOVED.git-id
index 1db5f6715b..debf4f81a9 100644
--- a/docs/html/structcub_1_1_device_histogram.html.REMOVED.git-id
+++ b/docs/html/structcub_1_1_device_histogram.html.REMOVED.git-id
@@ -1 +1 @@
-30d089be1000e90e09af754a87c5ae99340e0143
\ No newline at end of file
+085e20f5050ace16d0c8ff8b312cd0608597ef5c
\ No newline at end of file
diff --git a/docs/html/structcub_1_1_device_partition-members.html b/docs/html/structcub_1_1_device_partition-members.html
index 4990ea347e..192302646a 100644
--- a/docs/html/structcub_1_1_device_partition-members.html
+++ b/docs/html/structcub_1_1_device_partition-members.html
@@ -110,7 +110,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:07 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:18 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_device_partition.html b/docs/html/structcub_1_1_device_partition.html
index 9b1720b59b..55566d2548 100644
--- a/docs/html/structcub_1_1_device_partition.html
+++ b/docs/html/structcub_1_1_device_partition.html
@@ -112,7 +112,7 @@
 .</div></div>
  <dl class="section user"><dt>Overview</dt><dd>These operations apply a selection criterion to construct a partitioned output sequence from items selected/unselected from a specified input sequence.</dd></dl>
 <dl class="section user"><dt>Usage Considerations</dt><dd><ul>
-<li><em>Dynamic parallelism</em>. <a class="el" href="structcub_1_1_device_partition.html" title="DevicePartition provides device-wide, parallel operations for partitioning sequences of data items re...">DevicePartition</a> methods can be called within kernel code on devices in which CUDA dynamic parallelism is supported. When calling these methods from kernel code, be sure to define the <code>CUB_CDP</code> macro in your compiler's macro definitions.</li>
+<li><em>Dynamic parallelism</em>. <a class="el" href="structcub_1_1_device_partition.html" title="DevicePartition provides device-wide, parallel operations for partitioning sequences of data items re...">DevicePartition</a> methods can be called within kernel code on devices in which CUDA dynamic parallelism is supported.</li>
 </ul>
 </dd></dl>
 <dl class="section user"><dt>Performance</dt><dd>The work-complexity of partition as a function of input size is linear, resulting in performance throughput that plateaus with problem sizes large enough to saturate the GPU.</dd></dl>
@@ -232,8 +232,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <dl class="section user"><dt></dt><dd><ul>
 <li>The value type of <code>d_flags</code> must be castable to <code>bool</code> (e.g., <code>bool</code>, <code>char</code>, <code>int</code>, etc.).</li>
 <li>Copies of the selected items are compacted into <code>d_out</code> and maintain their original relative ordering, however copies of the unselected items are compacted into the rear of <code>d_out</code> in reverse order.</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
-<li></li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
 <dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates the compaction of items selected from an <code>int</code> device vector. </dd></dl>
@@ -286,7 +285,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 </dl>
 <dl><dt><b>Examples: </b></dt><dd><a class="el" href="example_device_partition_flagged_8cu-example.html#a2">example_device_partition_flagged.cu</a>.</dd>
 </dl>
-<p>Definition at line <a class="el" href="device__partition_8cuh_source.html#l00127">127</a> of file <a class="el" href="device__partition_8cuh_source.html">device_partition.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__partition_8cuh_source.html#l00126">126</a> of file <a class="el" href="device__partition_8cuh_source.html">device_partition.cuh</a>.</p>
 
 </div>
 </div>
@@ -374,8 +373,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
  </p>
 <dl class="section user"><dt></dt><dd><ul>
 <li>Copies of the selected items are compacted into <code>d_out</code> and maintain their original relative ordering, however copies of the unselected items are compacted into the rear of <code>d_out</code> in reverse order.</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
-<li></li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
 <dl class="section user"><dt>Performance</dt><dd>The following charts illustrate saturated partition-if performance across different CUDA architectures for <code>int32</code> and <code>int64</code> items, respectively. Items are selected for the first partition with 50% probability.</dd></dl>
@@ -456,7 +454,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 </dl>
 <dl><dt><b>Examples: </b></dt><dd><a class="el" href="example_device_partition_if_8cu-example.html#a2">example_device_partition_if.cu</a>.</dd>
 </dl>
-<p>Definition at line <a class="el" href="device__partition_8cuh_source.html#l00236">236</a> of file <a class="el" href="device__partition_8cuh_source.html">device_partition.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__partition_8cuh_source.html#l00234">234</a> of file <a class="el" href="device__partition_8cuh_source.html">device_partition.cuh</a>.</p>
 
 </div>
 </div>
@@ -467,7 +465,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:07 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:18 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_device_radix_sort-members.html b/docs/html/structcub_1_1_device_radix_sort-members.html
index ff31b7998a..c546e19fae 100644
--- a/docs/html/structcub_1_1_device_radix_sort-members.html
+++ b/docs/html/structcub_1_1_device_radix_sort-members.html
@@ -116,7 +116,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:07 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:18 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_device_radix_sort.html b/docs/html/structcub_1_1_device_radix_sort.html
index af5a8a78df..b609c47c22 100644
--- a/docs/html/structcub_1_1_device_radix_sort.html
+++ b/docs/html/structcub_1_1_device_radix_sort.html
@@ -112,7 +112,7 @@
  <dl class="section user"><dt>Overview</dt><dd>The <a href="http://en.wikipedia.org/wiki/Radix_sort"><em>radix sorting method</em></a> arranges items into ascending (or descending) order. The algorithm relies upon a positional representation for keys, i.e., each key is comprised of an ordered sequence of symbols (e.g., digits, characters, etc.) specified from least-significant to most-significant. For a given input sequence of keys and a set of rules specifying a total ordering of the symbolic alphabet, the radix sorting method produces a lexicographic ordering of those keys.</dd></dl>
 <dl class="section user"><dt></dt><dd><a class="el" href="structcub_1_1_device_radix_sort.html" title="DeviceRadixSort provides device-wide, parallel operations for computing a radix sort across a sequenc...">DeviceRadixSort</a> can sort all of the built-in C++ numeric primitive types, e.g.: <code>unsigned char</code>, <code>int</code>, <code>double</code>, etc. Although the direct radix sorting method can only be applied to unsigned integral types, <a class="el" href="structcub_1_1_device_radix_sort.html" title="DeviceRadixSort provides device-wide, parallel operations for computing a radix sort across a sequenc...">DeviceRadixSort</a> is able to sort signed and floating-point types via simple bit-wise transformations that ensure lexicographic key ordering.</dd></dl>
 <dl class="section user"><dt>Usage Considerations</dt><dd><ul>
-<li><em>Dynamic parallelism</em>. <a class="el" href="structcub_1_1_device_radix_sort.html" title="DeviceRadixSort provides device-wide, parallel operations for computing a radix sort across a sequenc...">DeviceRadixSort</a> methods can be called within kernel code on devices in which CUDA dynamic parallelism is supported. When calling these methods from kernel code, be sure to define the <code>CUB_CDP</code> macro in your compiler's macro definitions.</li>
+<li><em>Dynamic parallelism</em>. <a class="el" href="structcub_1_1_device_radix_sort.html" title="DeviceRadixSort provides device-wide, parallel operations for computing a radix sort across a sequenc...">DeviceRadixSort</a> methods can be called within kernel code on devices in which CUDA dynamic parallelism is supported.</li>
 </ul>
 </dd></dl>
 <dl class="section user"><dt>Performance</dt><dd>The work-complexity of radix sort as a function of input size is linear, resulting in performance throughput that plateaus with problem sizes large enough to saturate the GPU. The following chart illustrates <a class="el" href="structcub_1_1_device_radix_sort.html#a4f555afa8ac2949d9fef49fad52a50d6" title="Sorts keys into ascending order. (~2N auxiliary storage required) ">DeviceRadixSort::SortKeys</a> performance across different CUDA architectures for uniform-random <code>uint32</code> keys. Performance plots for other scenarios can be found in the detailed method descriptions below.</dd></dl>
@@ -132,7 +132,7 @@
 <tr class="separator:a31d7224d3f6c6a9309a0b84571f25eb9"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a0e0f38bafdc30403a68b3ff5c7b80027"><td class="memTemplParams" colspan="2">template&lt;typename KeyT , typename ValueT &gt; </td></tr>
 <tr class="memitem:a0e0f38bafdc30403a68b3ff5c7b80027"><td class="memTemplItemLeft" align="right" valign="top">static CUB_RUNTIME_FUNCTION <br class="typebreak"/>
-cudaError_t&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_radix_sort.html#a0e0f38bafdc30403a68b3ff5c7b80027">SortPairs</a> (void *d_temp_storage, size_t &amp;temp_storage_bytes, <a class="el" href="structcub_1_1_double_buffer.html">DoubleBuffer</a>&lt; KeyT &gt; &amp;d_keys, <a class="el" href="structcub_1_1_double_buffer.html">DoubleBuffer</a>&lt; ValueT &gt; &amp;d_values, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)</td></tr>
+cudaError_t&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_radix_sort.html#a0e0f38bafdc30403a68b3ff5c7b80027">SortPairs</a> (void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, DoubleBuffer&lt; ValueT &gt; &amp;d_values, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)</td></tr>
 <tr class="memdesc:a0e0f38bafdc30403a68b3ff5c7b80027"><td class="mdescLeft">&#160;</td><td class="mdescRight">Sorts key-value pairs into ascending order. (~<em>N </em>auxiliary storage required)  <a href="#a0e0f38bafdc30403a68b3ff5c7b80027">More...</a><br/></td></tr>
 <tr class="separator:a0e0f38bafdc30403a68b3ff5c7b80027"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:add6a87f54c8058edba4b9e875bb0626a"><td class="memTemplParams" colspan="2">template&lt;typename KeyT , typename ValueT &gt; </td></tr>
@@ -142,7 +142,7 @@
 <tr class="separator:add6a87f54c8058edba4b9e875bb0626a"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:aea53a40e665f2d5ed683a6655a3d188e"><td class="memTemplParams" colspan="2">template&lt;typename KeyT , typename ValueT &gt; </td></tr>
 <tr class="memitem:aea53a40e665f2d5ed683a6655a3d188e"><td class="memTemplItemLeft" align="right" valign="top">static CUB_RUNTIME_FUNCTION <br class="typebreak"/>
-cudaError_t&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_radix_sort.html#aea53a40e665f2d5ed683a6655a3d188e">SortPairsDescending</a> (void *d_temp_storage, size_t &amp;temp_storage_bytes, <a class="el" href="structcub_1_1_double_buffer.html">DoubleBuffer</a>&lt; KeyT &gt; &amp;d_keys, <a class="el" href="structcub_1_1_double_buffer.html">DoubleBuffer</a>&lt; ValueT &gt; &amp;d_values, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)</td></tr>
+cudaError_t&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_radix_sort.html#aea53a40e665f2d5ed683a6655a3d188e">SortPairsDescending</a> (void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, DoubleBuffer&lt; ValueT &gt; &amp;d_values, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)</td></tr>
 <tr class="memdesc:aea53a40e665f2d5ed683a6655a3d188e"><td class="mdescLeft">&#160;</td><td class="mdescRight">Sorts key-value pairs into descending order. (~<em>N </em>auxiliary storage required).  <a href="#aea53a40e665f2d5ed683a6655a3d188e">More...</a><br/></td></tr>
 <tr class="separator:aea53a40e665f2d5ed683a6655a3d188e"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr><td colspan="2"><div class="groupHeader">Keys-only</div></td></tr>
@@ -153,7 +153,7 @@
 <tr class="separator:a4f555afa8ac2949d9fef49fad52a50d6"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:ad7b5dbc2e3fd44c21193f2b792706191"><td class="memTemplParams" colspan="2">template&lt;typename KeyT &gt; </td></tr>
 <tr class="memitem:ad7b5dbc2e3fd44c21193f2b792706191"><td class="memTemplItemLeft" align="right" valign="top">static CUB_RUNTIME_FUNCTION <br class="typebreak"/>
-cudaError_t&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_radix_sort.html#ad7b5dbc2e3fd44c21193f2b792706191">SortKeys</a> (void *d_temp_storage, size_t &amp;temp_storage_bytes, <a class="el" href="structcub_1_1_double_buffer.html">DoubleBuffer</a>&lt; KeyT &gt; &amp;d_keys, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)</td></tr>
+cudaError_t&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_radix_sort.html#ad7b5dbc2e3fd44c21193f2b792706191">SortKeys</a> (void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)</td></tr>
 <tr class="memdesc:ad7b5dbc2e3fd44c21193f2b792706191"><td class="mdescLeft">&#160;</td><td class="mdescRight">Sorts keys into ascending order. (~<em>N </em>auxiliary storage required).  <a href="#ad7b5dbc2e3fd44c21193f2b792706191">More...</a><br/></td></tr>
 <tr class="separator:ad7b5dbc2e3fd44c21193f2b792706191"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a24761009c4cc15fd2e54cb72663af0ef"><td class="memTemplParams" colspan="2">template&lt;typename KeyT &gt; </td></tr>
@@ -163,7 +163,7 @@
 <tr class="separator:a24761009c4cc15fd2e54cb72663af0ef"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a95a8939332405efec4527442c26c2628"><td class="memTemplParams" colspan="2">template&lt;typename KeyT &gt; </td></tr>
 <tr class="memitem:a95a8939332405efec4527442c26c2628"><td class="memTemplItemLeft" align="right" valign="top">static CUB_RUNTIME_FUNCTION <br class="typebreak"/>
-cudaError_t&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_radix_sort.html#a95a8939332405efec4527442c26c2628">SortKeysDescending</a> (void *d_temp_storage, size_t &amp;temp_storage_bytes, <a class="el" href="structcub_1_1_double_buffer.html">DoubleBuffer</a>&lt; KeyT &gt; &amp;d_keys, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)</td></tr>
+cudaError_t&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_radix_sort.html#a95a8939332405efec4527442c26c2628">SortKeysDescending</a> (void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)</td></tr>
 <tr class="memdesc:a95a8939332405efec4527442c26c2628"><td class="mdescLeft">&#160;</td><td class="mdescRight">Sorts keys into descending order. (~<em>N </em>auxiliary storage required).  <a href="#a95a8939332405efec4527442c26c2628">More...</a><br/></td></tr>
 <tr class="separator:a95a8939332405efec4527442c26c2628"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table>
@@ -260,9 +260,8 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <dl class="section user"><dt></dt><dd><ul>
 <li>The contents of the input data are not altered by the sorting operation</li>
 <li>An optional bit subrange <code>[begin_bit, end_bit)</code> of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement.</li>
-<li>This operation requires an allocation of temporary device storage that is <em>O</em>(<code>N+P</code>), where <code>N</code> is the length of the input and <code>P</code> is the number of streaming multiprocessors on the device. For sorting using only <em>O</em>(<code>P</code>) temporary storage, see the sorting interface using <a class="el" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer</a> wrappers below.</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
-<li></li>
+<li>This operation requires an allocation of temporary device storage that is <em>O</em>(<code>N+P</code>), where <code>N</code> is the length of the input and <code>P</code> is the number of streaming multiprocessors on the device. For sorting using only <em>O</em>(<code>P</code>) temporary storage, see the sorting interface using DoubleBuffer wrappers below.</li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
 <dl class="section user"><dt>Performance</dt><dd>The following charts illustrate saturated sorting performance across different CUDA architectures for uniform-random <code>uint32,uint32</code> and <code>uint64,uint64</code> pairs, respectively.</dd></dl>
@@ -322,9 +321,9 @@ <h2 class="groupheader">Member Function Documentation</h2>
   </table>
   </dd>
 </dl>
-<dl><dt><b>Examples: </b></dt><dd><a class="el" href="example_device_radix_sort_8cu-example.html#a5">example_device_radix_sort.cu</a>.</dd>
+<dl><dt><b>Examples: </b></dt><dd><a class="el" href="example_device_radix_sort_8cu-example.html#a2">example_device_radix_sort.cu</a>.</dd>
 </dl>
-<p>Definition at line <a class="el" href="device__radix__sort_8cuh_source.html#l00148">148</a> of file <a class="el" href="device__radix__sort_8cuh_source.html">device_radix_sort.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__radix__sort_8cuh_source.html#l00147">147</a> of file <a class="el" href="device__radix__sort_8cuh_source.html">device_radix_sort.cuh</a>.</p>
 
 </div>
 </div>
@@ -352,13 +351,13 @@ <h2 class="groupheader">Member Function Documentation</h2>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype"><a class="el" href="structcub_1_1_double_buffer.html">DoubleBuffer</a>&lt; KeyT &gt; &amp;&#160;</td>
+          <td class="paramtype">DoubleBuffer&lt; KeyT &gt; &amp;&#160;</td>
           <td class="paramname"><em>d_keys</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype"><a class="el" href="structcub_1_1_double_buffer.html">DoubleBuffer</a>&lt; ValueT &gt; &amp;&#160;</td>
+          <td class="paramtype">DoubleBuffer&lt; ValueT &gt; &amp;&#160;</td>
           <td class="paramname"><em>d_values</em>, </td>
         </tr>
         <tr>
@@ -406,13 +405,12 @@ <h2 class="groupheader">Member Function Documentation</h2>
 
 <p>Sorts key-value pairs into ascending order. (~<em>N </em>auxiliary storage required) </p>
 <dl class="section user"><dt></dt><dd><ul>
-<li>The sorting operation is given a pair of key buffers and a corresponding pair of associated value buffers. Each pair is managed by a <a class="el" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer</a> structure that indicates which of the two buffers is "current" (and thus contains the input data to be sorted).</li>
+<li>The sorting operation is given a pair of key buffers and a corresponding pair of associated value buffers. Each pair is managed by a DoubleBuffer structure that indicates which of the two buffers is "current" (and thus contains the input data to be sorted).</li>
 <li>The contents of both buffers within each pair may be altered by the sorting operation.</li>
-<li>Upon completion, the sorting operation will update the "current" indicator within each <a class="el" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer</a> wrapper to reference which of the two buffers now contains the sorted output sequence (a function of the number of key bits specified and the targeted device architecture).</li>
+<li>Upon completion, the sorting operation will update the "current" indicator within each DoubleBuffer wrapper to reference which of the two buffers now contains the sorted output sequence (a function of the number of key bits specified and the targeted device architecture).</li>
 <li>An optional bit subrange <code>[begin_bit, end_bit)</code> of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement.</li>
 <li>This operation requires a relatively small allocation of temporary device storage that is <em>O</em>(<code>P</code>), where <code>P</code> is the number of streaming multiprocessors on the device (and is typically a small constant relative to the input size <code>N</code>).</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
-<li></li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
 <dl class="section user"><dt>Performance</dt><dd>The following charts illustrate saturated sorting performance across different CUDA architectures for uniform-random <code>uint32,uint32</code> and <code>uint64,uint64</code> pairs, respectively.</dd></dl>
@@ -435,7 +433,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <div class="line"></div>
 <div class="line"><span class="comment">// Create a set of DoubleBuffers to wrap pairs of device pointers</span></div>
 <div class="line">cub::DoubleBuffer&lt;<span class="keywordtype">int</span>&gt; d_keys(d_key_buf, d_key_alt_buf);</div>
-<div class="line"><a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">cub::DoubleBuffer&lt;int&gt;</a> d_values(d_value_buf, d_value_alt_buf);</div>
+<div class="line">cub::DoubleBuffer&lt;int&gt; d_values(d_value_buf, d_value_alt_buf);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Determine temporary device storage requirements</span></div>
 <div class="line"><span class="keywordtype">void</span>     *d_temp_storage = NULL;</div>
@@ -473,7 +471,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="device__radix__sort_8cuh_source.html#l00250">250</a> of file <a class="el" href="device__radix__sort_8cuh_source.html">device_radix_sort.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__radix__sort_8cuh_source.html#l00248">248</a> of file <a class="el" href="device__radix__sort_8cuh_source.html">device_radix_sort.cuh</a>.</p>
 
 </div>
 </div>
@@ -569,9 +567,8 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <dl class="section user"><dt></dt><dd><ul>
 <li>The contents of the input data are not altered by the sorting operation</li>
 <li>An optional bit subrange <code>[begin_bit, end_bit)</code> of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement.</li>
-<li>This operation requires an allocation of temporary device storage that is <em>O</em>(<code>N+P</code>), where <code>N</code> is the length of the input and <code>P</code> is the number of streaming multiprocessors on the device. For sorting using only <em>O</em>(<code>P</code>) temporary storage, see the sorting interface using <a class="el" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer</a> wrappers below.</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
-<li></li>
+<li>This operation requires an allocation of temporary device storage that is <em>O</em>(<code>N+P</code>), where <code>N</code> is the length of the input and <code>P</code> is the number of streaming multiprocessors on the device. For sorting using only <em>O</em>(<code>P</code>) temporary storage, see the sorting interface using DoubleBuffer wrappers below.</li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
 <dl class="section user"><dt>Performance</dt><dd>Performance is similar to <a class="el" href="structcub_1_1_device_radix_sort.html#a31d7224d3f6c6a9309a0b84571f25eb9" title="Sorts key-value pairs into ascending order. (~2N auxiliary storage required) ">DeviceRadixSort::SortPairs</a>.</dd></dl>
@@ -626,7 +623,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="device__radix__sort_8cuh_source.html#l00331">331</a> of file <a class="el" href="device__radix__sort_8cuh_source.html">device_radix_sort.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__radix__sort_8cuh_source.html#l00328">328</a> of file <a class="el" href="device__radix__sort_8cuh_source.html">device_radix_sort.cuh</a>.</p>
 
 </div>
 </div>
@@ -654,13 +651,13 @@ <h2 class="groupheader">Member Function Documentation</h2>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype"><a class="el" href="structcub_1_1_double_buffer.html">DoubleBuffer</a>&lt; KeyT &gt; &amp;&#160;</td>
+          <td class="paramtype">DoubleBuffer&lt; KeyT &gt; &amp;&#160;</td>
           <td class="paramname"><em>d_keys</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype"><a class="el" href="structcub_1_1_double_buffer.html">DoubleBuffer</a>&lt; ValueT &gt; &amp;&#160;</td>
+          <td class="paramtype">DoubleBuffer&lt; ValueT &gt; &amp;&#160;</td>
           <td class="paramname"><em>d_values</em>, </td>
         </tr>
         <tr>
@@ -708,13 +705,12 @@ <h2 class="groupheader">Member Function Documentation</h2>
 
 <p>Sorts key-value pairs into descending order. (~<em>N </em>auxiliary storage required). </p>
 <dl class="section user"><dt></dt><dd><ul>
-<li>The sorting operation is given a pair of key buffers and a corresponding pair of associated value buffers. Each pair is managed by a <a class="el" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer</a> structure that indicates which of the two buffers is "current" (and thus contains the input data to be sorted).</li>
+<li>The sorting operation is given a pair of key buffers and a corresponding pair of associated value buffers. Each pair is managed by a DoubleBuffer structure that indicates which of the two buffers is "current" (and thus contains the input data to be sorted).</li>
 <li>The contents of both buffers within each pair may be altered by the sorting operation.</li>
-<li>Upon completion, the sorting operation will update the "current" indicator within each <a class="el" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer</a> wrapper to reference which of the two buffers now contains the sorted output sequence (a function of the number of key bits specified and the targeted device architecture).</li>
+<li>Upon completion, the sorting operation will update the "current" indicator within each DoubleBuffer wrapper to reference which of the two buffers now contains the sorted output sequence (a function of the number of key bits specified and the targeted device architecture).</li>
 <li>An optional bit subrange <code>[begin_bit, end_bit)</code> of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement.</li>
 <li>This operation requires a relatively small allocation of temporary device storage that is <em>O</em>(<code>P</code>), where <code>P</code> is the number of streaming multiprocessors on the device (and is typically a small constant relative to the input size <code>N</code>).</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
-<li></li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
 <dl class="section user"><dt>Performance</dt><dd>Performance is similar to <a class="el" href="structcub_1_1_device_radix_sort.html#a31d7224d3f6c6a9309a0b84571f25eb9" title="Sorts key-value pairs into ascending order. (~2N auxiliary storage required) ">DeviceRadixSort::SortPairs</a>.</dd></dl>
@@ -731,7 +727,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <div class="line"></div>
 <div class="line"><span class="comment">// Create a set of DoubleBuffers to wrap pairs of device pointers</span></div>
 <div class="line">cub::DoubleBuffer&lt;<span class="keywordtype">int</span>&gt; d_keys(d_key_buf, d_key_alt_buf);</div>
-<div class="line"><a class="code" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">cub::DoubleBuffer&lt;int&gt;</a> d_values(d_value_buf, d_value_alt_buf);</div>
+<div class="line">cub::DoubleBuffer&lt;int&gt; d_values(d_value_buf, d_value_alt_buf);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Determine temporary device storage requirements</span></div>
 <div class="line"><span class="keywordtype">void</span>     *d_temp_storage = NULL;</div>
@@ -769,7 +765,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="device__radix__sort_8cuh_source.html#l00428">428</a> of file <a class="el" href="device__radix__sort_8cuh_source.html">device_radix_sort.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__radix__sort_8cuh_source.html#l00424">424</a> of file <a class="el" href="device__radix__sort_8cuh_source.html">device_radix_sort.cuh</a>.</p>
 
 </div>
 </div>
@@ -853,9 +849,8 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <dl class="section user"><dt></dt><dd><ul>
 <li>The contents of the input data are not altered by the sorting operation</li>
 <li>An optional bit subrange <code>[begin_bit, end_bit)</code> of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement.</li>
-<li>This operation requires an allocation of temporary device storage that is <em>O</em>(<code>N+P</code>), where <code>N</code> is the length of the input and <code>P</code> is the number of streaming multiprocessors on the device. For sorting using only <em>O</em>(<code>P</code>) temporary storage, see the sorting interface using <a class="el" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer</a> wrappers below.</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
-<li></li>
+<li>This operation requires an allocation of temporary device storage that is <em>O</em>(<code>N+P</code>), where <code>N</code> is the length of the input and <code>P</code> is the number of streaming multiprocessors on the device. For sorting using only <em>O</em>(<code>P</code>) temporary storage, see the sorting interface using DoubleBuffer wrappers below.</li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
 <dl class="section user"><dt>Performance</dt><dd>The following charts illustrate saturated sorting performance across different CUDA architectures for uniform-random <code>uint32</code> and <code>uint64</code> keys, respectively.</dd></dl>
@@ -908,7 +903,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="device__radix__sort_8cuh_source.html#l00511">511</a> of file <a class="el" href="device__radix__sort_8cuh_source.html">device_radix_sort.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__radix__sort_8cuh_source.html#l00506">506</a> of file <a class="el" href="device__radix__sort_8cuh_source.html">device_radix_sort.cuh</a>.</p>
 
 </div>
 </div>
@@ -936,7 +931,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype"><a class="el" href="structcub_1_1_double_buffer.html">DoubleBuffer</a>&lt; KeyT &gt; &amp;&#160;</td>
+          <td class="paramtype">DoubleBuffer&lt; KeyT &gt; &amp;&#160;</td>
           <td class="paramname"><em>d_keys</em>, </td>
         </tr>
         <tr>
@@ -984,13 +979,12 @@ <h2 class="groupheader">Member Function Documentation</h2>
 
 <p>Sorts keys into ascending order. (~<em>N </em>auxiliary storage required). </p>
 <dl class="section user"><dt></dt><dd><ul>
-<li>The sorting operation is given a pair of key buffers managed by a <a class="el" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer</a> structure that indicates which of the two buffers is "current" (and thus contains the input data to be sorted).</li>
+<li>The sorting operation is given a pair of key buffers managed by a DoubleBuffer structure that indicates which of the two buffers is "current" (and thus contains the input data to be sorted).</li>
 <li>The contents of both buffers may be altered by the sorting operation.</li>
-<li>Upon completion, the sorting operation will update the "current" indicator within the <a class="el" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer</a> wrapper to reference which of the two buffers now contains the sorted output sequence (a function of the number of key bits specified and the targeted device architecture).</li>
+<li>Upon completion, the sorting operation will update the "current" indicator within the DoubleBuffer wrapper to reference which of the two buffers now contains the sorted output sequence (a function of the number of key bits specified and the targeted device architecture).</li>
 <li>An optional bit subrange <code>[begin_bit, end_bit)</code> of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement.</li>
 <li>This operation requires a relatively small allocation of temporary device storage that is <em>O</em>(<code>P</code>), where <code>P</code> is the number of streaming multiprocessors on the device (and is typically a small constant relative to the input size <code>N</code>).</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
-<li></li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
 <dl class="section user"><dt>Performance</dt><dd>The following charts illustrate saturated sorting performance across different CUDA architectures for uniform-random <code>uint32</code> and <code>uint64</code> keys, respectively.</dd></dl>
@@ -1045,7 +1039,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="device__radix__sort_8cuh_source.html#l00601">601</a> of file <a class="el" href="device__radix__sort_8cuh_source.html">device_radix_sort.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__radix__sort_8cuh_source.html#l00595">595</a> of file <a class="el" href="device__radix__sort_8cuh_source.html">device_radix_sort.cuh</a>.</p>
 
 </div>
 </div>
@@ -1129,9 +1123,8 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <dl class="section user"><dt></dt><dd><ul>
 <li>The contents of the input data are not altered by the sorting operation</li>
 <li>An optional bit subrange <code>[begin_bit, end_bit)</code> of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement.</li>
-<li>This operation requires an allocation of temporary device storage that is <em>O</em>(<code>N+P</code>), where <code>N</code> is the length of the input and <code>P</code> is the number of streaming multiprocessors on the device. For sorting using only <em>O</em>(<code>P</code>) temporary storage, see the sorting interface using <a class="el" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer</a> wrappers below.</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
-<li></li>
+<li>This operation requires an allocation of temporary device storage that is <em>O</em>(<code>N+P</code>), where <code>N</code> is the length of the input and <code>P</code> is the number of streaming multiprocessors on the device. For sorting using only <em>O</em>(<code>P</code>) temporary storage, see the sorting interface using DoubleBuffer wrappers below.</li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
 <dl class="section user"><dt>Performance</dt><dd>Performance is similar to <a class="el" href="structcub_1_1_device_radix_sort.html#a4f555afa8ac2949d9fef49fad52a50d6" title="Sorts keys into ascending order. (~2N auxiliary storage required) ">DeviceRadixSort::SortKeys</a>.</dd></dl>
@@ -1181,7 +1174,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="device__radix__sort_8cuh_source.html#l00677">677</a> of file <a class="el" href="device__radix__sort_8cuh_source.html">device_radix_sort.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__radix__sort_8cuh_source.html#l00670">670</a> of file <a class="el" href="device__radix__sort_8cuh_source.html">device_radix_sort.cuh</a>.</p>
 
 </div>
 </div>
@@ -1209,7 +1202,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype"><a class="el" href="structcub_1_1_double_buffer.html">DoubleBuffer</a>&lt; KeyT &gt; &amp;&#160;</td>
+          <td class="paramtype">DoubleBuffer&lt; KeyT &gt; &amp;&#160;</td>
           <td class="paramname"><em>d_keys</em>, </td>
         </tr>
         <tr>
@@ -1257,13 +1250,12 @@ <h2 class="groupheader">Member Function Documentation</h2>
 
 <p>Sorts keys into descending order. (~<em>N </em>auxiliary storage required). </p>
 <dl class="section user"><dt></dt><dd><ul>
-<li>The sorting operation is given a pair of key buffers managed by a <a class="el" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer</a> structure that indicates which of the two buffers is "current" (and thus contains the input data to be sorted).</li>
+<li>The sorting operation is given a pair of key buffers managed by a DoubleBuffer structure that indicates which of the two buffers is "current" (and thus contains the input data to be sorted).</li>
 <li>The contents of both buffers may be altered by the sorting operation.</li>
-<li>Upon completion, the sorting operation will update the "current" indicator within the <a class="el" href="structcub_1_1_double_buffer.html" title="Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...">DoubleBuffer</a> wrapper to reference which of the two buffers now contains the sorted output sequence (a function of the number of key bits specified and the targeted device architecture).</li>
+<li>Upon completion, the sorting operation will update the "current" indicator within the DoubleBuffer wrapper to reference which of the two buffers now contains the sorted output sequence (a function of the number of key bits specified and the targeted device architecture).</li>
 <li>An optional bit subrange <code>[begin_bit, end_bit)</code> of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement.</li>
 <li>This operation requires a relatively small allocation of temporary device storage that is <em>O</em>(<code>P</code>), where <code>P</code> is the number of streaming multiprocessors on the device (and is typically a small constant relative to the input size <code>N</code>).</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
-<li></li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
 <dl class="section user"><dt>Performance</dt><dd>Performance is similar to <a class="el" href="structcub_1_1_device_radix_sort.html#a4f555afa8ac2949d9fef49fad52a50d6" title="Sorts keys into ascending order. (~2N auxiliary storage required) ">DeviceRadixSort::SortKeys</a>.</dd></dl>
@@ -1312,7 +1304,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="device__radix__sort_8cuh_source.html#l00762">762</a> of file <a class="el" href="device__radix__sort_8cuh_source.html">device_radix_sort.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__radix__sort_8cuh_source.html#l00754">754</a> of file <a class="el" href="device__radix__sort_8cuh_source.html">device_radix_sort.cuh</a>.</p>
 
 </div>
 </div>
@@ -1323,7 +1315,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:07 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:18 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_device_reduce-members.html b/docs/html/structcub_1_1_device_reduce-members.html
index 0af9b92329..cd65af67a7 100644
--- a/docs/html/structcub_1_1_device_reduce-members.html
+++ b/docs/html/structcub_1_1_device_reduce-members.html
@@ -108,14 +108,14 @@
   <tr><td class="entry"><a class="el" href="structcub_1_1_device_reduce.html#a6b35963e90120b6d2c76a6068b0340a9">ArgMin</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_reduce.html">cub::DeviceReduce</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
   <tr class="even"><td class="entry"><a class="el" href="structcub_1_1_device_reduce.html#a974a241463ca892c8f5e73b879065e48">Max</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_reduce.html">cub::DeviceReduce</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
   <tr><td class="entry"><a class="el" href="structcub_1_1_device_reduce.html#a14d9c12a1beb9a04f77e903d07fa596a">Min</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_reduce.html">cub::DeviceReduce</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="structcub_1_1_device_reduce.html#a326ef5ae888b92442295c26190a6c39c">Reduce</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_items, ReductionOp reduction_op, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_reduce.html">cub::DeviceReduce</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
-  <tr><td class="entry"><a class="el" href="structcub_1_1_device_reduce.html#a3206c7c11b4d894ca176482e86215b02">ReduceByKey</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, KeysInputIteratorT d_keys_in, UniqueOutputIteratorT d_unique_out, ValuesInputIteratorT d_values_in, AggregatesOutputIteratorT d_aggregates_out, NumRunsOutputIteratorT d_num_runs_out, ReductionOp reduction_op, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_reduce.html">cub::DeviceReduce</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="structcub_1_1_device_reduce.html#aa4adabeb841b852a7a5ecf4f99a2daeb">Reduce</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_items, ReductionOpT reduction_op, T init, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_reduce.html">cub::DeviceReduce</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
+  <tr><td class="entry"><a class="el" href="structcub_1_1_device_reduce.html#a303ae673ac32825f95912b4bfff8bef1">ReduceByKey</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, KeysInputIteratorT d_keys_in, UniqueOutputIteratorT d_unique_out, ValuesInputIteratorT d_values_in, AggregatesOutputIteratorT d_aggregates_out, NumRunsOutputIteratorT d_num_runs_out, ReductionOpT reduction_op, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_reduce.html">cub::DeviceReduce</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
   <tr class="even"><td class="entry"><a class="el" href="structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f">Sum</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_reduce.html">cub::DeviceReduce</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
 </table></div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:07 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:18 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_device_reduce.html b/docs/html/structcub_1_1_device_reduce.html
index add5d22631..b48c7e12f2 100644
--- a/docs/html/structcub_1_1_device_reduce.html
+++ b/docs/html/structcub_1_1_device_reduce.html
@@ -112,33 +112,33 @@
 .</div></div>
  <dl class="section user"><dt>Overview</dt><dd>A <a href="http://en.wikipedia.org/wiki/Reduce_(higher-order_function)"><em>reduction</em></a> (or <em>fold</em>) uses a binary combining operator to compute a single aggregate from a sequence of input elements.</dd></dl>
 <dl class="section user"><dt>Usage Considerations</dt><dd><ul>
-<li><em>Dynamic parallelism</em>. <a class="el" href="structcub_1_1_device_reduce.html" title="DeviceReduce provides device-wide, parallel operations for computing a reduction across a sequence of...">DeviceReduce</a> methods can be called within kernel code on devices in which CUDA dynamic parallelism is supported. When calling these methods from kernel code, be sure to define the <code>CUB_CDP</code> macro in your compiler's macro definitions.</li>
+<li><em>Dynamic parallelism</em>. <a class="el" href="structcub_1_1_device_reduce.html" title="DeviceReduce provides device-wide, parallel operations for computing a reduction across a sequence of...">DeviceReduce</a> methods can be called within kernel code on devices in which CUDA dynamic parallelism is supported.</li>
 </ul>
 </dd></dl>
 <dl class="section user"><dt>Performance</dt><dd>The work-complexity of reduction, reduce-by-key, and run-length encode as a function of input size is linear, resulting in performance throughput that plateaus with problem sizes large enough to saturate the GPU.</dd></dl>
-<dl class="section user"><dt></dt><dd>The following chart illustrates <a class="el" href="structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f" title="Computes a device-wide sum using the addition (&#39;+&#39;) operator. ">DeviceReduce::Sum</a> performance across different CUDA architectures for <code>int32</code> keys.</dd></dl>
+<dl class="section user"><dt></dt><dd>The following chart illustrates <a class="el" href="structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f" title="Computes a device-wide sum using the addition (+) operator. ">DeviceReduce::Sum</a> performance across different CUDA architectures for <code>int32</code> keys.</dd></dl>
 <div class="image">
 <img src="reduce_int32.png" alt="reduce_int32.png"/>
 </div>
-<dl class="section user"><dt></dt><dd>The following chart illustrates <a class="el" href="structcub_1_1_device_reduce.html#a3206c7c11b4d894ca176482e86215b02" title="Reduces segments of values, where segments are demarcated by corresponding runs of identical keys...">DeviceReduce::ReduceByKey</a> (summation) performance across different CUDA architectures for <code>fp32</code> values. Segments are identified by <code>int32</code> keys, and have lengths uniformly sampled from [1,1000].</dd></dl>
+<dl class="section user"><dt></dt><dd>The following chart illustrates <a class="el" href="structcub_1_1_device_reduce.html#a303ae673ac32825f95912b4bfff8bef1" title="Reduces segments of values, where segments are demarcated by corresponding runs of identical keys...">DeviceReduce::ReduceByKey</a> (summation) performance across different CUDA architectures for <code>fp32</code> values. Segments are identified by <code>int32</code> keys, and have lengths uniformly sampled from [1,1000].</dd></dl>
 <div class="image">
 <img src="reduce_by_key_fp32_len_500.png" alt="reduce_by_key_fp32_len_500.png"/>
 </div>
 <dl class="section user"><dt></dt><dd>Performance plots for other scenarios can be found in the detailed method descriptions below. </dd></dl>
 
-<p>Definition at line <a class="el" href="device__reduce_8cuh_source.html#l00082">82</a> of file <a class="el" href="device__reduce_8cuh_source.html">device_reduce.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__reduce_8cuh_source.html#l00083">83</a> of file <a class="el" href="device__reduce_8cuh_source.html">device_reduce.cuh</a>.</p>
 </div><table class="memberdecls">
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-static-methods"></a>
 Static Public Methods</h2></td></tr>
-<tr class="memitem:a326ef5ae888b92442295c26190a6c39c"><td class="memTemplParams" colspan="2">template&lt;typename InputIteratorT , typename OutputIteratorT , typename ReductionOp &gt; </td></tr>
-<tr class="memitem:a326ef5ae888b92442295c26190a6c39c"><td class="memTemplItemLeft" align="right" valign="top">static CUB_RUNTIME_FUNCTION <br class="typebreak"/>
-cudaError_t&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_reduce.html#a326ef5ae888b92442295c26190a6c39c">Reduce</a> (void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_items, ReductionOp reduction_op, cudaStream_t stream=0, bool debug_synchronous=false)</td></tr>
-<tr class="memdesc:a326ef5ae888b92442295c26190a6c39c"><td class="mdescLeft">&#160;</td><td class="mdescRight">Computes a device-wide reduction using the specified binary <code>reduction_op</code> functor.  <a href="#a326ef5ae888b92442295c26190a6c39c">More...</a><br/></td></tr>
-<tr class="separator:a326ef5ae888b92442295c26190a6c39c"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:aa4adabeb841b852a7a5ecf4f99a2daeb"><td class="memTemplParams" colspan="2">template&lt;typename InputIteratorT , typename OutputIteratorT , typename ReductionOpT , typename T &gt; </td></tr>
+<tr class="memitem:aa4adabeb841b852a7a5ecf4f99a2daeb"><td class="memTemplItemLeft" align="right" valign="top">static CUB_RUNTIME_FUNCTION <br class="typebreak"/>
+cudaError_t&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_reduce.html#aa4adabeb841b852a7a5ecf4f99a2daeb">Reduce</a> (void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_items, ReductionOpT reduction_op, T init, cudaStream_t stream=0, bool debug_synchronous=false)</td></tr>
+<tr class="memdesc:aa4adabeb841b852a7a5ecf4f99a2daeb"><td class="mdescLeft">&#160;</td><td class="mdescRight">Computes a device-wide reduction using the specified binary <code>reduction_op</code> functor and initial value <code>init</code>.  <a href="#aa4adabeb841b852a7a5ecf4f99a2daeb">More...</a><br/></td></tr>
+<tr class="separator:aa4adabeb841b852a7a5ecf4f99a2daeb"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:ab7f21e8255eb842aaf74305975ae607f"><td class="memTemplParams" colspan="2">template&lt;typename InputIteratorT , typename OutputIteratorT &gt; </td></tr>
 <tr class="memitem:ab7f21e8255eb842aaf74305975ae607f"><td class="memTemplItemLeft" align="right" valign="top">static CUB_RUNTIME_FUNCTION <br class="typebreak"/>
 cudaError_t&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f">Sum</a> (void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)</td></tr>
-<tr class="memdesc:ab7f21e8255eb842aaf74305975ae607f"><td class="mdescLeft">&#160;</td><td class="mdescRight">Computes a device-wide sum using the addition ('+') operator.  <a href="#ab7f21e8255eb842aaf74305975ae607f">More...</a><br/></td></tr>
+<tr class="memdesc:ab7f21e8255eb842aaf74305975ae607f"><td class="mdescLeft">&#160;</td><td class="mdescRight">Computes a device-wide sum using the addition (<code>+</code>) operator.  <a href="#ab7f21e8255eb842aaf74305975ae607f">More...</a><br/></td></tr>
 <tr class="separator:ab7f21e8255eb842aaf74305975ae607f"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a14d9c12a1beb9a04f77e903d07fa596a"><td class="memTemplParams" colspan="2">template&lt;typename InputIteratorT , typename OutputIteratorT &gt; </td></tr>
 <tr class="memitem:a14d9c12a1beb9a04f77e903d07fa596a"><td class="memTemplItemLeft" align="right" valign="top">static CUB_RUNTIME_FUNCTION <br class="typebreak"/>
@@ -160,18 +160,18 @@
 cudaError_t&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_reduce.html#a07a9ecbf0b6db1882107f6adee1c4276">ArgMax</a> (void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)</td></tr>
 <tr class="memdesc:a07a9ecbf0b6db1882107f6adee1c4276"><td class="mdescLeft">&#160;</td><td class="mdescRight">Finds the first device-wide maximum using the greater-than ('&gt;') operator, also returning the index of that item.  <a href="#a07a9ecbf0b6db1882107f6adee1c4276">More...</a><br/></td></tr>
 <tr class="separator:a07a9ecbf0b6db1882107f6adee1c4276"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a3206c7c11b4d894ca176482e86215b02"><td class="memTemplParams" colspan="2">template&lt;typename KeysInputIteratorT , typename UniqueOutputIteratorT , typename ValuesInputIteratorT , typename AggregatesOutputIteratorT , typename NumRunsOutputIteratorT , typename ReductionOp &gt; </td></tr>
-<tr class="memitem:a3206c7c11b4d894ca176482e86215b02"><td class="memTemplItemLeft" align="right" valign="top">CUB_RUNTIME_FUNCTION static <br class="typebreak"/>
-__forceinline__ cudaError_t&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_reduce.html#a3206c7c11b4d894ca176482e86215b02">ReduceByKey</a> (void *d_temp_storage, size_t &amp;temp_storage_bytes, KeysInputIteratorT d_keys_in, UniqueOutputIteratorT d_unique_out, ValuesInputIteratorT d_values_in, AggregatesOutputIteratorT d_aggregates_out, NumRunsOutputIteratorT d_num_runs_out, ReductionOp reduction_op, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)</td></tr>
-<tr class="memdesc:a3206c7c11b4d894ca176482e86215b02"><td class="mdescLeft">&#160;</td><td class="mdescRight">Reduces segments of values, where segments are demarcated by corresponding runs of identical keys.  <a href="#a3206c7c11b4d894ca176482e86215b02">More...</a><br/></td></tr>
-<tr class="separator:a3206c7c11b4d894ca176482e86215b02"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a303ae673ac32825f95912b4bfff8bef1"><td class="memTemplParams" colspan="2">template&lt;typename KeysInputIteratorT , typename UniqueOutputIteratorT , typename ValuesInputIteratorT , typename AggregatesOutputIteratorT , typename NumRunsOutputIteratorT , typename ReductionOpT &gt; </td></tr>
+<tr class="memitem:a303ae673ac32825f95912b4bfff8bef1"><td class="memTemplItemLeft" align="right" valign="top">CUB_RUNTIME_FUNCTION static <br class="typebreak"/>
+__forceinline__ cudaError_t&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_reduce.html#a303ae673ac32825f95912b4bfff8bef1">ReduceByKey</a> (void *d_temp_storage, size_t &amp;temp_storage_bytes, KeysInputIteratorT d_keys_in, UniqueOutputIteratorT d_unique_out, ValuesInputIteratorT d_values_in, AggregatesOutputIteratorT d_aggregates_out, NumRunsOutputIteratorT d_num_runs_out, ReductionOpT reduction_op, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)</td></tr>
+<tr class="memdesc:a303ae673ac32825f95912b4bfff8bef1"><td class="mdescLeft">&#160;</td><td class="mdescRight">Reduces segments of values, where segments are demarcated by corresponding runs of identical keys.  <a href="#a303ae673ac32825f95912b4bfff8bef1">More...</a><br/></td></tr>
+<tr class="separator:a303ae673ac32825f95912b4bfff8bef1"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table>
 <h2 class="groupheader">Member Function Documentation</h2>
-<a class="anchor" id="a326ef5ae888b92442295c26190a6c39c"></a>
+<a class="anchor" id="aa4adabeb841b852a7a5ecf4f99a2daeb"></a>
 <div class="memitem">
 <div class="memproto">
 <div class="memtemplate">
-template&lt;typename InputIteratorT , typename OutputIteratorT , typename ReductionOp &gt; </div>
+template&lt;typename InputIteratorT , typename OutputIteratorT , typename ReductionOpT , typename T &gt; </div>
 <table class="mlabels">
   <tr>
   <td class="mlabels-left">
@@ -209,9 +209,15 @@ <h2 class="groupheader">Member Function Documentation</h2>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">ReductionOp&#160;</td>
+          <td class="paramtype">ReductionOpT&#160;</td>
           <td class="paramname"><em>reduction_op</em>, </td>
         </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">T&#160;</td>
+          <td class="paramname"><em>init</em>, </td>
+        </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
@@ -237,22 +243,20 @@ <h2 class="groupheader">Member Function Documentation</h2>
 </table>
 </div><div class="memdoc">
 
-<p>Computes a device-wide reduction using the specified binary <code>reduction_op</code> functor. </p>
+<p>Computes a device-wide reduction using the specified binary <code>reduction_op</code> functor and initial value <code>init</code>. </p>
 <dl class="section user"><dt></dt><dd><ul>
-<li>Does not support non-commutative reduction operators.</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
-<li></li>
+<li>Does not support binary reduction operators that are non-commutative.</li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
-<dl class="section user"><dt>Performance</dt><dd>Performance is typically similar to <a class="el" href="structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f" title="Computes a device-wide sum using the addition (&#39;+&#39;) operator. ">DeviceReduce::Sum</a>.</dd></dl>
-<dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates a custom min reduction of a device vector of <code>int</code> items. </dd></dl>
+<dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates a user-defined min-reduction of a device vector of <code>int</code> data elements. </dd></dl>
 <dl class="section user"><dt></dt><dd><div class="fragment"><div class="line"><span class="preprocessor">#include &lt;<a class="code" href="cub_8cuh.html">cub/cub.cuh</a>&gt;</span>   <span class="comment">// or equivalently &lt;cub/device/device_radix_sort.cuh&gt;</span></div>
 <div class="line"></div>
 <div class="line"><span class="comment">// CustomMin functor</span></div>
 <div class="line"><span class="keyword">struct </span>CustomMin</div>
 <div class="line">{</div>
 <div class="line">    <span class="keyword">template</span> &lt;<span class="keyword">typename</span> T&gt;</div>
-<div class="line">    CUB_RUNTIME_FUNCTION __forceinline__</div>
+<div class="line">    __device__ __forceinline__</div>
 <div class="line">    T operator()(<span class="keyword">const</span> T &amp;a, <span class="keyword">const</span> T &amp;b)<span class="keyword"> const </span>{</div>
 <div class="line">        <span class="keywordflow">return</span> (b &lt; a) ? b : a;</div>
 <div class="line">    }</div>
@@ -261,20 +265,21 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <div class="line"><span class="comment">// Declare, allocate, and initialize device-accessible pointers for input and output</span></div>
 <div class="line"><span class="keywordtype">int</span>          num_items;  <span class="comment">// e.g., 7</span></div>
 <div class="line"><span class="keywordtype">int</span>          *d_in;      <span class="comment">// e.g., [8, 6, 7, 5, 3, 0, 9]</span></div>
-<div class="line"><span class="keywordtype">int</span>          *d_out;     <span class="comment">// e.g., [ ]</span></div>
+<div class="line"><span class="keywordtype">int</span>          *d_out;     <span class="comment">// e.g., [-]</span></div>
 <div class="line">CustomMin    min_op;</div>
+<div class="line"><span class="keywordtype">int</span>          init;       <span class="comment">// e.g., INT_MAX</span></div>
 <div class="line">...</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Determine temporary device storage requirements</span></div>
 <div class="line">void     *d_temp_storage = NULL;</div>
 <div class="line"><span class="keywordtype">size_t</span>   temp_storage_bytes = 0;</div>
-<div class="line"><a class="code" href="structcub_1_1_device_reduce.html#a326ef5ae888b92442295c26190a6c39c" title="Computes a device-wide reduction using the specified binary reduction_op functor. ...">cub::DeviceReduce::Reduce</a>(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, min_op);</div>
+<div class="line"><a class="code" href="structcub_1_1_device_reduce.html#aa4adabeb841b852a7a5ecf4f99a2daeb" title="Computes a device-wide reduction using the specified binary reduction_op functor and initial value in...">cub::DeviceReduce::Reduce</a>(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, min_op, init);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Allocate temporary storage</span></div>
 <div class="line">cudaMalloc(&amp;d_temp_storage, temp_storage_bytes);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Run reduction</span></div>
-<div class="line"><a class="code" href="structcub_1_1_device_reduce.html#a326ef5ae888b92442295c26190a6c39c" title="Computes a device-wide reduction using the specified binary reduction_op functor. ...">cub::DeviceReduce::Reduce</a>(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, min_op);</div>
+<div class="line"><a class="code" href="structcub_1_1_device_reduce.html#aa4adabeb841b852a7a5ecf4f99a2daeb" title="Computes a device-wide reduction using the specified binary reduction_op functor and initial value in...">cub::DeviceReduce::Reduce</a>(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, min_op, init);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// d_out &lt;-- [0]</span></div>
 </div><!-- fragment --></dd></dl>
@@ -282,7 +287,8 @@ <h2 class="groupheader">Member Function Documentation</h2>
   <table class="tparams">
     <tr><td class="paramname">InputIteratorT</td><td><b>[inferred]</b> Random-access input iterator type for reading input items (may be a simple pointer type) </td></tr>
     <tr><td class="paramname">OutputIteratorT</td><td><b>[inferred]</b> Output iterator type for recording the reduced aggregate (may be a simple pointer type) </td></tr>
-    <tr><td class="paramname">ReductionOp</td><td><b>[inferred]</b> Binary reduction functor type having member <code>T operator()(const T &amp;a, const T &amp;b)</code> (e.g., <a class="el" href="structcub_1_1_sum.html" title="Default sum functor. ">cub::Sum</a>, <a class="el" href="structcub_1_1_min.html" title="Default min functor. ">cub::Min</a>, <a class="el" href="structcub_1_1_max.html" title="Default max functor. ">cub::Max</a>, etc.) </td></tr>
+    <tr><td class="paramname">ReductionOpT</td><td><b>[inferred]</b> Binary reduction functor type having member <code>T operator()(const T &amp;a, const T &amp;b)</code> </td></tr>
+    <tr><td class="paramname">T</td><td><b>[inferred]</b> Data element type that is convertible to the <code>value</code> type of <code>InputIteratorT</code> </td></tr>
   </table>
   </dd>
 </dl>
@@ -293,7 +299,8 @@ <h2 class="groupheader">Member Function Documentation</h2>
     <tr><td class="paramdir">[in]</td><td class="paramname">d_in</td><td>Pointer to the input sequence of data items </td></tr>
     <tr><td class="paramdir">[out]</td><td class="paramname">d_out</td><td>Pointer to the output aggregate </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">num_items</td><td>Total number of input items (i.e., length of <code>d_in</code>) </td></tr>
-    <tr><td class="paramdir">[in]</td><td class="paramname">reduction_op</td><td>Binary reduction functor (e.g., an instance of <a class="el" href="structcub_1_1_sum.html" title="Default sum functor. ">cub::Sum</a>, <a class="el" href="structcub_1_1_min.html" title="Default min functor. ">cub::Min</a>, <a class="el" href="structcub_1_1_max.html" title="Default max functor. ">cub::Max</a>, etc.) </td></tr>
+    <tr><td class="paramdir">[in]</td><td class="paramname">reduction_op</td><td>Binary reduction functor </td></tr>
+    <tr><td class="paramdir">[in]</td><td class="paramname">init</td><td>Initial value of the reduction </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">stream</td><td><b>[optional]</b> CUDA stream to launch kernels within. Default is stream<sub>0</sub>. </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">debug_synchronous</td><td><b>[optional]</b> Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is <code>false</code>. </td></tr>
   </table>
@@ -368,39 +375,39 @@ <h2 class="groupheader">Member Function Documentation</h2>
 </table>
 </div><div class="memdoc">
 
-<p>Computes a device-wide sum using the addition ('+') operator. </p>
+<p>Computes a device-wide sum using the addition (<code>+</code>) operator. </p>
 <dl class="section user"><dt></dt><dd><ul>
-<li>Does not support non-commutative reduction operators.</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
-<li></li>
+<li>Uses <code>0</code> as the initial value of the reduction.</li>
+<li>Does not support <code>+</code> operators that are non-commutative..</li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
-<dl class="section user"><dt>Performance</dt><dd>The following charts illustrate saturated reduction (sum) performance across different CUDA architectures for <code>int32</code> and <code>int64</code> items, respectively.</dd></dl>
+<dl class="section user"><dt>Performance</dt><dd>The following charts illustrate saturated sum-reduction performance across different CUDA architectures for <code>int32</code> and <code>int64</code> items, respectively.</dd></dl>
 <div class="image">
 <img src="reduce_int32.png" alt="reduce_int32.png"/>
 </div>
  <div class="image">
 <img src="reduce_int64.png" alt="reduce_int64.png"/>
 </div>
-<dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates the sum reduction of a device vector of <code>int</code> items. </dd></dl>
+<dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates the sum-reduction of a device vector of <code>int</code> data elements. </dd></dl>
 <dl class="section user"><dt></dt><dd><div class="fragment"><div class="line"><span class="preprocessor">#include &lt;<a class="code" href="cub_8cuh.html">cub/cub.cuh</a>&gt;</span>   <span class="comment">// or equivalently &lt;cub/device/device_radix_sort.cuh&gt;</span></div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Declare, allocate, and initialize device-accessible pointers for input and output</span></div>
 <div class="line"><span class="keywordtype">int</span>  num_items;      <span class="comment">// e.g., 7</span></div>
 <div class="line"><span class="keywordtype">int</span>  *d_in;          <span class="comment">// e.g., [8, 6, 7, 5, 3, 0, 9]</span></div>
-<div class="line"><span class="keywordtype">int</span>  *d_out;         <span class="comment">// e.g., [ ]</span></div>
+<div class="line"><span class="keywordtype">int</span>  *d_out;         <span class="comment">// e.g., [-]</span></div>
 <div class="line">...</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Determine temporary device storage requirements</span></div>
 <div class="line">void     *d_temp_storage = NULL;</div>
 <div class="line"><span class="keywordtype">size_t</span>   temp_storage_bytes = 0;</div>
-<div class="line"><a class="code" href="structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f" title="Computes a device-wide sum using the addition (&#39;+&#39;) operator. ">cub::DeviceReduce::Sum</a>(d_temp_storage, temp_storage_bytes, d_in, d_sum, num_items);</div>
+<div class="line"><a class="code" href="structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f" title="Computes a device-wide sum using the addition (+) operator. ">cub::DeviceReduce::Sum</a>(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Allocate temporary storage</span></div>
 <div class="line">cudaMalloc(&amp;d_temp_storage, temp_storage_bytes);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Run sum-reduction</span></div>
-<div class="line"><a class="code" href="structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f" title="Computes a device-wide sum using the addition (&#39;+&#39;) operator. ">cub::DeviceReduce::Sum</a>(d_temp_storage, temp_storage_bytes, d_in, d_sum, num_items);</div>
+<div class="line"><a class="code" href="structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f" title="Computes a device-wide sum using the addition (+) operator. ">cub::DeviceReduce::Sum</a>(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// d_out &lt;-- [38]</span></div>
 </div><!-- fragment --></dd></dl>
@@ -425,7 +432,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 </dl>
 <dl><dt><b>Examples: </b></dt><dd><a class="el" href="example_device_reduce_8cu-example.html#a2">example_device_reduce.cu</a>.</dd>
 </dl>
-<p>Definition at line <a class="el" href="device__reduce_8cuh_source.html#l00219">219</a> of file <a class="el" href="device__reduce_8cuh_source.html">device_reduce.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__reduce_8cuh_source.html#l00217">217</a> of file <a class="el" href="device__reduce_8cuh_source.html">device_reduce.cuh</a>.</p>
 
 </div>
 </div>
@@ -495,31 +502,30 @@ <h2 class="groupheader">Member Function Documentation</h2>
 
 <p>Computes a device-wide minimum using the less-than ('&lt;') operator. </p>
 <dl class="section user"><dt></dt><dd><ul>
-<li>Does not support non-commutative minimum operators.</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
-<li></li>
+<li>Uses <code>std::numeric_limits&lt;T&gt;::max()</code> as the initial value of the reduction.</li>
+<li>Does not support <code>&lt;</code> operators that are non-commutative.</li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
-<dl class="section user"><dt>Performance</dt><dd>Performance is typically similar to <a class="el" href="structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f" title="Computes a device-wide sum using the addition (&#39;+&#39;) operator. ">DeviceReduce::Sum</a>.</dd></dl>
-<dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates the min-reduction of a device vector of <code>int</code> items. </dd></dl>
+<dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates the min-reduction of a device vector of <code>int</code> data elements. </dd></dl>
 <dl class="section user"><dt></dt><dd><div class="fragment"><div class="line"><span class="preprocessor">#include &lt;<a class="code" href="cub_8cuh.html">cub/cub.cuh</a>&gt;</span>   <span class="comment">// or equivalently &lt;cub/device/device_radix_sort.cuh&gt;</span></div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Declare, allocate, and initialize device-accessible pointers for input and output</span></div>
 <div class="line"><span class="keywordtype">int</span>  num_items;      <span class="comment">// e.g., 7</span></div>
 <div class="line"><span class="keywordtype">int</span>  *d_in;          <span class="comment">// e.g., [8, 6, 7, 5, 3, 0, 9]</span></div>
-<div class="line"><span class="keywordtype">int</span>  *d_out;         <span class="comment">// e.g., [ ]</span></div>
+<div class="line"><span class="keywordtype">int</span>  *d_out;         <span class="comment">// e.g., [-]</span></div>
 <div class="line">...</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Determine temporary device storage requirements</span></div>
 <div class="line">void     *d_temp_storage = NULL;</div>
 <div class="line"><span class="keywordtype">size_t</span>   temp_storage_bytes = 0;</div>
-<div class="line"><a class="code" href="structcub_1_1_device_reduce.html#a14d9c12a1beb9a04f77e903d07fa596a" title="Computes a device-wide minimum using the less-than (&#39;&lt;&#39;) operator. ">cub::DeviceReduce::Min</a>(d_temp_storage, temp_storage_bytes, d_in, d_min, num_items);</div>
+<div class="line"><a class="code" href="structcub_1_1_device_reduce.html#a14d9c12a1beb9a04f77e903d07fa596a" title="Computes a device-wide minimum using the less-than (&#39;&lt;&#39;) operator. ">cub::DeviceReduce::Min</a>(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Allocate temporary storage</span></div>
 <div class="line">cudaMalloc(&amp;d_temp_storage, temp_storage_bytes);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Run min-reduction</span></div>
-<div class="line"><a class="code" href="structcub_1_1_device_reduce.html#a14d9c12a1beb9a04f77e903d07fa596a" title="Computes a device-wide minimum using the less-than (&#39;&lt;&#39;) operator. ">cub::DeviceReduce::Min</a>(d_temp_storage, temp_storage_bytes, d_in, d_min, num_items);</div>
+<div class="line"><a class="code" href="structcub_1_1_device_reduce.html#a14d9c12a1beb9a04f77e903d07fa596a" title="Computes a device-wide minimum using the less-than (&#39;&lt;&#39;) operator. ">cub::DeviceReduce::Min</a>(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// d_out &lt;-- [0]</span></div>
 </div><!-- fragment --></dd></dl>
@@ -543,7 +549,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="device__reduce_8cuh_source.html#l00291">291</a> of file <a class="el" href="device__reduce_8cuh_source.html">device_reduce.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__reduce_8cuh_source.html#l00284">284</a> of file <a class="el" href="device__reduce_8cuh_source.html">device_reduce.cuh</a>.</p>
 
 </div>
 </div>
@@ -612,21 +618,23 @@ <h2 class="groupheader">Member Function Documentation</h2>
 </div><div class="memdoc">
 
 <p>Finds the first device-wide minimum using the less-than ('&lt;') operator, also returning the index of that item. </p>
-<dl class="section user"><dt></dt><dd>Assuming the input <code>d_in</code> has value type <code>T</code>, the output <code>d_out</code> must have value type <code>KeyValuePair&lt;int, T&gt;</code>. The minimum value is written to <code>d_out.value</code> and its location in the input array is written to <code>d_out.key</code>.</dd></dl>
 <dl class="section user"><dt></dt><dd><ul>
-<li>Does not support non-commutative minimum operators.</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
-<li></li>
+<li>The output value type of <code>d_out</code> is cub::KeyValuePair <code>&lt;int, T&gt;</code> (assuming the value type of <code>d_in</code> is <code>T</code>)<ul>
+<li>The minimum is written to <code>d_out.value</code> and its offset in the input array is written to <code>d_out.key</code>.</li>
+<li>The <code>{1, std::numeric_limits&lt;T&gt;::max()}</code> tuple is produced for zero-length inputs</li>
+</ul>
+</li>
+<li>Does not support <code>&lt;</code> operators that are non-commutative.</li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
-<dl class="section user"><dt>Performance</dt><dd>Performance is typically similar to <a class="el" href="structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f" title="Computes a device-wide sum using the addition (&#39;+&#39;) operator. ">DeviceReduce::Sum</a>.</dd></dl>
-<dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates the argmin-reduction of a device vector of <code>int</code> items. </dd></dl>
+<dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates the argmin-reduction of a device vector of <code>int</code> data elements. </dd></dl>
 <dl class="section user"><dt></dt><dd><div class="fragment"><div class="line"><span class="preprocessor">#include &lt;<a class="code" href="cub_8cuh.html">cub/cub.cuh</a>&gt;</span>   <span class="comment">// or equivalently &lt;cub/device/device_radix_sort.cuh&gt;</span></div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Declare, allocate, and initialize device-accessible pointers for input and output</span></div>
 <div class="line"><span class="keywordtype">int</span>                      num_items;      <span class="comment">// e.g., 7</span></div>
 <div class="line"><span class="keywordtype">int</span>                      *d_in;          <span class="comment">// e.g., [8, 6, 7, 5, 3, 0, 9]</span></div>
-<div class="line">KeyValuePair&lt;int, int&gt;   *d_out;         <span class="comment">// e.g., [{ , }]</span></div>
+<div class="line">KeyValuePair&lt;int, int&gt;   *d_out;         <span class="comment">// e.g., [{-,-}]</span></div>
 <div class="line">...</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Determine temporary device storage requirements</span></div>
@@ -640,12 +648,12 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <div class="line"><span class="comment">// Run argmin-reduction</span></div>
 <div class="line"><a class="code" href="structcub_1_1_device_reduce.html#a6b35963e90120b6d2c76a6068b0340a9" title="Finds the first device-wide minimum using the less-than (&#39;&lt;&#39;) operator, also returning the index o...">cub::DeviceReduce::ArgMin</a>(d_temp_storage, temp_storage_bytes, d_in, d_argmin, num_items);</div>
 <div class="line"></div>
-<div class="line"><span class="comment">// d_out &lt;-- [{0, 5}]</span></div>
+<div class="line"><span class="comment">// d_out &lt;-- [{5, 0}]</span></div>
 </div><!-- fragment --></dd></dl>
 <dl class="tparams"><dt>Template Parameters</dt><dd>
   <table class="tparams">
     <tr><td class="paramname">InputIteratorT</td><td><b>[inferred]</b> Random-access input iterator type for reading input items (of some type <code>T</code>) (may be a simple pointer type) </td></tr>
-    <tr><td class="paramname">OutputIteratorT</td><td><b>[inferred]</b> Output iterator type for recording the reduced aggregate (having value type <code>KeyValuePair&lt;int, T&gt;</code>) (may be a simple pointer type) </td></tr>
+    <tr><td class="paramname">OutputIteratorT</td><td><b>[inferred]</b> Output iterator type for recording the reduced aggregate (having value type <code>cub::KeyValuePair&lt;int, T&gt;</code>) (may be a simple pointer type) </td></tr>
   </table>
   </dd>
 </dl>
@@ -662,7 +670,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="device__reduce_8cuh_source.html#l00368">368</a> of file <a class="el" href="device__reduce_8cuh_source.html">device_reduce.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__reduce_8cuh_source.html#l00353">353</a> of file <a class="el" href="device__reduce_8cuh_source.html">device_reduce.cuh</a>.</p>
 
 </div>
 </div>
@@ -732,19 +740,18 @@ <h2 class="groupheader">Member Function Documentation</h2>
 
 <p>Computes a device-wide maximum using the greater-than ('&gt;') operator. </p>
 <dl class="section user"><dt></dt><dd><ul>
-<li>Does not support non-commutative maximum operators.</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
-<li></li>
+<li>Uses <code>std::numeric_limits&lt;T&gt;::lowest()</code> as the initial value of the reduction.</li>
+<li>Does not support <code>&gt;</code> operators that are non-commutative.</li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
-<dl class="section user"><dt>Performance</dt><dd>Performance is typically similar to <a class="el" href="structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f" title="Computes a device-wide sum using the addition (&#39;+&#39;) operator. ">DeviceReduce::Sum</a>.</dd></dl>
-<dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates the max-reduction of a device vector of <code>int</code> items. </dd></dl>
+<dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates the max-reduction of a device vector of <code>int</code> data elements. </dd></dl>
 <dl class="section user"><dt></dt><dd><div class="fragment"><div class="line"><span class="preprocessor">#include &lt;<a class="code" href="cub_8cuh.html">cub/cub.cuh</a>&gt;</span>   <span class="comment">// or equivalently &lt;cub/device/device_radix_sort.cuh&gt;</span></div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Declare, allocate, and initialize device-accessible pointers for input and output</span></div>
 <div class="line"><span class="keywordtype">int</span>  num_items;      <span class="comment">// e.g., 7</span></div>
 <div class="line"><span class="keywordtype">int</span>  *d_in;          <span class="comment">// e.g., [8, 6, 7, 5, 3, 0, 9]</span></div>
-<div class="line"><span class="keywordtype">int</span>  *d_out;         <span class="comment">// e.g., [ ]</span></div>
+<div class="line"><span class="keywordtype">int</span>  *d_out;         <span class="comment">// e.g., [-]</span></div>
 <div class="line">...</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Determine temporary device storage requirements</span></div>
@@ -780,7 +787,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="device__reduce_8cuh_source.html#l00444">444</a> of file <a class="el" href="device__reduce_8cuh_source.html">device_reduce.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__reduce_8cuh_source.html#l00424">424</a> of file <a class="el" href="device__reduce_8cuh_source.html">device_reduce.cuh</a>.</p>
 
 </div>
 </div>
@@ -849,21 +856,23 @@ <h2 class="groupheader">Member Function Documentation</h2>
 </div><div class="memdoc">
 
 <p>Finds the first device-wide maximum using the greater-than ('&gt;') operator, also returning the index of that item. </p>
-<dl class="section user"><dt></dt><dd>Assuming the input <code>d_in</code> has value type <code>T</code>, the output <code>d_out</code> must have value type <code>KeyValuePair&lt;int, T&gt;</code>. The maximum value is written to <code>d_out.value</code> and its location in the input array is written to <code>d_out.key</code>.</dd></dl>
 <dl class="section user"><dt></dt><dd><ul>
-<li>Does not support non-commutative maximum operators.</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
-<li></li>
+<li>The output value type of <code>d_out</code> is cub::KeyValuePair <code>&lt;int, T&gt;</code> (assuming the value type of <code>d_in</code> is <code>T</code>)<ul>
+<li>The maximum is written to <code>d_out.value</code> and its offset in the input array is written to <code>d_out.key</code>.</li>
+<li>The <code>{1, std::numeric_limits&lt;T&gt;::lowest()}</code> tuple is produced for zero-length inputs</li>
+</ul>
+</li>
+<li>Does not support <code>&gt;</code> operators that are non-commutative.</li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
-<dl class="section user"><dt>Performance</dt><dd>Performance is typically similar to <a class="el" href="structcub_1_1_device_reduce.html#ab7f21e8255eb842aaf74305975ae607f" title="Computes a device-wide sum using the addition (&#39;+&#39;) operator. ">DeviceReduce::Sum</a>.</dd></dl>
-<dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates the argmax-reduction of a device vector of <code>int</code> items. </dd></dl>
+<dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates the argmax-reduction of a device vector of <code>int</code> data elements. </dd></dl>
 <dl class="section user"><dt></dt><dd><div class="fragment"><div class="line"><span class="preprocessor">#include &lt;<a class="code" href="cub_8cuh.html">cub/cub.cuh</a>&gt;</span>   <span class="comment">// or equivalently &lt;cub/device/device_reduce.cuh&gt;</span></div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Declare, allocate, and initialize device-accessible pointers for input and output</span></div>
 <div class="line"><span class="keywordtype">int</span>                      num_items;      <span class="comment">// e.g., 7</span></div>
 <div class="line"><span class="keywordtype">int</span>                      *d_in;          <span class="comment">// e.g., [8, 6, 7, 5, 3, 0, 9]</span></div>
-<div class="line">KeyValuePair&lt;int, int&gt;   *d_out;         <span class="comment">// e.g., [{ , }]</span></div>
+<div class="line">KeyValuePair&lt;int, int&gt;   *d_out;         <span class="comment">// e.g., [{-,-}]</span></div>
 <div class="line">...</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Determine temporary device storage requirements</span></div>
@@ -877,12 +886,12 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <div class="line"><span class="comment">// Run argmax-reduction</span></div>
 <div class="line"><a class="code" href="structcub_1_1_device_reduce.html#a07a9ecbf0b6db1882107f6adee1c4276" title="Finds the first device-wide maximum using the greater-than (&#39;&gt;&#39;) operator, also returning the inde...">cub::DeviceReduce::ArgMax</a>(d_temp_storage, temp_storage_bytes, d_in, d_argmax, num_items);</div>
 <div class="line"></div>
-<div class="line"><span class="comment">// d_out &lt;-- [{9, 6}]</span></div>
+<div class="line"><span class="comment">// d_out &lt;-- [{6, 9}]</span></div>
 </div><!-- fragment --></dd></dl>
 <dl class="tparams"><dt>Template Parameters</dt><dd>
   <table class="tparams">
     <tr><td class="paramname">InputIteratorT</td><td><b>[inferred]</b> Random-access input iterator type for reading input items (of some type <code>T</code>) (may be a simple pointer type) </td></tr>
-    <tr><td class="paramname">OutputIteratorT</td><td><b>[inferred]</b> Output iterator type for recording the reduced aggregate (having value type <code>KeyValuePair&lt;int, T&gt;</code>) (may be a simple pointer type) </td></tr>
+    <tr><td class="paramname">OutputIteratorT</td><td><b>[inferred]</b> Output iterator type for recording the reduced aggregate (having value type <code>cub::KeyValuePair&lt;int, T&gt;</code>) (may be a simple pointer type) </td></tr>
   </table>
   </dd>
 </dl>
@@ -899,15 +908,15 @@ <h2 class="groupheader">Member Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="device__reduce_8cuh_source.html#l00521">521</a> of file <a class="el" href="device__reduce_8cuh_source.html">device_reduce.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__reduce_8cuh_source.html#l00493">493</a> of file <a class="el" href="device__reduce_8cuh_source.html">device_reduce.cuh</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a3206c7c11b4d894ca176482e86215b02"></a>
+<a class="anchor" id="a303ae673ac32825f95912b4bfff8bef1"></a>
 <div class="memitem">
 <div class="memproto">
 <div class="memtemplate">
-template&lt;typename KeysInputIteratorT , typename UniqueOutputIteratorT , typename ValuesInputIteratorT , typename AggregatesOutputIteratorT , typename NumRunsOutputIteratorT , typename ReductionOp &gt; </div>
+template&lt;typename KeysInputIteratorT , typename UniqueOutputIteratorT , typename ValuesInputIteratorT , typename AggregatesOutputIteratorT , typename NumRunsOutputIteratorT , typename ReductionOpT &gt; </div>
 <table class="mlabels">
   <tr>
   <td class="mlabels-left">
@@ -957,7 +966,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">ReductionOp&#160;</td>
+          <td class="paramtype">ReductionOpT&#160;</td>
           <td class="paramname"><em>reduction_op</em>, </td>
         </tr>
         <tr>
@@ -995,8 +1004,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <dl class="section user"><dt></dt><dd>This operation computes segmented reductions within <code>d_values_in</code> using the specified binary <code>reduction_op</code> functor. The segments are identified by "runs" of corresponding keys in <code>d_keys_in</code>, where runs are maximal ranges of consecutive, identical keys. For the <em>i</em><sup>th</sup> run encountered, the first key of the run and the corresponding value aggregate of that run are written to <code>d_unique_out[<em>i</em>]</code> and <code>d_aggregates_out[<em>i</em>]</code>, respectively. The total number of runs encountered is written to <code>d_num_runs_out</code>.</dd></dl>
 <dl class="section user"><dt></dt><dd><ul>
 <li>The <code>==</code> equality operator is used to determine whether keys are equivalent</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
-<li></li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
 <dl class="section user"><dt>Performance</dt><dd>The following chart illustrates reduction-by-key (sum) performance across different CUDA architectures for <code>fp32</code> and <code>fp64</code> values, respectively. Segments are identified by <code>int32</code> keys, and have lengths uniformly sampled from [1,1000].</dd></dl>
@@ -1030,26 +1038,26 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <div class="line"><span class="keywordtype">int</span>          num_items;          <span class="comment">// e.g., 8</span></div>
 <div class="line"><span class="keywordtype">int</span>          *d_keys_in;         <span class="comment">// e.g., [0, 2, 2, 9, 5, 5, 5, 8]</span></div>
 <div class="line"><span class="keywordtype">int</span>          *d_values_in;       <span class="comment">// e.g., [0, 7, 1, 6, 2, 5, 3, 4]</span></div>
-<div class="line"><span class="keywordtype">int</span>          *d_unique_out;      <span class="comment">// e.g., [ ,  ,  ,  ,  ,  ,  ,  ]</span></div>
-<div class="line"><span class="keywordtype">int</span>          *d_aggregates_out;  <span class="comment">// e.g., [ ,  ,  ,  ,  ,  ,  ,  ]</span></div>
-<div class="line"><span class="keywordtype">int</span>          *d_num_runs_out;        <span class="comment">// e.g., [ ]</span></div>
+<div class="line"><span class="keywordtype">int</span>          *d_unique_out;      <span class="comment">// e.g., [-, -, -, -, -, -, -, -]</span></div>
+<div class="line"><span class="keywordtype">int</span>          *d_aggregates_out;  <span class="comment">// e.g., [-, -, -, -, -, -, -, -]</span></div>
+<div class="line"><span class="keywordtype">int</span>          *d_num_runs_out;    <span class="comment">// e.g., [-]</span></div>
 <div class="line">CustomMin    reduction_op;</div>
 <div class="line">...</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Determine temporary device storage requirements</span></div>
 <div class="line">void     *d_temp_storage = NULL;</div>
 <div class="line"><span class="keywordtype">size_t</span>   temp_storage_bytes = 0;</div>
-<div class="line"><a class="code" href="structcub_1_1_device_reduce.html#a3206c7c11b4d894ca176482e86215b02" title="Reduces segments of values, where segments are demarcated by corresponding runs of identical keys...">cub::DeviceReduce::ReduceByKey</a>(d_temp_storage, temp_storage_bytes, d_keys_in, d_unique_out, d_values_in, d_aggregates_out, d_num_runs_out, reduction_op, num_items);</div>
+<div class="line"><a class="code" href="structcub_1_1_device_reduce.html#a303ae673ac32825f95912b4bfff8bef1" title="Reduces segments of values, where segments are demarcated by corresponding runs of identical keys...">cub::DeviceReduce::ReduceByKey</a>(d_temp_storage, temp_storage_bytes, d_keys_in, d_unique_out, d_values_in, d_aggregates_out, d_num_runs_out, reduction_op, num_items);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Allocate temporary storage</span></div>
 <div class="line">cudaMalloc(&amp;d_temp_storage, temp_storage_bytes);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Run reduce-by-key</span></div>
-<div class="line"><a class="code" href="structcub_1_1_device_reduce.html#a3206c7c11b4d894ca176482e86215b02" title="Reduces segments of values, where segments are demarcated by corresponding runs of identical keys...">cub::DeviceReduce::ReduceByKey</a>(d_temp_storage, temp_storage_bytes, d_keys_in, d_unique_out, d_values_in, d_aggregates_out, d_num_runs_out, reduction_op, num_items);</div>
+<div class="line"><a class="code" href="structcub_1_1_device_reduce.html#a303ae673ac32825f95912b4bfff8bef1" title="Reduces segments of values, where segments are demarcated by corresponding runs of identical keys...">cub::DeviceReduce::ReduceByKey</a>(d_temp_storage, temp_storage_bytes, d_keys_in, d_unique_out, d_values_in, d_aggregates_out, d_num_runs_out, reduction_op, num_items);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// d_unique_out      &lt;-- [0, 2, 9, 5, 8]</span></div>
 <div class="line"><span class="comment">// d_aggregates_out  &lt;-- [0, 1, 6, 2, 4]</span></div>
-<div class="line"><span class="comment">// d_num_runs_out        &lt;-- [5]</span></div>
+<div class="line"><span class="comment">// d_num_runs_out    &lt;-- [5]</span></div>
 </div><!-- fragment --></dd></dl>
 <dl class="tparams"><dt>Template Parameters</dt><dd>
   <table class="tparams">
@@ -1058,7 +1066,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
     <tr><td class="paramname">ValuesInputIteratorT</td><td><b>[inferred]</b> Random-access input iterator type for reading input values (may be a simple pointer type) </td></tr>
     <tr><td class="paramname">AggregatesOutputIterator</td><td><b>[inferred]</b> Random-access output iterator type for writing output value aggregates (may be a simple pointer type) </td></tr>
     <tr><td class="paramname">NumRunsOutputIteratorT</td><td><b>[inferred]</b> Output iterator type for recording the number of runs encountered (may be a simple pointer type) </td></tr>
-    <tr><td class="paramname">ReductionOp</td><td><b>[inferred]</b> Binary reduction functor type having member <code>T operator()(const T &amp;a, const T &amp;b)</code> (e.g., <a class="el" href="structcub_1_1_sum.html" title="Default sum functor. ">cub::Sum</a>, <a class="el" href="structcub_1_1_min.html" title="Default min functor. ">cub::Min</a>, <a class="el" href="structcub_1_1_max.html" title="Default max functor. ">cub::Max</a>, etc.) </td></tr>
+    <tr><td class="paramname">ReductionOpT</td><td><b>[inferred]</b> Binary reduction functor type having member <code>T operator()(const T &amp;a, const T &amp;b)</code> </td></tr>
   </table>
   </dd>
 </dl>
@@ -1071,7 +1079,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
     <tr><td class="paramdir">[in]</td><td class="paramname">d_values_in</td><td>Pointer to the input sequence of corresponding values </td></tr>
     <tr><td class="paramdir">[out]</td><td class="paramname">d_aggregates_out</td><td>Pointer to the output sequence of value aggregates (one aggregate per run) </td></tr>
     <tr><td class="paramdir">[out]</td><td class="paramname">d_num_runs_out</td><td>Pointer to total number of runs encountered (i.e., the length of d_unique_out) </td></tr>
-    <tr><td class="paramdir">[in]</td><td class="paramname">reduction_op</td><td>Binary reduction functor (e.g., an instance of <a class="el" href="structcub_1_1_sum.html" title="Default sum functor. ">cub::Sum</a>, <a class="el" href="structcub_1_1_min.html" title="Default min functor. ">cub::Min</a>, <a class="el" href="structcub_1_1_max.html" title="Default max functor. ">cub::Max</a>, etc.) </td></tr>
+    <tr><td class="paramdir">[in]</td><td class="paramname">reduction_op</td><td>Binary reduction functor </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">num_items</td><td>Total number of associated key+value pairs (i.e., the length of <code>d_in_keys</code> and <code>d_in_values</code>) </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">stream</td><td><b>[optional]</b> CUDA stream to launch kernels within. Default is stream<sub>0</sub>. </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">debug_synchronous</td><td><b>[optional]</b> Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is <code>false</code>. </td></tr>
@@ -1079,7 +1087,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="device__reduce_8cuh_source.html#l00642">642</a> of file <a class="el" href="device__reduce_8cuh_source.html">device_reduce.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__reduce_8cuh_source.html#l00611">611</a> of file <a class="el" href="device__reduce_8cuh_source.html">device_reduce.cuh</a>.</p>
 
 </div>
 </div>
@@ -1090,7 +1098,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:07 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:18 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_device_run_length_encode-members.html b/docs/html/structcub_1_1_device_run_length_encode-members.html
index 50b1209581..1d579a5e15 100644
--- a/docs/html/structcub_1_1_device_run_length_encode-members.html
+++ b/docs/html/structcub_1_1_device_run_length_encode-members.html
@@ -110,7 +110,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:07 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:18 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_device_run_length_encode.html b/docs/html/structcub_1_1_device_run_length_encode.html
index d8dcadd9b3..7b745bb0ea 100644
--- a/docs/html/structcub_1_1_device_run_length_encode.html
+++ b/docs/html/structcub_1_1_device_run_length_encode.html
@@ -112,7 +112,7 @@
 .</div></div>
  <dl class="section user"><dt>Overview</dt><dd>A <a href="http://en.wikipedia.org/wiki/Run-length_encoding"><em>run-length encoding</em></a> computes a simple compressed representation of a sequence of input elements such that each maximal "run" of consecutive same-valued data items is encoded as a single data value along with a count of the elements in that run.</dd></dl>
 <dl class="section user"><dt>Usage Considerations</dt><dd><ul>
-<li><em>Dynamic parallelism</em>. <a class="el" href="structcub_1_1_device_run_length_encode.html" title="DeviceRunLengthEncode provides device-wide, parallel operations for demarcating &quot;runs&quot; of same-valued...">DeviceRunLengthEncode</a> methods can be called within kernel code on devices in which CUDA dynamic parallelism is supported. When calling these methods from kernel code, be sure to define the <code>CUB_CDP</code> macro in your compiler's macro definitions.</li>
+<li><em>Dynamic parallelism</em>. <a class="el" href="structcub_1_1_device_run_length_encode.html" title="DeviceRunLengthEncode provides device-wide, parallel operations for demarcating &quot;runs&quot; of same-valued...">DeviceRunLengthEncode</a> methods can be called within kernel code on devices in which CUDA dynamic parallelism is supported.</li>
 </ul>
 </dd></dl>
 <dl class="section user"><dt>Performance</dt><dd>The work-complexity of run-length encode as a function of input size is linear, resulting in performance throughput that plateaus with problem sizes large enough to saturate the GPU.</dd></dl>
@@ -219,8 +219,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <li>For the <em>i</em><sup>th</sup> run encountered, the first key of the run and its length are written to <code>d_unique_out[<em>i</em>]</code> and <code>d_counts_out[<em>i</em>]</code>, respectively.</li>
 <li>The total number of runs encountered is written to <code>d_num_runs_out</code>.</li>
 <li>The <code>==</code> equality operator is used to determine whether values are equivalent</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
-<li></li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
 <dl class="section user"><dt>Performance</dt><dd>The following charts illustrate saturated encode performance across different CUDA architectures for <code>int32</code> and <code>int64</code> items, respectively. Segments have lengths uniformly sampled from [1,1000].</dd></dl>
@@ -287,7 +286,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="device__run__length__encode_8cuh_source.html#l00149">149</a> of file <a class="el" href="device__run__length__encode_8cuh_source.html">device_run_length_encode.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__run__length__encode_8cuh_source.html#l00148">148</a> of file <a class="el" href="device__run__length__encode_8cuh_source.html">device_run_length_encode.cuh</a>.</p>
 
 </div>
 </div>
@@ -372,8 +371,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <li>For the <em>i</em><sup>th</sup> non-trivial run, the run's starting offset and its length are written to <code>d_offsets_out[<em>i</em>]</code> and <code>d_lengths_out[<em>i</em>]</code>, respectively.</li>
 <li>The total number of runs encountered is written to <code>d_num_runs_out</code>.</li>
 <li>The <code>==</code> equality operator is used to determine whether values are equivalent</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
-<li></li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
 <dl class="section user"><dt>Performance</dt><dd></dd></dl>
@@ -427,7 +425,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="device__run__length__encode_8cuh_source.html#l00247">247</a> of file <a class="el" href="device__run__length__encode_8cuh_source.html">device_run_length_encode.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__run__length__encode_8cuh_source.html#l00245">245</a> of file <a class="el" href="device__run__length__encode_8cuh_source.html">device_run_length_encode.cuh</a>.</p>
 
 </div>
 </div>
@@ -438,7 +436,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:07 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:18 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_device_scan-members.html b/docs/html/structcub_1_1_device_scan-members.html
index 6b49a89bf9..bb357f7a99 100644
--- a/docs/html/structcub_1_1_device_scan-members.html
+++ b/docs/html/structcub_1_1_device_scan-members.html
@@ -112,7 +112,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:07 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:18 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_device_scan.html b/docs/html/structcub_1_1_device_scan.html
index daf18ef634..c8cc035d00 100644
--- a/docs/html/structcub_1_1_device_scan.html
+++ b/docs/html/structcub_1_1_device_scan.html
@@ -111,7 +111,7 @@
 .</div></div>
  <dl class="section user"><dt>Overview</dt><dd>Given a sequence of input elements and a binary reduction operator, a <a href="http://en.wikipedia.org/wiki/Prefix_sum"><em>prefix scan</em></a> produces an output sequence where each element is computed to be the reduction of the elements occurring earlier in the input sequence. <em>Prefix sum</em> connotes a prefix scan with the addition operator. The term <em>inclusive</em> indicates that the <em>i</em><sup>th</sup> output reduction incorporates the <em>i</em><sup>th</sup> input. The term <em>exclusive</em> indicates the <em>i</em><sup>th</sup> input is not incorporated into the <em>i</em><sup>th</sup> output reduction.</dd></dl>
 <dl class="section user"><dt>Usage Considerations</dt><dd><ul>
-<li><em>Dynamic parallelism</em>. <a class="el" href="structcub_1_1_device_scan.html" title="DeviceScan provides device-wide, parallel operations for computing a prefix scan across a sequence of...">DeviceScan</a> methods can be called within kernel code on devices in which CUDA dynamic parallelism is supported. When calling these methods from kernel code, be sure to define the <code>CUB_CDP</code> macro in your compiler's macro definitions.</li>
+<li><em>Dynamic parallelism</em>. <a class="el" href="structcub_1_1_device_scan.html" title="DeviceScan provides device-wide, parallel operations for computing a prefix scan across a sequence of...">DeviceScan</a> methods can be called within kernel code on devices in which CUDA dynamic parallelism is supported.</li>
 </ul>
 </dd></dl>
 <dl class="section user"><dt>Performance</dt><dd>The work-complexity of prefix scan as a function of input size is linear, resulting in performance throughput that plateaus with problem sizes large enough to saturate the GPU.</dd></dl>
@@ -215,8 +215,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <p>Computes a device-wide exclusive prefix sum. </p>
 <dl class="section user"><dt></dt><dd><ul>
 <li>Supports non-commutative sum operators.</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
-<li></li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
 <dl class="section user"><dt>Performance</dt><dd>The following charts illustrate saturated exclusive sum performance across different CUDA architectures for <code>int32</code> and <code>int64</code> items, respectively.</dd></dl>
@@ -269,7 +268,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 </dl>
 <dl><dt><b>Examples: </b></dt><dd><a class="el" href="example_device_scan_8cu-example.html#a2">example_device_scan.cu</a>.</dd>
 </dl>
-<p>Definition at line <a class="el" href="device__scan_8cuh_source.html#l00133">133</a> of file <a class="el" href="device__scan_8cuh_source.html">device_scan.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__scan_8cuh_source.html#l00132">132</a> of file <a class="el" href="device__scan_8cuh_source.html">device_scan.cuh</a>.</p>
 
 </div>
 </div>
@@ -352,11 +351,9 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <p>Computes a device-wide exclusive prefix scan using the specified binary <code>scan_op</code> functor. </p>
 <dl class="section user"><dt></dt><dd><ul>
 <li>Supports non-commutative scan operators.</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
-<li></li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
-<dl class="section user"><dt>Performance</dt><dd>Performance is typically similar to <a class="el" href="structcub_1_1_device_scan.html#a02b2d2e98f89f80813460f6a6ea1692b" title="Computes a device-wide exclusive prefix sum. ">DeviceScan::ExclusiveSum</a>.</dd></dl>
 <dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates the exclusive prefix min-scan of an <code>int</code> device vector </dd></dl>
 <dl class="section user"><dt></dt><dd><div class="fragment"><div class="line"><span class="preprocessor">#include &lt;<a class="code" href="cub_8cuh.html">cub/cub.cuh</a>&gt;</span>   <span class="comment">// or equivalently &lt;cub/device/device_scan.cuh&gt;</span></div>
 <div class="line"></div>
@@ -405,7 +402,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
     <tr><td class="paramdir">[in,out]</td><td class="paramname">temp_storage_bytes</td><td>Reference to size in bytes of <code>d_temp_storage</code> allocation </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">d_in</td><td>Pointer to the input sequence of data items </td></tr>
     <tr><td class="paramdir">[out]</td><td class="paramname">d_out</td><td>Pointer to the output sequence of data items </td></tr>
-    <tr><td class="paramdir">[in]</td><td class="paramname">scan_op</td><td>Binary scan functor (e.g., an instance of <a class="el" href="structcub_1_1_sum.html" title="Default sum functor. ">cub::Sum</a>, <a class="el" href="structcub_1_1_min.html" title="Default min functor. ">cub::Min</a>, <a class="el" href="structcub_1_1_max.html" title="Default max functor. ">cub::Max</a>, etc.) </td></tr>
+    <tr><td class="paramdir">[in]</td><td class="paramname">scan_op</td><td>Binary scan functor </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">identity</td><td>Identity element </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">num_items</td><td>Total number of input items (i.e., the length of <code>d_in</code>) </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">stream</td><td><b>[optional]</b> CUDA stream to launch kernels within. Default is stream<sub>0</sub>. </td></tr>
@@ -414,7 +411,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="device__scan_8cuh_source.html#l00221">221</a> of file <a class="el" href="device__scan_8cuh_source.html">device_scan.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__scan_8cuh_source.html#l00216">216</a> of file <a class="el" href="device__scan_8cuh_source.html">device_scan.cuh</a>.</p>
 
 </div>
 </div>
@@ -485,11 +482,9 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <p>Computes a device-wide inclusive prefix sum. </p>
 <dl class="section user"><dt></dt><dd><ul>
 <li>Supports non-commutative sum operators.</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
-<li></li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
-<dl class="section user"><dt>Performance</dt><dd>Performance is typically similar to <a class="el" href="structcub_1_1_device_scan.html#a02b2d2e98f89f80813460f6a6ea1692b" title="Computes a device-wide exclusive prefix sum. ">DeviceScan::ExclusiveSum</a>.</dd></dl>
 <dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates the inclusive prefix sum of an <code>int</code> device vector. </dd></dl>
 <dl class="section user"><dt></dt><dd><div class="fragment"><div class="line"><span class="preprocessor">#include &lt;<a class="code" href="cub_8cuh.html">cub/cub.cuh</a>&gt;</span>   <span class="comment">// or equivalently &lt;cub/device/device_scan.cuh&gt;</span></div>
 <div class="line"></div>
@@ -532,7 +527,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="device__scan_8cuh_source.html#l00300">300</a> of file <a class="el" href="device__scan_8cuh_source.html">device_scan.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__scan_8cuh_source.html#l00291">291</a> of file <a class="el" href="device__scan_8cuh_source.html">device_scan.cuh</a>.</p>
 
 </div>
 </div>
@@ -609,11 +604,9 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <p>Computes a device-wide inclusive prefix scan using the specified binary <code>scan_op</code> functor. </p>
 <dl class="section user"><dt></dt><dd><ul>
 <li>Supports non-commutative scan operators.</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
-<li></li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
-<dl class="section user"><dt>Performance</dt><dd>Performance is typically similar to <a class="el" href="structcub_1_1_device_scan.html#a02b2d2e98f89f80813460f6a6ea1692b" title="Computes a device-wide exclusive prefix sum. ">DeviceScan::ExclusiveSum</a>.</dd></dl>
 <dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates the inclusive prefix min-scan of an <code>int</code> device vector. </dd></dl>
 <dl class="section user"><dt></dt><dd><div class="fragment"><div class="line"><span class="preprocessor">#include &lt;<a class="code" href="cub_8cuh.html">cub/cub.cuh</a>&gt;</span>   <span class="comment">// or equivalently &lt;cub/device/device_scan.cuh&gt;</span></div>
 <div class="line"></div>
@@ -661,7 +654,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
     <tr><td class="paramdir">[in,out]</td><td class="paramname">temp_storage_bytes</td><td>Reference to size in bytes of <code>d_temp_storage</code> allocation </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">d_in</td><td>Pointer to the input sequence of data items </td></tr>
     <tr><td class="paramdir">[out]</td><td class="paramname">d_out</td><td>Pointer to the output sequence of data items </td></tr>
-    <tr><td class="paramdir">[in]</td><td class="paramname">scan_op</td><td>Binary scan functor (e.g., an instance of <a class="el" href="structcub_1_1_sum.html" title="Default sum functor. ">cub::Sum</a>, <a class="el" href="structcub_1_1_min.html" title="Default min functor. ">cub::Min</a>, <a class="el" href="structcub_1_1_max.html" title="Default max functor. ">cub::Max</a>, etc.) </td></tr>
+    <tr><td class="paramdir">[in]</td><td class="paramname">scan_op</td><td>Binary scan functor </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">num_items</td><td>Total number of input items (i.e., the length of <code>d_in</code>) </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">stream</td><td><b>[optional]</b> CUDA stream to launch kernels within. Default is stream<sub>0</sub>. </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">debug_synchronous</td><td><b>[optional]</b> Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is <code>false</code>. </td></tr>
@@ -669,7 +662,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="device__scan_8cuh_source.html#l00383">383</a> of file <a class="el" href="device__scan_8cuh_source.html">device_scan.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__scan_8cuh_source.html#l00370">370</a> of file <a class="el" href="device__scan_8cuh_source.html">device_scan.cuh</a>.</p>
 
 </div>
 </div>
@@ -680,7 +673,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:07 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:18 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_device_segmented_radix_sort-members.html b/docs/html/structcub_1_1_device_segmented_radix_sort-members.html
index 6d2dec7e84..c156d08b73 100644
--- a/docs/html/structcub_1_1_device_segmented_radix_sort-members.html
+++ b/docs/html/structcub_1_1_device_segmented_radix_sort-members.html
@@ -108,7 +108,7 @@
   <tr><td class="entry"><a class="el" href="structcub_1_1_device_segmented_radix_sort.html#ab8b433b55358ac507a7fcbba933cdbac">SortKeys</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, int num_items, int num_segments, int *d_begin_offsets, int *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_segmented_radix_sort.html">cub::DeviceSegmentedRadixSort</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
   <tr class="even"><td class="entry"><a class="el" href="structcub_1_1_device_segmented_radix_sort.html#a49ccbb6ca7a1d26e4b01e68e8da8a701">SortKeysDescending</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, KeyT *d_keys_in, KeyT *d_keys_out, int num_items, int num_segments, int *d_begin_offsets, int *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_segmented_radix_sort.html">cub::DeviceSegmentedRadixSort</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
   <tr><td class="entry"><a class="el" href="structcub_1_1_device_segmented_radix_sort.html#a5c21fba171c718aaf9b3b3c27bda6a94">SortKeysDescending</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, int num_items, int num_segments, int *d_begin_offsets, int *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_segmented_radix_sort.html">cub::DeviceSegmentedRadixSort</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="structcub_1_1_device_segmented_radix_sort.html#a4c291958575f14acc6a9e6d3e2ea6597">SortPairs</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, KeyT *d_keys_in, KeyT *d_keys_out, ValueT *d_values_in, ValueT *d_values_out, int num_items, OffsetT num_segments, OffsetT *d_begin_offsets, OffsetT *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_segmented_radix_sort.html">cub::DeviceSegmentedRadixSort</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="structcub_1_1_device_segmented_radix_sort.html#a6770adfa8e5a99c8015d9e6ab5ed8ca0">SortPairs</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, KeyT *d_keys_in, KeyT *d_keys_out, ValueT *d_values_in, ValueT *d_values_out, int num_items, int num_segments, int *d_begin_offsets, int *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_segmented_radix_sort.html">cub::DeviceSegmentedRadixSort</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
   <tr><td class="entry"><a class="el" href="structcub_1_1_device_segmented_radix_sort.html#a4cffada9bba553f9871a34d926bbb148">SortPairs</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, DoubleBuffer&lt; ValueT &gt; &amp;d_values, int num_items, int num_segments, int *d_begin_offsets, int *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_segmented_radix_sort.html">cub::DeviceSegmentedRadixSort</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
   <tr class="even"><td class="entry"><a class="el" href="structcub_1_1_device_segmented_radix_sort.html#a39de3d48166582a802cc7df1481d3ff4">SortPairsDescending</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, KeyT *d_keys_in, KeyT *d_keys_out, ValueT *d_values_in, ValueT *d_values_out, int num_items, int num_segments, int *d_begin_offsets, int *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_segmented_radix_sort.html">cub::DeviceSegmentedRadixSort</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
   <tr><td class="entry"><a class="el" href="structcub_1_1_device_segmented_radix_sort.html#aafce5852dfbfbeef027493c3791d6347">SortPairsDescending</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, DoubleBuffer&lt; KeyT &gt; &amp;d_keys, DoubleBuffer&lt; ValueT &gt; &amp;d_values, int num_items, int num_segments, int *d_begin_offsets, int *d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT)*8, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_segmented_radix_sort.html">cub::DeviceSegmentedRadixSort</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
@@ -116,7 +116,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:07 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:18 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_device_segmented_radix_sort.html.REMOVED.git-id b/docs/html/structcub_1_1_device_segmented_radix_sort.html.REMOVED.git-id
index 1e74b0c016..2d6e20cc76 100644
--- a/docs/html/structcub_1_1_device_segmented_radix_sort.html.REMOVED.git-id
+++ b/docs/html/structcub_1_1_device_segmented_radix_sort.html.REMOVED.git-id
@@ -1 +1 @@
-d62b06a2976f2d1c4abecbacc5c2f2563c95464c
\ No newline at end of file
+818721631567b517b0ee143268be8ba2c0a10915
\ No newline at end of file
diff --git a/docs/html/structcub_1_1_device_segmented_reduce-members.html b/docs/html/structcub_1_1_device_segmented_reduce-members.html
index 1dccab5b9c..9ef3d39214 100644
--- a/docs/html/structcub_1_1_device_segmented_reduce-members.html
+++ b/docs/html/structcub_1_1_device_segmented_reduce-members.html
@@ -104,17 +104,17 @@
 
 <p>This is the complete list of members for <a class="el" href="structcub_1_1_device_segmented_reduce.html">cub::DeviceSegmentedReduce</a>, including all inherited members.</p>
 <table class="directory">
-  <tr class="even"><td class="entry"><a class="el" href="structcub_1_1_device_segmented_reduce.html#abf36bf795ef905c5f3736f46e7f5d0da">ArgMax</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_segments, int *d_begin_offsets, int *d_end_offsets, T identity, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_segmented_reduce.html">cub::DeviceSegmentedReduce</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
-  <tr><td class="entry"><a class="el" href="structcub_1_1_device_segmented_reduce.html#a1adbde01527b1df8fd326569dce6aa03">ArgMin</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_segments, int *d_begin_offsets, int *d_end_offsets, T identity, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_segmented_reduce.html">cub::DeviceSegmentedReduce</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="structcub_1_1_device_segmented_reduce.html#a81cc146a6aa537728469ca76f5d91c16">Max</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_segments, int *d_begin_offsets, int *d_end_offsets, T identity, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_segmented_reduce.html">cub::DeviceSegmentedReduce</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
-  <tr><td class="entry"><a class="el" href="structcub_1_1_device_segmented_reduce.html#a687d16fb62173a4f2630db8fe4db15fe">Min</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_segments, int *d_begin_offsets, int *d_end_offsets, T identity, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_segmented_reduce.html">cub::DeviceSegmentedReduce</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="structcub_1_1_device_segmented_reduce.html#abbaebbc469e9603774e1d1fc60435baf">Reduce</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_segments, int *d_begin_offsets, int *d_end_offsets, ReductionOp reduction_op, T identity, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_segmented_reduce.html">cub::DeviceSegmentedReduce</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
-  <tr><td class="entry"><a class="el" href="structcub_1_1_device_segmented_reduce.html#a62f13e427316b0caec6168ebd9ba85e3">Sum</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_segments, int *d_begin_offsets, int *d_end_offsets, T identity, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_segmented_reduce.html">cub::DeviceSegmentedReduce</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="structcub_1_1_device_segmented_reduce.html#ad0e1f7eede9a0f93a379950eac7d8329">ArgMax</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_segments, int *d_begin_offsets, int *d_end_offsets, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_segmented_reduce.html">cub::DeviceSegmentedReduce</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
+  <tr><td class="entry"><a class="el" href="structcub_1_1_device_segmented_reduce.html#a085150ad8d55de8665b45a3f69f38bce">ArgMin</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_segments, int *d_begin_offsets, int *d_end_offsets, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_segmented_reduce.html">cub::DeviceSegmentedReduce</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="structcub_1_1_device_segmented_reduce.html#aa1f982f913c95d9974412b8fc0995183">Max</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_segments, int *d_begin_offsets, int *d_end_offsets, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_segmented_reduce.html">cub::DeviceSegmentedReduce</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
+  <tr><td class="entry"><a class="el" href="structcub_1_1_device_segmented_reduce.html#a2fb8a073bb504afd0e05cd06d008ec29">Min</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_segments, int *d_begin_offsets, int *d_end_offsets, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_segmented_reduce.html">cub::DeviceSegmentedReduce</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="structcub_1_1_device_segmented_reduce.html#ad9b73f245930740c4d8786fc1a812364">Reduce</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_segments, int *d_begin_offsets, int *d_end_offsets, ReductionOp reduction_op, T init, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_segmented_reduce.html">cub::DeviceSegmentedReduce</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
+  <tr><td class="entry"><a class="el" href="structcub_1_1_device_segmented_reduce.html#aefdf8fcdfb5e5d76459ee222360924eb">Sum</a>(void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_segments, int *d_begin_offsets, int *d_end_offsets, cudaStream_t stream=0, bool debug_synchronous=false)</td><td class="entry"><a class="el" href="structcub_1_1_device_segmented_reduce.html">cub::DeviceSegmentedReduce</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
 </table></div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Wed Nov 18 2015 10:24:47 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:18 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_device_segmented_reduce.html b/docs/html/structcub_1_1_device_segmented_reduce.html
index ca06c7afda..81d0aa132e 100644
--- a/docs/html/structcub_1_1_device_segmented_reduce.html
+++ b/docs/html/structcub_1_1_device_segmented_reduce.html
@@ -116,43 +116,43 @@
 </ul>
 </dd></dl>
 
-<p>Definition at line <a class="el" href="device__segmented__reduce_8cuh_source.html#l00063">63</a> of file <a class="el" href="device__segmented__reduce_8cuh_source.html">device_segmented_reduce.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__segmented__reduce_8cuh_source.html#l00064">64</a> of file <a class="el" href="device__segmented__reduce_8cuh_source.html">device_segmented_reduce.cuh</a>.</p>
 </div><table class="memberdecls">
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-static-methods"></a>
 Static Public Methods</h2></td></tr>
-<tr class="memitem:abbaebbc469e9603774e1d1fc60435baf"><td class="memTemplParams" colspan="2">template&lt;typename InputIteratorT , typename OutputIteratorT , typename ReductionOp , typename T &gt; </td></tr>
-<tr class="memitem:abbaebbc469e9603774e1d1fc60435baf"><td class="memTemplItemLeft" align="right" valign="top">static CUB_RUNTIME_FUNCTION <br class="typebreak"/>
-cudaError_t&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_segmented_reduce.html#abbaebbc469e9603774e1d1fc60435baf">Reduce</a> (void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_segments, int *d_begin_offsets, int *d_end_offsets, ReductionOp reduction_op, T identity, cudaStream_t stream=0, bool debug_synchronous=false)</td></tr>
-<tr class="memdesc:abbaebbc469e9603774e1d1fc60435baf"><td class="mdescLeft">&#160;</td><td class="mdescRight">Computes a device-wide segmented reduction using the specified binary <code>reduction_op</code> functor.  <a href="#abbaebbc469e9603774e1d1fc60435baf">More...</a><br/></td></tr>
-<tr class="separator:abbaebbc469e9603774e1d1fc60435baf"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a62f13e427316b0caec6168ebd9ba85e3"><td class="memTemplParams" colspan="2">template&lt;typename InputIteratorT , typename OutputIteratorT , typename T &gt; </td></tr>
-<tr class="memitem:a62f13e427316b0caec6168ebd9ba85e3"><td class="memTemplItemLeft" align="right" valign="top">static CUB_RUNTIME_FUNCTION <br class="typebreak"/>
-cudaError_t&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_segmented_reduce.html#a62f13e427316b0caec6168ebd9ba85e3">Sum</a> (void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_segments, int *d_begin_offsets, int *d_end_offsets, T identity, cudaStream_t stream=0, bool debug_synchronous=false)</td></tr>
-<tr class="memdesc:a62f13e427316b0caec6168ebd9ba85e3"><td class="mdescLeft">&#160;</td><td class="mdescRight">Computes a device-wide segmented sum using the addition ('+') operator.  <a href="#a62f13e427316b0caec6168ebd9ba85e3">More...</a><br/></td></tr>
-<tr class="separator:a62f13e427316b0caec6168ebd9ba85e3"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a687d16fb62173a4f2630db8fe4db15fe"><td class="memTemplParams" colspan="2">template&lt;typename InputIteratorT , typename OutputIteratorT , typename T &gt; </td></tr>
-<tr class="memitem:a687d16fb62173a4f2630db8fe4db15fe"><td class="memTemplItemLeft" align="right" valign="top">static CUB_RUNTIME_FUNCTION <br class="typebreak"/>
-cudaError_t&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_segmented_reduce.html#a687d16fb62173a4f2630db8fe4db15fe">Min</a> (void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_segments, int *d_begin_offsets, int *d_end_offsets, T identity, cudaStream_t stream=0, bool debug_synchronous=false)</td></tr>
-<tr class="memdesc:a687d16fb62173a4f2630db8fe4db15fe"><td class="mdescLeft">&#160;</td><td class="mdescRight">Computes a device-wide segmented minimum using the less-than ('&lt;') operator.  <a href="#a687d16fb62173a4f2630db8fe4db15fe">More...</a><br/></td></tr>
-<tr class="separator:a687d16fb62173a4f2630db8fe4db15fe"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a1adbde01527b1df8fd326569dce6aa03"><td class="memTemplParams" colspan="2">template&lt;typename InputIteratorT , typename OutputIteratorT , typename T &gt; </td></tr>
-<tr class="memitem:a1adbde01527b1df8fd326569dce6aa03"><td class="memTemplItemLeft" align="right" valign="top">static CUB_RUNTIME_FUNCTION <br class="typebreak"/>
-cudaError_t&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_segmented_reduce.html#a1adbde01527b1df8fd326569dce6aa03">ArgMin</a> (void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_segments, int *d_begin_offsets, int *d_end_offsets, T identity, cudaStream_t stream=0, bool debug_synchronous=false)</td></tr>
-<tr class="memdesc:a1adbde01527b1df8fd326569dce6aa03"><td class="mdescLeft">&#160;</td><td class="mdescRight">Finds the first device-wide minimum in each segment using the less-than ('&lt;') operator, also returning the in-segment index of that item.  <a href="#a1adbde01527b1df8fd326569dce6aa03">More...</a><br/></td></tr>
-<tr class="separator:a1adbde01527b1df8fd326569dce6aa03"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a81cc146a6aa537728469ca76f5d91c16"><td class="memTemplParams" colspan="2">template&lt;typename InputIteratorT , typename OutputIteratorT , typename T &gt; </td></tr>
-<tr class="memitem:a81cc146a6aa537728469ca76f5d91c16"><td class="memTemplItemLeft" align="right" valign="top">static CUB_RUNTIME_FUNCTION <br class="typebreak"/>
-cudaError_t&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_segmented_reduce.html#a81cc146a6aa537728469ca76f5d91c16">Max</a> (void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_segments, int *d_begin_offsets, int *d_end_offsets, T identity, cudaStream_t stream=0, bool debug_synchronous=false)</td></tr>
-<tr class="memdesc:a81cc146a6aa537728469ca76f5d91c16"><td class="mdescLeft">&#160;</td><td class="mdescRight">Computes a device-wide segmented maximum using the greater-than ('&gt;') operator.  <a href="#a81cc146a6aa537728469ca76f5d91c16">More...</a><br/></td></tr>
-<tr class="separator:a81cc146a6aa537728469ca76f5d91c16"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:abf36bf795ef905c5f3736f46e7f5d0da"><td class="memTemplParams" colspan="2">template&lt;typename InputIteratorT , typename OutputIteratorT , typename T &gt; </td></tr>
-<tr class="memitem:abf36bf795ef905c5f3736f46e7f5d0da"><td class="memTemplItemLeft" align="right" valign="top">static CUB_RUNTIME_FUNCTION <br class="typebreak"/>
-cudaError_t&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_segmented_reduce.html#abf36bf795ef905c5f3736f46e7f5d0da">ArgMax</a> (void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_segments, int *d_begin_offsets, int *d_end_offsets, T identity, cudaStream_t stream=0, bool debug_synchronous=false)</td></tr>
-<tr class="memdesc:abf36bf795ef905c5f3736f46e7f5d0da"><td class="mdescLeft">&#160;</td><td class="mdescRight">Finds the first device-wide maximum in each segment using the greater-than ('&gt;') operator, also returning the in-segment index of that item.  <a href="#abf36bf795ef905c5f3736f46e7f5d0da">More...</a><br/></td></tr>
-<tr class="separator:abf36bf795ef905c5f3736f46e7f5d0da"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ad9b73f245930740c4d8786fc1a812364"><td class="memTemplParams" colspan="2">template&lt;typename InputIteratorT , typename OutputIteratorT , typename ReductionOp , typename T &gt; </td></tr>
+<tr class="memitem:ad9b73f245930740c4d8786fc1a812364"><td class="memTemplItemLeft" align="right" valign="top">static CUB_RUNTIME_FUNCTION <br class="typebreak"/>
+cudaError_t&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_segmented_reduce.html#ad9b73f245930740c4d8786fc1a812364">Reduce</a> (void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_segments, int *d_begin_offsets, int *d_end_offsets, ReductionOp reduction_op, T init, cudaStream_t stream=0, bool debug_synchronous=false)</td></tr>
+<tr class="memdesc:ad9b73f245930740c4d8786fc1a812364"><td class="mdescLeft">&#160;</td><td class="mdescRight">Computes a device-wide segmented reduction using the specified binary <code>reduction_op</code> functor.  <a href="#ad9b73f245930740c4d8786fc1a812364">More...</a><br/></td></tr>
+<tr class="separator:ad9b73f245930740c4d8786fc1a812364"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:aefdf8fcdfb5e5d76459ee222360924eb"><td class="memTemplParams" colspan="2">template&lt;typename InputIteratorT , typename OutputIteratorT &gt; </td></tr>
+<tr class="memitem:aefdf8fcdfb5e5d76459ee222360924eb"><td class="memTemplItemLeft" align="right" valign="top">static CUB_RUNTIME_FUNCTION <br class="typebreak"/>
+cudaError_t&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_segmented_reduce.html#aefdf8fcdfb5e5d76459ee222360924eb">Sum</a> (void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_segments, int *d_begin_offsets, int *d_end_offsets, cudaStream_t stream=0, bool debug_synchronous=false)</td></tr>
+<tr class="memdesc:aefdf8fcdfb5e5d76459ee222360924eb"><td class="mdescLeft">&#160;</td><td class="mdescRight">Computes a device-wide segmented sum using the addition ('+') operator.  <a href="#aefdf8fcdfb5e5d76459ee222360924eb">More...</a><br/></td></tr>
+<tr class="separator:aefdf8fcdfb5e5d76459ee222360924eb"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a2fb8a073bb504afd0e05cd06d008ec29"><td class="memTemplParams" colspan="2">template&lt;typename InputIteratorT , typename OutputIteratorT &gt; </td></tr>
+<tr class="memitem:a2fb8a073bb504afd0e05cd06d008ec29"><td class="memTemplItemLeft" align="right" valign="top">static CUB_RUNTIME_FUNCTION <br class="typebreak"/>
+cudaError_t&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_segmented_reduce.html#a2fb8a073bb504afd0e05cd06d008ec29">Min</a> (void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_segments, int *d_begin_offsets, int *d_end_offsets, cudaStream_t stream=0, bool debug_synchronous=false)</td></tr>
+<tr class="memdesc:a2fb8a073bb504afd0e05cd06d008ec29"><td class="mdescLeft">&#160;</td><td class="mdescRight">Computes a device-wide segmented minimum using the less-than ('&lt;') operator.  <a href="#a2fb8a073bb504afd0e05cd06d008ec29">More...</a><br/></td></tr>
+<tr class="separator:a2fb8a073bb504afd0e05cd06d008ec29"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a085150ad8d55de8665b45a3f69f38bce"><td class="memTemplParams" colspan="2">template&lt;typename InputIteratorT , typename OutputIteratorT &gt; </td></tr>
+<tr class="memitem:a085150ad8d55de8665b45a3f69f38bce"><td class="memTemplItemLeft" align="right" valign="top">static CUB_RUNTIME_FUNCTION <br class="typebreak"/>
+cudaError_t&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_segmented_reduce.html#a085150ad8d55de8665b45a3f69f38bce">ArgMin</a> (void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_segments, int *d_begin_offsets, int *d_end_offsets, cudaStream_t stream=0, bool debug_synchronous=false)</td></tr>
+<tr class="memdesc:a085150ad8d55de8665b45a3f69f38bce"><td class="mdescLeft">&#160;</td><td class="mdescRight">Finds the first device-wide minimum in each segment using the less-than ('&lt;') operator, also returning the in-segment index of that item.  <a href="#a085150ad8d55de8665b45a3f69f38bce">More...</a><br/></td></tr>
+<tr class="separator:a085150ad8d55de8665b45a3f69f38bce"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:aa1f982f913c95d9974412b8fc0995183"><td class="memTemplParams" colspan="2">template&lt;typename InputIteratorT , typename OutputIteratorT &gt; </td></tr>
+<tr class="memitem:aa1f982f913c95d9974412b8fc0995183"><td class="memTemplItemLeft" align="right" valign="top">static CUB_RUNTIME_FUNCTION <br class="typebreak"/>
+cudaError_t&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_segmented_reduce.html#aa1f982f913c95d9974412b8fc0995183">Max</a> (void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_segments, int *d_begin_offsets, int *d_end_offsets, cudaStream_t stream=0, bool debug_synchronous=false)</td></tr>
+<tr class="memdesc:aa1f982f913c95d9974412b8fc0995183"><td class="mdescLeft">&#160;</td><td class="mdescRight">Computes a device-wide segmented maximum using the greater-than ('&gt;') operator.  <a href="#aa1f982f913c95d9974412b8fc0995183">More...</a><br/></td></tr>
+<tr class="separator:aa1f982f913c95d9974412b8fc0995183"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ad0e1f7eede9a0f93a379950eac7d8329"><td class="memTemplParams" colspan="2">template&lt;typename InputIteratorT , typename OutputIteratorT &gt; </td></tr>
+<tr class="memitem:ad0e1f7eede9a0f93a379950eac7d8329"><td class="memTemplItemLeft" align="right" valign="top">static CUB_RUNTIME_FUNCTION <br class="typebreak"/>
+cudaError_t&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="structcub_1_1_device_segmented_reduce.html#ad0e1f7eede9a0f93a379950eac7d8329">ArgMax</a> (void *d_temp_storage, size_t &amp;temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_segments, int *d_begin_offsets, int *d_end_offsets, cudaStream_t stream=0, bool debug_synchronous=false)</td></tr>
+<tr class="memdesc:ad0e1f7eede9a0f93a379950eac7d8329"><td class="mdescLeft">&#160;</td><td class="mdescRight">Finds the first device-wide maximum in each segment using the greater-than ('&gt;') operator, also returning the in-segment index of that item.  <a href="#ad0e1f7eede9a0f93a379950eac7d8329">More...</a><br/></td></tr>
+<tr class="separator:ad0e1f7eede9a0f93a379950eac7d8329"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table>
 <h2 class="groupheader">Member Function Documentation</h2>
-<a class="anchor" id="abbaebbc469e9603774e1d1fc60435baf"></a>
+<a class="anchor" id="ad9b73f245930740c4d8786fc1a812364"></a>
 <div class="memitem">
 <div class="memproto">
 <div class="memtemplate">
@@ -213,7 +213,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">T&#160;</td>
-          <td class="paramname"><em>identity</em>, </td>
+          <td class="paramname"><em>init</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
@@ -242,13 +242,12 @@ <h2 class="groupheader">Member Function Documentation</h2>
 
 <p>Computes a device-wide segmented reduction using the specified binary <code>reduction_op</code> functor. </p>
 <dl class="section user"><dt></dt><dd><ul>
-<li>The specified <code>identity</code> value is produced for zero-length segments. <code>identity</code> is assumed to be arithmetically neutral for the given operation, i.e., <code>reduction_op(XXX, identity)</code> will return <code>XXX</code>.</li>
-<li>Does not support non-commutative reduction operators.</li>
+<li>Does not support binary reduction operators that are non-commutative.</li>
 <li>When input a contiguous sequence of segments, a single sequence <code>segment_offsets</code> (of length <code>num_segments+1</code>) can be aliased for both the <code>d_begin_offsets</code> and <code>d_end_offsets</code> parameters (where the latter is specified as <code>segment_offsets+1</code>).</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
-<dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates a custom min reduction of a device vector of <code>int</code> items. </dd></dl>
+<dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates a custom min-reduction of a device vector of <code>int</code> data elements. </dd></dl>
 <dl class="section user"><dt></dt><dd><div class="fragment"><div class="line"><span class="preprocessor">#include &lt;<a class="code" href="cub_8cuh.html">cub/cub.cuh</a>&gt;</span>   <span class="comment">// or equivalently &lt;cub/device/device_radix_sort.cuh&gt;</span></div>
 <div class="line"></div>
 <div class="line"><span class="comment">// CustomMin functor</span></div>
@@ -267,30 +266,30 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <div class="line"><span class="keywordtype">int</span>          *d_in;          <span class="comment">// e.g., [8, 6, 7, 5, 3, 0, 9]</span></div>
 <div class="line"><span class="keywordtype">int</span>          *d_out;         <span class="comment">// e.g., [-, -, -]</span></div>
 <div class="line">CustomMin    min_op;</div>
-<div class="line"><span class="keywordtype">int</span>          identity;       <span class="comment">// e.g., MAX_INT</span></div>
+<div class="line"><span class="keywordtype">int</span>          init;           <span class="comment">// e.g., INT_MAX</span></div>
 <div class="line">...</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Determine temporary device storage requirements</span></div>
 <div class="line">void     *d_temp_storage = NULL;</div>
 <div class="line"><span class="keywordtype">size_t</span>   temp_storage_bytes = 0;</div>
-<div class="line"><a class="code" href="structcub_1_1_device_segmented_reduce.html#abbaebbc469e9603774e1d1fc60435baf" title="Computes a device-wide segmented reduction using the specified binary reduction_op functor...">cub::DeviceSegmentedReduce::Reduce</a>(d_temp_storage, temp_storage_bytes, d_in, d_out,</div>
-<div class="line">    num_segments, d_offsets, d_offsets + 1, min_op, identity);</div>
+<div class="line"><a class="code" href="structcub_1_1_device_segmented_reduce.html#ad9b73f245930740c4d8786fc1a812364" title="Computes a device-wide segmented reduction using the specified binary reduction_op functor...">cub::DeviceSegmentedReduce::Reduce</a>(d_temp_storage, temp_storage_bytes, d_in, d_out,</div>
+<div class="line">    num_segments, d_offsets, d_offsets + 1, min_op, init);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Allocate temporary storage</span></div>
 <div class="line">cudaMalloc(&amp;d_temp_storage, temp_storage_bytes);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Run reduction</span></div>
-<div class="line"><a class="code" href="structcub_1_1_device_segmented_reduce.html#abbaebbc469e9603774e1d1fc60435baf" title="Computes a device-wide segmented reduction using the specified binary reduction_op functor...">cub::DeviceSegmentedReduce::Reduce</a>(d_temp_storage, temp_storage_bytes, d_in, d_out,</div>
-<div class="line">    num_segments, d_offsets, d_offsets + 1, min_op, identity);</div>
+<div class="line"><a class="code" href="structcub_1_1_device_segmented_reduce.html#ad9b73f245930740c4d8786fc1a812364" title="Computes a device-wide segmented reduction using the specified binary reduction_op functor...">cub::DeviceSegmentedReduce::Reduce</a>(d_temp_storage, temp_storage_bytes, d_in, d_out,</div>
+<div class="line">    num_segments, d_offsets, d_offsets + 1, min_op, init);</div>
 <div class="line"></div>
-<div class="line"><span class="comment">// d_out &lt;-- [6, MAX_INT, 0]</span></div>
+<div class="line"><span class="comment">// d_out &lt;-- [6, INT_MAX, 0]</span></div>
 </div><!-- fragment --></dd></dl>
 <dl class="tparams"><dt>Template Parameters</dt><dd>
   <table class="tparams">
     <tr><td class="paramname">InputIteratorT</td><td><b>[inferred]</b> Random-access input iterator type for reading input items (may be a simple pointer type) </td></tr>
     <tr><td class="paramname">OutputIteratorT</td><td><b>[inferred]</b> Output iterator type for recording the reduced aggregate (may be a simple pointer type) </td></tr>
-    <tr><td class="paramname">ReductionOp</td><td><b>[inferred]</b> Binary reduction functor type having member <code>T operator()(const T &amp;a, const T &amp;b)</code> (e.g., <a class="el" href="structcub_1_1_sum.html" title="Default sum functor. ">cub::Sum</a>, <a class="el" href="structcub_1_1_min.html" title="Default min functor. ">cub::Min</a>, <a class="el" href="structcub_1_1_max.html" title="Default max functor. ">cub::Max</a>, etc.) </td></tr>
-    <tr><td class="paramname">T</td><td><b>[inferred]</b> Data type being reduced </td></tr>
+    <tr><td class="paramname">ReductionOp</td><td><b>[inferred]</b> Binary reduction functor type having member <code>T operator()(const T &amp;a, const T &amp;b)</code> </td></tr>
+    <tr><td class="paramname">T</td><td><b>[inferred]</b> Data element type that is convertible to the <code>value</code> type of <code>InputIteratorT</code> </td></tr>
   </table>
   </dd>
 </dl>
@@ -303,23 +302,23 @@ <h2 class="groupheader">Member Function Documentation</h2>
     <tr><td class="paramdir">[in]</td><td class="paramname">num_segments</td><td>The number of segments that comprise the sorting data </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">d_begin_offsets</td><td>Device-accessible pointer to the sequence of beginning offsets of length <code>num_segments</code>, such that <code>d_begin_offsets[i]</code> is the first element of the <em>i</em><sup>th</sup> data segment in <code>d_keys_*</code> and <code>d_values_*</code> </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">d_end_offsets</td><td>Device-accessible pointer to the sequence of ending offsets of length <code>num_segments</code>, such that <code>d_end_offsets[i]-1</code> is the last element of the <em>i</em><sup>th</sup> data segment in <code>d_keys_*</code> and <code>d_values_*</code>. <a class="el" href="structcub_1_1_if.html" title="Type selection (IF ? ThenType : ElseType) ">If</a> <code>d_end_offsets[i]-1</code> &lt;= <code>d_begin_offsets[i]</code>, the <em>i</em><sup>th</sup> is considered empty. </td></tr>
-    <tr><td class="paramdir">[in]</td><td class="paramname">reduction_op</td><td>Binary reduction functor (e.g., an instance of <a class="el" href="structcub_1_1_sum.html" title="Default sum functor. ">cub::Sum</a>, <a class="el" href="structcub_1_1_min.html" title="Default min functor. ">cub::Min</a>, <a class="el" href="structcub_1_1_max.html" title="Default max functor. ">cub::Max</a>, etc.) </td></tr>
-    <tr><td class="paramdir">[in]</td><td class="paramname">identity</td><td>The identity value to be returned for zero-length segments </td></tr>
+    <tr><td class="paramdir">[in]</td><td class="paramname">reduction_op</td><td>Binary reduction functor </td></tr>
+    <tr><td class="paramdir">[in]</td><td class="paramname">init</td><td>Initial value of the reduction for each segment </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">stream</td><td><b>[optional]</b> CUDA stream to launch kernels within. Default is stream<sub>0</sub>. </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">debug_synchronous</td><td><b>[optional]</b> Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is <code>false</code>. </td></tr>
   </table>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="device__segmented__reduce_8cuh_source.html#l00132">132</a> of file <a class="el" href="device__segmented__reduce_8cuh_source.html">device_segmented_reduce.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__segmented__reduce_8cuh_source.html#l00130">130</a> of file <a class="el" href="device__segmented__reduce_8cuh_source.html">device_segmented_reduce.cuh</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a62f13e427316b0caec6168ebd9ba85e3"></a>
+<a class="anchor" id="aefdf8fcdfb5e5d76459ee222360924eb"></a>
 <div class="memitem">
 <div class="memproto">
 <div class="memtemplate">
-template&lt;typename InputIteratorT , typename OutputIteratorT , typename T &gt; </div>
+template&lt;typename InputIteratorT , typename OutputIteratorT &gt; </div>
 <table class="mlabels">
   <tr>
   <td class="mlabels-left">
@@ -366,12 +365,6 @@ <h2 class="groupheader">Member Function Documentation</h2>
           <td class="paramtype">int *&#160;</td>
           <td class="paramname"><em>d_end_offsets</em>, </td>
         </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">T&#160;</td>
-          <td class="paramname"><em>identity</em>, </td>
-        </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
@@ -399,13 +392,13 @@ <h2 class="groupheader">Member Function Documentation</h2>
 
 <p>Computes a device-wide segmented sum using the addition ('+') operator. </p>
 <dl class="section user"><dt></dt><dd><ul>
-<li>The specified <code>identity</code> value is produced for zero-length segments. <code>identity</code> is assumed to be arithmetically neutral for the input type's addition operation, i.e., <code>XXX + identity = XXX </code></li>
+<li>Uses <code>0</code> as the initial value of the reduction for each segment.</li>
 <li>When input a contiguous sequence of segments, a single sequence <code>segment_offsets</code> (of length <code>num_segments+1</code>) can be aliased for both the <code>d_begin_offsets</code> and <code>d_end_offsets</code> parameters (where the latter is specified as <code>segment_offsets+1</code>).</li>
-<li>Does not support non-commutative reduction operators.</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
+<li>Does not support <code>+</code> operators that are non-commutative..</li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
-<dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates the sum reduction of a device vector of <code>int</code> items. </dd></dl>
+<dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates the sum reduction of a device vector of <code>int</code> data elements. </dd></dl>
 <dl class="section user"><dt></dt><dd><div class="fragment"><div class="line"><span class="preprocessor">#include &lt;<a class="code" href="cub_8cuh.html">cub/cub.cuh</a>&gt;</span>   <span class="comment">// or equivalently &lt;cub/device/device_radix_sort.cuh&gt;</span></div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Declare, allocate, and initialize device-accessible pointers for input and output</span></div>
@@ -413,21 +406,20 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <div class="line"><span class="keywordtype">int</span> *d_offsets;     <span class="comment">// e.g., [0, 3, 3, 7]</span></div>
 <div class="line"><span class="keywordtype">int</span> *d_in;          <span class="comment">// e.g., [8, 6, 7, 5, 3, 0, 9]</span></div>
 <div class="line"><span class="keywordtype">int</span> *d_out;         <span class="comment">// e.g., [-, -, -]</span></div>
-<div class="line"><span class="keywordtype">int</span> identity        <span class="comment">// e.g., 0</span></div>
 <div class="line">...</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Determine temporary device storage requirements</span></div>
 <div class="line">void     *d_temp_storage = NULL;</div>
 <div class="line"><span class="keywordtype">size_t</span>   temp_storage_bytes = 0;</div>
-<div class="line"><a class="code" href="structcub_1_1_device_segmented_reduce.html#a62f13e427316b0caec6168ebd9ba85e3" title="Computes a device-wide segmented sum using the addition (&#39;+&#39;) operator. ">cub::DeviceSegmentedReduce::Sum</a>(d_temp_storage, temp_storage_bytes, d_in, d_out,</div>
-<div class="line">    num_segments, d_offsets, d_offsets + 1, identity);</div>
+<div class="line"><a class="code" href="structcub_1_1_device_segmented_reduce.html#aefdf8fcdfb5e5d76459ee222360924eb" title="Computes a device-wide segmented sum using the addition (&#39;+&#39;) operator. ">cub::DeviceSegmentedReduce::Sum</a>(d_temp_storage, temp_storage_bytes, d_in, d_out,</div>
+<div class="line">    num_segments, d_offsets, d_offsets + 1);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Allocate temporary storage</span></div>
 <div class="line">cudaMalloc(&amp;d_temp_storage, temp_storage_bytes);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Run sum-reduction</span></div>
-<div class="line"><a class="code" href="structcub_1_1_device_segmented_reduce.html#a62f13e427316b0caec6168ebd9ba85e3" title="Computes a device-wide segmented sum using the addition (&#39;+&#39;) operator. ">cub::DeviceSegmentedReduce::Sum</a>(d_temp_storage, temp_storage_bytes, d_in, d_out,</div>
-<div class="line">    num_segments, d_offsets, d_offsets + 1, identity);</div>
+<div class="line"><a class="code" href="structcub_1_1_device_segmented_reduce.html#aefdf8fcdfb5e5d76459ee222360924eb" title="Computes a device-wide segmented sum using the addition (&#39;+&#39;) operator. ">cub::DeviceSegmentedReduce::Sum</a>(d_temp_storage, temp_storage_bytes, d_in, d_out,</div>
+<div class="line">    num_segments, d_offsets, d_offsets + 1);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// d_out &lt;-- [21, 0, 17]</span></div>
 </div><!-- fragment --></dd></dl>
@@ -435,7 +427,6 @@ <h2 class="groupheader">Member Function Documentation</h2>
   <table class="tparams">
     <tr><td class="paramname">InputIteratorT</td><td><b>[inferred]</b> Random-access input iterator type for reading input items (may be a simple pointer type) </td></tr>
     <tr><td class="paramname">OutputIteratorT</td><td><b>[inferred]</b> Output iterator type for recording the reduced aggregate (may be a simple pointer type) </td></tr>
-    <tr><td class="paramname">T</td><td><b>[inferred]</b> Data type being reduced </td></tr>
   </table>
   </dd>
 </dl>
@@ -448,22 +439,21 @@ <h2 class="groupheader">Member Function Documentation</h2>
     <tr><td class="paramdir">[in]</td><td class="paramname">num_segments</td><td>The number of segments that comprise the sorting data </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">d_begin_offsets</td><td>Device-accessible pointer to the sequence of beginning offsets of length <code>num_segments</code>, such that <code>d_begin_offsets[i]</code> is the first element of the <em>i</em><sup>th</sup> data segment in <code>d_keys_*</code> and <code>d_values_*</code> </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">d_end_offsets</td><td>Device-accessible pointer to the sequence of ending offsets of length <code>num_segments</code>, such that <code>d_end_offsets[i]-1</code> is the last element of the <em>i</em><sup>th</sup> data segment in <code>d_keys_*</code> and <code>d_values_*</code>. <a class="el" href="structcub_1_1_if.html" title="Type selection (IF ? ThenType : ElseType) ">If</a> <code>d_end_offsets[i]-1</code> &lt;= <code>d_begin_offsets[i]</code>, the <em>i</em><sup>th</sup> is considered empty. </td></tr>
-    <tr><td class="paramdir">[in]</td><td class="paramname">identity</td><td>The identity value to be returned for zero-length segments </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">stream</td><td><b>[optional]</b> CUDA stream to launch kernels within. Default is stream<sub>0</sub>. </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">debug_synchronous</td><td><b>[optional]</b> Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is <code>false</code>. </td></tr>
   </table>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="device__segmented__reduce_8cuh_source.html#l00217">217</a> of file <a class="el" href="device__segmented__reduce_8cuh_source.html">device_segmented_reduce.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__segmented__reduce_8cuh_source.html#l00210">210</a> of file <a class="el" href="device__segmented__reduce_8cuh_source.html">device_segmented_reduce.cuh</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a687d16fb62173a4f2630db8fe4db15fe"></a>
+<a class="anchor" id="a2fb8a073bb504afd0e05cd06d008ec29"></a>
 <div class="memitem">
 <div class="memproto">
 <div class="memtemplate">
-template&lt;typename InputIteratorT , typename OutputIteratorT , typename T &gt; </div>
+template&lt;typename InputIteratorT , typename OutputIteratorT &gt; </div>
 <table class="mlabels">
   <tr>
   <td class="mlabels-left">
@@ -510,12 +500,6 @@ <h2 class="groupheader">Member Function Documentation</h2>
           <td class="paramtype">int *&#160;</td>
           <td class="paramname"><em>d_end_offsets</em>, </td>
         </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">T&#160;</td>
-          <td class="paramname"><em>identity</em>, </td>
-        </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
@@ -543,13 +527,13 @@ <h2 class="groupheader">Member Function Documentation</h2>
 
 <p>Computes a device-wide segmented minimum using the less-than ('&lt;') operator. </p>
 <dl class="section user"><dt></dt><dd><ul>
-<li>The specified <code>identity</code> value is produced for zero-length segments. <code>identity</code> is assumed to be arithmetically neutral for the input type's <code>&lt;</code> operation, i.e., <code>(identity &lt; XXX) = false</code></li>
+<li>Uses <code>std::numeric_limits&lt;T&gt;::max()</code> as the initial value of the reduction for each segment.</li>
 <li>When input a contiguous sequence of segments, a single sequence <code>segment_offsets</code> (of length <code>num_segments+1</code>) can be aliased for both the <code>d_begin_offsets</code> and <code>d_end_offsets</code> parameters (where the latter is specified as <code>segment_offsets+1</code>).</li>
-<li>Does not support non-commutative minimum operators.</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
+<li>Does not support <code>&lt;</code> operators that are non-commutative.</li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
-<dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates the min-reduction of a device vector of <code>int</code> items. </dd></dl>
+<dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates the min-reduction of a device vector of <code>int</code> data elements. </dd></dl>
 <dl class="section user"><dt></dt><dd><div class="fragment"><div class="line"><span class="preprocessor">#include &lt;<a class="code" href="cub_8cuh.html">cub/cub.cuh</a>&gt;</span>   <span class="comment">// or equivalently &lt;cub/device/device_radix_sort.cuh&gt;</span></div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Declare, allocate, and initialize device-accessible pointers for input and output</span></div>
@@ -557,29 +541,27 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <div class="line"><span class="keywordtype">int</span> *d_offsets;     <span class="comment">// e.g., [0, 3, 3, 7]</span></div>
 <div class="line"><span class="keywordtype">int</span> *d_in;          <span class="comment">// e.g., [8, 6, 7, 5, 3, 0, 9]</span></div>
 <div class="line"><span class="keywordtype">int</span> *d_out;         <span class="comment">// e.g., [-, -, -]</span></div>
-<div class="line"><span class="keywordtype">int</span> identity;       <span class="comment">// e.g., MAX_INT</span></div>
 <div class="line">...</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Determine temporary device storage requirements</span></div>
 <div class="line">void     *d_temp_storage = NULL;</div>
 <div class="line"><span class="keywordtype">size_t</span>   temp_storage_bytes = 0;</div>
-<div class="line"><a class="code" href="structcub_1_1_device_segmented_reduce.html#a687d16fb62173a4f2630db8fe4db15fe" title="Computes a device-wide segmented minimum using the less-than (&#39;&lt;&#39;) operator. ">cub::DeviceSegmentedReduce::Min</a>(d_temp_storage, temp_storage_bytes, d_in, d_out,</div>
-<div class="line">    num_segments, d_offsets, d_offsets + 1, identity);</div>
+<div class="line"><a class="code" href="structcub_1_1_device_segmented_reduce.html#a2fb8a073bb504afd0e05cd06d008ec29" title="Computes a device-wide segmented minimum using the less-than (&#39;&lt;&#39;) operator. ">cub::DeviceSegmentedReduce::Min</a>(d_temp_storage, temp_storage_bytes, d_in, d_out,</div>
+<div class="line">    num_segments, d_offsets, d_offsets + 1);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Allocate temporary storage</span></div>
 <div class="line">cudaMalloc(&amp;d_temp_storage, temp_storage_bytes);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Run min-reduction</span></div>
-<div class="line"><a class="code" href="structcub_1_1_device_segmented_reduce.html#a687d16fb62173a4f2630db8fe4db15fe" title="Computes a device-wide segmented minimum using the less-than (&#39;&lt;&#39;) operator. ">cub::DeviceSegmentedReduce::Min</a>(d_temp_storage, temp_storage_bytes, d_in, d_out,</div>
-<div class="line">    num_segments, d_offsets, d_offsets + 1, identity);</div>
+<div class="line"><a class="code" href="structcub_1_1_device_segmented_reduce.html#a2fb8a073bb504afd0e05cd06d008ec29" title="Computes a device-wide segmented minimum using the less-than (&#39;&lt;&#39;) operator. ">cub::DeviceSegmentedReduce::Min</a>(d_temp_storage, temp_storage_bytes, d_in, d_out,</div>
+<div class="line">    num_segments, d_offsets, d_offsets + 1);</div>
 <div class="line"></div>
-<div class="line"><span class="comment">// d_out &lt;-- [6, MAX_INT, 0]</span></div>
+<div class="line"><span class="comment">// d_out &lt;-- [6, INT_MAX, 0]</span></div>
 </div><!-- fragment --></dd></dl>
 <dl class="tparams"><dt>Template Parameters</dt><dd>
   <table class="tparams">
     <tr><td class="paramname">InputIteratorT</td><td><b>[inferred]</b> Random-access input iterator type for reading input items (may be a simple pointer type) </td></tr>
     <tr><td class="paramname">OutputIteratorT</td><td><b>[inferred]</b> Output iterator type for recording the reduced aggregate (may be a simple pointer type) </td></tr>
-    <tr><td class="paramname">T</td><td><b>[inferred]</b> Data type being reduced </td></tr>
   </table>
   </dd>
 </dl>
@@ -592,22 +574,21 @@ <h2 class="groupheader">Member Function Documentation</h2>
     <tr><td class="paramdir">[in]</td><td class="paramname">num_segments</td><td>The number of segments that comprise the sorting data </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">d_begin_offsets</td><td>Device-accessible pointer to the sequence of beginning offsets of length <code>num_segments</code>, such that <code>d_begin_offsets[i]</code> is the first element of the <em>i</em><sup>th</sup> data segment in <code>d_keys_*</code> and <code>d_values_*</code> </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">d_end_offsets</td><td>Device-accessible pointer to the sequence of ending offsets of length <code>num_segments</code>, such that <code>d_end_offsets[i]-1</code> is the last element of the <em>i</em><sup>th</sup> data segment in <code>d_keys_*</code> and <code>d_values_*</code>. <a class="el" href="structcub_1_1_if.html" title="Type selection (IF ? ThenType : ElseType) ">If</a> <code>d_end_offsets[i]-1</code> &lt;= <code>d_begin_offsets[i]</code>, the <em>i</em><sup>th</sup> is considered empty. </td></tr>
-    <tr><td class="paramdir">[in]</td><td class="paramname">identity</td><td>The identity value to be returned for zero-length segments </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">stream</td><td><b>[optional]</b> CUDA stream to launch kernels within. Default is stream<sub>0</sub>. </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">debug_synchronous</td><td><b>[optional]</b> Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is <code>false</code>. </td></tr>
   </table>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="device__segmented__reduce_8cuh_source.html#l00301">301</a> of file <a class="el" href="device__segmented__reduce_8cuh_source.html">device_segmented_reduce.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__segmented__reduce_8cuh_source.html#l00288">288</a> of file <a class="el" href="device__segmented__reduce_8cuh_source.html">device_segmented_reduce.cuh</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a1adbde01527b1df8fd326569dce6aa03"></a>
+<a class="anchor" id="a085150ad8d55de8665b45a3f69f38bce"></a>
 <div class="memitem">
 <div class="memproto">
 <div class="memtemplate">
-template&lt;typename InputIteratorT , typename OutputIteratorT , typename T &gt; </div>
+template&lt;typename InputIteratorT , typename OutputIteratorT &gt; </div>
 <table class="mlabels">
   <tr>
   <td class="mlabels-left">
@@ -654,12 +635,6 @@ <h2 class="groupheader">Member Function Documentation</h2>
           <td class="paramtype">int *&#160;</td>
           <td class="paramname"><em>d_end_offsets</em>, </td>
         </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">T&#160;</td>
-          <td class="paramname"><em>identity</em>, </td>
-        </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
@@ -687,14 +662,17 @@ <h2 class="groupheader">Member Function Documentation</h2>
 
 <p>Finds the first device-wide minimum in each segment using the less-than ('&lt;') operator, also returning the in-segment index of that item. </p>
 <dl class="section user"><dt></dt><dd><ul>
-<li>Assuming the input <code>d_in</code> has value type <code>T</code>, the output <code>d_out</code> must have value type <code>KeyValuePair&lt;int, T&gt;</code>. The minimum value is written to <code>d_out.value</code> and its location in the input array is written to <code>d_out.key</code>.</li>
-<li>The specified <code>identity</code> value is produced for zero-length segments. <code>identity</code> is assumed to be arithmetically neutral for the input type's <code>&lt;</code> operation, i.e., <code>(identity &lt; XXX) = false</code></li>
+<li>The output value type of <code>d_out</code> is cub::KeyValuePair <code>&lt;int, T&gt;</code> (assuming the value type of <code>d_in</code> is <code>T</code>)<ul>
+<li>The minimum of the <em>i</em><sup>th</sup> segment is written to <code>d_out[i].value</code> and its offset in that segment is written to <code>d_out[i].key</code>.</li>
+<li>The <code>{1, std::numeric_limits&lt;T&gt;::max()}</code> tuple is produced for zero-length inputs</li>
+</ul>
+</li>
 <li>When input a contiguous sequence of segments, a single sequence <code>segment_offsets</code> (of length <code>num_segments+1</code>) can be aliased for both the <code>d_begin_offsets</code> and <code>d_end_offsets</code> parameters (where the latter is specified as <code>segment_offsets+1</code>).</li>
-<li>Does not support non-commutative minimum operators.</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
+<li>Does not support <code>&lt;</code> operators that are non-commutative.</li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
-<dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates the argmin-reduction of a device vector of <code>int</code> items. </dd></dl>
+<dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates the argmin-reduction of a device vector of <code>int</code> data elements. </dd></dl>
 <dl class="section user"><dt></dt><dd><div class="fragment"><div class="line"><span class="preprocessor">#include &lt;<a class="code" href="cub_8cuh.html">cub/cub.cuh</a>&gt;</span>   <span class="comment">// or equivalently &lt;cub/device/device_radix_sort.cuh&gt;</span></div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Declare, allocate, and initialize device-accessible pointers for input and output</span></div>
@@ -702,29 +680,27 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <div class="line"><span class="keywordtype">int</span>                      *d_offsets;     <span class="comment">// e.g., [0, 3, 3, 7]</span></div>
 <div class="line"><span class="keywordtype">int</span>                      *d_in;          <span class="comment">// e.g., [8, 6, 7, 5, 3, 0, 9]</span></div>
 <div class="line">KeyValuePair&lt;int, int&gt;   *d_out;         <span class="comment">// e.g., [{-,-}, {-,-}, {-,-}]</span></div>
-<div class="line"><span class="keywordtype">int</span>                      identity;       <span class="comment">// e.g., MAX_INT</span></div>
 <div class="line">...</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Determine temporary device storage requirements</span></div>
 <div class="line">void     *d_temp_storage = NULL;</div>
 <div class="line"><span class="keywordtype">size_t</span>   temp_storage_bytes = 0;</div>
-<div class="line"><a class="code" href="structcub_1_1_device_segmented_reduce.html#a1adbde01527b1df8fd326569dce6aa03" title="Finds the first device-wide minimum in each segment using the less-than (&#39;&lt;&#39;) operator, also returning the in-segment index of that item. ">cub::DeviceSegmentedReduce::ArgMin</a>(d_temp_storage, temp_storage_bytes, d_in, d_out,</div>
-<div class="line">    num_segments, d_offsets, d_offsets + 1, identity);</div>
+<div class="line"><a class="code" href="structcub_1_1_device_segmented_reduce.html#a085150ad8d55de8665b45a3f69f38bce" title="Finds the first device-wide minimum in each segment using the less-than (&#39;&lt;&#39;) operator, also returning the in-segment index of that item. ">cub::DeviceSegmentedReduce::ArgMin</a>(d_temp_storage, temp_storage_bytes, d_in, d_out,</div>
+<div class="line">    num_segments, d_offsets, d_offsets + 1);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Allocate temporary storage</span></div>
 <div class="line">cudaMalloc(&amp;d_temp_storage, temp_storage_bytes);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Run argmin-reduction</span></div>
-<div class="line"><a class="code" href="structcub_1_1_device_segmented_reduce.html#a1adbde01527b1df8fd326569dce6aa03" title="Finds the first device-wide minimum in each segment using the less-than (&#39;&lt;&#39;) operator, also returning the in-segment index of that item. ">cub::DeviceSegmentedReduce::ArgMin</a>(d_temp_storage, temp_storage_bytes, d_in, d_out,</div>
-<div class="line">    num_segments, d_offsets, d_offsets + 1, identity);</div>
+<div class="line"><a class="code" href="structcub_1_1_device_segmented_reduce.html#a085150ad8d55de8665b45a3f69f38bce" title="Finds the first device-wide minimum in each segment using the less-than (&#39;&lt;&#39;) operator, also returning the in-segment index of that item. ">cub::DeviceSegmentedReduce::ArgMin</a>(d_temp_storage, temp_storage_bytes, d_in, d_out,</div>
+<div class="line">    num_segments, d_offsets, d_offsets + 1);</div>
 <div class="line"></div>
-<div class="line"><span class="comment">// d_out &lt;-- [{1,6}, {-1,MAX_INT}, {2,0}]</span></div>
+<div class="line"><span class="comment">// d_out &lt;-- [{1,6}, {1,INT_MAX}, {2,0}]</span></div>
 </div><!-- fragment --></dd></dl>
 <dl class="tparams"><dt>Template Parameters</dt><dd>
   <table class="tparams">
     <tr><td class="paramname">InputIteratorT</td><td><b>[inferred]</b> Random-access input iterator type for reading input items (of some type <code>T</code>) (may be a simple pointer type) </td></tr>
     <tr><td class="paramname">OutputIteratorT</td><td><b>[inferred]</b> Output iterator type for recording the reduced aggregate (having value type <code>KeyValuePair&lt;int, T&gt;</code>) (may be a simple pointer type) </td></tr>
-    <tr><td class="paramname">T</td><td><b>[inferred]</b> Data type being reduced </td></tr>
   </table>
   </dd>
 </dl>
@@ -737,22 +713,21 @@ <h2 class="groupheader">Member Function Documentation</h2>
     <tr><td class="paramdir">[in]</td><td class="paramname">num_segments</td><td>The number of segments that comprise the sorting data </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">d_begin_offsets</td><td>Device-accessible pointer to the sequence of beginning offsets of length <code>num_segments</code>, such that <code>d_begin_offsets[i]</code> is the first element of the <em>i</em><sup>th</sup> data segment in <code>d_keys_*</code> and <code>d_values_*</code> </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">d_end_offsets</td><td>Device-accessible pointer to the sequence of ending offsets of length <code>num_segments</code>, such that <code>d_end_offsets[i]-1</code> is the last element of the <em>i</em><sup>th</sup> data segment in <code>d_keys_*</code> and <code>d_values_*</code>. <a class="el" href="structcub_1_1_if.html" title="Type selection (IF ? ThenType : ElseType) ">If</a> <code>d_end_offsets[i]-1</code> &lt;= <code>d_begin_offsets[i]</code>, the <em>i</em><sup>th</sup> is considered empty. </td></tr>
-    <tr><td class="paramdir">[in]</td><td class="paramname">identity</td><td>The identity value to be returned for zero-length segments </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">stream</td><td><b>[optional]</b> CUDA stream to launch kernels within. Default is stream<sub>0</sub>. </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">debug_synchronous</td><td><b>[optional]</b> Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is <code>false</code>. </td></tr>
   </table>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="device__segmented__reduce_8cuh_source.html#l00388">388</a> of file <a class="el" href="device__segmented__reduce_8cuh_source.html">device_segmented_reduce.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__segmented__reduce_8cuh_source.html#l00368">368</a> of file <a class="el" href="device__segmented__reduce_8cuh_source.html">device_segmented_reduce.cuh</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a81cc146a6aa537728469ca76f5d91c16"></a>
+<a class="anchor" id="aa1f982f913c95d9974412b8fc0995183"></a>
 <div class="memitem">
 <div class="memproto">
 <div class="memtemplate">
-template&lt;typename InputIteratorT , typename OutputIteratorT , typename T &gt; </div>
+template&lt;typename InputIteratorT , typename OutputIteratorT &gt; </div>
 <table class="mlabels">
   <tr>
   <td class="mlabels-left">
@@ -799,12 +774,6 @@ <h2 class="groupheader">Member Function Documentation</h2>
           <td class="paramtype">int *&#160;</td>
           <td class="paramname"><em>d_end_offsets</em>, </td>
         </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">T&#160;</td>
-          <td class="paramname"><em>identity</em>, </td>
-        </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
@@ -832,13 +801,13 @@ <h2 class="groupheader">Member Function Documentation</h2>
 
 <p>Computes a device-wide segmented maximum using the greater-than ('&gt;') operator. </p>
 <dl class="section user"><dt></dt><dd><ul>
-<li>The specified <code>identity</code> value is produced for zero-length segments. <code>identity</code> is assumed to be arithmetically neutral for the input type's <code>&gt;</code> operation, i.e., <code>(identity &gt; XXX) = false</code></li>
+<li>Uses <code>std::numeric_limits&lt;T&gt;::lowest()</code> as the initial value of the reduction.</li>
 <li>When input a contiguous sequence of segments, a single sequence <code>segment_offsets</code> (of length <code>num_segments+1</code>) can be aliased for both the <code>d_begin_offsets</code> and <code>d_end_offsets</code> parameters (where the latter is specified as <code>segment_offsets+1</code>).</li>
-<li>Does not support non-commutative maximum operators.</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
+<li>Does not support <code>&gt;</code> operators that are non-commutative.</li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
-<dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates the max-reduction of a device vector of <code>int</code> items. </dd></dl>
+<dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates the max-reduction of a device vector of <code>int</code> data elements. </dd></dl>
 <dl class="section user"><dt></dt><dd><div class="fragment"><div class="line"><span class="preprocessor">#include &lt;<a class="code" href="cub_8cuh.html">cub/cub.cuh</a>&gt;</span>   <span class="comment">// or equivalently &lt;cub/device/device_radix_sort.cuh&gt;</span></div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Declare, allocate, and initialize device-accessible pointers for input and output</span></div>
@@ -846,29 +815,27 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <div class="line"><span class="keywordtype">int</span> *d_offsets;     <span class="comment">// e.g., [0, 3, 3, 7]</span></div>
 <div class="line"><span class="keywordtype">int</span> *d_in;          <span class="comment">// e.g., [8, 6, 7, 5, 3, 0, 9]</span></div>
 <div class="line"><span class="keywordtype">int</span> *d_out;         <span class="comment">// e.g., [-, -, -]</span></div>
-<div class="line"><span class="keywordtype">int</span> identity;       <span class="comment">// e.g., MIN_INT</span></div>
 <div class="line">...</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Determine temporary device storage requirements</span></div>
 <div class="line">void     *d_temp_storage = NULL;</div>
 <div class="line"><span class="keywordtype">size_t</span>   temp_storage_bytes = 0;</div>
-<div class="line"><a class="code" href="structcub_1_1_device_segmented_reduce.html#a81cc146a6aa537728469ca76f5d91c16" title="Computes a device-wide segmented maximum using the greater-than (&#39;&gt;&#39;) operator. ">cub::DeviceSegmentedReduce::Max</a>(d_temp_storage, temp_storage_bytes, d_in, d_out,</div>
-<div class="line">    num_segments, d_offsets, d_offsets + 1, identity);</div>
+<div class="line"><a class="code" href="structcub_1_1_device_segmented_reduce.html#aa1f982f913c95d9974412b8fc0995183" title="Computes a device-wide segmented maximum using the greater-than (&#39;&gt;&#39;) operator. ">cub::DeviceSegmentedReduce::Max</a>(d_temp_storage, temp_storage_bytes, d_in, d_out,</div>
+<div class="line">    num_segments, d_offsets, d_offsets + 1);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Allocate temporary storage</span></div>
 <div class="line">cudaMalloc(&amp;d_temp_storage, temp_storage_bytes);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Run max-reduction</span></div>
-<div class="line"><a class="code" href="structcub_1_1_device_segmented_reduce.html#a81cc146a6aa537728469ca76f5d91c16" title="Computes a device-wide segmented maximum using the greater-than (&#39;&gt;&#39;) operator. ">cub::DeviceSegmentedReduce::Max</a>(d_temp_storage, temp_storage_bytes, d_in, d_out,</div>
-<div class="line">    num_segments, d_offsets, d_offsets + 1, identity);</div>
+<div class="line"><a class="code" href="structcub_1_1_device_segmented_reduce.html#aa1f982f913c95d9974412b8fc0995183" title="Computes a device-wide segmented maximum using the greater-than (&#39;&gt;&#39;) operator. ">cub::DeviceSegmentedReduce::Max</a>(d_temp_storage, temp_storage_bytes, d_in, d_out,</div>
+<div class="line">    num_segments, d_offsets, d_offsets + 1);</div>
 <div class="line"></div>
-<div class="line"><span class="comment">// d_out &lt;-- [8, MIN_INT, 9]</span></div>
+<div class="line"><span class="comment">// d_out &lt;-- [8, INT_MIN, 9]</span></div>
 </div><!-- fragment --></dd></dl>
 <dl class="tparams"><dt>Template Parameters</dt><dd>
   <table class="tparams">
     <tr><td class="paramname">InputIteratorT</td><td><b>[inferred]</b> Random-access input iterator type for reading input items (may be a simple pointer type) </td></tr>
     <tr><td class="paramname">OutputIteratorT</td><td><b>[inferred]</b> Output iterator type for recording the reduced aggregate (may be a simple pointer type) </td></tr>
-    <tr><td class="paramname">T</td><td><b>[inferred]</b> Data type being reduced </td></tr>
   </table>
   </dd>
 </dl>
@@ -881,22 +848,21 @@ <h2 class="groupheader">Member Function Documentation</h2>
     <tr><td class="paramdir">[in]</td><td class="paramname">num_segments</td><td>The number of segments that comprise the sorting data </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">d_begin_offsets</td><td>Device-accessible pointer to the sequence of beginning offsets of length <code>num_segments</code>, such that <code>d_begin_offsets[i]</code> is the first element of the <em>i</em><sup>th</sup> data segment in <code>d_keys_*</code> and <code>d_values_*</code> </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">d_end_offsets</td><td>Device-accessible pointer to the sequence of ending offsets of length <code>num_segments</code>, such that <code>d_end_offsets[i]-1</code> is the last element of the <em>i</em><sup>th</sup> data segment in <code>d_keys_*</code> and <code>d_values_*</code>. <a class="el" href="structcub_1_1_if.html" title="Type selection (IF ? ThenType : ElseType) ">If</a> <code>d_end_offsets[i]-1</code> &lt;= <code>d_begin_offsets[i]</code>, the <em>i</em><sup>th</sup> is considered empty. </td></tr>
-    <tr><td class="paramdir">[in]</td><td class="paramname">identity</td><td>The identity value to be returned for zero-length segments </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">stream</td><td><b>[optional]</b> CUDA stream to launch kernels within. Default is stream<sub>0</sub>. </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">debug_synchronous</td><td><b>[optional]</b> Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is <code>false</code>. </td></tr>
   </table>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="device__segmented__reduce_8cuh_source.html#l00478">478</a> of file <a class="el" href="device__segmented__reduce_8cuh_source.html">device_segmented_reduce.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__segmented__reduce_8cuh_source.html#l00450">450</a> of file <a class="el" href="device__segmented__reduce_8cuh_source.html">device_segmented_reduce.cuh</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="abf36bf795ef905c5f3736f46e7f5d0da"></a>
+<a class="anchor" id="ad0e1f7eede9a0f93a379950eac7d8329"></a>
 <div class="memitem">
 <div class="memproto">
 <div class="memtemplate">
-template&lt;typename InputIteratorT , typename OutputIteratorT , typename T &gt; </div>
+template&lt;typename InputIteratorT , typename OutputIteratorT &gt; </div>
 <table class="mlabels">
   <tr>
   <td class="mlabels-left">
@@ -943,12 +909,6 @@ <h2 class="groupheader">Member Function Documentation</h2>
           <td class="paramtype">int *&#160;</td>
           <td class="paramname"><em>d_end_offsets</em>, </td>
         </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">T&#160;</td>
-          <td class="paramname"><em>identity</em>, </td>
-        </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
@@ -976,14 +936,17 @@ <h2 class="groupheader">Member Function Documentation</h2>
 
 <p>Finds the first device-wide maximum in each segment using the greater-than ('&gt;') operator, also returning the in-segment index of that item. </p>
 <dl class="section user"><dt></dt><dd><ul>
-<li>Assuming the input <code>d_in</code> has value type <code>T</code>, the output <code>d_out</code> must have value type <code>KeyValuePair&lt;int, T&gt;</code>. The maximum value is written to <code>d_out.value</code> and its location in the input array is written to <code>d_out.key</code>.</li>
-<li>The specified <code>identity</code> value is produced for zero-length segments. <code>identity</code> is assumed to be arithmetically neutral for the input type's <code>&gt;</code> operation, i.e., <code>(identity &gt; XXX) = false</code></li>
+<li>The output value type of <code>d_out</code> is cub::KeyValuePair <code>&lt;int, T&gt;</code> (assuming the value type of <code>d_in</code> is <code>T</code>)<ul>
+<li>The maximum of the <em>i</em><sup>th</sup> segment is written to <code>d_out[i].value</code> and its offset in that segment is written to <code>d_out[i].key</code>.</li>
+<li>The <code>{1, std::numeric_limits&lt;T&gt;::lowest()}</code> tuple is produced for zero-length inputs</li>
+</ul>
+</li>
 <li>When input a contiguous sequence of segments, a single sequence <code>segment_offsets</code> (of length <code>num_segments+1</code>) can be aliased for both the <code>d_begin_offsets</code> and <code>d_end_offsets</code> parameters (where the latter is specified as <code>segment_offsets+1</code>).</li>
-<li>Does not support non-commutative maximum operators.</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
+<li>Does not support <code>&gt;</code> operators that are non-commutative.</li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
-<dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates the argmax-reduction of a device vector of <code>int</code> items. </dd></dl>
+<dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates the argmax-reduction of a device vector of <code>int</code> data elements. </dd></dl>
 <dl class="section user"><dt></dt><dd><div class="fragment"><div class="line"><span class="preprocessor">#include &lt;<a class="code" href="cub_8cuh.html">cub/cub.cuh</a>&gt;</span>   <span class="comment">// or equivalently &lt;cub/device/device_reduce.cuh&gt;</span></div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Declare, allocate, and initialize device-accessible pointers for input and output</span></div>
@@ -991,29 +954,27 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <div class="line"><span class="keywordtype">int</span>                      *d_offsets;     <span class="comment">// e.g., [0, 3, 3, 7]</span></div>
 <div class="line"><span class="keywordtype">int</span>                      *d_in;          <span class="comment">// e.g., [8, 6, 7, 5, 3, 0, 9]</span></div>
 <div class="line">KeyValuePair&lt;int, int&gt;   *d_out;         <span class="comment">// e.g., [{-,-}, {-,-}, {-,-}]</span></div>
-<div class="line"><span class="keywordtype">int</span>                      identity;       <span class="comment">// e.g., MIN_INT</span></div>
 <div class="line">...</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Determine temporary device storage requirements</span></div>
 <div class="line">void     *d_temp_storage = NULL;</div>
 <div class="line"><span class="keywordtype">size_t</span>   temp_storage_bytes = 0;</div>
-<div class="line"><a class="code" href="structcub_1_1_device_segmented_reduce.html#abf36bf795ef905c5f3736f46e7f5d0da" title="Finds the first device-wide maximum in each segment using the greater-than (&#39;&gt;&#39;) operator...">cub::DeviceSegmentedReduce::ArgMax</a>(d_temp_storage, temp_storage_bytes, d_in, d_out,</div>
-<div class="line">    num_segments, d_offsets, d_offsets + 1, identity);</div>
+<div class="line"><a class="code" href="structcub_1_1_device_segmented_reduce.html#ad0e1f7eede9a0f93a379950eac7d8329" title="Finds the first device-wide maximum in each segment using the greater-than (&#39;&gt;&#39;) operator...">cub::DeviceSegmentedReduce::ArgMax</a>(d_temp_storage, temp_storage_bytes, d_in, d_out,</div>
+<div class="line">    num_segments, d_offsets, d_offsets + 1);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Allocate temporary storage</span></div>
 <div class="line">cudaMalloc(&amp;d_temp_storage, temp_storage_bytes);</div>
 <div class="line"></div>
 <div class="line"><span class="comment">// Run argmax-reduction</span></div>
-<div class="line"><a class="code" href="structcub_1_1_device_segmented_reduce.html#abf36bf795ef905c5f3736f46e7f5d0da" title="Finds the first device-wide maximum in each segment using the greater-than (&#39;&gt;&#39;) operator...">cub::DeviceSegmentedReduce::ArgMax</a>(d_temp_storage, temp_storage_bytes, d_in, d_out,</div>
-<div class="line">    num_segments, d_offsets, d_offsets + 1, identity);</div>
+<div class="line"><a class="code" href="structcub_1_1_device_segmented_reduce.html#ad0e1f7eede9a0f93a379950eac7d8329" title="Finds the first device-wide maximum in each segment using the greater-than (&#39;&gt;&#39;) operator...">cub::DeviceSegmentedReduce::ArgMax</a>(d_temp_storage, temp_storage_bytes, d_in, d_out,</div>
+<div class="line">    num_segments, d_offsets, d_offsets + 1);</div>
 <div class="line"></div>
-<div class="line"><span class="comment">// d_out &lt;-- [{0,8}, {-1,MIN_INT}, {3,9}]</span></div>
+<div class="line"><span class="comment">// d_out &lt;-- [{0,8}, {1,INT_MIN}, {3,9}]</span></div>
 </div><!-- fragment --></dd></dl>
 <dl class="tparams"><dt>Template Parameters</dt><dd>
   <table class="tparams">
     <tr><td class="paramname">InputIteratorT</td><td><b>[inferred]</b> Random-access input iterator type for reading input items (of some type <code>T</code>) (may be a simple pointer type) </td></tr>
     <tr><td class="paramname">OutputIteratorT</td><td><b>[inferred]</b> Output iterator type for recording the reduced aggregate (having value type <code>KeyValuePair&lt;int, T&gt;</code>) (may be a simple pointer type) </td></tr>
-    <tr><td class="paramname">T</td><td><b>[inferred]</b> Data type being reduced </td></tr>
   </table>
   </dd>
 </dl>
@@ -1026,14 +987,13 @@ <h2 class="groupheader">Member Function Documentation</h2>
     <tr><td class="paramdir">[in]</td><td class="paramname">num_segments</td><td>The number of segments that comprise the sorting data </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">d_begin_offsets</td><td>Device-accessible pointer to the sequence of beginning offsets of length <code>num_segments</code>, such that <code>d_begin_offsets[i]</code> is the first element of the <em>i</em><sup>th</sup> data segment in <code>d_keys_*</code> and <code>d_values_*</code> </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">d_end_offsets</td><td>Device-accessible pointer to the sequence of ending offsets of length <code>num_segments</code>, such that <code>d_end_offsets[i]-1</code> is the last element of the <em>i</em><sup>th</sup> data segment in <code>d_keys_*</code> and <code>d_values_*</code>. <a class="el" href="structcub_1_1_if.html" title="Type selection (IF ? ThenType : ElseType) ">If</a> <code>d_end_offsets[i]-1</code> &lt;= <code>d_begin_offsets[i]</code>, the <em>i</em><sup>th</sup> is considered empty. </td></tr>
-    <tr><td class="paramdir">[in]</td><td class="paramname">identity</td><td>The identity value to be returned for zero-length segments </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">stream</td><td><b>[optional]</b> CUDA stream to launch kernels within. Default is stream<sub>0</sub>. </td></tr>
     <tr><td class="paramdir">[in]</td><td class="paramname">debug_synchronous</td><td><b>[optional]</b> Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is <code>false</code>. </td></tr>
   </table>
   </dd>
 </dl>
 
-<p>Definition at line <a class="el" href="device__segmented__reduce_8cuh_source.html#l00565">565</a> of file <a class="el" href="device__segmented__reduce_8cuh_source.html">device_segmented_reduce.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__segmented__reduce_8cuh_source.html#l00530">530</a> of file <a class="el" href="device__segmented__reduce_8cuh_source.html">device_segmented_reduce.cuh</a>.</p>
 
 </div>
 </div>
@@ -1044,7 +1004,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Wed Nov 18 2015 10:24:47 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:18 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_device_select-members.html b/docs/html/structcub_1_1_device_select-members.html
index 3bd4ef0552..1ddef29561 100644
--- a/docs/html/structcub_1_1_device_select-members.html
+++ b/docs/html/structcub_1_1_device_select-members.html
@@ -111,7 +111,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:07 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:18 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_device_select.html b/docs/html/structcub_1_1_device_select.html
index fb90b07ec1..aa12eb3825 100644
--- a/docs/html/structcub_1_1_device_select.html
+++ b/docs/html/structcub_1_1_device_select.html
@@ -112,7 +112,7 @@
 .</div></div>
  <dl class="section user"><dt>Overview</dt><dd>These operations apply a selection criterion to selectively copy items from a specified input sequence to a compact output sequence.</dd></dl>
 <dl class="section user"><dt>Usage Considerations</dt><dd><ul>
-<li><em>Dynamic parallelism</em>. <a class="el" href="structcub_1_1_device_select.html" title="DeviceSelect provides device-wide, parallel operations for compacting selected items from sequences o...">DeviceSelect</a> methods can be called within kernel code on devices in which CUDA dynamic parallelism is supported. When calling these methods from kernel code, be sure to define the <code>CUB_CDP</code> macro in your compiler's macro definitions.</li>
+<li><em>Dynamic parallelism</em>. <a class="el" href="structcub_1_1_device_select.html" title="DeviceSelect provides device-wide, parallel operations for compacting selected items from sequences o...">DeviceSelect</a> methods can be called within kernel code on devices in which CUDA dynamic parallelism is supported.</li>
 </ul>
 </dd></dl>
 <dl class="section user"><dt>Performance</dt><dd>The work-complexity of select-flagged, select-if, and select-unique as a function of input size is linear, resulting in performance throughput that plateaus with problem sizes large enough to saturate the GPU.</dd></dl>
@@ -247,8 +247,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <dl class="section user"><dt></dt><dd><ul>
 <li>The value type of <code>d_flags</code> must be castable to <code>bool</code> (e.g., <code>bool</code>, <code>char</code>, <code>int</code>, etc.).</li>
 <li>Copies of the selected items are compacted into <code>d_out</code> and maintain their original relative ordering.</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
-<li></li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
 <dl class="section user"><dt>Snippet</dt><dd>The code snippet below illustrates the compaction of items selected from an <code>int</code> device vector. </dd></dl>
@@ -301,7 +300,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 </dl>
 <dl><dt><b>Examples: </b></dt><dd><a class="el" href="example_device_select_flagged_8cu-example.html#a2">example_device_select_flagged.cu</a>.</dd>
 </dl>
-<p>Definition at line <a class="el" href="device__select_8cuh_source.html#l00134">134</a> of file <a class="el" href="device__select_8cuh_source.html">device_select.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__select_8cuh_source.html#l00133">133</a> of file <a class="el" href="device__select_8cuh_source.html">device_select.cuh</a>.</p>
 
 </div>
 </div>
@@ -389,8 +388,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
  </p>
 <dl class="section user"><dt></dt><dd><ul>
 <li>Copies of the selected items are compacted into <code>d_out</code> and maintain their original relative ordering.</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
-<li></li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
 <dl class="section user"><dt>Performance</dt><dd>The following charts illustrate saturated select-if performance across different CUDA architectures for <code>int32</code> and <code>int64</code> items, respectively. Items are selected with 50% probability.</dd></dl>
@@ -471,7 +469,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 </dl>
 <dl><dt><b>Examples: </b></dt><dd><a class="el" href="example_device_select_if_8cu-example.html#a2">example_device_select_if.cu</a>.</dd>
 </dl>
-<p>Definition at line <a class="el" href="device__select_8cuh_source.html#l00241">241</a> of file <a class="el" href="device__select_8cuh_source.html">device_select.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__select_8cuh_source.html#l00239">239</a> of file <a class="el" href="device__select_8cuh_source.html">device_select.cuh</a>.</p>
 
 </div>
 </div>
@@ -554,8 +552,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <dl class="section user"><dt></dt><dd><ul>
 <li>The <code>==</code> equality operator is used to determine whether keys are equivalent</li>
 <li>Copies of the selected items are compacted into <code>d_out</code> and maintain their original relative ordering.</li>
-<li>When <code>d_temp_storage</code> is NULL, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
-<li></li>
+<li>When <code>d_temp_storage</code> is <code>NULL</code>, no work is done and the required allocation size is returned in <code>temp_storage_bytes</code>.</li>
 </ul>
 </dd></dl>
 <dl class="section user"><dt>Performance</dt><dd>The following charts illustrate saturated select-unique performance across different CUDA architectures for <code>int32</code> and <code>int64</code> items, respectively. Segments have lengths uniformly sampled from [1,1000].</dd></dl>
@@ -619,7 +616,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 </dl>
 <dl><dt><b>Examples: </b></dt><dd><a class="el" href="example_device_select_unique_8cu-example.html#a2">example_device_select_unique.cu</a>.</dd>
 </dl>
-<p>Definition at line <a class="el" href="device__select_8cuh_source.html#l00332">332</a> of file <a class="el" href="device__select_8cuh_source.html">device_select.cuh</a>.</p>
+<p>Definition at line <a class="el" href="device__select_8cuh_source.html#l00329">329</a> of file <a class="el" href="device__select_8cuh_source.html">device_select.cuh</a>.</p>
 
 </div>
 </div>
@@ -630,7 +627,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:07 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:18 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_device_spmv-members.html b/docs/html/structcub_1_1_device_spmv-members.html
index 34680d3006..1d18350566 100644
--- a/docs/html/structcub_1_1_device_spmv-members.html
+++ b/docs/html/structcub_1_1_device_spmv-members.html
@@ -109,7 +109,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:07 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:18 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_device_spmv.html b/docs/html/structcub_1_1_device_spmv.html
index 28f6892cbc..2866fbd2cf 100644
--- a/docs/html/structcub_1_1_device_spmv.html
+++ b/docs/html/structcub_1_1_device_spmv.html
@@ -112,7 +112,7 @@
 </ul>
 </dd></dl>
 <dl class="section user"><dt>Usage Considerations</dt><dd><ul>
-<li><em>Dynamic parallelism</em>. <a class="el" href="structcub_1_1_device_spmv.html" title="DeviceSpmv provides device-wide parallel operations for performing sparse-matrix * dense-vector multi...">DeviceSpmv</a> methods can be called within kernel code on devices in which CUDA dynamic parallelism is supported. When calling these methods from kernel code, be sure to define the <code>CUB_CDP</code> macro in your compiler's macro definitions. </li>
+<li><em>Dynamic parallelism</em>. <a class="el" href="structcub_1_1_device_spmv.html" title="DeviceSpmv provides device-wide parallel operations for performing sparse-matrix * dense-vector multi...">DeviceSpmv</a> methods can be called within kernel code on devices in which CUDA dynamic parallelism is supported. </li>
 </ul>
 </dd></dl>
 
@@ -298,7 +298,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:07 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:18 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_equality-members.html b/docs/html/structcub_1_1_equality-members.html
index 539519d703..3aaceca78c 100644
--- a/docs/html/structcub_1_1_equality-members.html
+++ b/docs/html/structcub_1_1_equality-members.html
@@ -109,7 +109,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_equality.html b/docs/html/structcub_1_1_equality.html
index 4850362ca7..27c3217fe6 100644
--- a/docs/html/structcub_1_1_equality.html
+++ b/docs/html/structcub_1_1_equality.html
@@ -125,7 +125,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_equals-members.html b/docs/html/structcub_1_1_equals-members.html
index 9e48e992a0..54b06ccee3 100644
--- a/docs/html/structcub_1_1_equals-members.html
+++ b/docs/html/structcub_1_1_equals-members.html
@@ -110,7 +110,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_equals.html b/docs/html/structcub_1_1_equals.html
index 3e14827c84..13073f5f8c 100644
--- a/docs/html/structcub_1_1_equals.html
+++ b/docs/html/structcub_1_1_equals.html
@@ -112,7 +112,7 @@
 
 <p>Type equality test. </p>
 
-<p>Definition at line <a class="el" href="util__type_8cuh_source.html#l00091">91</a> of file <a class="el" href="util__type_8cuh_source.html">util_type.cuh</a>.</p>
+<p>Definition at line <a class="el" href="util__type_8cuh_source.html#l00092">92</a> of file <a class="el" href="util__type_8cuh_source.html">util_type.cuh</a>.</p>
 </div><table class="memberdecls">
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-types"></a>
 Public Types</h2></td></tr>
@@ -128,7 +128,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_if-members.html b/docs/html/structcub_1_1_if-members.html
index c2b46812df..e59a3b22e8 100644
--- a/docs/html/structcub_1_1_if-members.html
+++ b/docs/html/structcub_1_1_if-members.html
@@ -109,7 +109,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_if.html b/docs/html/structcub_1_1_if.html
index 3e0a0ba60e..29a09f5540 100644
--- a/docs/html/structcub_1_1_if.html
+++ b/docs/html/structcub_1_1_if.html
@@ -113,7 +113,7 @@
 
 <p>Type selection (<code>IF ? ThenType : ElseType</code>) </p>
 
-<p>Definition at line <a class="el" href="util__type_8cuh_source.html#l00065">65</a> of file <a class="el" href="util__type_8cuh_source.html">util_type.cuh</a>.</p>
+<p>Definition at line <a class="el" href="util__type_8cuh_source.html#l00066">66</a> of file <a class="el" href="util__type_8cuh_source.html">util_type.cuh</a>.</p>
 </div><table class="memberdecls">
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-types"></a>
 Public Types</h2></td></tr>
@@ -129,7 +129,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_inequality-members.html b/docs/html/structcub_1_1_inequality-members.html
index df9c7f55e0..393d7c474e 100644
--- a/docs/html/structcub_1_1_inequality-members.html
+++ b/docs/html/structcub_1_1_inequality-members.html
@@ -109,7 +109,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_inequality.html b/docs/html/structcub_1_1_inequality.html
index 599096c20e..d0f71940e7 100644
--- a/docs/html/structcub_1_1_inequality.html
+++ b/docs/html/structcub_1_1_inequality.html
@@ -125,7 +125,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_inequality_wrapper-members.html b/docs/html/structcub_1_1_inequality_wrapper-members.html
index 0d5f1b7451..757b97aa6a 100644
--- a/docs/html/structcub_1_1_inequality_wrapper-members.html
+++ b/docs/html/structcub_1_1_inequality_wrapper-members.html
@@ -111,7 +111,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_inequality_wrapper.html b/docs/html/structcub_1_1_inequality_wrapper.html
index 02e471a28a..3fca37405a 100644
--- a/docs/html/structcub_1_1_inequality_wrapper.html
+++ b/docs/html/structcub_1_1_inequality_wrapper.html
@@ -141,7 +141,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_is_pointer-members.html b/docs/html/structcub_1_1_is_pointer-members.html
index 9fb5c05dea..4684e50c80 100644
--- a/docs/html/structcub_1_1_is_pointer-members.html
+++ b/docs/html/structcub_1_1_is_pointer-members.html
@@ -109,7 +109,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_is_pointer.html b/docs/html/structcub_1_1_is_pointer.html
index 6b6c93921a..f79191831d 100644
--- a/docs/html/structcub_1_1_is_pointer.html
+++ b/docs/html/structcub_1_1_is_pointer.html
@@ -111,13 +111,13 @@
 
 <p>Pointer vs. iterator. </p>
 
-<p>Definition at line <a class="el" href="util__type_8cuh_source.html#l00668">668</a> of file <a class="el" href="util__type_8cuh_source.html">util_type.cuh</a>.</p>
+<p>Definition at line <a class="el" href="util__type_8cuh_source.html#l00164">164</a> of file <a class="el" href="util__type_8cuh_source.html">util_type.cuh</a>.</p>
 </div><table class="memberdecls">
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-types"></a>
 Public Types</h2></td></tr>
-<tr class="memitem:a04e609c7722f9314ca4fd3bc959e133c"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom">{ <b>VALUE</b> = 0
+<tr class="memitem:ae9558c8647b3818091bd167854cf8b8b"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom">{ <b>VALUE</b> = 0
  }</td></tr>
-<tr class="separator:a04e609c7722f9314ca4fd3bc959e133c"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="separator:ae9558c8647b3818091bd167854cf8b8b"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table>
 <hr/>The documentation for this struct was generated from the following file:<ul>
 <li><a class="el" href="util__type_8cuh_source.html">util_type.cuh</a></li>
@@ -126,7 +126,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_is_volatile-members.html b/docs/html/structcub_1_1_is_volatile-members.html
index c1a8ba9cc4..07573ac7d6 100644
--- a/docs/html/structcub_1_1_is_volatile-members.html
+++ b/docs/html/structcub_1_1_is_volatile-members.html
@@ -109,7 +109,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_is_volatile.html b/docs/html/structcub_1_1_is_volatile.html
index f3ab3b9306..66e7bd0708 100644
--- a/docs/html/structcub_1_1_is_volatile.html
+++ b/docs/html/structcub_1_1_is_volatile.html
@@ -111,13 +111,13 @@
 
 <p>Volatile modifier test. </p>
 
-<p>Definition at line <a class="el" href="util__type_8cuh_source.html#l00693">693</a> of file <a class="el" href="util__type_8cuh_source.html">util_type.cuh</a>.</p>
+<p>Definition at line <a class="el" href="util__type_8cuh_source.html#l00189">189</a> of file <a class="el" href="util__type_8cuh_source.html">util_type.cuh</a>.</p>
 </div><table class="memberdecls">
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-types"></a>
 Public Types</h2></td></tr>
-<tr class="memitem:ad50d811f4a0635283cd5d6172c74b62f"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom">{ <b>VALUE</b> = 0
+<tr class="memitem:a18bffa18d9cd8dbd0635c4a5691cbd1a"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom">{ <b>VALUE</b> = 0
  }</td></tr>
-<tr class="separator:ad50d811f4a0635283cd5d6172c74b62f"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="separator:a18bffa18d9cd8dbd0635c4a5691cbd1a"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table>
 <hr/>The documentation for this struct was generated from the following file:<ul>
 <li><a class="el" href="util__type_8cuh_source.html">util_type.cuh</a></li>
@@ -126,7 +126,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_log2-members.html b/docs/html/structcub_1_1_log2-members.html
index 5992ffac7d..6d928025b5 100644
--- a/docs/html/structcub_1_1_log2-members.html
+++ b/docs/html/structcub_1_1_log2-members.html
@@ -109,7 +109,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_log2.html b/docs/html/structcub_1_1_log2.html
index 905952bc43..b0f0dd1abe 100644
--- a/docs/html/structcub_1_1_log2.html
+++ b/docs/html/structcub_1_1_log2.html
@@ -114,14 +114,14 @@
 <p>Statically determine log2(N), rounded up. </p>
 <p>For example: Log2&lt;8&gt;::VALUE // 3 Log2&lt;3&gt;::VALUE // 2 </p>
 
-<p>Definition at line <a class="el" href="util__type_8cuh_source.html#l00630">630</a> of file <a class="el" href="util__type_8cuh_source.html">util_type.cuh</a>.</p>
+<p>Definition at line <a class="el" href="util__type_8cuh_source.html#l00126">126</a> of file <a class="el" href="util__type_8cuh_source.html">util_type.cuh</a>.</p>
 </div><table class="memberdecls">
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-types"></a>
 Public Types</h2></td></tr>
-<tr class="memitem:a8ea7cf7cc90bf951e90c6e7621dab514"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom">{ <b>VALUE</b> = Log2&lt;N, (CURRENT_VAL &gt;&gt; 1), COUNT + 1&gt;::VALUE
+<tr class="memitem:aa9b3b55a22a2930a9006dbe5ba71b70b"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom">{ <b>VALUE</b> = Log2&lt;N, (CURRENT_VAL &gt;&gt; 1), COUNT + 1&gt;::VALUE
  }</td></tr>
-<tr class="memdesc:a8ea7cf7cc90bf951e90c6e7621dab514"><td class="mdescLeft">&#160;</td><td class="mdescRight">Static logarithm value. <br/></td></tr>
-<tr class="separator:a8ea7cf7cc90bf951e90c6e7621dab514"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memdesc:aa9b3b55a22a2930a9006dbe5ba71b70b"><td class="mdescLeft">&#160;</td><td class="mdescRight">Static logarithm value. <br/></td></tr>
+<tr class="separator:aa9b3b55a22a2930a9006dbe5ba71b70b"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table>
 <hr/>The documentation for this struct was generated from the following file:<ul>
 <li><a class="el" href="util__type_8cuh_source.html">util_type.cuh</a></li>
@@ -130,7 +130,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_max-members.html b/docs/html/structcub_1_1_max-members.html
index 1c4c4a611e..ced27421ee 100644
--- a/docs/html/structcub_1_1_max-members.html
+++ b/docs/html/structcub_1_1_max-members.html
@@ -109,7 +109,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_max.html b/docs/html/structcub_1_1_max.html
index 258670f6c4..e4a0ea37cb 100644
--- a/docs/html/structcub_1_1_max.html
+++ b/docs/html/structcub_1_1_max.html
@@ -125,7 +125,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_min-members.html b/docs/html/structcub_1_1_min-members.html
index b7718bcc04..9ca53347e1 100644
--- a/docs/html/structcub_1_1_min-members.html
+++ b/docs/html/structcub_1_1_min-members.html
@@ -109,7 +109,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_min.html b/docs/html/structcub_1_1_min.html
index 6f0ef3d04a..40032c1acd 100644
--- a/docs/html/structcub_1_1_min.html
+++ b/docs/html/structcub_1_1_min.html
@@ -125,7 +125,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_power_of_two-members.html b/docs/html/structcub_1_1_power_of_two-members.html
index 47eff51d7a..cb734fc323 100644
--- a/docs/html/structcub_1_1_power_of_two-members.html
+++ b/docs/html/structcub_1_1_power_of_two-members.html
@@ -109,7 +109,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_power_of_two.html b/docs/html/structcub_1_1_power_of_two.html
index 8411145afd..300208f4d4 100644
--- a/docs/html/structcub_1_1_power_of_two.html
+++ b/docs/html/structcub_1_1_power_of_two.html
@@ -111,13 +111,13 @@
 
 <p>Statically determine if N is a power-of-two. </p>
 
-<p>Definition at line <a class="el" href="util__type_8cuh_source.html#l00653">653</a> of file <a class="el" href="util__type_8cuh_source.html">util_type.cuh</a>.</p>
+<p>Definition at line <a class="el" href="util__type_8cuh_source.html#l00149">149</a> of file <a class="el" href="util__type_8cuh_source.html">util_type.cuh</a>.</p>
 </div><table class="memberdecls">
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-types"></a>
 Public Types</h2></td></tr>
-<tr class="memitem:a61185bc62fe8694243edc91cefd14d52"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom">{ <b>VALUE</b> = ((N &amp; (N - 1)) == 0)
+<tr class="memitem:a595019c57710039f939ae8e66a860d59"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom">{ <b>VALUE</b> = ((N &amp; (N - 1)) == 0)
  }</td></tr>
-<tr class="separator:a61185bc62fe8694243edc91cefd14d52"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="separator:a595019c57710039f939ae8e66a860d59"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table>
 <hr/>The documentation for this struct was generated from the following file:<ul>
 <li><a class="el" href="util__type_8cuh_source.html">util_type.cuh</a></li>
@@ -126,7 +126,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_reduce_by_key_op-members.html b/docs/html/structcub_1_1_reduce_by_key_op-members.html
index dc11ac52af..09b914d123 100644
--- a/docs/html/structcub_1_1_reduce_by_key_op-members.html
+++ b/docs/html/structcub_1_1_reduce_by_key_op-members.html
@@ -112,7 +112,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_reduce_by_key_op.html b/docs/html/structcub_1_1_reduce_by_key_op.html
index 00642ca142..e26c572052 100644
--- a/docs/html/structcub_1_1_reduce_by_key_op.html
+++ b/docs/html/structcub_1_1_reduce_by_key_op.html
@@ -194,7 +194,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_reduce_by_segment_op-members.html b/docs/html/structcub_1_1_reduce_by_segment_op-members.html
index 96b5f76b3e..e950e40b68 100644
--- a/docs/html/structcub_1_1_reduce_by_segment_op-members.html
+++ b/docs/html/structcub_1_1_reduce_by_segment_op-members.html
@@ -112,7 +112,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_reduce_by_segment_op.html b/docs/html/structcub_1_1_reduce_by_segment_op.html
index 5e2b2bfc43..4dee525b6d 100644
--- a/docs/html/structcub_1_1_reduce_by_segment_op.html
+++ b/docs/html/structcub_1_1_reduce_by_segment_op.html
@@ -111,8 +111,8 @@
 struct cub::ReduceBySegmentOp&lt; ReductionOpT &gt;</h3>
 
 <p>Reduce-by-segment functor. </p>
-<p>Given two <a class="el" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">cub::KeyValuePair</a> inputs <code>a</code> and <code>b</code> and a binary associative combining operator <code><code>f(const T &amp;x, const T &amp;y)</code></code>, an instance of this functor returns a <a class="el" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">cub::KeyValuePair</a> whose <code>key</code> field is <code>a.key</code> + <code>a.key</code>, and whose <code>value</code> field is either b.value if b.key is non-zero, or f(a.value, b.value) otherwise.</p>
-<p><a class="el" href="structcub_1_1_reduce_by_segment_op.html" title="Reduce-by-segment functor. ">ReduceBySegmentOp</a> is an associative, non-commutative binary combining operator for input sequences of <a class="el" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">cub::KeyValuePair</a> pairings. Such sequences are typically used to represent a segmented set of values to be reduced and a corresponding set of {0,1}-valued integer "head flags" demarcating the first value of each segment.&lt; Binary reduction operator to apply to values </p>
+<p>Given two cub::KeyValuePair inputs <code>a</code> and <code>b</code> and a binary associative combining operator <code><code>f(const T &amp;x, const T &amp;y)</code></code>, an instance of this functor returns a cub::KeyValuePair whose <code>key</code> field is <code>a.key</code> + <code>a.key</code>, and whose <code>value</code> field is either b.value if b.key is non-zero, or f(a.value, b.value) otherwise.</p>
+<p><a class="el" href="structcub_1_1_reduce_by_segment_op.html" title="Reduce-by-segment functor. ">ReduceBySegmentOp</a> is an associative, non-commutative binary combining operator for input sequences of cub::KeyValuePair pairings. Such sequences are typically used to represent a segmented set of values to be reduced and a corresponding set of {0,1}-valued integer "head flags" demarcating the first value of each segment.&lt; Binary reduction operator to apply to values </p>
 
 <p>Definition at line <a class="el" href="thread__operators_8cuh_source.html#l00249">249</a> of file <a class="el" href="thread__operators_8cuh_source.html">thread_operators.cuh</a>.</p>
 </div><table class="memberdecls">
@@ -177,7 +177,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 </div><div class="memdoc">
 
 <p>Scan operator. </p>
-<p>&lt; <a class="el" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">KeyValuePair</a> pairing of T (value) and OffsetT (head flag) </p>
+<p>&lt; KeyValuePair pairing of T (value) and OffsetT (head flag) </p>
 <dl class="params"><dt>Parameters</dt><dd>
   <table class="params">
     <tr><td class="paramname">first</td><td>First partial reduction </td></tr>
@@ -197,7 +197,7 @@ <h2 class="groupheader">Member Function Documentation</h2>
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_remove_qualifiers-members.html b/docs/html/structcub_1_1_remove_qualifiers-members.html
index a8e364bb4e..dcb95d6355 100644
--- a/docs/html/structcub_1_1_remove_qualifiers-members.html
+++ b/docs/html/structcub_1_1_remove_qualifiers-members.html
@@ -109,7 +109,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_remove_qualifiers.html b/docs/html/structcub_1_1_remove_qualifiers.html
index bc97b30f50..2e4b3ae532 100644
--- a/docs/html/structcub_1_1_remove_qualifiers.html
+++ b/docs/html/structcub_1_1_remove_qualifiers.html
@@ -113,7 +113,7 @@
 <p>Removes <code>const</code> and <code>volatile</code> qualifiers from type <code>Tp</code>. </p>
 <p>For example: <code>typename <a class="el" href="structcub_1_1_remove_qualifiers.html#a9143e196ef5e6a0176b953f677e94671" title="Type without const and volatile qualifiers. ">RemoveQualifiers&lt;volatile int&gt;::Type</a> // int;</code> </p>
 
-<p>Definition at line <a class="el" href="util__type_8cuh_source.html#l00720">720</a> of file <a class="el" href="util__type_8cuh_source.html">util_type.cuh</a>.</p>
+<p>Definition at line <a class="el" href="util__type_8cuh_source.html#l00216">216</a> of file <a class="el" href="util__type_8cuh_source.html">util_type.cuh</a>.</p>
 </div><table class="memberdecls">
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-types"></a>
 Public Types</h2></td></tr>
@@ -129,7 +129,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_sum-members.html b/docs/html/structcub_1_1_sum-members.html
index 798c56f67b..b59d184379 100644
--- a/docs/html/structcub_1_1_sum-members.html
+++ b/docs/html/structcub_1_1_sum-members.html
@@ -109,7 +109,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_sum.html b/docs/html/structcub_1_1_sum.html
index 8bd3c1f3b9..f0ca92271d 100644
--- a/docs/html/structcub_1_1_sum.html
+++ b/docs/html/structcub_1_1_sum.html
@@ -125,7 +125,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:05 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:16 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_warp_reduce_1_1_temp_storage.html b/docs/html/structcub_1_1_warp_reduce_1_1_temp_storage.html
index 60c5dd1d34..1dda92ed99 100644
--- a/docs/html/structcub_1_1_warp_reduce_1_1_temp_storage.html
+++ b/docs/html/structcub_1_1_warp_reduce_1_1_temp_storage.html
@@ -97,8 +97,6 @@
 </div>
 </div><!-- top -->
 <div class="header">
-  <div class="summary">
-<a href="structcub_1_1_warp_reduce_1_1_temp_storage-members.html">List of all members</a>  </div>
   <div class="headertitle">
 <div class="title">cub::WarpReduce&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;::TempStorage Struct Reference</div>  </div>
 </div><!--header-->
@@ -119,32 +117,8 @@
  <div class="center">
   <img src="structcub_1_1_warp_reduce_1_1_temp_storage.png" usemap="#cub::WarpReduce&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;::TempStorage_map" alt=""/>
   <map id="cub::WarpReduce&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;::TempStorage_map" name="cub::WarpReduce&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;::TempStorage_map">
-<area href="structcub_1_1_uninitialized.html" alt="cub::Uninitialized&lt; _TempStorage &gt;" shape="rect" coords="0,0,469,24"/>
 </map>
  </div></div>
-<table class="memberdecls">
-<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="inherited"></a>
-Additional Inherited Members</h2></td></tr>
-<tr class="inherit_header pub_types_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_types_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Types inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom"></td></tr>
-<tr class="separator:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a848f6233dbde22d8a42c022d3f0aa6f6"></a>
-typedef UnitWord&lt; _TempStorage &gt;<br class="typebreak"/>
-::<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a></td></tr>
-<tr class="memdesc:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T. <br/></td></tr>
-<tr class="separator:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_methods_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_methods_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Methods inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a790b865325f19ac45cc84d3fed0d3038"></a>
-__host__ __device__ <br class="typebreak"/>
-__forceinline__ _TempStorage &amp;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a790b865325f19ac45cc84d3fed0d3038">Alias</a> ()</td></tr>
-<tr class="memdesc:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Alias. <br/></td></tr>
-<tr class="separator:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_attribs_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_attribs_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Members inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a5fa7311d943222333e8c87497ff8e782"></a>
-<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a5fa7311d943222333e8c87497ff8e782">storage</a> [WORDS]</td></tr>
-<tr class="memdesc:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Backing storage. <br/></td></tr>
-<tr class="separator:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-</table>
 <hr/>The documentation for this struct was generated from the following file:<ul>
 <li><a class="el" href="warp__reduce_8cuh_source.html">warp_reduce.cuh</a></li>
 </ul>
@@ -152,7 +126,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_warp_reduce_1_1_temp_storage.png b/docs/html/structcub_1_1_warp_reduce_1_1_temp_storage.png
index 4ba7bae455d8c38077bce3d8d39323b865d2cad2..13024937c7f9a4baf099313766da4d7df6168220 100644
GIT binary patch
delta 1114
zcmV-g1f~0)3V;fbTz`*AL_t(|0qvd9nxh~Lh5y{ldH*-wwki-nYo|NAHj{py8AS*&
zKS^#eGfSD71rjDnDM<>Wq$)|#lvE`tnv$v{MN?9hq-eS*)rV8nGk3RK?pczYrXOX?
zPm-ouUj{hlpFvepz1;AbIVZ;3J(H>={e)_SxAZA@_k?=f)qgi7%a%l3CrDDSWqez@
zBCMN~mz$;4cw_Y_*(K=`x;oWTahto6QEfT)LmNFw?kY84nY55vO_x<=Mt#$|P>#%X
z`CdL(rxdrest<*OM<U5xj%sImThf~F-MgcDU9O)?b*Z{-pzkBTs7jJkH?ODKkCf?6
zg8LGDO|BnHb$@u%X7KY|9nOf`(<F6jCuOcm((8JWYH5fs_0_RuA5+b~DoO8XEp$_=
zxAMBoxEI<|7uTUB+It_h(eA)*>sBk((l*;~D@m8uRjEqSztZUld`zf*1=V{J9^}}m
z2(+bFk}j>x)w8fZqVy(JNeZf@DoN3lR3#~zlBy&{Q!i4Lq-eTPH8V?@nZ*&nFVPo3
z3Lub@DnKxOeI-_aSW2n@!E}R~SOG#QsRG1PGLu{aFq3NnA(LwY6@TAE)yynqW)?^o
zKnfs?k}5znB~^fEN~!?SlvDwtX@{yCygzIL@Ur_4Xs#0n<*}+=S8nU@diQv0MkiZ%
z57)oja<|X6Z<*<)&DOVjyDuJ0^&c}>n7O_a4xqi90f1xu1*-EP0~~9<rkdOSRI-i3
z2UOb`>vA`}8E9TN;D0+G4h&RGUBKoR_NhJz+UO*}zADvv8Xn8r^WAD0zb{+oRNvQE
z50m};$vK=-t^aEBx~cz`>NJ$EQf<HF8P$zK31OR~TZt%pxR5tNE>sK7wJvQ>b=<>k
z=kycZlS{Y$u)Iulr*3#x#sqnR>R8RE4W>2Q6}+hjBh^~(hkwkzV_LB}7hJmRgnXbn
zz-`bsN$ph{(~P6H>f>{Q>T)<wQ7s-eRbtXt+df^pW7}8fJ$lkt`?fLq*muLedye67
zIW(q2ms>+VeV6L^>D@QXsXpFyXZ}>K4pjR{O+6gx9Omj6`Z2Mlj_>5^o(E$E?n_q<
zVo!BeF;^O0dVigz>gvgmtG>?c=IZ=#zDRYq**On)8s@1A-0iSi!aGb|u=b0)KcaKD
zFzVG=GN6->^-f=Hdgd|JCw+Cy+!KAZ?=kJ-A=E!lwKMxYs-yRlvc6M+hZq~s>7cJJ
zJ#1;zkKyi^6e{ccc23g-Hv{$GRw@9T`fsdLYu&CMbbn>B@0RaVZ8}J|L-A}*b;!Um
z$)`o#H^wHuVD)lASrX3KU70qq9$NyyS$%sUP_MsZ1>nc^A6z}XWuN!|m+E^)mIOOx
z<AtdJ6Fpn|{tf_M=RWOnQ~^=|Q=N<PaF*Vn3XlT0oRTU)G$mDlXiBO8(UepHqUlD}
g%q(SQ78gf<0DOxaO$k8EJpcdz07*qoM6N<$g6YsA1poj5

delta 1145
zcmV-<1cv*73Z4p(Tz{=eL_t(|0qvdPa-%v7g+Cu=`u>l6n*g>Y8$#OMEmu{ClY)_D
zqtBB5vMkGzT9#!ABo;|ZNm3XkRY{7bq$)|#lvE`tnv$v{MbllWmf@j|n7j2d&yr*`
z8&g^TBx$Pc<*|Vz-BkZas*>6@8cLGY5t%b$`<h8rlD@8%8-LGWc4^ciyqna!$E4?7
zdsDM)^{^R8lGi%EPu&o<Me57l)8@SMdY0^x^h3H2)kb9vo>aQa4;IN?rX1U$ot`9f
z)f%uGt)w>Bbyl@#Z~86NBXcLd_o6z$9joDlVw>5W0PkoPbVJp--ly~~Z1?uNyb;$Z
zRM)E8PkfBnqJJt$M$XPt^&{21OK@9)@4__*)!~izcb==m88K@XNlv{{=Bgwe$aSnf
zW^Yoh4e_nMI=1X{s@Yd1>7}+}_WApam+5NjyD8PD`E&9%v0C^3Bx%r2+kxHZZC;wE
z-P$fINjKAts7lfebQ#kvdH!0ek0d<Fu}iQ_+n1z!DNb`$lDV$Ix@qZ6s*)5`NmY`f
zDXB_QG$mC@il(G0NzwF4by=1rwJggLM*zP>9{?$UKuW3r!SwNwSOH=wsR9JkC)C6W
z5K2iEAfA#blQsf4lT894lT88)fBhz^rIeB?rIbLT0HgrID5(NOQ&I(prlblGO-U6X
znhvO%!N<cc0ITx&1G?+NL3z&V=hfRbyxl#Yy3xrJc8A;VKHaRduUjpR*=&Blw)x`0
zmH(K*!))sZ;Q;!}H~=`;AD}u9GQhd!bDF#Tr;+V8d_uLi*rprvZlHVJe}Q*<IC0Q0
zxq!_*98-M}v~iOF$7)pDX?RZW&v&nN{4s5_X?x#ZJzaYH$vK=-ZU0*G8q<GEbsEZd
zsrFy;it0|GhOp1ky+qVCOvt++SE?1~R+sjtHg4heHr~)YnRN4q<!!15b;E9DOprII
zj@5kHV0yQ%;7xZhQf>8qf5_|yrVX3ff=QQykPlP`xE=Z~sjX^bnsKyVZG0|JT@U9a
zs&$85m6-I^zE8LA*!R_WkDl~Z-!{fQ`fljE=NJx?Lt{F)+#2%ft5nBNci%9l`h4fk
z{H0tSsQO4vcR12H&DAmVV`5D=evqq2I~XglFI_E&p6aY(ZZuqae_f?&>dBC+w$2>p
z>ilrNN%gSVnH?T9%u5y6t#DYv9VQp7<Kk|Q=zLog+T}bIz{$q?ps#j4^PK97zB*>^
zg}&;0Oh0)D_1~xJ+<uMfxcf<2Kd8V%j0@m&(pT3Wwl?bLusbG|%J#mu>6+l~p#9rR
z1%Q$N#%9{;cH4m~e~Z3bexGXBLEH*;XLG7U298NSE$X&0cJYPME)$eB;f&vnX&39U
zB>-I2mnQ=4`ZHbt{<?l(tCyGbdH=IizvpC4uuDGPFa?<C)!gr|0AO|P({4u<AO$eh
zwHOa)=^Io5QUJG8QU!>nqzVvCNfjWPk}5zneNru@lvF9D7sSzj=K5m7TIwW+00000
LNkvXXu0mjf`T<Pc

diff --git a/docs/html/structcub_1_1_warp_scan_1_1_temp_storage.html b/docs/html/structcub_1_1_warp_scan_1_1_temp_storage.html
index 3740ef36c5..63502d6821 100644
--- a/docs/html/structcub_1_1_warp_scan_1_1_temp_storage.html
+++ b/docs/html/structcub_1_1_warp_scan_1_1_temp_storage.html
@@ -97,8 +97,6 @@
 </div>
 </div><!-- top -->
 <div class="header">
-  <div class="summary">
-<a href="structcub_1_1_warp_scan_1_1_temp_storage-members.html">List of all members</a>  </div>
   <div class="headertitle">
 <div class="title">cub::WarpScan&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;::TempStorage Struct Reference</div>  </div>
 </div><!--header-->
@@ -119,32 +117,8 @@
  <div class="center">
   <img src="structcub_1_1_warp_scan_1_1_temp_storage.png" usemap="#cub::WarpScan&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;::TempStorage_map" alt=""/>
   <map id="cub::WarpScan&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;::TempStorage_map" name="cub::WarpScan&lt; T, LOGICAL_WARP_THREADS, PTX_ARCH &gt;::TempStorage_map">
-<area href="structcub_1_1_uninitialized.html" alt="cub::Uninitialized&lt; _TempStorage &gt;" shape="rect" coords="0,0,455,24"/>
 </map>
  </div></div>
-<table class="memberdecls">
-<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="inherited"></a>
-Additional Inherited Members</h2></td></tr>
-<tr class="inherit_header pub_types_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_types_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Types inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom"></td></tr>
-<tr class="separator:a152b1045d5ee735bc26916aa052de786 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a848f6233dbde22d8a42c022d3f0aa6f6"></a>
-typedef UnitWord&lt; _TempStorage &gt;<br class="typebreak"/>
-::<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a></td></tr>
-<tr class="memdesc:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T. <br/></td></tr>
-<tr class="separator:a848f6233dbde22d8a42c022d3f0aa6f6 inherit pub_types_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_methods_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_methods_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Methods inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a790b865325f19ac45cc84d3fed0d3038"></a>
-__host__ __device__ <br class="typebreak"/>
-__forceinline__ _TempStorage &amp;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a790b865325f19ac45cc84d3fed0d3038">Alias</a> ()</td></tr>
-<tr class="memdesc:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Alias. <br/></td></tr>
-<tr class="separator:a790b865325f19ac45cc84d3fed0d3038 inherit pub_methods_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="inherit_header pub_attribs_structcub_1_1_uninitialized"><td colspan="2" onclick="javascript:toggleInherit('pub_attribs_structcub_1_1_uninitialized')"><img src="closed.png" alt="-"/>&#160;Public Members inherited from <a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; _TempStorage &gt;</a></td></tr>
-<tr class="memitem:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="a5fa7311d943222333e8c87497ff8e782"></a>
-<a class="el" href="structcub_1_1_uninitialized.html#a848f6233dbde22d8a42c022d3f0aa6f6">DeviceWord</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html#a5fa7311d943222333e8c87497ff8e782">storage</a> [WORDS]</td></tr>
-<tr class="memdesc:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="mdescLeft">&#160;</td><td class="mdescRight">Backing storage. <br/></td></tr>
-<tr class="separator:a5fa7311d943222333e8c87497ff8e782 inherit pub_attribs_structcub_1_1_uninitialized"><td class="memSeparator" colspan="2">&#160;</td></tr>
-</table>
 <hr/>The documentation for this struct was generated from the following file:<ul>
 <li><a class="el" href="warp__scan_8cuh_source.html">warp_scan.cuh</a></li>
 </ul>
@@ -152,7 +126,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:08 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:19 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/structcub_1_1_warp_scan_1_1_temp_storage.png b/docs/html/structcub_1_1_warp_scan_1_1_temp_storage.png
index c76c1b9624416c63d711cf3c9ee90baff701319f..c1582c4722eb94d8684a95f37581fbc0bd8c9a11 100644
GIT binary patch
delta 1183
zcmZqYdd)c@roPJ4#WAFU@$KBZRgbMWT*b}rzW;x0`O3BSD%O5h%ag61e9jY&bm~xj
zAZEOI%FM`&#7R>oEji%TH%TP^WZ<Mn+Lm5HHLC8OUF&xj{`s_Qr%BE2c@MX%@J7u$
zwyj@9ckBM1n#K9&0)wjC9%>is^?a{ZbNBpZoMou18MNPa(zDm~(bd7mncmxKn^Y=u
zAE&<LTbmmcFXp}b<GELnPTrU9U(L0=W_7D}b>wC7n}sVrOfs=CJ-^*kdiLFt*wFMX
z=6kunPP#MUMeXOplWUjw{SH*NcbZ^g!|a~EOXsC;Pwb7tJ&GqAWVT1$YX4?vG|kIX
z>dCXpzPcrT?<ZC0tT9)X?5}_PW!d>l&wpCn?Mn2Y{N#%NshTNEw5uj2=jP~tJ-+kn
zyy7$eG(6X5>c@Nc&37%H{ZV87)?06#qig#8f`3>|Nxhr5G*WBx@m$rH^LEJsy`~v!
z_v>%+m)(|jLT^?2^<?+zub5QgpK9lIY2~}vfXRzh{z`#E>xqWv$Eh-^n)Ul8Emk>t
zUELiVx+#o*%4!*wFgR&jGPPVk?%u@GvF-_nK-_!@VHJl0RriMb3zl#Q#Ge#Y*f&Yp
z0T}KLKfL-FfCe!FBZBEjco9p-dZ2m|6x8D~;y(+-Ng0U@nhn7CMze*v@9=}i?t%@I
zYwH;1y)ew35xIZc!@sw$ug@zfJNa|Vf8EpVwWs!#J~PPOT4#6ZUz*WVe&fi9opl0y
z3+maJ8J_>LJFfeJ!QZrgu>|iF``*;&k2sIl9ottk;~sCxdh09a+J%{Ud$=#&I%IfU
z-EsC@7K^aQH-6OXzRxdNpZjNH&iu>EL-W%MuTPV?y+18;#;?sX(+^fpO|GAmE|a~d
z>!#hRr94+`vnEta*&gRASa16@Tw1>HnkDOvOAgh4&5k!`U8wow6aUQOdPaRcyRCct
zv1xK=WTKZWdsEwR)A8pDKEr(V_phftwz&8rp}f;t{_fG6x&OG1zh#Ph6jA#s>*yl;
zZ-2Ly?%Zd;uKMjF`(J4WzhCU0^}eatqRmE9DPHF%1CPc1r#ozZ3H-E;SbeKXQ(nFA
zb>I7%D9h4G{ly!eKZ&%eo6zqrcc%2}^!nvRJJb4BTUoel-@jqHMc>)`C$9?CcgCGL
z-uXE3vQ&*#M$PnBt8PEsU|?;aXLDS&de4;X?FP3V>qtJh;&1h)zxKWLQ;TgYD!a4)
zUi{uw_fh1||M`oJf6E=e`P+4)-HO%0A-n}vFZ-j@Z%O6pc23TY-yCyO=j7eLx7VzD
zY$@{KVEuuFz3U(B>-eMCw|2=-*;VP;eZkYtcJ9%+x{h;eN7L4Hj#Kw8RPVMvx>x>p
z4NHaZd%c=!=Z5)L;|soinr64Xe%9mfNt;zKs!HqoutvPNdF%aq27_0(F6JE&RAAk3
yW$OyR?a!rU7=swh1JKiEZIOww_e_bIuGi{&s)g%MdY0W|00K`}KbLh*2~7Z@SVnaK

delta 1211
zcmaiydsNZ~9K}(y*UaTk?UZ8nkc#Q-VJT%=cv6IcM%H{LEyI%|5H%zp404*sS!h4h
z@_|<PC`D0WG9w0_vmz(he2a*~L!zMgfY4T{<KL~Fv-{`0pZm}EerGo2Z7zzXmm<%?
zFWj^-vW+Q?Cmn+B`+C#X5CKRju5C}zxa!A@f$!?`@8KWi)<`_N+j2_rQLzO<Kp^lY
zvJ(q@^njrNx;+AE;%N>dS1EqX#CO<;XGt@IE2TA|{NuHjFraRfH0Ki1r+)JjrzYd>
zZD4><_`->kl|U}8@o1P|qkGUWPX{U!SSDkK-qJJ<EfHjlIJkFxf9Z9M?=?$Nig)H{
zrEgr}=4jxDXkSg_39cSEpQ?P}Ue-uxTRE?TJQbnXykL(C1x0z$)4=x0F#%)Q(-@c|
zOt#B+;#;bZyonQ4?#Fyd8Si^GUEgxo<mj$Wjm=CHEq3$?QFNr?>Z~V1bY5Pucg|%7
zcR<h9l4bAxnq?DB(>-Y5PBiBU0%<wQs#o*{G4U;z$_usA{<<AzEU<qxdO&L~c|<zd
zv|I<q>39m&%PZ>ltQvS34_>xRjBBXBu6FZ4QhQxtsJ<;XoaN$;tC`^H+_ZCu`h^?X
z7=ZNn30+ozPZQpD21E422mf)+?O;fkWEc28J}R)g&9lmWL1nSuYD;d?Y}CZza5i5^
zMcMlpnf`~@hk&LwGeHtK+(|02TGl;4YgS`*JJ`lLknOt&^120{wT0{a4}ku^#3#V&
zCML9Y8wv^;-fNQs0)LXH!O{v|yG4P5^@|%8g2VVdZCOp-D&a6Mf#T5hn}DV>s!#ki
zc12mHSSl=}3)DMfm3cc)$pimHZuldPZO4eGRcp&{Foe$7w~i(RUYT=K+WPZIJ5{F$
zOclY2S779Goe#Dt{AW|MSb`%V4BACz$r>T^L$EM<xLV~?(aw!N&7u;2Z(VJyI@w=6
z3B!zEWExq|wUMS0b$gV8_^=WSjP+eR@g*Z>p57%fWEAp8u3iq`U|EuZsn6hdyN-(G
z$+Nvnt^zR8r6Txn3b)L;VR=)69iBfr`8*(9C3KM83bqqL#}<1rRf>rSSo0wblnJS;
zi$Lq*{gP&i2~*mvp->&ib$$Guv<4*<R>x^%d6|_g%$YoMII|xye^de?48?>E(zVj$
zfJF?N|IpEpj9!X7)i0$Qk`HCK@<x}6an1L%+|SdLMBGR?_`^if)r@>vzSoeoLcK;g
zVs!X5oHkh5qI$4p2diqOR4pksw}cFI-$XI61c9HMN=bdy>W0-iP3bu7?MB_6j<wSW
z?UFY${w*s(+#iw0-7lS{Uk+$yCA$7n3vqlT=Au>d7wV5O;vTc|xTXg_B{pi>^FJcM
zgm3r<ieKr#d0)AFmOk2`*xIl&_X5ugsydUjLfR%wzdqxnDGyM;Tf7B2-6+{=(C;G>
z*N(~NClkJY93J5Hz!v?MG`}1Mnx@S4lB7<%*Bu`0;5sI~_Jtj@%>j`f{`)&K$Y^No
cITSQ~$jx{VC*5rj7(rGcgF??X2VTzp8!XXsoB#j-

diff --git a/docs/html/tex__obj__input__iterator_8cuh.html b/docs/html/tex__obj__input__iterator_8cuh.html
index 8bc88a3b34..a19cb3c494 100644
--- a/docs/html/tex__obj__input__iterator_8cuh.html
+++ b/docs/html/tex__obj__input__iterator_8cuh.html
@@ -134,7 +134,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/tex__obj__input__iterator_8cuh_source.html b/docs/html/tex__obj__input__iterator_8cuh_source.html
index 27c08c85da..4530b87bfa 100644
--- a/docs/html/tex__obj__input__iterator_8cuh_source.html
+++ b/docs/html/tex__obj__input__iterator_8cuh_source.html
@@ -341,7 +341,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/tex__ref__input__iterator_8cuh.html b/docs/html/tex__ref__input__iterator_8cuh.html
index 0b53c1734a..1f316660ac 100644
--- a/docs/html/tex__ref__input__iterator_8cuh.html
+++ b/docs/html/tex__ref__input__iterator_8cuh.html
@@ -134,7 +134,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/tex__ref__input__iterator_8cuh_source.html b/docs/html/tex__ref__input__iterator_8cuh_source.html
index f32e8d9e7e..60bfd43322 100644
--- a/docs/html/tex__ref__input__iterator_8cuh_source.html
+++ b/docs/html/tex__ref__input__iterator_8cuh_source.html
@@ -393,7 +393,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/thread__load_8cuh.html b/docs/html/thread__load_8cuh.html
index 9c85579d3e..0b69ad7b2a 100644
--- a/docs/html/thread__load_8cuh.html
+++ b/docs/html/thread__load_8cuh.html
@@ -152,7 +152,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/thread__load_8cuh_source.html b/docs/html/thread__load_8cuh_source.html
index 6600666ba1..1fee5c667d 100644
--- a/docs/html/thread__load_8cuh_source.html
+++ b/docs/html/thread__load_8cuh_source.html
@@ -174,17 +174,17 @@
 <div class="line"><a name="l00121"></a><span class="lineno">  121</span>&#160;<span class="keyword">struct </span>IterateThreadLoad</div>
 <div class="line"><a name="l00122"></a><span class="lineno">  122</span>&#160;{</div>
 <div class="line"><a name="l00123"></a><span class="lineno">  123</span>&#160;    <span class="keyword">template</span> &lt;CacheLoadModifier MODIFIER, <span class="keyword">typename</span> T&gt;</div>
-<div class="line"><a name="l00124"></a><span class="lineno">  124</span>&#160;    <span class="keyword">static</span> __device__ __forceinline__ <span class="keywordtype">void</span> Load(T *ptr, T *vals)</div>
+<div class="line"><a name="l00124"></a><span class="lineno">  124</span>&#160;    <span class="keyword">static</span> __device__ __forceinline__ <span class="keywordtype">void</span> Load(T <span class="keyword">const</span> *ptr, T *vals)</div>
 <div class="line"><a name="l00125"></a><span class="lineno">  125</span>&#160;    {</div>
 <div class="line"><a name="l00126"></a><span class="lineno">  126</span>&#160;        vals[COUNT] = ThreadLoad&lt;MODIFIER&gt;(ptr + COUNT);</div>
 <div class="line"><a name="l00127"></a><span class="lineno">  127</span>&#160;        IterateThreadLoad&lt;COUNT + 1, MAX&gt;::template Load&lt;MODIFIER&gt;(ptr, vals);</div>
 <div class="line"><a name="l00128"></a><span class="lineno">  128</span>&#160;    }</div>
 <div class="line"><a name="l00129"></a><span class="lineno">  129</span>&#160;</div>
 <div class="line"><a name="l00130"></a><span class="lineno">  130</span>&#160;    <span class="keyword">template</span> &lt;<span class="keyword">typename</span> InputIteratorT, <span class="keyword">typename</span> T&gt;</div>
-<div class="line"><a name="l00131"></a><span class="lineno">  131</span>&#160;    <span class="keyword">static</span> __device__ __forceinline__ <span class="keywordtype">void</span> Dereference(InputIteratorT ptr, T *vals)</div>
+<div class="line"><a name="l00131"></a><span class="lineno">  131</span>&#160;    <span class="keyword">static</span> __device__ __forceinline__ <span class="keywordtype">void</span> Dereference(InputIteratorT itr, T *vals)</div>
 <div class="line"><a name="l00132"></a><span class="lineno">  132</span>&#160;    {</div>
-<div class="line"><a name="l00133"></a><span class="lineno">  133</span>&#160;        vals[COUNT] = ptr[COUNT];</div>
-<div class="line"><a name="l00134"></a><span class="lineno">  134</span>&#160;        IterateThreadLoad&lt;COUNT + 1, MAX&gt;::Dereference(ptr, vals);</div>
+<div class="line"><a name="l00133"></a><span class="lineno">  133</span>&#160;        vals[COUNT] = itr[COUNT];</div>
+<div class="line"><a name="l00134"></a><span class="lineno">  134</span>&#160;        IterateThreadLoad&lt;COUNT + 1, MAX&gt;::Dereference(itr, vals);</div>
 <div class="line"><a name="l00135"></a><span class="lineno">  135</span>&#160;    }</div>
 <div class="line"><a name="l00136"></a><span class="lineno">  136</span>&#160;};</div>
 <div class="line"><a name="l00137"></a><span class="lineno">  137</span>&#160;</div>
@@ -193,16 +193,16 @@
 <div class="line"><a name="l00141"></a><span class="lineno">  141</span>&#160;<span class="keyword">struct </span>IterateThreadLoad&lt;MAX, MAX&gt;</div>
 <div class="line"><a name="l00142"></a><span class="lineno">  142</span>&#160;{</div>
 <div class="line"><a name="l00143"></a><span class="lineno">  143</span>&#160;    <span class="keyword">template</span> &lt;CacheLoadModifier MODIFIER, <span class="keyword">typename</span> T&gt;</div>
-<div class="line"><a name="l00144"></a><span class="lineno">  144</span>&#160;    <span class="keyword">static</span> __device__ __forceinline__ <span class="keywordtype">void</span> Load(T *ptr, T *vals) {}</div>
+<div class="line"><a name="l00144"></a><span class="lineno">  144</span>&#160;    <span class="keyword">static</span> __device__ __forceinline__ <span class="keywordtype">void</span> Load(T <span class="keyword">const</span> *ptr, T *vals) {}</div>
 <div class="line"><a name="l00145"></a><span class="lineno">  145</span>&#160;</div>
 <div class="line"><a name="l00146"></a><span class="lineno">  146</span>&#160;    <span class="keyword">template</span> &lt;<span class="keyword">typename</span> InputIteratorT, <span class="keyword">typename</span> T&gt;</div>
-<div class="line"><a name="l00147"></a><span class="lineno">  147</span>&#160;    <span class="keyword">static</span> __device__ __forceinline__ <span class="keywordtype">void</span> Dereference(InputIteratorT ptr, T *vals) {}</div>
+<div class="line"><a name="l00147"></a><span class="lineno">  147</span>&#160;    <span class="keyword">static</span> __device__ __forceinline__ <span class="keywordtype">void</span> Dereference(InputIteratorT itr, T *vals) {}</div>
 <div class="line"><a name="l00148"></a><span class="lineno">  148</span>&#160;};</div>
 <div class="line"><a name="l00149"></a><span class="lineno">  149</span>&#160;</div>
 <div class="line"><a name="l00150"></a><span class="lineno">  150</span>&#160;</div>
 <div class="line"><a name="l00154"></a><span class="lineno">  154</span>&#160;<span class="preprocessor">#define CUB_LOAD_16(cub_modifier, ptx_modifier)                                             \</span></div>
 <div class="line"><a name="l00155"></a><span class="lineno">  155</span>&#160;<span class="preprocessor">    template&lt;&gt;                                                                              \</span></div>
-<div class="line"><a name="l00156"></a><span class="lineno">  156</span>&#160;<span class="preprocessor">    __device__ __forceinline__ uint4 ThreadLoad&lt;cub_modifier, uint4*&gt;(uint4* ptr)           \</span></div>
+<div class="line"><a name="l00156"></a><span class="lineno">  156</span>&#160;<span class="preprocessor">    __device__ __forceinline__ uint4 ThreadLoad&lt;cub_modifier, uint4 const *&gt;(uint4 const *ptr)                   \</span></div>
 <div class="line"><a name="l00157"></a><span class="lineno">  157</span>&#160;<span class="preprocessor">    {                                                                                       \</span></div>
 <div class="line"><a name="l00158"></a><span class="lineno">  158</span>&#160;<span class="preprocessor">        uint4 retval;                                                                       \</span></div>
 <div class="line"><a name="l00159"></a><span class="lineno">  159</span>&#160;<span class="preprocessor">        asm volatile (&quot;ld.&quot;#ptx_modifier&quot;.v4.u32 {%0, %1, %2, %3}, [%4];&quot; :                 \</span></div>
@@ -214,7 +214,7 @@
 <div class="line"><a name="l00165"></a><span class="lineno">  165</span>&#160;<span class="preprocessor">        return retval;                                                                      \</span></div>
 <div class="line"><a name="l00166"></a><span class="lineno">  166</span>&#160;<span class="preprocessor">    }                                                                                       \</span></div>
 <div class="line"><a name="l00167"></a><span class="lineno">  167</span>&#160;<span class="preprocessor">    template&lt;&gt;                                                                              \</span></div>
-<div class="line"><a name="l00168"></a><span class="lineno">  168</span>&#160;<span class="preprocessor">    __device__ __forceinline__ ulonglong2 ThreadLoad&lt;cub_modifier, ulonglong2*&gt;(ulonglong2* ptr)              \</span></div>
+<div class="line"><a name="l00168"></a><span class="lineno">  168</span>&#160;<span class="preprocessor">    __device__ __forceinline__ ulonglong2 ThreadLoad&lt;cub_modifier, ulonglong2 const *&gt;(ulonglong2 const *ptr)    \</span></div>
 <div class="line"><a name="l00169"></a><span class="lineno">  169</span>&#160;<span class="preprocessor">    {                                                                                       \</span></div>
 <div class="line"><a name="l00170"></a><span class="lineno">  170</span>&#160;<span class="preprocessor">        ulonglong2 retval;                                                                  \</span></div>
 <div class="line"><a name="l00171"></a><span class="lineno">  171</span>&#160;<span class="preprocessor">        asm volatile (&quot;ld.&quot;#ptx_modifier&quot;.v2.u64 {%0, %1}, [%2];&quot; :                         \</span></div>
@@ -226,7 +226,7 @@
 <div class="line"><a name="l00177"></a><span class="lineno">  177</span>&#160;<span class="preprocessor"></span></div>
 <div class="line"><a name="l00181"></a><span class="lineno">  181</span>&#160;<span class="preprocessor">#define CUB_LOAD_8(cub_modifier, ptx_modifier)                                              \</span></div>
 <div class="line"><a name="l00182"></a><span class="lineno">  182</span>&#160;<span class="preprocessor">    template&lt;&gt;                                                                              \</span></div>
-<div class="line"><a name="l00183"></a><span class="lineno">  183</span>&#160;<span class="preprocessor">    __device__ __forceinline__ ushort4 ThreadLoad&lt;cub_modifier, ushort4*&gt;(ushort4* ptr)     \</span></div>
+<div class="line"><a name="l00183"></a><span class="lineno">  183</span>&#160;<span class="preprocessor">    __device__ __forceinline__ ushort4 ThreadLoad&lt;cub_modifier, ushort4 const *&gt;(ushort4 const *ptr)             \</span></div>
 <div class="line"><a name="l00184"></a><span class="lineno">  184</span>&#160;<span class="preprocessor">    {                                                                                       \</span></div>
 <div class="line"><a name="l00185"></a><span class="lineno">  185</span>&#160;<span class="preprocessor">        ushort4 retval;                                                                     \</span></div>
 <div class="line"><a name="l00186"></a><span class="lineno">  186</span>&#160;<span class="preprocessor">        asm volatile (&quot;ld.&quot;#ptx_modifier&quot;.v4.u16 {%0, %1, %2, %3}, [%4];&quot; :                 \</span></div>
@@ -238,7 +238,7 @@
 <div class="line"><a name="l00192"></a><span class="lineno">  192</span>&#160;<span class="preprocessor">        return retval;                                                                      \</span></div>
 <div class="line"><a name="l00193"></a><span class="lineno">  193</span>&#160;<span class="preprocessor">    }                                                                                       \</span></div>
 <div class="line"><a name="l00194"></a><span class="lineno">  194</span>&#160;<span class="preprocessor">    template&lt;&gt;                                                                              \</span></div>
-<div class="line"><a name="l00195"></a><span class="lineno">  195</span>&#160;<span class="preprocessor">    __device__ __forceinline__ uint2 ThreadLoad&lt;cub_modifier, uint2*&gt;(uint2* ptr)           \</span></div>
+<div class="line"><a name="l00195"></a><span class="lineno">  195</span>&#160;<span class="preprocessor">    __device__ __forceinline__ uint2 ThreadLoad&lt;cub_modifier, uint2 const *&gt;(uint2 const *ptr)                   \</span></div>
 <div class="line"><a name="l00196"></a><span class="lineno">  196</span>&#160;<span class="preprocessor">    {                                                                                       \</span></div>
 <div class="line"><a name="l00197"></a><span class="lineno">  197</span>&#160;<span class="preprocessor">        uint2 retval;                                                                       \</span></div>
 <div class="line"><a name="l00198"></a><span class="lineno">  198</span>&#160;<span class="preprocessor">        asm volatile (&quot;ld.&quot;#ptx_modifier&quot;.v2.u32 {%0, %1}, [%2];&quot; :                         \</span></div>
@@ -248,7 +248,7 @@
 <div class="line"><a name="l00202"></a><span class="lineno">  202</span>&#160;<span class="preprocessor">        return retval;                                                                      \</span></div>
 <div class="line"><a name="l00203"></a><span class="lineno">  203</span>&#160;<span class="preprocessor">    }                                                                                       \</span></div>
 <div class="line"><a name="l00204"></a><span class="lineno">  204</span>&#160;<span class="preprocessor">    template&lt;&gt;                                                                              \</span></div>
-<div class="line"><a name="l00205"></a><span class="lineno">  205</span>&#160;<span class="preprocessor">    __device__ __forceinline__ unsigned long long ThreadLoad&lt;cub_modifier, unsigned long long*&gt;(unsigned long long* ptr)                 \</span></div>
+<div class="line"><a name="l00205"></a><span class="lineno">  205</span>&#160;<span class="preprocessor">    __device__ __forceinline__ unsigned long long ThreadLoad&lt;cub_modifier, unsigned long long const *&gt;(unsigned long long const *ptr)    \</span></div>
 <div class="line"><a name="l00206"></a><span class="lineno">  206</span>&#160;<span class="preprocessor">    {                                                                                       \</span></div>
 <div class="line"><a name="l00207"></a><span class="lineno">  207</span>&#160;<span class="preprocessor">        unsigned long long retval;                                                          \</span></div>
 <div class="line"><a name="l00208"></a><span class="lineno">  208</span>&#160;<span class="preprocessor">        asm volatile (&quot;ld.&quot;#ptx_modifier&quot;.u64 %0, [%1];&quot; :                                  \</span></div>
@@ -259,7 +259,7 @@
 <div class="line"><a name="l00213"></a><span class="lineno">  213</span>&#160;<span class="preprocessor"></span></div>
 <div class="line"><a name="l00217"></a><span class="lineno">  217</span>&#160;<span class="preprocessor">#define CUB_LOAD_4(cub_modifier, ptx_modifier)                                              \</span></div>
 <div class="line"><a name="l00218"></a><span class="lineno">  218</span>&#160;<span class="preprocessor">    template&lt;&gt;                                                                              \</span></div>
-<div class="line"><a name="l00219"></a><span class="lineno">  219</span>&#160;<span class="preprocessor">    __device__ __forceinline__ unsigned int ThreadLoad&lt;cub_modifier, unsigned int*&gt;(unsigned int* ptr)                 \</span></div>
+<div class="line"><a name="l00219"></a><span class="lineno">  219</span>&#160;<span class="preprocessor">    __device__ __forceinline__ unsigned int ThreadLoad&lt;cub_modifier, unsigned int const *&gt;(unsigned int const *ptr)                      \</span></div>
 <div class="line"><a name="l00220"></a><span class="lineno">  220</span>&#160;<span class="preprocessor">    {                                                                                       \</span></div>
 <div class="line"><a name="l00221"></a><span class="lineno">  221</span>&#160;<span class="preprocessor">        unsigned int retval;                                                                \</span></div>
 <div class="line"><a name="l00222"></a><span class="lineno">  222</span>&#160;<span class="preprocessor">        asm volatile (&quot;ld.&quot;#ptx_modifier&quot;.u32 %0, [%1];&quot; :                                  \</span></div>
@@ -271,7 +271,7 @@
 <div class="line"><a name="l00228"></a><span class="lineno">  228</span>&#160;</div>
 <div class="line"><a name="l00232"></a><span class="lineno">  232</span>&#160;<span class="preprocessor">#define CUB_LOAD_2(cub_modifier, ptx_modifier)                                              \</span></div>
 <div class="line"><a name="l00233"></a><span class="lineno">  233</span>&#160;<span class="preprocessor">    template&lt;&gt;                                                                              \</span></div>
-<div class="line"><a name="l00234"></a><span class="lineno">  234</span>&#160;<span class="preprocessor">    __device__ __forceinline__ unsigned short ThreadLoad&lt;cub_modifier, unsigned short*&gt;(unsigned short* ptr)           \</span></div>
+<div class="line"><a name="l00234"></a><span class="lineno">  234</span>&#160;<span class="preprocessor">    __device__ __forceinline__ unsigned short ThreadLoad&lt;cub_modifier, unsigned short const *&gt;(unsigned short const *ptr)                \</span></div>
 <div class="line"><a name="l00235"></a><span class="lineno">  235</span>&#160;<span class="preprocessor">    {                                                                                       \</span></div>
 <div class="line"><a name="l00236"></a><span class="lineno">  236</span>&#160;<span class="preprocessor">        unsigned short retval;                                                              \</span></div>
 <div class="line"><a name="l00237"></a><span class="lineno">  237</span>&#160;<span class="preprocessor">        asm volatile (&quot;ld.&quot;#ptx_modifier&quot;.u16 %0, [%1];&quot; :                                  \</span></div>
@@ -283,7 +283,7 @@
 <div class="line"><a name="l00243"></a><span class="lineno">  243</span>&#160;</div>
 <div class="line"><a name="l00247"></a><span class="lineno">  247</span>&#160;<span class="preprocessor">#define CUB_LOAD_1(cub_modifier, ptx_modifier)                                              \</span></div>
 <div class="line"><a name="l00248"></a><span class="lineno">  248</span>&#160;<span class="preprocessor">    template&lt;&gt;                                                                              \</span></div>
-<div class="line"><a name="l00249"></a><span class="lineno">  249</span>&#160;<span class="preprocessor">    __device__ __forceinline__ unsigned char ThreadLoad&lt;cub_modifier, unsigned char*&gt;(unsigned char* ptr)              \</span></div>
+<div class="line"><a name="l00249"></a><span class="lineno">  249</span>&#160;<span class="preprocessor">    __device__ __forceinline__ unsigned char ThreadLoad&lt;cub_modifier, unsigned char const *&gt;(unsigned char const *ptr)                   \</span></div>
 <div class="line"><a name="l00250"></a><span class="lineno">  250</span>&#160;<span class="preprocessor">    {                                                                                       \</span></div>
 <div class="line"><a name="l00251"></a><span class="lineno">  251</span>&#160;<span class="preprocessor">        unsigned short retval;                                                              \</span></div>
 <div class="line"><a name="l00252"></a><span class="lineno">  252</span>&#160;<span class="preprocessor">        asm volatile (                                                                      \</span></div>
@@ -294,7 +294,7 @@
 <div class="line"><a name="l00257"></a><span class="lineno">  257</span>&#160;<span class="preprocessor">        &quot;}&quot; :                                                                               \</span></div>
 <div class="line"><a name="l00258"></a><span class="lineno">  258</span>&#160;<span class="preprocessor">            &quot;=h&quot;(retval) :                                                                  \</span></div>
 <div class="line"><a name="l00259"></a><span class="lineno">  259</span>&#160;<span class="preprocessor">            _CUB_ASM_PTR_(ptr));                                                            \</span></div>
-<div class="line"><a name="l00260"></a><span class="lineno">  260</span>&#160;<span class="preprocessor">        return (unsigned char) retval;                                                               \</span></div>
+<div class="line"><a name="l00260"></a><span class="lineno">  260</span>&#160;<span class="preprocessor">        return (unsigned char) retval;                                                      \</span></div>
 <div class="line"><a name="l00261"></a><span class="lineno">  261</span>&#160;<span class="preprocessor">    }</span></div>
 <div class="line"><a name="l00262"></a><span class="lineno">  262</span>&#160;<span class="preprocessor"></span></div>
 <div class="line"><a name="l00263"></a><span class="lineno">  263</span>&#160;</div>
@@ -374,7 +374,7 @@
 <div class="line"><a name="l00355"></a><span class="lineno">  355</span>&#160;<span class="keyword">template</span> &lt;<span class="keyword">typename</span> T&gt;</div>
 <div class="line"><a name="l00356"></a><span class="lineno">  356</span>&#160;__device__ __forceinline__ T ThreadLoadVolatilePointer(</div>
 <div class="line"><a name="l00357"></a><span class="lineno">  357</span>&#160;    T                       *ptr,</div>
-<div class="line"><a name="l00358"></a><span class="lineno">  358</span>&#160;    Int2Type&lt;false&gt;          is_primitive)</div>
+<div class="line"><a name="l00358"></a><span class="lineno">  358</span>&#160;    Int2Type&lt;false&gt;         is_primitive)</div>
 <div class="line"><a name="l00359"></a><span class="lineno">  359</span>&#160;{</div>
 <div class="line"><a name="l00360"></a><span class="lineno">  360</span>&#160;</div>
 <div class="line"><a name="l00361"></a><span class="lineno">  361</span>&#160;<span class="preprocessor">#if CUB_PTX_ARCH &lt;= 130</span></div>
@@ -422,7 +422,7 @@
 <div class="line"><a name="l00406"></a><span class="lineno">  406</span>&#160;</div>
 <div class="line"><a name="l00410"></a><span class="lineno">  410</span>&#160;<span class="keyword">template</span> &lt;<span class="keyword">typename</span> T, <span class="keywordtype">int</span> MODIFIER&gt;</div>
 <div class="line"><a name="l00411"></a><span class="lineno">  411</span>&#160;__device__ __forceinline__ T <a class="code" href="group___util_io.html#ga54f8f9a6645b94a484fab6f73568b81c" title="Thread utility for reading memory using cub::CacheLoadModifier cache modifiers. Can be used to load a...">ThreadLoad</a>(</div>
-<div class="line"><a name="l00412"></a><span class="lineno">  412</span>&#160;    T                       *ptr,</div>
+<div class="line"><a name="l00412"></a><span class="lineno">  412</span>&#160;    T <span class="keyword">const</span>                 *ptr,</div>
 <div class="line"><a name="l00413"></a><span class="lineno">  413</span>&#160;    Int2Type&lt;MODIFIER&gt;      modifier,</div>
 <div class="line"><a name="l00414"></a><span class="lineno">  414</span>&#160;    Int2Type&lt;true&gt;          is_pointer)</div>
 <div class="line"><a name="l00415"></a><span class="lineno">  415</span>&#160;{</div>
@@ -433,7 +433,7 @@
 <div class="line"><a name="l00420"></a><span class="lineno">  420</span>&#160;    DeviceWord words[DEVICE_MULTIPLE];</div>
 <div class="line"><a name="l00421"></a><span class="lineno">  421</span>&#160;</div>
 <div class="line"><a name="l00422"></a><span class="lineno">  422</span>&#160;    IterateThreadLoad&lt;0, DEVICE_MULTIPLE&gt;::template Load&lt;CacheLoadModifier(MODIFIER)&gt;(</div>
-<div class="line"><a name="l00423"></a><span class="lineno">  423</span>&#160;        <span class="keyword">reinterpret_cast&lt;</span>DeviceWord*<span class="keyword">&gt;</span>(ptr),</div>
+<div class="line"><a name="l00423"></a><span class="lineno">  423</span>&#160;        <span class="keyword">reinterpret_cast&lt;</span>DeviceWord*<span class="keyword">&gt;</span>(<span class="keyword">const_cast&lt;</span>T*<span class="keyword">&gt;</span>(ptr)),</div>
 <div class="line"><a name="l00424"></a><span class="lineno">  424</span>&#160;        words);</div>
 <div class="line"><a name="l00425"></a><span class="lineno">  425</span>&#160;</div>
 <div class="line"><a name="l00426"></a><span class="lineno">  426</span>&#160;    <span class="keywordflow">return</span> *<span class="keyword">reinterpret_cast&lt;</span>T*<span class="keyword">&gt;</span>(words);</div>
@@ -465,7 +465,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/thread__operators_8cuh.html b/docs/html/thread__operators_8cuh.html
index a194e63460..afc65607de 100644
--- a/docs/html/thread__operators_8cuh.html
+++ b/docs/html/thread__operators_8cuh.html
@@ -163,7 +163,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/thread__operators_8cuh_source.html b/docs/html/thread__operators_8cuh_source.html
index 2caf4c57dc..1b75d13672 100644
--- a/docs/html/thread__operators_8cuh_source.html
+++ b/docs/html/thread__operators_8cuh_source.html
@@ -203,14 +203,14 @@
 <div class="line"><a name="l00137"></a><span class="lineno"><a class="code" href="structcub_1_1_arg_max.html">  137</a></span>&#160;<span class="keyword">struct </span><a class="code" href="structcub_1_1_arg_max.html" title="Arg max functor (keeps the value and offset of the first occurrence of the larger item) ...">ArgMax</a></div>
 <div class="line"><a name="l00138"></a><span class="lineno">  138</span>&#160;{</div>
 <div class="line"><a name="l00140"></a><span class="lineno">  140</span>&#160;    <span class="keyword">template</span> &lt;<span class="keyword">typename</span> T, <span class="keyword">typename</span> OffsetT&gt;</div>
-<div class="line"><a name="l00141"></a><span class="lineno"><a class="code" href="structcub_1_1_arg_max.html#abc8619b45e188b364d96c0bdf0b29c2d">  141</a></span>&#160;    __host__ __device__ __forceinline__ <a class="code" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">KeyValuePair&lt;OffsetT, T&gt;</a> <a class="code" href="structcub_1_1_arg_max.html#abc8619b45e188b364d96c0bdf0b29c2d" title="Boolean max operator, preferring the item having the smaller offset in case of ties. ">operator()</a>(</div>
-<div class="line"><a name="l00142"></a><span class="lineno">  142</span>&#160;        <span class="keyword">const</span> <a class="code" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">KeyValuePair&lt;OffsetT, T&gt;</a> &amp;a,</div>
-<div class="line"><a name="l00143"></a><span class="lineno">  143</span>&#160;        <span class="keyword">const</span> <a class="code" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">KeyValuePair&lt;OffsetT, T&gt;</a> &amp;b)<span class="keyword"> const</span></div>
+<div class="line"><a name="l00141"></a><span class="lineno"><a class="code" href="structcub_1_1_arg_max.html#abc8619b45e188b364d96c0bdf0b29c2d">  141</a></span>&#160;    __host__ __device__ __forceinline__ KeyValuePair&lt;OffsetT, T&gt; <a class="code" href="structcub_1_1_arg_max.html#abc8619b45e188b364d96c0bdf0b29c2d" title="Boolean max operator, preferring the item having the smaller offset in case of ties. ">operator()</a>(</div>
+<div class="line"><a name="l00142"></a><span class="lineno">  142</span>&#160;        <span class="keyword">const</span> KeyValuePair&lt;OffsetT, T&gt; &amp;a,</div>
+<div class="line"><a name="l00143"></a><span class="lineno">  143</span>&#160;        <span class="keyword">const</span> KeyValuePair&lt;OffsetT, T&gt; &amp;b)<span class="keyword"> const</span></div>
 <div class="line"><a name="l00144"></a><span class="lineno">  144</span>&#160;<span class="keyword">    </span>{</div>
 <div class="line"><a name="l00145"></a><span class="lineno">  145</span>&#160;<span class="comment">// Mooch BUG (device reduce argmax gk110 3.2 million random fp32)</span></div>
 <div class="line"><a name="l00146"></a><span class="lineno">  146</span>&#160;<span class="comment">//        return ((b.value &gt; a.value) || ((a.value == b.value) &amp;&amp; (b.key &lt; a.key))) ? b : a;</span></div>
 <div class="line"><a name="l00147"></a><span class="lineno">  147</span>&#160;</div>
-<div class="line"><a name="l00148"></a><span class="lineno">  148</span>&#160;        <span class="keywordflow">if</span> ((b.<a class="code" href="structcub_1_1_key_value_pair.html#a468bef1440a66f45bd9b5193594bf1a4" title="Item value. ">value</a> &gt; a.<a class="code" href="structcub_1_1_key_value_pair.html#a468bef1440a66f45bd9b5193594bf1a4" title="Item value. ">value</a>) || ((a.<a class="code" href="structcub_1_1_key_value_pair.html#a468bef1440a66f45bd9b5193594bf1a4" title="Item value. ">value</a> == b.<a class="code" href="structcub_1_1_key_value_pair.html#a468bef1440a66f45bd9b5193594bf1a4" title="Item value. ">value</a>) &amp;&amp; (b.<a class="code" href="structcub_1_1_key_value_pair.html#a648308be997ae9c79f47f6006ec7b494" title="Item key. ">key</a> &lt; a.<a class="code" href="structcub_1_1_key_value_pair.html#a648308be997ae9c79f47f6006ec7b494" title="Item key. ">key</a>)))</div>
+<div class="line"><a name="l00148"></a><span class="lineno">  148</span>&#160;        <span class="keywordflow">if</span> ((b.value &gt; a.value) || ((a.value == b.value) &amp;&amp; (b.key &lt; a.key)))</div>
 <div class="line"><a name="l00149"></a><span class="lineno">  149</span>&#160;            <span class="keywordflow">return</span> b;</div>
 <div class="line"><a name="l00150"></a><span class="lineno">  150</span>&#160;        <span class="keywordflow">return</span> a;</div>
 <div class="line"><a name="l00151"></a><span class="lineno">  151</span>&#160;    }</div>
@@ -230,14 +230,14 @@
 <div class="line"><a name="l00172"></a><span class="lineno"><a class="code" href="structcub_1_1_arg_min.html">  172</a></span>&#160;<span class="keyword">struct </span><a class="code" href="structcub_1_1_arg_min.html" title="Arg min functor (keeps the value and offset of the first occurrence of the smallest item) ...">ArgMin</a></div>
 <div class="line"><a name="l00173"></a><span class="lineno">  173</span>&#160;{</div>
 <div class="line"><a name="l00175"></a><span class="lineno">  175</span>&#160;    <span class="keyword">template</span> &lt;<span class="keyword">typename</span> T, <span class="keyword">typename</span> OffsetT&gt;</div>
-<div class="line"><a name="l00176"></a><span class="lineno"><a class="code" href="structcub_1_1_arg_min.html#a57bab80de70f6401ea6899ca2488f710">  176</a></span>&#160;    __host__ __device__ __forceinline__ <a class="code" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">KeyValuePair&lt;OffsetT, T&gt;</a> <a class="code" href="structcub_1_1_arg_min.html#a57bab80de70f6401ea6899ca2488f710" title="Boolean min operator, preferring the item having the smaller offset in case of ties. ">operator()</a>(</div>
-<div class="line"><a name="l00177"></a><span class="lineno">  177</span>&#160;        <span class="keyword">const</span> <a class="code" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">KeyValuePair&lt;OffsetT, T&gt;</a> &amp;a,</div>
-<div class="line"><a name="l00178"></a><span class="lineno">  178</span>&#160;        <span class="keyword">const</span> <a class="code" href="structcub_1_1_key_value_pair.html" title="A key identifier paired with a corresponding value. ">KeyValuePair&lt;OffsetT, T&gt;</a> &amp;b)<span class="keyword"> const</span></div>
+<div class="line"><a name="l00176"></a><span class="lineno"><a class="code" href="structcub_1_1_arg_min.html#a57bab80de70f6401ea6899ca2488f710">  176</a></span>&#160;    __host__ __device__ __forceinline__ KeyValuePair&lt;OffsetT, T&gt; <a class="code" href="structcub_1_1_arg_min.html#a57bab80de70f6401ea6899ca2488f710" title="Boolean min operator, preferring the item having the smaller offset in case of ties. ">operator()</a>(</div>
+<div class="line"><a name="l00177"></a><span class="lineno">  177</span>&#160;        <span class="keyword">const</span> KeyValuePair&lt;OffsetT, T&gt; &amp;a,</div>
+<div class="line"><a name="l00178"></a><span class="lineno">  178</span>&#160;        <span class="keyword">const</span> KeyValuePair&lt;OffsetT, T&gt; &amp;b)<span class="keyword"> const</span></div>
 <div class="line"><a name="l00179"></a><span class="lineno">  179</span>&#160;<span class="keyword">    </span>{</div>
 <div class="line"><a name="l00180"></a><span class="lineno">  180</span>&#160;<span class="comment">// Mooch BUG (device reduce argmax gk110 3.2 million random fp32)</span></div>
 <div class="line"><a name="l00181"></a><span class="lineno">  181</span>&#160;<span class="comment">//        return ((b.value &lt; a.value) || ((a.value == b.value) &amp;&amp; (b.key &lt; a.key))) ? b : a;</span></div>
 <div class="line"><a name="l00182"></a><span class="lineno">  182</span>&#160;</div>
-<div class="line"><a name="l00183"></a><span class="lineno">  183</span>&#160;        <span class="keywordflow">if</span> ((b.<a class="code" href="structcub_1_1_key_value_pair.html#a468bef1440a66f45bd9b5193594bf1a4" title="Item value. ">value</a> &lt; a.<a class="code" href="structcub_1_1_key_value_pair.html#a468bef1440a66f45bd9b5193594bf1a4" title="Item value. ">value</a>) || ((a.<a class="code" href="structcub_1_1_key_value_pair.html#a468bef1440a66f45bd9b5193594bf1a4" title="Item value. ">value</a> == b.<a class="code" href="structcub_1_1_key_value_pair.html#a468bef1440a66f45bd9b5193594bf1a4" title="Item value. ">value</a>) &amp;&amp; (b.<a class="code" href="structcub_1_1_key_value_pair.html#a648308be997ae9c79f47f6006ec7b494" title="Item key. ">key</a> &lt; a.<a class="code" href="structcub_1_1_key_value_pair.html#a648308be997ae9c79f47f6006ec7b494" title="Item key. ">key</a>)))</div>
+<div class="line"><a name="l00183"></a><span class="lineno">  183</span>&#160;        <span class="keywordflow">if</span> ((b.value &lt; a.value) || ((a.value == b.value) &amp;&amp; (b.key &lt; a.key)))</div>
 <div class="line"><a name="l00184"></a><span class="lineno">  184</span>&#160;            <span class="keywordflow">return</span> b;</div>
 <div class="line"><a name="l00185"></a><span class="lineno">  185</span>&#160;        <span class="keywordflow">return</span> a;</div>
 <div class="line"><a name="l00186"></a><span class="lineno">  186</span>&#160;    }</div>
@@ -338,7 +338,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/thread__store_8cuh.html b/docs/html/thread__store_8cuh.html
index 6c5c4e2e57..75f02b71c8 100644
--- a/docs/html/thread__store_8cuh.html
+++ b/docs/html/thread__store_8cuh.html
@@ -148,7 +148,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/thread__store_8cuh_source.html b/docs/html/thread__store_8cuh_source.html
index 1c0469b7bf..f2c9261768 100644
--- a/docs/html/thread__store_8cuh_source.html
+++ b/docs/html/thread__store_8cuh_source.html
@@ -355,80 +355,89 @@
 <div class="line"><a name="l00342"></a><span class="lineno">  342</span>&#160;</div>
 <div class="line"><a name="l00343"></a><span class="lineno">  343</span>&#160;<span class="preprocessor">#else</span></div>
 <div class="line"><a name="l00344"></a><span class="lineno">  344</span>&#160;<span class="preprocessor"></span></div>
-<div class="line"><a name="l00345"></a><span class="lineno">  345</span>&#160;    <span class="keyword">typedef</span> <span class="keyword">typename</span> UnitWord&lt;T&gt;::VolatileWord VolatileWord;   <span class="comment">// Word type for memcopying</span></div>
-<div class="line"><a name="l00346"></a><span class="lineno">  346</span>&#160;</div>
-<div class="line"><a name="l00347"></a><span class="lineno">  347</span>&#160;    <span class="keyword">const</span> <span class="keywordtype">int</span> VOLATILE_MULTIPLE = <span class="keyword">sizeof</span>(T) / <span class="keyword">sizeof</span>(VolatileWord);</div>
+<div class="line"><a name="l00345"></a><span class="lineno">  345</span>&#160;    <span class="comment">// Create a temporary using shuffle-words, then store using volatile-words</span></div>
+<div class="line"><a name="l00346"></a><span class="lineno">  346</span>&#160;    <span class="keyword">typedef</span> <span class="keyword">typename</span> UnitWord&lt;T&gt;::VolatileWord  VolatileWord;  </div>
+<div class="line"><a name="l00347"></a><span class="lineno">  347</span>&#160;    <span class="keyword">typedef</span> <span class="keyword">typename</span> UnitWord&lt;T&gt;::ShuffleWord   ShuffleWord;</div>
 <div class="line"><a name="l00348"></a><span class="lineno">  348</span>&#160;</div>
-<div class="line"><a name="l00349"></a><span class="lineno">  349</span>&#160;    VolatileWord words[VOLATILE_MULTIPLE];</div>
-<div class="line"><a name="l00350"></a><span class="lineno">  350</span>&#160;    *<span class="keyword">reinterpret_cast&lt;</span>T*<span class="keyword">&gt;</span>(words) = val;</div>
-<div class="line"><a name="l00351"></a><span class="lineno">  351</span>&#160;</div>
-<div class="line"><a name="l00352"></a><span class="lineno">  352</span>&#160;<span class="comment">//    VolatileWord *words = reinterpret_cast&lt;VolatileWord*&gt;(&amp;val);</span></div>
+<div class="line"><a name="l00349"></a><span class="lineno">  349</span>&#160;    <span class="keyword">const</span> <span class="keywordtype">int</span> VOLATILE_MULTIPLE = <span class="keyword">sizeof</span>(T) / <span class="keyword">sizeof</span>(VolatileWord);</div>
+<div class="line"><a name="l00350"></a><span class="lineno">  350</span>&#160;    <span class="keyword">const</span> <span class="keywordtype">int</span> SHUFFLE_MULTIPLE  = <span class="keyword">sizeof</span>(T) / <span class="keyword">sizeof</span>(ShuffleWord);</div>
+<div class="line"><a name="l00351"></a><span class="lineno">  351</span>&#160;    </div>
+<div class="line"><a name="l00352"></a><span class="lineno">  352</span>&#160;    VolatileWord words[VOLATILE_MULTIPLE];</div>
 <div class="line"><a name="l00353"></a><span class="lineno">  353</span>&#160;</div>
-<div class="line"><a name="l00354"></a><span class="lineno">  354</span>&#160;    IterateThreadStore&lt;0, VOLATILE_MULTIPLE&gt;::template Dereference(</div>
-<div class="line"><a name="l00355"></a><span class="lineno">  355</span>&#160;        reinterpret_cast&lt;volatile VolatileWord*&gt;(ptr),</div>
-<div class="line"><a name="l00356"></a><span class="lineno">  356</span>&#160;        words);</div>
+<div class="line"><a name="l00354"></a><span class="lineno">  354</span>&#160;<span class="preprocessor">    #pragma unroll</span></div>
+<div class="line"><a name="l00355"></a><span class="lineno">  355</span>&#160;<span class="preprocessor"></span>    <span class="keywordflow">for</span> (<span class="keywordtype">int</span> i = 0; i &lt; SHUFFLE_MULTIPLE; ++i)</div>
+<div class="line"><a name="l00356"></a><span class="lineno">  356</span>&#160;        reinterpret_cast&lt;ShuffleWord*&gt;(words)[i] = <span class="keyword">reinterpret_cast&lt;</span>ShuffleWord*<span class="keyword">&gt;</span>(&amp;val)[i];</div>
 <div class="line"><a name="l00357"></a><span class="lineno">  357</span>&#160;</div>
-<div class="line"><a name="l00358"></a><span class="lineno">  358</span>&#160;<span class="preprocessor">#endif  // CUB_PTX_ARCH &lt;= 130</span></div>
-<div class="line"><a name="l00359"></a><span class="lineno">  359</span>&#160;<span class="preprocessor"></span></div>
-<div class="line"><a name="l00360"></a><span class="lineno">  360</span>&#160;}</div>
+<div class="line"><a name="l00358"></a><span class="lineno">  358</span>&#160;    IterateThreadStore&lt;0, VOLATILE_MULTIPLE&gt;::template Dereference(</div>
+<div class="line"><a name="l00359"></a><span class="lineno">  359</span>&#160;        reinterpret_cast&lt;volatile VolatileWord*&gt;(ptr),</div>
+<div class="line"><a name="l00360"></a><span class="lineno">  360</span>&#160;        words);</div>
 <div class="line"><a name="l00361"></a><span class="lineno">  361</span>&#160;</div>
-<div class="line"><a name="l00362"></a><span class="lineno">  362</span>&#160;</div>
-<div class="line"><a name="l00366"></a><span class="lineno">  366</span>&#160;<span class="keyword">template</span> &lt;<span class="keyword">typename</span> T&gt;</div>
-<div class="line"><a name="l00367"></a><span class="lineno">  367</span>&#160;__device__ __forceinline__ <span class="keywordtype">void</span> <a class="code" href="group___util_io.html#ga7e46f6e1c83bab6124cf8d4b66377812" title="Thread utility for writing memory using cub::CacheStoreModifier cache modifiers. Can be used to store...">ThreadStore</a>(</div>
-<div class="line"><a name="l00368"></a><span class="lineno">  368</span>&#160;    T                           *ptr,</div>
-<div class="line"><a name="l00369"></a><span class="lineno">  369</span>&#160;    T                           val,</div>
-<div class="line"><a name="l00370"></a><span class="lineno">  370</span>&#160;    Int2Type&lt;STORE_VOLATILE&gt;    modifier,</div>
-<div class="line"><a name="l00371"></a><span class="lineno">  371</span>&#160;    Int2Type&lt;true&gt;              is_pointer)</div>
-<div class="line"><a name="l00372"></a><span class="lineno">  372</span>&#160;{</div>
-<div class="line"><a name="l00373"></a><span class="lineno">  373</span>&#160;    ThreadStoreVolatilePtr(ptr, val, Int2Type&lt;Traits&lt;T&gt;::PRIMITIVE&gt;());</div>
-<div class="line"><a name="l00374"></a><span class="lineno">  374</span>&#160;}</div>
-<div class="line"><a name="l00375"></a><span class="lineno">  375</span>&#160;</div>
-<div class="line"><a name="l00376"></a><span class="lineno">  376</span>&#160;</div>
-<div class="line"><a name="l00380"></a><span class="lineno">  380</span>&#160;<span class="keyword">template</span> &lt;<span class="keyword">typename</span> T, <span class="keywordtype">int</span> MODIFIER&gt;</div>
-<div class="line"><a name="l00381"></a><span class="lineno">  381</span>&#160;__device__ __forceinline__ <span class="keywordtype">void</span> <a class="code" href="group___util_io.html#ga7e46f6e1c83bab6124cf8d4b66377812" title="Thread utility for writing memory using cub::CacheStoreModifier cache modifiers. Can be used to store...">ThreadStore</a>(</div>
-<div class="line"><a name="l00382"></a><span class="lineno">  382</span>&#160;    T                           *ptr,</div>
-<div class="line"><a name="l00383"></a><span class="lineno">  383</span>&#160;    T                           val,</div>
-<div class="line"><a name="l00384"></a><span class="lineno">  384</span>&#160;    Int2Type&lt;MODIFIER&gt;          modifier,</div>
-<div class="line"><a name="l00385"></a><span class="lineno">  385</span>&#160;    Int2Type&lt;true&gt;              is_pointer)</div>
-<div class="line"><a name="l00386"></a><span class="lineno">  386</span>&#160;{</div>
-<div class="line"><a name="l00387"></a><span class="lineno">  387</span>&#160;    <span class="keyword">typedef</span> <span class="keyword">typename</span> UnitWord&lt;T&gt;::DeviceWord DeviceWord;   <span class="comment">// Word type for memcopying</span></div>
-<div class="line"><a name="l00388"></a><span class="lineno">  388</span>&#160;</div>
-<div class="line"><a name="l00389"></a><span class="lineno">  389</span>&#160;    <span class="keyword">const</span> <span class="keywordtype">int</span> DEVICE_MULTIPLE = <span class="keyword">sizeof</span>(T) / <span class="keyword">sizeof</span>(DeviceWord);</div>
-<div class="line"><a name="l00390"></a><span class="lineno">  390</span>&#160;</div>
-<div class="line"><a name="l00391"></a><span class="lineno">  391</span>&#160;    DeviceWord words[DEVICE_MULTIPLE];</div>
-<div class="line"><a name="l00392"></a><span class="lineno">  392</span>&#160;</div>
-<div class="line"><a name="l00393"></a><span class="lineno">  393</span>&#160;    *<span class="keyword">reinterpret_cast&lt;</span>T*<span class="keyword">&gt;</span>(words) = val;</div>
+<div class="line"><a name="l00362"></a><span class="lineno">  362</span>&#160;<span class="preprocessor">#endif  // CUB_PTX_ARCH &lt;= 130</span></div>
+<div class="line"><a name="l00363"></a><span class="lineno">  363</span>&#160;<span class="preprocessor"></span></div>
+<div class="line"><a name="l00364"></a><span class="lineno">  364</span>&#160;}</div>
+<div class="line"><a name="l00365"></a><span class="lineno">  365</span>&#160;</div>
+<div class="line"><a name="l00366"></a><span class="lineno">  366</span>&#160;</div>
+<div class="line"><a name="l00370"></a><span class="lineno">  370</span>&#160;<span class="keyword">template</span> &lt;<span class="keyword">typename</span> T&gt;</div>
+<div class="line"><a name="l00371"></a><span class="lineno">  371</span>&#160;__device__ __forceinline__ <span class="keywordtype">void</span> <a class="code" href="group___util_io.html#ga7e46f6e1c83bab6124cf8d4b66377812" title="Thread utility for writing memory using cub::CacheStoreModifier cache modifiers. Can be used to store...">ThreadStore</a>(</div>
+<div class="line"><a name="l00372"></a><span class="lineno">  372</span>&#160;    T                           *ptr,</div>
+<div class="line"><a name="l00373"></a><span class="lineno">  373</span>&#160;    T                           val,</div>
+<div class="line"><a name="l00374"></a><span class="lineno">  374</span>&#160;    Int2Type&lt;STORE_VOLATILE&gt;    modifier,</div>
+<div class="line"><a name="l00375"></a><span class="lineno">  375</span>&#160;    Int2Type&lt;true&gt;              is_pointer)</div>
+<div class="line"><a name="l00376"></a><span class="lineno">  376</span>&#160;{</div>
+<div class="line"><a name="l00377"></a><span class="lineno">  377</span>&#160;    ThreadStoreVolatilePtr(ptr, val, Int2Type&lt;Traits&lt;T&gt;::PRIMITIVE&gt;());</div>
+<div class="line"><a name="l00378"></a><span class="lineno">  378</span>&#160;}</div>
+<div class="line"><a name="l00379"></a><span class="lineno">  379</span>&#160;</div>
+<div class="line"><a name="l00380"></a><span class="lineno">  380</span>&#160;</div>
+<div class="line"><a name="l00384"></a><span class="lineno">  384</span>&#160;<span class="keyword">template</span> &lt;<span class="keyword">typename</span> T, <span class="keywordtype">int</span> MODIFIER&gt;</div>
+<div class="line"><a name="l00385"></a><span class="lineno">  385</span>&#160;__device__ __forceinline__ <span class="keywordtype">void</span> <a class="code" href="group___util_io.html#ga7e46f6e1c83bab6124cf8d4b66377812" title="Thread utility for writing memory using cub::CacheStoreModifier cache modifiers. Can be used to store...">ThreadStore</a>(</div>
+<div class="line"><a name="l00386"></a><span class="lineno">  386</span>&#160;    T                           *ptr,</div>
+<div class="line"><a name="l00387"></a><span class="lineno">  387</span>&#160;    T                           val,</div>
+<div class="line"><a name="l00388"></a><span class="lineno">  388</span>&#160;    Int2Type&lt;MODIFIER&gt;          modifier,</div>
+<div class="line"><a name="l00389"></a><span class="lineno">  389</span>&#160;    Int2Type&lt;true&gt;              is_pointer)</div>
+<div class="line"><a name="l00390"></a><span class="lineno">  390</span>&#160;{</div>
+<div class="line"><a name="l00391"></a><span class="lineno">  391</span>&#160;    <span class="comment">// Create a temporary using shuffle-words, then store using device-words</span></div>
+<div class="line"><a name="l00392"></a><span class="lineno">  392</span>&#160;    <span class="keyword">typedef</span> <span class="keyword">typename</span> UnitWord&lt;T&gt;::DeviceWord    DeviceWord;  </div>
+<div class="line"><a name="l00393"></a><span class="lineno">  393</span>&#160;    <span class="keyword">typedef</span> <span class="keyword">typename</span> UnitWord&lt;T&gt;::ShuffleWord   ShuffleWord;</div>
 <div class="line"><a name="l00394"></a><span class="lineno">  394</span>&#160;</div>
-<div class="line"><a name="l00395"></a><span class="lineno">  395</span>&#160;    IterateThreadStore&lt;0, DEVICE_MULTIPLE&gt;::template Store&lt;CacheStoreModifier(MODIFIER)&gt;(</div>
-<div class="line"><a name="l00396"></a><span class="lineno">  396</span>&#160;        <span class="keyword">reinterpret_cast&lt;</span>DeviceWord*<span class="keyword">&gt;</span>(ptr),</div>
-<div class="line"><a name="l00397"></a><span class="lineno">  397</span>&#160;        words);</div>
-<div class="line"><a name="l00398"></a><span class="lineno">  398</span>&#160;}</div>
+<div class="line"><a name="l00395"></a><span class="lineno">  395</span>&#160;    <span class="keyword">const</span> <span class="keywordtype">int</span> DEVICE_MULTIPLE   = <span class="keyword">sizeof</span>(T) / <span class="keyword">sizeof</span>(DeviceWord);</div>
+<div class="line"><a name="l00396"></a><span class="lineno">  396</span>&#160;    <span class="keyword">const</span> <span class="keywordtype">int</span> SHUFFLE_MULTIPLE  = <span class="keyword">sizeof</span>(T) / <span class="keyword">sizeof</span>(ShuffleWord);</div>
+<div class="line"><a name="l00397"></a><span class="lineno">  397</span>&#160;    </div>
+<div class="line"><a name="l00398"></a><span class="lineno">  398</span>&#160;    DeviceWord words[DEVICE_MULTIPLE];</div>
 <div class="line"><a name="l00399"></a><span class="lineno">  399</span>&#160;</div>
-<div class="line"><a name="l00400"></a><span class="lineno">  400</span>&#160;</div>
-<div class="line"><a name="l00404"></a><span class="lineno">  404</span>&#160;<span class="keyword">template</span> &lt;CacheStoreModifier MODIFIER, <span class="keyword">typename</span> OutputIteratorT, <span class="keyword">typename</span> T&gt;</div>
-<div class="line"><a name="l00405"></a><span class="lineno">  405</span>&#160;__device__ __forceinline__ <span class="keywordtype">void</span> <a class="code" href="group___util_io.html#ga7e46f6e1c83bab6124cf8d4b66377812" title="Thread utility for writing memory using cub::CacheStoreModifier cache modifiers. Can be used to store...">ThreadStore</a>(OutputIteratorT itr, T val)</div>
-<div class="line"><a name="l00406"></a><span class="lineno">  406</span>&#160;{</div>
-<div class="line"><a name="l00407"></a><span class="lineno">  407</span>&#160;    <a class="code" href="group___util_io.html#ga7e46f6e1c83bab6124cf8d4b66377812" title="Thread utility for writing memory using cub::CacheStoreModifier cache modifiers. Can be used to store...">ThreadStore</a>(</div>
-<div class="line"><a name="l00408"></a><span class="lineno">  408</span>&#160;        itr,</div>
-<div class="line"><a name="l00409"></a><span class="lineno">  409</span>&#160;        val,</div>
-<div class="line"><a name="l00410"></a><span class="lineno">  410</span>&#160;        Int2Type&lt;MODIFIER&gt;(),</div>
-<div class="line"><a name="l00411"></a><span class="lineno">  411</span>&#160;        Int2Type&lt;IsPointer&lt;OutputIteratorT&gt;::VALUE&gt;());</div>
-<div class="line"><a name="l00412"></a><span class="lineno">  412</span>&#160;}</div>
-<div class="line"><a name="l00413"></a><span class="lineno">  413</span>&#160;</div>
-<div class="line"><a name="l00414"></a><span class="lineno">  414</span>&#160;</div>
-<div class="line"><a name="l00415"></a><span class="lineno">  415</span>&#160;</div>
-<div class="line"><a name="l00416"></a><span class="lineno">  416</span>&#160;<span class="preprocessor">#endif // DOXYGEN_SHOULD_SKIP_THIS</span></div>
-<div class="line"><a name="l00417"></a><span class="lineno">  417</span>&#160;<span class="preprocessor"></span></div>
-<div class="line"><a name="l00418"></a><span class="lineno">  418</span>&#160;       <span class="comment">// end group UtilIo</span></div>
-<div class="line"><a name="l00420"></a><span class="lineno">  420</span>&#160;</div>
-<div class="line"><a name="l00421"></a><span class="lineno">  421</span>&#160;</div>
-<div class="line"><a name="l00422"></a><span class="lineno">  422</span>&#160;}               <span class="comment">// CUB namespace</span></div>
-<div class="line"><a name="l00423"></a><span class="lineno">  423</span>&#160;CUB_NS_POSTFIX  <span class="comment">// Optional outer namespace(s)</span></div>
+<div class="line"><a name="l00400"></a><span class="lineno">  400</span>&#160;<span class="preprocessor">    #pragma unroll</span></div>
+<div class="line"><a name="l00401"></a><span class="lineno">  401</span>&#160;<span class="preprocessor"></span>    <span class="keywordflow">for</span> (<span class="keywordtype">int</span> i = 0; i &lt; SHUFFLE_MULTIPLE; ++i)</div>
+<div class="line"><a name="l00402"></a><span class="lineno">  402</span>&#160;        reinterpret_cast&lt;ShuffleWord*&gt;(words)[i] = <span class="keyword">reinterpret_cast&lt;</span>ShuffleWord*<span class="keyword">&gt;</span>(&amp;val)[i];</div>
+<div class="line"><a name="l00403"></a><span class="lineno">  403</span>&#160;</div>
+<div class="line"><a name="l00404"></a><span class="lineno">  404</span>&#160;    IterateThreadStore&lt;0, DEVICE_MULTIPLE&gt;::template Store&lt;CacheStoreModifier(MODIFIER)&gt;(</div>
+<div class="line"><a name="l00405"></a><span class="lineno">  405</span>&#160;        <span class="keyword">reinterpret_cast&lt;</span>DeviceWord*<span class="keyword">&gt;</span>(ptr),</div>
+<div class="line"><a name="l00406"></a><span class="lineno">  406</span>&#160;        words);</div>
+<div class="line"><a name="l00407"></a><span class="lineno">  407</span>&#160;}</div>
+<div class="line"><a name="l00408"></a><span class="lineno">  408</span>&#160;</div>
+<div class="line"><a name="l00409"></a><span class="lineno">  409</span>&#160;</div>
+<div class="line"><a name="l00413"></a><span class="lineno">  413</span>&#160;<span class="keyword">template</span> &lt;CacheStoreModifier MODIFIER, <span class="keyword">typename</span> OutputIteratorT, <span class="keyword">typename</span> T&gt;</div>
+<div class="line"><a name="l00414"></a><span class="lineno">  414</span>&#160;__device__ __forceinline__ <span class="keywordtype">void</span> <a class="code" href="group___util_io.html#ga7e46f6e1c83bab6124cf8d4b66377812" title="Thread utility for writing memory using cub::CacheStoreModifier cache modifiers. Can be used to store...">ThreadStore</a>(OutputIteratorT itr, T val)</div>
+<div class="line"><a name="l00415"></a><span class="lineno">  415</span>&#160;{</div>
+<div class="line"><a name="l00416"></a><span class="lineno">  416</span>&#160;    <a class="code" href="group___util_io.html#ga7e46f6e1c83bab6124cf8d4b66377812" title="Thread utility for writing memory using cub::CacheStoreModifier cache modifiers. Can be used to store...">ThreadStore</a>(</div>
+<div class="line"><a name="l00417"></a><span class="lineno">  417</span>&#160;        itr,</div>
+<div class="line"><a name="l00418"></a><span class="lineno">  418</span>&#160;        val,</div>
+<div class="line"><a name="l00419"></a><span class="lineno">  419</span>&#160;        Int2Type&lt;MODIFIER&gt;(),</div>
+<div class="line"><a name="l00420"></a><span class="lineno">  420</span>&#160;        Int2Type&lt;IsPointer&lt;OutputIteratorT&gt;::VALUE&gt;());</div>
+<div class="line"><a name="l00421"></a><span class="lineno">  421</span>&#160;}</div>
+<div class="line"><a name="l00422"></a><span class="lineno">  422</span>&#160;</div>
+<div class="line"><a name="l00423"></a><span class="lineno">  423</span>&#160;</div>
+<div class="line"><a name="l00424"></a><span class="lineno">  424</span>&#160;</div>
+<div class="line"><a name="l00425"></a><span class="lineno">  425</span>&#160;<span class="preprocessor">#endif // DOXYGEN_SHOULD_SKIP_THIS</span></div>
+<div class="line"><a name="l00426"></a><span class="lineno">  426</span>&#160;<span class="preprocessor"></span></div>
+<div class="line"><a name="l00427"></a><span class="lineno">  427</span>&#160;       <span class="comment">// end group UtilIo</span></div>
+<div class="line"><a name="l00429"></a><span class="lineno">  429</span>&#160;</div>
+<div class="line"><a name="l00430"></a><span class="lineno">  430</span>&#160;</div>
+<div class="line"><a name="l00431"></a><span class="lineno">  431</span>&#160;}               <span class="comment">// CUB namespace</span></div>
+<div class="line"><a name="l00432"></a><span class="lineno">  432</span>&#160;CUB_NS_POSTFIX  <span class="comment">// Optional outer namespace(s)</span></div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/transform__input__iterator_8cuh.html b/docs/html/transform__input__iterator_8cuh.html
index 3047305ec1..4f0177932a 100644
--- a/docs/html/transform__input__iterator_8cuh.html
+++ b/docs/html/transform__input__iterator_8cuh.html
@@ -133,7 +133,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/transform__input__iterator_8cuh_source.html b/docs/html/transform__input__iterator_8cuh_source.html
index b14ea97eba..77cffaccbe 100644
--- a/docs/html/transform__input__iterator_8cuh_source.html
+++ b/docs/html/transform__input__iterator_8cuh_source.html
@@ -280,7 +280,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/util__allocator_8cuh_source.html b/docs/html/util__allocator_8cuh_source.html
index 5c8015a293..ad2d2c0300 100644
--- a/docs/html/util__allocator_8cuh_source.html
+++ b/docs/html/util__allocator_8cuh_source.html
@@ -349,7 +349,7 @@
 <div class="line"><a name="l00326"></a><span class="lineno">  326</span>&#160;</div>
 <div class="line"><a name="l00327"></a><span class="lineno">  327</span>&#160;        this-&gt;max_cached_bytes = max_cached_bytes;</div>
 <div class="line"><a name="l00328"></a><span class="lineno">  328</span>&#160;</div>
-<div class="line"><a name="l00329"></a><span class="lineno">  329</span>&#160;        <span class="keywordflow">if</span> (debug) <a class="code" href="group___util_mgmt.html#ga6788287a780dc10c443aa1ab5ae9f0db" title="Log macro for printf statements. ">CubLog</a>(<span class="stringliteral">&quot;New max_cached_bytes(%lld)\n&quot;</span>, (<span class="keywordtype">long</span> <span class="keywordtype">long</span>) max_cached_bytes);</div>
+<div class="line"><a name="l00329"></a><span class="lineno">  329</span>&#160;        <span class="keywordflow">if</span> (debug) <a class="code" href="group___util_mgmt.html#ga25f361894440b53e637cb7ead2a4c0b7" title="Log macro for printf statements. ">_CubLog</a>(<span class="stringliteral">&quot;New max_cached_bytes(%lld)\n&quot;</span>, (<span class="keywordtype">long</span> <span class="keywordtype">long</span>) max_cached_bytes);</div>
 <div class="line"><a name="l00330"></a><span class="lineno">  330</span>&#160;</div>
 <div class="line"><a name="l00331"></a><span class="lineno">  331</span>&#160;        <span class="comment">// Unlock</span></div>
 <div class="line"><a name="l00332"></a><span class="lineno">  332</span>&#160;        Unlock(&amp;spin_lock);</div>
@@ -429,7 +429,7 @@
 <div class="line"><a name="l00413"></a><span class="lineno">  413</span>&#160;                    cached_blocks.erase(block_itr);</div>
 <div class="line"><a name="l00414"></a><span class="lineno">  414</span>&#160;                    cached_bytes[device] -= search_key.bytes;</div>
 <div class="line"><a name="l00415"></a><span class="lineno">  415</span>&#160;</div>
-<div class="line"><a name="l00416"></a><span class="lineno">  416</span>&#160;                    <span class="keywordflow">if</span> (debug) <a class="code" href="group___util_mgmt.html#ga6788287a780dc10c443aa1ab5ae9f0db" title="Log macro for printf statements. ">CubLog</a>(<span class="stringliteral">&quot;\tdevice %d reused cached block for stream %lld (%lld bytes, previously associated with stream %lld).\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks outstanding.\n&quot;</span>,</div>
+<div class="line"><a name="l00416"></a><span class="lineno">  416</span>&#160;                    <span class="keywordflow">if</span> (debug) <a class="code" href="group___util_mgmt.html#ga25f361894440b53e637cb7ead2a4c0b7" title="Log macro for printf statements. ">_CubLog</a>(<span class="stringliteral">&quot;\tdevice %d reused cached block for stream %lld (%lld bytes, previously associated with stream %lld).\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks outstanding.\n&quot;</span>,</div>
 <div class="line"><a name="l00417"></a><span class="lineno">  417</span>&#160;                        device, (<span class="keywordtype">long</span> <span class="keywordtype">long</span>) active_stream, (<span class="keywordtype">long</span> <span class="keywordtype">long</span>) search_key.bytes, (<span class="keywordtype">long</span> <span class="keywordtype">long</span>) prev_stream, (<span class="keywordtype">long</span> <span class="keywordtype">long</span>) cached_blocks.size(), (<span class="keywordtype">long</span> long) cached_bytes[device], (<span class="keywordtype">long</span> <span class="keywordtype">long</span>) live_blocks.size());</div>
 <div class="line"><a name="l00418"></a><span class="lineno">  418</span>&#160;</div>
 <div class="line"><a name="l00419"></a><span class="lineno">  419</span>&#160;                    <span class="keywordflow">break</span>;</div>
@@ -464,7 +464,7 @@
 <div class="line"><a name="l00448"></a><span class="lineno">  448</span>&#160;                <span class="comment">// Insert into live blocks</span></div>
 <div class="line"><a name="l00449"></a><span class="lineno">  449</span>&#160;                live_blocks.insert(search_key);</div>
 <div class="line"><a name="l00450"></a><span class="lineno">  450</span>&#160;</div>
-<div class="line"><a name="l00451"></a><span class="lineno">  451</span>&#160;                <span class="keywordflow">if</span> (debug) <a class="code" href="group___util_mgmt.html#ga6788287a780dc10c443aa1ab5ae9f0db" title="Log macro for printf statements. ">CubLog</a>(<span class="stringliteral">&quot;\tdevice %d allocating new device block %lld bytes associated with stream %lld.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks outstanding.\n&quot;</span>,</div>
+<div class="line"><a name="l00451"></a><span class="lineno">  451</span>&#160;                <span class="keywordflow">if</span> (debug) <a class="code" href="group___util_mgmt.html#ga25f361894440b53e637cb7ead2a4c0b7" title="Log macro for printf statements. ">_CubLog</a>(<span class="stringliteral">&quot;\tdevice %d allocating new device block %lld bytes associated with stream %lld.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks outstanding.\n&quot;</span>,</div>
 <div class="line"><a name="l00452"></a><span class="lineno">  452</span>&#160;                    device, (<span class="keywordtype">long</span> <span class="keywordtype">long</span>) search_key.bytes, (<span class="keywordtype">long</span> <span class="keywordtype">long</span>) search_key.associated_stream, (<span class="keywordtype">long</span> <span class="keywordtype">long</span>) cached_blocks.size(), (<span class="keywordtype">long</span> long) cached_bytes[device], (<span class="keywordtype">long</span> <span class="keywordtype">long</span>) live_blocks.size());</div>
 <div class="line"><a name="l00453"></a><span class="lineno">  453</span>&#160;            }</div>
 <div class="line"><a name="l00454"></a><span class="lineno">  454</span>&#160;</div>
@@ -558,7 +558,7 @@
 <div class="line"><a name="l00556"></a><span class="lineno">  556</span>&#160;                    cached_blocks.insert(search_key);</div>
 <div class="line"><a name="l00557"></a><span class="lineno">  557</span>&#160;                    cached_bytes[device] += search_key.bytes;</div>
 <div class="line"><a name="l00558"></a><span class="lineno">  558</span>&#160;</div>
-<div class="line"><a name="l00559"></a><span class="lineno">  559</span>&#160;                    <span class="keywordflow">if</span> (debug) <a class="code" href="group___util_mgmt.html#ga6788287a780dc10c443aa1ab5ae9f0db" title="Log macro for printf statements. ">CubLog</a>(<span class="stringliteral">&quot;\tdevice %d returned %lld bytes from associated stream %lld.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks outstanding.\n&quot;</span>,</div>
+<div class="line"><a name="l00559"></a><span class="lineno">  559</span>&#160;                    <span class="keywordflow">if</span> (debug) <a class="code" href="group___util_mgmt.html#ga25f361894440b53e637cb7ead2a4c0b7" title="Log macro for printf statements. ">_CubLog</a>(<span class="stringliteral">&quot;\tdevice %d returned %lld bytes from associated stream %lld.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks outstanding.\n&quot;</span>,</div>
 <div class="line"><a name="l00560"></a><span class="lineno">  560</span>&#160;                        device, (<span class="keywordtype">long</span> <span class="keywordtype">long</span>) search_key.bytes, (<span class="keywordtype">long</span> <span class="keywordtype">long</span>) search_key.associated_stream, (<span class="keywordtype">long</span> <span class="keywordtype">long</span>) cached_blocks.size(), (<span class="keywordtype">long</span> long) cached_bytes[device], (<span class="keywordtype">long</span> <span class="keywordtype">long</span>) live_blocks.size());</div>
 <div class="line"><a name="l00561"></a><span class="lineno">  561</span>&#160;                }</div>
 <div class="line"><a name="l00562"></a><span class="lineno">  562</span>&#160;                <span class="keywordflow">else</span></div>
@@ -573,7 +573,7 @@
 <div class="line"><a name="l00571"></a><span class="lineno">  571</span>&#160;                    <span class="keywordflow">if</span> (<a class="code" href="group___util_mgmt.html#ga84c3a4c178bf6593e0fad2b763606236" title="Debug macro. ">CubDebug</a>(error = cudaFree(d_ptr))) <span class="keywordflow">break</span>;</div>
 <div class="line"><a name="l00572"></a><span class="lineno">  572</span>&#160;                    <span class="keywordflow">if</span> (<a class="code" href="group___util_mgmt.html#ga84c3a4c178bf6593e0fad2b763606236" title="Debug macro. ">CubDebug</a>(error = cudaEventDestroy(search_key.ready_event))) <span class="keywordflow">break</span>;</div>
 <div class="line"><a name="l00573"></a><span class="lineno">  573</span>&#160;</div>
-<div class="line"><a name="l00574"></a><span class="lineno">  574</span>&#160;                    <span class="keywordflow">if</span> (debug) <a class="code" href="group___util_mgmt.html#ga6788287a780dc10c443aa1ab5ae9f0db" title="Log macro for printf statements. ">CubLog</a>(<span class="stringliteral">&quot;\tdevice %d freed %lld bytes from associated stream %lld.\n\t\t  %lld available blocks cached (%lld bytes), %lld live blocks outstanding.\n&quot;</span>,</div>
+<div class="line"><a name="l00574"></a><span class="lineno">  574</span>&#160;                    <span class="keywordflow">if</span> (debug) <a class="code" href="group___util_mgmt.html#ga25f361894440b53e637cb7ead2a4c0b7" title="Log macro for printf statements. ">_CubLog</a>(<span class="stringliteral">&quot;\tdevice %d freed %lld bytes from associated stream %lld.\n\t\t  %lld available blocks cached (%lld bytes), %lld live blocks outstanding.\n&quot;</span>,</div>
 <div class="line"><a name="l00575"></a><span class="lineno">  575</span>&#160;                        device, (<span class="keywordtype">long</span> <span class="keywordtype">long</span>) search_key.bytes, (<span class="keywordtype">long</span> <span class="keywordtype">long</span>) search_key.associated_stream, (<span class="keywordtype">long</span> <span class="keywordtype">long</span>) cached_blocks.size(), (<span class="keywordtype">long</span> long) cached_bytes[device], (<span class="keywordtype">long</span> <span class="keywordtype">long</span>) live_blocks.size());</div>
 <div class="line"><a name="l00576"></a><span class="lineno">  576</span>&#160;                }</div>
 <div class="line"><a name="l00577"></a><span class="lineno">  577</span>&#160;            }</div>
@@ -652,7 +652,7 @@
 <div class="line"><a name="l00660"></a><span class="lineno">  660</span>&#160;            cached_bytes[current_device] -= begin-&gt;bytes;</div>
 <div class="line"><a name="l00661"></a><span class="lineno">  661</span>&#160;            cached_blocks.erase(begin);</div>
 <div class="line"><a name="l00662"></a><span class="lineno">  662</span>&#160;</div>
-<div class="line"><a name="l00663"></a><span class="lineno">  663</span>&#160;            <span class="keywordflow">if</span> (debug) <a class="code" href="group___util_mgmt.html#ga6788287a780dc10c443aa1ab5ae9f0db" title="Log macro for printf statements. ">CubLog</a>(<span class="stringliteral">&quot;\tdevice %d freed %lld bytes.\n\t\t  %lld available blocks cached (%lld bytes), %lld live blocks outstanding.\n&quot;</span>,</div>
+<div class="line"><a name="l00663"></a><span class="lineno">  663</span>&#160;            <span class="keywordflow">if</span> (debug) <a class="code" href="group___util_mgmt.html#ga25f361894440b53e637cb7ead2a4c0b7" title="Log macro for printf statements. ">_CubLog</a>(<span class="stringliteral">&quot;\tdevice %d freed %lld bytes.\n\t\t  %lld available blocks cached (%lld bytes), %lld live blocks outstanding.\n&quot;</span>,</div>
 <div class="line"><a name="l00664"></a><span class="lineno">  664</span>&#160;                current_device, (<span class="keywordtype">long</span> <span class="keywordtype">long</span>) begin-&gt;bytes, (<span class="keywordtype">long</span> <span class="keywordtype">long</span>) cached_blocks.size(), (<span class="keywordtype">long</span> long) cached_bytes[current_device], (<span class="keywordtype">long</span> <span class="keywordtype">long</span>) live_blocks.size());</div>
 <div class="line"><a name="l00665"></a><span class="lineno">  665</span>&#160;        }</div>
 <div class="line"><a name="l00666"></a><span class="lineno">  666</span>&#160;</div>
@@ -692,7 +692,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/util__arch_8cuh.html b/docs/html/util__arch_8cuh.html
index 8ef6316989..341691a911 100644
--- a/docs/html/util__arch_8cuh.html
+++ b/docs/html/util__arch_8cuh.html
@@ -121,7 +121,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/util__arch_8cuh_source.html b/docs/html/util__arch_8cuh_source.html
index 9191fc47df..f277d2c991 100644
--- a/docs/html/util__arch_8cuh_source.html
+++ b/docs/html/util__arch_8cuh_source.html
@@ -211,7 +211,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/util__debug_8cuh.html b/docs/html/util__debug_8cuh.html
index b0233a5a0c..ee3aac79eb 100644
--- a/docs/html/util__debug_8cuh.html
+++ b/docs/html/util__debug_8cuh.html
@@ -127,10 +127,10 @@
 #define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___util_mgmt.html#ga26211db894893b3cec946e4e537536f8">CubDebugExit</a>(e)&#160;&#160;&#160;if (<a class="el" href="group___util_mgmt.html#ga5a175d2a88f63f7f1ab30e8b4f2cfa95">cub::Debug</a>((e), __FILE__, __LINE__)) { exit(1); }</td></tr>
 <tr class="memdesc:ga26211db894893b3cec946e4e537536f8"><td class="mdescLeft">&#160;</td><td class="mdescRight">Debug macro with exit. <br/></td></tr>
 <tr class="separator:ga26211db894893b3cec946e4e537536f8"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:ga6788287a780dc10c443aa1ab5ae9f0db"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="ga6788287a780dc10c443aa1ab5ae9f0db"></a>
-#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___util_mgmt.html#ga6788287a780dc10c443aa1ab5ae9f0db">CubLog</a>(format,...)&#160;&#160;&#160;printf(format,__VA_ARGS__);</td></tr>
-<tr class="memdesc:ga6788287a780dc10c443aa1ab5ae9f0db"><td class="mdescLeft">&#160;</td><td class="mdescRight">Log macro for printf statements. <br/></td></tr>
-<tr class="separator:ga6788287a780dc10c443aa1ab5ae9f0db"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga25f361894440b53e637cb7ead2a4c0b7"><td class="memItemLeft" align="right" valign="top"><a class="anchor" id="ga25f361894440b53e637cb7ead2a4c0b7"></a>
+#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___util_mgmt.html#ga25f361894440b53e637cb7ead2a4c0b7">_CubLog</a>(format,...)&#160;&#160;&#160;printf(format,__VA_ARGS__);</td></tr>
+<tr class="memdesc:ga25f361894440b53e637cb7ead2a4c0b7"><td class="mdescLeft">&#160;</td><td class="mdescRight">Log macro for printf statements. <br/></td></tr>
+<tr class="separator:ga25f361894440b53e637cb7ead2a4c0b7"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table><table class="memberdecls">
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="func-members"></a>
 Functions</h2></td></tr>
@@ -151,7 +151,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/util__debug_8cuh_source.html b/docs/html/util__debug_8cuh_source.html
index 21ff2909ef..8b99dced13 100644
--- a/docs/html/util__debug_8cuh_source.html
+++ b/docs/html/util__debug_8cuh_source.html
@@ -176,11 +176,11 @@
 <div class="line"><a name="l00101"></a><span class="lineno">  101</span>&#160;<span class="preprocessor"></span><span class="preprocessor">#endif</span></div>
 <div class="line"><a name="l00102"></a><span class="lineno">  102</span>&#160;<span class="preprocessor"></span></div>
 <div class="line"><a name="l00103"></a><span class="lineno">  103</span>&#160;</div>
-<div class="line"><a name="l00107"></a><span class="lineno">  107</span>&#160;<span class="preprocessor">#if !defined(CubLog)</span></div>
+<div class="line"><a name="l00107"></a><span class="lineno">  107</span>&#160;<span class="preprocessor">#if !defined(_CubLog)</span></div>
 <div class="line"><a name="l00108"></a><span class="lineno">  108</span>&#160;<span class="preprocessor"></span><span class="preprocessor">    #if (CUB_PTX_ARCH == 0)</span></div>
-<div class="line"><a name="l00109"></a><span class="lineno"><a class="code" href="group___util_mgmt.html#ga6788287a780dc10c443aa1ab5ae9f0db">  109</a></span>&#160;<span class="preprocessor"></span><span class="preprocessor">        #define CubLog(format, ...) printf(format,__VA_ARGS__);</span></div>
+<div class="line"><a name="l00109"></a><span class="lineno"><a class="code" href="group___util_mgmt.html#ga25f361894440b53e637cb7ead2a4c0b7">  109</a></span>&#160;<span class="preprocessor"></span><span class="preprocessor">        #define _CubLog(format, ...) printf(format,__VA_ARGS__);</span></div>
 <div class="line"><a name="l00110"></a><span class="lineno">  110</span>&#160;<span class="preprocessor"></span><span class="preprocessor">    #elif (CUB_PTX_ARCH &gt;= 200)</span></div>
-<div class="line"><a name="l00111"></a><span class="lineno">  111</span>&#160;<span class="preprocessor"></span><span class="preprocessor">        #define CubLog(format, ...) printf(&quot;[block (%d,%d,%d), thread (%d,%d,%d)]: &quot; format, blockIdx.z, blockIdx.y, blockIdx.x, threadIdx.z, threadIdx.y, threadIdx.x, __VA_ARGS__);</span></div>
+<div class="line"><a name="l00111"></a><span class="lineno">  111</span>&#160;<span class="preprocessor"></span><span class="preprocessor">        #define _CubLog(format, ...) printf(&quot;[block (%d,%d,%d), thread (%d,%d,%d)]: &quot; format, blockIdx.z, blockIdx.y, blockIdx.x, threadIdx.z, threadIdx.y, threadIdx.x, __VA_ARGS__);</span></div>
 <div class="line"><a name="l00112"></a><span class="lineno">  112</span>&#160;<span class="preprocessor"></span><span class="preprocessor">    #endif</span></div>
 <div class="line"><a name="l00113"></a><span class="lineno">  113</span>&#160;<span class="preprocessor"></span><span class="preprocessor">#endif</span></div>
 <div class="line"><a name="l00114"></a><span class="lineno">  114</span>&#160;<span class="preprocessor"></span></div>
@@ -194,7 +194,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/util__device_8cuh.html b/docs/html/util__device_8cuh.html
index 1fbba7fc5b..870deb4966 100644
--- a/docs/html/util__device_8cuh.html
+++ b/docs/html/util__device_8cuh.html
@@ -104,7 +104,8 @@
 <div class="title">util_device.cuh File Reference</div>  </div>
 </div><!--header-->
 <div class="contents">
-<div class="textblock"><code>#include &quot;<a class="el" href="util__arch_8cuh_source.html">util_arch.cuh</a>&quot;</code><br/>
+<div class="textblock"><code>#include &quot;<a class="el" href="util__type_8cuh_source.html">util_type.cuh</a>&quot;</code><br/>
+<code>#include &quot;<a class="el" href="util__arch_8cuh_source.html">util_arch.cuh</a>&quot;</code><br/>
 <code>#include &quot;<a class="el" href="util__debug_8cuh_source.html">util_debug.cuh</a>&quot;</code><br/>
 <code>#include &quot;util_namespace.cuh&quot;</code><br/>
 <code>#include &quot;util_macro.cuh&quot;</code><br/>
@@ -137,7 +138,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/util__device_8cuh_source.html b/docs/html/util__device_8cuh_source.html
index c9918da73c..02950e2b23 100644
--- a/docs/html/util__device_8cuh_source.html
+++ b/docs/html/util__device_8cuh_source.html
@@ -131,244 +131,248 @@
 <div class="line"><a name="l00028"></a><span class="lineno">   28</span>&#160;</div>
 <div class="line"><a name="l00034"></a><span class="lineno">   34</span>&#160;<span class="preprocessor">#pragma once</span></div>
 <div class="line"><a name="l00035"></a><span class="lineno">   35</span>&#160;<span class="preprocessor"></span></div>
-<div class="line"><a name="l00036"></a><span class="lineno">   36</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="util__arch_8cuh.html">util_arch.cuh</a>&quot;</span></div>
-<div class="line"><a name="l00037"></a><span class="lineno">   37</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="util__debug_8cuh.html">util_debug.cuh</a>&quot;</span></div>
-<div class="line"><a name="l00038"></a><span class="lineno">   38</span>&#160;<span class="preprocessor">#include &quot;util_namespace.cuh&quot;</span></div>
-<div class="line"><a name="l00039"></a><span class="lineno">   39</span>&#160;<span class="preprocessor">#include &quot;util_macro.cuh&quot;</span></div>
-<div class="line"><a name="l00040"></a><span class="lineno">   40</span>&#160;</div>
-<div class="line"><a name="l00042"></a><span class="lineno">   42</span>&#160;CUB_NS_PREFIX</div>
-<div class="line"><a name="l00043"></a><span class="lineno">   43</span>&#160;</div>
-<div class="line"><a name="l00045"></a><span class="lineno">   45</span>&#160;<span class="keyword">namespace </span>cub {</div>
-<div class="line"><a name="l00046"></a><span class="lineno">   46</span>&#160;</div>
+<div class="line"><a name="l00036"></a><span class="lineno">   36</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="util__type_8cuh.html">util_type.cuh</a>&quot;</span></div>
+<div class="line"><a name="l00037"></a><span class="lineno">   37</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="util__arch_8cuh.html">util_arch.cuh</a>&quot;</span></div>
+<div class="line"><a name="l00038"></a><span class="lineno">   38</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="util__debug_8cuh.html">util_debug.cuh</a>&quot;</span></div>
+<div class="line"><a name="l00039"></a><span class="lineno">   39</span>&#160;<span class="preprocessor">#include &quot;util_namespace.cuh&quot;</span></div>
+<div class="line"><a name="l00040"></a><span class="lineno">   40</span>&#160;<span class="preprocessor">#include &quot;util_macro.cuh&quot;</span></div>
+<div class="line"><a name="l00041"></a><span class="lineno">   41</span>&#160;</div>
+<div class="line"><a name="l00043"></a><span class="lineno">   43</span>&#160;CUB_NS_PREFIX</div>
+<div class="line"><a name="l00044"></a><span class="lineno">   44</span>&#160;</div>
+<div class="line"><a name="l00046"></a><span class="lineno">   46</span>&#160;<span class="keyword">namespace </span>cub {</div>
 <div class="line"><a name="l00047"></a><span class="lineno">   47</span>&#160;</div>
-<div class="line"><a name="l00053"></a><span class="lineno">   53</span>&#160;<span class="preprocessor">#ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document</span></div>
-<div class="line"><a name="l00054"></a><span class="lineno">   54</span>&#160;<span class="preprocessor"></span></div>
-<div class="line"><a name="l00055"></a><span class="lineno">   55</span>&#160;</div>
-<div class="line"><a name="l00059"></a><span class="lineno">   59</span>&#160;<span class="keyword">template</span> &lt;<span class="keywordtype">int</span> ALLOCATIONS&gt;</div>
-<div class="line"><a name="l00060"></a><span class="lineno">   60</span>&#160;CUB_RUNTIME_FUNCTION __forceinline__</div>
-<div class="line"><a name="l00061"></a><span class="lineno">   61</span>&#160;cudaError_t AliasTemporaries(</div>
-<div class="line"><a name="l00062"></a><span class="lineno">   62</span>&#160;    <span class="keywordtype">void</span>    *d_temp_storage,                    </div>
-<div class="line"><a name="l00063"></a><span class="lineno">   63</span>&#160;    <span class="keywordtype">size_t</span>  &amp;temp_storage_bytes,                </div>
-<div class="line"><a name="l00064"></a><span class="lineno">   64</span>&#160;    <span class="keywordtype">void</span>*   (&amp;allocations)[ALLOCATIONS],        </div>
-<div class="line"><a name="l00065"></a><span class="lineno">   65</span>&#160;    <span class="keywordtype">size_t</span>  (&amp;allocation_sizes)[ALLOCATIONS])   </div>
-<div class="line"><a name="l00066"></a><span class="lineno">   66</span>&#160;{</div>
-<div class="line"><a name="l00067"></a><span class="lineno">   67</span>&#160;    <span class="keyword">const</span> <span class="keywordtype">int</span> ALIGN_BYTES   = 256;</div>
-<div class="line"><a name="l00068"></a><span class="lineno">   68</span>&#160;    <span class="keyword">const</span> <span class="keywordtype">int</span> ALIGN_MASK    = ~(ALIGN_BYTES - 1);</div>
-<div class="line"><a name="l00069"></a><span class="lineno">   69</span>&#160;</div>
-<div class="line"><a name="l00070"></a><span class="lineno">   70</span>&#160;    <span class="comment">// Compute exclusive prefix sum over allocation requests</span></div>
-<div class="line"><a name="l00071"></a><span class="lineno">   71</span>&#160;    <span class="keywordtype">size_t</span> allocation_offsets[ALLOCATIONS];</div>
-<div class="line"><a name="l00072"></a><span class="lineno">   72</span>&#160;    <span class="keywordtype">size_t</span> bytes_needed = 0;</div>
-<div class="line"><a name="l00073"></a><span class="lineno">   73</span>&#160;    <span class="keywordflow">for</span> (<span class="keywordtype">int</span> i = 0; i &lt; ALLOCATIONS; ++i)</div>
-<div class="line"><a name="l00074"></a><span class="lineno">   74</span>&#160;    {</div>
-<div class="line"><a name="l00075"></a><span class="lineno">   75</span>&#160;        <span class="keywordtype">size_t</span> allocation_bytes = (allocation_sizes[i] + ALIGN_BYTES - 1) &amp; ALIGN_MASK;</div>
-<div class="line"><a name="l00076"></a><span class="lineno">   76</span>&#160;        allocation_offsets[i] = bytes_needed;</div>
-<div class="line"><a name="l00077"></a><span class="lineno">   77</span>&#160;        bytes_needed += allocation_bytes;</div>
-<div class="line"><a name="l00078"></a><span class="lineno">   78</span>&#160;    }</div>
-<div class="line"><a name="l00079"></a><span class="lineno">   79</span>&#160;</div>
-<div class="line"><a name="l00080"></a><span class="lineno">   80</span>&#160;    <span class="comment">// Check if the caller is simply requesting the size of the storage allocation</span></div>
-<div class="line"><a name="l00081"></a><span class="lineno">   81</span>&#160;    <span class="keywordflow">if</span> (!d_temp_storage)</div>
-<div class="line"><a name="l00082"></a><span class="lineno">   82</span>&#160;    {</div>
-<div class="line"><a name="l00083"></a><span class="lineno">   83</span>&#160;        temp_storage_bytes = bytes_needed;</div>
-<div class="line"><a name="l00084"></a><span class="lineno">   84</span>&#160;        <span class="keywordflow">return</span> cudaSuccess;</div>
-<div class="line"><a name="l00085"></a><span class="lineno">   85</span>&#160;    }</div>
-<div class="line"><a name="l00086"></a><span class="lineno">   86</span>&#160;</div>
-<div class="line"><a name="l00087"></a><span class="lineno">   87</span>&#160;    <span class="comment">// Check if enough storage provided</span></div>
-<div class="line"><a name="l00088"></a><span class="lineno">   88</span>&#160;    <span class="keywordflow">if</span> (temp_storage_bytes &lt; bytes_needed)</div>
-<div class="line"><a name="l00089"></a><span class="lineno">   89</span>&#160;    {</div>
-<div class="line"><a name="l00090"></a><span class="lineno">   90</span>&#160;        <span class="keywordflow">return</span> <a class="code" href="group___util_mgmt.html#ga84c3a4c178bf6593e0fad2b763606236" title="Debug macro. ">CubDebug</a>(cudaErrorInvalidValue);</div>
-<div class="line"><a name="l00091"></a><span class="lineno">   91</span>&#160;    }</div>
-<div class="line"><a name="l00092"></a><span class="lineno">   92</span>&#160;</div>
-<div class="line"><a name="l00093"></a><span class="lineno">   93</span>&#160;    <span class="comment">// Alias</span></div>
-<div class="line"><a name="l00094"></a><span class="lineno">   94</span>&#160;    <span class="keywordflow">for</span> (<span class="keywordtype">int</span> i = 0; i &lt; ALLOCATIONS; ++i)</div>
-<div class="line"><a name="l00095"></a><span class="lineno">   95</span>&#160;    {</div>
-<div class="line"><a name="l00096"></a><span class="lineno">   96</span>&#160;        allocations[i] = <span class="keyword">static_cast&lt;</span><span class="keywordtype">char</span>*<span class="keyword">&gt;</span>(d_temp_storage) + allocation_offsets[i];</div>
-<div class="line"><a name="l00097"></a><span class="lineno">   97</span>&#160;    }</div>
-<div class="line"><a name="l00098"></a><span class="lineno">   98</span>&#160;</div>
-<div class="line"><a name="l00099"></a><span class="lineno">   99</span>&#160;    <span class="keywordflow">return</span> cudaSuccess;</div>
-<div class="line"><a name="l00100"></a><span class="lineno">  100</span>&#160;}</div>
-<div class="line"><a name="l00101"></a><span class="lineno">  101</span>&#160;</div>
+<div class="line"><a name="l00048"></a><span class="lineno">   48</span>&#160;</div>
+<div class="line"><a name="l00054"></a><span class="lineno">   54</span>&#160;<span class="preprocessor">#ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document</span></div>
+<div class="line"><a name="l00055"></a><span class="lineno">   55</span>&#160;<span class="preprocessor"></span></div>
+<div class="line"><a name="l00056"></a><span class="lineno">   56</span>&#160;</div>
+<div class="line"><a name="l00060"></a><span class="lineno">   60</span>&#160;<span class="keyword">template</span> &lt;<span class="keywordtype">int</span> ALLOCATIONS&gt;</div>
+<div class="line"><a name="l00061"></a><span class="lineno">   61</span>&#160;CUB_RUNTIME_FUNCTION __forceinline__</div>
+<div class="line"><a name="l00062"></a><span class="lineno">   62</span>&#160;cudaError_t AliasTemporaries(</div>
+<div class="line"><a name="l00063"></a><span class="lineno">   63</span>&#160;    <span class="keywordtype">void</span>    *d_temp_storage,                    </div>
+<div class="line"><a name="l00064"></a><span class="lineno">   64</span>&#160;    <span class="keywordtype">size_t</span>  &amp;temp_storage_bytes,                </div>
+<div class="line"><a name="l00065"></a><span class="lineno">   65</span>&#160;    <span class="keywordtype">void</span>*   (&amp;allocations)[ALLOCATIONS],        </div>
+<div class="line"><a name="l00066"></a><span class="lineno">   66</span>&#160;    <span class="keywordtype">size_t</span>  (&amp;allocation_sizes)[ALLOCATIONS])   </div>
+<div class="line"><a name="l00067"></a><span class="lineno">   67</span>&#160;{</div>
+<div class="line"><a name="l00068"></a><span class="lineno">   68</span>&#160;    <span class="keyword">const</span> <span class="keywordtype">int</span> ALIGN_BYTES   = 256;</div>
+<div class="line"><a name="l00069"></a><span class="lineno">   69</span>&#160;    <span class="keyword">const</span> <span class="keywordtype">int</span> ALIGN_MASK    = ~(ALIGN_BYTES - 1);</div>
+<div class="line"><a name="l00070"></a><span class="lineno">   70</span>&#160;</div>
+<div class="line"><a name="l00071"></a><span class="lineno">   71</span>&#160;    <span class="comment">// Compute exclusive prefix sum over allocation requests</span></div>
+<div class="line"><a name="l00072"></a><span class="lineno">   72</span>&#160;    <span class="keywordtype">size_t</span> allocation_offsets[ALLOCATIONS];</div>
+<div class="line"><a name="l00073"></a><span class="lineno">   73</span>&#160;    <span class="keywordtype">size_t</span> bytes_needed = 0;</div>
+<div class="line"><a name="l00074"></a><span class="lineno">   74</span>&#160;    <span class="keywordflow">for</span> (<span class="keywordtype">int</span> i = 0; i &lt; ALLOCATIONS; ++i)</div>
+<div class="line"><a name="l00075"></a><span class="lineno">   75</span>&#160;    {</div>
+<div class="line"><a name="l00076"></a><span class="lineno">   76</span>&#160;        <span class="keywordtype">size_t</span> allocation_bytes = (allocation_sizes[i] + ALIGN_BYTES - 1) &amp; ALIGN_MASK;</div>
+<div class="line"><a name="l00077"></a><span class="lineno">   77</span>&#160;        allocation_offsets[i] = bytes_needed;</div>
+<div class="line"><a name="l00078"></a><span class="lineno">   78</span>&#160;        bytes_needed += allocation_bytes;</div>
+<div class="line"><a name="l00079"></a><span class="lineno">   79</span>&#160;    }</div>
+<div class="line"><a name="l00080"></a><span class="lineno">   80</span>&#160;</div>
+<div class="line"><a name="l00081"></a><span class="lineno">   81</span>&#160;    <span class="comment">// Check if the caller is simply requesting the size of the storage allocation</span></div>
+<div class="line"><a name="l00082"></a><span class="lineno">   82</span>&#160;    <span class="keywordflow">if</span> (!d_temp_storage)</div>
+<div class="line"><a name="l00083"></a><span class="lineno">   83</span>&#160;    {</div>
+<div class="line"><a name="l00084"></a><span class="lineno">   84</span>&#160;        temp_storage_bytes = bytes_needed;</div>
+<div class="line"><a name="l00085"></a><span class="lineno">   85</span>&#160;        <span class="keywordflow">return</span> cudaSuccess;</div>
+<div class="line"><a name="l00086"></a><span class="lineno">   86</span>&#160;    }</div>
+<div class="line"><a name="l00087"></a><span class="lineno">   87</span>&#160;</div>
+<div class="line"><a name="l00088"></a><span class="lineno">   88</span>&#160;    <span class="comment">// Check if enough storage provided</span></div>
+<div class="line"><a name="l00089"></a><span class="lineno">   89</span>&#160;    <span class="keywordflow">if</span> (temp_storage_bytes &lt; bytes_needed)</div>
+<div class="line"><a name="l00090"></a><span class="lineno">   90</span>&#160;    {</div>
+<div class="line"><a name="l00091"></a><span class="lineno">   91</span>&#160;        <span class="keywordflow">return</span> <a class="code" href="group___util_mgmt.html#ga84c3a4c178bf6593e0fad2b763606236" title="Debug macro. ">CubDebug</a>(cudaErrorInvalidValue);</div>
+<div class="line"><a name="l00092"></a><span class="lineno">   92</span>&#160;    }</div>
+<div class="line"><a name="l00093"></a><span class="lineno">   93</span>&#160;</div>
+<div class="line"><a name="l00094"></a><span class="lineno">   94</span>&#160;    <span class="comment">// Alias</span></div>
+<div class="line"><a name="l00095"></a><span class="lineno">   95</span>&#160;    <span class="keywordflow">for</span> (<span class="keywordtype">int</span> i = 0; i &lt; ALLOCATIONS; ++i)</div>
+<div class="line"><a name="l00096"></a><span class="lineno">   96</span>&#160;    {</div>
+<div class="line"><a name="l00097"></a><span class="lineno">   97</span>&#160;        allocations[i] = <span class="keyword">static_cast&lt;</span><span class="keywordtype">char</span>*<span class="keyword">&gt;</span>(d_temp_storage) + allocation_offsets[i];</div>
+<div class="line"><a name="l00098"></a><span class="lineno">   98</span>&#160;    }</div>
+<div class="line"><a name="l00099"></a><span class="lineno">   99</span>&#160;</div>
+<div class="line"><a name="l00100"></a><span class="lineno">  100</span>&#160;    <span class="keywordflow">return</span> cudaSuccess;</div>
+<div class="line"><a name="l00101"></a><span class="lineno">  101</span>&#160;}</div>
 <div class="line"><a name="l00102"></a><span class="lineno">  102</span>&#160;</div>
-<div class="line"><a name="l00106"></a><span class="lineno">  106</span>&#160;<span class="keyword">template</span> &lt;<span class="keyword">typename</span> T&gt;</div>
-<div class="line"><a name="l00107"></a><span class="lineno">  107</span>&#160;__global__ <span class="keywordtype">void</span> EmptyKernel(<span class="keywordtype">void</span>) { }</div>
-<div class="line"><a name="l00108"></a><span class="lineno">  108</span>&#160;</div>
+<div class="line"><a name="l00103"></a><span class="lineno">  103</span>&#160;</div>
+<div class="line"><a name="l00107"></a><span class="lineno">  107</span>&#160;<span class="keyword">template</span> &lt;<span class="keyword">typename</span> T&gt;</div>
+<div class="line"><a name="l00108"></a><span class="lineno">  108</span>&#160;__global__ <span class="keywordtype">void</span> EmptyKernel(<span class="keywordtype">void</span>) { }</div>
 <div class="line"><a name="l00109"></a><span class="lineno">  109</span>&#160;</div>
-<div class="line"><a name="l00110"></a><span class="lineno">  110</span>&#160;<span class="preprocessor">#endif  // DOXYGEN_SHOULD_SKIP_THIS</span></div>
-<div class="line"><a name="l00111"></a><span class="lineno">  111</span>&#160;<span class="preprocessor"></span></div>
-<div class="line"><a name="l00115"></a><span class="lineno"><a class="code" href="group___util_mgmt.html#ga274acbdeef0a8f56373501323ef51d05">  115</a></span>&#160;CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t <a class="code" href="group___util_mgmt.html#ga274acbdeef0a8f56373501323ef51d05" title="Retrieves the PTX version that will be used on the current device (major * 100 + minor * 10) ...">PtxVersion</a>(<span class="keywordtype">int</span> &amp;ptx_version)</div>
-<div class="line"><a name="l00116"></a><span class="lineno">  116</span>&#160;{</div>
-<div class="line"><a name="l00117"></a><span class="lineno">  117</span>&#160;    <span class="keyword">struct </span>Dummy</div>
-<div class="line"><a name="l00118"></a><span class="lineno">  118</span>&#160;    {</div>
-<div class="line"><a name="l00120"></a><span class="lineno">  120</span>&#160;        <span class="keyword">typedef</span> void (*EmptyKernelPtr)();</div>
-<div class="line"><a name="l00121"></a><span class="lineno">  121</span>&#160;</div>
-<div class="line"><a name="l00123"></a><span class="lineno">  123</span>&#160;        CUB_RUNTIME_FUNCTION __forceinline__</div>
-<div class="line"><a name="l00124"></a><span class="lineno">  124</span>&#160;        EmptyKernelPtr Empty()</div>
-<div class="line"><a name="l00125"></a><span class="lineno">  125</span>&#160;        {</div>
-<div class="line"><a name="l00126"></a><span class="lineno">  126</span>&#160;            <span class="keywordflow">return</span> EmptyKernel&lt;void&gt;;</div>
-<div class="line"><a name="l00127"></a><span class="lineno">  127</span>&#160;        }</div>
-<div class="line"><a name="l00128"></a><span class="lineno">  128</span>&#160;    };</div>
-<div class="line"><a name="l00129"></a><span class="lineno">  129</span>&#160;</div>
+<div class="line"><a name="l00110"></a><span class="lineno">  110</span>&#160;</div>
+<div class="line"><a name="l00111"></a><span class="lineno">  111</span>&#160;<span class="preprocessor">#endif  // DOXYGEN_SHOULD_SKIP_THIS</span></div>
+<div class="line"><a name="l00112"></a><span class="lineno">  112</span>&#160;<span class="preprocessor"></span></div>
+<div class="line"><a name="l00116"></a><span class="lineno"><a class="code" href="group___util_mgmt.html#ga274acbdeef0a8f56373501323ef51d05">  116</a></span>&#160;CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t <a class="code" href="group___util_mgmt.html#ga274acbdeef0a8f56373501323ef51d05" title="Retrieves the PTX version that will be used on the current device (major * 100 + minor * 10) ...">PtxVersion</a>(<span class="keywordtype">int</span> &amp;ptx_version)</div>
+<div class="line"><a name="l00117"></a><span class="lineno">  117</span>&#160;{</div>
+<div class="line"><a name="l00118"></a><span class="lineno">  118</span>&#160;    <span class="keyword">struct </span>Dummy</div>
+<div class="line"><a name="l00119"></a><span class="lineno">  119</span>&#160;    {</div>
+<div class="line"><a name="l00121"></a><span class="lineno">  121</span>&#160;        <span class="keyword">typedef</span> void (*EmptyKernelPtr)();</div>
+<div class="line"><a name="l00122"></a><span class="lineno">  122</span>&#160;</div>
+<div class="line"><a name="l00124"></a><span class="lineno">  124</span>&#160;        CUB_RUNTIME_FUNCTION __forceinline__</div>
+<div class="line"><a name="l00125"></a><span class="lineno">  125</span>&#160;        EmptyKernelPtr Empty()</div>
+<div class="line"><a name="l00126"></a><span class="lineno">  126</span>&#160;        {</div>
+<div class="line"><a name="l00127"></a><span class="lineno">  127</span>&#160;            <span class="keywordflow">return</span> EmptyKernel&lt;void&gt;;</div>
+<div class="line"><a name="l00128"></a><span class="lineno">  128</span>&#160;        }</div>
+<div class="line"><a name="l00129"></a><span class="lineno">  129</span>&#160;    };</div>
 <div class="line"><a name="l00130"></a><span class="lineno">  130</span>&#160;</div>
-<div class="line"><a name="l00131"></a><span class="lineno">  131</span>&#160;<span class="preprocessor">#ifndef CUB_RUNTIME_ENABLED</span></div>
-<div class="line"><a name="l00132"></a><span class="lineno">  132</span>&#160;<span class="preprocessor"></span></div>
-<div class="line"><a name="l00133"></a><span class="lineno">  133</span>&#160;    <span class="comment">// CUDA API calls not supported from this device</span></div>
-<div class="line"><a name="l00134"></a><span class="lineno">  134</span>&#160;    <span class="keywordflow">return</span> cudaErrorInvalidConfiguration;</div>
-<div class="line"><a name="l00135"></a><span class="lineno">  135</span>&#160;</div>
-<div class="line"><a name="l00136"></a><span class="lineno">  136</span>&#160;<span class="preprocessor">#elif (CUB_PTX_ARCH &gt; 0)</span></div>
-<div class="line"><a name="l00137"></a><span class="lineno">  137</span>&#160;<span class="preprocessor"></span></div>
-<div class="line"><a name="l00138"></a><span class="lineno">  138</span>&#160;    ptx_version = CUB_PTX_ARCH;</div>
-<div class="line"><a name="l00139"></a><span class="lineno">  139</span>&#160;    <span class="keywordflow">return</span> cudaSuccess;</div>
-<div class="line"><a name="l00140"></a><span class="lineno">  140</span>&#160;</div>
-<div class="line"><a name="l00141"></a><span class="lineno">  141</span>&#160;<span class="preprocessor">#else</span></div>
-<div class="line"><a name="l00142"></a><span class="lineno">  142</span>&#160;<span class="preprocessor"></span></div>
-<div class="line"><a name="l00143"></a><span class="lineno">  143</span>&#160;    cudaError_t error = cudaSuccess;</div>
-<div class="line"><a name="l00144"></a><span class="lineno">  144</span>&#160;    <span class="keywordflow">do</span></div>
-<div class="line"><a name="l00145"></a><span class="lineno">  145</span>&#160;    {</div>
-<div class="line"><a name="l00146"></a><span class="lineno">  146</span>&#160;        cudaFuncAttributes empty_kernel_attrs;</div>
-<div class="line"><a name="l00147"></a><span class="lineno">  147</span>&#160;        <span class="keywordflow">if</span> (<a class="code" href="group___util_mgmt.html#ga84c3a4c178bf6593e0fad2b763606236" title="Debug macro. ">CubDebug</a>(error = cudaFuncGetAttributes(&amp;empty_kernel_attrs, EmptyKernel&lt;void&gt;))) <span class="keywordflow">break</span>;</div>
-<div class="line"><a name="l00148"></a><span class="lineno">  148</span>&#160;        ptx_version = empty_kernel_attrs.ptxVersion * 10;</div>
-<div class="line"><a name="l00149"></a><span class="lineno">  149</span>&#160;    }</div>
-<div class="line"><a name="l00150"></a><span class="lineno">  150</span>&#160;    <span class="keywordflow">while</span> (0);</div>
-<div class="line"><a name="l00151"></a><span class="lineno">  151</span>&#160;</div>
-<div class="line"><a name="l00152"></a><span class="lineno">  152</span>&#160;    <span class="keywordflow">return</span> error;</div>
-<div class="line"><a name="l00153"></a><span class="lineno">  153</span>&#160;</div>
-<div class="line"><a name="l00154"></a><span class="lineno">  154</span>&#160;<span class="preprocessor">#endif</span></div>
-<div class="line"><a name="l00155"></a><span class="lineno">  155</span>&#160;<span class="preprocessor"></span>}</div>
-<div class="line"><a name="l00156"></a><span class="lineno">  156</span>&#160;</div>
+<div class="line"><a name="l00131"></a><span class="lineno">  131</span>&#160;</div>
+<div class="line"><a name="l00132"></a><span class="lineno">  132</span>&#160;<span class="preprocessor">#ifndef CUB_RUNTIME_ENABLED</span></div>
+<div class="line"><a name="l00133"></a><span class="lineno">  133</span>&#160;<span class="preprocessor"></span></div>
+<div class="line"><a name="l00134"></a><span class="lineno">  134</span>&#160;    <span class="comment">// CUDA API calls not supported from this device</span></div>
+<div class="line"><a name="l00135"></a><span class="lineno">  135</span>&#160;    <span class="keywordflow">return</span> cudaErrorInvalidConfiguration;</div>
+<div class="line"><a name="l00136"></a><span class="lineno">  136</span>&#160;</div>
+<div class="line"><a name="l00137"></a><span class="lineno">  137</span>&#160;<span class="preprocessor">#elif (CUB_PTX_ARCH &gt; 0)</span></div>
+<div class="line"><a name="l00138"></a><span class="lineno">  138</span>&#160;<span class="preprocessor"></span></div>
+<div class="line"><a name="l00139"></a><span class="lineno">  139</span>&#160;    ptx_version = CUB_PTX_ARCH;</div>
+<div class="line"><a name="l00140"></a><span class="lineno">  140</span>&#160;    <span class="keywordflow">return</span> cudaSuccess;</div>
+<div class="line"><a name="l00141"></a><span class="lineno">  141</span>&#160;</div>
+<div class="line"><a name="l00142"></a><span class="lineno">  142</span>&#160;<span class="preprocessor">#else</span></div>
+<div class="line"><a name="l00143"></a><span class="lineno">  143</span>&#160;<span class="preprocessor"></span></div>
+<div class="line"><a name="l00144"></a><span class="lineno">  144</span>&#160;    cudaError_t error = cudaSuccess;</div>
+<div class="line"><a name="l00145"></a><span class="lineno">  145</span>&#160;    <span class="keywordflow">do</span></div>
+<div class="line"><a name="l00146"></a><span class="lineno">  146</span>&#160;    {</div>
+<div class="line"><a name="l00147"></a><span class="lineno">  147</span>&#160;        cudaFuncAttributes empty_kernel_attrs;</div>
+<div class="line"><a name="l00148"></a><span class="lineno">  148</span>&#160;        <span class="keywordflow">if</span> (<a class="code" href="group___util_mgmt.html#ga84c3a4c178bf6593e0fad2b763606236" title="Debug macro. ">CubDebug</a>(error = cudaFuncGetAttributes(&amp;empty_kernel_attrs, EmptyKernel&lt;void&gt;))) <span class="keywordflow">break</span>;</div>
+<div class="line"><a name="l00149"></a><span class="lineno">  149</span>&#160;        ptx_version = empty_kernel_attrs.ptxVersion * 10;</div>
+<div class="line"><a name="l00150"></a><span class="lineno">  150</span>&#160;    }</div>
+<div class="line"><a name="l00151"></a><span class="lineno">  151</span>&#160;    <span class="keywordflow">while</span> (0);</div>
+<div class="line"><a name="l00152"></a><span class="lineno">  152</span>&#160;</div>
+<div class="line"><a name="l00153"></a><span class="lineno">  153</span>&#160;    <span class="keywordflow">return</span> error;</div>
+<div class="line"><a name="l00154"></a><span class="lineno">  154</span>&#160;</div>
+<div class="line"><a name="l00155"></a><span class="lineno">  155</span>&#160;<span class="preprocessor">#endif</span></div>
+<div class="line"><a name="l00156"></a><span class="lineno">  156</span>&#160;<span class="preprocessor"></span>}</div>
 <div class="line"><a name="l00157"></a><span class="lineno">  157</span>&#160;</div>
-<div class="line"><a name="l00161"></a><span class="lineno"><a class="code" href="group___util_mgmt.html#ga791244a362e168e8404403b5f4148de4">  161</a></span>&#160;CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t <a class="code" href="group___util_mgmt.html#ga791244a362e168e8404403b5f4148de4" title="Retrieves the SM version (major * 100 + minor * 10) ">SmVersion</a>(<span class="keywordtype">int</span> &amp;sm_version, <span class="keywordtype">int</span> device_ordinal)</div>
-<div class="line"><a name="l00162"></a><span class="lineno">  162</span>&#160;{</div>
-<div class="line"><a name="l00163"></a><span class="lineno">  163</span>&#160;<span class="preprocessor">#ifndef CUB_RUNTIME_ENABLED</span></div>
-<div class="line"><a name="l00164"></a><span class="lineno">  164</span>&#160;<span class="preprocessor"></span></div>
-<div class="line"><a name="l00165"></a><span class="lineno">  165</span>&#160;    <span class="comment">// CUDA API calls not supported from this device</span></div>
-<div class="line"><a name="l00166"></a><span class="lineno">  166</span>&#160;    <span class="keywordflow">return</span> cudaErrorInvalidConfiguration;</div>
-<div class="line"><a name="l00167"></a><span class="lineno">  167</span>&#160;</div>
-<div class="line"><a name="l00168"></a><span class="lineno">  168</span>&#160;<span class="preprocessor">#else</span></div>
-<div class="line"><a name="l00169"></a><span class="lineno">  169</span>&#160;<span class="preprocessor"></span></div>
-<div class="line"><a name="l00170"></a><span class="lineno">  170</span>&#160;    cudaError_t error = cudaSuccess;</div>
-<div class="line"><a name="l00171"></a><span class="lineno">  171</span>&#160;    <span class="keywordflow">do</span></div>
-<div class="line"><a name="l00172"></a><span class="lineno">  172</span>&#160;    {</div>
-<div class="line"><a name="l00173"></a><span class="lineno">  173</span>&#160;        <span class="comment">// Fill in SM version</span></div>
-<div class="line"><a name="l00174"></a><span class="lineno">  174</span>&#160;        <span class="keywordtype">int</span> major, minor;</div>
-<div class="line"><a name="l00175"></a><span class="lineno">  175</span>&#160;        <span class="keywordflow">if</span> (<a class="code" href="group___util_mgmt.html#ga84c3a4c178bf6593e0fad2b763606236" title="Debug macro. ">CubDebug</a>(error = cudaDeviceGetAttribute(&amp;major, cudaDevAttrComputeCapabilityMajor, device_ordinal))) <span class="keywordflow">break</span>;</div>
-<div class="line"><a name="l00176"></a><span class="lineno">  176</span>&#160;        <span class="keywordflow">if</span> (<a class="code" href="group___util_mgmt.html#ga84c3a4c178bf6593e0fad2b763606236" title="Debug macro. ">CubDebug</a>(error = cudaDeviceGetAttribute(&amp;minor, cudaDevAttrComputeCapabilityMinor, device_ordinal))) <span class="keywordflow">break</span>;</div>
-<div class="line"><a name="l00177"></a><span class="lineno">  177</span>&#160;        sm_version = major * 100 + minor * 10;</div>
-<div class="line"><a name="l00178"></a><span class="lineno">  178</span>&#160;    }</div>
-<div class="line"><a name="l00179"></a><span class="lineno">  179</span>&#160;    <span class="keywordflow">while</span> (0);</div>
-<div class="line"><a name="l00180"></a><span class="lineno">  180</span>&#160;</div>
-<div class="line"><a name="l00181"></a><span class="lineno">  181</span>&#160;    <span class="keywordflow">return</span> error;</div>
-<div class="line"><a name="l00182"></a><span class="lineno">  182</span>&#160;</div>
-<div class="line"><a name="l00183"></a><span class="lineno">  183</span>&#160;<span class="preprocessor">#endif</span></div>
-<div class="line"><a name="l00184"></a><span class="lineno">  184</span>&#160;<span class="preprocessor"></span>}</div>
-<div class="line"><a name="l00185"></a><span class="lineno">  185</span>&#160;</div>
+<div class="line"><a name="l00158"></a><span class="lineno">  158</span>&#160;</div>
+<div class="line"><a name="l00162"></a><span class="lineno"><a class="code" href="group___util_mgmt.html#ga791244a362e168e8404403b5f4148de4">  162</a></span>&#160;CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t <a class="code" href="group___util_mgmt.html#ga791244a362e168e8404403b5f4148de4" title="Retrieves the SM version (major * 100 + minor * 10) ">SmVersion</a>(<span class="keywordtype">int</span> &amp;sm_version, <span class="keywordtype">int</span> device_ordinal)</div>
+<div class="line"><a name="l00163"></a><span class="lineno">  163</span>&#160;{</div>
+<div class="line"><a name="l00164"></a><span class="lineno">  164</span>&#160;<span class="preprocessor">#ifndef CUB_RUNTIME_ENABLED</span></div>
+<div class="line"><a name="l00165"></a><span class="lineno">  165</span>&#160;<span class="preprocessor"></span></div>
+<div class="line"><a name="l00166"></a><span class="lineno">  166</span>&#160;    <span class="comment">// CUDA API calls not supported from this device</span></div>
+<div class="line"><a name="l00167"></a><span class="lineno">  167</span>&#160;    <span class="keywordflow">return</span> cudaErrorInvalidConfiguration;</div>
+<div class="line"><a name="l00168"></a><span class="lineno">  168</span>&#160;</div>
+<div class="line"><a name="l00169"></a><span class="lineno">  169</span>&#160;<span class="preprocessor">#else</span></div>
+<div class="line"><a name="l00170"></a><span class="lineno">  170</span>&#160;<span class="preprocessor"></span></div>
+<div class="line"><a name="l00171"></a><span class="lineno">  171</span>&#160;    cudaError_t error = cudaSuccess;</div>
+<div class="line"><a name="l00172"></a><span class="lineno">  172</span>&#160;    <span class="keywordflow">do</span></div>
+<div class="line"><a name="l00173"></a><span class="lineno">  173</span>&#160;    {</div>
+<div class="line"><a name="l00174"></a><span class="lineno">  174</span>&#160;        <span class="comment">// Fill in SM version</span></div>
+<div class="line"><a name="l00175"></a><span class="lineno">  175</span>&#160;        <span class="keywordtype">int</span> major, minor;</div>
+<div class="line"><a name="l00176"></a><span class="lineno">  176</span>&#160;        <span class="keywordflow">if</span> (<a class="code" href="group___util_mgmt.html#ga84c3a4c178bf6593e0fad2b763606236" title="Debug macro. ">CubDebug</a>(error = cudaDeviceGetAttribute(&amp;major, cudaDevAttrComputeCapabilityMajor, device_ordinal))) <span class="keywordflow">break</span>;</div>
+<div class="line"><a name="l00177"></a><span class="lineno">  177</span>&#160;        <span class="keywordflow">if</span> (<a class="code" href="group___util_mgmt.html#ga84c3a4c178bf6593e0fad2b763606236" title="Debug macro. ">CubDebug</a>(error = cudaDeviceGetAttribute(&amp;minor, cudaDevAttrComputeCapabilityMinor, device_ordinal))) <span class="keywordflow">break</span>;</div>
+<div class="line"><a name="l00178"></a><span class="lineno">  178</span>&#160;        sm_version = major * 100 + minor * 10;</div>
+<div class="line"><a name="l00179"></a><span class="lineno">  179</span>&#160;    }</div>
+<div class="line"><a name="l00180"></a><span class="lineno">  180</span>&#160;    <span class="keywordflow">while</span> (0);</div>
+<div class="line"><a name="l00181"></a><span class="lineno">  181</span>&#160;</div>
+<div class="line"><a name="l00182"></a><span class="lineno">  182</span>&#160;    <span class="keywordflow">return</span> error;</div>
+<div class="line"><a name="l00183"></a><span class="lineno">  183</span>&#160;</div>
+<div class="line"><a name="l00184"></a><span class="lineno">  184</span>&#160;<span class="preprocessor">#endif</span></div>
+<div class="line"><a name="l00185"></a><span class="lineno">  185</span>&#160;<span class="preprocessor"></span>}</div>
 <div class="line"><a name="l00186"></a><span class="lineno">  186</span>&#160;</div>
-<div class="line"><a name="l00187"></a><span class="lineno">  187</span>&#160;<span class="preprocessor">#ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document</span></div>
-<div class="line"><a name="l00188"></a><span class="lineno">  188</span>&#160;<span class="preprocessor"></span></div>
-<div class="line"><a name="l00192"></a><span class="lineno">  192</span>&#160;CUB_RUNTIME_FUNCTION __forceinline__</div>
-<div class="line"><a name="l00193"></a><span class="lineno">  193</span>&#160;<span class="keyword">static</span> cudaError_t SyncStream(cudaStream_t stream)</div>
-<div class="line"><a name="l00194"></a><span class="lineno">  194</span>&#160;{</div>
-<div class="line"><a name="l00195"></a><span class="lineno">  195</span>&#160;<span class="preprocessor">#if (CUB_PTX_ARCH == 0)</span></div>
-<div class="line"><a name="l00196"></a><span class="lineno">  196</span>&#160;<span class="preprocessor"></span>    <span class="keywordflow">return</span> cudaStreamSynchronize(stream);</div>
-<div class="line"><a name="l00197"></a><span class="lineno">  197</span>&#160;<span class="preprocessor">#else</span></div>
-<div class="line"><a name="l00198"></a><span class="lineno">  198</span>&#160;<span class="preprocessor"></span>    <span class="comment">// Device can&#39;t yet sync on a specific stream</span></div>
-<div class="line"><a name="l00199"></a><span class="lineno">  199</span>&#160;    <span class="keywordflow">return</span> cudaDeviceSynchronize();</div>
-<div class="line"><a name="l00200"></a><span class="lineno">  200</span>&#160;<span class="preprocessor">#endif</span></div>
-<div class="line"><a name="l00201"></a><span class="lineno">  201</span>&#160;<span class="preprocessor"></span>}</div>
-<div class="line"><a name="l00202"></a><span class="lineno">  202</span>&#160;</div>
+<div class="line"><a name="l00187"></a><span class="lineno">  187</span>&#160;</div>
+<div class="line"><a name="l00188"></a><span class="lineno">  188</span>&#160;<span class="preprocessor">#ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document</span></div>
+<div class="line"><a name="l00189"></a><span class="lineno">  189</span>&#160;<span class="preprocessor"></span></div>
+<div class="line"><a name="l00193"></a><span class="lineno">  193</span>&#160;CUB_RUNTIME_FUNCTION __forceinline__</div>
+<div class="line"><a name="l00194"></a><span class="lineno">  194</span>&#160;<span class="keyword">static</span> cudaError_t SyncStream(cudaStream_t stream)</div>
+<div class="line"><a name="l00195"></a><span class="lineno">  195</span>&#160;{</div>
+<div class="line"><a name="l00196"></a><span class="lineno">  196</span>&#160;<span class="preprocessor">#if (CUB_PTX_ARCH == 0)</span></div>
+<div class="line"><a name="l00197"></a><span class="lineno">  197</span>&#160;<span class="preprocessor"></span>    <span class="keywordflow">return</span> cudaStreamSynchronize(stream);</div>
+<div class="line"><a name="l00198"></a><span class="lineno">  198</span>&#160;<span class="preprocessor">#else</span></div>
+<div class="line"><a name="l00199"></a><span class="lineno">  199</span>&#160;<span class="preprocessor"></span>    <span class="comment">// Device can&#39;t yet sync on a specific stream</span></div>
+<div class="line"><a name="l00200"></a><span class="lineno">  200</span>&#160;    <span class="keywordflow">return</span> cudaDeviceSynchronize();</div>
+<div class="line"><a name="l00201"></a><span class="lineno">  201</span>&#160;<span class="preprocessor">#endif</span></div>
+<div class="line"><a name="l00202"></a><span class="lineno">  202</span>&#160;<span class="preprocessor"></span>}</div>
 <div class="line"><a name="l00203"></a><span class="lineno">  203</span>&#160;</div>
-<div class="line"><a name="l00235"></a><span class="lineno">  235</span>&#160;<span class="keyword">template</span> &lt;<span class="keyword">typename</span> KernelPtr&gt;</div>
-<div class="line"><a name="l00236"></a><span class="lineno">  236</span>&#160;CUB_RUNTIME_FUNCTION __forceinline__</div>
-<div class="line"><a name="l00237"></a><span class="lineno">  237</span>&#160;cudaError_t MaxSmOccupancy(</div>
-<div class="line"><a name="l00238"></a><span class="lineno">  238</span>&#160;    <span class="keywordtype">int</span>                 &amp;max_sm_occupancy,          </div>
-<div class="line"><a name="l00239"></a><span class="lineno">  239</span>&#160;    KernelPtr           kernel_ptr,                 </div>
-<div class="line"><a name="l00240"></a><span class="lineno">  240</span>&#160;    <span class="keywordtype">int</span>                 block_threads,              </div>
-<div class="line"><a name="l00241"></a><span class="lineno">  241</span>&#160;    <span class="keywordtype">int</span>                 dynamic_smem_bytes = 0)</div>
-<div class="line"><a name="l00242"></a><span class="lineno">  242</span>&#160;{</div>
-<div class="line"><a name="l00243"></a><span class="lineno">  243</span>&#160;<span class="preprocessor">#ifndef CUB_RUNTIME_ENABLED</span></div>
-<div class="line"><a name="l00244"></a><span class="lineno">  244</span>&#160;<span class="preprocessor"></span></div>
-<div class="line"><a name="l00245"></a><span class="lineno">  245</span>&#160;    <span class="comment">// CUDA API calls not supported from this device</span></div>
-<div class="line"><a name="l00246"></a><span class="lineno">  246</span>&#160;    <span class="keywordflow">return</span> <a class="code" href="group___util_mgmt.html#ga84c3a4c178bf6593e0fad2b763606236" title="Debug macro. ">CubDebug</a>(cudaErrorInvalidConfiguration);</div>
-<div class="line"><a name="l00247"></a><span class="lineno">  247</span>&#160;</div>
-<div class="line"><a name="l00248"></a><span class="lineno">  248</span>&#160;<span class="preprocessor">#else</span></div>
-<div class="line"><a name="l00249"></a><span class="lineno">  249</span>&#160;<span class="preprocessor"></span></div>
-<div class="line"><a name="l00250"></a><span class="lineno">  250</span>&#160;    <span class="keywordflow">return</span> cudaOccupancyMaxActiveBlocksPerMultiprocessor (</div>
-<div class="line"><a name="l00251"></a><span class="lineno">  251</span>&#160;        &amp;max_sm_occupancy,</div>
-<div class="line"><a name="l00252"></a><span class="lineno">  252</span>&#160;        kernel_ptr,</div>
-<div class="line"><a name="l00253"></a><span class="lineno">  253</span>&#160;        block_threads,</div>
-<div class="line"><a name="l00254"></a><span class="lineno">  254</span>&#160;        dynamic_smem_bytes);</div>
-<div class="line"><a name="l00255"></a><span class="lineno">  255</span>&#160;</div>
-<div class="line"><a name="l00256"></a><span class="lineno">  256</span>&#160;<span class="preprocessor">#endif  // CUB_RUNTIME_ENABLED</span></div>
-<div class="line"><a name="l00257"></a><span class="lineno">  257</span>&#160;<span class="preprocessor"></span>}</div>
-<div class="line"><a name="l00258"></a><span class="lineno">  258</span>&#160;</div>
+<div class="line"><a name="l00204"></a><span class="lineno">  204</span>&#160;</div>
+<div class="line"><a name="l00236"></a><span class="lineno">  236</span>&#160;<span class="keyword">template</span> &lt;<span class="keyword">typename</span> KernelPtr&gt;</div>
+<div class="line"><a name="l00237"></a><span class="lineno">  237</span>&#160;CUB_RUNTIME_FUNCTION __forceinline__</div>
+<div class="line"><a name="l00238"></a><span class="lineno">  238</span>&#160;cudaError_t MaxSmOccupancy(</div>
+<div class="line"><a name="l00239"></a><span class="lineno">  239</span>&#160;    <span class="keywordtype">int</span>                 &amp;max_sm_occupancy,          </div>
+<div class="line"><a name="l00240"></a><span class="lineno">  240</span>&#160;    KernelPtr           kernel_ptr,                 </div>
+<div class="line"><a name="l00241"></a><span class="lineno">  241</span>&#160;    <span class="keywordtype">int</span>                 block_threads,              </div>
+<div class="line"><a name="l00242"></a><span class="lineno">  242</span>&#160;    <span class="keywordtype">int</span>                 dynamic_smem_bytes = 0)</div>
+<div class="line"><a name="l00243"></a><span class="lineno">  243</span>&#160;{</div>
+<div class="line"><a name="l00244"></a><span class="lineno">  244</span>&#160;<span class="preprocessor">#ifndef CUB_RUNTIME_ENABLED</span></div>
+<div class="line"><a name="l00245"></a><span class="lineno">  245</span>&#160;<span class="preprocessor"></span></div>
+<div class="line"><a name="l00246"></a><span class="lineno">  246</span>&#160;    <span class="comment">// CUDA API calls not supported from this device</span></div>
+<div class="line"><a name="l00247"></a><span class="lineno">  247</span>&#160;    <span class="keywordflow">return</span> <a class="code" href="group___util_mgmt.html#ga84c3a4c178bf6593e0fad2b763606236" title="Debug macro. ">CubDebug</a>(cudaErrorInvalidConfiguration);</div>
+<div class="line"><a name="l00248"></a><span class="lineno">  248</span>&#160;</div>
+<div class="line"><a name="l00249"></a><span class="lineno">  249</span>&#160;<span class="preprocessor">#else</span></div>
+<div class="line"><a name="l00250"></a><span class="lineno">  250</span>&#160;<span class="preprocessor"></span></div>
+<div class="line"><a name="l00251"></a><span class="lineno">  251</span>&#160;    <span class="keywordflow">return</span> cudaOccupancyMaxActiveBlocksPerMultiprocessor (</div>
+<div class="line"><a name="l00252"></a><span class="lineno">  252</span>&#160;        &amp;max_sm_occupancy,</div>
+<div class="line"><a name="l00253"></a><span class="lineno">  253</span>&#160;        kernel_ptr,</div>
+<div class="line"><a name="l00254"></a><span class="lineno">  254</span>&#160;        block_threads,</div>
+<div class="line"><a name="l00255"></a><span class="lineno">  255</span>&#160;        dynamic_smem_bytes);</div>
+<div class="line"><a name="l00256"></a><span class="lineno">  256</span>&#160;</div>
+<div class="line"><a name="l00257"></a><span class="lineno">  257</span>&#160;<span class="preprocessor">#endif  // CUB_RUNTIME_ENABLED</span></div>
+<div class="line"><a name="l00258"></a><span class="lineno">  258</span>&#160;<span class="preprocessor"></span>}</div>
 <div class="line"><a name="l00259"></a><span class="lineno">  259</span>&#160;</div>
-<div class="line"><a name="l00260"></a><span class="lineno">  260</span>&#160;<span class="comment">/******************************************************************************</span></div>
-<div class="line"><a name="l00261"></a><span class="lineno">  261</span>&#160;<span class="comment"> * Policy management</span></div>
-<div class="line"><a name="l00262"></a><span class="lineno">  262</span>&#160;<span class="comment"> ******************************************************************************/</span></div>
-<div class="line"><a name="l00263"></a><span class="lineno">  263</span>&#160;</div>
-<div class="line"><a name="l00267"></a><span class="lineno">  267</span>&#160;<span class="keyword">struct </span>KernelConfig</div>
-<div class="line"><a name="l00268"></a><span class="lineno">  268</span>&#160;{</div>
-<div class="line"><a name="l00269"></a><span class="lineno">  269</span>&#160;    <span class="keywordtype">int</span> block_threads;</div>
-<div class="line"><a name="l00270"></a><span class="lineno">  270</span>&#160;    <span class="keywordtype">int</span> items_per_thread;</div>
-<div class="line"><a name="l00271"></a><span class="lineno">  271</span>&#160;    <span class="keywordtype">int</span> tile_size;</div>
-<div class="line"><a name="l00272"></a><span class="lineno">  272</span>&#160;    <span class="keywordtype">int</span> sm_occupancy;</div>
-<div class="line"><a name="l00273"></a><span class="lineno">  273</span>&#160;</div>
-<div class="line"><a name="l00274"></a><span class="lineno">  274</span>&#160;    <span class="keyword">template</span> &lt;<span class="keyword">typename</span> AgentPolicyT, <span class="keyword">typename</span> KernelPtrT&gt;</div>
+<div class="line"><a name="l00260"></a><span class="lineno">  260</span>&#160;</div>
+<div class="line"><a name="l00261"></a><span class="lineno">  261</span>&#160;<span class="comment">/******************************************************************************</span></div>
+<div class="line"><a name="l00262"></a><span class="lineno">  262</span>&#160;<span class="comment"> * Policy management</span></div>
+<div class="line"><a name="l00263"></a><span class="lineno">  263</span>&#160;<span class="comment"> ******************************************************************************/</span></div>
+<div class="line"><a name="l00264"></a><span class="lineno">  264</span>&#160;</div>
+<div class="line"><a name="l00268"></a><span class="lineno">  268</span>&#160;<span class="keyword">struct </span>KernelConfig</div>
+<div class="line"><a name="l00269"></a><span class="lineno">  269</span>&#160;{</div>
+<div class="line"><a name="l00270"></a><span class="lineno">  270</span>&#160;    <span class="keywordtype">int</span> block_threads;</div>
+<div class="line"><a name="l00271"></a><span class="lineno">  271</span>&#160;    <span class="keywordtype">int</span> items_per_thread;</div>
+<div class="line"><a name="l00272"></a><span class="lineno">  272</span>&#160;    <span class="keywordtype">int</span> tile_size;</div>
+<div class="line"><a name="l00273"></a><span class="lineno">  273</span>&#160;    <span class="keywordtype">int</span> sm_occupancy;</div>
+<div class="line"><a name="l00274"></a><span class="lineno">  274</span>&#160;</div>
 <div class="line"><a name="l00275"></a><span class="lineno">  275</span>&#160;    CUB_RUNTIME_FUNCTION __forceinline__</div>
-<div class="line"><a name="l00276"></a><span class="lineno">  276</span>&#160;    cudaError_t Init(KernelPtrT kernel_ptr)</div>
-<div class="line"><a name="l00277"></a><span class="lineno">  277</span>&#160;    {</div>
-<div class="line"><a name="l00278"></a><span class="lineno">  278</span>&#160;        block_threads        = AgentPolicyT::BLOCK_THREADS;</div>
-<div class="line"><a name="l00279"></a><span class="lineno">  279</span>&#160;        items_per_thread     = AgentPolicyT::ITEMS_PER_THREAD;</div>
-<div class="line"><a name="l00280"></a><span class="lineno">  280</span>&#160;        tile_size            = block_threads * items_per_thread;</div>
-<div class="line"><a name="l00281"></a><span class="lineno">  281</span>&#160;        cudaError_t retval   = MaxSmOccupancy(sm_occupancy, kernel_ptr, block_threads);</div>
-<div class="line"><a name="l00282"></a><span class="lineno">  282</span>&#160;        <span class="keywordflow">return</span> retval;</div>
-<div class="line"><a name="l00283"></a><span class="lineno">  283</span>&#160;    }</div>
-<div class="line"><a name="l00284"></a><span class="lineno">  284</span>&#160;};</div>
-<div class="line"><a name="l00285"></a><span class="lineno">  285</span>&#160;</div>
-<div class="line"><a name="l00286"></a><span class="lineno">  286</span>&#160;</div>
-<div class="line"><a name="l00287"></a><span class="lineno">  287</span>&#160;</div>
-<div class="line"><a name="l00289"></a><span class="lineno">  289</span>&#160;<span class="keyword">template</span> &lt;<span class="keywordtype">int</span> PTX_VERSION, <span class="keyword">typename</span> PolicyT, <span class="keyword">typename</span> PrevPolicyT&gt;</div>
-<div class="line"><a name="l00290"></a><span class="lineno">  290</span>&#160;<span class="keyword">struct </span>ChainedPolicy</div>
-<div class="line"><a name="l00291"></a><span class="lineno">  291</span>&#160;{</div>
-<div class="line"><a name="l00293"></a><span class="lineno">  293</span>&#160;   <span class="keyword">typedef</span> <span class="keyword">typename</span> If&lt;(CUB_PTX_ARCH &lt; PTX_VERSION), PrevPolicyT::ActivePolicy, PolicyT&gt;::Type ActivePolicy;</div>
-<div class="line"><a name="l00294"></a><span class="lineno">  294</span>&#160;</div>
-<div class="line"><a name="l00296"></a><span class="lineno">  296</span>&#160;   <span class="keyword">template</span> &lt;<span class="keyword">typename</span> FunctorT&gt;</div>
-<div class="line"><a name="l00297"></a><span class="lineno">  297</span>&#160;   CUB_RUNTIME_FUNCTION __forceinline__</div>
-<div class="line"><a name="l00298"></a><span class="lineno">  298</span>&#160;   <span class="keyword">static</span> cudaError_t Invoke(<span class="keywordtype">int</span> ptx_version, FunctorT &amp;op)</div>
-<div class="line"><a name="l00299"></a><span class="lineno">  299</span>&#160;   {</div>
-<div class="line"><a name="l00300"></a><span class="lineno">  300</span>&#160;       <span class="keywordflow">if</span> (ptx_version &lt; PTX_VERSION) {</div>
-<div class="line"><a name="l00301"></a><span class="lineno">  301</span>&#160;           <span class="keywordflow">return</span> PrevPolicyT::Invoke(ptx_version, op);</div>
-<div class="line"><a name="l00302"></a><span class="lineno">  302</span>&#160;       }</div>
-<div class="line"><a name="l00303"></a><span class="lineno">  303</span>&#160;       <span class="keywordflow">return</span> op.template Invoke&lt;PolicyT&gt;();</div>
-<div class="line"><a name="l00304"></a><span class="lineno">  304</span>&#160;   }</div>
-<div class="line"><a name="l00305"></a><span class="lineno">  305</span>&#160;};</div>
-<div class="line"><a name="l00306"></a><span class="lineno">  306</span>&#160;</div>
-<div class="line"><a name="l00308"></a><span class="lineno">  308</span>&#160;<span class="keyword">template</span> &lt;<span class="keywordtype">int</span> PTX_VERSION, <span class="keyword">typename</span> PolicyT&gt;</div>
-<div class="line"><a name="l00309"></a><span class="lineno">  309</span>&#160;<span class="keyword">struct </span>ChainedPolicy&lt;PTX_VERSION, PolicyT, PolicyT&gt;</div>
-<div class="line"><a name="l00310"></a><span class="lineno">  310</span>&#160;{</div>
-<div class="line"><a name="l00312"></a><span class="lineno">  312</span>&#160;    <span class="keyword">typedef</span> PolicyT ActivePolicy;</div>
-<div class="line"><a name="l00313"></a><span class="lineno">  313</span>&#160;</div>
-<div class="line"><a name="l00315"></a><span class="lineno">  315</span>&#160;    <span class="keyword">template</span> &lt;<span class="keyword">typename</span> FunctorT&gt;</div>
-<div class="line"><a name="l00316"></a><span class="lineno">  316</span>&#160;    CUB_RUNTIME_FUNCTION __forceinline__</div>
-<div class="line"><a name="l00317"></a><span class="lineno">  317</span>&#160;    <span class="keyword">static</span> cudaError_t Invoke(<span class="keywordtype">int</span> ptx_version, FunctorT &amp;op) {</div>
-<div class="line"><a name="l00318"></a><span class="lineno">  318</span>&#160;        <span class="keywordflow">return</span> op.template Invoke&lt;PolicyT&gt;();</div>
-<div class="line"><a name="l00319"></a><span class="lineno">  319</span>&#160;    }</div>
-<div class="line"><a name="l00320"></a><span class="lineno">  320</span>&#160;};</div>
-<div class="line"><a name="l00321"></a><span class="lineno">  321</span>&#160;</div>
-<div class="line"><a name="l00322"></a><span class="lineno">  322</span>&#160;</div>
-<div class="line"><a name="l00323"></a><span class="lineno">  323</span>&#160;</div>
-<div class="line"><a name="l00324"></a><span class="lineno">  324</span>&#160;</div>
-<div class="line"><a name="l00325"></a><span class="lineno">  325</span>&#160;<span class="preprocessor">#endif  // Do not document</span></div>
-<div class="line"><a name="l00326"></a><span class="lineno">  326</span>&#160;<span class="preprocessor"></span></div>
+<div class="line"><a name="l00276"></a><span class="lineno">  276</span>&#160;    KernelConfig() : block_threads(0), items_per_thread(0), tile_size(0), sm_occupancy(0) {}</div>
+<div class="line"><a name="l00277"></a><span class="lineno">  277</span>&#160;</div>
+<div class="line"><a name="l00278"></a><span class="lineno">  278</span>&#160;    <span class="keyword">template</span> &lt;<span class="keyword">typename</span> AgentPolicyT, <span class="keyword">typename</span> KernelPtrT&gt;</div>
+<div class="line"><a name="l00279"></a><span class="lineno">  279</span>&#160;    CUB_RUNTIME_FUNCTION __forceinline__</div>
+<div class="line"><a name="l00280"></a><span class="lineno">  280</span>&#160;    cudaError_t Init(KernelPtrT kernel_ptr)</div>
+<div class="line"><a name="l00281"></a><span class="lineno">  281</span>&#160;    {</div>
+<div class="line"><a name="l00282"></a><span class="lineno">  282</span>&#160;        block_threads        = AgentPolicyT::BLOCK_THREADS;</div>
+<div class="line"><a name="l00283"></a><span class="lineno">  283</span>&#160;        items_per_thread     = AgentPolicyT::ITEMS_PER_THREAD;</div>
+<div class="line"><a name="l00284"></a><span class="lineno">  284</span>&#160;        tile_size            = block_threads * items_per_thread;</div>
+<div class="line"><a name="l00285"></a><span class="lineno">  285</span>&#160;        cudaError_t retval   = MaxSmOccupancy(sm_occupancy, kernel_ptr, block_threads);</div>
+<div class="line"><a name="l00286"></a><span class="lineno">  286</span>&#160;        <span class="keywordflow">return</span> retval;</div>
+<div class="line"><a name="l00287"></a><span class="lineno">  287</span>&#160;    }</div>
+<div class="line"><a name="l00288"></a><span class="lineno">  288</span>&#160;};</div>
+<div class="line"><a name="l00289"></a><span class="lineno">  289</span>&#160;</div>
+<div class="line"><a name="l00290"></a><span class="lineno">  290</span>&#160;</div>
+<div class="line"><a name="l00291"></a><span class="lineno">  291</span>&#160;</div>
+<div class="line"><a name="l00293"></a><span class="lineno">  293</span>&#160;<span class="keyword">template</span> &lt;<span class="keywordtype">int</span> PTX_VERSION, <span class="keyword">typename</span> PolicyT, <span class="keyword">typename</span> PrevPolicyT&gt;</div>
+<div class="line"><a name="l00294"></a><span class="lineno">  294</span>&#160;<span class="keyword">struct </span>ChainedPolicy</div>
+<div class="line"><a name="l00295"></a><span class="lineno">  295</span>&#160;{</div>
+<div class="line"><a name="l00297"></a><span class="lineno">  297</span>&#160;   <span class="keyword">typedef</span> <span class="keyword">typename</span> If&lt;(CUB_PTX_ARCH &lt; PTX_VERSION), typename PrevPolicyT::ActivePolicy, PolicyT&gt;::Type ActivePolicy;</div>
+<div class="line"><a name="l00298"></a><span class="lineno">  298</span>&#160;</div>
+<div class="line"><a name="l00300"></a><span class="lineno">  300</span>&#160;   <span class="keyword">template</span> &lt;<span class="keyword">typename</span> FunctorT&gt;</div>
+<div class="line"><a name="l00301"></a><span class="lineno">  301</span>&#160;   CUB_RUNTIME_FUNCTION __forceinline__</div>
+<div class="line"><a name="l00302"></a><span class="lineno">  302</span>&#160;   <span class="keyword">static</span> cudaError_t Invoke(<span class="keywordtype">int</span> ptx_version, FunctorT &amp;op)</div>
+<div class="line"><a name="l00303"></a><span class="lineno">  303</span>&#160;   {</div>
+<div class="line"><a name="l00304"></a><span class="lineno">  304</span>&#160;       <span class="keywordflow">if</span> (ptx_version &lt; PTX_VERSION) {</div>
+<div class="line"><a name="l00305"></a><span class="lineno">  305</span>&#160;           <span class="keywordflow">return</span> PrevPolicyT::Invoke(ptx_version, op);</div>
+<div class="line"><a name="l00306"></a><span class="lineno">  306</span>&#160;       }</div>
+<div class="line"><a name="l00307"></a><span class="lineno">  307</span>&#160;       <span class="keywordflow">return</span> op.template Invoke&lt;PolicyT&gt;();</div>
+<div class="line"><a name="l00308"></a><span class="lineno">  308</span>&#160;   }</div>
+<div class="line"><a name="l00309"></a><span class="lineno">  309</span>&#160;};</div>
+<div class="line"><a name="l00310"></a><span class="lineno">  310</span>&#160;</div>
+<div class="line"><a name="l00312"></a><span class="lineno">  312</span>&#160;<span class="keyword">template</span> &lt;<span class="keywordtype">int</span> PTX_VERSION, <span class="keyword">typename</span> PolicyT&gt;</div>
+<div class="line"><a name="l00313"></a><span class="lineno">  313</span>&#160;<span class="keyword">struct </span>ChainedPolicy&lt;PTX_VERSION, PolicyT, PolicyT&gt;</div>
+<div class="line"><a name="l00314"></a><span class="lineno">  314</span>&#160;{</div>
+<div class="line"><a name="l00316"></a><span class="lineno">  316</span>&#160;    <span class="keyword">typedef</span> PolicyT ActivePolicy;</div>
+<div class="line"><a name="l00317"></a><span class="lineno">  317</span>&#160;</div>
+<div class="line"><a name="l00319"></a><span class="lineno">  319</span>&#160;    <span class="keyword">template</span> &lt;<span class="keyword">typename</span> FunctorT&gt;</div>
+<div class="line"><a name="l00320"></a><span class="lineno">  320</span>&#160;    CUB_RUNTIME_FUNCTION __forceinline__</div>
+<div class="line"><a name="l00321"></a><span class="lineno">  321</span>&#160;    <span class="keyword">static</span> cudaError_t Invoke(<span class="keywordtype">int</span> ptx_version, FunctorT &amp;op) {</div>
+<div class="line"><a name="l00322"></a><span class="lineno">  322</span>&#160;        <span class="keywordflow">return</span> op.template Invoke&lt;PolicyT&gt;();</div>
+<div class="line"><a name="l00323"></a><span class="lineno">  323</span>&#160;    }</div>
+<div class="line"><a name="l00324"></a><span class="lineno">  324</span>&#160;};</div>
+<div class="line"><a name="l00325"></a><span class="lineno">  325</span>&#160;</div>
+<div class="line"><a name="l00326"></a><span class="lineno">  326</span>&#160;</div>
 <div class="line"><a name="l00327"></a><span class="lineno">  327</span>&#160;</div>
 <div class="line"><a name="l00328"></a><span class="lineno">  328</span>&#160;</div>
-<div class="line"><a name="l00329"></a><span class="lineno">  329</span>&#160;       <span class="comment">// end group UtilMgmt</span></div>
+<div class="line"><a name="l00329"></a><span class="lineno">  329</span>&#160;<span class="preprocessor">#endif  // Do not document</span></div>
+<div class="line"><a name="l00330"></a><span class="lineno">  330</span>&#160;<span class="preprocessor"></span></div>
 <div class="line"><a name="l00331"></a><span class="lineno">  331</span>&#160;</div>
-<div class="line"><a name="l00332"></a><span class="lineno">  332</span>&#160;}               <span class="comment">// CUB namespace</span></div>
-<div class="line"><a name="l00333"></a><span class="lineno">  333</span>&#160;CUB_NS_POSTFIX  <span class="comment">// Optional outer namespace(s)</span></div>
+<div class="line"><a name="l00332"></a><span class="lineno">  332</span>&#160;</div>
+<div class="line"><a name="l00333"></a><span class="lineno">  333</span>&#160;       <span class="comment">// end group UtilMgmt</span></div>
+<div class="line"><a name="l00335"></a><span class="lineno">  335</span>&#160;</div>
+<div class="line"><a name="l00336"></a><span class="lineno">  336</span>&#160;}               <span class="comment">// CUB namespace</span></div>
+<div class="line"><a name="l00337"></a><span class="lineno">  337</span>&#160;CUB_NS_POSTFIX  <span class="comment">// Optional outer namespace(s)</span></div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/util__ptx_8cuh.html b/docs/html/util__ptx_8cuh.html
index 2da638fb01..5e798e6cbf 100644
--- a/docs/html/util__ptx_8cuh.html
+++ b/docs/html/util__ptx_8cuh.html
@@ -229,7 +229,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/util__ptx_8cuh_source.html b/docs/html/util__ptx_8cuh_source.html
index fe1b6cae21..7612174173 100644
--- a/docs/html/util__ptx_8cuh_source.html
+++ b/docs/html/util__ptx_8cuh_source.html
@@ -237,7 +237,7 @@
 <div class="line"><a name="l00166"></a><span class="lineno">  166</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> bit_start,</div>
 <div class="line"><a name="l00167"></a><span class="lineno">  167</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> num_bits)</div>
 <div class="line"><a name="l00168"></a><span class="lineno">  168</span>&#160;{</div>
-<div class="line"><a name="l00169"></a><span class="lineno">  169</span>&#160;    <span class="keywordflow">return</span> <a class="code" href="group___util_ptx.html#gae6e3003e44d5f640aeea7d67414b3668" title="Bitfield-extract. Extracts num_bits from source starting at bit-offset bit_start. The input source ma...">BFE</a>(source, bit_start, num_bits, <a class="code" href="structcub_1_1_int2_type.html" title="Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...">Int2Type</a>&lt;<span class="keyword">sizeof</span>(UnsignedBits)&gt;());</div>
+<div class="line"><a name="l00169"></a><span class="lineno">  169</span>&#160;    <span class="keywordflow">return</span> <a class="code" href="group___util_ptx.html#gae6e3003e44d5f640aeea7d67414b3668" title="Bitfield-extract. Extracts num_bits from source starting at bit-offset bit_start. The input source ma...">BFE</a>(source, bit_start, num_bits, Int2Type&lt;<span class="keyword">sizeof</span>(UnsignedBits)&gt;());</div>
 <div class="line"><a name="l00170"></a><span class="lineno">  170</span>&#160;}</div>
 <div class="line"><a name="l00171"></a><span class="lineno">  171</span>&#160;</div>
 <div class="line"><a name="l00172"></a><span class="lineno">  172</span>&#160;</div>
@@ -617,7 +617,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/util__type_8cuh.html b/docs/html/util__type_8cuh.html
index cabebdaf8b..ef388725d5 100644
--- a/docs/html/util__type_8cuh.html
+++ b/docs/html/util__type_8cuh.html
@@ -106,6 +106,7 @@
 <div class="contents">
 <div class="textblock"><code>#include &lt;iostream&gt;</code><br/>
 <code>#include &lt;limits&gt;</code><br/>
+<code>#include &lt;cfloat&gt;</code><br/>
 <code>#include &quot;util_macro.cuh&quot;</code><br/>
 <code>#include &quot;<a class="el" href="util__arch_8cuh_source.html">util_arch.cuh</a>&quot;</code><br/>
 <code>#include &quot;util_namespace.cuh&quot;</code><br/>
@@ -120,24 +121,6 @@
 <tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_equals.html">cub::Equals&lt; A, B &gt;</a></td></tr>
 <tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">Type equality test.  <a href="structcub_1_1_equals.html#details">More...</a><br/></td></tr>
 <tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_null_type.html">cub::NullType</a></td></tr>
-<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">A simple "NULL" marker type.  <a href="structcub_1_1_null_type.html#details">More...</a><br/></td></tr>
-<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_int2_type.html">cub::Int2Type&lt; A &gt;</a></td></tr>
-<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static call dispatch based on constant integral values)  <a href="structcub_1_1_int2_type.html#details">More...</a><br/></td></tr>
-<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_cub_vector.html">cub::CubVector&lt; T, vec_elements &gt;</a></td></tr>
-<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">Exposes a member typedef <code>Type</code> that names the corresponding CUDA vector type if one exists. Otherwise <code>Type</code> refers to the <a class="el" href="structcub_1_1_cub_vector.html" title="Exposes a member typedef Type that names the corresponding CUDA vector type if one exists...">CubVector</a> structure itself, which will wrap the corresponding <code>x</code>, <code>y</code>, etc. vector fields.  <a href="structcub_1_1_cub_vector.html#details">More...</a><br/></td></tr>
-<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_uninitialized.html">cub::Uninitialized&lt; T &gt;</a></td></tr>
-<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions.  <a href="structcub_1_1_uninitialized.html#details">More...</a><br/></td></tr>
-<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_key_value_pair.html">cub::KeyValuePair&lt; _Key, _Value &gt;</a></td></tr>
-<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">A key identifier paired with a corresponding value.  <a href="structcub_1_1_key_value_pair.html#details">More...</a><br/></td></tr>
-<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_double_buffer.html">cub::DoubleBuffer&lt; T &gt;</a></td></tr>
-<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">Double-buffer storage wrapper for multi-pass stream transformations that require more than one storage array for streaming intermediate results back and forth.  <a href="structcub_1_1_double_buffer.html#details">More...</a><br/></td></tr>
-<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcub_1_1_log2.html">cub::Log2&lt; N, CURRENT_VAL, COUNT &gt;</a></td></tr>
 <tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">Statically determine log2(N), rounded up.  <a href="structcub_1_1_log2.html#details">More...</a><br/></td></tr>
 <tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
@@ -168,7 +151,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/util__type_8cuh_source.html.REMOVED.git-id b/docs/html/util__type_8cuh_source.html.REMOVED.git-id
index ca40c1f51f..05eb304320 100644
--- a/docs/html/util__type_8cuh_source.html.REMOVED.git-id
+++ b/docs/html/util__type_8cuh_source.html.REMOVED.git-id
@@ -1 +1 @@
-72d4fabf6d2b327ccbd3b99922d2c0e9bcaf53b2
\ No newline at end of file
+c8dbf5bea258732932a8a61fe92a028b93b2f5f4
\ No newline at end of file
diff --git a/docs/html/warp__reduce_8cuh.html b/docs/html/warp__reduce_8cuh.html
index 1f1de2a3d0..8e5002d6d9 100644
--- a/docs/html/warp__reduce_8cuh.html
+++ b/docs/html/warp__reduce_8cuh.html
@@ -141,7 +141,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/warp__reduce_8cuh_source.html b/docs/html/warp__reduce_8cuh_source.html
index 56d73da582..cda9be286f 100644
--- a/docs/html/warp__reduce_8cuh_source.html
+++ b/docs/html/warp__reduce_8cuh_source.html
@@ -191,7 +191,7 @@
 <div class="line"><a name="l00187"></a><span class="lineno">  187</span>&#160;</div>
 <div class="line"><a name="l00188"></a><span class="lineno">  188</span>&#160;<span class="keyword">public</span>:</div>
 <div class="line"><a name="l00189"></a><span class="lineno">  189</span>&#160;</div>
-<div class="line"><a name="l00191"></a><span class="lineno"><a class="code" href="structcub_1_1_warp_reduce_1_1_temp_storage.html">  191</a></span>&#160;    <span class="keyword">struct </span><a class="code" href="structcub_1_1_warp_reduce_1_1_temp_storage.html" title="The operations exposed by WarpReduce require a temporary memory allocation of this nested type for th...">TempStorage</a> : <a class="code" href="structcub_1_1_uninitialized.html" title="A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions...">Uninitialized</a>&lt;_TempStorage&gt; {};</div>
+<div class="line"><a name="l00191"></a><span class="lineno"><a class="code" href="structcub_1_1_warp_reduce_1_1_temp_storage.html">  191</a></span>&#160;    <span class="keyword">struct </span><a class="code" href="structcub_1_1_warp_reduce_1_1_temp_storage.html" title="The operations exposed by WarpReduce require a temporary memory allocation of this nested type for th...">TempStorage</a> : Uninitialized&lt;_TempStorage&gt; {};</div>
 <div class="line"><a name="l00192"></a><span class="lineno">  192</span>&#160;</div>
 <div class="line"><a name="l00193"></a><span class="lineno">  193</span>&#160;</div>
 <div class="line"><a name="l00194"></a><span class="lineno">  194</span>&#160;    <span class="comment">/******************************************************************/</span></div>
@@ -297,7 +297,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:14 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/warp__scan_8cuh.html b/docs/html/warp__scan_8cuh.html
index c1cccc5d98..6d82b618a3 100644
--- a/docs/html/warp__scan_8cuh.html
+++ b/docs/html/warp__scan_8cuh.html
@@ -141,7 +141,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:04 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/html/warp__scan_8cuh_source.html b/docs/html/warp__scan_8cuh_source.html
index dc760bc441..071623cc03 100644
--- a/docs/html/warp__scan_8cuh_source.html
+++ b/docs/html/warp__scan_8cuh_source.html
@@ -185,7 +185,7 @@
 <div class="line"><a name="l00188"></a><span class="lineno">  188</span>&#160;</div>
 <div class="line"><a name="l00189"></a><span class="lineno">  189</span>&#160;<span class="keyword">public</span>:</div>
 <div class="line"><a name="l00190"></a><span class="lineno">  190</span>&#160;</div>
-<div class="line"><a name="l00192"></a><span class="lineno"><a class="code" href="structcub_1_1_warp_scan_1_1_temp_storage.html">  192</a></span>&#160;    <span class="keyword">struct </span><a class="code" href="structcub_1_1_warp_scan_1_1_temp_storage.html" title="The operations exposed by WarpScan require a temporary memory allocation of this nested type for thre...">TempStorage</a> : <a class="code" href="structcub_1_1_uninitialized.html" title="A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions...">Uninitialized</a>&lt;_TempStorage&gt; {};</div>
+<div class="line"><a name="l00192"></a><span class="lineno"><a class="code" href="structcub_1_1_warp_scan_1_1_temp_storage.html">  192</a></span>&#160;    <span class="keyword">struct </span><a class="code" href="structcub_1_1_warp_scan_1_1_temp_storage.html" title="The operations exposed by WarpScan require a temporary memory allocation of this nested type for thre...">TempStorage</a> : Uninitialized&lt;_TempStorage&gt; {};</div>
 <div class="line"><a name="l00193"></a><span class="lineno">  193</span>&#160;</div>
 <div class="line"><a name="l00194"></a><span class="lineno">  194</span>&#160;</div>
 <div class="line"><a name="l00195"></a><span class="lineno">  195</span>&#160;    <span class="comment">/******************************************************************/</span></div>
@@ -365,7 +365,7 @@
 <!-- HTML footer for doxygen 1.8.3.1-->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
-Generated on Tue Nov 10 2015 13:58:03 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
+Generated on Mon Dec 14 2015 13:11:15 for CUB by &#160;<a href="http://www.doxygen.org/index.html">
 <img class="footer" src="doxygen.png" alt="doxygen"/>
 </a> 1.8.4
 <br>
diff --git a/docs/mainpage.dox b/docs/mainpage.dox
index e2daa2b44a..03b9526cc7 100644
--- a/docs/mainpage.dox
+++ b/docs/mainpage.dox
@@ -62,7 +62,7 @@
 
 <a href="download_cub.html"><img src="download-icon.png" style="position:relative; bottom:-10px; border:0px;"/></a>
 &nbsp;&nbsp;
-<a href="download_cub.html"><em><b>Download CUB v1.5.1</b></em></a>
+<a href="download_cub.html"><em><b>Download CUB v1.5.0</b></em></a>
 
 </td><td>
 
@@ -527,11 +527,11 @@ for the current feature release.
 <table>
 
 <tr><td style="white-space: nowrap; vertical-align:text-top;">
-11/13/2015<br>
+12/15/2015<br>
 [<b>CUB v1.5.0</b>](download_cub.html)
 </td><td style="vertical-align:text-top;">
   - New Features: 
-    - Added new [segmented device-wide operations](group___segmented_module.html) for sort, scan, and reduction primitives.
+    - Added new [segmented device-wide operations](group___segmented_module.html) for sort and reduction primitives.
   - See the [change-log](CHANGE_LOG.TXT) for further details, including bug-fixes
 </td></tr>
 
diff --git a/test/test_device_radix_sort.cu b/test/test_device_radix_sort.cu
index 9876916fb5..ef0b7f4af1 100644
--- a/test/test_device_radix_sort.cu
+++ b/test/test_device_radix_sort.cu
@@ -1217,6 +1217,8 @@ int main(int argc, char** argv)
     // Compile/run thorough tests
     for (int i = 0; i <= g_repeat; ++i)
     {
+        TestGen<bool>                 (num_items, num_segments);
+/*
         TestGen<char>                 (num_items, num_segments);
         TestGen<signed char>          (num_items, num_segments);
         TestGen<unsigned char>        (num_items, num_segments);
@@ -1237,6 +1239,7 @@ int main(int argc, char** argv)
 
         if (ptx_version > 120)                          // Don't check doubles on PTX120 or below because they're down-converted
             TestGen<double>           (num_items, num_segments);
+*/
 
     }
 
diff --git a/test/test_util.h b/test/test_util.h
index cb1d3381bc..944431a417 100644
--- a/test/test_util.h
+++ b/test/test_util.h
@@ -526,6 +526,31 @@ __host__ __device__ __forceinline__ void InitValue(GenMode gen_mode, T &value, i
 }
 
 
+/**
+ * Initialize value (bool)
+ */
+__host__ __device__ __forceinline__ void InitValue(GenMode gen_mode, bool &value, int index = 0)
+{
+    switch (gen_mode)
+    {
+#if (CUB_PTX_ARCH == 0)
+    case RANDOM:
+        char c;
+        RandomBits(c, 0, 0, 1);
+        value = (bool) c;
+        break;
+#endif
+     case UNIFORM:
+        value = true;
+        break;
+    case INTEGER_SEED:
+    default:
+        value = (bool) index;
+        break;
+    }
+}
+
+
 /**
  * cub::NullType test initialization
  */