diff --git a/CHANGE_LOG.TXT b/CHANGE_LOG.TXT index b2695c0e42..202ec6f160 100644 --- a/CHANGE_LOG.TXT +++ b/CHANGE_LOG.TXT @@ -1,5 +1,19 @@ //----------------------------------------------------------------------------- +1.4.1 04/13/2015 + - Bug fixes: + - Fixes for CUDA 7.0 issues with SHFL-based warp-scan and warp-reduction + on non-primitive data types (e.g., user-defined structs) + - Fixes for minor CUDA 7.0 performance regressions in cub::DeviceScan, + DeviceReduceByKey + - Fixes to allow cub::DeviceRadixSort and cub::BlockRadixSort on bool types + - Remove requirement for callers to define the CUB_CDP macro + when invoking CUB device-wide routines using CUDA dynamic parallelism + - Fix for headers not being included in the proper order (or missing includes) + for some block-wide functions + +//----------------------------------------------------------------------------- + 1.4.0 03/18/2015 - New Features: - Support and performance tuning for new Maxwell GPU architectures diff --git a/README.md b/README.md index 98c5f72ecb..6cbd133f26 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@
Type
that names the corresponding CUDA vector type if one exists. Otherwise Type
refers to the CubVector structure itself, which will wrap the corresponding x
, y
, etc. vector fields ItemOffsetPair
tuples) const
and volatile
qualifiers from type Tp
KeyValuePair
tuples) ItemOffsetPair
tuples). More...KeyValuePair
tuples). More...
@@ -134,7 +134,7 @@
self_type typedef | cub::ArgIndexInputIterator< InputIteratorT, OffsetT > | value_type typedef | cub::ArgIndexInputIterator< InputIteratorT, OffsetT > | | pointer typedef | cub::ArgIndexInputIterator< InputIteratorT, OffsetT > | reference typedef | cub::ArgIndexInputIterator< InputIteratorT, OffsetT > | value_type typedef | cub::ArgIndexInputIterator< InputIteratorT, OffsetT > |
A random-access input wrapper for pairing dereferenced values with their corresponding indices (forming ItemOffsetPair
tuples).
A random-access input wrapper for pairing dereferenced values with their corresponding indices (forming KeyValuePair
tuples).
itr
of type InputIteratorT
. Dereferencing an ArgIndexInputIterator at offset i
produces a ItemOffsetPair
value whose offset
field is i
and whose item
field is itr[i]
.itr
of type InputIteratorT
. Dereferencing an ArgIndexInputIterator at offset i
produces a KeyValuePair
value whose key
field is i
and whose value
field is itr[i]
.difference_type | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Type to express the result of subtracting one iterator from another. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
-typedef ItemOffsetPair< T, -difference_type > | value_type | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
The type of the element the iterator can point to. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+typedef KeyValuePair +< difference_type, T > | value_type | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
The type of the element the iterator can point to. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
-typedef value_type * | pointer | pointer | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
The type of a pointer to an element the iterator can point to. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
-typedef value_type | reference | reference | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
The type of a reference to an element the iterator can point to. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
@@ -315,7 +315,7 @@ Constructor & Destructor Documentation-Generated on Wed Mar 18 2015 18:50:32 for CUB by +Generated on Mon Apr 13 2015 13:56:51 for CUB by 1.8.4 diff --git a/docs/html/classcub_1_1_block_discontinuity-members.html b/docs/html/classcub_1_1_block_discontinuity-members.html index 054453868d..1c139f7784 100644 --- a/docs/html/classcub_1_1_block_discontinuity-members.html +++ b/docs/html/classcub_1_1_block_discontinuity-members.html @@ -118,7 +118,7 @@ -Generated on Wed Mar 18 2015 18:50:34 for CUB by +Generated on Mon Apr 13 2015 13:56:52 for CUB by 1.8.4 diff --git a/docs/html/classcub_1_1_block_discontinuity.html b/docs/html/classcub_1_1_block_discontinuity.html index cfdb74aa6e..3b2abe1224 100644 --- a/docs/html/classcub_1_1_block_discontinuity.html +++ b/docs/html/classcub_1_1_block_discontinuity.html @@ -1116,7 +1116,7 @@ Member Function Documentation-Generated on Wed Mar 18 2015 18:50:34 for CUB by +Generated on Mon Apr 13 2015 13:56:52 for CUB by 1.8.4 diff --git a/docs/html/classcub_1_1_block_exchange-members.html b/docs/html/classcub_1_1_block_exchange-members.html index fea74399f7..13f16e5f05 100644 --- a/docs/html/classcub_1_1_block_exchange-members.html +++ b/docs/html/classcub_1_1_block_exchange-members.html @@ -118,7 +118,7 @@ -Generated on Wed Mar 18 2015 18:50:34 for CUB by +Generated on Mon Apr 13 2015 13:56:52 for CUB by 1.8.4 diff --git a/docs/html/classcub_1_1_block_exchange.html b/docs/html/classcub_1_1_block_exchange.html index ffe41570f4..3909f775c6 100644 --- a/docs/html/classcub_1_1_block_exchange.html +++ b/docs/html/classcub_1_1_block_exchange.html @@ -749,7 +749,7 @@ Member Function Documentation-Generated on Wed Mar 18 2015 18:50:34 for CUB by +Generated on Mon Apr 13 2015 13:56:52 for CUB by 1.8.4 diff --git a/docs/html/classcub_1_1_block_histogram-members.html b/docs/html/classcub_1_1_block_histogram-members.html index 4943ee23b1..18d3c975eb 100644 --- a/docs/html/classcub_1_1_block_histogram-members.html +++ 
b/docs/html/classcub_1_1_block_histogram-members.html @@ -113,7 +113,7 @@ -Generated on Wed Mar 18 2015 18:50:34 for CUB by +Generated on Mon Apr 13 2015 13:56:52 for CUB by 1.8.4 diff --git a/docs/html/classcub_1_1_block_histogram.html b/docs/html/classcub_1_1_block_histogram.html index 61285b65e7..b59d6a4a1d 100644 --- a/docs/html/classcub_1_1_block_histogram.html +++ b/docs/html/classcub_1_1_block_histogram.html @@ -479,7 +479,7 @@ Member Function Documentation-Generated on Wed Mar 18 2015 18:50:34 for CUB by +Generated on Mon Apr 13 2015 13:56:52 for CUB by 1.8.4 diff --git a/docs/html/classcub_1_1_block_load-members.html b/docs/html/classcub_1_1_block_load-members.html index 8b22025754..d19bdcc770 100644 --- a/docs/html/classcub_1_1_block_load-members.html +++ b/docs/html/classcub_1_1_block_load-members.html @@ -113,7 +113,7 @@ -Generated on Wed Mar 18 2015 18:50:34 for CUB by +Generated on Mon Apr 13 2015 13:56:52 for CUB by 1.8.4 diff --git a/docs/html/classcub_1_1_block_load.html b/docs/html/classcub_1_1_block_load.html index d0876ed9a2..bcd5c8dd2d 100644 --- a/docs/html/classcub_1_1_block_load.html +++ b/docs/html/classcub_1_1_block_load.html @@ -165,7 +165,7 @@
Definition at line 628 of file block_load.cuh. +Definition at line 659 of file block_load.cuh.
Member Function Documentation- +
@@ -289,7 +289,7 @@ Member Function Documentation
|
| size_t |
- bytes, |
+ bytes = |
size_t(-1) ,
@@ -331,7 +331,7 @@ | Member Function Documentation-Generated on Wed Mar 18 2015 18:50:33 for CUB by +Generated on Mon Apr 13 2015 13:56:51 for CUB by 1.8.4 diff --git a/docs/html/classcub_1_1_tex_ref_input_iterator-members.html b/docs/html/classcub_1_1_tex_ref_input_iterator-members.html index 2732f56fd5..692ad4fe0c 100644 --- a/docs/html/classcub_1_1_tex_ref_input_iterator-members.html +++ b/docs/html/classcub_1_1_tex_ref_input_iterator-members.html @@ -104,7 +104,7 @@ This is the complete list of members for cub::TexRefInputIterator< T, UNIQUE_ID, OffsetT >, including all inherited members. -Generated on Wed Mar 18 2015 18:50:33 for CUB by +Generated on Mon Apr 13 2015 13:56:51 for CUB by 1.8.4 diff --git a/docs/html/classcub_1_1_tex_ref_input_iterator.html b/docs/html/classcub_1_1_tex_ref_input_iterator.html index e92332c4de..309a62ceb7 100644 --- a/docs/html/classcub_1_1_tex_ref_input_iterator.html +++ b/docs/html/classcub_1_1_tex_ref_input_iterator.html @@ -133,7 +133,7 @@ // Create an iterator wrapper
-
+
...
// Within device code:
@@ -186,14 +186,10 @@
Member Function Documentation- +
@@ -294,7 +290,7 @@ Member Function Documentation
|
| size_t |
- bytes, |
+ bytes = |
size_t(-1) , | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
@@ -336,7 +332,7 @@ |
Modules | |
Device-wide | |
Warp-wide (collective) | |
Block-wide (collective) | |
Warp-wide (collective) | |
Device-wide | |
Definition at line 253 of file block_load.cuh.
+Definition at line 280 of file block_load.cuh.
@@ -777,7 +777,7 @@Definition at line 281 of file block_load.cuh.
+Definition at line 312 of file block_load.cuh.
@@ -847,7 +847,7 @@Definition at line 312 of file block_load.cuh.
+Definition at line 343 of file block_load.cuh.
@@ -903,7 +903,7 @@Definition at line 352 of file block_load.cuh.
+Definition at line 383 of file block_load.cuh.
@@ -966,7 +966,7 @@Definition at line 386 of file block_load.cuh.
+Definition at line 417 of file block_load.cuh.
@@ -1036,7 +1036,7 @@Definition at line 423 of file block_load.cuh.
+Definition at line 454 of file block_load.cuh.
@@ -1459,7 +1459,7 @@ItemOffsetPair
tuples). More...KeyValuePair
tuples). More...const
and volatile
qualifiers from type Tp
. More...-Enumerations | |
enum | cub::Category { NOT_A_NUMBER, -SIGNED_INTEGER, -UNSIGNED_INTEGER, -FLOATING_POINT - } |
Basic type traits categories. | |
Definition at line 233 of file util_ptx.cuh.
+Definition at line 235 of file util_ptx.cuh.
@@ -216,7 +216,7 @@Functions | |
template<typename T > | |
__device__ __forceinline__ T | cub::ShuffleUp (T input, int src_offset) |
Shuffle-up for any data type. Each warp-lanei obtains the value input contributed by warp-lanei-src_offset . For thread lanes i < src_offset, the thread's own input is returned to the thread.
+ | |
template<typename T > | |
__device__ __forceinline__ T | cub::ShuffleUp (T input, int src_offset, int first_lane=0) |
Shuffle-up for any data type. Each warp-lane i obtains the value input contributed by warp-lane (i - src_offset). For thread lanes i < src_offset, the thread's own input is returned to the thread.
| |
template<typename T > | |
__device__ __forceinline__ T | cub::ShuffleDown (T input, int src_offset) |
Shuffle-down for any data type. Each warp-lanei obtains the value input contributed by warp-lanei+src_offset . For thread lanes i >= WARP_THREADS, the thread's own input is returned to the thread.
+ More... | |
template<typename T > | |
__device__ __forceinline__ T | cub::ShuffleDown (T input, int src_offset, int last_lane=CUB_PTX_WARP_THREADS-1) |
Shuffle-down for any data type. Each warp-lane i obtains the value input contributed by warp-lane (i + src_offset). For thread lanes i >= WARP_THREADS, the thread's own input is returned to the thread.
| |
template<typename T > | |
__device__ __forceinline__ T | cub::ShuffleBroadcast (T input, int src_lane) |
Shuffle-broadcast for any data type. Each warp-lanei obtains the value input contributed by warp-lanesrc_lane . For src_lane < 0 or src_lane >= WARP_THREADS, then the thread's own input is returned to the thread.
+ More... | |
template<typename T > | |
__device__ __forceinline__ T | cub::ShuffleIndex (T input, int src_lane) |
Shuffle-broadcast for any data type. Each warp-lane i obtains the value input contributed by warp-lane src_lane. If src_lane < 0 or src_lane >= WARP_THREADS, then the thread's own input is returned to the thread.
| |
__device__ __forceinline__ int | cub::WarpAll (int cond) |
Portable implementation of __all. | |
0
thread_data
across the first warp of threads is {1.0, 2.0, 3.0, 4.0, 5.0, ..., 32.0}
. The corresponding output peer_data
will be {1.0, 2.0, 1.0, 2.0, 3.0, ..., 30.0}
. [in] | input | The value to broadcast |
[in] | src_offset | The relative down-offset of the peer to read from |
[in] | first_lane | Index of first lane in segment |
Definition at line 388 of file util_ptx.cuh.
+Definition at line 498 of file util_ptx.cuh.
- +CUB_PTX_WARP_THREADS - 1
thread_data
across the first warp of threads is {1.0, 2.0, 3.0, 4.0, 5.0, ..., 32.0}
. The corresponding output peer_data
will be {3.0, 4.0, 5.0, 6.0, 7.0, ..., 32.0}
. [in] | input | The value to broadcast |
[in] | src_offset | The relative up-offset of the peer to read from |
[in] | last_lane | Index of last lane in segment |
Definition at line 447 of file util_ptx.cuh.
+Definition at line 559 of file util_ptx.cuh.
- +__device__ __forceinline__ T cub::ShuffleBroadcast | +__device__ __forceinline__ T cub::ShuffleIndex | ( | T | input, | @@ -318,7 +332,7 @@
cub::ArgIndexInputIterator< InputIteratorT, OffsetT > | A random-access input wrapper for pairing dereferenced values with their corresponding indices (forming ItemOffsetPair tuples) |
cub::ArgIndexInputIterator< InputIteratorT, OffsetT > | A random-access input wrapper for pairing dereferenced values with their corresponding indices (forming KeyValuePair tuples) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::ArgMax | Arg max functor (keeps the value and offset of the first occurrence of the larger item) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::ArgMin | Arg min functor (keeps the value and offset of the first occurrence of the smallest item) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BaseTraits< _CATEGORY, _PRIMITIVE, _NULL_TYPE, _UnsignedBits > | Basic type traits | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BaseTraits< NOT_A_NUMBER, false, false, RemoveQualifiers< T >::Type > | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::NumericTraits< RemoveQualifiers< T >::Type > | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::Traits< T > | Type traits | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BaseTraits< NOT_A_NUMBER, false, false, T > | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::NumericTraits< T > | Numeric type traits | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockDiscontinuity< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > | The BlockDiscontinuity class provides collective methods for flagging discontinuities within an ordered set of items partitioned across a CUDA thread block. + | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockDiscontinuity< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > | The BlockDiscontinuity class provides collective methods for flagging discontinuities within an ordered set of items partitioned across a CUDA thread block.
| ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockExchange< T, BLOCK_DIM_X, ITEMS_PER_THREAD, WARP_TIME_SLICING, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > | The BlockExchange class provides collective methods for rearranging data partitioned across a CUDA thread block. + | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockExchange< T, BLOCK_DIM_X, ITEMS_PER_THREAD, WARP_TIME_SLICING, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > | The BlockExchange class provides collective methods for rearranging data partitioned across a CUDA thread block.
| ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockHistogram< T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > | The BlockHistogram class provides collective methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. + | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockHistogram< T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > | The BlockHistogram class provides collective methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block.
| ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockLoad< InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > | The BlockLoad class provides collective data movement methods for loading a linear segment of items from memory into a blocked arrangement across a CUDA thread block. + | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockLoad< InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > | The BlockLoad class provides collective data movement methods for loading a linear segment of items from memory into a blocked arrangement across a CUDA thread block.
| ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockRadixSort< KeyT, BLOCK_DIM_X, ITEMS_PER_THREAD, ValueT, RADIX_BITS, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > | The BlockRadixSort class provides collective methods for sorting items partitioned across a CUDA thread block using a radix sorting method. + | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockRadixSort< KeyT, BLOCK_DIM_X, ITEMS_PER_THREAD, ValueT, RADIX_BITS, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > | The BlockRadixSort class provides collective methods for sorting items partitioned across a CUDA thread block using a radix sorting method.
| ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockReduce< T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > | The BlockReduce class provides collective methods for computing a parallel reduction of items partitioned across a CUDA thread block. + | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockReduce< T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > | The BlockReduce class provides collective methods for computing a parallel reduction of items partitioned across a CUDA thread block.
| ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockScan< T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > | The BlockScan class provides collective methods for computing a parallel prefix sum/scan of items partitioned across a CUDA thread block. + | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockScan< T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > | The BlockScan class provides collective methods for computing a parallel prefix sum/scan of items partitioned across a CUDA thread block.
| ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockStore< OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > | The BlockStore class provides collective data movement methods for writing a blocked arrangement of items partitioned across a CUDA thread block to a linear segment of memory. + | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockStore< OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > | The BlockStore class provides collective data movement methods for writing a blocked arrangement of items partitioned across a CUDA thread block to a linear segment of memory.
| ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::CacheModifiedInputIterator< MODIFIER, ValueType, OffsetT > | A random-access input wrapper for dereferencing array values using a PTX cache load modifier | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::CacheModifiedOutputIterator< MODIFIER, ValueType, OffsetT > | A random-access output wrapper for storing array values using a PTX cache-modifier | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::CachingDeviceAllocator | A simple caching allocator for device memory allocations | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::Cast< B > | Default cast functor | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::ConstantInputIterator< ValueType, OffsetT > | A random-access input generator for dereferencing a sequence of homogeneous values | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::CountingInputIterator< ValueType, OffsetT > | A random-access input generator for dereferencing a sequence of incrementing integer values | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::CubVector< T, vec_elements > | Exposes a member typedef Type that names the corresponding CUDA vector type if one exists. Otherwise Type refers to the CubVector structure itself, which will wrap the corresponding x , y , etc. vector fields | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::DeviceHistogram | DeviceHistogram provides device-wide parallel operations for constructing histogram(s) from a sequence of samples data residing within global memory. + | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::CacheModifiedInputIterator< MODIFIER, ValueType, OffsetT > | A random-access input wrapper for dereferencing array values using a PTX cache load modifier | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::CacheModifiedOutputIterator< MODIFIER, ValueType, OffsetT > | A random-access output wrapper for storing array values using a PTX cache-modifier | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::CachingDeviceAllocator | A simple caching allocator for device memory allocations | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::Cast< B > | Default cast functor | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::ConstantInputIterator< ValueType, OffsetT > | A random-access input generator for dereferencing a sequence of homogeneous values | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::CountingInputIterator< ValueType, OffsetT > | A random-access input generator for dereferencing a sequence of incrementing integer values | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::CubVector< T, vec_elements > | Exposes a member typedef Type that names the corresponding CUDA vector type if one exists. Otherwise Type refers to the CubVector structure itself, which will wrap the corresponding x , y , etc. vector fields | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::DeviceHistogram | DeviceHistogram provides device-wide parallel operations for constructing histogram(s) from a sequence of samples data residing within global memory.
| ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::DevicePartition | DevicePartition provides device-wide, parallel operations for partitioning sequences of data items residing within global memory. + | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::DevicePartition | DevicePartition provides device-wide, parallel operations for partitioning sequences of data items residing within global memory.
| ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::DeviceRadixSort | DeviceRadixSort provides device-wide, parallel operations for computing a radix sort across a sequence of data items residing within global memory. + | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::DeviceRadixSort | DeviceRadixSort provides device-wide, parallel operations for computing a radix sort across a sequence of data items residing within global memory.
| ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::DeviceReduce | DeviceReduce provides device-wide, parallel operations for computing a reduction across a sequence of data items residing within global memory. + | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::DeviceReduce | DeviceReduce provides device-wide, parallel operations for computing a reduction across a sequence of data items residing within global memory.
| ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::DeviceRunLengthEncode | DeviceRunLengthEncode provides device-wide, parallel operations for demarcating "runs" of same-valued items within a sequence residing within global memory. + | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::DeviceRunLengthEncode | DeviceRunLengthEncode provides device-wide, parallel operations for demarcating "runs" of same-valued items within a sequence residing within global memory.
| ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::DeviceScan | DeviceScan provides device-wide, parallel operations for computing a prefix scan across a sequence of data items residing within global memory. + | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::DeviceScan | DeviceScan provides device-wide, parallel operations for computing a prefix scan across a sequence of data items residing within global memory.
| ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::DeviceSelect | DeviceSelect provides device-wide, parallel operations for compacting selected items from sequences of data items residing within global memory. + | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::DeviceSelect | DeviceSelect provides device-wide, parallel operations for compacting selected items from sequences of data items residing within global memory.
| ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::DeviceSpmv | DeviceSpmv provides device-wide parallel operations for performing sparse-matrix * dense-vector multiplication (SpMV) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::DoubleBuffer< T > | Double-buffer storage wrapper for multi-pass stream transformations that require more than one storage array for streaming intermediate results back and forth | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::Equality | Default equality functor | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::Equals< A, B > | Type equality test | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::If< IF, ThenType, ElseType > | Type selection (IF ? ThenType : ElseType ) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::Inequality | Default inequality functor | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::InequalityWrapper< EqualityOp > | Inequality functor (wraps equality functor) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::Int2Type< A > | Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static call dispatch based on constant integral values) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::ItemOffsetPair< _T, _OffsetT > | An item value paired with a corresponding offset | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::KeyValuePair< _Key, _Value > | A key identifier paired with a corresponding value | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::Log2< N, CURRENT_VAL, COUNT > | Statically determine log2(N), rounded up | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::Max | Default max functor | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::Min | Default min functor | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::NullType | A simple "NULL" marker type | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::PowerOfTwo< N > | Statically determine if N is a power-of-two | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::ReduceBySegmentOp< ReductionOp, ItemOffsetPair > | Reduce-by-segment functor | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::Sum | Default sum functor | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::SwizzleScanOp< ScanOp > | Binary operator wrapper for switching non-commutative scan arguments | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::TexObjInputIterator< T, OffsetT > | A random-access input wrapper for dereferencing array values through texture cache. Uses newer Kepler-style texture objects | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::TexRefInputIterator< T, UNIQUE_ID, OffsetT > | A random-access input wrapper for dereferencing array values through texture cache. Uses older Tesla/Fermi-style texture references | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::TransformInputIterator< ValueType, ConversionOp, InputIteratorT, OffsetT > | A random-access input wrapper for transforming dereferenced values | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::Uninitialized< T > | A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::Uninitialized< _TempStorage > | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockDiscontinuity< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::TempStorage | The operations exposed by BlockDiscontinuity require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or union 'd with other storage allocation types to facilitate memory reuse | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockExchange< T, BLOCK_DIM_X, ITEMS_PER_THREAD, WARP_TIME_SLICING, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::TempStorage | The operations exposed by BlockExchange require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or union 'd with other storage allocation types to facilitate memory reuse | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockHistogram< T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::TempStorage | The operations exposed by BlockHistogram require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or union 'd with other storage allocation types to facilitate memory reuse | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockLoad< InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::LoadInternal< BLOCK_LOAD_TRANSPOSE, DUMMY >::TempStorage | Alias wrapper allowing storage to be unioned | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockLoad< InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::LoadInternal< BLOCK_LOAD_WARP_TRANSPOSE, DUMMY >::TempStorage | Alias wrapper allowing storage to be unioned | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockLoad< InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::LoadInternal< BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED, DUMMY >::TempStorage | Alias wrapper allowing storage to be unioned | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockLoad< InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::TempStorage | The operations exposed by BlockLoad require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or union 'd with other storage allocation types to facilitate memory reuse | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockRadixSort< KeyT, BLOCK_DIM_X, ITEMS_PER_THREAD, ValueT, RADIX_BITS, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::TempStorage | The operations exposed by BlockScan require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or union 'd with other storage allocation types to facilitate memory reuse | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockReduce< T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::TempStorage | The operations exposed by BlockReduce require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or union 'd with other storage allocation types to facilitate memory reuse | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockScan< T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::TempStorage | The operations exposed by BlockScan require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or union 'd with other storage allocation types to facilitate memory reuse | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockStore< OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_TRANSPOSE, DUMMY >::TempStorage | Alias wrapper allowing storage to be unioned | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockStore< OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_WARP_TRANSPOSE, DUMMY >::TempStorage | Alias wrapper allowing storage to be unioned | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockStore< OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED, DUMMY >::TempStorage | Alias wrapper allowing storage to be unioned | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockStore< OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::TempStorage | The operations exposed by BlockStore require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or union 'd with other storage allocation types to facilitate memory reuse | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::WarpReduce< T, LOGICAL_WARP_THREADS, PTX_ARCH >::TempStorage | The operations exposed by WarpReduce require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or union 'd with other storage allocation types to facilitate memory reuse | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::WarpScan< T, LOGICAL_WARP_THREADS, PTX_ARCH >::TempStorage | The operations exposed by WarpScan require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or union 'd with other storage allocation types to facilitate memory reuse | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::WarpReduce< T, LOGICAL_WARP_THREADS, PTX_ARCH > | The WarpReduce class provides collective methods for computing a parallel reduction of items partitioned across a CUDA thread warp. + | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::DeviceSpmv | DeviceSpmv provides device-wide parallel operations for performing sparse-matrix * dense-vector multiplication (SpMV) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::DoubleBuffer< T > | Double-buffer storage wrapper for multi-pass stream transformations that require more than one storage array for streaming intermediate results back and forth | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::Equality | Default equality functor | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::Equals< A, B > | Type equality test | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::If< IF, ThenType, ElseType > | Type selection (IF ? ThenType : ElseType ) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::Inequality | Default inequality functor | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::InequalityWrapper< EqualityOp > | Inequality functor (wraps equality functor) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::Int2Type< A > | Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static call dispatch based on constant integral values) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::IsPointer< Tp > | Pointer vs. iterator | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::IsVolatile< Tp > | Volatile modifier test | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::KeyValuePair< _Key, _Value > | A key identifier paired with a corresponding value | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::Log2< N, CURRENT_VAL, COUNT > | Statically determine log2(N), rounded up | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::Max | Default max functor | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::Min | Default min functor | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::NullType | A simple "NULL" marker type | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::PowerOfTwo< N > | Statically determine if N is a power-of-two | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::ReduceByKeyOp< ReductionOpT > | Binary reduction operator to apply to values | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::ReduceBySegmentOp< ReductionOpT > | Reduce-by-segment functor | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::RemoveQualifiers< Tp, Up > | Removes const and volatile qualifiers from type Tp | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::Sum | Default sum functor | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::SwizzleScanOp< ScanOp > | Binary operator wrapper for switching non-commutative scan arguments | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::TexObjInputIterator< T, OffsetT > | A random-access input wrapper for dereferencing array values through texture cache. Uses newer Kepler-style texture objects | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::TexRefInputIterator< T, UNIQUE_ID, OffsetT > | A random-access input wrapper for dereferencing array values through texture cache. Uses older Tesla/Fermi-style texture references | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::TransformInputIterator< ValueType, ConversionOp, InputIteratorT, OffsetT > | A random-access input wrapper for transforming dereferenced values | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::Uninitialized< T > | A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::Uninitialized< _TempStorage > | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockDiscontinuity< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::TempStorage | The operations exposed by BlockDiscontinuity require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or union 'd with other storage allocation types to facilitate memory reuse | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockExchange< T, BLOCK_DIM_X, ITEMS_PER_THREAD, WARP_TIME_SLICING, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::TempStorage | The operations exposed by BlockExchange require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or union 'd with other storage allocation types to facilitate memory reuse | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockHistogram< T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::TempStorage | The operations exposed by BlockHistogram require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or union 'd with other storage allocation types to facilitate memory reuse | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockLoad< InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::LoadInternal< BLOCK_LOAD_TRANSPOSE, DUMMY >::TempStorage | Alias wrapper allowing storage to be unioned | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockLoad< InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::LoadInternal< BLOCK_LOAD_WARP_TRANSPOSE, DUMMY >::TempStorage | Alias wrapper allowing storage to be unioned | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockLoad< InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::LoadInternal< BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED, DUMMY >::TempStorage | Alias wrapper allowing storage to be unioned | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockLoad< InputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::TempStorage | The operations exposed by BlockLoad require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or union 'd with other storage allocation types to facilitate memory reuse | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockRadixSort< KeyT, BLOCK_DIM_X, ITEMS_PER_THREAD, ValueT, RADIX_BITS, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::TempStorage | The operations exposed by BlockRadixSort require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or union 'd with other storage allocation types to facilitate memory reuse | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockReduce< T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::TempStorage | The operations exposed by BlockReduce require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or union 'd with other storage allocation types to facilitate memory reuse | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockScan< T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::TempStorage | The operations exposed by BlockScan require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or union 'd with other storage allocation types to facilitate memory reuse | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockStore< OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_TRANSPOSE, DUMMY >::TempStorage | Alias wrapper allowing storage to be unioned | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockStore< OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_WARP_TRANSPOSE, DUMMY >::TempStorage | Alias wrapper allowing storage to be unioned | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockStore< OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED, DUMMY >::TempStorage | Alias wrapper allowing storage to be unioned | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::BlockStore< OutputIteratorT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::TempStorage | The operations exposed by BlockStore require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or union 'd with other storage allocation types to facilitate memory reuse | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::WarpReduce< T, LOGICAL_WARP_THREADS, PTX_ARCH >::TempStorage | The operations exposed by WarpReduce require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or union 'd with other storage allocation types to facilitate memory reuse | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::WarpScan< T, LOGICAL_WARP_THREADS, PTX_ARCH >::TempStorage | The operations exposed by WarpScan require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or union 'd with other storage allocation types to facilitate memory reuse | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::WarpReduce< T, LOGICAL_WARP_THREADS, PTX_ARCH > | The WarpReduce class provides collective methods for computing a parallel reduction of items partitioned across a CUDA thread warp.
| ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::WarpScan< T, LOGICAL_WARP_THREADS, PTX_ARCH > | The WarpScan class provides collective methods for computing a parallel prefix scan of items partitioned across a CUDA thread warp. + | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cub::WarpScan< T, LOGICAL_WARP_THREADS, PTX_ARCH > | The WarpScan class provides collective methods for computing a parallel prefix scan of items partitioned across a CUDA thread warp.
-Generated on Wed Mar 18 2015 18:50:40 for CUB by +Generated on Mon Apr 13 2015 13:56:55 for CUB by 1.8.4 diff --git a/docs/html/index.html b/docs/html/index.html index 30d144962e..994d0908ad 100644 --- a/docs/html/index.html +++ b/docs/html/index.html @@ -115,7 +115,7 @@ -Download CUB v1.4.0 +Download CUB v1.4.1 |
@@ -394,6 +394,13 @@
<epoch>.<feature>.<update> . The epoch field corresponds to support for a major change or update to the CUDA programming model. The feature field corresponds to a stable set of features, functionality, and interface. The update field corresponds to a bug-fix or performance update for that feature set. At the moment, we do not publicly provide non-stable releases such as development snapshots, beta releases or rolling releases. (Feel free to contact us if you would like access to such things.)
-Generated on Wed Mar 18 2015 18:50:34 for CUB by +Generated on Mon Apr 13 2015 13:56:52 for CUB by 1.8.4 diff --git a/docs/html/structcub_1_1_block_histogram_1_1_temp_storage-members.html b/docs/html/structcub_1_1_block_histogram_1_1_temp_storage-members.html index cb9889f4d5..a0ae043419 100644 --- a/docs/html/structcub_1_1_block_histogram_1_1_temp_storage-members.html +++ b/docs/html/structcub_1_1_block_histogram_1_1_temp_storage-members.html @@ -112,7 +112,7 @@ -Generated on Wed Mar 18 2015 18:50:34 for CUB by +Generated on Mon Apr 13 2015 13:56:52 for CUB by 1.8.4 diff --git a/docs/html/structcub_1_1_block_histogram_1_1_temp_storage.html b/docs/html/structcub_1_1_block_histogram_1_1_temp_storage.html index 10bffca4c7..dea13270ed 100644 --- a/docs/html/structcub_1_1_block_histogram_1_1_temp_storage.html +++ b/docs/html/structcub_1_1_block_histogram_1_1_temp_storage.html @@ -135,7 +135,7 @@ |
typedef UnitWord< _TempStorage > | -::DeviceWord DeviceWord | | DeviceWord | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T. | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Public Methods inherited from cub::Uninitialized< _TempStorage > | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Public Members inherited from cub::Uninitialized< _TempStorage > | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
-DeviceWord | storage [WORDS] | storage [WORDS] | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Backing storage. | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Alias wrapper allowing storage to be unioned.
-Definition at line 792 of file block_load.cuh.
+Definition at line 823 of file block_load.cuh.
Alias wrapper allowing storage to be unioned.
-Definition at line 863 of file block_load.cuh.
+Definition at line 894 of file block_load.cuh.
Alias wrapper allowing storage to be unioned.
-Definition at line 934 of file block_load.cuh.
+Definition at line 965 of file block_load.cuh.
The operations exposed by BlockLoad require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the __shared__
keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or union
'd with other storage allocation types to facilitate memory reuse.
Definition at line 1021 of file block_load.cuh.
+Definition at line 1052 of file block_load.cuh.
The operations exposed by BlockScan require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the __shared__
keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or union
'd with other storage allocation types to facilitate memory reuse.
Definition at line 332 of file block_scan.cuh.
+Definition at line 260 of file block_scan.cuh.
d_flags
must be castable to bool
(e.g., bool
, char
, int
, etc.).d_out
and maintain their original relative ordering, however copies of the unselected items are compacted into the rear of d_out
in reverse order.d_temp_storage
is NULL, no work is done and the required allocation size is returned in temp_storage_bytes
.CUB_CDP
macro in your compiler's macro definitions.int
device vector. d_out
and maintain their original relative ordering, however copies of the unselected items are compacted into the rear of d_out
in reverse order.d_temp_storage
is NULL, no work is done and the required allocation size is returned in temp_storage_bytes
.CUB_CDP
macro in your compiler's macro definitions.int32
and int64
items, respectively. Items are selected for the first partition with 50% probability.[begin_bit, end_bit)
of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement.N+P
), where N
is the length of the input and P
is the number of streaming multiprocessors on the device. For sorting using only O(P
) temporary storage, see the sorting interface using DoubleBuffer wrappers below.d_temp_storage
is NULL, no work is done and the required allocation size is returned in temp_storage_bytes
.CUB_CDP
macro in your compiler's macro definitions.uint32,uint32
and uint64,uint64
pairs, respectively.[begin_bit, end_bit)
of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement.P
), where P
is the number of streaming multiprocessors on the device (and is typically a small constant relative to the input size N
).d_temp_storage
is NULL, no work is done and the required allocation size is returned in temp_storage_bytes
.CUB_CDP
macro in your compiler's macro definitions.uint32,uint32
and uint64,uint64
pairs, respectively.[begin_bit, end_bit)
of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement.N+P
), where N
is the length of the input and P
is the number of streaming multiprocessors on the device. For sorting using only O(P
) temporary storage, see the sorting interface using DoubleBuffer wrappers below.d_temp_storage
is NULL, no work is done and the required allocation size is returned in temp_storage_bytes
.CUB_CDP
macro in your compiler's macro definitions.[begin_bit, end_bit)
of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement.P
), where P
is the number of streaming multiprocessors on the device (and is typically a small constant relative to the input size N
).d_temp_storage
is NULL, no work is done and the required allocation size is returned in temp_storage_bytes
.CUB_CDP
macro in your compiler's macro definitions.[begin_bit, end_bit)
of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement.N+P
), where N
is the length of the input and P
is the number of streaming multiprocessors on the device. For sorting using only O(P
) temporary storage, see the sorting interface using DoubleBuffer wrappers below.d_temp_storage
is NULL, no work is done and the required allocation size is returned in temp_storage_bytes
.CUB_CDP
macro in your compiler's macro definitions.uint32
and uint64
keys, respectively.[begin_bit, end_bit)
of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement.P
), where P
is the number of streaming multiprocessors on the device (and is typically a small constant relative to the input size N
).d_temp_storage
is NULL, no work is done and the required allocation size is returned in temp_storage_bytes
.CUB_CDP
macro in your compiler's macro definitions.uint32
and uint64
keys, respectively.[begin_bit, end_bit)
of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement.N+P
), where N
is the length of the input and P
is the number of streaming multiprocessors on the device. For sorting using only O(P
) temporary storage, see the sorting interface using DoubleBuffer wrappers below.d_temp_storage
is NULL, no work is done and the required allocation size is returned in temp_storage_bytes
.CUB_CDP
macro in your compiler's macro definitions.[begin_bit, end_bit)
of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement.P
), where P
is the number of streaming multiprocessors on the device (and is typically a small constant relative to the input size N
).d_temp_storage
is NULL, no work is done and the required allocation size is returned in temp_storage_bytes
.CUB_CDP
macro in your compiler's macro definitions.d_temp_storage
is NULL, no work is done and the required allocation size is returned in temp_storage_bytes
.CUB_CDP
macro in your compiler's macro definitions.InputIteratorT | [inferred] Random-access input iterator type for reading input items (may be a simple pointer type) |
OutputIteratorT | [inferred] Output iterator type for recording the reduced aggregate (may be a simple pointer type) |
ReductionOp | [inferred] Binary reduction functor type having member T operator()(const T &a, const T &b) |
ReductionOp | [inferred] Binary reduction functor type having member T operator()(const T &a, const T &b) (e.g., cub::Sum, cub::Min, cub::Max, etc.) |
d_temp_storage
is NULL, no work is done and the required allocation size is returned in temp_storage_bytes
.CUB_CDP
macro in your compiler's macro definitions.int32
and int64
items, respectively.d_temp_storage
is NULL, no work is done and the required allocation size is returned in temp_storage_bytes
.CUB_CDP
macro in your compiler's macro definitions.Finds the first device-wide minimum using the less-than ('<') operator, also returning the index of that item.
-d_in
has value type T
, the output d_out
must have value type ItemOffsetPair<T, int>
. The minimum value is written to d_out.value
and its location in the input array is written to d_out.offset
.d_in
has value type T
, the output d_out
must have value type KeyValuePair<int, T>
. The minimum value is written to d_out.value
and its location in the input array is written to d_out.key
.d_temp_storage
is NULL, no work is done and the required allocation size is returned in temp_storage_bytes
.CUB_CDP
macro in your compiler's macro definitions.InputIteratorT | [inferred] Random-access input iterator type for reading input items (of some type T ) (may be a simple pointer type) |
OutputIteratorT | [inferred] Output iterator type for recording the reduced aggregate (having value type ItemOffsetPair<T, int> ) (may be a simple pointer type) |
OutputIteratorT | [inferred] Output iterator type for recording the reduced aggregate (having value type KeyValuePair<int, T> ) (may be a simple pointer type) |
d_temp_storage
is NULL, no work is done and the required allocation size is returned in temp_storage_bytes
.CUB_CDP
macro in your compiler's macro definitions.Finds the first device-wide maximum using the greater-than ('>') operator, also returning the index of that item.
-d_in
has value type T
, the output d_out
must have value type ItemOffsetPair<T, int>
. The maximum value is written to d_out.value
and its location in the input array is written to d_out.offset
.d_in
has value type T
, the output d_out
must have value type KeyValuePair<int, T>
. The maximum value is written to d_out.value
and its location in the input array is written to d_out.key
.d_temp_storage
is NULL, no work is done and the required allocation size is returned in temp_storage_bytes
.CUB_CDP
macro in your compiler's macro definitions.InputIteratorT | [inferred] Random-access input iterator type for reading input items (of some type T ) (may be a simple pointer type) |
OutputIteratorT | [inferred] Output iterator type for recording the reduced aggregate (having value type ItemOffsetPair<T, int> ) (may be a simple pointer type) |
OutputIteratorT | [inferred] Output iterator type for recording the reduced aggregate (having value type KeyValuePair<int, T> ) (may be a simple pointer type) |
==
equality operator is used to determine whether keys are equivalentd_temp_storage
is NULL, no work is done and the required allocation size is returned in temp_storage_bytes
.CUB_CDP
macro in your compiler's macro definitions.fp32
and fp64
values, respectively. Segments are identified by int32
keys, and have lengths uniformly sampled from [1,1000].T operator()(const T &a, const T &b)
T operator()(const T &a, const T &b)
(e.g., cub::Sum, cub::Min, cub::Max, etc.) d_num_runs_out
.==
equality operator is used to determine whether values are equivalentd_temp_storage
is NULL, no work is done and the required allocation size is returned in temp_storage_bytes
.CUB_CDP
macro in your compiler's macro definitions.int32
and int64
items, respectively. Segments have lengths uniformly sampled from [1,1000].d_num_runs_out
.==
equality operator is used to determine whether values are equivalentd_temp_storage
is NULL, no work is done and the required allocation size is returned in temp_storage_bytes
.CUB_CDP
macro in your compiler's macro definitions.d_temp_storage
is NULL, no work is done and the required allocation size is returned in temp_storage_bytes
.CUB_CDP
macro in your compiler's macro definitions.int32
and int64
items, respectively.d_temp_storage
is NULL, no work is done and the required allocation size is returned in temp_storage_bytes
.CUB_CDP
macro in your compiler's macro definitions.d_temp_storage
is NULL, no work is done and the required allocation size is returned in temp_storage_bytes
.CUB_CDP
macro in your compiler's macro definitions.d_temp_storage
is NULL, no work is done and the required allocation size is returned in temp_storage_bytes
.CUB_CDP
macro in your compiler's macro definitions.d_flags
must be castable to bool
(e.g., bool
, char
, int
, etc.).d_out
and maintain their original relative ordering.d_temp_storage
is NULL, no work is done and the required allocation size is returned in temp_storage_bytes
.CUB_CDP
macro in your compiler's macro definitions.int
device vector. d_out
and maintain their original relative ordering.d_temp_storage
is NULL, no work is done and the required allocation size is returned in temp_storage_bytes
.CUB_CDP
macro in your compiler's macro definitions.int32
and int64
items, respectively. Items are selected with 50% probability.==
equality operator is used to determine whether keys are equivalentd_out
and maintain their original relative ordering.d_temp_storage
is NULL, no work is done and the required allocation size is returned in temp_storage_bytes
.CUB_CDP
macro in your compiler's macro definitions.int32
and int64
items, respectively. Segments have lengths uniformly sampled from [1,1000].This is the complete list of members for cub::DeviceSpmv, including all inherited members.
CsrMV(void *d_temp_storage, size_t &temp_storage_bytes, ValueT *d_matrix_values, int *d_matrix_row_offsets, int *d_matrix_column_indices, ValueT *d_vector_x, ValueT *d_vector_y, int num_rows, int num_cols, int num_nonzeros, ValueT alpha, ValueT beta, cudaStream_t stream=0, bool debug_synchronous=false) | cub::DeviceSpmv | inlinestatic |
CsrMV(void *d_temp_storage, size_t &temp_storage_bytes, ValueT *d_values, int *d_row_offsets, int *d_column_indices, ValueT *d_vector_x, ValueT *d_vector_y, int num_rows, int num_cols, int num_nonzeros, ValueT alpha, ValueT beta, cudaStream_t stream=0, bool debug_synchronous=false) | cub::DeviceSpmv | inlinestatic |
[in] | d_temp_storage | Device allocation of temporary storage. When NULL, the required allocation size is written to temp_storage_bytes and no work is done. | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
[in,out] | temp_storage_bytes | Reference to size in bytes of d_temp_storage allocation | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
[in] | d_matrix_values | Pointer to the array of num_nonzeros values of the corresponding nonzero elements of matrix A. | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
[in] | d_matrix_row_offsets | Pointer to the array of m + 1 offsets demarcating the start of every row in d_matrix_column_indices and d_matrix_values (with the final entry being equal to num_nonzeros ) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
[in] | d_matrix_column_indices | Pointer to the array of num_nonzeros column-indices of the corresponding nonzero elements of matrix A. (Indices are zero-valued.) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
[in] | d_values | Pointer to the array of num_nonzeros values of the corresponding nonzero elements of matrix A. | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
[in] | d_row_offsets | Pointer to the array of m + 1 offsets demarcating the start of every row in d_column_indices and d_values (with the final entry being equal to num_nonzeros ) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
[in] | d_column_indices | Pointer to the array of num_nonzeros column-indices of the corresponding nonzero elements of matrix A. (Indices are zero-valued.) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
[in] | d_vector_x | Pointer to the array of num_cols values corresponding to the dense input vector x | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
[out] | d_vector_y | Pointer to the array of num_rows values corresponding to the dense output vector y | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
[in] | num_rows | number of rows of matrix A. |
Public Methods |
CATEGORY | cub::BaseTraits< NOT_A_NUMBER, false, false, RemoveQualifiers< T >::Type > | static |
NULL_TYPE enum value (defined in cub::BaseTraits< NOT_A_NUMBER, false, false, RemoveQualifiers< T >::Type >) | cub::BaseTraits< NOT_A_NUMBER, false, false, RemoveQualifiers< T >::Type > | |
PRIMITIVE enum value (defined in cub::BaseTraits< NOT_A_NUMBER, false, false, RemoveQualifiers< T >::Type >) | cub::BaseTraits< NOT_A_NUMBER, false, false, RemoveQualifiers< T >::Type > | |
VALUE enum value (defined in cub::IsPointer< Tp >) | cub::IsPointer< Tp > |
+ CUB
+
+ |
+
Pointer vs. iterator.
+ +Definition at line 669 of file util_type.cuh.
++Public Types | |
enum | { VALUE = 0 + } |
This is the complete list of members for cub::ReduceBySegmentOp< ReductionOp, ItemOffsetPair >, including all inherited members.
+This is the complete list of members for cub::IsVolatile< Tp >, including all inherited members.
operator()(const ItemOffsetPair &first, const ItemOffsetPair &second) | cub::ReduceBySegmentOp< ReductionOp, ItemOffsetPair > | inline |
ReduceBySegmentOp() | cub::ReduceBySegmentOp< ReductionOp, ItemOffsetPair > | inline |
ReduceBySegmentOp(ReductionOp op) | cub::ReduceBySegmentOp< ReductionOp, ItemOffsetPair > | inline |
VALUE enum value (defined in cub::IsVolatile< Tp >) | cub::IsVolatile< Tp > |
+ CUB
+
+ |
+
Volatile modifier test.
+ +Definition at line 694 of file util_type.cuh.
++Public Types | |
enum | { VALUE = 0 + } |
- CUB
-
- |
-
This is the complete list of members for cub::ItemOffsetPair< _T, _OffsetT >, including all inherited members.
-align0 | cub::ItemOffsetPair< _T, _OffsetT > | |
ItemOffsetPair() | cub::ItemOffsetPair< _T, _OffsetT > | inline |
ItemOffsetPair(_T value, _OffsetT offset) | cub::ItemOffsetPair< _T, _OffsetT > | inline |
offset | cub::ItemOffsetPair< _T, _OffsetT > | |
OffsetT typedef | cub::ItemOffsetPair< _T, _OffsetT > | |
operator!=(const ItemOffsetPair &b) | cub::ItemOffsetPair< _T, _OffsetT > | inline |
T typedef | cub::ItemOffsetPair< _T, _OffsetT > | |
value | cub::ItemOffsetPair< _T, _OffsetT > |
This is the complete list of members for cub::KeyValuePair< _Key, _Value >, including all inherited members.
Key typedef | cub::KeyValuePair< _Key, _Value > | |
key | cub::KeyValuePair< _Key, _Value > | |
operator!=(const KeyValuePair &b) | cub::KeyValuePair< _Key, _Value > | inline |
value | cub::KeyValuePair< _Key, _Value > | |
Value typedef | cub::KeyValuePair< _Key, _Value > | |
align0 | cub::KeyValuePair< _Key, _Value > | |
Key typedef | cub::KeyValuePair< _Key, _Value > | |
key | cub::KeyValuePair< _Key, _Value > | |
operator!=(const KeyValuePair &b) | cub::KeyValuePair< _Key, _Value > | inline |
value | cub::KeyValuePair< _Key, _Value > | |
Value typedef | cub::KeyValuePair< _Key, _Value > |
A key identifier paired with a corresponding value.
-Definition at line 544 of file util_type.cuh.
+Definition at line 506 of file util_type.cuh.
Public Types |
Public Members | |
+union { | |
Key key | |
Item key. | |
UnitWord< Value >::DeviceWord align0 | |
Alignment/padding (for Win32 consistency between host/device) | |
}; | |
Value | value |
Item value. | |
-Key | key |
Item key. | |
Statically determine log2(N), rounded up.
For example: Log2<8>::VALUE // 3 Log2<3>::VALUE // 2
-Definition at line 660 of file util_type.cuh.
+Definition at line 631 of file util_type.cuh.
Public Types |
- CUB
-
- |
-
Numeric type traits.
- -Definition at line 1011 of file util_type.cuh.
--Additional Inherited Members | |
Public Types inherited from cub::BaseTraits< NOT_A_NUMBER, false, false, T > | |
enum | |
Static Public Members inherited from cub::BaseTraits< NOT_A_NUMBER, false, false, T > | |
-static const Category | CATEGORY |
Category. | |
Statically determine if N is a power-of-two.
-Definition at line 681 of file util_type.cuh.
+Definition at line 654 of file util_type.cuh.
Public Types |
CATEGORY | cub::BaseTraits< _CATEGORY, _PRIMITIVE, _NULL_TYPE, _UnsignedBits > | static |
NULL_TYPE enum value (defined in cub::BaseTraits< _CATEGORY, _PRIMITIVE, _NULL_TYPE, _UnsignedBits >) | cub::BaseTraits< _CATEGORY, _PRIMITIVE, _NULL_TYPE, _UnsignedBits > | |
PRIMITIVE enum value (defined in cub::BaseTraits< _CATEGORY, _PRIMITIVE, _NULL_TYPE, _UnsignedBits >) | cub::BaseTraits< _CATEGORY, _PRIMITIVE, _NULL_TYPE, _UnsignedBits > | |
op | cub::ReduceByKeyOp< ReductionOpT > | |
operator()(const KeyValuePairT &first, const KeyValuePairT &second) | cub::ReduceByKeyOp< ReductionOpT > | inline |
ReduceByKeyOp() | cub::ReduceByKeyOp< ReductionOpT > | inline |
ReduceByKeyOp(ReductionOpT op) | cub::ReduceByKeyOp< ReductionOpT > | inline |
Reduce-by-segment functor.
-Given two cub::ItemOffsetPair inputs a
and b
and a binary associative combining operator
, an instance of this functor returns a cub::ItemOffsetPair whose f(const T &x, const T &y)
offset
field is a.offset
+ a.offset
, and whose value
field is either b.value if b.offset is non-zero, or f(a.value, b.value) otherwise.
ReduceBySegmentOp is an associative, non-commutative binary combining operator for input sequences of cub::ItemOffsetPair pairings. Such sequences are typically used to represent a segmented set of values to be reduced and a corresponding set of {0,1}-valued integer "head flags" demarcating the first value of each segment.< ItemOffsetPair pairing of T (value) and OffsetT (head flag)
+< Binary reduction operator to apply to values
-Definition at line 251 of file thread_operators.cuh.
+Definition at line 278 of file thread_operators.cuh.
Public Methods | |
-__host__ __device__ __forceinline__ | ReduceBySegmentOp () |
Constructor. | |
-__host__ __device__ __forceinline__ | ReduceBySegmentOp (ReductionOp op) |
Constructor. | |
__host__ __device__ -__forceinline__ ItemOffsetPair | operator() (const ItemOffsetPair &first, const ItemOffsetPair &second) |
Scan operator. More... | |
+__host__ __device__ __forceinline__ | ReduceByKeyOp () |
Constructor. | |
+__host__ __device__ __forceinline__ | ReduceByKeyOp (ReductionOpT op) |
Constructor. | |
template<typename KeyValuePairT > | |
__host__ __device__ +__forceinline__ KeyValuePairT | operator() (const KeyValuePairT &first, const KeyValuePairT &second) |
Scan operator. More... | |
+Public Members | |
+ReductionOpT | op |
Wrapped reduction operator. | |
+
+
+
+
+
+
+
+
+cub::ReduceBySegmentOp< ReductionOpT > Member List
+
+
+
+
+This is the complete list of members for cub::ReduceBySegmentOp< ReductionOpT >, including all inherited members. +
+Generated on Mon Apr 13 2015 13:56:52 for CUB by + + 1.8.4 + +© 2013 NVIDIA Corporation + + + diff --git a/docs/html/structcub_1_1_item_offset_pair.html b/docs/html/structcub_1_1_reduce_by_segment_op.html similarity index 51% rename from docs/html/structcub_1_1_item_offset_pair.html rename to docs/html/structcub_1_1_reduce_by_segment_op.html index 92b644c2f4..27fd88f325 100644 --- a/docs/html/structcub_1_1_item_offset_pair.html +++ b/docs/html/structcub_1_1_reduce_by_segment_op.html @@ -5,7 +5,7 @@ -
-
+cub::ItemOffsetPair< _T, _OffsetT > Struct Template Reference cub::ReduceBySegmentOp< ReductionOpT > Struct Template Reference
Detailed descriptiontemplate<
- typename _T,
- typename _OffsetT>
+ typename ReductionOpT> |
-Public Types | |
-typedef _T | T |
Item data type. | |
-typedef _OffsetT | OffsetT |
Integer offset data type. | |
Public Methods | |
-__device__ __forceinline__ | ItemOffsetPair () |
Constructor. | |
-__device__ __forceinline__ | ItemOffsetPair (_T value, _OffsetT offset) |
Constructor. | |
-__host__ __device__ -__forceinline__ bool | operator!= (const ItemOffsetPair &b) |
Inequality operator. | |
+__host__ __device__ __forceinline__ | ReduceBySegmentOp () |
Constructor. | |
+__host__ __device__ __forceinline__ | ReduceBySegmentOp (ReductionOpT op) |
Constructor. | |
template<typename KeyValuePairT > | |
__host__ __device__ +__forceinline__ KeyValuePairT | operator() (const KeyValuePairT &first, const KeyValuePairT &second) |
Scan operator. More... | |
Public Members | |
-union { | |
OffsetT offset | |
OffsetT. | |
UnitWord< T >::DeviceWord align0 | |
Alignment/padding (for Win32 consistency between host/device) | |
}; | |
-T | value |
Item value. | |
+ReductionOpT | op |
Wrapped reduction operator. | |
+
|
+ +inline | +
Scan operator.
+< KeyValuePair pairing of T (value) and OffsetT (head flag)
+first | First partial reduction |
second | Second partial reduction |
Definition at line 262 of file thread_operators.cuh.
+ +This is the complete list of members for cub::NumericTraits< T >, including all inherited members.
+This is the complete list of members for cub::RemoveQualifiers< Tp, Up >, including all inherited members.
CATEGORY | cub::BaseTraits< NOT_A_NUMBER, false, false, T > | static |
NULL_TYPE enum value (defined in cub::BaseTraits< NOT_A_NUMBER, false, false, T >) | cub::BaseTraits< NOT_A_NUMBER, false, false, T > | |
PRIMITIVE enum value (defined in cub::BaseTraits< NOT_A_NUMBER, false, false, T >) | cub::BaseTraits< NOT_A_NUMBER, false, false, T > | |
Type typedef | cub::RemoveQualifiers< Tp, Up > |
Basic type traits.
+Removes const
and volatile
qualifiers from type Tp
.
For example: typename RemoveQualifiers<volatile int>::Type // int;
Definition at line 897 of file util_type.cuh.
+Definition at line 721 of file util_type.cuh.
Public Types | |
enum | { PRIMITIVE = _PRIMITIVE, -NULL_TYPE = _NULL_TYPE - } |
-Static Public Members | |
-static const Category | CATEGORY = _CATEGORY |
Category. | |
+typedef Up | Type |
Type without const and volatile qualifiers. | |
- CUB
-
- |
-
Type traits.
- -Definition at line 1040 of file util_type.cuh.
--Additional Inherited Members | |
Public Types inherited from cub::BaseTraits< NOT_A_NUMBER, false, false, RemoveQualifiers< T >::Type > | |
enum | |
Static Public Members inherited from cub::BaseTraits< NOT_A_NUMBER, false, false, RemoveQualifiers< T >::Type > | |
-static const Category | CATEGORY |
Category. | |
@@ -160,7 +163,7 @@
cub::LaneMaskGe () |
| Returns the warp lane mask of all lanes greater than or equal to the calling thread. | | template<typename T > | __device__ __forceinline__ T | cub::ShuffleUp (T input, int src_offset) | | Shuffle-up for any data type. Each warp-lanei obtains the value | input contributed by warp-lanei-src_offset . For thread lanes i < src_offset, the thread's own input is returned to the thread.
+template<typename T > | __device__ __forceinline__ T | cub::ShuffleUp (T input, int src_offset, int first_lane=0) | | Shuffle-up for any data type. Each warp-lanei obtains the value | input contributed by warp-lanei-src_offset . For thread lanes i < src_offset, the thread's own input is returned to the thread.
| template<typename T > | __device__ __forceinline__ T | cub::ShuffleDown (T input, int src_offset) | | Shuffle-down for any data type. Each warp-lanei obtains the value | input contributed by warp-lanei+src_offset . For thread lanes i >= WARP_THREADS, the thread's own input is returned to the thread.
+ More... | template<typename T > | __device__ __forceinline__ T | cub::ShuffleDown (T input, int src_offset, int last_lane=CUB_PTX_WARP_THREADS-1) | | Shuffle-down for any data type. Each warp-lanei obtains the value | input contributed by warp-lanei+src_offset . For thread lanes i >= WARP_THREADS, the thread's own input is returned to the thread.
| template<typename T > | __device__ __forceinline__ T | cub::ShuffleBroadcast (T input, int src_lane) | | Shuffle-broadcast for any data type. Each warp-lanei obtains the value | input contributed by warp-lanesrc_lane . For src_lane < 0 or src_lane >= WARP_THREADS, then the thread's own input is returned to the thread.
+ More... | template<typename T > | __device__ __forceinline__ T | cub::ShuffleIndex (T input, int src_lane) | | Shuffle-broadcast for any data type. Each warp-lanei obtains the value | input contributed by warp-lanesrc_lane . For src_lane < 0 or src_lane >= WARP_THREADS, then the thread's own input is returned to the thread.
| + |
__device__ __forceinline__ int | cub::WarpAll (int cond) | | Portable implementation of __all. | -Generated on Wed Mar 18 2015 18:50:31 for CUB by +Generated on Mon Apr 13 2015 13:56:51 for CUB by 1.8.4 diff --git a/docs/html/util__ptx_8cuh_source.html b/docs/html/util__ptx_8cuh_source.html index 77b8f675e8..56417876a7 100644 --- a/docs/html/util__ptx_8cuh_source.html +++ b/docs/html/util__ptx_8cuh_source.html @@ -134,387 +134,490 @@ - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 133 asm("bfe.u32 %0, %1, %2, %3;" : "=r"(bits) : "r"((unsigned int) source), "r"(bit_start), "r"(num_bits));
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 135 asm volatile("bfe.u32 %0, %1, %2, %3;" : "=r"(bits) : "r"((unsigned int) source), "r"(bit_start), "r"(num_bits));
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 520 : "=r"(shuffle_word) : "r"((unsigned int) input_alias[WORD]), "r"(src_offset), "r"(first_lane));
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 615 : "=r"(shuffle_word) : "r"((unsigned int) input_alias[0]), "r"(src_lane), "r"(logical_warp_threads - 1));
+
+
+
+
+
+
+ 622 : "=r"(shuffle_word) : "r"((unsigned int) input_alias[WORD]), "r"(src_lane), "r"(logical_warp_threads - 1));
+
+
+
+ 626 // ShuffleIdx(input_alias, output_alias, src_lane, logical_warp_threads - 1, Int2Type<WORDS - 1>());
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-Generated on Wed Mar 18 2015 18:50:29 for CUB by +Generated on Mon Apr 13 2015 13:56:50 for CUB by 1.8.4 diff --git a/docs/html/util__type_8cuh.html b/docs/html/util__type_8cuh.html index 6df882ed3a..4f25489ff0 100644 --- a/docs/html/util__type_8cuh.html +++ b/docs/html/util__type_8cuh.html @@ -99,8 +99,7 @@ util_type.cuh File Reference struct | cub::Uninitialized< T > | | A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions. More... | | struct | cub::ItemOffsetPair< _T, _OffsetT > | | An item value paired with a corresponding offset. More... | | struct | cub::KeyValuePair< _Key, _Value > | | A key identifier paired with a corresponding value. More... | | struct | cub::PowerOfTwo< N > | | Statically determine if N is a power-of-two. More... | | struct | cub::BaseTraits< _CATEGORY, _PRIMITIVE, _NULL_TYPE, _UnsignedBits > | | Basic type traits. More... | struct | cub::IsPointer< Tp > | | Pointer vs. iterator. More... | | struct | cub::NumericTraits< T > | | Numeric type traits. More... | struct | cub::IsVolatile< Tp > | | Volatile modifier test. More... | | struct | cub::Traits< T > | | Type traits. More... | struct | cub::RemoveQualifiers< Tp, Up > | | Removes | const and volatile qualifiers from type Tp . More... | -Enumerationsenum | cub::Category { NOT_A_NUMBER,
-SIGNED_INTEGER,
-UNSIGNED_INTEGER,
-FLOATING_POINT
- } | | Basic type traits categories. | | | ||||||||||||||||||||||||||||||||||||
@@ -163,16 +159,6 @@
| cub | | Optional outer namespace(s) | |
@@ -522,6 +522,13 @@ The following table enumerates prior feature releases as well as update versions
for the current feature release.
|