Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…istogram

null-pointer exception bug for iterator inputs

- Update device histogram testing to include iterator-based samples
- Prevent a few macro redefinitions
- Update doc for 1.7.2
  • Loading branch information
dumerrill committed Aug 28, 2017
1 parent 16c2f87 commit 68a50fa
Show file tree
Hide file tree
Showing 5 changed files with 193 additions and 76 deletions.
6 changes: 6 additions & 0 deletions CHANGE_LOG.TXT
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
1.7.3 08/28/2017
- Bug fixes:
- Issue #110: DeviceHistogram null-pointer exception bug for iterator inputs

//-----------------------------------------------------------------------------

1.7.2 08/26/2017
- Bug fixes:
- Issue #104: Device-wide reduction is now "run-to-run" deterministic for
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<hr>
<h3>About CUB</h3>

Current release: v1.7.2 (08/26/2017)
Current release: v1.7.3 (08/28/2017)

We recommend the [CUB Project Website](http://nvlabs.github.com/cub) and the [cub-users discussion forum](http://groups.google.com/group/cub-users) for further information and examples.

Expand Down
2 changes: 1 addition & 1 deletion cub/agent/agent_histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -746,7 +746,7 @@ struct AgentHistogram
((row_bytes & pixel_mask) == 0); // number of row-samples is a multiple of the alignment of the pixel

// Whether rows are aligned and can be vectorized
if ((d_native_samples != nullptr) && (quad_aligned_rows || pixel_aligned_rows))
if ((d_native_samples != NULL) && (quad_aligned_rows || pixel_aligned_rows))
ConsumeTiles<true>(num_row_pixels, num_rows, row_stride_samples, tiles_per_row, tile_queue, Int2Type<IS_WORK_STEALING>());
else
ConsumeTiles<false>(num_row_pixels, num_rows, row_stride_samples, tiles_per_row, tile_queue, Int2Type<IS_WORK_STEALING>());
Expand Down
41 changes: 24 additions & 17 deletions cub/util_arch.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ namespace cub {

#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document

#if (__CUDACC_VER_MAJOR__ >= 9)
#define CUB_USE_COOPERATIVE_GROUPS
// Define CUB_USE_COOPERATIVE_GROUPS when __CUDACC_VER_MAJOR__ is 9 or greater.
// The outer guard leaves any definition that already exists untouched, which
// avoids a macro-redefinition when this header is processed more than once.
#ifndef CUB_USE_COOPERATIVE_GROUPS
    #if (__CUDACC_VER_MAJOR__ >= 9)
        #define CUB_USE_COOPERATIVE_GROUPS
    #endif
#endif

/// CUB_PTX_ARCH reflects the PTX version targeted by the active compiler pass (or zero during the host pass).
Expand Down Expand Up @@ -117,25 +117,32 @@ namespace cub {


/// Scale down the number of warps to keep same amount of "tile" storage as the nominal configuration for 4B data. Minimum of two warps.
#define CUB_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH) \
(CUB_MIN( \
NOMINAL_4B_BLOCK_THREADS * 2, \
CUB_WARP_THREADS(PTX_ARCH) * CUB_MAX( \
(NOMINAL_4B_BLOCK_THREADS / CUB_WARP_THREADS(PTX_ARCH)) * 3 / 4, \
(NOMINAL_4B_BLOCK_THREADS / CUB_WARP_THREADS(PTX_ARCH)) * 4 / sizeof(T))))
#ifndef CUB_BLOCK_THREADS
    /// Threads-per-block derived from a nominal configuration tuned for 4-byte items.
    ///
    /// Expands to the smaller of:
    ///   - twice NOMINAL_4B_BLOCK_THREADS, and
    ///   - CUB_WARP_THREADS(PTX_ARCH) times the larger of
    ///       (nominal warps * 3 / 4) and (nominal warps * 4 / sizeof(T)),
    /// where "nominal warps" is NOMINAL_4B_BLOCK_THREADS / CUB_WARP_THREADS(PTX_ARCH).
    ///
    /// Every use of NOMINAL_4B_BLOCK_THREADS is parenthesized so that an expression
    /// argument (e.g. `64 + 64`) is evaluated as a unit rather than being split by
    /// the precedence of the surrounding `*` and `/` operators.
    ///
    /// The guard keeps an existing definition and avoids a macro redefinition.
    #define CUB_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH)                          \
        (CUB_MIN(                                                                             \
            (NOMINAL_4B_BLOCK_THREADS) * 2,                                                   \
            CUB_WARP_THREADS(PTX_ARCH) * CUB_MAX(                                             \
                ((NOMINAL_4B_BLOCK_THREADS) / CUB_WARP_THREADS(PTX_ARCH)) * 3 / 4,            \
                ((NOMINAL_4B_BLOCK_THREADS) / CUB_WARP_THREADS(PTX_ARCH)) * 4 / sizeof(T))))
#endif

/// Scale up/down number of items per thread to keep the same amount of "tile" storage as the nominal configuration for 4B data. Minimum 1 item per thread
#define CUB_ITEMS_PER_THREAD(NOMINAL_4B_ITEMS_PER_THREAD, NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH) \
(CUB_MIN( \
NOMINAL_4B_ITEMS_PER_THREAD * 2, \
CUB_MAX( \
1, \
(NOMINAL_4B_ITEMS_PER_THREAD * NOMINAL_4B_BLOCK_THREADS * 4 / sizeof(T)) / CUB_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH))))
#ifndef CUB_ITEMS_PER_THREAD
    /// Items-per-thread derived from a nominal configuration tuned for 4-byte items.
    ///
    /// Expands to the smaller of:
    ///   - twice NOMINAL_4B_ITEMS_PER_THREAD, and
    ///   - the larger of 1 and
    ///       (nominal items * nominal threads * 4 / sizeof(T))
    ///         / CUB_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH).
    ///
    /// Both nominal arguments are parenthesized wherever they take part in
    /// arithmetic, and the thread count is forwarded to CUB_BLOCK_THREADS already
    /// parenthesized, so expression arguments (e.g. `4 + 4`) are evaluated as a unit.
    ///
    /// The guard keeps an existing definition and avoids a macro redefinition.
    #define CUB_ITEMS_PER_THREAD(NOMINAL_4B_ITEMS_PER_THREAD, NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH)    \
        (CUB_MIN(                                                                                       \
            (NOMINAL_4B_ITEMS_PER_THREAD) * 2,                                                          \
            CUB_MAX(                                                                                    \
                1,                                                                                      \
                ((NOMINAL_4B_ITEMS_PER_THREAD) * (NOMINAL_4B_BLOCK_THREADS) * 4 / sizeof(T)) /          \
                    CUB_BLOCK_THREADS((NOMINAL_4B_BLOCK_THREADS), T, PTX_ARCH))))
#endif

/// Expands to two comma-separated values: the scaled threads-per-block followed by
/// the scaled items-per-thread, both computed for element type T with a PTX
/// architecture argument of 200. An existing definition is left in place.
#ifndef CUB_NOMINAL_CONFIG
    #define CUB_NOMINAL_CONFIG(NOMINAL_THREADS, NOMINAL_ITEMS, T)       \
        CUB_BLOCK_THREADS(NOMINAL_THREADS, T, 200),                     \
        CUB_ITEMS_PER_THREAD(NOMINAL_ITEMS, NOMINAL_THREADS, T, 200)
#endif

#define CUB_NOMINAL_CONFIG(NOMINAL_4B_BLOCK_THREADS, NOMINAL_4B_ITEMS_PER_THREAD, T) \
CUB_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, 200), \
CUB_ITEMS_PER_THREAD(NOMINAL_4B_ITEMS_PER_THREAD, NOMINAL_4B_BLOCK_THREADS, T, 200)


#endif // Do not document
Expand Down
Loading

0 comments on commit 68a50fa

Please sign in to comment.