Skip to content

Commit

Permalink
Linux compile test passes for all architectures with CUDA 7.5
Browse files: browse the repository at this point in the history
Former-commit-id: 8eed718a846519bcdc8712e83d0bc1f647a7fe70
  • Loading branch information
dumerrill committed Dec 14, 2015
1 parent b7b5893 commit 7d6c8fd
Show file tree
Hide file tree
Showing 15 changed files with 45 additions and 16 deletions.
2 changes: 1 addition & 1 deletion cub/device/dispatch/dispatch_reduce.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -706,7 +706,7 @@ struct DispatchReduce :
cudaStream_t stream, ///< [in] <b>[optional]</b> CUDA stream to launch kernels within. Default is stream<sub>0</sub>.
bool debug_synchronous) ///< [in] <b>[optional]</b> Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false.
{
typedef DispatchReduce::MaxPolicy MaxPolicyT;
typedef typename DispatchReduce::MaxPolicy MaxPolicyT;

cudaError error = cudaSuccess;
do
Expand Down
25 changes: 17 additions & 8 deletions cub/thread/thread_store.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -342,14 +342,18 @@ __device__ __forceinline__ void ThreadStoreVolatilePtr(

#else

typedef typename UnitWord<T>::VolatileWord VolatileWord; // Word type for memcopying
// Create a temporary using shuffle-words, then store using volatile-words
typedef typename UnitWord<T>::VolatileWord VolatileWord;
typedef typename UnitWord<T>::ShuffleWord ShuffleWord;

const int VOLATILE_MULTIPLE = sizeof(T) / sizeof(VolatileWord);

const int SHUFFLE_MULTIPLE = sizeof(T) / sizeof(ShuffleWord);

VolatileWord words[VOLATILE_MULTIPLE];
*reinterpret_cast<T*>(words) = val;

// VolatileWord *words = reinterpret_cast<VolatileWord*>(&val);
#pragma unroll
for (int i = 0; i < SHUFFLE_MULTIPLE; ++i)
reinterpret_cast<ShuffleWord*>(words)[i] = reinterpret_cast<ShuffleWord*>(&val)[i];

IterateThreadStore<0, VOLATILE_MULTIPLE>::template Dereference(
reinterpret_cast<volatile VolatileWord*>(ptr),
Expand Down Expand Up @@ -384,13 +388,18 @@ __device__ __forceinline__ void ThreadStore(
Int2Type<MODIFIER> modifier,
Int2Type<true> is_pointer)
{
typedef typename UnitWord<T>::DeviceWord DeviceWord; // Word type for memcopying

const int DEVICE_MULTIPLE = sizeof(T) / sizeof(DeviceWord);
// Create a temporary using shuffle-words, then store using device-words
typedef typename UnitWord<T>::DeviceWord DeviceWord;
typedef typename UnitWord<T>::ShuffleWord ShuffleWord;

const int DEVICE_MULTIPLE = sizeof(T) / sizeof(DeviceWord);
const int SHUFFLE_MULTIPLE = sizeof(T) / sizeof(ShuffleWord);

DeviceWord words[DEVICE_MULTIPLE];

*reinterpret_cast<T*>(words) = val;
#pragma unroll
for (int i = 0; i < SHUFFLE_MULTIPLE; ++i)
reinterpret_cast<ShuffleWord*>(words)[i] = reinterpret_cast<ShuffleWord*>(&val)[i];

IterateThreadStore<0, DEVICE_MULTIPLE>::template Store<CacheStoreModifier(MODIFIER)>(
reinterpret_cast<DeviceWord*>(ptr),
Expand Down
1 change: 1 addition & 0 deletions test/test_block_reduce.cu
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <stdio.h>

#include <device_functions.h>
#include <typeinfo>

#include <cub/block/block_reduce.cuh>
#include <cub/block/block_load.cuh>
Expand Down
1 change: 1 addition & 0 deletions test/test_block_scan.cu
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <stdio.h>
#include <iostream>
#include <limits>
#include <typeinfo>

#include <cub/block/block_scan.cuh>
#include <cub/block/block_load.cuh>
Expand Down
1 change: 1 addition & 0 deletions test/test_device_histogram.cu
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <stdio.h>
#include <limits>
#include <algorithm>
#include <typeinfo>

#include <npp.h>

Expand Down
1 change: 1 addition & 0 deletions test/test_device_radix_sort.cu
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@

#include <stdio.h>
#include <algorithm>
#include <typeinfo>

#include <cub/util_allocator.cuh>
#include <cub/device/device_radix_sort.cuh>
Expand Down
8 changes: 5 additions & 3 deletions test/test_device_reduce.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1131,9 +1131,11 @@ void TestType(
int max_items,
int max_segments)
{
DeviceReducePolicy<T, int, cub::Sum>::MaxPolicy::Invoke(
g_ptx_version,
TestBySize<T>(max_items, max_segments));
typedef typename DeviceReducePolicy<T, int, cub::Sum>::MaxPolicy MaxPolicyT;

TestBySize<T> dispatch(max_items, max_segments);

MaxPolicyT::Invoke(g_ptx_version, dispatch);
}


Expand Down
1 change: 1 addition & 0 deletions test/test_device_reduce_by_key.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#define CUB_STDERR

#include <stdio.h>
#include <typeinfo>

#include <cub/util_allocator.cuh>
#include <cub/iterator/constant_input_iterator.cuh>
Expand Down
1 change: 1 addition & 0 deletions test/test_device_run_length_encode.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#define CUB_STDERR

#include <stdio.h>
#include <typeinfo>

#include <cub/util_allocator.cuh>
#include <cub/iterator/constant_input_iterator.cuh>
Expand Down
1 change: 1 addition & 0 deletions test/test_device_scan.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#define CUB_STDERR

#include <stdio.h>
#include <typeinfo>

#include <cub/util_allocator.cuh>
#include <cub/iterator/constant_input_iterator.cuh>
Expand Down
1 change: 1 addition & 0 deletions test/test_device_select_if.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#define CUB_STDERR

#include <stdio.h>
#include <typeinfo>

#include <cub/util_allocator.cuh>
#include <cub/device/device_select.cuh>
Expand Down
1 change: 1 addition & 0 deletions test/test_device_select_unique.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#define CUB_STDERR

#include <stdio.h>
#include <typeinfo>

#include <cub/util_allocator.cuh>
#include <cub/iterator/counting_input_iterator.cuh>
Expand Down
1 change: 1 addition & 0 deletions test/test_iterator.cu
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@

#include <iterator>
#include <stdio.h>
#include <typeinfo>

#include <cub/iterator/arg_index_input_iterator.cuh>
#include <cub/iterator/cache_modified_input_iterator.cuh>
Expand Down
1 change: 1 addition & 0 deletions test/test_warp_reduce.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#define CUB_STDERR

#include <stdio.h>
#include <typeinfo>

#include <cub/warp/warp_reduce.cuh>
#include <cub/util_allocator.cuh>
Expand Down
15 changes: 11 additions & 4 deletions test/test_warp_scan.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#define CUB_STDERR

#include <stdio.h>
#include <typeinfo>

#include <cub/warp/warp_scan.cuh>
#include <cub/util_allocator.cuh>
Expand Down Expand Up @@ -348,6 +349,13 @@ void Test(
T *p_prefix = NULL;
T aggregate = Initialize(gen_mode, h_in, h_reference, LOGICAL_WARP_THREADS, scan_op, identity, p_prefix);

if (g_verbose)
{
printf("Input: \n");
DisplayResults(h_in, LOGICAL_WARP_THREADS);
printf("\n");
}

for (int i = 0; i < LOGICAL_WARP_THREADS; ++i)
{
h_aggregate[i] = aggregate;
Expand All @@ -367,9 +375,9 @@ void Test(
CubDebugExit(cudaMemset(d_aggregate, 0, sizeof(T) * LOGICAL_WARP_THREADS));

// Run kernel
printf("Test-mode %d, gen-mode %d, %s warpscan, %d warp threads, %s (%d bytes) elements:\n",
TEST_MODE,
gen_mode,
printf("Test-mode %d (%s), gen-mode %d (%s), %s warpscan, %d warp threads, %s (%d bytes) elements:\n",
TEST_MODE, typeid(TEST_MODE).name(),
gen_mode, typeid(gen_mode).name(),
(Equals<IdentityT, NullType>::VALUE) ? "Inclusive" : "Exclusive",
LOGICAL_WARP_THREADS,
typeid(T).name(),
Expand Down Expand Up @@ -455,7 +463,6 @@ void Test(GenMode gen_mode)
int ptx_version;
CubDebugExit(PtxVersion(ptx_version));


// primitive
Test<LOGICAL_WARP_THREADS>(gen_mode, Sum(), (char) 0, (char) 99);
Test<LOGICAL_WARP_THREADS>(gen_mode, Sum(), (short) 0, (short) 99);
Expand Down

0 comments on commit 7d6c8fd

Please sign in to comment.