From bf668e99662d0692faa3ad3e57b10ad5d890c722 Mon Sep 17 00:00:00 2001 From: Azim Afroozeh Date: Thu, 9 May 2024 19:11:25 +0200 Subject: [PATCH] init V_0_1 --- .github/workflows/CI.yaml | 165 + .gitignore | 6 + CMakeLists.txt | 112 + LICENSE | 21 + README.md | 40 + crystal-opt/CMakeLists.txt | 3 + crystal-opt/README.md | 29 + crystal-opt/src/CMakeLists.txt | 43 + crystal-opt/src/crystal/crystal.cuh | 32 + crystal-opt/src/crystal/join.cuh | 333 + crystal-opt/src/crystal/load.cuh | 166 + crystal-opt/src/crystal/pred.cuh | 357 + crystal-opt/src/crystal/reduce.cuh | 75 + crystal-opt/src/crystal/store.cuh | 120 + crystal-opt/src/crystal/term.cuh | 33 + crystal-opt/src/ops/join.cu | 242 + crystal-opt/src/ops/project.cu | 198 + crystal-opt/src/ops/utils/generator.h | 399 + crystal-opt/src/ops/utils/gpu_utils.h | 57 + crystal-opt/src/ssb/all.cu | 2734 ++ crystal-opt/src/ssb/gpu_utils.h | 57 + crystal-opt/src/ssb/q11.cu | 204 + crystal-opt/src/ssb/q12.cu | 206 + crystal-opt/src/ssb/q13.cu | 207 + crystal-opt/src/ssb/q21.cu | 336 + crystal-opt/src/ssb/q22.cu | 334 + crystal-opt/src/ssb/q23.cu | 326 + crystal-opt/src/ssb/q31.cu | 349 + crystal-opt/src/ssb/q32.cu | 343 + crystal-opt/src/ssb/q33.cu | 317 + crystal-opt/src/ssb/q34.cu | 365 + crystal-opt/src/ssb/q41.cu | 438 + crystal-opt/src/ssb/q42.cu | 411 + crystal-opt/src/ssb/q43.cu | 405 + crystal-opt/src/ssb/ssb_utils.h | 177 + crystal/CMakeLists.txt | 3 + crystal/LICENSE | 21 + crystal/README.md | 79 + crystal/src/CMakeLists.txt | 43 + crystal/src/crystal/crystal.cuh | 9 + crystal/src/crystal/join.cuh | 311 + crystal/src/crystal/load.cuh | 97 + crystal/src/crystal/pred.cuh | 335 + crystal/src/crystal/reduce.cuh | 53 + crystal/src/crystal/store.cuh | 98 + crystal/src/ops/join.cu | 220 + crystal/src/ops/project.cu | 176 + crystal/src/ops/utils/generator.h | 377 + crystal/src/ops/utils/gpu_utils.h | 35 + crystal/src/ssb/gpu_utils.h | 35 + crystal/src/ssb/q11.cu | 168 + crystal/src/ssb/q12.cu | 167 + crystal/src/ssb/q13.cu | 
167 + crystal/src/ssb/q21.cu | 286 + crystal/src/ssb/q22.cu | 286 + crystal/src/ssb/q23.cu | 279 + crystal/src/ssb/q31.cu | 296 + crystal/src/ssb/q32.cu | 290 + crystal/src/ssb/q33.cu | 291 + crystal/src/ssb/q34.cu | 316 + crystal/src/ssb/q41.cu | 371 + crystal/src/ssb/q42.cu | 346 + crystal/src/ssb/q43.cu | 343 + crystal/src/ssb/ssb_utils.h | 107 + data/README.md | 17 + data/result_of_queries/q11 | 1 + data/result_of_queries/q21 | 281 + data/result_of_queries/q31 | 151 + data/result_of_queries/q41 | 36 + data/ssb/.gitignore | 4 + data/ssb/SSB.md | 372 + data/ssb/dbgen/.gitignore | 6 + data/ssb/dbgen/BUGS | 987 + data/ssb/dbgen/CHANGES | 33 + data/ssb/dbgen/HISTORY | 535 + data/ssb/dbgen/PORTING.NOTES | 220 + data/ssb/dbgen/README | 88 + data/ssb/dbgen/TPCH_README | 425 + data/ssb/dbgen/bcd2.c | 237 + data/ssb/dbgen/bcd2.h | 11 + data/ssb/dbgen/bcd2.o | Bin 0 -> 4536 bytes data/ssb/dbgen/bm_utils.c | 589 + data/ssb/dbgen/bm_utils.o | Bin 0 -> 12856 bytes data/ssb/dbgen/build.c | 800 + data/ssb/dbgen/build.o | Bin 0 -> 23320 bytes data/ssb/dbgen/config.h | 179 + data/ssb/dbgen/dists.dss | 817 + data/ssb/dbgen/driver.c | 1144 + data/ssb/dbgen/driver.o | Bin 0 -> 41400 bytes data/ssb/dbgen/dss.ddl | 70 + data/ssb/dbgen/dss.h | 610 + data/ssb/dbgen/dss.ri | 100 + data/ssb/dbgen/dsstypes.h | 312 + data/ssb/dbgen/history.html | 586 + data/ssb/dbgen/load_stub.c | 281 + data/ssb/dbgen/load_stub.o | Bin 0 -> 6680 bytes data/ssb/dbgen/makefile | 127 + data/ssb/dbgen/makefile.suite | 127 + data/ssb/dbgen/makefile_win | 85 + data/ssb/dbgen/permute.c | 175 + data/ssb/dbgen/permute.h | 47 + data/ssb/dbgen/permute.o | Bin 0 -> 3248 bytes data/ssb/dbgen/print.c | 1006 + data/ssb/dbgen/print.o | Bin 0 -> 19760 bytes data/ssb/dbgen/qgen | Bin 0 -> 78240 bytes data/ssb/dbgen/qgen.c | 469 + data/ssb/dbgen/qgen.o | Bin 0 -> 33552 bytes data/ssb/dbgen/rnd.c | 262 + data/ssb/dbgen/rnd.h | 80 + data/ssb/dbgen/rnd.o | Bin 0 -> 10608 bytes data/ssb/dbgen/shared.h | 140 + 
data/ssb/dbgen/speed_seed.c | 325 + data/ssb/dbgen/speed_seed.o | Bin 0 -> 7776 bytes data/ssb/dbgen/tags | 1078 + data/ssb/dbgen/text.c | 313 + data/ssb/dbgen/text.o | Bin 0 -> 4312 bytes data/ssb/dbgen/tpcd.h | 103 + data/ssb/dbgen/varsub.c | 314 + data/ssb/dbgen/varsub.o | Bin 0 -> 17936 bytes data/ssb/loader/.metadata | 913 + data/ssb/loader/Makefile | 17 + data/ssb/loader/columnSort.c | 302 + data/ssb/loader/convert.py | 106 + data/ssb/loader/convert_old.py | 102 + data/ssb/loader/dict.c | 235 + data/ssb/loader/include/common.h | 245 + data/ssb/loader/include/schema.h | 77 + data/ssb/loader/load.c | 1091 + data/ssb/loader/load_modified.c | 1096 + data/ssb/loader/rle.c | 151 + data/ssb/loader/soa.c | 89 + data/ssb/loader/sort.py | 43 + data/ssb/loader/sort_other_way.py | 43 + data/ssb/queries/original/load.sql | 5 + data/ssb/queries/original/q11.sql | 7 + data/ssb/queries/original/q12.sql | 7 + data/ssb/queries/original/q13.sql | 8 + data/ssb/queries/original/q21.sql | 10 + data/ssb/queries/original/q22.sql | 11 + data/ssb/queries/original/q23.sql | 10 + data/ssb/queries/original/q31.sql | 10 + data/ssb/queries/original/q32.sql | 11 + data/ssb/queries/original/q33.sql | 13 + data/ssb/queries/original/q34.sql | 13 + data/ssb/queries/original/q41.sql | 12 + data/ssb/queries/original/q42.sql | 14 + data/ssb/queries/original/q43.sql | 13 + data/ssb/queries/original/schema.sql | 77 + data/ssb/queries/transformed/load.sql | 5 + data/ssb/queries/transformed/p1.sql | 10 + data/ssb/queries/transformed/q11.sql | 6 + data/ssb/queries/transformed/q12.sql | 7 + data/ssb/queries/transformed/q13.sql | 9 + data/ssb/queries/transformed/q21.sql | 9 + data/ssb/queries/transformed/q22.sql | 10 + data/ssb/queries/transformed/q23.sql | 9 + data/ssb/queries/transformed/q31.sql | 10 + data/ssb/queries/transformed/q32.sql | 10 + data/ssb/queries/transformed/q33.sql | 10 + data/ssb/queries/transformed/q34.sql | 10 + data/ssb/queries/transformed/q41.sql | 11 + 
data/ssb/queries/transformed/q42.sql | 12 + data/ssb/queries/transformed/q43.sql | 12 + data/ssb/queries/transformed/schema.sql | 72 + data/ssb/queries/transformed/schema_no_pk.sql | 72 + data/util.py | 80 + fastlanes/CMakeLists.txt | 8 + fastlanes/example/CMakeLists.txt | 6 + fastlanes/example/fastlanes_bench_bitpack.cu | 86 + fastlanes/example/fastlanes_bench_delta.cu | 194 + fastlanes/generate.py | 15 + fastlanes/generated/CMakeLists.txt | 1 + fastlanes/generated/cuda/CMakeLists.txt | 2 + .../cuda/fused_t32_uf1/CMakeLists.txt | 38 + .../cuda_fused_t32_1024_uf1_unpack_bench.cu | 1047 + .../cuda_fused_t32_1024_uf1_unpack_helper.hpp | 2894 ++ .../cuda_fused_t32_1024_uf1_unpack_src.cu | 0 .../cuda_fused_t32_1024_uf1_unpack_test.cu | 366 + .../generated/cuda/fused_t32_uf1/unpack.cmake | 19 + .../cuda/normal_t32_uf1/CMakeLists.txt | 38 + .../cuda_normal_t32_1024_uf1_unpack_bench.cu | 1047 + ...cuda_normal_t32_1024_uf1_unpack_helper.hpp | 2899 ++ .../cuda_normal_t32_1024_uf1_unpack_src.cu | 0 .../cuda_normal_t32_1024_uf1_unpack_test.cu | 366 + .../cuda/normal_t32_uf1/unpack.cmake | 19 + fastlanes/generated/generated_files.txt | 8 + fastlanes/generated_files.txt | 4 + fastlanes/src/CMakeLists.txt | 57 + fastlanes/src/bitpack_register.cu | 144 + fastlanes/src/bitpack_shared_memory.cu | 148 + fastlanes/src/delta_global_memory.cu | 160 + fastlanes/src/delta_shared_memory.cu | 180 + fastlanes/src/fastlanes_gpu.cpp | 3 + fastlanes/src/include/common.cuh | 12 + fastlanes/src/include/crystal-opt/crystal.cuh | 32 + fastlanes/src/include/crystal-opt/join.cuh | 334 + fastlanes/src/include/crystal-opt/load.cuh | 147 + fastlanes/src/include/crystal-opt/pred.cuh | 459 + fastlanes/src/include/crystal-opt/reduce.cuh | 75 + fastlanes/src/include/crystal-opt/store.cuh | 120 + fastlanes/src/include/crystal-opt/term.cuh | 33 + fastlanes/src/include/crystal/crystal.cuh | 9 + fastlanes/src/include/crystal/join.cuh | 275 + fastlanes/src/include/crystal/load.cuh | 210 + 
fastlanes/src/include/crystal/pred.cuh | 246 + fastlanes/src/include/crystal/reduce.cuh | 45 + fastlanes/src/include/crystal/store.cuh | 82 + fastlanes/src/include/crystal/term.cuh | 33 + fastlanes/src/include/crystal_ssb_utils.h | 136 + fastlanes/src/include/debug.cuh | 26 + fastlanes/src/include/debug.hpp | 94 + fastlanes/src/include/error.cuh | 26 + fastlanes/src/include/fastlanes.cuh | 12 + fastlanes/src/include/fastlanes/join.cuh | 82 + fastlanes/src/include/fastlanes/pred.cuh | 0 fastlanes/src/include/fls_gen/macros.hpp | 4 + fastlanes/src/include/fls_gen/pack/pack.hpp | 27 + fastlanes/src/include/fls_gen/rle/rle.hpp | 45 + fastlanes/src/include/fls_gen/rsum/rsum.cuh | 107 + .../include/fls_gen/transpose/transpose.hpp | 24 + .../include/fls_gen/unpack/hardcoded_16.cuh | 1276 + .../src/include/fls_gen/unpack/unpack.cuh | 3451 ++ .../src/include/fls_gen/unpack/unpack.hpp | 13 + .../include/fls_gen/unpack/unpack_fused.cuh | 3461 ++ .../src/include/fls_gen/unrsum/unrsum.hpp | 58 + fastlanes/src/include/gpu_utils.h | 93 + fastlanes/src/include/query/query_2.hpp | 37 + fastlanes/src/include/query/query_21.hpp | 295 + fastlanes/src/include/query/query_3.hpp | 44 + fastlanes/src/include/query/query_31.hpp | 165 + fastlanes/src/include/query/query_4.hpp | 37 + fastlanes/src/include/query/query_41.hpp | 50 + fastlanes/src/include/ssb_utils.h | 248 + fastlanes/src/include/util.cuh | 92 + fastlanes/src/pack.cpp | 29910 ++++++++++++++++ fastlanes/src/ssb/READMe.md | 33 + fastlanes/src/ssb/compress_ssb.cu | 530 + fastlanes/src/ssb/compress_ssb_sorted.cu | 533 + fastlanes/src/ssb/fls_q11.cu | 241 + fastlanes/src/ssb/fls_q11_bitpacked_opt_v2.cu | 233 + fastlanes/src/ssb/fls_q11_bitpacked_opt_v3.cu | 207 + fastlanes/src/ssb/fls_q11_bitpacked_opt_v4.cu | 194 + fastlanes/src/ssb/fls_q11_bp_crystal_opt.cu | 264 + fastlanes/src/ssb/fls_q21.cu | 482 + fastlanes/src/ssb/fls_q21_bitpacked_opt_v4.cu | 394 + fastlanes/src/ssb/fls_q31.cu | 626 + 
fastlanes/src/ssb/fls_q31_bitpacked_opt_v5.cu | 393 + fastlanes/src/ssb/fls_q41.cu | 603 + fastlanes/src/ssb/fls_q41_bitpacked_opt_v3.cu | 531 + fastlanes/src/ssb/fls_q41_bitpacked_opt_v4.cu | 538 + fastlanes/src/test_g.cu | 209 + fastlanes/src/tmp/fls_q41_bitpacked_opt_v2.cu | 485 + fastlanes/src/transpose.cpp | 8215 +++++ fastlanes/src/unrsum.cpp | 523 + include/cub/test/CMakeLists.txt | 367 + include/cub/test/README.md | 125 + include/cub/test/bfloat16.h | 249 + include/cub/test/c2h/custom_type.cuh | 200 + include/cub/test/c2h/generators.cu | 417 + include/cub/test/c2h/generators.cuh | 103 + include/cub/test/catch2_runner.cu | 3 + .../catch2_test_block_adjacent_difference.cu | 425 + .../cub/test/catch2_test_block_histogram.cu | 216 + include/cub/test/catch2_test_block_load.cu | 326 + .../cub/test/catch2_test_block_merge_sort.cu | 520 + .../cub/test/catch2_test_block_radix_sort.cu | 394 + .../cub/test/catch2_test_block_radix_sort.cuh | 457 + .../catch2_test_block_radix_sort_custom.cu | 1060 + include/cub/test/catch2_test_block_reduce.cu | 362 + .../catch2_test_block_run_length_decode.cu | 638 + include/cub/test/catch2_test_block_scan.cu | 536 + include/cub/test/catch2_test_block_shuffle.cu | 427 + include/cub/test/catch2_test_block_store.cu | 327 + include/cub/test/catch2_test_cdp_helper.h | 170 + include/cub/test/catch2_test_cdp_wrapper.cu | 229 + .../catch2_test_device_decoupled_look_back.cu | 168 + .../catch2_test_device_radix_sort_custom.cu | 1693 + include/cub/test/catch2_test_helper.h | 206 + include/cub/test/catch2_test_printing.cu | 36 + .../cub/test/catch2_test_radix_operations.cu | 686 + include/cub/test/catch2_test_util_type.cu | 70 + include/cub/test/catch2_test_warp_exchange.cu | 354 + include/cub/test/catch2_test_warp_load.cu | 387 + include/cub/test/catch2_test_warp_mask.cu | 108 + .../cub/test/catch2_test_warp_merge_sort.cu | 594 + include/cub/test/catch2_test_warp_reduce.cu | 608 + include/cub/test/catch2_test_warp_scan.cu | 689 + 
include/cub/test/catch2_test_warp_store.cu | 314 + include/cub/test/cmake/CMakeLists.txt | 24 + .../cub/test/cmake/check_source_files.cmake | 178 + .../test/cmake/test_install/CMakeLists.txt | 93 + include/cub/test/fill_striped.cuh | 163 + include/cub/test/half.h | 345 + include/cub/test/link_a.cu | 11 + include/cub/test/link_b.cu | 11 + include/cub/test/link_main.cpp | 10 + include/cub/test/mersenne.h | 162 + include/cub/test/test_allocator.cu | 452 + include/cub/test/test_block_radix_rank.cu | 343 + include/cub/test/test_cdp_variant_state.cu | 34 + .../test/test_device_adjacent_difference.cu | 701 + include/cub/test/test_device_batch_copy.cu | 523 + include/cub/test/test_device_batch_memcpy.cu | 733 + include/cub/test/test_device_histogram.cu | 1684 + include/cub/test/test_device_merge_sort.cu | 362 + include/cub/test/test_device_radix_sort.cu | 2251 ++ include/cub/test/test_device_reduce.cu | 1916 + include/cub/test/test_device_reduce_by_key.cu | 747 + .../cub/test/test_device_run_length_encode.cu | 839 + include/cub/test/test_device_scan.cu | 1275 + include/cub/test/test_device_scan_by_key.cu | 1099 + .../cub/test/test_device_segmented_sort.cu | 1946 + include/cub/test/test_device_select_if.cu | 1118 + include/cub/test/test_device_select_unique.cu | 616 + .../test/test_device_select_unique_by_key.cu | 631 + include/cub/test/test_device_spmv.cu | 594 + .../test/test_device_three_way_partition.cu | 594 + include/cub/test/test_grid_barrier.cu | 152 + include/cub/test/test_iterator.cu | 544 + include/cub/test/test_iterator_deprecated.cu | 306 + include/cub/test/test_namespace_wrapped.cu | 76 + .../cub/test/test_temporary_storage_layout.cu | 219 + include/cub/test/test_thread_operators.cu | 259 + include/cub/test/test_thread_sort.cu | 150 + include/cub/test/test_util.h | 1655 + include/cub/test/test_util_vec.h | 320 + .../T4/crystal-fls/crystal_fls_q11_sf10.txt | 185 + results/T4/crystal-fls/crystal_fls_q21.txt | 1037 + .../T4/crystal-fls/crystal_fls_q21_sf10.txt | 
1034 + .../T4/crystal-fls/crystal_fls_q31_sf10.txt | 939 + .../T4/crystal-fls/crystal_fls_q41_sf10.txt | 976 + results/T4/crystal-opt/crystal_opt_q21.txt | 984 + .../T4/crystal-opt/crystal_opt_q21_sf10.txt | 1008 + .../T4/crystal-opt/crystal_opt_q31_sf10.txt | 913 + .../T4/crystal-opt/crystal_opt_q41_sf10.txt | 952 + results/T4/crystal/crystal_q21.txt | 1008 + results/T4/crystal/crystal_q31_sf10.txt | 906 + results/T4/crystal/crystal_q41_sf10.txt | 951 + .../crystal-fls/crystal_fls_q21_sf10_2.txt | 1037 + .../V100/crystal-fls/crystal_fls_q31_sf10.txt | 955 + .../V100/crystal-fls/crystal_fls_q41_sf10.txt | 983 + .../crystal_opt_fls_q11_sf10.txt | 178 + .../V100/crystal-opt/crystal_opt_q11_sf10.txt | 186 + .../V100/crystal-opt/crystal_opt_q21_sf10.txt | 1045 + .../V100/crystal-opt/crystal_opt_q31_sf10.txt | 926 + .../crystal-opt/crystal_opt_q41_sf10_v100.txt | 987 + results/V100/crystal/crystal_q21_sf10.txt | 1056 + results/V100/crystal/crystal_q31_sf10.txt | 920 + .../V100/crystal/crystal_q41_sf10_v100.txt | 971 + scripts/ssb_on_duckdb.py | 9 + scripts/ssb_on_duckdb/__init__.py | 33 + scripts/ssb_on_duckdb/load.py | 93 + scripts/ssb_on_duckdb/query_11.py | 26 + scripts/ssb_on_duckdb/query_12.py | 13 + scripts/ssb_on_duckdb/query_13.py | 15 + scripts/ssb_on_duckdb/query_21.py | 15 + tile_based/CMakeLists.txt | 1 + tile_based/README.md | 1 + tile_based/src/CMakeLists.txt | 43 + tile_based/src/config.hpp | 213 + tile_based/src/include/binpack_kernel.cuh | 84 + tile_based/src/include/crystal/crystal.cuh | 9 + tile_based/src/include/crystal/join.cuh | 311 + tile_based/src/include/crystal/load.cuh | 97 + tile_based/src/include/crystal/pred.cuh | 335 + tile_based/src/include/crystal/reduce.cuh | 45 + tile_based/src/include/crystal/store.cuh | 98 + .../src/include/deltabinpack_kernel.cuh | 103 + tile_based/src/include/econfig.h | 8 + tile_based/src/include/kernel.cuh | 5 + tile_based/src/include/rlebinpack_kernel.cuh | 146 + tile_based/src/include/ssb_gpu_utils.h | 67 + 
tile_based/src/include/ssb_utils.h | 239 + tile_based/src/include/utils/gpu_utils.h | 17 + tile_based/src/rlebinpack.cpp | 235 + tile_based/src/rlebinpack_kernel.cuh | 146 + tile_based/src/test_match_rle.cu | 137 + tile_based/src/test_perf_rle.cu | 126 + tile_based/src/tile_based.cu | 3 + tile_based/src/tile_based_bench_bitpack.cu | 182 + tile_based/src/tile_based_bench_bp_sum.cu | 225 + tile_based/src/tile_based_bench_delta.cu | 254 + tile_based/src/tile_based_bench_delta_sum.cu | 270 + tile_based/src/tile_based_bench_rle.cu | 138 + .../src/tile_based_bench_rle_all_memory.cu | 327 + tile_based/src/tile_based_binpack_query_11.cu | 281 + .../src/tile_based_bitpack_shared_memory.cu | 256 + tool/CMakeLists.txt | 1 + tool/device_query.cu | 87 + toolchains/T4.cmake | 7 + toolchains/gtx1080.cmake | 7 + 391 files changed, 162236 insertions(+) create mode 100644 .github/workflows/CI.yaml create mode 100644 .gitignore create mode 100644 CMakeLists.txt create mode 100644 LICENSE create mode 100644 README.md create mode 100644 crystal-opt/CMakeLists.txt create mode 100644 crystal-opt/README.md create mode 100644 crystal-opt/src/CMakeLists.txt create mode 100644 crystal-opt/src/crystal/crystal.cuh create mode 100644 crystal-opt/src/crystal/join.cuh create mode 100644 crystal-opt/src/crystal/load.cuh create mode 100644 crystal-opt/src/crystal/pred.cuh create mode 100644 crystal-opt/src/crystal/reduce.cuh create mode 100644 crystal-opt/src/crystal/store.cuh create mode 100644 crystal-opt/src/crystal/term.cuh create mode 100644 crystal-opt/src/ops/join.cu create mode 100644 crystal-opt/src/ops/project.cu create mode 100644 crystal-opt/src/ops/utils/generator.h create mode 100644 crystal-opt/src/ops/utils/gpu_utils.h create mode 100644 crystal-opt/src/ssb/all.cu create mode 100644 crystal-opt/src/ssb/gpu_utils.h create mode 100644 crystal-opt/src/ssb/q11.cu create mode 100644 crystal-opt/src/ssb/q12.cu create mode 100644 crystal-opt/src/ssb/q13.cu create mode 100644 
crystal-opt/src/ssb/q21.cu create mode 100644 crystal-opt/src/ssb/q22.cu create mode 100644 crystal-opt/src/ssb/q23.cu create mode 100644 crystal-opt/src/ssb/q31.cu create mode 100644 crystal-opt/src/ssb/q32.cu create mode 100644 crystal-opt/src/ssb/q33.cu create mode 100644 crystal-opt/src/ssb/q34.cu create mode 100644 crystal-opt/src/ssb/q41.cu create mode 100644 crystal-opt/src/ssb/q42.cu create mode 100644 crystal-opt/src/ssb/q43.cu create mode 100644 crystal-opt/src/ssb/ssb_utils.h create mode 100644 crystal/CMakeLists.txt create mode 100644 crystal/LICENSE create mode 100644 crystal/README.md create mode 100644 crystal/src/CMakeLists.txt create mode 100644 crystal/src/crystal/crystal.cuh create mode 100644 crystal/src/crystal/join.cuh create mode 100644 crystal/src/crystal/load.cuh create mode 100644 crystal/src/crystal/pred.cuh create mode 100644 crystal/src/crystal/reduce.cuh create mode 100644 crystal/src/crystal/store.cuh create mode 100644 crystal/src/ops/join.cu create mode 100644 crystal/src/ops/project.cu create mode 100644 crystal/src/ops/utils/generator.h create mode 100644 crystal/src/ops/utils/gpu_utils.h create mode 100644 crystal/src/ssb/gpu_utils.h create mode 100644 crystal/src/ssb/q11.cu create mode 100644 crystal/src/ssb/q12.cu create mode 100644 crystal/src/ssb/q13.cu create mode 100644 crystal/src/ssb/q21.cu create mode 100644 crystal/src/ssb/q22.cu create mode 100644 crystal/src/ssb/q23.cu create mode 100644 crystal/src/ssb/q31.cu create mode 100644 crystal/src/ssb/q32.cu create mode 100644 crystal/src/ssb/q33.cu create mode 100644 crystal/src/ssb/q34.cu create mode 100644 crystal/src/ssb/q41.cu create mode 100644 crystal/src/ssb/q42.cu create mode 100644 crystal/src/ssb/q43.cu create mode 100644 crystal/src/ssb/ssb_utils.h create mode 100644 data/README.md create mode 100644 data/result_of_queries/q11 create mode 100644 data/result_of_queries/q21 create mode 100644 data/result_of_queries/q31 create mode 100644 data/result_of_queries/q41 
create mode 100644 data/ssb/.gitignore create mode 100644 data/ssb/SSB.md create mode 100644 data/ssb/dbgen/.gitignore create mode 100644 data/ssb/dbgen/BUGS create mode 100644 data/ssb/dbgen/CHANGES create mode 100644 data/ssb/dbgen/HISTORY create mode 100644 data/ssb/dbgen/PORTING.NOTES create mode 100644 data/ssb/dbgen/README create mode 100644 data/ssb/dbgen/TPCH_README create mode 100644 data/ssb/dbgen/bcd2.c create mode 100644 data/ssb/dbgen/bcd2.h create mode 100644 data/ssb/dbgen/bcd2.o create mode 100644 data/ssb/dbgen/bm_utils.c create mode 100644 data/ssb/dbgen/bm_utils.o create mode 100644 data/ssb/dbgen/build.c create mode 100644 data/ssb/dbgen/build.o create mode 100644 data/ssb/dbgen/config.h create mode 100644 data/ssb/dbgen/dists.dss create mode 100644 data/ssb/dbgen/driver.c create mode 100644 data/ssb/dbgen/driver.o create mode 100644 data/ssb/dbgen/dss.ddl create mode 100644 data/ssb/dbgen/dss.h create mode 100644 data/ssb/dbgen/dss.ri create mode 100644 data/ssb/dbgen/dsstypes.h create mode 100644 data/ssb/dbgen/history.html create mode 100644 data/ssb/dbgen/load_stub.c create mode 100644 data/ssb/dbgen/load_stub.o create mode 100644 data/ssb/dbgen/makefile create mode 100644 data/ssb/dbgen/makefile.suite create mode 100644 data/ssb/dbgen/makefile_win create mode 100644 data/ssb/dbgen/permute.c create mode 100644 data/ssb/dbgen/permute.h create mode 100644 data/ssb/dbgen/permute.o create mode 100644 data/ssb/dbgen/print.c create mode 100644 data/ssb/dbgen/print.o create mode 100755 data/ssb/dbgen/qgen create mode 100644 data/ssb/dbgen/qgen.c create mode 100644 data/ssb/dbgen/qgen.o create mode 100644 data/ssb/dbgen/rnd.c create mode 100644 data/ssb/dbgen/rnd.h create mode 100644 data/ssb/dbgen/rnd.o create mode 100644 data/ssb/dbgen/shared.h create mode 100644 data/ssb/dbgen/speed_seed.c create mode 100644 data/ssb/dbgen/speed_seed.o create mode 100644 data/ssb/dbgen/tags create mode 100644 data/ssb/dbgen/text.c create mode 100644 
data/ssb/dbgen/text.o create mode 100644 data/ssb/dbgen/tpcd.h create mode 100644 data/ssb/dbgen/varsub.c create mode 100644 data/ssb/dbgen/varsub.o create mode 100644 data/ssb/loader/.metadata create mode 100644 data/ssb/loader/Makefile create mode 100644 data/ssb/loader/columnSort.c create mode 100644 data/ssb/loader/convert.py create mode 100644 data/ssb/loader/convert_old.py create mode 100644 data/ssb/loader/dict.c create mode 100644 data/ssb/loader/include/common.h create mode 100644 data/ssb/loader/include/schema.h create mode 100644 data/ssb/loader/load.c create mode 100644 data/ssb/loader/load_modified.c create mode 100644 data/ssb/loader/rle.c create mode 100644 data/ssb/loader/soa.c create mode 100644 data/ssb/loader/sort.py create mode 100644 data/ssb/loader/sort_other_way.py create mode 100644 data/ssb/queries/original/load.sql create mode 100644 data/ssb/queries/original/q11.sql create mode 100644 data/ssb/queries/original/q12.sql create mode 100644 data/ssb/queries/original/q13.sql create mode 100644 data/ssb/queries/original/q21.sql create mode 100644 data/ssb/queries/original/q22.sql create mode 100644 data/ssb/queries/original/q23.sql create mode 100644 data/ssb/queries/original/q31.sql create mode 100644 data/ssb/queries/original/q32.sql create mode 100644 data/ssb/queries/original/q33.sql create mode 100644 data/ssb/queries/original/q34.sql create mode 100644 data/ssb/queries/original/q41.sql create mode 100644 data/ssb/queries/original/q42.sql create mode 100644 data/ssb/queries/original/q43.sql create mode 100644 data/ssb/queries/original/schema.sql create mode 100644 data/ssb/queries/transformed/load.sql create mode 100644 data/ssb/queries/transformed/p1.sql create mode 100644 data/ssb/queries/transformed/q11.sql create mode 100644 data/ssb/queries/transformed/q12.sql create mode 100644 data/ssb/queries/transformed/q13.sql create mode 100644 data/ssb/queries/transformed/q21.sql create mode 100644 data/ssb/queries/transformed/q22.sql create 
mode 100644 data/ssb/queries/transformed/q23.sql create mode 100644 data/ssb/queries/transformed/q31.sql create mode 100644 data/ssb/queries/transformed/q32.sql create mode 100644 data/ssb/queries/transformed/q33.sql create mode 100644 data/ssb/queries/transformed/q34.sql create mode 100644 data/ssb/queries/transformed/q41.sql create mode 100644 data/ssb/queries/transformed/q42.sql create mode 100644 data/ssb/queries/transformed/q43.sql create mode 100644 data/ssb/queries/transformed/schema.sql create mode 100644 data/ssb/queries/transformed/schema_no_pk.sql create mode 100755 data/util.py create mode 100644 fastlanes/CMakeLists.txt create mode 100644 fastlanes/example/CMakeLists.txt create mode 100644 fastlanes/example/fastlanes_bench_bitpack.cu create mode 100644 fastlanes/example/fastlanes_bench_delta.cu create mode 100644 fastlanes/generate.py create mode 100644 fastlanes/generated/CMakeLists.txt create mode 100644 fastlanes/generated/cuda/CMakeLists.txt create mode 100644 fastlanes/generated/cuda/fused_t32_uf1/CMakeLists.txt create mode 100644 fastlanes/generated/cuda/fused_t32_uf1/cuda_fused_t32_1024_uf1_unpack_bench.cu create mode 100644 fastlanes/generated/cuda/fused_t32_uf1/cuda_fused_t32_1024_uf1_unpack_helper.hpp create mode 100644 fastlanes/generated/cuda/fused_t32_uf1/cuda_fused_t32_1024_uf1_unpack_src.cu create mode 100644 fastlanes/generated/cuda/fused_t32_uf1/cuda_fused_t32_1024_uf1_unpack_test.cu create mode 100644 fastlanes/generated/cuda/fused_t32_uf1/unpack.cmake create mode 100644 fastlanes/generated/cuda/normal_t32_uf1/CMakeLists.txt create mode 100644 fastlanes/generated/cuda/normal_t32_uf1/cuda_normal_t32_1024_uf1_unpack_bench.cu create mode 100644 fastlanes/generated/cuda/normal_t32_uf1/cuda_normal_t32_1024_uf1_unpack_helper.hpp create mode 100644 fastlanes/generated/cuda/normal_t32_uf1/cuda_normal_t32_1024_uf1_unpack_src.cu create mode 100644 fastlanes/generated/cuda/normal_t32_uf1/cuda_normal_t32_1024_uf1_unpack_test.cu create mode 100644 
fastlanes/generated/cuda/normal_t32_uf1/unpack.cmake create mode 100644 fastlanes/generated/generated_files.txt create mode 100644 fastlanes/generated_files.txt create mode 100644 fastlanes/src/CMakeLists.txt create mode 100644 fastlanes/src/bitpack_register.cu create mode 100644 fastlanes/src/bitpack_shared_memory.cu create mode 100644 fastlanes/src/delta_global_memory.cu create mode 100644 fastlanes/src/delta_shared_memory.cu create mode 100644 fastlanes/src/fastlanes_gpu.cpp create mode 100644 fastlanes/src/include/common.cuh create mode 100644 fastlanes/src/include/crystal-opt/crystal.cuh create mode 100644 fastlanes/src/include/crystal-opt/join.cuh create mode 100644 fastlanes/src/include/crystal-opt/load.cuh create mode 100644 fastlanes/src/include/crystal-opt/pred.cuh create mode 100644 fastlanes/src/include/crystal-opt/reduce.cuh create mode 100644 fastlanes/src/include/crystal-opt/store.cuh create mode 100644 fastlanes/src/include/crystal-opt/term.cuh create mode 100644 fastlanes/src/include/crystal/crystal.cuh create mode 100644 fastlanes/src/include/crystal/join.cuh create mode 100644 fastlanes/src/include/crystal/load.cuh create mode 100644 fastlanes/src/include/crystal/pred.cuh create mode 100644 fastlanes/src/include/crystal/reduce.cuh create mode 100644 fastlanes/src/include/crystal/store.cuh create mode 100644 fastlanes/src/include/crystal/term.cuh create mode 100644 fastlanes/src/include/crystal_ssb_utils.h create mode 100644 fastlanes/src/include/debug.cuh create mode 100644 fastlanes/src/include/debug.hpp create mode 100644 fastlanes/src/include/error.cuh create mode 100644 fastlanes/src/include/fastlanes.cuh create mode 100644 fastlanes/src/include/fastlanes/join.cuh create mode 100644 fastlanes/src/include/fastlanes/pred.cuh create mode 100644 fastlanes/src/include/fls_gen/macros.hpp create mode 100644 fastlanes/src/include/fls_gen/pack/pack.hpp create mode 100644 fastlanes/src/include/fls_gen/rle/rle.hpp create mode 100644 
fastlanes/src/include/fls_gen/rsum/rsum.cuh create mode 100644 fastlanes/src/include/fls_gen/transpose/transpose.hpp create mode 100644 fastlanes/src/include/fls_gen/unpack/hardcoded_16.cuh create mode 100644 fastlanes/src/include/fls_gen/unpack/unpack.cuh create mode 100644 fastlanes/src/include/fls_gen/unpack/unpack.hpp create mode 100644 fastlanes/src/include/fls_gen/unpack/unpack_fused.cuh create mode 100644 fastlanes/src/include/fls_gen/unrsum/unrsum.hpp create mode 100644 fastlanes/src/include/gpu_utils.h create mode 100644 fastlanes/src/include/query/query_2.hpp create mode 100644 fastlanes/src/include/query/query_21.hpp create mode 100644 fastlanes/src/include/query/query_3.hpp create mode 100644 fastlanes/src/include/query/query_31.hpp create mode 100644 fastlanes/src/include/query/query_4.hpp create mode 100644 fastlanes/src/include/query/query_41.hpp create mode 100644 fastlanes/src/include/ssb_utils.h create mode 100644 fastlanes/src/include/util.cuh create mode 100644 fastlanes/src/pack.cpp create mode 100644 fastlanes/src/ssb/READMe.md create mode 100644 fastlanes/src/ssb/compress_ssb.cu create mode 100644 fastlanes/src/ssb/compress_ssb_sorted.cu create mode 100644 fastlanes/src/ssb/fls_q11.cu create mode 100644 fastlanes/src/ssb/fls_q11_bitpacked_opt_v2.cu create mode 100644 fastlanes/src/ssb/fls_q11_bitpacked_opt_v3.cu create mode 100644 fastlanes/src/ssb/fls_q11_bitpacked_opt_v4.cu create mode 100644 fastlanes/src/ssb/fls_q11_bp_crystal_opt.cu create mode 100644 fastlanes/src/ssb/fls_q21.cu create mode 100644 fastlanes/src/ssb/fls_q21_bitpacked_opt_v4.cu create mode 100644 fastlanes/src/ssb/fls_q31.cu create mode 100644 fastlanes/src/ssb/fls_q31_bitpacked_opt_v5.cu create mode 100644 fastlanes/src/ssb/fls_q41.cu create mode 100644 fastlanes/src/ssb/fls_q41_bitpacked_opt_v3.cu create mode 100644 fastlanes/src/ssb/fls_q41_bitpacked_opt_v4.cu create mode 100644 fastlanes/src/test_g.cu create mode 100644 fastlanes/src/tmp/fls_q41_bitpacked_opt_v2.cu 
create mode 100644 fastlanes/src/transpose.cpp create mode 100644 fastlanes/src/unrsum.cpp create mode 100644 include/cub/test/CMakeLists.txt create mode 100644 include/cub/test/README.md create mode 100644 include/cub/test/bfloat16.h create mode 100644 include/cub/test/c2h/custom_type.cuh create mode 100644 include/cub/test/c2h/generators.cu create mode 100644 include/cub/test/c2h/generators.cuh create mode 100644 include/cub/test/catch2_runner.cu create mode 100644 include/cub/test/catch2_test_block_adjacent_difference.cu create mode 100644 include/cub/test/catch2_test_block_histogram.cu create mode 100644 include/cub/test/catch2_test_block_load.cu create mode 100644 include/cub/test/catch2_test_block_merge_sort.cu create mode 100644 include/cub/test/catch2_test_block_radix_sort.cu create mode 100644 include/cub/test/catch2_test_block_radix_sort.cuh create mode 100644 include/cub/test/catch2_test_block_radix_sort_custom.cu create mode 100644 include/cub/test/catch2_test_block_reduce.cu create mode 100644 include/cub/test/catch2_test_block_run_length_decode.cu create mode 100644 include/cub/test/catch2_test_block_scan.cu create mode 100644 include/cub/test/catch2_test_block_shuffle.cu create mode 100644 include/cub/test/catch2_test_block_store.cu create mode 100644 include/cub/test/catch2_test_cdp_helper.h create mode 100644 include/cub/test/catch2_test_cdp_wrapper.cu create mode 100644 include/cub/test/catch2_test_device_decoupled_look_back.cu create mode 100644 include/cub/test/catch2_test_device_radix_sort_custom.cu create mode 100644 include/cub/test/catch2_test_helper.h create mode 100644 include/cub/test/catch2_test_printing.cu create mode 100644 include/cub/test/catch2_test_radix_operations.cu create mode 100644 include/cub/test/catch2_test_util_type.cu create mode 100644 include/cub/test/catch2_test_warp_exchange.cu create mode 100644 include/cub/test/catch2_test_warp_load.cu create mode 100644 include/cub/test/catch2_test_warp_mask.cu create mode 100644 
include/cub/test/catch2_test_warp_merge_sort.cu create mode 100644 include/cub/test/catch2_test_warp_reduce.cu create mode 100644 include/cub/test/catch2_test_warp_scan.cu create mode 100644 include/cub/test/catch2_test_warp_store.cu create mode 100644 include/cub/test/cmake/CMakeLists.txt create mode 100644 include/cub/test/cmake/check_source_files.cmake create mode 100644 include/cub/test/cmake/test_install/CMakeLists.txt create mode 100644 include/cub/test/fill_striped.cuh create mode 100644 include/cub/test/half.h create mode 100644 include/cub/test/link_a.cu create mode 100644 include/cub/test/link_b.cu create mode 100644 include/cub/test/link_main.cpp create mode 100644 include/cub/test/mersenne.h create mode 100644 include/cub/test/test_allocator.cu create mode 100644 include/cub/test/test_block_radix_rank.cu create mode 100644 include/cub/test/test_cdp_variant_state.cu create mode 100644 include/cub/test/test_device_adjacent_difference.cu create mode 100644 include/cub/test/test_device_batch_copy.cu create mode 100644 include/cub/test/test_device_batch_memcpy.cu create mode 100644 include/cub/test/test_device_histogram.cu create mode 100644 include/cub/test/test_device_merge_sort.cu create mode 100644 include/cub/test/test_device_radix_sort.cu create mode 100644 include/cub/test/test_device_reduce.cu create mode 100644 include/cub/test/test_device_reduce_by_key.cu create mode 100644 include/cub/test/test_device_run_length_encode.cu create mode 100644 include/cub/test/test_device_scan.cu create mode 100644 include/cub/test/test_device_scan_by_key.cu create mode 100644 include/cub/test/test_device_segmented_sort.cu create mode 100644 include/cub/test/test_device_select_if.cu create mode 100644 include/cub/test/test_device_select_unique.cu create mode 100644 include/cub/test/test_device_select_unique_by_key.cu create mode 100644 include/cub/test/test_device_spmv.cu create mode 100644 include/cub/test/test_device_three_way_partition.cu create mode 100644 
include/cub/test/test_grid_barrier.cu create mode 100644 include/cub/test/test_iterator.cu create mode 100644 include/cub/test/test_iterator_deprecated.cu create mode 100644 include/cub/test/test_namespace_wrapped.cu create mode 100644 include/cub/test/test_temporary_storage_layout.cu create mode 100644 include/cub/test/test_thread_operators.cu create mode 100644 include/cub/test/test_thread_sort.cu create mode 100644 include/cub/test/test_util.h create mode 100644 include/cub/test/test_util_vec.h create mode 100644 results/T4/crystal-fls/crystal_fls_q11_sf10.txt create mode 100644 results/T4/crystal-fls/crystal_fls_q21.txt create mode 100644 results/T4/crystal-fls/crystal_fls_q21_sf10.txt create mode 100644 results/T4/crystal-fls/crystal_fls_q31_sf10.txt create mode 100644 results/T4/crystal-fls/crystal_fls_q41_sf10.txt create mode 100644 results/T4/crystal-opt/crystal_opt_q21.txt create mode 100644 results/T4/crystal-opt/crystal_opt_q21_sf10.txt create mode 100644 results/T4/crystal-opt/crystal_opt_q31_sf10.txt create mode 100644 results/T4/crystal-opt/crystal_opt_q41_sf10.txt create mode 100644 results/T4/crystal/crystal_q21.txt create mode 100644 results/T4/crystal/crystal_q31_sf10.txt create mode 100644 results/T4/crystal/crystal_q41_sf10.txt create mode 100644 results/V100/crystal-fls/crystal_fls_q21_sf10_2.txt create mode 100644 results/V100/crystal-fls/crystal_fls_q31_sf10.txt create mode 100644 results/V100/crystal-fls/crystal_fls_q41_sf10.txt create mode 100644 results/V100/crystal-opt-fls/crystal_opt_fls_q11_sf10.txt create mode 100644 results/V100/crystal-opt/crystal_opt_q11_sf10.txt create mode 100644 results/V100/crystal-opt/crystal_opt_q21_sf10.txt create mode 100644 results/V100/crystal-opt/crystal_opt_q31_sf10.txt create mode 100644 results/V100/crystal-opt/crystal_opt_q41_sf10_v100.txt create mode 100644 results/V100/crystal/crystal_q21_sf10.txt create mode 100644 results/V100/crystal/crystal_q31_sf10.txt create mode 100644 
results/V100/crystal/crystal_q41_sf10_v100.txt create mode 100644 scripts/ssb_on_duckdb.py create mode 100644 scripts/ssb_on_duckdb/__init__.py create mode 100644 scripts/ssb_on_duckdb/load.py create mode 100644 scripts/ssb_on_duckdb/query_11.py create mode 100644 scripts/ssb_on_duckdb/query_12.py create mode 100644 scripts/ssb_on_duckdb/query_13.py create mode 100644 scripts/ssb_on_duckdb/query_21.py create mode 100644 tile_based/CMakeLists.txt create mode 100644 tile_based/README.md create mode 100644 tile_based/src/CMakeLists.txt create mode 100644 tile_based/src/config.hpp create mode 100644 tile_based/src/include/binpack_kernel.cuh create mode 100644 tile_based/src/include/crystal/crystal.cuh create mode 100644 tile_based/src/include/crystal/join.cuh create mode 100644 tile_based/src/include/crystal/load.cuh create mode 100644 tile_based/src/include/crystal/pred.cuh create mode 100644 tile_based/src/include/crystal/reduce.cuh create mode 100644 tile_based/src/include/crystal/store.cuh create mode 100644 tile_based/src/include/deltabinpack_kernel.cuh create mode 100644 tile_based/src/include/econfig.h create mode 100644 tile_based/src/include/kernel.cuh create mode 100644 tile_based/src/include/rlebinpack_kernel.cuh create mode 100644 tile_based/src/include/ssb_gpu_utils.h create mode 100644 tile_based/src/include/ssb_utils.h create mode 100644 tile_based/src/include/utils/gpu_utils.h create mode 100644 tile_based/src/rlebinpack.cpp create mode 100644 tile_based/src/rlebinpack_kernel.cuh create mode 100644 tile_based/src/test_match_rle.cu create mode 100644 tile_based/src/test_perf_rle.cu create mode 100644 tile_based/src/tile_based.cu create mode 100644 tile_based/src/tile_based_bench_bitpack.cu create mode 100644 tile_based/src/tile_based_bench_bp_sum.cu create mode 100644 tile_based/src/tile_based_bench_delta.cu create mode 100644 tile_based/src/tile_based_bench_delta_sum.cu create mode 100644 tile_based/src/tile_based_bench_rle.cu create mode 100644 
tile_based/src/tile_based_bench_rle_all_memory.cu create mode 100644 tile_based/src/tile_based_binpack_query_11.cu create mode 100644 tile_based/src/tile_based_bitpack_shared_memory.cu create mode 100644 tool/CMakeLists.txt create mode 100644 tool/device_query.cu create mode 100644 toolchains/T4.cmake create mode 100644 toolchains/gtx1080.cmake diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml new file mode 100644 index 0000000..aaa7359 --- /dev/null +++ b/.github/workflows/CI.yaml @@ -0,0 +1,165 @@ +name: CI +run-name: ${{ github.actor }} is building + +on: push + +jobs: + # https://developer.nvidia.com/nvidia-development-tools-solutions-err_nvgpuctrperm-permission-issue-performance-counters + GPU: + if: github.actor == 'azimafroozeh' + strategy: + fail-fast: true + matrix: + platform: [ T4, V100 ] + BUILD_TYPE: [ Release ] + cxx: [ clang++ ] + runs-on: ${{ matrix.platform }} + + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + + - name: Make directory build + run: mkdir ${{github.workspace}}/build + + - name: Configure CMake + run: cmake -DFLS_BUILD_GPU=ON -DCMAKE_TOOLCHAIN_FILE=${{github.workspace}}/toolchains/T4.cmake -S ${{github.workspace}} -B ${{github.workspace}}/build + env: + CXX: ${{ matrix.cxx }} + + - name: Build + run: cmake --build ${{github.workspace}}/build -j 8 + + - name: Report compression average bit width + run: | + echo "NOT Sorted" + ${{github.workspace}}/build/fastlanes/src/compress_ssb + echo "Sorted" + ${{github.workspace}}/build/fastlanes/src/compress_ssb_sorted + + - name: FLS-GPU-opt q21 V3 + run: | + ${{github.workspace}}/build/fastlanes/src/fls_q21_bitpacked_opt_v4 + echo "SORTED + FOR ON ORDERDATE" + ncu ${{github.workspace}}/build/fastlanes/src/fls_q21_bitpacked_opt_v4 | grep Duration + + - name: FLS-GPU-opt q31 V5 + run: | + ${{github.workspace}}/build/fastlanes/src/fls_q31_bitpacked_opt_v5 + echo "SORTED + FOR ON ORDERDATE" + ncu 
${{github.workspace}}/build/fastlanes/src/fls_q31_bitpacked_opt_v5 | grep Duration + + - name: FLS-GPU-opt q41 V3 V4 + run: | + ${{github.workspace}}/build/fastlanes/src/fls_q41_bitpacked_opt_v3 + echo "SORTED + FOR ON ORDERDATE" + ncu ${{github.workspace}}/build/fastlanes/src/fls_q41_bitpacked_opt_v3 | grep Duration + + ${{github.workspace}}/build/fastlanes/src/fls_q41_bitpacked_opt_v4 + echo "SORTED + FOR ON ORDERDATE and CUSTKEY" + ncu ${{github.workspace}}/build/fastlanes/src/fls_q41_bitpacked_opt_v4 | grep Duration + + - name: Test FLS + Crystal + run: | + ${{github.workspace}}/build/fastlanes/src/fls_q11 1 + echo "-- fls_q11 version 1 Passed!" + ${{github.workspace}}/build/fastlanes/src/fls_q11 2 + echo "-- fls_q11 version 2 Passed!" + + ${{github.workspace}}/build/fastlanes/src/fls_q21 1 + echo "-- fls_q21 version 1 Passed!" + ${{github.workspace}}/build/fastlanes/src/fls_q21 2 + echo "-- fls_q21 version 2 Passed!" + ${{github.workspace}}/build/fastlanes/src/fls_q21 3 + echo "-- fls_q21 version 3 Passed!" + + ${{github.workspace}}/build/fastlanes/src/fls_q31 1 + echo "-- fls_q31 version 1 Passed!" + ${{github.workspace}}/build/fastlanes/src/fls_q31 2 + echo "-- fls_q31 version 2 Passed!" + ${{github.workspace}}/build/fastlanes/src/fls_q31 3 + echo "-- fls_q31 version 3 Passed!" + ${{github.workspace}}/build/fastlanes/src/fls_q31 4 + echo "-- fls_q31 version 4 Passed!" + + ${{github.workspace}}/build/fastlanes/src/fls_q41 1 + echo "-- fls_q41 version 1 Passed!" + ${{github.workspace}}/build/fastlanes/src/fls_q41 2 + echo "-- fls_q41 version 2 Passed!" + # ${{github.workspace}}/build/fastlanes/src/fls_q31 3 + # echo "-- fls_q31 version 3 Passed!" + # ${{github.workspace}}/build/fastlanes/src/fls_q31 4 + # echo "-- fls_q31 version 4 Passed!" + # ${{github.workspace}}/build/fastlanes/src/fls_q31 4 + # echo "-- fls_q31 version 4 Passed!" 
+ + - name: NCU FLS + Crystal + run: | + echo "FLS Q11 version 1 : FastLanes-GPU" + ncu ${{github.workspace}}/build/fastlanes/src/fls_q11 1 | grep Duration + echo "FLS Q11 version 2 : FLS-GPU-opt" + ncu ${{github.workspace}}/build/fastlanes/src/fls_q11 2 | grep Duration + + echo "FLS Q21 version 1 : FastLanes-GPU" + ncu ${{github.workspace}}/build/fastlanes/src/fls_q21 1 | grep Duration + echo "FLS Q21 version 2 : FLS-GPU-opt" + ncu ${{github.workspace}}/build/fastlanes/src/fls_q21 2 | grep Duration + echo "FLS Q21 version 3 : FLS-GPU-opt + predicate load on uncompressed data" + ncu ${{github.workspace}}/build/fastlanes/src/fls_q21 3 | grep Duration + + echo "FLS Q31 version 1 : FastLanes-GPU" + ncu ${{github.workspace}}/build/fastlanes/src/fls_q31 1 | grep Duration + echo "FLS Q31 version 2 : Version 1 >> combination of shared + register" + ncu ${{github.workspace}}/build/fastlanes/src/fls_q31 2 | grep Duration + echo "FLS Q31 version 3 : 8 value at a time" + ncu ${{github.workspace}}/build/fastlanes/src/fls_q31 3 | grep Duration + echo "FLS Q31 version 4 : v3 + predicate load on uncompressed data" + ncu ${{github.workspace}}/build/fastlanes/src/fls_q31 4 | grep Duration + + echo "FLS Q41 version 1 : FastLanes-GPU" + ncu ${{github.workspace}}/build/fastlanes/src/fls_q41 1 | grep Duration + echo "FLS Q41 version 2 : 8 value at a time + predicate load on uncompressed data" + ncu ${{github.workspace}}/build/fastlanes/src/fls_q41 2 | grep Duration + # echo "FLS Q31 version 3 : 8 value at a time" + # ncu ${{github.workspace}}/build/fastlanes/src/fls_q31 3 | grep Duration + # echo "FLS Q31 version 4 : v3 + predicate load on uncompressed data" + # ncu ${{github.workspace}}/build/fastlanes/src/fls_q31 4 | grep Duration + + - name: NCU crystal + run: | + echo "-- crystal q11" + ncu ${{github.workspace}}/build/crystal/src/crystal_q11 | grep Duration + echo "-- crystal q21" + ncu ${{github.workspace}}/build/crystal/src/crystal_q21 | grep Duration + echo "-- crystal q31" + 
ncu ${{github.workspace}}/build/crystal/src/crystal_q31 | grep Duration + echo "-- crystal q41" + ncu ${{github.workspace}}/build/crystal/src/crystal_q41 | grep Duration + + - name: NCU crystal OPT + run: | + echo "-- crystal-opt q11" + ncu ${{github.workspace}}/build/crystal-opt/src/crystal_opt_q11 | grep Duration + echo "-- crystal-opt q21" + ncu ${{github.workspace}}/build/crystal-opt/src/crystal_opt_q21 | grep Duration + echo "-- crystal-opt q31" + ncu ${{github.workspace}}/build/crystal-opt/src/crystal_opt_q31 | grep Duration + echo "-- crystal-opt q41" + ncu ${{github.workspace}}/build/crystal-opt/src/crystal_opt_q41 | grep Duration + + - name: FLS-GPU-opt q11 v2 + run: echo "Simdized TODO" + # ${{github.workspace}}/build/fastlanes/src/fls_q11_bitpacked_opt_v2 + # ncu ${{github.workspace}}/build/fastlanes/src/fls_q11_bitpacked_opt_v2 | grep Duration + + - name: FLS-GPU-opt q11 v3 + run: | + ${{github.workspace}}/build/fastlanes/src/fls_q11_bitpacked_opt_v3 + echo "v3 : Multiple check" + ncu ${{github.workspace}}/build/fastlanes/src/fls_q11_bitpacked_opt_v3 | grep Duration + + - name: FLS-GPU-opt q11 v4 + run: | + ${{github.workspace}}/build/fastlanes/src/fls_q11_bitpacked_opt_v4 + echo "v1 with 8 value at a time **not complete yet**" + ncu ${{github.workspace}}/build/fastlanes/src/fls_q11_bitpacked_opt_v4 | grep Duration diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b982d23 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +*.i +*.ii +*.gpu +*.ptx +*.cubin +*.fatbin \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..1752528 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,112 @@ +cmake_minimum_required(VERSION 3.22) +project(FastLanesGPU) + +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) +set(CMAKE_POSITION_INDEPENDENT_CODE TRUE) + +# Requirements : 
------------------------------------------------------------------------------------------------------- +if (NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") + message(FATAL_ERROR "Only Clang is supported!") +endif () +if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 13) + message(FATAL_ERROR "Only Clang >= 13 is supported!") +endif () + +# FLAGS : -------------------------------------------------------------------------------------------------------------- +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror") +if ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "x86") + # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") # FSST +endif () + +#----------------------------------------------------------------------------------------------------------------------- +include(FetchContent) +include(CheckCXXCompilerFlag) +include(CMakePrintHelpers) +# https://stackoverflow.com/questions/56089330/cmake-creates-lots-of-targets-i-didnt-specify +set_property(GLOBAL PROPERTY CTEST_TARGETS_ADDED 1) +include(CTest) + +# GTEST : ------------------------------------------------------------------------------------------------------------ +message("---------------------------------------------------------------------------------------------------------") +message("- Building GTEST:") +include(GoogleTest) +# Gtest: ----------------------------------------------------------------------------------------------------------- +FetchContent_Declare(googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG e2239ee6043f73722e7aa812a459f54a28552929 # release-1.11.0 +) +# For Windows: Prevent overriding the parent project's compiler/linker settings +set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) +FetchContent_MakeAvailable(googletest) + +enable_testing() + +# Silence clang-tidy warnings from googletest +set_target_properties(gtest PROPERTIES CXX_CLANG_TIDY "") +set_target_properties(gtest_main PROPERTIES CXX_CLANG_TIDY "") +set_target_properties(gmock PROPERTIES CXX_CLANG_TIDY 
"") +set_target_properties(gmock_main PROPERTIES CXX_CLANG_TIDY "") + +# Definitions: --------------------------------------------------------------------------------------------------------- +add_compile_definitions(CMAKE_SOURCE_DIR="${CMAKE_SOURCE_DIR}") + + +if (${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") + message("There is no CUDA on Darwin") + RETURN() +endif () + +set(CMAKE_CUDA_ARCHITECTURES "native") +enable_language(CUDA) +set(CMAKE_CUDA_STANDARD 20) +set(CMAKE_CUDA_STANDARD_REQUIRED ON) + +message("---------------------------------------------------------------------------------------------------------") +message("-- CUDA:") +cmake_print_variables(CUDA_INCLUDE_DIRS) +cmake_print_variables(CUDA_LIBRARIES) +cmake_print_variables(CUDA_FOUND) +cmake_print_variables(CMAKE_CUDA_FLAGS) +cmake_print_variables(CMAKE_CUDA_FLAGS_DEBUG) +cmake_print_variables(CMAKE_CUDA_FLAGS_RELEASE) +cmake_print_variables(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES) +cmake_print_variables(CMAKE_CUDA_COMPILER) +cmake_print_variables(CMAKE_CUDA_COMPILER_VERSION) +cmake_print_variables(CMAKE_CUDA_STANDARD) +cmake_print_variables(CMAKE_CUDA_STANDARD_REQUIRED) +cmake_print_variables(CMAKE_CXX_STANDARD) +cmake_print_variables(CMAKE_CXX_COMPILER) +cmake_print_variables(CMAKE_CXX_COMPILER_VERSION) +cmake_print_variables(CMAKE_CXX_COMPILER_ID) +cmake_print_variables(CMAKE_SOURCE_DIR) +cmake_print_variables(CMAKE_BUILD_TYPE) + +# CUDA : --------------------------------------------------------------------------------------------------------------- +# http://knottsgroup.groups.et.byu.net/labbook/index.php?n=Main.CompilingLAMMPSForGPU +#set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} +# -gencode=arch=compute_75,code=sm_75 +# -gencode=arch=compute_75,code=compute_75" +#) + +# Include : ------------------------------------------------------------------------------------------------------------ +include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) +include_directories(include) + +# Tools : 
-------------------------------------------------------------------------------------------------------------- +add_subdirectory(tool) + +# Crystal : ------------------------------------------------------------------------------------------------------------ +add_subdirectory(crystal) + +# FastLanes : ---------------------------------------------------------------------------------------------------------- +add_subdirectory(fastlanes) + +#TileBased : ---------------------------------------------------------------------------------------------------------- +add_subdirectory(tile_based) +# try https://github.com/azimafroozeh/gpu-compression + +# Crystal-Opt : -------------------------------------------------------------------------------------------------------- +add_subdirectory(crystal-opt) + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..2b375e1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Azim Afroozeh, Lotte Felius, CWI Database Architectures Group + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..46d0b9f --- /dev/null +++ b/README.md @@ -0,0 +1,40 @@ +# FastLanesGPU: Accelerating GPU Data Processing using FastLanes Compression + +FastLanesGPU details can be found in the [publication](https://doi.org/10.1145/3662010.3663450). + +## Contents + +- [FastLanesGPU in a Nutshell](#fastlanesgpu-in-a-nutshell) +- [Quickstart](#quickstart) +- [Building and Running](#building-and-running) +- [FastLanesGPU Primitives](#fastlanesgpu-primitives) +- [Replicating Paper Experiments](#replicating-paper-experiments) + - [Build](#build) + - [Setup Data](#setup-data) + - [Speed Tests](#speed-tests) + +## FastLanesGPU in a Nutshell + +## Quickstart + +## Building and Running + +Requirements: + +1) __Clang++__ 13 or higher +2) __CMake__ 3.22 or higher + +## FastLanesGPU Primitives + +## Replicating Paper Experiments + +### Build + +```shell +cmake . +make +``` + +### Setup Data + + diff --git a/crystal-opt/CMakeLists.txt b/crystal-opt/CMakeLists.txt new file mode 100644 index 0000000..2721300 --- /dev/null +++ b/crystal-opt/CMakeLists.txt @@ -0,0 +1,3 @@ +# Source : ------------------------------------------------------------------------------------------------------------- +add_subdirectory(src) + diff --git a/crystal-opt/README.md b/crystal-opt/README.md new file mode 100644 index 0000000..5a6f8ac --- /dev/null +++ b/crystal-opt/README.md @@ -0,0 +1,29 @@ +Crystal-Opt GPU Library +================= + +The Crystal-Opt library makes additional changes to the original Crystal library for better performance. 
The original Crystal library implements a collection of block-wide device functions that can be used to implement high performance implementations of SQL queries on GPUs. + +You can also refer to the original Crystal library and their papers [here](https://github.com/anilshanbhag/crystal). + +Usage +---- + +``` +# Generate the test data and transform into columnar layout +# Substitute with appropriate scale factor (eg: 1) +python util.py ssb gen +python util.py ssb transform +``` + +* Configure the benchmark settings +``` +cd src/ssb/ +# Edit SF and BASE_PATH in ssb_utils.h +``` + +* To run a query, say run q11 +``` +make bin/ssb/q11 +./bin/ssb/q11 +``` + diff --git a/crystal-opt/src/CMakeLists.txt b/crystal-opt/src/CMakeLists.txt new file mode 100644 index 0000000..7d05391 --- /dev/null +++ b/crystal-opt/src/CMakeLists.txt @@ -0,0 +1,43 @@ +add_library(crystal_opt STATIC ops/join.cu ops/project.cu) +target_include_directories(crystal_opt PUBLIC ops) +target_include_directories(crystal_opt PUBLIC ssb) +target_include_directories(crystal_opt PUBLIC crystal) + +add_executable(crystal_opt_q11 ssb/q11.cu) +target_link_libraries(crystal_opt_q11 crystal_opt) + +add_executable(crystal_opt_q12 ssb/q12.cu) +target_link_libraries(crystal_opt_q12 crystal_opt) + +add_executable(crystal_opt_q13 ssb/q13.cu) +target_link_libraries(crystal_opt_q13 crystal_opt) + +add_executable(crystal_opt_q21 ssb/q21.cu) +target_link_libraries(crystal_opt_q21 crystal_opt) + +add_executable(crystal_opt_q22 ssb/q22.cu) +target_link_libraries(crystal_opt_q22 crystal_opt) + +add_executable(crystal_opt_q23 ssb/q23.cu) +target_link_libraries(crystal_opt_q23 crystal_opt) + +add_executable(crystal_opt_q31 ssb/q31.cu) +target_link_libraries(crystal_opt_q31 crystal_opt) + +add_executable(crystal_opt_q32 ssb/q32.cu) +target_link_libraries(crystal_opt_q32 crystal_opt) + +add_executable(crystal_opt_q33 ssb/q33.cu) +target_link_libraries(crystal_opt_q33 crystal_opt) + +add_executable(crystal_opt_q34 
ssb/q34.cu) +target_link_libraries(crystal_opt_q34 crystal_opt) + +add_executable(crystal_opt_q41 ssb/q41.cu) +target_link_libraries(crystal_opt_q41 crystal_opt) + +add_executable(crystal_opt_q42 ssb/q42.cu) +target_link_libraries(crystal_opt_q42 crystal_opt) + +add_executable(crystal_opt_q43 ssb/q43.cu) +target_link_libraries(crystal_opt_q43 crystal_opt) \ No newline at end of file diff --git a/crystal-opt/src/crystal/crystal.cuh b/crystal-opt/src/crystal/crystal.cuh new file mode 100644 index 0000000..8246b3b --- /dev/null +++ b/crystal-opt/src/crystal/crystal.cuh @@ -0,0 +1,32 @@ +// MIT License + +// Copyright (c) 2023 Jiashen Cao + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +// Block-wide functions +#include "load.cuh" +#include "pred.cuh" +#include "store.cuh" +#include "reduce.cuh" +#include "join.cuh" +#include "term.cuh" + diff --git a/crystal-opt/src/crystal/join.cuh b/crystal-opt/src/crystal/join.cuh new file mode 100644 index 0000000..d3734fa --- /dev/null +++ b/crystal-opt/src/crystal/join.cuh @@ -0,0 +1,333 @@ +// MIT License + +// Copyright (c) 2023 Jiashen Cao + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#define HASH(X,Y,Z) ((X-Z) % Y) + +template +__device__ __forceinline__ void BlockProbeDirectAndPHT_1( + int tid, + K (&items)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + K keys_min + ) { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + if (selection_flags[ITEM]) { + int hash = HASH(items[ITEM], ht_len, keys_min); + + K slot = ht[hash]; + if (slot != 0) { + selection_flags[ITEM] = 1; + } else { + selection_flags[ITEM] = 0; + } + } + } +} + +template +__device__ __forceinline__ void BlockProbeDirectAndPHT_1( + int tid, + K (&items)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + K keys_min, + int num_items + ) { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + if (tid + (ITEM * BLOCK_THREADS) < num_items) { + if (selection_flags[ITEM]) { + int hash = HASH(items[ITEM], ht_len, keys_min); + + K slot = ht[hash]; + if (slot != 0) { + selection_flags[ITEM] = 1; + } else { + selection_flags[ITEM] = 0; + } + } + } + } +} + +template +__device__ __forceinline__ void BlockProbeAndPHT_1( + K (&items)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + K keys_min, + int num_items + ) { + + if ((BLOCK_THREADS * ITEMS_PER_THREAD) == num_items) { + BlockProbeDirectAndPHT_1(threadIdx.x, items, selection_flags, ht, ht_len, keys_min); + } else { + BlockProbeDirectAndPHT_1(threadIdx.x, items, selection_flags, ht, ht_len, keys_min, num_items); + } +} + +template +__device__ __forceinline__ void BlockProbeAndPHT_1( + K (&items)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + int num_items + ) { + BlockProbeAndPHT_1(items, selection_flags, ht, ht_len, 0, num_items); +} + +template +__device__ __forceinline__ void BlockProbeDirectAndPHT_2( + int tid, + K (&keys)[ITEMS_PER_THREAD], + V (&res)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* 
ht, + int ht_len, + K keys_min + ) { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + if (selection_flags[ITEM]) { + int hash = HASH(keys[ITEM], ht_len, keys_min); + + uint64_t slot = *reinterpret_cast(&ht[hash << 1]); + if (slot != 0) { + res[ITEM] = (slot >> 32); + } else { + selection_flags[ITEM] = 0; + } + } + } +} + +template +__device__ __forceinline__ void BlockProbeDirectAndPHT_2( + int tid, + K (&items)[ITEMS_PER_THREAD], + V (&res)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + K keys_min, + int num_items + ) { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + if (tid + (ITEM * BLOCK_THREADS) < num_items) { + if (selection_flags[ITEM]) { + int hash = HASH(items[ITEM], ht_len, keys_min); + + uint64_t slot = *reinterpret_cast(&ht[hash << 1]); + if (slot != 0) { + res[ITEM] = (slot >> 32); + } else { + selection_flags[ITEM] = 0; + } + } + } + } +} + +template +__device__ __forceinline__ void BlockProbeAndPHT_2( + K (&keys)[ITEMS_PER_THREAD], + V (&res)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + K keys_min, + int num_items + ) { + + if ((BLOCK_THREADS * ITEMS_PER_THREAD) == num_items) { + BlockProbeDirectAndPHT_2(threadIdx.x, keys, res, selection_flags, ht, ht_len, keys_min); + } else { + BlockProbeDirectAndPHT_2(threadIdx.x, keys, res, selection_flags, ht, ht_len, keys_min, num_items); + } +} + +template +__device__ __forceinline__ void BlockProbeAndPHT_2( + K (&keys)[ITEMS_PER_THREAD], + V (&res)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + int num_items + ) { + BlockProbeAndPHT_2(keys, res, selection_flags, ht, ht_len, 0, num_items); +} + +template +__device__ __forceinline__ void BlockBuildDirectSelectivePHT_1( + int tid, + K (&keys)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + K keys_min + ) { + #pragma unroll + for (int ITEM = 0; ITEM 
< ITEMS_PER_THREAD; ITEM++) { + if (selection_flags[ITEM]) { + int hash = HASH(keys[ITEM], ht_len, keys_min); + + K old = atomicCAS(&ht[hash], 0, keys[ITEM]); + } + } +} + +template +__device__ __forceinline__ void BlockBuildDirectSelectivePHT_1( + int tid, + K (&items)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + K keys_min, + int num_items + ) { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + if (tid + (ITEM * BLOCK_THREADS) < num_items) { + if (selection_flags[ITEM]) { + int hash = HASH(items[ITEM], ht_len, keys_min); + + K old = atomicCAS(&ht[hash], 0, items[ITEM]); + } + } + } +} + +template +__device__ __forceinline__ void BlockBuildSelectivePHT_1( + K (&keys)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + K keys_min, + int num_items + ) { + + if ((BLOCK_THREADS * ITEMS_PER_THREAD) == num_items) { + BlockBuildDirectSelectivePHT_1(threadIdx.x, keys, selection_flags, ht, ht_len, keys_min); + } else { + BlockBuildDirectSelectivePHT_1(threadIdx.x, keys, selection_flags, ht, ht_len, keys_min, num_items); + } +} + +template +__device__ __forceinline__ void BlockBuildSelectivePHT_1( + K (&keys)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + int num_items + ) { + BlockBuildSelectivePHT_1(keys, selection_flags, ht, ht_len, 0, num_items); +} + +template +__device__ __forceinline__ void BlockBuildDirectSelectivePHT_2( + int tid, + K (&keys)[ITEMS_PER_THREAD], + V (&res)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + K keys_min + ) { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + if (selection_flags[ITEM]) { + int hash = HASH(keys[ITEM], ht_len, keys_min); + + K old = atomicCAS(&ht[hash << 1], 0, keys[ITEM]); + ht[(hash << 1) + 1] = res[ITEM]; + } + } +} + +template +__device__ __forceinline__ void BlockBuildDirectSelectivePHT_2( + int tid, + K 
(&keys)[ITEMS_PER_THREAD], + V (&res)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + K keys_min, + int num_items + ) { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + if (tid + (ITEM * BLOCK_THREADS) < num_items) { + if (selection_flags[ITEM]) { + int hash = HASH(keys[ITEM], ht_len, keys_min); + + K old = atomicCAS(&ht[hash << 1], 0, keys[ITEM]); + ht[(hash << 1) + 1] = res[ITEM]; + } + } + } +} + +template +__device__ __forceinline__ void BlockBuildSelectivePHT_2( + K (&keys)[ITEMS_PER_THREAD], + V (&res)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + K keys_min, + int num_items + ) { + + if ((BLOCK_THREADS * ITEMS_PER_THREAD) == num_items) { + BlockBuildDirectSelectivePHT_2( + threadIdx.x, keys, res, selection_flags, ht, ht_len, keys_min); + } else { + BlockBuildDirectSelectivePHT_2( + threadIdx.x, keys, res, selection_flags, ht, ht_len, keys_min, num_items); + } +} + +template +__device__ __forceinline__ void BlockBuildSelectivePHT_2( + K (&keys)[ITEMS_PER_THREAD], + V (&res)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + int num_items + ) { + BlockBuildSelectivePHT_2(keys, res, selection_flags, ht, ht_len, 0, num_items); +} diff --git a/crystal-opt/src/crystal/load.cuh b/crystal-opt/src/crystal/load.cuh new file mode 100644 index 0000000..54b903c --- /dev/null +++ b/crystal-opt/src/crystal/load.cuh @@ -0,0 +1,166 @@ +// MIT License + +// Copyright (c) 2023 Jiashen Cao + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following 
// conditions:

// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

#pragma once

// NOTE(review): template parameter lists and explicit template arguments were
// lost in extraction and are restored from the names the bodies use -- confirm
// the parameter order against the real load.cuh.
//
// All loaders read in a striped layout: item ITEM of thread tid comes from
// block_itr[tid + ITEM * BLOCK_THREADS].

// Predicated load, full tile: only slots whose selection flag is non-zero are
// read; the other entries of `items` are left untouched.
template<typename T, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void
BlockPredLoadDirect(const unsigned int tid, T *block_itr,
                    T (&items)[ITEMS_PER_THREAD],
                    int (&selection_flags)[ITEMS_PER_THREAD]) {
  T *thread_itr = block_itr + tid;

#pragma unroll
  for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) {
    if (selection_flags[ITEM]) {
      items[ITEM] = thread_itr[ITEM * BLOCK_THREADS];
    }
  }
}

// Predicated load, partial tile: additionally skips positions that are not
// below num_items.
template<typename T, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void
BlockPredLoadDirect(const unsigned int tid, T *block_itr,
                    T (&items)[ITEMS_PER_THREAD], int num_items,
                    int (&selection_flags)[ITEMS_PER_THREAD]) {
  T *thread_itr = block_itr + tid;

#pragma unroll
  for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) {
    if (selection_flags[ITEM]) {
      if (tid + (ITEM * BLOCK_THREADS) < num_items) {
        items[ITEM] = thread_itr[ITEM * BLOCK_THREADS];
      }
    }
  }
}

// Predicated tile load for the calling thread (threadIdx.x). `inp` must
// already point at the start of the block's tile. Uses the unguarded path
// when num_items == BLOCK_THREADS * ITEMS_PER_THREAD.
template<typename T, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void
BlockPredLoad(T *inp, T (&items)[ITEMS_PER_THREAD], int num_items,
              int (&selection_flags)[ITEMS_PER_THREAD]) {
  T *block_itr = inp;

  if ((BLOCK_THREADS * ITEMS_PER_THREAD) == num_items) {
    BlockPredLoadDirect<T, BLOCK_THREADS, ITEMS_PER_THREAD>(
        threadIdx.x, block_itr, items, selection_flags);
  } else {
    BlockPredLoadDirect<T, BLOCK_THREADS, ITEMS_PER_THREAD>(
        threadIdx.x, block_itr, items, num_items, selection_flags);
  }
}

// Unconditional load, full tile.
template<typename T, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockLoadDirect(
    const unsigned int tid,
    T* block_itr,
    T  (&items)[ITEMS_PER_THREAD]
    ) {
  T* thread_itr = block_itr + tid;

  #pragma unroll
  for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) {
    items[ITEM] = thread_itr[ITEM * BLOCK_THREADS];
  }
}

// Unconditional load, partial tile: positions that are not below num_items
// are skipped and the matching entries of `items` stay uninitialised.
template<typename T, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockLoadDirect(
    const unsigned int tid,
    T* block_itr,
    T  (&items)[ITEMS_PER_THREAD],
    int num_items
    ) {
  T* thread_itr = block_itr + tid;

  #pragma unroll
  for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) {
    if (tid + (ITEM * BLOCK_THREADS) < num_items) {
      items[ITEM] = thread_itr[ITEM * BLOCK_THREADS];
    }
  }
}

// Tile load for the calling thread (threadIdx.x). `inp` must already point at
// the start of the block's tile; no blockIdx offset is applied here.
template<typename T, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockLoad(
    T* inp,
    T  (&items)[ITEMS_PER_THREAD],
    int num_items
    ) {
  T* block_itr = inp;

  if ((BLOCK_THREADS * ITEMS_PER_THREAD) == num_items) {
    BlockLoadDirect<T, BLOCK_THREADS, ITEMS_PER_THREAD>(threadIdx.x, block_itr, items);
  } else {
    BlockLoadDirect<T, BLOCK_THREADS, ITEMS_PER_THREAD>(threadIdx.x, block_itr, items, num_items);
  }
}

// The former `#if 0 ... #endif` copy of BlockLoadDirect/BlockLoad was removed:
// it could not compile (missing commas between parameters) and would have
// redefined the functions above if it were ever enabled.

// ---- crystal-opt/src/crystal/pred.cuh ----
// MIT License

// Copyright (c) 2023 Jiashen Cao

// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:

// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

#pragma once

// NOTE(review): template parameter lists and explicit template arguments were
// lost in extraction and are restored from the names the bodies use and from
// the surviving call fragments ("..., BLOCK_THREADS, ITEMS_PER_THREAD>") --
// confirm against the real pred.cuh.
//
// Items are striped across the block: slot i of thread tid stands for tile
// position tid + i * BLOCK_THREADS. The overloads taking num_items only touch
// slots whose position is below num_items.

// Sets every selection flag of the calling thread to 1.
template<int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void InitFlags(
    int (&selection_flags)[ITEMS_PER_THREAD]
    ) {
  #pragma unroll
  for (int i = 0; i < ITEMS_PER_THREAD; ++i) {
    selection_flags[i] = 1;
  }
}

// flags[i] = select_op(items[i]) for every slot (full tile).
template<typename T, typename SelectOp, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockPredDirect(
    int tid,
    T (&items)[ITEMS_PER_THREAD],
    SelectOp select_op,
    int (&selection_flags)[ITEMS_PER_THREAD]
    ) {
  #pragma unroll
  for (int i = 0; i < ITEMS_PER_THREAD; ++i) {
    selection_flags[i] = select_op(items[i]);
  }
}

// flags[i] = select_op(items[i]) for in-range slots only (partial tile);
// out-of-range flags keep whatever value they had.
template<typename T, typename SelectOp, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockPredDirect(
    int tid,
    T (&items)[ITEMS_PER_THREAD],
    SelectOp select_op,
    int (&selection_flags)[ITEMS_PER_THREAD],
    int num_items
    ) {
  #pragma unroll
  for (int i = 0; i < ITEMS_PER_THREAD; ++i) {
    const bool in_range = tid + (i * BLOCK_THREADS) < num_items;
    if (in_range) {
      selection_flags[i] = select_op(items[i]);
    }
  }
}

// Overwrites the flags with the predicate result, choosing the unguarded path
// for a full tile.
template<typename T, typename SelectOp, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockPred(
    T (&items)[ITEMS_PER_THREAD],
    SelectOp select_op,
    int (&selection_flags)[ITEMS_PER_THREAD],
    int num_items
    ) {
  const bool full_tile = (BLOCK_THREADS * ITEMS_PER_THREAD) == num_items;
  if (!full_tile) {
    BlockPredDirect<T, SelectOp, BLOCK_THREADS, ITEMS_PER_THREAD>(
        threadIdx.x, items, select_op, selection_flags, num_items);
    return;
  }
  BlockPredDirect<T, SelectOp, BLOCK_THREADS, ITEMS_PER_THREAD>(
      threadIdx.x, items, select_op, selection_flags);
}

// flags[i] = flags[i] && select_op(items[i]) for every slot (full tile).
// The predicate is not evaluated for slots that are already deselected.
template<typename T, typename SelectOp, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockPredAndDirect(
    int tid,
    T (&items)[ITEMS_PER_THREAD],
    SelectOp select_op,
    int (&selection_flags)[ITEMS_PER_THREAD]
    ) {
  #pragma unroll
  for (int i = 0; i < ITEMS_PER_THREAD; ++i) {
    selection_flags[i] = selection_flags[i] && select_op(items[i]);
  }
}

// AND-combine, partial tile.
template<typename T, typename SelectOp, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockPredAndDirect(
    int tid,
    T (&items)[ITEMS_PER_THREAD],
    SelectOp select_op,
    int (&selection_flags)[ITEMS_PER_THREAD],
    int num_items
    ) {
  #pragma unroll
  for (int i = 0; i < ITEMS_PER_THREAD; ++i) {
    const bool in_range = tid + (i * BLOCK_THREADS) < num_items;
    if (in_range) {
      selection_flags[i] = selection_flags[i] && select_op(items[i]);
    }
  }
}

// AND-combines the predicate into the existing flags.
template<typename T, typename SelectOp, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockPredAnd(
    T (&items)[ITEMS_PER_THREAD],
    SelectOp select_op,
    int (&selection_flags)[ITEMS_PER_THREAD],
    int num_items
    ) {
  const bool full_tile = (BLOCK_THREADS * ITEMS_PER_THREAD) == num_items;
  if (!full_tile) {
    BlockPredAndDirect<T, SelectOp, BLOCK_THREADS, ITEMS_PER_THREAD>(
        threadIdx.x, items, select_op, selection_flags, num_items);
    return;
  }
  BlockPredAndDirect<T, SelectOp, BLOCK_THREADS, ITEMS_PER_THREAD>(
      threadIdx.x, items, select_op, selection_flags);
}

// flags[i] = flags[i] || select_op(items[i]) for every slot (full tile).
// The predicate is not evaluated for slots that are already selected.
template<typename T, typename SelectOp, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockPredOrDirect(
    int tid,
    T (&items)[ITEMS_PER_THREAD],
    SelectOp select_op,
    int (&selection_flags)[ITEMS_PER_THREAD]
    ) {
  #pragma unroll
  for (int i = 0; i < ITEMS_PER_THREAD; ++i) {
    selection_flags[i] = selection_flags[i] || select_op(items[i]);
  }
}

// OR-combine, partial tile.
template<typename T, typename SelectOp, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockPredOrDirect(
    int tid,
    T (&items)[ITEMS_PER_THREAD],
    SelectOp select_op,
    int (&selection_flags)[ITEMS_PER_THREAD],
    int num_items
    ) {
  #pragma unroll
  for (int i = 0; i < ITEMS_PER_THREAD; ++i) {
    const bool in_range = tid + (i * BLOCK_THREADS) < num_items;
    if (in_range) {
      selection_flags[i] = selection_flags[i] || select_op(items[i]);
    }
  }
}

// OR-combines the predicate into the existing flags.
template<typename T, typename SelectOp, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockPredOr(
    T (&items)[ITEMS_PER_THREAD],
    SelectOp select_op,
    int (&selection_flags)[ITEMS_PER_THREAD],
    int num_items
    ) {
  const bool full_tile = (BLOCK_THREADS * ITEMS_PER_THREAD) == num_items;
  if (!full_tile) {
    BlockPredOrDirect<T, SelectOp, BLOCK_THREADS, ITEMS_PER_THREAD>(
        threadIdx.x, items, select_op, selection_flags, num_items);
    return;
  }
  BlockPredOrDirect<T, SelectOp, BLOCK_THREADS, ITEMS_PER_THREAD>(
      threadIdx.x, items, select_op, selection_flags);
}

// Comparison functors: each stores the right-hand operand in `compare` and
// tests its argument against it.

// a < compare
template<typename T>
struct LessThan
{
  T compare;

  __device__ __forceinline__
  LessThan(T compare) : compare(compare) {}

  __device__ __forceinline__
  bool operator()(const T &a) const { return a < compare; }
};

// a > compare
template<typename T>
struct GreaterThan
{
  T compare;

  __device__ __forceinline__
  GreaterThan(T compare) : compare(compare) {}

  __device__ __forceinline__
  bool operator()(const T &a) const { return a > compare; }
};

// a <= compare
template<typename T>
struct LessThanEq
{
  T compare;

  __device__ __forceinline__
  LessThanEq(T compare) : compare(compare) {}

  __device__ __forceinline__
  bool operator()(const T &a) const { return a <= compare; }
};

// a >= compare
template<typename T>
struct GreaterThanEq
{
  T compare;

  __device__ __forceinline__
  GreaterThanEq(T compare) : compare(compare) {}

  __device__ __forceinline__
  bool operator()(const T &a) const { return a >= compare; }
};

// a == compare
template<typename T>
struct Eq
{
  T compare;

  __device__ __forceinline__
  Eq(T compare) : compare(compare) {}

  __device__ __forceinline__
  bool operator()(const T &a) const { return a == compare; }
};

// Shorthand wrappers. Plain variants overwrite the flags, And/Or variants
// combine with the flags already present.

// flags = (item < compare)
template<typename T, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockPredLT(
    T (&items)[ITEMS_PER_THREAD],
    T compare,
    int (&selection_flags)[ITEMS_PER_THREAD],
    int num_items
    ) {
  BlockPred<T, LessThan<T>, BLOCK_THREADS, ITEMS_PER_THREAD>(
      items, LessThan<T>(compare), selection_flags, num_items);
}

// flags = flags && (item < compare)
template<typename T, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockPredAndLT(
    T (&items)[ITEMS_PER_THREAD],
    T compare,
    int (&selection_flags)[ITEMS_PER_THREAD],
    int num_items
    ) {
  BlockPredAnd<T, LessThan<T>, BLOCK_THREADS, ITEMS_PER_THREAD>(
      items, LessThan<T>(compare), selection_flags, num_items);
}

// flags = (item > compare)
template<typename T, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockPredGT(
    T (&items)[ITEMS_PER_THREAD],
    T compare,
    int (&selection_flags)[ITEMS_PER_THREAD],
    int num_items
    ) {
  BlockPred<T, GreaterThan<T>, BLOCK_THREADS, ITEMS_PER_THREAD>(
      items, GreaterThan<T>(compare), selection_flags, num_items);
}

// flags = flags && (item > compare)
template<typename T, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockPredAndGT(
    T (&items)[ITEMS_PER_THREAD],
    T compare,
    int (&selection_flags)[ITEMS_PER_THREAD],
    int num_items
    ) {
  BlockPredAnd<T, GreaterThan<T>, BLOCK_THREADS, ITEMS_PER_THREAD>(
      items, GreaterThan<T>(compare), selection_flags, num_items);
}

// flags = (item <= compare)
template<typename T, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockPredLTE(
    T (&items)[ITEMS_PER_THREAD],
    T compare,
    int (&selection_flags)[ITEMS_PER_THREAD],
    int num_items
    ) {
  BlockPred<T, LessThanEq<T>, BLOCK_THREADS, ITEMS_PER_THREAD>(
      items, LessThanEq<T>(compare), selection_flags, num_items);
}

// flags = flags && (item <= compare)
template<typename T, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockPredAndLTE(
    T (&items)[ITEMS_PER_THREAD],
    T compare,
    int (&selection_flags)[ITEMS_PER_THREAD],
    int num_items
    ) {
  BlockPredAnd<T, LessThanEq<T>, BLOCK_THREADS, ITEMS_PER_THREAD>(
      items, LessThanEq<T>(compare), selection_flags, num_items);
}

// flags = (item >= compare)
template<typename T, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockPredGTE(
    T (&items)[ITEMS_PER_THREAD],
    T compare,
    int (&selection_flags)[ITEMS_PER_THREAD],
    int num_items
    ) {
  BlockPred<T, GreaterThanEq<T>, BLOCK_THREADS, ITEMS_PER_THREAD>(
      items, GreaterThanEq<T>(compare), selection_flags, num_items);
}

// flags = flags && (item >= compare)
template<typename T, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockPredAndGTE(
    T (&items)[ITEMS_PER_THREAD],
    T compare,
    int (&selection_flags)[ITEMS_PER_THREAD],
    int num_items
    ) {
  BlockPredAnd<T, GreaterThanEq<T>, BLOCK_THREADS, ITEMS_PER_THREAD>(
      items, GreaterThanEq<T>(compare), selection_flags, num_items);
}

// flags = (item == compare)
template<typename T, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockPredEQ(
    T (&items)[ITEMS_PER_THREAD],
    T compare,
    int (&selection_flags)[ITEMS_PER_THREAD],
    int num_items
    ) {
  BlockPred<T, Eq<T>, BLOCK_THREADS, ITEMS_PER_THREAD>(
      items, Eq<T>(compare), selection_flags, num_items);
}

// flags = flags && (item == compare)
template<typename T, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockPredAndEQ(
    T (&items)[ITEMS_PER_THREAD],
    T compare,
    int (&selection_flags)[ITEMS_PER_THREAD],
    int num_items
    ) {
  BlockPredAnd<T, Eq<T>, BLOCK_THREADS, ITEMS_PER_THREAD>(
      items, Eq<T>(compare), selection_flags, num_items);
}

// flags = flags || (item == compare)
template<typename T, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockPredOrEQ(
    T (&items)[ITEMS_PER_THREAD],
    T compare,
    int (&selection_flags)[ITEMS_PER_THREAD],
    int num_items
    ) {
  BlockPredOr<T, Eq<T>, BLOCK_THREADS, ITEMS_PER_THREAD>(
      items, Eq<T>(compare), selection_flags, num_items);
}

// ---- crystal-opt/src/crystal/reduce.cuh ----
// MIT License

// Copyright (c) 2023 Jiashen Cao

// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:

// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

#pragma once

// Block-wide sum of one value per thread.
//
// Each warp reduces its values with shuffles, lane 0 of every warp writes the
// warp total to shared[warp index], and the first warp then reduces those
// totals. Only thread 0 ends up with the complete block sum; every other
// thread returns an intermediate value.
//
// `shared` needs at least blockDim.x / 32 elements. Both barriers must be
// reached by every thread of the block.
// NOTE(review): the full 0xffffffff shuffle mask and the blockDim.x / 32
// division assume blockDim.x is a multiple of 32 -- confirm at the call sites.
template<typename T, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ T BlockSum(
    T  item,
    T* shared
    ) {
  // Makes it safe to reuse `shared` right after a previous use.
  __syncthreads();

  const int warp_size = 32;
  const int lane_id = threadIdx.x % warp_size;
  const int warp_id = threadIdx.x / warp_size;
  T val = item;

  // Stage 1: reduce within each warp.
  for (int offset = 16; offset > 0; offset /= 2) {
    val += __shfl_down_sync(0xffffffff, val, offset);
  }

  // Publish one partial sum per warp.
  if (lane_id == 0) {
    shared[warp_id] = val;
  }

  __syncthreads();

  // Stage 2: the first blockDim.x / 32 threads pick up one warp total each,
  // everybody else contributes 0.
  val = (threadIdx.x < blockDim.x / warp_size) ? shared[lane_id] : 0;

  if (warp_id == 0) {
    for (int offset = 16; offset > 0; offset /= 2) {
      val += __shfl_down_sync(0xffffffff, val, offset);
    }
  }

  return val;
}

// Block-wide sum of ITEMS_PER_THREAD values per thread: adds up the thread's
// own items first, then defers to the single-value BlockSum above.
template<typename T, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ T BlockSum(
    T (&items)[ITEMS_PER_THREAD],
    T* shared
    ) {
  T local_total = 0;

  #pragma unroll
  for (int i = 0; i < ITEMS_PER_THREAD; ++i) {
    local_total += items[i];
  }

  return BlockSum<T, BLOCK_THREADS, ITEMS_PER_THREAD>(local_total, shared);
}

// ---- crystal-opt/src/crystal/store.cuh ----
// MIT License

// Copyright (c) 2023 Jiashen Cao

// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:

// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

#pragma once

// NOTE(review): template parameter lists and explicit template arguments were
// lost in extraction and are restored from the names the bodies use -- confirm
// the parameter order against the real store.cuh.
//
// All stores write in a striped layout: item ITEM of thread tid goes to
// block_itr[tid + ITEM * BLOCK_THREADS].

// Unconditional store, full tile.
template<typename T, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockStoreDirect(
    int tid,
    T* block_itr,
    T  (&items)[ITEMS_PER_THREAD]
    ) {
  T* thread_itr = block_itr + tid;

  #pragma unroll
  for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) {
    thread_itr[ITEM * BLOCK_THREADS] = items[ITEM];
  }
}

// Store, partial tile: positions that are not below num_items are skipped.
template<typename T, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockStoreDirect(
    int tid,
    T* block_itr,
    T  (&items)[ITEMS_PER_THREAD],
    int num_items
    ) {
  T* thread_itr = block_itr + tid;

  #pragma unroll
  for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) {
    if (tid + (ITEM * BLOCK_THREADS) < num_items) {
      thread_itr[ITEM * BLOCK_THREADS] = items[ITEM];
    }
  }
}

// Tile store for the calling thread (threadIdx.x). `out` must already point
// at the start of the block's tile; no blockIdx offset is applied here.
// Uses the unguarded path when num_items == BLOCK_THREADS * ITEMS_PER_THREAD.
template<typename T, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockStore(
    T* out,
    T  (&items)[ITEMS_PER_THREAD],
    int num_items
    ) {
  T* block_itr = out;

  if ((BLOCK_THREADS * ITEMS_PER_THREAD) == num_items) {
    BlockStoreDirect<T, BLOCK_THREADS, ITEMS_PER_THREAD>(threadIdx.x, block_itr, items);
  } else {
    BlockStoreDirect<T, BLOCK_THREADS, ITEMS_PER_THREAD>(threadIdx.x, block_itr, items, num_items);
  }
}

// The former `#if 0 ... #endif` copy of BlockStoreDirect/BlockStore was
// removed: its "store" bodies read from memory into `items` instead of
// writing, it could not compile (missing commas between parameters), and it
// would have redefined the functions above if it were ever enabled.

// ---- crystal-opt/src/crystal/term.cuh ----
// MIT License

// Copyright (c) 2023 Jiashen Cao

// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:

// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

#pragma once

// Returns true when the calling thread's selection flags add up to zero.
// With the usual 0/1 flags that means none of the thread's items is still
// selected. The check is per thread; it does not look at other threads.
// NOTE(review): the template parameter list was lost in extraction. Only
// ITEMS_PER_THREAD is used by the body; the <T, BLOCK_THREADS,
// ITEMS_PER_THREAD> shape is assumed from the sibling headers -- confirm
// against the real term.cuh before relying on it.
template<typename T, int BLOCK_THREADS, int ITEMS_PER_THREAD>
__device__ __forceinline__ bool
IsTerm(int (&selection_flags)[ITEMS_PER_THREAD]) {
  int flag_total = 0;
  for (int i = 0; i < ITEMS_PER_THREAD; ++i) {
    flag_total += selection_flags[i];
  }
  return flag_total == 0;
}

// ---- crystal-opt/src/ops/join.cu ----
// MIT License

// Copyright (c) 2023 Jiashen Cao

// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:

// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

// Ensure printing of CUDA runtime errors to console
#define CUB_STDERR

// NOTE(review): the targets of the five angle-bracket includes below were
// lost in extraction. They are reconstructed from what this file uses (cout,
// printf, the CUDA runtime, cub::CachingDeviceAllocator) -- confirm against
// the real join.cu.
#include <iostream>
#include <stdio.h>
#include <curand.h>

#include <cuda.h>
#include <cub/util_allocator.cuh>
#include "cub/test/test_util.h"

#include "crystal.cuh"

#include "utils/generator.h"
#include "utils/gpu_utils.h"

using namespace std;

#define DEBUG 1

// NOTE(review): template parameter lists and explicit template arguments in
// this file were lost in extraction. They are restored from the surviving
// launch sites (build_kernel<128, 4>, probe_kernel<128, 4>) and from the
// element types of the arrays involved -- confirm against the real file.
// TILE_SIZE is not defined here; presumably it comes from crystal.cuh and
// equals BLOCK_THREADS * ITEMS_PER_THREAD -- confirm.

// Build phase: one thread block per tile of the dimension table.
// Loads the tile's keys and values and inserts every tuple into the
// interleaved key/value hash table (2 ints per slot, keys_min = 0).
// Launch with ceil(num_tuples / TILE_SIZE) blocks of BLOCK_THREADS threads;
// hash_table must hold 2 * num_slots ints and be zero-filled beforehand.
template<int BLOCK_THREADS, int ITEMS_PER_THREAD>
__global__ void build_kernel(int *dim_key, int *dim_val, int num_tuples, int *hash_table, int num_slots) {
  int items[ITEMS_PER_THREAD];
  int items2[ITEMS_PER_THREAD];
  int selection_flags[ITEMS_PER_THREAD];

  int tile_offset = blockIdx.x * TILE_SIZE;
  int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE;
  int num_tile_items = TILE_SIZE;

  // The last tile may be only partially filled.
  if (blockIdx.x == num_tiles - 1) {
    num_tile_items = num_tuples - tile_offset;
  }

  InitFlags<BLOCK_THREADS, ITEMS_PER_THREAD>(selection_flags);
  BlockLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD>(dim_key + tile_offset, items, num_tile_items);
  BlockLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD>(dim_val + tile_offset, items2, num_tile_items);
  BlockBuildSelectivePHT_2<int, int, BLOCK_THREADS, ITEMS_PER_THREAD>(items, items2, selection_flags,
      hash_table, num_slots, num_tile_items);
}

// Probe phase: one thread block per tile of the fact table.
// Looks every foreign key up in the hash table and adds
// fact_val * joined dimension value of all matches to *res.
// Launch with ceil(num_tuples / TILE_SIZE) blocks of BLOCK_THREADS threads;
// *res must be zeroed before the launch. Uses 32 long longs of static shared
// memory for the block reduction.
template<int BLOCK_THREADS, int ITEMS_PER_THREAD>
__global__ void probe_kernel(int *fact_fkey, int *fact_val, int num_tuples,
    int *hash_table, int num_slots, unsigned long long *res) {
  // Load a tile striped across threads
  int selection_flags[ITEMS_PER_THREAD];
  int keys[ITEMS_PER_THREAD];
  int vals[ITEMS_PER_THREAD];
  int join_vals[ITEMS_PER_THREAD];

  unsigned long long sum = 0;

  int tile_offset = blockIdx.x * TILE_SIZE;
  int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE;
  int num_tile_items = TILE_SIZE;

  // The last tile may be only partially filled.
  if (blockIdx.x == num_tiles - 1) {
    num_tile_items = num_tuples - tile_offset;
  }

  InitFlags<BLOCK_THREADS, ITEMS_PER_THREAD>(selection_flags);
  BlockLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD>(fact_fkey + tile_offset, keys, num_tile_items);
  BlockLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD>(fact_val + tile_offset, vals, num_tile_items);

  BlockProbeAndPHT_2<int, int, BLOCK_THREADS, ITEMS_PER_THREAD>(keys, join_vals, selection_flags,
      hash_table, num_slots, num_tile_items);

  #pragma unroll
  for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM)
  {
    if ((threadIdx.x + (BLOCK_THREADS * ITEM) < num_tile_items))
      if (selection_flags[ITEM])
        // Widen before multiplying: the product of two ints overflows 32 bits
        // long before the 64-bit accumulator does.
        sum += static_cast<long long>(vals[ITEM]) * join_vals[ITEM];
  }

  __syncthreads();

  static __shared__ long long buffer[32];
  unsigned long long aggregate = BlockSum<long long, BLOCK_THREADS, ITEMS_PER_THREAD>(sum, (long long*)buffer);
  __syncthreads();

  // BlockSum leaves the complete block total in thread 0 only.
  if (threadIdx.x == 0) {
    atomicAdd(res, aggregate);
  }
}

// Timings (as reported by TIME_FUNC) of one hashJoin run.
struct TimeKeeper {
  float time_build;
  float time_probe;
  float time_extra;   // hash-table memset
  float time_total;   // build + probe + hash-table memset
};

// Runs one build + probe pass over device-resident input and returns the
// measured times. The join result is accumulated on the device and discarded.
//
// d_dim_key/d_dim_val:   dimension table columns, num_dim entries each
// d_fact_fkey/d_fact_val: fact table columns, num_fact entries each
// g_allocator:           allocator used by the ALLOCATE/CLEANUP macros
//
// SETUP_TIMING, TIME_FUNC, ALLOCATE and CLEANUP are not defined in this view;
// presumably they come from utils/gpu_utils.h -- confirm.
TimeKeeper hashJoin(int* d_dim_key, int* d_dim_val, int* d_fact_fkey, int* d_fact_val, int num_dim, int num_fact, cub::CachingDeviceAllocator&  g_allocator) {
  SETUP_TIMING();

  int* hash_table = NULL;
  unsigned long long* res;
  int num_slots = num_dim;
  float time_build, time_probe, time_memset, time_memset2;

  ALLOCATE(hash_table, sizeof(int) * 2 * num_dim);
  ALLOCATE(res, sizeof(long long));

  // 0 is the "empty slot" marker of the hash table, so it must start zeroed.
  TIME_FUNC(cudaMemset(hash_table, 0, num_slots * sizeof(int) * 2), time_memset);
  TIME_FUNC(cudaMemset(res, 0, sizeof(long long)), time_memset2);

  int tile_items = 128*4;

  TIME_FUNC((build_kernel<128, 4><<<(num_dim + tile_items - 1)/tile_items, 128>>>(d_dim_key, d_dim_val, num_dim, hash_table, num_slots)), time_build);
  // A launch does not report errors by itself; surface bad configurations.
  CubDebugExit(cudaGetLastError());
  TIME_FUNC((probe_kernel<128, 4><<<(num_fact + tile_items - 1)/tile_items, 128>>>(d_fact_fkey, d_fact_val, num_fact, hash_table, num_slots, res)), time_probe);
  CubDebugExit(cudaGetLastError());

#if DEBUG
  // Fixed: the "time_build" key was missing its ':' and produced invalid JSON.
  cout << "{" << "\"time_memset\":" << time_memset
    << ",\"time_build\":" << time_build
    << ",\"time_probe\":" << time_probe << "}" << endl;
#endif

  CLEANUP(hash_table);
  CLEANUP(res);

  TimeKeeper t = {time_build, time_probe, time_memset, time_build + time_probe + time_memset};
  return t;
}

//---------------------------------------------------------------------
// Globals, constants and typedefs
//---------------------------------------------------------------------

bool                    g_verbose = false;  // Whether to display input/output to console
cub::CachingDeviceAllocator  g_allocator(true);  // Caching allocator for device memory


#define CLEANUP(vec) if(vec)CubDebugExit(g_allocator.DeviceFree(vec))

//---------------------------------------------------------------------
// Main
//---------------------------------------------------------------------
// Generates a primary-key dimension table and a foreign-key fact table on the
// host, copies them to the device and times hashJoin for num_trials rounds,
// printing one JSON line per round.
// Options: --n (fact tuples), --d (dimension tuples), --t (trials).
int main(int argc, char** argv)
{
  int num_fact           = 256 << 20;
  int num_dim            = 16 << 20;
  int num_trials         = 3;

  // Initialize command line
  CommandLineArgs args(argc, argv);
  args.GetCmdLineArgument("n", num_fact);
  args.GetCmdLineArgument("d", num_dim);
  args.GetCmdLineArgument("t", num_trials);

  // Print usage
  if (args.CheckCmdLineFlag("help"))
  {
    printf("%s "
      "[--n=] "
      "[--d=] "
      "[--t=] "
      "[--device=] "
      "[--v] "
      "\n", argv[0]);
    exit(0);
  }

  // Initialize device
  CubDebugExit(args.DeviceInit());

  // Allocate problem device arrays
  int *d_dim_key = NULL;
  int *d_dim_val = NULL;
  int *d_fact_fkey = NULL;
  int *d_fact_val = NULL;
  CubDebugExit(g_allocator.DeviceAllocate((void**)&d_dim_key, sizeof(int) * num_dim));
  CubDebugExit(g_allocator.DeviceAllocate((void**)&d_dim_val, sizeof(int) * num_dim));
  CubDebugExit(g_allocator.DeviceAllocate((void**)&d_fact_fkey, sizeof(int) * num_fact));
  CubDebugExit(g_allocator.DeviceAllocate((void**)&d_fact_val, sizeof(int) * num_fact));

  int *h_dim_key = NULL;
  int *h_dim_val = NULL;
  int *h_fact_fkey = NULL;
  int *h_fact_val = NULL;

  create_relation_pk(h_dim_key, h_dim_val, num_dim);
  create_relation_fk(h_fact_fkey, h_fact_val, num_fact, num_dim);

  // Do not copy from buffers the generators failed to allocate.
  if (!h_dim_key || !h_dim_val || !h_fact_fkey || !h_fact_val) {
    fprintf(stderr, "failed to generate input relations\n");
    return 1;
  }

  CubDebugExit(cudaMemcpy(d_dim_key, h_dim_key, sizeof(int) * num_dim, cudaMemcpyHostToDevice));
  CubDebugExit(cudaMemcpy(d_dim_val, h_dim_val, sizeof(int) * num_dim, cudaMemcpyHostToDevice));
  CubDebugExit(cudaMemcpy(d_fact_fkey, h_fact_fkey, sizeof(int) * num_fact, cudaMemcpyHostToDevice));
  CubDebugExit(cudaMemcpy(d_fact_val, h_fact_val, sizeof(int) * num_fact, cudaMemcpyHostToDevice));

  for (int j = 0; j < num_trials; j++) {
    TimeKeeper t = hashJoin(d_dim_key, d_dim_val, d_fact_fkey, d_fact_val, num_dim, num_fact, g_allocator);
    cout<< "{"
        << "\"num_dim\":" << num_dim
        << ",\"num_fact\":" << num_fact
        << ",\"radix\":" << 0
        << ",\"time_partition_build\":" << 0
        << ",\"time_partition_probe\":" << 0
        << ",\"time_partition_total\":" << 0
        << ",\"time_build\":" << t.time_build
        << ",\"time_probe\":" << t.time_probe
        << ",\"time_extra\":" << t.time_extra
        << ",\"time_join_total\":" << t.time_total
        << "}" << endl;
  }

  CLEANUP(d_dim_key);
  CLEANUP(d_dim_val);
  CLEANUP(d_fact_fkey);
  CLEANUP(d_fact_val);

  return 0;
}

// ---- crystal-opt/src/ops/project.cu ----
// MIT License

// Copyright (c) 2023 Jiashen Cao

// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:

// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

// Ensure printing of CUDA runtime errors to console
#define CUB_STDERR

// NOTE(review): the targets of the six angle-bracket includes below were lost
// in extraction. They are reconstructed from what this file uses (cout,
// printf, exit, cuRAND, the CUDA runtime, cub::CachingDeviceAllocator) --
// confirm against the real project.cu.
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <curand.h>

#include <cuda.h>
#include <cub/util_allocator.cuh>
#include "cub/test/test_util.h"

#include "crystal.cuh"

#include "utils/gpu_utils.h"

using namespace std;


//---------------------------------------------------------------------
// Implements Projection Operator
// There are two variants: dot-product and sigmoid
//---------------------------------------------------------------------

bool                    g_verbose = false;  // Whether to display input/output to console
cub::CachingDeviceAllocator  g_allocator(true);  // Caching allocator for device memory

// NOTE(review): template parameter lists, explicit template arguments and the
// kernel launch configurations in this file were lost in extraction. They are
// restored from the surviving template arguments (<128,4>), the locally
// computed num_blocks and the float element type -- confirm against the real
// file. TILE_SIZE is not defined here; presumably it comes from crystal.cuh
// and equals BLOCK_THREADS * ITEMS_PER_THREAD -- confirm.

// out[i] = 2 * in1[i] + 3 * in2[i] for i in [0, num_items).
// One thread block per tile; launch with ceil(num_items / TILE_SIZE) blocks
// of BLOCK_THREADS threads.
template<int BLOCK_THREADS, int ITEMS_PER_THREAD>
__global__ void project(float* in1, float* in2, float* out, int num_items)
{
  float items[ITEMS_PER_THREAD];
  float items2[ITEMS_PER_THREAD];
  float res[ITEMS_PER_THREAD];

  int tile_offset = blockIdx.x * TILE_SIZE;
  int num_tiles = (num_items + TILE_SIZE - 1) / TILE_SIZE;
  int num_tile_items = TILE_SIZE;

  // The last tile may be only partially filled.
  if (blockIdx.x == num_tiles - 1) {
    num_tile_items = num_items - tile_offset;
  }

  BlockLoad<float, BLOCK_THREADS, ITEMS_PER_THREAD>(in1 + tile_offset, items, num_tile_items);
  BlockLoad<float, BLOCK_THREADS, ITEMS_PER_THREAD>(in2 + tile_offset, items2, num_tile_items);

  #pragma unroll
  for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) {
    if (threadIdx.x + (ITEM * BLOCK_THREADS) < num_tile_items) {
      res[ITEM] = 2*items[ITEM] + 3*items2[ITEM];
    }
  }

  BlockStore<float, BLOCK_THREADS, ITEMS_PER_THREAD>(out + tile_offset, res, num_tile_items);
}

// out[i] = 1 / (1 + exp(-2 * in1[i] - 3 * in2[i])) for i in [0, num_items).
// Same launch layout as project().
template<int BLOCK_THREADS, int ITEMS_PER_THREAD>
__global__ void projectSigmoid(float* in1, float* in2, float* out, int num_items)
{
  float items[ITEMS_PER_THREAD];
  float items2[ITEMS_PER_THREAD];
  float res[ITEMS_PER_THREAD];

  int tile_offset = blockIdx.x * TILE_SIZE;
  int num_tiles = (num_items + TILE_SIZE - 1) / TILE_SIZE;
  int num_tile_items = TILE_SIZE;

  // The last tile may be only partially filled.
  if (blockIdx.x == num_tiles - 1) {
    num_tile_items = num_items - tile_offset;
  }

  BlockLoad<float, BLOCK_THREADS, ITEMS_PER_THREAD>(in1 + tile_offset, items, num_tile_items);
  BlockLoad<float, BLOCK_THREADS, ITEMS_PER_THREAD>(in2 + tile_offset, items2, num_tile_items);

  #pragma unroll
  for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) {
    if (threadIdx.x + (ITEM * BLOCK_THREADS) < num_tile_items) {
      res[ITEM] = 1.0f / (1.0f + expf(-2*items[ITEM] -3*items2[ITEM]));
    }
  }

  BlockStore<float, BLOCK_THREADS, ITEMS_PER_THREAD>(out + tile_offset, res, num_tile_items);
}


// Launches project<128,4> over num_items device floats and returns the time
// reported by TIME_FUNC. SETUP_TIMING/TIME_FUNC are not defined in this view;
// presumably they come from utils/gpu_utils.h -- confirm.
float projectGPU(float* in1, float* in2, float* out, int num_items) {
  SETUP_TIMING();

  float time_proj;
  int tile_items = 128*4;
  int num_blocks = (num_items + tile_items - 1)/tile_items;
  TIME_FUNC((project<128,4><<<num_blocks, 128>>>(in1, in2, out, num_items)), time_proj);
  // A launch does not report errors by itself; surface bad configurations.
  CubDebugExit(cudaGetLastError());

  return time_proj;
}

// Launches projectSigmoid<128,4> over num_items device floats and returns the
// time reported by TIME_FUNC.
float projectSigmoidGPU(float* in1, float* in2, float* out, int num_items) {
  SETUP_TIMING();

  float time_proj;
  int tile_items = 128*4;
  int num_blocks = (num_items + tile_items - 1)/tile_items;
  TIME_FUNC((projectSigmoid<128,4><<<num_blocks, 128>>>(in1, in2, out, num_items)), time_proj);
  CubDebugExit(cudaGetLastError());

  return time_proj;
}

// Aborts with a message when a cuRAND call did not succeed.
static void checkCurand(curandStatus_t status, const char* what) {
  if (status != CURAND_STATUS_SUCCESS) {
    fprintf(stderr, "cuRAND error %d in %s\n", (int)status, what);
    exit(1);
  }
}

/**
 * Main
 *
 * Fills two device arrays with uniform random floats and times both
 * projection kernels for num_trials rounds, printing one JSON line per round.
 * Options: --n (items), --t (trials).
 */
int main(int argc, char** argv)
{
  int num_items           = 1<<28;
  int num_trials          = 1;

  // Initialize command line
  CommandLineArgs args(argc, argv);
  args.GetCmdLineArgument("n", num_items);
  args.GetCmdLineArgument("t", num_trials);

  // Print usage
  if (args.CheckCmdLineFlag("help"))
  {
    printf("%s "
      "[--n=] "
      "[--t=] "
      "[--device=] "
      "[--v] "
      "\n", argv[0]);
    exit(0);
  }

  // Initialize device
  CubDebugExit(args.DeviceInit());

  // Allocate problem device arrays
  float *d_in1 = NULL;
  CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in1, sizeof(float) * num_items));

  float *d_in2 = NULL;
  CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in2, sizeof(float) * num_items));

  float  *d_out = NULL;
  CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(float) * num_items));

  // Fill both inputs with uniform random floats; the fixed seed keeps runs
  // reproducible. (The two cudaEvent_t objects that used to be created here
  // were never used or destroyed and have been dropped.)
  curandGenerator_t generator;
  int seed = 0;
  checkCurand(curandCreateGenerator(&generator, CURAND_RNG_PSEUDO_DEFAULT), "curandCreateGenerator");
  checkCurand(curandSetPseudoRandomGeneratorSeed(generator, seed), "curandSetPseudoRandomGeneratorSeed");
  checkCurand(curandGenerateUniform(generator, d_in1, num_items), "curandGenerateUniform(d_in1)");
  checkCurand(curandGenerateUniform(generator, d_in2, num_items), "curandGenerateUniform(d_in2)");
  checkCurand(curandDestroyGenerator(generator), "curandDestroyGenerator");

  float time_proj_gpu;
  float time_proj_sigmoid_gpu;

  for (int t = 0; t < num_trials; t++) {
    time_proj_gpu = projectGPU(d_in1, d_in2, d_out, num_items);
    time_proj_sigmoid_gpu = projectSigmoidGPU(d_in1, d_in2, d_out, num_items);

    cout<< "{"
        << "\"time_proj_gpu\":" << time_proj_gpu
        << ",\"time_proj_sigmoid_gpu\":" << time_proj_sigmoid_gpu
        << "}" << endl;
  }

  // Cleanup
  if (d_in1) CubDebugExit(g_allocator.DeviceFree(d_in1));
  if (d_in2) CubDebugExit(g_allocator.DeviceFree(d_in2));
  if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out));

  return 0;
}

// ---- crystal-opt/src/ops/utils/generator.h ----
// MIT License

// Copyright (c) 2023 Jiashen Cao

// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:

// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include /* perror */ +#include /* posix_memalign */ +#include +#include +using namespace std; + +#define RAND_RANGE(N) ((double)rand() / ((double)RAND_MAX + 1) * (N)) +#define RANDR_RANGE(N) ((double)rand_r(&seed) / ((double)RAND_MAX + 1) * (N)) +static int seeded = 0; + +/** Check wheter seeded, if not seed the generator with current time */ +static void +check_seed() +{ + if(!seeded) { + srand(0); + seeded = 1; + } +} + +/** + * Shuffle tuples of the relation using Knuth shuffle. + * + * @param relation + */ +void +knuth_shuffle(int* arr, int num_tuples) +{ + int i; + for (i = num_tuples - 1; i > 0; i--) { + int j = RAND_RANGE(i); + int tmp = arr[i]; + arr[i] = arr[j]; + arr[j] = tmp; + } +} + + +/** + * Generate unique tuple IDs with Knuth shuffling + * relation must have been allocated + */ +void +random_unique_gen(int*& arr, int num_tuples) +{ + int i; + + for (i = 0; i < num_tuples; i++) { + arr[i] = (i+1); + } + + /* randomly shuffle elements */ + knuth_shuffle(arr, num_tuples); +} + +void +dummy_initialize(int*& arr, int num_tuples) { + for (int i = 0; i < num_tuples; i++) { + arr[i] = i; + } +} + +int +create_relation_pk(int*& keys, int*& vals, int num_tuples) +{ + check_seed(); + + keys = (int*)_mm_malloc(num_tuples * sizeof(int), 256); + vals = (int*)_mm_malloc(num_tuples * sizeof(int), 256); + + if (!keys || !vals) { + perror("out of memory"); + return -1; + } + + random_unique_gen(keys, num_tuples); + dummy_initialize(vals, num_tuples); + + return 0; +} + +int create_relation_fk(int*& keys, int*& vals, int num_tuples, const int maxid) +{ + int i, iters, remainder; + + check_seed(); + keys = (int*)_mm_malloc(num_tuples * sizeof(int), 256); + vals = 
(int*)_mm_malloc(num_tuples * sizeof(int), 256); + + if (!keys || !vals) { + perror("out of memory"); + return -1; + } + + // alternative generation method + iters = num_tuples / maxid; + for (i = 0; i < iters; i++) { + int* tuples = keys + maxid * i; + random_unique_gen(tuples, maxid); + } + + // if num_tuples is not an exact multiple of maxid + remainder = num_tuples % maxid; + if (remainder > 0) { + int* tuples = keys + maxid * iters; + random_unique_gen(tuples, remainder); + } + + dummy_initialize(vals, num_tuples); + return 0; +} + +/* +typedef struct rand_state_64 { + uint64_t num[313]; + size_t index; +} rand64_t; + +rand64_t *rand64_init(uint64_t seed) +{ + rand64_t *state = malloc(sizeof(rand64_t)); + uint64_t *n = state->num; + size_t i; + n[0] = seed; + for (i = 0 ; i != 311 ; ++i) + n[i + 1] = 6364136223846793005ull * + (n[i] (n[i] >> 62)) + i + 1; + state->index = 312; + return state; +} + +uint64_t rand64_next(rand64_t *state) +{ + uint64_t x, *n = state->num; + if (state->index == 312) { + size_t i = 0; + do { + x = n[i] & 0xffffffff80000000ull; + x |= n[i + 1] & 0x7fffffffull; + n[i] = n[i + 156] (x >> 1); + n[i] = 0xb5026f5aa96619e9ull & -(x & 1); + } while (++i != 156); + n[312] = n[0]; + do { + x = n[i] & 0xffffffff80000000ull; + x |= n[i + 1] & 0x7fffffffull; + n[i] = n[i - 156] (x >> 1); + n[i] = 0xb5026f5aa96619e9ull & -(x & 1); + } while (++i != 312); + state->index = 0; + } + x = n[state->index++]; + x = (x >> 29) & 0x5555555555555555ull; + x = (x << 17) & 0x71d67fffeda60000ull; + x = (x << 37) & 0xfff7eee000000000ull; + x = (x >> 43); + return x; +} + +typedef struct rand_state_32 { + uint32_t num[625]; + size_t index; +} rand32_t; + +rand32_t *rand32_init(uint32_t seed) +{ + rand32_t *state = malloc(sizeof(rand32_t)); + uint32_t *n = state->num; + size_t i; + n[0] = seed; + for (i = 0 ; i != 623 ; ++i) + n[i + 1] = 0x6c078965 * (n[i] (n[i] >> 30)); + state->index = 624; + return state; +} + +uint32_t rand32_next(rand32_t *state) +{ + 
uint32_t y, *n = state->num; + if (state->index == 624) { + size_t i = 0; + do { + y = n[i] & 0x80000000; + y += n[i + 1] & 0x7fffffff; + n[i] = n[i + 397] (y >> 1); + n[i] = 0x9908b0df & -(y & 1); + } while (++i != 227); + n[624] = n[0]; + do { + y = n[i] & 0x80000000; + y += n[i + 1] & 0x7fffffff; + n[i] = n[i - 227] (y >> 1); + n[i] = 0x9908b0df & -(y & 1); + } while (++i != 624); + state->index = 0; + } + y = n[state->index++]; + y = (y >> 11); + y = (y << 7) & 0x9d2c5680; + y = (y << 15) & 0xefc60000; + y = (y >> 18); + return y; +} + +static int hardware_threads(void) +{ + char name[64]; + struct stat st; + int threads = -1; + do { + sprintf(name, "/sys/devices/system/cpu/cpu%d", ++threads); + } while (stat(name, &st) == 0); + return threads; +} + +static void *mamalloc(size_t size) +{ + void *p = NULL; + return posix_memalign(&p, 64, size) ? NULL : p; +} + +typedef struct { + pthread_t id; + int seed; + int thread; + int threads; + uint32_t hash_factor; + uint32_t invalid_key; + uint32_t *inner; + uint32_t *outer; + volatile uint32_t *table; + size_t inner_size; + size_t outer_size; + size_t table_size; + size_t join_size; + double selectivity; + pthread_barrier_t *barrier; +} info_t; + +static void *run(void *arg) +{ + info_t *d = (info_t*) arg; + assert(pthread_equal(pthread_self(), d->id)); + int thread = d->thread; + int threads = d->threads; + uint32_t hash_factor = d->hash_factor; + uint32_t invalid_key = d->invalid_key; + uint32_t *inner = d->inner; + uint32_t *outer = d->outer; + volatile uint32_t *table = d->table; + size_t i, o, t, h; + size_t inner_size = d->inner_size; + size_t outer_size = d->outer_size; + size_t table_size = d->table_size; + size_t inner_beg = (inner_size / threads) * thread; + size_t inner_end = (inner_size / threads) * (thread + 1); + size_t outer_beg = (outer_size / threads) * thread; + size_t outer_end = (outer_size / threads) * (thread + 1); + size_t table_beg = (table_size / threads) * thread; + size_t table_end = 
(table_size / threads) * (thread + 1); + if (thread + 1 == threads) { + inner_end = inner_size; + outer_end = outer_size; + table_end = table_size; + } + for (t = table_beg ; t != table_end ; ++t) + table[t] = invalid_key; + pthread_barrier_wait(&d->barrier[0]); + rand32_t *gen = rand32_init(d->seed); + for (i = inner_beg ; i != inner_end ; ++i) { + int new_key_inserted = 0; + uint32_t key; + do { + do { + key = rand32_next(gen); + } while (key == invalid_key); + h = (uint32_t) (key * hash_factor); + h = (h * table_size) >> 32; + for (;;) { + if (table[h] == invalid_key && + __sync_bool_compare_and_swap(&table[h], invalid_key, key)) { + new_key_inserted = 1; + break; + } + if (table[h] == key) break; + if (++h == table_size) h = 0; + } + } while (new_key_inserted == 0); + inner[i] = key; + } + pthread_barrier_wait(&d->barrier[1]); + size_t join_size = 0; + uint32_t limit = ~0; + limit *= d->selectivity; + for (o = outer_beg ; o != outer_end ; ++o) { + uint32_t key; + if (rand32_next(gen) <= limit) { + i = rand32_next(gen); + i = (i * inner_size) >> 32; + key = inner[i]; + join_size++; + } else do { + do { + key = rand32_next(gen); + } while (key == invalid_key); + h = (uint32_t) (key * hash_factor); + h = (h * table_size) >> 32; + while (table[h] != invalid_key) { + if (table[h] == key) break; + if (++h == table_size) h = 0; + } + } while (table[h] == key); + outer[o] = key; + } + free(gen); + d->join_size = join_size; + pthread_exit(NULL); +} + +size_t inner_outer(size_t inner_size, size_t outer_size, double selectivity, + uint32_t **inner_p, uint32_t **outer_p) +{ + srand(time(NULL)); + int t, threads = hardware_threads(); + // input arguments + assert(inner_size <= 1000 * 1000 * 1000); + assert(selectivity >= 0.0 && selectivity <= 1.0); + // tables + uint32_t *inner = mamalloc((inner_size + 1) * sizeof(uint32_t)); + uint32_t *outer = mamalloc(outer_size * sizeof(uint32_t)); + size_t table_size = inner_size / 0.7; + uint32_t *table = malloc(table_size * 
sizeof(uint32_t)); + // constants + uint32_t hash_factor = (rand() << 1) | 1; + uint32_t invalid_key = rand() * rand(); + // barriers + int b, barriers = 2; + pthread_barrier_t barrier[barriers]; + for (b = 0 ; b != barriers ; ++b) + pthread_barrier_init(&barrier[b], NULL, threads); + // run threads + info_t info[threads]; + for (t = 0 ; t != threads ; ++t) { + info[t].seed = rand(); + info[t].thread = t; + info[t].threads = threads; + info[t].hash_factor = hash_factor; + info[t].invalid_key = invalid_key; + info[t].selectivity = selectivity; + info[t].inner = inner; + info[t].outer = outer; + info[t].table = table; + info[t].inner_size = inner_size; + info[t].outer_size = outer_size; + info[t].table_size = table_size; + info[t].barrier = barrier; + pthread_create(&info[t].id, NULL, run, (void*) &info[t]); + } + size_t join_size = 0; + for (t = 0 ; t != threads ; ++t) { + pthread_join(info[t].id, NULL); + join_size += info[t].join_size; + } + // cleanup + for (b = 0 ; b != barriers ; ++b) + pthread_barrier_destroy(&barrier[b]); + free(table); + // pass output + inner[inner_size] = invalid_key; + *inner_p = inner; + *outer_p = outer; + return join_size; +} +*/ diff --git a/crystal-opt/src/ops/utils/gpu_utils.h b/crystal-opt/src/ops/utils/gpu_utils.h new file mode 100644 index 0000000..1af7526 --- /dev/null +++ b/crystal-opt/src/ops/utils/gpu_utils.h @@ -0,0 +1,57 @@ +// MIT License + +// Copyright (c) 2023 Jiashen Cao + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or 
substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#define SETUP_TIMING() cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); + +#define TIME_FUNC(f,t) { \ + cudaEventRecord(start, 0); \ + f; \ + cudaEventRecord(stop, 0); \ + cudaEventSynchronize(stop); \ + cudaEventElapsedTime(&t, start,stop); \ +} + +#define CLEANUP(vec) if(vec)CubDebugExit(g_allocator.DeviceFree(vec)) + +#define ALLOCATE(vec,size) CubDebugExit(g_allocator.DeviceAllocate((void**)&vec, size)) + +template +T* loadToGPU(T* src, int numEntries, cub::CachingDeviceAllocator& g_allocator) { + T* dest; + CubDebugExit(g_allocator.DeviceAllocate((void**)&dest, sizeof(T) * numEntries)); + CubDebugExit(cudaMemcpy(dest, src, sizeof(T) * numEntries, cudaMemcpyHostToDevice)); + return dest; +} + +#define TILE_SIZE (BLOCK_THREADS * ITEMS_PER_THREAD) + +#define CHECK_ERROR() { \ + cudaDeviceSynchronize(); \ + cudaError_t error = cudaGetLastError(); \ + if(error != cudaSuccess) \ + { \ + printf("CUDA error: %s\n", cudaGetErrorString(error)); \ + exit(-1); \ + } \ +} diff --git a/crystal-opt/src/ssb/all.cu b/crystal-opt/src/ssb/all.cu new file mode 100644 index 0000000..ab61e05 --- /dev/null +++ b/crystal-opt/src/ssb/all.cu @@ -0,0 +1,2734 @@ +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include +#include +#include +#include + +#include "cub/test/test_util.h" +#include +#include + +#include "crystal/crystal.cuh" + +#include "gpu_utils.h" +#include 
"ssb_utils.h" + +using namespace std; + +/** + * Globals, constants and typedefs + */ +bool g_verbose = false; // Whether to display input/output to console +cub::CachingDeviceAllocator + g_allocator(true); // Caching allocator for device memory + +template +__global__ void q11_kernel(int *lo_orderdate, int *lo_discount, + int *lo_quantity, int *lo_extendedprice, + int lo_num_entries, unsigned long long *revenue) +{ + // Load a segment of consecutive items that are blocked across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + + long long sum = 0; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_num_entries + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = lo_num_entries - tile_offset; + } + + BlockLoad(lo_orderdate + tile_offset, + items, num_tile_items); + BlockPredGT( + items, 19930000, selection_flags, num_tile_items); + BlockPredAndLT( + items, 19940000, selection_flags, num_tile_items); + + BlockPredLoad( + lo_quantity + tile_offset, items, num_tile_items, selection_flags); + BlockPredAndLT( + items, 25, selection_flags, num_tile_items); + + BlockPredLoad( + lo_discount + tile_offset, items, num_tile_items, selection_flags); + BlockPredAndGTE( + items, 1, selection_flags, num_tile_items); + BlockPredAndLTE( + items, 3, selection_flags, num_tile_items); + + BlockPredLoad( + lo_extendedprice + tile_offset, items2, num_tile_items, selection_flags); + +#pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + if ((threadIdx.x + (BLOCK_THREADS * ITEM) < num_tile_items)) + if (selection_flags[ITEM]) + sum += items[ITEM] * items2[ITEM]; + } + + __syncthreads(); + + static __shared__ long long buffer[32]; + unsigned long long aggregate = + BlockSum(sum, + (long long *)buffer); + __syncthreads(); + + if (threadIdx.x == 0) + { + atomicAdd(revenue, aggregate); + } +} + +void run_q11(int 
*lo_orderdate, int *lo_discount, int *lo_quantity, + int *lo_extendedprice, int lo_num_entries, + cub::CachingDeviceAllocator &g_allocator) +{ + // Setup + unsigned long long *d_sum = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void **)&d_sum, sizeof(long long))); + cudaMemset(d_sum, 0, sizeof(long long)); + + // Run + int tile_items = 128 * 4; + int num_blocks = (lo_num_entries + tile_items - 1) / tile_items; + q11_kernel<128, 4><<>>(lo_orderdate, lo_discount, + lo_quantity, lo_extendedprice, + lo_num_entries, d_sum); + + // Finalize results + unsigned long long revenue; + CubDebugExit( + cudaMemcpy(&revenue, d_sum, sizeof(long long), cudaMemcpyDeviceToHost)); + CLEANUP(d_sum); +} + +template +__global__ void q12_kernel(int *lo_orderdate, int *lo_discount, + int *lo_quantity, int *lo_extendedprice, + int lo_num_entries, unsigned long long *revenue) +{ + // Load a segment of consecutive items that are blocked across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + + long long sum = 0; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_num_entries + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = lo_num_entries - tile_offset; + } + + BlockLoad(lo_orderdate + tile_offset, + items, num_tile_items); + BlockPredGTE( + items, 19940101, selection_flags, num_tile_items); + BlockPredAndLTE( + items, 19940131, selection_flags, num_tile_items); + + BlockPredLoad( + lo_quantity + tile_offset, items, num_tile_items, selection_flags); + BlockPredAndGTE( + items, 26, selection_flags, num_tile_items); + BlockPredAndLTE( + items, 35, selection_flags, num_tile_items); + + BlockPredLoad( + lo_discount + tile_offset, items, num_tile_items, selection_flags); + BlockPredAndGTE( + items, 4, selection_flags, num_tile_items); + BlockPredAndLTE( + items, 6, selection_flags, num_tile_items); + + BlockPredLoad( + lo_extendedprice + 
tile_offset, items2, num_tile_items, selection_flags); + +#pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + if (threadIdx.x + (BLOCK_THREADS * ITEM) < num_tile_items) + if (selection_flags[ITEM]) + sum += items[ITEM] * items2[ITEM]; + } + + __syncthreads(); + + static __shared__ long long buffer[32]; + unsigned long long aggregate = + BlockSum(sum, + (long long *)buffer); + __syncthreads(); + + if (threadIdx.x == 0) + { + atomicAdd(revenue, aggregate); + } +} + +void run_q12(int *lo_orderdate, int *lo_discount, int *lo_quantity, + int *lo_extendedprice, int lo_num_entries, + cub::CachingDeviceAllocator &g_allocator) +{ + // Setup + unsigned long long *d_sum = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void **)&d_sum, sizeof(long long))); + cudaMemset(d_sum, 0, sizeof(long long)); + + // Run + int tile_items = 128 * 4; + q12_kernel<128, 4><<<(lo_num_entries + tile_items - 1) / tile_items, 128>>>( + lo_orderdate, lo_discount, lo_quantity, lo_extendedprice, lo_num_entries, + d_sum); + + // Finalize results + unsigned long long revenue; + CubDebugExit( + cudaMemcpy(&revenue, d_sum, sizeof(long long), cudaMemcpyDeviceToHost)); + CLEANUP(d_sum); +} + +template +__global__ void q13_kernel(int *lo_orderdate, int *lo_discount, + int *lo_quantity, int *lo_extendedprice, + int lo_num_entries, unsigned long long *revenue) +{ + // Load a segment of consecutive items that are blocked across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + + long long sum = 0; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_num_entries + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = lo_num_entries - tile_offset; + } + + BlockLoad(lo_orderdate + tile_offset, + items, num_tile_items); + BlockPredGTE( + items, 19940204, selection_flags, num_tile_items); + BlockPredAndLTE( + items, 19940210, selection_flags, 
num_tile_items); + + BlockPredLoad( + lo_quantity + tile_offset, items, num_tile_items, selection_flags); + BlockPredAndGTE( + items, 26, selection_flags, num_tile_items); + BlockPredAndLTE( + items, 35, selection_flags, num_tile_items); + + BlockPredLoad( + lo_discount + tile_offset, items, num_tile_items, selection_flags); + BlockPredAndGTE( + items, 5, selection_flags, num_tile_items); + BlockPredAndLTE( + items, 7, selection_flags, num_tile_items); + + BlockPredLoad( + lo_extendedprice + tile_offset, items2, num_tile_items, selection_flags); + +#pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + if ((threadIdx.x + (BLOCK_THREADS * ITEM) < num_tile_items)) + if (selection_flags[ITEM]) + sum += items[ITEM] * items2[ITEM]; + } + + __syncthreads(); + + static __shared__ long long buffer[32]; + unsigned long long aggregate = + BlockSum(sum, + (long long *)buffer); + __syncthreads(); + + if (threadIdx.x == 0) + { + atomicAdd(revenue, aggregate); + } +} + +void run_q13(int *lo_orderdate, int *lo_discount, int *lo_quantity, + int *lo_extendedprice, int lo_num_entries, + cub::CachingDeviceAllocator &g_allocator) +{ + // Setup + unsigned long long *d_sum = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void **)&d_sum, sizeof(long long))); + cudaMemset(d_sum, 0, sizeof(long long)); + + // Run + int tile_items = 128 * 4; + q13_kernel<128, 4><<<(lo_num_entries + tile_items - 1) / tile_items, 128>>>( + lo_orderdate, lo_discount, lo_quantity, lo_extendedprice, lo_num_entries, + d_sum); + + // Finalize results + unsigned long long revenue; + CubDebugExit( + cudaMemcpy(&revenue, d_sum, sizeof(long long), cudaMemcpyDeviceToHost)); + CLEANUP(d_sum); +} + +template +__global__ void q21_kernel4(int *lo_orderdate, int *lo_partkey, int *lo_suppkey, + int *lo_revenue, int lo_len, int *ht_s, int s_len, + int *ht_p, int p_len, int *ht_d, int d_len, + int *res) +{ + // Load a tile striped across threads + int items[ITEMS_PER_THREAD]; + int 
selection_flags[ITEMS_PER_THREAD]; + int brand[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, + items, num_tile_items); + BlockProbeAndPHT_1( + items, selection_flags, ht_s, s_len, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_partkey + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, brand, selection_flags, ht_p, p_len, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_orderdate + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, year, selection_flags, ht_d, d_len, 19920101, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_revenue + tile_offset, revenue, num_tile_items, selection_flags); + if (IsTerm(selection_flags)) + { + return; + } + +#pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + if ((threadIdx.x + (BLOCK_THREADS * ITEM)) < num_tile_items) + { + if (selection_flags[ITEM]) + { + int hash = (brand[ITEM] * 7 + (year[ITEM] - 1992)) % + ((1998 - 1992 + 1) * (5 * 5 * 40)); + res[hash * 4] = year[ITEM]; + res[hash * 4 + 1] = brand[ITEM]; + atomicAdd(reinterpret_cast(&res[hash * 4 + 2]), + (long long)(revenue[ITEM])); + } + } + } +} + +template +__global__ void q21_kernel1(int *filter_col, int *dim_key, int num_tuples, + int *hash_table, int num_slots) +{ + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = 
num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 1, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockBuildSelectivePHT_1( + items, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void q21_kernel2(int *filter_col, int *dim_key, int *dim_val, + int num_tuples, int *hash_table, int num_slots) +{ + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 1, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void q21_kernel3(int *dim_key, int *dim_val, int num_tuples, + int *hash_table, int num_slots, int val_min) +{ + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, num_slots, val_min, + num_tile_items); +} + +void run_q21(int *lo_orderdate, int *lo_partkey, int *lo_suppkey, + int *lo_revenue, int lo_len, int 
*p_partkey, int *p_brand1, + int *p_category, int p_len, int *d_datekey, int *d_year, int d_len, + int *s_suppkey, int *s_region, int s_len, + cub::CachingDeviceAllocator &g_allocator) +{ + // Setup + int *ht_d, *ht_p, *ht_s; + int d_val_len = 19981230 - 19920101 + 1; + int *res; + int res_size = ((1998 - 1992 + 1) * (5 * 5 * 40)); + int res_array_size = res_size * 4; + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_d, 2 * d_val_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_p, 2 * p_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_s, 2 * s_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_p, 0, 2 * p_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&res, res_array_size * sizeof(int))); + CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int))); + + // Run queries + int tile_items = 128 * 4; + int d_val_min = 19920101; + q21_kernel1<128, 4><<<(s_len + tile_items - 1) / tile_items, 128>>>( + s_region, s_suppkey, s_len, ht_s, s_len); + q21_kernel2<128, 4><<<(p_len + tile_items - 1) / tile_items, 128>>>( + p_category, p_partkey, p_brand1, p_len, ht_p, p_len); + q21_kernel3<128, 4><<<(d_len + tile_items - 1) / tile_items, 128>>>( + d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min); + q21_kernel4<128, 4><<<(lo_len + tile_items - 1) / tile_items, 128>>>( + lo_orderdate, lo_partkey, lo_suppkey, lo_revenue, lo_len, ht_s, s_len, + ht_p, p_len, ht_d, d_val_len, res); + + // Finalize results + int *h_res = new int[res_array_size]; + CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int), + cudaMemcpyDeviceToHost)); + delete[] h_res; + CLEANUP(res); + CLEANUP(ht_d); + CLEANUP(ht_p); + CLEANUP(ht_s); +} + +template +__global__ void q22_kernel4(int *lo_orderdate, int *lo_partkey, int *lo_suppkey, + int *lo_revenue, int lo_len, int *ht_s, 
int s_len, + int *ht_p, int p_len, int *ht_d, int d_len, + int *res) +{ + // Load a tile striped across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int brand[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, + items, num_tile_items); + BlockProbeAndPHT_1( + items, selection_flags, ht_s, s_len, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_partkey + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, brand, selection_flags, ht_p, p_len, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_orderdate + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, year, selection_flags, ht_d, d_len, 19920101, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_revenue + tile_offset, revenue, num_tile_items, selection_flags); + +#pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + if ((threadIdx.x + (BLOCK_THREADS * ITEM)) < num_tile_items) + { + if (selection_flags[ITEM]) + { + int hash = (brand[ITEM] * 7 + (year[ITEM] - 1992)) % + ((1998 - 1992 + 1) * (5 * 5 * 40)); + res[hash * 4] = year[ITEM]; + res[hash * 4 + 1] = brand[ITEM]; + atomicAdd(reinterpret_cast(&res[hash * 4 + 2]), + (long long)(revenue[ITEM])); + } + } + } +} + +template +__global__ void q22_kernel1(int *filter_col, int *dim_key, int num_tuples, + int *hash_table, int num_slots) +{ + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / 
TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 2, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockBuildSelectivePHT_1( + items, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void q22_kernel2(int *dim_key, int *dim_val, int num_tuples, + int *hash_table, int num_slots) +{ + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(dim_val + tile_offset, items, + num_tile_items); + BlockPredGTE( + items, 260, selection_flags, num_tile_items); + BlockPredAndLTE( + items, 267, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items2, items, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void q22_kernel3(int *dim_key, int *dim_val, int num_tuples, + int *hash_table, int num_slots, int val_min) +{ + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, num_slots, val_min, + num_tile_items); +} + +void 
run_q22(int *lo_orderdate, int *lo_partkey, int *lo_suppkey, + int *lo_revenue, int lo_len, int *p_partkey, int *p_brand1, + int p_len, int *d_datekey, int *d_year, int d_len, int *s_suppkey, + int *s_region, int s_len, + cub::CachingDeviceAllocator &g_allocator) +{ + int *ht_d, *ht_p, *ht_s; + int d_val_len = 19981230 - 19920101 + 1; + int *res; + int res_size = ((1998 - 1992 + 1) * 1000); + int res_array_size = res_size * 4; + CubDebugExit( + g_allocator.DeviceAllocate((void **)&res, res_array_size * sizeof(int))); + CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_d, 2 * d_val_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_p, 2 * p_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_s, 2 * s_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_p, 0, 2 * p_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int))); + + // Run queries + int tile_items = 128 * 4; + int d_val_min = 19920101; + q22_kernel1<128, 4><<<(s_len + tile_items - 1) / tile_items, 128>>>( + s_region, s_suppkey, s_len, ht_s, s_len); + q22_kernel2<128, 4><<<(p_len + tile_items - 1) / tile_items, 128>>>( + p_partkey, p_brand1, p_len, ht_p, p_len); + q22_kernel3<128, 4><<<(d_len + tile_items - 1) / tile_items, 128>>>( + d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min); + q22_kernel4<128, 4><<<(lo_len + tile_items - 1) / tile_items, 128>>>( + lo_orderdate, lo_partkey, lo_suppkey, lo_revenue, lo_len, ht_s, s_len, + ht_p, p_len, ht_d, d_val_len, res); + + // Finalize results + int *h_res = new int[res_array_size]; + CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int), + cudaMemcpyDeviceToHost)); + delete[] h_res; + CLEANUP(res); + CLEANUP(ht_d); + CLEANUP(ht_p); + CLEANUP(ht_s); +} + +template +__global__ void q23_kernel4(int *lo_orderdate, int *lo_partkey, int 
*lo_suppkey, + int *lo_revenue, int lo_len, int *ht_s, int s_len, + int *ht_p, int p_len, int *ht_d, int d_len, + int *res) +{ + // Load a tile striped across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int brand[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, + items, num_tile_items); + BlockProbeAndPHT_1( + items, selection_flags, ht_s, s_len, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_partkey + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, brand, selection_flags, ht_p, p_len, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_orderdate + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, year, selection_flags, ht_d, d_len, 19920101, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_revenue + tile_offset, revenue, num_tile_items, selection_flags); + +#pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + if ((threadIdx.x + (BLOCK_THREADS * ITEM)) < num_tile_items) + { + if (selection_flags[ITEM]) + { + int hash = (brand[ITEM] * 7 + (year[ITEM] - 1992)) % + ((1998 - 1992 + 1) * (5 * 5 * 40)); + res[hash * 4] = year[ITEM]; + res[hash * 4 + 1] = brand[ITEM]; + atomicAdd(reinterpret_cast(&res[hash * 4 + 2]), + (long long)(revenue[ITEM])); + } + } + } +} + +template +__global__ void q23_kernel1(int *filter_col, int *dim_key, int num_tuples, + int *hash_table, int num_slots) +{ + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + 
int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 3, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockBuildSelectivePHT_1( + items, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void q23_kernel2(int *dim_key, int *dim_val, int num_tuples, + int *hash_table, int num_slots) +{ + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(dim_val + tile_offset, items, + num_tile_items); + BlockPredEQ(items, 260, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items2, items, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void q23_kernel3(int *dim_key, int *dim_val, int num_tuples, + int *hash_table, int num_slots, int val_min) +{ + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, num_slots, val_min, + num_tile_items); +} + +void run_q23(int *lo_orderdate, int 
*lo_partkey, int *lo_suppkey, + int *lo_revenue, int lo_len, int *p_partkey, int *p_brand1, + int p_len, int *d_datekey, int *d_year, int d_len, int *s_suppkey, + int *s_region, int s_len, + cub::CachingDeviceAllocator &g_allocator) +{ + int *ht_d, *ht_p, *ht_s; + int d_val_len = 19981230 - 19920101 + 1; + int *res; + int res_size = ((1998 - 1992 + 1) * 1000); + int res_array_size = res_size * 4; + CubDebugExit( + g_allocator.DeviceAllocate((void **)&res, res_array_size * sizeof(int))); + CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_d, 2 * d_val_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_p, 2 * p_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_s, 2 * s_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_p, 0, 2 * p_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int))); + + // Run queries + int tile_items = 128 * 4; + int d_val_min = 19920101; + q23_kernel1<128, 4><<<(s_len + tile_items - 1) / tile_items, 128>>>( + s_region, s_suppkey, s_len, ht_s, s_len); + q23_kernel2<128, 4><<<(p_len + tile_items - 1) / tile_items, 128>>>( + p_partkey, p_brand1, p_len, ht_p, p_len); + q23_kernel3<128, 4><<<(d_len + tile_items - 1) / tile_items, 128>>>( + d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min); + q23_kernel4<128, 4><<<(lo_len + tile_items - 1) / tile_items, 128>>>( + lo_orderdate, lo_partkey, lo_suppkey, lo_revenue, lo_len, ht_s, s_len, + ht_p, p_len, ht_d, d_val_len, res); + + // Finalize results + int *h_res = new int[res_array_size]; + CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int), + cudaMemcpyDeviceToHost)); + delete[] h_res; + CLEANUP(res); + CLEANUP(ht_d); + CLEANUP(ht_p); + CLEANUP(ht_s); +} + +template +__global__ void q31_kernel4(int *lo_orderdate, int *lo_custkey, int *lo_suppkey, + int *lo_revenue, int 
lo_len, int *ht_s, int s_len, + int *ht_c, int c_len, int *ht_d, int d_len, + int *res) +{ + // Load a segment of consecutive items that are blocked across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int c_nation[ITEMS_PER_THREAD]; + int s_nation[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, + items, num_tile_items); + BlockProbeAndPHT_2( + items, s_nation, selection_flags, ht_s, s_len, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_custkey + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, c_nation, selection_flags, ht_c, c_len, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_orderdate + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, year, selection_flags, ht_d, d_len, 19920101, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_revenue + tile_offset, revenue, num_tile_items, selection_flags); + +#pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + if ((threadIdx.x + (BLOCK_THREADS * ITEM)) < num_tile_items) + { + if (selection_flags[ITEM]) + { + int hash = (s_nation[ITEM] * 25 * 7 + c_nation[ITEM] * 7 + + (year[ITEM] - 1992)) % + ((1998 - 1992 + 1) * 25 * 25); + res[hash * 6] = year[ITEM]; + res[hash * 6 + 1] = c_nation[ITEM]; + res[hash * 6 + 2] = s_nation[ITEM]; + /*atomicAdd(&res[hash * 6 + 4], revenue[ITEM]);*/ + atomicAdd(reinterpret_cast(&res[hash * 6 + 4]), + (long long)(revenue[ITEM])); + } + } + } +} + +template +__global__ void q31_kernel1(int *filter_col, int *dim_key, int 
*dim_val, + int num_tuples, int *hash_table, int num_slots) +{ + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 2, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void q31_kernel2(int *filter_col, int *dim_key, int *dim_val, + int num_tuples, int *hash_table, int num_slots) +{ + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 2, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void q31_kernel3(int *dim_key, int *dim_val, int num_tuples, + int *hash_table, int num_slots, int val_min) +{ + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + 
{ + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(dim_val + tile_offset, items, + num_tile_items); + BlockPredGTE( + items, 1992, selection_flags, num_tile_items); + BlockPredLTE( + items, 1997, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items2, items, selection_flags, hash_table, num_slots, 19920101, + num_tile_items); +} + +void run_q31(int *lo_orderdate, int *lo_custkey, int *lo_suppkey, + int *lo_revenue, int lo_len, int *d_datekey, int *d_year, + int d_len, int *s_suppkey, int *s_region, int *s_nation, int s_len, + int *c_custkey, int *c_region, int *c_nation, int c_len, + cub::CachingDeviceAllocator &g_allocator) +{ + // Setup + int *ht_d, *ht_c, *ht_s; + int d_val_len = 19981230 - 19920101 + 1; + int *res; + int res_size = ((1998 - 1992 + 1) * 25 * 25); + int res_array_size = res_size * 6; + CubDebugExit( + g_allocator.DeviceAllocate((void **)&res, res_array_size * sizeof(int))); + CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_d, 2 * d_val_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_s, 2 * s_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_c, 2 * c_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_c, 0, 2 * c_len * sizeof(int))); + + // Run queries + int tile_items = 128 * 4; + int d_val_min = 19920101; + q31_kernel1<128, 4><<<(s_len + tile_items - 1) / tile_items, 128>>>( + s_region, s_suppkey, s_nation, s_len, ht_s, s_len); + q31_kernel2<128, 4><<<(c_len + tile_items - 1) / tile_items, 128>>>( + c_region, c_custkey, c_nation, c_len, ht_c, c_len); + q31_kernel3<128, 4><<<(d_len + tile_items - 1) / tile_items, 128>>>( + d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min); + q31_kernel4<128, 
4><<<(lo_len + tile_items - 1) / tile_items, 128>>>( + lo_orderdate, lo_custkey, lo_suppkey, lo_revenue, lo_len, ht_s, s_len, + ht_c, c_len, ht_d, d_val_len, res); + + // Finalize results + int *h_res = new int[res_array_size]; + CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int), + cudaMemcpyDeviceToHost)); + delete[] h_res; + CLEANUP(ht_d); + CLEANUP(ht_s); + CLEANUP(ht_c); + CLEANUP(res); +} + +template +__global__ void q32_kernel4(int *lo_orderdate, int *lo_custkey, int *lo_suppkey, + int *lo_revenue, int lo_len, int *ht_s, int s_len, + int *ht_c, int c_len, int *ht_d, int d_len, + int *res) +{ + // Load a segment of consecutive items that are blocked across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int c_nation[ITEMS_PER_THREAD]; + int s_nation[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, + items, num_tile_items); + BlockProbeAndPHT_2( + items, s_nation, selection_flags, ht_s, s_len, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_custkey + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, c_nation, selection_flags, ht_c, c_len, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_orderdate + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, year, selection_flags, ht_d, d_len, 19920101, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_revenue + tile_offset, revenue, num_tile_items, selection_flags); + +#pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + if ((threadIdx.x + 
(BLOCK_THREADS * ITEM)) < num_tile_items) + { + if (selection_flags[ITEM]) + { + int hash = (s_nation[ITEM] * 250 * 7 + c_nation[ITEM] * 7 + + (year[ITEM] - 1992)) % + ((1998 - 1992 + 1) * 250 * 250); + res[hash * 4] = year[ITEM]; + res[hash * 4 + 1] = c_nation[ITEM]; + res[hash * 4 + 2] = s_nation[ITEM]; + atomicAdd(&res[hash * 4 + 3], revenue[ITEM]); + } + } + } +} + +template +__global__ void q32_kernel1(int *filter_col, int *dim_key, int *dim_val, + int num_tuples, int *hash_table, int num_slots) +{ + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 24, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void q32_kernel2(int *filter_col, int *dim_key, int *dim_val, + int num_tuples, int *hash_table, int num_slots) +{ + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 24, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, 
num_slots, num_tile_items); +} + +template +__global__ void q32_kernel3(int *dim_key, int *dim_val, int num_tuples, + int *hash_table, int num_slots, int val_min) +{ + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(dim_val + tile_offset, items, + num_tile_items); + BlockPredGTE( + items, 1992, selection_flags, num_tile_items); + BlockPredAndLTE( + items, 1997, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items2, items, selection_flags, hash_table, num_slots, 19920101, + num_tile_items); +} + +void run_q32(int *lo_orderdate, int *lo_custkey, int *lo_suppkey, + int *lo_revenue, int lo_len, int *d_datekey, int *d_year, + int d_len, int *s_suppkey, int *s_nation, int *s_city, int s_len, + int *c_custkey, int *c_nation, int *c_city, int c_len, + cub::CachingDeviceAllocator &g_allocator) +{ + // Setup + int *ht_d, *ht_c, *ht_s; + int d_val_len = 19981230 - 19920101 + 1; + int *res; + int res_size = ((1998 - 1992 + 1) * 250 * 250); + int res_array_size = res_size * 4; + CubDebugExit( + g_allocator.DeviceAllocate((void **)&res, res_array_size * sizeof(int))); + CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_d, 2 * d_val_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_s, 2 * s_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_c, 2 * c_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_c, 0, 2 * c_len * sizeof(int))); + + // Run queries + int 
tile_items = 128 * 4; + int d_val_min = 19920101; + q32_kernel1<128, 4><<<(s_len + tile_items - 1) / tile_items, 128>>>( + s_nation, s_suppkey, s_city, s_len, ht_s, s_len); + q32_kernel2<128, 4><<<(c_len + tile_items - 1) / tile_items, 128>>>( + c_nation, c_custkey, c_city, c_len, ht_c, c_len); + q32_kernel3<128, 4><<<(d_len + tile_items - 1) / tile_items, 128>>>( + d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min); + q32_kernel4<128, 4><<<(lo_len + tile_items - 1) / tile_items, 128>>>( + lo_orderdate, lo_custkey, lo_suppkey, lo_revenue, lo_len, ht_s, s_len, + ht_c, c_len, ht_d, d_val_len, res); + + // Finalize results + int *h_res = new int[res_array_size]; + CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int), + cudaMemcpyDeviceToHost)); + delete[] h_res; + CLEANUP(ht_d); + CLEANUP(ht_s); + CLEANUP(ht_c); + CLEANUP(res); +} + +template +__global__ void q33_kernel4(int *lo_orderdate, int *lo_custkey, int *lo_suppkey, + int *lo_revenue, int lo_len, int *ht_s, int s_len, + int *ht_c, int c_len, int *ht_d, int d_len, + int *res) +{ + // Load a segment of consecutive items that are blocked across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int c_nation[ITEMS_PER_THREAD]; + int s_nation[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, + items, num_tile_items); + BlockProbeAndPHT_2( + items, s_nation, selection_flags, ht_s, s_len, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_custkey + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, c_nation, selection_flags, ht_c, c_len, num_tile_items); + if 
(IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_orderdate + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, year, selection_flags, ht_d, d_len, 19920101, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_revenue + tile_offset, revenue, num_tile_items, selection_flags); + +#pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + if ((threadIdx.x + (BLOCK_THREADS * ITEM)) < num_tile_items) + { + if (selection_flags[ITEM]) + { + int hash = (s_nation[ITEM] * 250 * 7 + c_nation[ITEM] * 7 + + (year[ITEM] - 1992)) % + ((1998 - 1992 + 1) * 250 * 250); + res[hash * 4] = year[ITEM]; + res[hash * 4 + 1] = c_nation[ITEM]; + res[hash * 4 + 2] = s_nation[ITEM]; + atomicAdd(&res[hash * 4 + 3], revenue[ITEM]); + } + } + } +} + +template +__global__ void q33_kernel1(int *dim_key, int *dim_val, int num_tuples, + int *hash_table, int num_slots) +{ + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(dim_val + tile_offset, items, + num_tile_items); + BlockPredEQ(items, 231, selection_flags, + num_tile_items); + BlockPredOrEQ( + items, 235, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items2, items, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void q33_kernel2(int *dim_key, int *dim_val, int num_tuples, + int *hash_table, int num_slots) +{ + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int 
num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(dim_val + tile_offset, items, + num_tile_items); + BlockPredEQ(items, 231, selection_flags, + num_tile_items); + BlockPredOrEQ( + items, 235, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items2, items, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void q33_kernel3(int *dim_key, int *dim_val, int num_tuples, + int *hash_table, int num_slots, int val_min) +{ + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(dim_val + tile_offset, items, + num_tile_items); + BlockPredGTE( + items, 1992, selection_flags, num_tile_items); + BlockPredLTE( + items, 1997, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items2, items, selection_flags, hash_table, num_slots, 19920101, + num_tile_items); +} + +void run_q33(int *lo_orderdate, int *lo_custkey, int *lo_suppkey, + int *lo_revenue, int lo_len, int *d_datekey, int *d_year, + int d_len, int *s_suppkey, int *s_city, int s_len, int *c_custkey, + int *c_city, int c_len, cub::CachingDeviceAllocator &g_allocator) +{ + // Setup + int *ht_d, *ht_c, *ht_s; + int d_val_len = 19981230 - 19920101 + 1; + int *res; + int res_size = ((1998 - 1992 + 1) * 250 * 250); + int res_array_size = res_size * 4; + CubDebugExit( + g_allocator.DeviceAllocate((void **)&res, res_array_size * sizeof(int))); + CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_d, 2 * 
d_val_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_s, 2 * s_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_c, 2 * c_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_c, 0, 2 * c_len * sizeof(int))); + + // Run queries + int tile_items = 128 * 4; + int d_val_min = 19920101; + q33_kernel1<128, 4><<<(s_len + tile_items - 1) / tile_items, 128>>>( + s_suppkey, s_city, s_len, ht_s, s_len); + q33_kernel2<128, 4><<<(c_len + tile_items - 1) / tile_items, 128>>>( + c_custkey, c_city, c_len, ht_c, c_len); + q33_kernel3<128, 4><<<(d_len + tile_items - 1) / tile_items, 128>>>( + d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min); + q33_kernel4<128, 4><<<(lo_len + tile_items - 1) / tile_items, 128>>>( + lo_orderdate, lo_custkey, lo_suppkey, lo_revenue, lo_len, ht_s, s_len, + ht_c, c_len, ht_d, d_val_len, res); + + // Finalize results + int *h_res = new int[res_array_size]; + CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int), + cudaMemcpyDeviceToHost)); + delete[] h_res; + CLEANUP(ht_d); + CLEANUP(ht_s); + CLEANUP(ht_c); + CLEANUP(res); +} + +template +__global__ void q34_kernel4(int *lo_orderdate, int *lo_custkey, int *lo_suppkey, + int *lo_revenue, int lo_len, int *ht_s, int s_len, + int *ht_c, int c_len, int *ht_d, int d_len, + int *res) +{ + // Load a segment of consecutive items that are blocked across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int c_nation[ITEMS_PER_THREAD]; + int s_nation[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + 
BlockLoad(lo_suppkey + tile_offset, + items, num_tile_items); + BlockProbeAndPHT_2( + items, s_nation, selection_flags, ht_s, s_len, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_custkey + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, c_nation, selection_flags, ht_c, c_len, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_orderdate + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, year, selection_flags, ht_d, d_len, 19920101, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_revenue + tile_offset, revenue, num_tile_items, selection_flags); + +#pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + if ((threadIdx.x + (BLOCK_THREADS * ITEM)) < num_tile_items) + { + if (selection_flags[ITEM]) + { + int hash = (s_nation[ITEM] * 250 * 7 + c_nation[ITEM] * 7 + + (year[ITEM] - 1992)) % + ((1998 - 1992 + 1) * 250 * 250); + res[hash * 4] = year[ITEM]; + res[hash * 4 + 1] = c_nation[ITEM]; + res[hash * 4 + 2] = s_nation[ITEM]; + atomicAdd(&res[hash * 4 + 3], revenue[ITEM]); + } + } + } +} + +template +__global__ void q34_kernel1(int *dim_key, int *dim_val, int num_tuples, + int *hash_table, int num_slots) +{ + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(dim_val + tile_offset, items, + num_tile_items); + BlockPredEQ(items, 231, selection_flags, + num_tile_items); + BlockPredOrEQ( + items, 235, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items2, items, selection_flags, hash_table, 
num_slots, num_tile_items); +} + +template +__global__ void q34_kernel2(int *dim_key, int *dim_val, int num_tuples, + int *hash_table, int num_slots) +{ + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(dim_val + tile_offset, items, + num_tile_items); + BlockPredEQ(items, 231, selection_flags, + num_tile_items); + BlockPredOrEQ( + items, 235, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items2, items, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void q34_kernel3(int *filter_col, int *dim_key, int *dim_val, + int num_tuples, int *hash_table, int num_slots, + int val_min) +{ + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ( + items, 199712, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, num_slots, 19920101, + num_tile_items); +} + +void run_q34(int *lo_orderdate, int *lo_custkey, int *lo_suppkey, + int *lo_revenue, int lo_len, int *d_datekey, int *d_year, + int *d_yearmonthnum, int d_len, int *s_suppkey, int *s_city, + int s_len, int *c_custkey, int *c_city, int c_len, + cub::CachingDeviceAllocator &g_allocator) +{ 
+ // Setup + int *ht_d, *ht_c, *ht_s; + int d_val_len = 19981230 - 19920101 + 1; + int *res; + int res_size = ((1998 - 1992 + 1) * 250 * 250); + int res_array_size = res_size * 4; + CubDebugExit( + g_allocator.DeviceAllocate((void **)&res, res_array_size * sizeof(int))); + CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_d, 2 * d_val_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_s, 2 * s_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_c, 2 * c_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_c, 0, 2 * c_len * sizeof(int))); + + // Run queries + int tile_items = 128 * 4; + int d_val_min = 19920101; + q34_kernel1<128, 4><<<(s_len + tile_items - 1) / tile_items, 128>>>( + s_suppkey, s_city, s_len, ht_s, s_len); + q34_kernel2<128, 4><<<(c_len + tile_items - 1) / tile_items, 128>>>( + c_custkey, c_city, c_len, ht_c, c_len); + q34_kernel3<128, 4><<<(d_len + tile_items - 1) / tile_items, 128>>>( + d_yearmonthnum, d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min); + q34_kernel4<128, 4><<<(lo_len + tile_items - 1) / tile_items, 128>>>( + lo_orderdate, lo_custkey, lo_suppkey, lo_revenue, lo_len, ht_s, s_len, + ht_c, c_len, ht_d, d_val_len, res); + + // Finalize results + int *h_res = new int[res_array_size]; + CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int), + cudaMemcpyDeviceToHost)); + delete[] h_res; + CLEANUP(ht_d); + CLEANUP(ht_c); + CLEANUP(ht_s); + CLEANUP(res); +} + +template +__global__ void q41_kernel5(int *lo_orderdate, int *lo_partkey, int *lo_custkey, + int *lo_suppkey, int *lo_revenue, + int *lo_supplycost, int lo_len, int *ht_p, + int p_len, int *ht_s, int s_len, int *ht_c, + int c_len, int *ht_d, int d_len, int *res) +{ + // Load a segment of consecutive items that are blocked 
across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int c_nation[ITEMS_PER_THREAD]; + int s_nation[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, + items, num_tile_items); + BlockProbeAndPHT_1( + items, selection_flags, ht_s, s_len, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_custkey + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, c_nation, selection_flags, ht_c, c_len, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_partkey + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_1( + items, selection_flags, ht_p, p_len, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_orderdate + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, year, selection_flags, ht_d, d_len, 19920101, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_revenue + tile_offset, revenue, num_tile_items, selection_flags); + BlockPredLoad( + lo_supplycost + tile_offset, items, num_tile_items, selection_flags); + +#pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + if (threadIdx.x + (BLOCK_THREADS * ITEM) < num_tile_items) + { + if (selection_flags[ITEM]) + { + int hash = (c_nation[ITEM] * 7 + (year[ITEM] - 1992)) % + ((1998 - 1992 + 1) * 25); + res[hash * 4] = year[ITEM]; + res[hash * 4 + 1] = c_nation[ITEM]; + /*atomicAdd(&res[hash * 4 + 2], (1));*/ + /*atomicAdd(reinterpret_cast(&res[hash * 4 + 2]), + * (long long)(1));*/ + 
atomicAdd(reinterpret_cast(&res[hash * 4 + 2]), + (long long)(revenue[ITEM] - items[ITEM])); + } + } + } +} + +template +__global__ void q41_kernel1(int *filter_col, int *dim_key, int num_tuples, + int *hash_table, int num_slots) +{ + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 1, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockBuildSelectivePHT_1( + items, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void q41_kernel3(int *filter_col, int *dim_key, int num_tuples, + int *hash_table, int num_slots) +{ + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 0, selection_flags, + num_tile_items); + BlockPredOrEQ(items, 1, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockBuildSelectivePHT_1( + items, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void q41_kernel2(int *filter_col, int *dim_key, int *dim_val, + int num_tuples, int *hash_table, int num_slots) +{ + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x 
== num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 1, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void q41_kernel4(int *dim_key, int *dim_val, int num_tuples, + int *hash_table, int num_slots, int val_min) +{ + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + InitFlags(selection_flags); + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, num_slots, val_min, + num_tile_items); +} + +void run_q41(int *lo_orderdate, int *lo_custkey, int *lo_partkey, + int *lo_suppkey, int *lo_revenue, int *lo_supplycost, int lo_len, + int *d_datekey, int *d_year, int d_len, int *p_partkey, + int *p_mfgr, int p_len, int *s_suppkey, int *s_region, int s_len, + int *c_custkey, int *c_region, int *c_nation, int c_len, + cub::CachingDeviceAllocator &g_allocator) +{ + // Setup + int *ht_d, *ht_c, *ht_s, *ht_p; + int d_val_len = 19981230 - 19920101 + 1; + int *res; + int res_size = ((1998 - 1992 + 1) * 25); + int ht_entries = 4; // int,int,long long + int res_array_size = res_size * ht_entries; + CubDebugExit( + g_allocator.DeviceAllocate((void **)&res, res_array_size * sizeof(int))); + CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_d, 2 * 
d_val_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_s, 2 * s_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_c, 2 * c_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_p, 2 * p_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_c, 0, 2 * c_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_p, 0, 2 * p_len * sizeof(int))); + + // Run queries + int tile_items = 128 * 4; + int d_val_min = 19920101; + q41_kernel1<128, 4><<<(s_len + tile_items - 1) / tile_items, 128>>>( + s_region, s_suppkey, s_len, ht_s, s_len); + q41_kernel2<128, 4><<<(c_len + tile_items - 1) / tile_items, 128>>>( + c_region, c_custkey, c_nation, c_len, ht_c, c_len); + q41_kernel3<128, 4><<<(p_len + tile_items - 1) / tile_items, 128>>>( + p_mfgr, p_partkey, p_len, ht_p, p_len); + q41_kernel4<128, 4><<<(d_len + tile_items - 1) / tile_items, 128>>>( + d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min); + q41_kernel5<128, 4><<<(lo_len + tile_items - 1) / tile_items, 128>>>( + lo_orderdate, lo_partkey, lo_custkey, lo_suppkey, lo_revenue, + lo_supplycost, lo_len, ht_p, p_len, ht_s, s_len, ht_c, c_len, ht_d, + d_val_len, res); + + // Finalize results + int *h_res = new int[res_array_size]; + CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int), + cudaMemcpyDeviceToHost)); + delete[] h_res; + CLEANUP(ht_d); + CLEANUP(ht_s); + CLEANUP(ht_c); + CLEANUP(ht_p); + CLEANUP(res); +} + +template +__global__ void q42_kernel5(int *lo_orderdate, int *lo_partkey, int *lo_custkey, + int *lo_suppkey, int *lo_revenue, + int *lo_supplycost, int lo_len, int *ht_p, + int p_len, int *ht_s, int s_len, int *ht_c, + int c_len, int *ht_d, int d_len, int *res) +{ + // Load a segment of consecutive items that are blocked across threads + int items[ITEMS_PER_THREAD]; + int 
selection_flags[ITEMS_PER_THREAD]; + int category[ITEMS_PER_THREAD]; + int s_nation[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, + items, num_tile_items); + BlockProbeAndPHT_2( + items, s_nation, selection_flags, ht_s, s_len, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_custkey + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_1( + items, selection_flags, ht_c, c_len, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_partkey + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, category, selection_flags, ht_p, p_len, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_orderdate + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, year, selection_flags, ht_d, d_len, 19920101, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_revenue + tile_offset, revenue, num_tile_items, selection_flags); + BlockPredLoad( + lo_supplycost + tile_offset, items, num_tile_items, selection_flags); + +#pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + if (threadIdx.x + (BLOCK_THREADS * ITEM) < num_tile_items) + { + if (selection_flags[ITEM]) + { + /*int hash = (category[ITEM] * 7 * 25 + s_nation[ITEM] * 7 + + * (year[ITEM] - 1992)) % ((1998-1992+1) * 25 * 55);*/ + int hash = ((year[ITEM] - 1992) * 25 * 25 + s_nation[ITEM] * 25 + + category[ITEM]) % + ((1998 - 1992 + 1) * 25 * 25); + res[hash * 6] = year[ITEM]; + res[hash * 6 + 1] = s_nation[ITEM]; + res[hash * 6 + 2] = category[ITEM]; + 
atomicAdd(reinterpret_cast(&res[hash * 6 + 4]), + (long long)(revenue[ITEM] - items[ITEM])); + } + } + } +} + +template +__global__ void q42_kernel2(int *filter_col, int *dim_key, int num_tuples, + int *hash_table, int num_slots) +{ + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 1, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockBuildSelectivePHT_1( + items, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void q42_kernel3(int *filter_col, int *dim_key, int *dim_val, + int num_tuples, int *hash_table, int num_slots) +{ + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 0, selection_flags, + num_tile_items); + BlockPredOrEQ(items, 1, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void q42_kernel1(int *filter_col, int *dim_key, int *dim_val, + int num_tuples, int *hash_table, int num_slots) +{ + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * 
TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 1, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void q42_kernel4(int *dim_key, int *dim_val, int num_tuples, + int *hash_table, int num_slots, int val_min) +{ + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + InitFlags(selection_flags); + BlockLoad(dim_val + tile_offset, items, + num_tile_items); + BlockPredEQ( + items, 1997, selection_flags, num_tile_items); + BlockPredOrEQ( + items, 1998, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items2, items, selection_flags, hash_table, num_slots, val_min, + num_tile_items); +} + +void run_q42(int *lo_orderdate, int *lo_custkey, int *lo_partkey, + int *lo_suppkey, int *lo_revenue, int *lo_supplycost, int lo_len, + int *d_datekey, int *d_year, int d_len, int *p_partkey, + int *p_mfgr, int *p_category, int p_len, int *s_suppkey, + int *s_region, int *s_nation, int s_len, int *c_custkey, + int *c_region, int c_len, + cub::CachingDeviceAllocator &g_allocator) +{ + // Setup + int *ht_d, *ht_c, *ht_s, *ht_p; + int d_val_len = 19981230 - 19920101 + 1; + int *res; + int res_size = ((1998 - 1992 + 1) * 25 * 25); + int ht_entries = 6; + int 
res_array_size = res_size * ht_entries; + CubDebugExit( + g_allocator.DeviceAllocate((void **)&res, res_array_size * sizeof(int))); + CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_d, 2 * d_val_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_s, 2 * s_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_c, 2 * c_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_p, 2 * p_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_c, 0, 2 * c_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_p, 0, 2 * p_len * sizeof(int))); + + // Run queries + int tile_items = 128 * 4; + int d_val_min = 19920101; + q42_kernel1<128, 4><<<(s_len + tile_items - 1) / tile_items, 128>>>( + s_region, s_suppkey, s_nation, s_len, ht_s, s_len); + q42_kernel2<128, 4><<<(c_len + tile_items - 1) / tile_items, 128>>>( + c_region, c_custkey, c_len, ht_c, c_len); + q42_kernel3<128, 4><<<(p_len + tile_items - 1) / tile_items, 128>>>( + p_mfgr, p_partkey, p_category, p_len, ht_p, p_len); + q42_kernel4<128, 4><<<(d_len + tile_items - 1) / tile_items, 128>>>( + d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min); + q42_kernel5<128, 4><<<(lo_len + tile_items - 1) / tile_items, 128>>>( + lo_orderdate, lo_partkey, lo_custkey, lo_suppkey, lo_revenue, + lo_supplycost, lo_len, ht_p, p_len, ht_s, s_len, ht_c, c_len, ht_d, + d_val_len, res); + + // Finalize results + int *h_res = new int[res_array_size]; + CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int), + cudaMemcpyDeviceToHost)); + delete[] h_res; + CLEANUP(ht_d); + CLEANUP(ht_s); + CLEANUP(ht_c); + CLEANUP(ht_p); + CLEANUP(res); +} + +template +__global__ void q43_kernel5(int *lo_orderdate, int *lo_partkey, int *lo_custkey, + int *lo_suppkey, int *lo_revenue, + int 
*lo_supplycost, int lo_len, int *ht_p, + int p_len, int *ht_s, int s_len, int *ht_c, + int c_len, int *ht_d, int d_len, int *res) +{ + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int brand[ITEMS_PER_THREAD]; + int s_city[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, + items, num_tile_items); + BlockProbeAndPHT_2( + items, s_city, selection_flags, ht_s, s_len, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_custkey + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_1( + items, selection_flags, ht_c, c_len, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_partkey + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, brand, selection_flags, ht_p, p_len, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_orderdate + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, year, selection_flags, ht_d, d_len, 19920101, num_tile_items); + if (IsTerm(selection_flags)) + { + return; + } + + BlockPredLoad( + lo_revenue + tile_offset, revenue, num_tile_items, selection_flags); + BlockPredLoad( + lo_supplycost + tile_offset, items, num_tile_items, selection_flags); + +#pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + if (threadIdx.x + (BLOCK_THREADS * ITEM) < num_tile_items) + { + if (selection_flags[ITEM]) + { + /*int hash = (category[ITEM] * 7 * 25 + s_nation[ITEM] * 7 + + * (year[ITEM] - 1992)) % ((1998-1992+1) * 25 * 55);*/ + int hash = ((year[ITEM] - 1992) * 250 * 1000 + s_city[ITEM] * 1000 + 
+ brand[ITEM]) % + ((1998 - 1992 + 1) * 250 * 1000); + res[hash * 4] = year[ITEM]; + res[hash * 4 + 1] = s_city[ITEM]; + res[hash * 4 + 2] = brand[ITEM]; + atomicAdd(&res[hash * 4 + 3], (revenue[ITEM] - items[ITEM])); + } + } + } +} + +template +__global__ void q43_kernel2(int *filter_col, int *dim_key, int num_tuples, + int *hash_table, int num_slots) +{ + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 1, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockBuildSelectivePHT_1( + items, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void q43_kernel3(int *filter_col, int *dim_key, int *dim_val, + int num_tuples, int *hash_table, int num_slots) +{ + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 3, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void q43_kernel1(int *filter_col, int *dim_key, int *dim_val, + int num_tuples, int *hash_table, int num_slots) +{ + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int 
selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 24, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void q43_kernel4(int *dim_key, int *dim_val, int num_tuples, + int *hash_table, int num_slots, int val_min) +{ + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) + { + num_tile_items = num_tuples - tile_offset; + } + + InitFlags(selection_flags); + BlockLoad(dim_val + tile_offset, items, + num_tile_items); + BlockPredEQ( + items, 1997, selection_flags, num_tile_items); + BlockPredOrEQ( + items, 1998, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items2, items, selection_flags, hash_table, num_slots, val_min, + num_tile_items); +} + +void run_q43(int *lo_orderdate, int *lo_custkey, int *lo_partkey, + int *lo_suppkey, int *lo_revenue, int *lo_supplycost, int lo_len, + int *d_datekey, int *d_year, int d_len, int *p_partkey, + int *p_category, int *p_brand1, int p_len, int *s_suppkey, + int *s_nation, int *s_city, int s_len, int *c_custkey, + int *c_region, int c_len, + cub::CachingDeviceAllocator &g_allocator) +{ + // Setup + int *ht_d, *ht_c, *ht_s, *ht_p; + int d_val_len = 19981230 - 19920101 + 1; + int *res; + int res_size = 
((1998 - 1992 + 1) * 250 * 1000); + int ht_entries = 4; + int res_array_size = res_size * ht_entries; + CubDebugExit( + g_allocator.DeviceAllocate((void **)&res, res_array_size * sizeof(int))); + CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_d, 2 * d_val_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_s, 2 * s_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_c, 2 * c_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_p, 2 * p_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_c, 0, 2 * c_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_p, 0, 2 * p_len * sizeof(int))); + + // Run queries + int tile_items = 128 * 4; + int d_val_min = 19920101; + q43_kernel1<128, 4><<<(s_len + tile_items - 1) / tile_items, 128>>>( + s_nation, s_suppkey, s_city, s_len, ht_s, s_len); + q43_kernel2<128, 4><<<(c_len + tile_items - 1) / tile_items, 128>>>( + c_region, c_custkey, c_len, ht_c, c_len); + q43_kernel3<128, 4><<<(p_len + tile_items - 1) / tile_items, 128>>>( + p_category, p_partkey, p_brand1, p_len, ht_p, p_len); + q43_kernel4<128, 4><<<(d_len + tile_items - 1) / tile_items, 128>>>( + d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min); + q43_kernel5<128, 4><<<(lo_len + tile_items - 1) / tile_items, 128>>>( + lo_orderdate, lo_partkey, lo_custkey, lo_suppkey, lo_revenue, + lo_supplycost, lo_len, ht_p, p_len, ht_s, s_len, ht_c, c_len, ht_d, + d_val_len, res); + + // Finalize results + int *h_res = new int[res_array_size]; + CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int), + cudaMemcpyDeviceToHost)); + delete[] h_res; + CLEANUP(ht_d); + CLEANUP(ht_s); + CLEANUP(ht_c); + CLEANUP(ht_p); + CLEANUP(res); +} + +/** + * Main + */ +int main(int argc, char **argv) +{ + int num_trials 
= 3; + + // Initialize command line + CommandLineArgs args(argc, argv); + args.GetCmdLineArgument("t", num_trials); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--t=] " + "[--v] " + "\n", + argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + // Load host data + int *h_lo_orderdate = loadColumn("lo_orderdate", LO_LEN); + int *h_lo_custkey = loadColumn("lo_custkey", LO_LEN); + int *h_lo_partkey = loadColumn("lo_partkey", LO_LEN); + int *h_lo_suppkey = loadColumn("lo_suppkey", LO_LEN); + int *h_lo_discount = loadColumn("lo_discount", LO_LEN); + int *h_lo_quantity = loadColumn("lo_quantity", LO_LEN); + int *h_lo_extendedprice = loadColumn("lo_extendedprice", LO_LEN); + int *h_lo_revenue = loadColumn("lo_revenue", LO_LEN); + int *h_lo_supplycost = loadColumn("lo_supplycost", LO_LEN); + + int *h_p_partkey = loadColumn("p_partkey", P_LEN); + int *h_p_brand1 = loadColumn("p_brand1", P_LEN); + int *h_p_category = loadColumn("p_category", P_LEN); + int *h_p_mfgr = loadColumn("p_mfgr", P_LEN); + + int *h_s_suppkey = loadColumn("s_suppkey", S_LEN); + int *h_s_region = loadColumn("s_region", S_LEN); + int *h_s_nation = loadColumn("s_nation", S_LEN); + int *h_s_city = loadColumn("s_city", S_LEN); + + int *h_c_custkey = loadColumn("c_custkey", C_LEN); + int *h_c_nation = loadColumn("c_nation", C_LEN); + int *h_c_region = loadColumn("c_region", C_LEN); + int *h_c_city = loadColumn("c_city", C_LEN); + + int *h_d_datekey = loadColumn("d_datekey", D_LEN); + int *h_d_year = loadColumn("d_year", D_LEN); + int *h_d_yearmonthnum = loadColumn("d_yearmonthnum", D_LEN); + + // Load device data + int *d_lo_orderdate = loadToGPU(h_lo_orderdate, LO_LEN, g_allocator); + int *d_lo_custkey = loadToGPU(h_lo_custkey, LO_LEN, g_allocator); + int *d_lo_partkey = loadToGPU(h_lo_partkey, LO_LEN, g_allocator); + int *d_lo_suppkey = loadToGPU(h_lo_suppkey, LO_LEN, g_allocator); + int *d_lo_discount = loadToGPU(h_lo_discount, LO_LEN, 
g_allocator); + int *d_lo_quantity = loadToGPU(h_lo_quantity, LO_LEN, g_allocator); + int *d_lo_extendedprice = + loadToGPU(h_lo_extendedprice, LO_LEN, g_allocator); + int *d_lo_revenue = loadToGPU(h_lo_revenue, LO_LEN, g_allocator); + int *d_lo_supplycost = loadToGPU(h_lo_supplycost, LO_LEN, g_allocator); + + int *d_p_partkey = loadToGPU(h_p_partkey, P_LEN, g_allocator); + int *d_p_brand1 = loadToGPU(h_p_brand1, P_LEN, g_allocator); + int *d_p_category = loadToGPU(h_p_category, P_LEN, g_allocator); + int *d_p_mfgr = loadToGPU(h_p_mfgr, P_LEN, g_allocator); + + int *d_s_suppkey = loadToGPU(h_s_suppkey, S_LEN, g_allocator); + int *d_s_region = loadToGPU(h_s_region, S_LEN, g_allocator); + int *d_s_nation = loadToGPU(h_s_nation, S_LEN, g_allocator); + int *d_s_city = loadToGPU(h_s_city, S_LEN, g_allocator); + + int *d_c_custkey = loadToGPU(h_c_custkey, C_LEN, g_allocator); + int *d_c_region = loadToGPU(h_c_region, C_LEN, g_allocator); + int *d_c_nation = loadToGPU(h_c_nation, C_LEN, g_allocator); + int *d_c_city = loadToGPU(h_c_city, C_LEN, g_allocator); + + int *d_d_datekey = loadToGPU(h_d_datekey, D_LEN, g_allocator); + int *d_d_year = loadToGPU(h_d_year, D_LEN, g_allocator); + int *d_d_yearmonthnum = loadToGPU(h_d_yearmonthnum, D_LEN, g_allocator); + + // Run queries + std::vector seeds = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + auto rng = std::default_random_engine{}; + std::shuffle(std::begin(seeds), std::end(seeds), rng); + for (int t = 0; t < num_trials; t++) + { + for (int s : seeds) + { + switch (s) + { + case 0: + run_q11(d_lo_orderdate, d_lo_discount, d_lo_quantity, + d_lo_extendedprice, LO_LEN, g_allocator); + break; + case 1: + run_q12(d_lo_orderdate, d_lo_discount, d_lo_quantity, + d_lo_extendedprice, LO_LEN, g_allocator); + break; + case 2: + run_q13(d_lo_orderdate, d_lo_discount, d_lo_quantity, + d_lo_extendedprice, LO_LEN, g_allocator); + break; + case 3: + run_q21(d_lo_orderdate, d_lo_partkey, d_lo_suppkey, d_lo_revenue, + LO_LEN, d_p_partkey, 
d_p_brand1, d_p_category, P_LEN, + d_d_datekey, d_d_year, D_LEN, d_s_suppkey, d_s_region, S_LEN, + g_allocator); + break; + case 4: + run_q22(d_lo_orderdate, d_lo_partkey, d_lo_suppkey, d_lo_revenue, + LO_LEN, d_p_partkey, d_p_brand1, P_LEN, d_d_datekey, d_d_year, + D_LEN, d_s_suppkey, d_s_region, S_LEN, g_allocator); + break; + case 5: + run_q23(d_lo_orderdate, d_lo_partkey, d_lo_suppkey, d_lo_revenue, + LO_LEN, d_p_partkey, d_p_brand1, P_LEN, d_d_datekey, d_d_year, + D_LEN, d_s_suppkey, d_s_region, S_LEN, g_allocator); + break; + case 6: + run_q31(d_lo_orderdate, d_lo_custkey, d_lo_suppkey, d_lo_revenue, + LO_LEN, d_d_datekey, d_d_year, D_LEN, d_s_suppkey, d_s_region, + d_s_nation, S_LEN, d_c_custkey, d_c_region, d_c_nation, C_LEN, + g_allocator); + break; + case 7: + run_q32(d_lo_orderdate, d_lo_custkey, d_lo_suppkey, d_lo_revenue, + LO_LEN, d_d_datekey, d_d_year, D_LEN, d_s_suppkey, d_s_nation, + d_s_city, S_LEN, d_c_custkey, d_c_nation, d_c_city, C_LEN, + g_allocator); + break; + case 8: + run_q33(d_lo_orderdate, d_lo_custkey, d_lo_suppkey, d_lo_revenue, + LO_LEN, d_d_datekey, d_d_year, D_LEN, d_s_suppkey, d_s_city, + S_LEN, d_c_custkey, d_c_city, C_LEN, g_allocator); + break; + case 9: + run_q34(d_lo_orderdate, d_lo_custkey, d_lo_suppkey, d_lo_revenue, + LO_LEN, d_d_datekey, d_d_year, d_d_yearmonthnum, D_LEN, + d_s_suppkey, d_s_city, S_LEN, d_c_custkey, d_c_city, C_LEN, + g_allocator); + break; + case 10: + run_q41(d_lo_orderdate, d_lo_custkey, d_lo_partkey, d_lo_suppkey, + d_lo_revenue, d_lo_supplycost, LO_LEN, d_d_datekey, d_d_year, + D_LEN, d_p_partkey, d_p_mfgr, P_LEN, d_s_suppkey, d_s_region, + S_LEN, d_c_custkey, d_c_region, d_c_nation, C_LEN, g_allocator); + break; + case 11: + run_q42(d_lo_orderdate, d_lo_custkey, d_lo_partkey, d_lo_suppkey, + d_lo_revenue, d_lo_supplycost, LO_LEN, d_d_datekey, d_d_year, + D_LEN, d_p_partkey, d_p_mfgr, d_p_category, P_LEN, d_s_suppkey, + d_s_region, d_s_nation, S_LEN, d_c_custkey, d_c_region, C_LEN, + g_allocator); + 
break; + case 12: + run_q43(d_lo_orderdate, d_lo_custkey, d_lo_partkey, d_lo_suppkey, + d_lo_revenue, d_lo_supplycost, LO_LEN, d_d_datekey, d_d_year, + D_LEN, d_p_partkey, d_p_category, d_p_brand1, P_LEN, + d_s_suppkey, d_s_nation, d_s_city, S_LEN, d_c_custkey, + d_c_region, C_LEN, g_allocator); + break; + } + } + } +} diff --git a/crystal-opt/src/ssb/gpu_utils.h b/crystal-opt/src/ssb/gpu_utils.h new file mode 100644 index 0000000..1af7526 --- /dev/null +++ b/crystal-opt/src/ssb/gpu_utils.h @@ -0,0 +1,57 @@ +// MIT License + +// Copyright (c) 2023 Jiashen Cao + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#define SETUP_TIMING() cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); + +#define TIME_FUNC(f,t) { \ + cudaEventRecord(start, 0); \ + f; \ + cudaEventRecord(stop, 0); \ + cudaEventSynchronize(stop); \ + cudaEventElapsedTime(&t, start,stop); \ +} + +#define CLEANUP(vec) if(vec)CubDebugExit(g_allocator.DeviceFree(vec)) + +#define ALLOCATE(vec,size) CubDebugExit(g_allocator.DeviceAllocate((void**)&vec, size)) + +template +T* loadToGPU(T* src, int numEntries, cub::CachingDeviceAllocator& g_allocator) { + T* dest; + CubDebugExit(g_allocator.DeviceAllocate((void**)&dest, sizeof(T) * numEntries)); + CubDebugExit(cudaMemcpy(dest, src, sizeof(T) * numEntries, cudaMemcpyHostToDevice)); + return dest; +} + +#define TILE_SIZE (BLOCK_THREADS * ITEMS_PER_THREAD) + +#define CHECK_ERROR() { \ + cudaDeviceSynchronize(); \ + cudaError_t error = cudaGetLastError(); \ + if(error != cudaSuccess) \ + { \ + printf("CUDA error: %s\n", cudaGetErrorString(error)); \ + exit(-1); \ + } \ +} diff --git a/crystal-opt/src/ssb/q11.cu b/crystal-opt/src/ssb/q11.cu new file mode 100644 index 0000000..0d53d07 --- /dev/null +++ b/crystal-opt/src/ssb/q11.cu @@ -0,0 +1,204 @@ +// MIT License + +// Copyright (c) 2023 Jiashen Cao + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+ +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include "cub/test/test_util.h" +#include +#include + +#include "crystal.cuh" + +#include "gpu_utils.h" +#include "ssb_utils.h" + +using namespace std; + +/** + * Globals, constants and typedefs + */ +bool g_verbose = false; // Whether to display input/output to console +cub::CachingDeviceAllocator + g_allocator(true); // Caching allocator for device memory + +template +__global__ void QueryKernel(int *lo_orderdate, int *lo_discount, + int *lo_quantity, int *lo_extendedprice, + int lo_num_entries, unsigned long long *revenue) { + // Load a segment of consecutive items that are blocked across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + + long long sum = 0; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_num_entries + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = lo_num_entries - tile_offset; + } + + BlockLoad(lo_orderdate + tile_offset, + items, num_tile_items); + BlockPredGT( + items, 19930000, selection_flags, num_tile_items); + BlockPredAndLT( + items, 19940000, selection_flags, num_tile_items); + + BlockPredLoad( + lo_quantity + tile_offset, items, num_tile_items, selection_flags); + BlockPredAndLT( + items, 25, selection_flags, num_tile_items); + + BlockPredLoad( + lo_discount + tile_offset, items, 
num_tile_items, selection_flags); + BlockPredAndGTE( + items, 1, selection_flags, num_tile_items); + BlockPredAndLTE( + items, 3, selection_flags, num_tile_items); + + BlockPredLoad( + lo_extendedprice + tile_offset, items2, num_tile_items, selection_flags); + +#pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { + if ((threadIdx.x + (BLOCK_THREADS * ITEM) < num_tile_items)) + if (selection_flags[ITEM]) + sum += items[ITEM] * items2[ITEM]; + } + + __syncthreads(); + + static __shared__ long long buffer[32]; + unsigned long long aggregate = + BlockSum(sum, + (long long *)buffer); + __syncthreads(); + + if (threadIdx.x == 0) { + atomicAdd(revenue, aggregate); + } +} + +float runQuery(int *lo_orderdate, int *lo_discount, int *lo_quantity, + int *lo_extendedprice, int lo_num_entries, + cub::CachingDeviceAllocator &g_allocator) { + SETUP_TIMING(); + + float time_query; + chrono::high_resolution_clock::time_point st, finish; + st = chrono::high_resolution_clock::now(); + + cudaEventRecord(start, 0); + + unsigned long long *d_sum = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void **)&d_sum, sizeof(long long))); + + cudaMemset(d_sum, 0, sizeof(long long)); + + // Run + int tile_items = 128 * 4; + int num_blocks = (lo_num_entries + tile_items - 1) / tile_items; + QueryKernel<128, 4><<>>(lo_orderdate, lo_discount, + lo_quantity, lo_extendedprice, + lo_num_entries, d_sum); + + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&time_query, start, stop); + + unsigned long long revenue; + CubDebugExit( + cudaMemcpy(&revenue, d_sum, sizeof(long long), cudaMemcpyDeviceToHost)); + + finish = chrono::high_resolution_clock::now(); + std::chrono::duration diff = finish - st; + + cout << "Revenue: " << revenue << endl; + cout << "Time Taken Total: " << diff.count() * 1000 << endl; + + CLEANUP(d_sum); + + return time_query; +} + +/** + * Main + */ +int main(int argc, char **argv) { + int num_trials = 3; + + // Initialize command line + 
CommandLineArgs args(argc, argv); + args.GetCmdLineArgument("t", num_trials); + + // Print usage + if (args.CheckCmdLineFlag("help")) { + printf("%s " + "[--t=] " + "[--v] " + "\n", + argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + int *h_lo_orderdate = loadColumn("lo_orderdate", LO_LEN); + int *h_lo_discount = loadColumn("lo_discount", LO_LEN); + int *h_lo_quantity = loadColumn("lo_quantity", LO_LEN); + int *h_lo_extendedprice = loadColumn("lo_extendedprice", LO_LEN); + int *h_d_datekey = loadColumn("d_datekey", D_LEN); + int *h_d_year = loadColumn("d_year", D_LEN); + + cout << "** LOADED DATA **" << endl; + cout << "LO_LEN " << LO_LEN << endl; + + int *d_lo_orderdate = loadToGPU(h_lo_orderdate, LO_LEN, g_allocator); + int *d_lo_discount = loadToGPU(h_lo_discount, LO_LEN, g_allocator); + int *d_lo_quantity = loadToGPU(h_lo_quantity, LO_LEN, g_allocator); + int *d_lo_extendedprice = + loadToGPU(h_lo_extendedprice, LO_LEN, g_allocator); + int *d_d_datekey = loadToGPU(h_d_datekey, D_LEN, g_allocator); + int *d_d_year = loadToGPU(h_d_year, D_LEN, g_allocator); + + cout << "** LOADED DATA TO GPU **" << endl; + + for (int t = 0; t < num_trials; t++) { + float time_query; + time_query = runQuery(d_lo_orderdate, d_lo_discount, d_lo_quantity, + d_lo_extendedprice, LO_LEN, g_allocator); + cout << "{" + << "\"query\":11" + << ",\"time_query\":" << time_query << "}" << endl; + } + + return 0; +} diff --git a/crystal-opt/src/ssb/q12.cu b/crystal-opt/src/ssb/q12.cu new file mode 100644 index 0000000..572fd47 --- /dev/null +++ b/crystal-opt/src/ssb/q12.cu @@ -0,0 +1,206 @@ +// MIT License + +// Copyright (c) 2023 Jiashen Cao + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or 
sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include "cub/test/test_util.h" +#include +#include + +#include "crystal.cuh" + +#include "gpu_utils.h" +#include "ssb_utils.h" + +using namespace std; + +/** + * Globals, constants and typedefs + */ +bool g_verbose = false; // Whether to display input/output to console +cub::CachingDeviceAllocator + g_allocator(true); // Caching allocator for device memory + +template +__global__ void DeviceSelectIf(int *lo_orderdate, int *lo_discount, + int *lo_quantity, int *lo_extendedprice, + int lo_num_entries, + unsigned long long *revenue) { + // Load a segment of consecutive items that are blocked across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + + long long sum = 0; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_num_entries + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = lo_num_entries - tile_offset; + } + + BlockLoad(lo_orderdate + tile_offset, + items, num_tile_items); + BlockPredGTE( + items, 19940101, selection_flags, num_tile_items); + 
BlockPredAndLTE( + items, 19940131, selection_flags, num_tile_items); + + BlockPredLoad( + lo_quantity + tile_offset, items, num_tile_items, selection_flags); + BlockPredAndGTE( + items, 26, selection_flags, num_tile_items); + BlockPredAndLTE( + items, 35, selection_flags, num_tile_items); + + BlockPredLoad( + lo_discount + tile_offset, items, num_tile_items, selection_flags); + BlockPredAndGTE( + items, 4, selection_flags, num_tile_items); + BlockPredAndLTE( + items, 6, selection_flags, num_tile_items); + + BlockPredLoad( + lo_extendedprice + tile_offset, items2, num_tile_items, selection_flags); + +#pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { + if (threadIdx.x + (BLOCK_THREADS * ITEM) < num_tile_items) + if (selection_flags[ITEM]) + sum += items[ITEM] * items2[ITEM]; + } + + __syncthreads(); + + static __shared__ long long buffer[32]; + unsigned long long aggregate = + BlockSum(sum, + (long long *)buffer); + __syncthreads(); + + if (threadIdx.x == 0) { + atomicAdd(revenue, aggregate); + } +} + +float runQuery(int *lo_orderdate, int *lo_discount, int *lo_quantity, + int *lo_extendedprice, int lo_num_entries, + cub::CachingDeviceAllocator &g_allocator) { + SETUP_TIMING(); + + float time_query; + chrono::high_resolution_clock::time_point st, finish; + st = chrono::high_resolution_clock::now(); + + cudaEventRecord(start, 0); + + unsigned long long *d_sum = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void **)&d_sum, sizeof(long long))); + + cudaMemset(d_sum, 0, sizeof(long long)); + + // Run + int tile_items = 128 * 4; + DeviceSelectIf<128, 4> + <<<(lo_num_entries + tile_items - 1) / tile_items, 128>>>( + lo_orderdate, lo_discount, lo_quantity, lo_extendedprice, + lo_num_entries, d_sum); + + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&time_query, start, stop); + + unsigned long long revenue; + CubDebugExit( + cudaMemcpy(&revenue, d_sum, sizeof(long long), cudaMemcpyDeviceToHost)); + + finish = 
chrono::high_resolution_clock::now(); + std::chrono::duration diff = finish - st; + + cout << "Revenue: " << revenue << endl; + cout << "Time Taken Total: " << diff.count() * 1000 << endl; + + CLEANUP(d_sum); + + return time_query; +} + +/** + * Main + */ +int main(int argc, char **argv) { + int num_trials = 3; + + // Initialize command line + CommandLineArgs args(argc, argv); + args.GetCmdLineArgument("t", num_trials); + + // Print usage + if (args.CheckCmdLineFlag("help")) { + printf("%s " + "[--t=] " + "[--v] " + "\n", + argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + int *h_lo_orderdate = loadColumn("lo_orderdate", LO_LEN); + int *h_lo_discount = loadColumn("lo_discount", LO_LEN); + int *h_lo_quantity = loadColumn("lo_quantity", LO_LEN); + int *h_lo_extendedprice = loadColumn("lo_extendedprice", LO_LEN); + int *h_d_datekey = loadColumn("d_datekey", D_LEN); + int *h_d_year = loadColumn("d_year", D_LEN); + + cout << "** LOADED DATA **" << endl; + + int *d_lo_orderdate = loadToGPU(h_lo_orderdate, LO_LEN, g_allocator); + int *d_lo_discount = loadToGPU(h_lo_discount, LO_LEN, g_allocator); + int *d_lo_quantity = loadToGPU(h_lo_quantity, LO_LEN, g_allocator); + int *d_lo_extendedprice = + loadToGPU(h_lo_extendedprice, LO_LEN, g_allocator); + int *d_d_datekey = loadToGPU(h_d_datekey, D_LEN, g_allocator); + int *d_d_year = loadToGPU(h_d_year, D_LEN, g_allocator); + + cout << "** LOADED DATA TO GPU **" << endl; + + for (int t = 0; t < num_trials; t++) { + float time_query; + time_query = runQuery(d_lo_orderdate, d_lo_discount, d_lo_quantity, + d_lo_extendedprice, LO_LEN, g_allocator); + cout << "{" + << "\"query\":12" + << ",\"time_query\":" << time_query << "}" << endl; + } + + return 0; +} diff --git a/crystal-opt/src/ssb/q13.cu b/crystal-opt/src/ssb/q13.cu new file mode 100644 index 0000000..cb33971 --- /dev/null +++ b/crystal-opt/src/ssb/q13.cu @@ -0,0 +1,207 @@ +// MIT License + +// Copyright (c) 2023 Jiashen Cao + +// 
Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include "cub/test/test_util.h" +#include +#include + +#include "crystal.cuh" + +#include "gpu_utils.h" +#include "ssb_utils.h" + +using namespace std; + +/** + * Globals, constants and typedefs + */ +bool g_verbose = false; // Whether to display input/output to console +cub::CachingDeviceAllocator + g_allocator(true); // Caching allocator for device memory + +template +__global__ void DeviceSelectIf(int *lo_orderdate, int *lo_discount, + int *lo_quantity, int *lo_extendedprice, + int lo_num_entries, + unsigned long long *revenue) { + // Load a segment of consecutive items that are blocked across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + + long long sum = 0; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_num_entries + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = lo_num_entries - tile_offset; + } + + BlockLoad(lo_orderdate + tile_offset, + items, num_tile_items); + BlockPredGTE( + items, 19940204, selection_flags, num_tile_items); + BlockPredAndLTE( + items, 19940210, selection_flags, num_tile_items); + + BlockPredLoad( + lo_quantity + tile_offset, items, num_tile_items, selection_flags); + BlockPredAndGTE( + items, 26, selection_flags, num_tile_items); + BlockPredAndLTE( + items, 35, selection_flags, num_tile_items); + + BlockPredLoad( + lo_discount + tile_offset, items, num_tile_items, selection_flags); + BlockPredAndGTE( + items, 5, selection_flags, num_tile_items); + BlockPredAndLTE( + items, 7, selection_flags, num_tile_items); + + BlockPredLoad( + lo_extendedprice + tile_offset, items2, num_tile_items, selection_flags); + +#pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { + if ((threadIdx.x + (BLOCK_THREADS * ITEM) < num_tile_items)) + if (selection_flags[ITEM]) + 
sum += items[ITEM] * items2[ITEM]; + } + + __syncthreads(); + + static __shared__ long long buffer[32]; + unsigned long long aggregate = + BlockSum(sum, + (long long *)buffer); + __syncthreads(); + + if (threadIdx.x == 0) { + atomicAdd(revenue, aggregate); + } +} + +float runQuery(int *lo_orderdate, int *lo_discount, int *lo_quantity, + int *lo_extendedprice, int lo_num_entries, + cub::CachingDeviceAllocator &g_allocator) { + SETUP_TIMING(); + + float time_query; + chrono::high_resolution_clock::time_point st, finish; + st = chrono::high_resolution_clock::now(); + + cudaEventRecord(start, 0); + + unsigned long long *d_sum = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void **)&d_sum, sizeof(long long))); + + cudaMemset(d_sum, 0, sizeof(long long)); + + // Run + int tile_items = 128 * 4; + TIME_FUNC((DeviceSelectIf<128, 4> + <<<(lo_num_entries + tile_items - 1) / tile_items, 128>>>( + lo_orderdate, lo_discount, lo_quantity, lo_extendedprice, + lo_num_entries, d_sum)), + time_query); + + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&time_query, start, stop); + + unsigned long long revenue; + CubDebugExit( + cudaMemcpy(&revenue, d_sum, sizeof(long long), cudaMemcpyDeviceToHost)); + + finish = chrono::high_resolution_clock::now(); + std::chrono::duration diff = finish - st; + + cout << "Revenue: " << revenue << endl; + cout << "Time Taken Total: " << diff.count() * 1000 << endl; + + CLEANUP(d_sum); + + return time_query; +} + +/** + * Main + */ +int main(int argc, char **argv) { + int num_trials = 3; + + // Initialize command line + CommandLineArgs args(argc, argv); + args.GetCmdLineArgument("t", num_trials); + + // Print usage + if (args.CheckCmdLineFlag("help")) { + printf("%s " + "[--t=] " + "[--v] " + "\n", + argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + int *h_lo_orderdate = loadColumn("lo_orderdate", LO_LEN); + int *h_lo_discount = loadColumn("lo_discount", LO_LEN); + int 
*h_lo_quantity = loadColumn("lo_quantity", LO_LEN); + int *h_lo_extendedprice = loadColumn("lo_extendedprice", LO_LEN); + int *h_d_datekey = loadColumn("d_datekey", D_LEN); + int *h_d_year = loadColumn("d_year", D_LEN); + + cout << "** LOADED DATA **" << endl; + + int *d_lo_orderdate = loadToGPU(h_lo_orderdate, LO_LEN, g_allocator); + int *d_lo_discount = loadToGPU(h_lo_discount, LO_LEN, g_allocator); + int *d_lo_quantity = loadToGPU(h_lo_quantity, LO_LEN, g_allocator); + int *d_lo_extendedprice = + loadToGPU(h_lo_extendedprice, LO_LEN, g_allocator); + int *d_d_datekey = loadToGPU(h_d_datekey, D_LEN, g_allocator); + int *d_d_year = loadToGPU(h_d_year, D_LEN, g_allocator); + + cout << "** LOADED DATA TO GPU **" << endl; + + for (int t = 0; t < num_trials; t++) { + float time_query; + time_query = runQuery(d_lo_orderdate, d_lo_discount, d_lo_quantity, + d_lo_extendedprice, LO_LEN, g_allocator); + cout << "{" + << "\"query\":13" + << ",\"time_query\":" << time_query << "}" << endl; + } + + return 0; +} diff --git a/crystal-opt/src/ssb/q21.cu b/crystal-opt/src/ssb/q21.cu new file mode 100644 index 0000000..3fe9a52 --- /dev/null +++ b/crystal-opt/src/ssb/q21.cu @@ -0,0 +1,336 @@ +// MIT License + +// Copyright (c) 2023 Jiashen Cao + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+ +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include "cub/test/test_util.h" +#include +#include + +#include "crystal.cuh" + +#include "gpu_utils.h" +#include "ssb_utils.h" + +using namespace std; + +/** + * Globals, constants and typedefs + */ +bool g_verbose = false; // Whether to display input/output to console +cub::CachingDeviceAllocator + g_allocator(true); // Caching allocator for device memory + +template +__global__ void probe(int *lo_orderdate, int *lo_partkey, int *lo_suppkey, + int *lo_revenue, int lo_len, int *ht_s, int s_len, + int *ht_p, int p_len, int *ht_d, int d_len, int *res) { + // Load a tile striped across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int brand[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, + items, num_tile_items); + BlockProbeAndPHT_1( + items, selection_flags, ht_s, s_len, num_tile_items); + if (IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_partkey + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, brand, selection_flags, ht_p, p_len, num_tile_items); + if 
(IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_orderdate + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, year, selection_flags, ht_d, d_len, 19920101, num_tile_items); + if (IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_revenue + tile_offset, revenue, num_tile_items, selection_flags); + if (IsTerm(selection_flags)) { return; } + +#pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { + if ((threadIdx.x + (BLOCK_THREADS * ITEM)) < num_tile_items) { + if (selection_flags[ITEM]) { + int hash = (brand[ITEM] * 7 + (year[ITEM] - 1992)) % + ((1998 - 1992 + 1) * (5 * 5 * 40)); + res[hash * 4] = year[ITEM]; + res[hash * 4 + 1] = brand[ITEM]; + atomicAdd(reinterpret_cast(&res[hash * 4 + 2]), + (long long)(revenue[ITEM])); + } + } + } +} + +template +__global__ void build_hashtable_s(int *filter_col, int *dim_key, int num_tuples, + int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 1, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockBuildSelectivePHT_1( + items, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_p(int *filter_col, int *dim_key, int *dim_val, + int num_tuples, int *hash_table, + int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - 
tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 1, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_d(int *dim_key, int *dim_val, int num_tuples, + int *hash_table, int num_slots, int val_min) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, num_slots, val_min, + num_tile_items); +} + +float runQuery(int *lo_orderdate, int *lo_partkey, int *lo_suppkey, + int *lo_revenue, int lo_len, int *p_partkey, int *p_brand1, + int *p_category, int p_len, int *d_datekey, int *d_year, + int d_len, int *s_suppkey, int *s_region, int s_len, + cub::CachingDeviceAllocator &g_allocator) { + SETUP_TIMING(); + + float time_query; + chrono::high_resolution_clock::time_point st, finish; + st = chrono::high_resolution_clock::now(); + + cudaEventRecord(start, 0); + + int *ht_d, *ht_p, *ht_s; + int d_val_len = 19981230 - 19920101 + 1; + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_d, 2 * d_val_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_p, 2 * p_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_s, 2 * s_len * sizeof(int))); + + CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int))); + 
CubDebugExit(cudaMemset(ht_p, 0, 2 * p_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int))); + + int tile_items = 128 * 4; + + build_hashtable_s<128, 4><<<(s_len + tile_items - 1) / tile_items, 128>>>( + s_region, s_suppkey, s_len, ht_s, s_len); + /*CHECK_ERROR();*/ + + build_hashtable_p<128, 4><<<(p_len + tile_items - 1) / tile_items, 128>>>( + p_category, p_partkey, p_brand1, p_len, ht_p, p_len); + /*CHECK_ERROR();*/ + + int d_val_min = 19920101; + build_hashtable_d<128, 4><<<(d_len + tile_items - 1) / tile_items, 128>>>( + d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min); + /*CHECK_ERROR();*/ + + int *res; + int res_size = ((1998 - 1992 + 1) * (5 * 5 * 40)); + int res_array_size = res_size * 4; + CubDebugExit( + g_allocator.DeviceAllocate((void **)&res, res_array_size * sizeof(int))); + + CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int))); + + probe<128, 4><<<(lo_len + tile_items - 1) / tile_items, 128>>>( + lo_orderdate, lo_partkey, lo_suppkey, lo_revenue, lo_len, ht_s, s_len, + ht_p, p_len, ht_d, d_val_len, res); + + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&time_query, start, stop); + + int *h_res = new int[res_array_size]; + CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int), + cudaMemcpyDeviceToHost)); + finish = chrono::high_resolution_clock::now(); + std::chrono::duration diff = finish - st; + + int res_count = 0; + for (int i = 0; i < res_size; i++) { + if (h_res[4 * i] != 0) { + cout << h_res[4 * i] << " " << h_res[4 * i + 1] << " " + << reinterpret_cast(&h_res[4 * i + 2])[0] + << endl; + res_count += 1; + } + } + + cout << "Res Count: " << res_count << endl; + cout << "Time Taken Total: " << diff.count() * 1000 << endl; + + delete[] h_res; + + CLEANUP(res); + CLEANUP(ht_d); + CLEANUP(ht_p); + CLEANUP(ht_s); + + return time_query; +} + +/** + * Main + */ +int main(int argc, char **argv) { + int num_trials = 1; + + // Initialize command line + 
CommandLineArgs args(argc, argv); + args.GetCmdLineArgument("t", num_trials); + + // Print usage + if (args.CheckCmdLineFlag("help")) { + printf("%s " + "[--t=] " + "[--v] " + "\n", + argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + int *h_lo_orderdate = loadColumn("lo_orderdate", LO_LEN); + int *h_lo_partkey = loadColumn("lo_partkey", LO_LEN); + int *h_lo_suppkey = loadColumn("lo_suppkey", LO_LEN); + int *h_lo_revenue = loadColumn("lo_revenue", LO_LEN); + int *h_p_partkey = loadColumn("p_partkey", P_LEN); + int *h_p_brand1 = loadColumn("p_brand1", P_LEN); + int *h_p_category = loadColumn("p_category", P_LEN); + + int *h_d_datekey = loadColumn("d_datekey", D_LEN); + int *h_d_year = loadColumn("d_year", D_LEN); + + int *h_s_suppkey = loadColumn("s_suppkey", S_LEN); + int *h_s_region = loadColumn("s_region", S_LEN); + + int *d_lo_orderdate = loadToGPU(h_lo_orderdate, LO_LEN, g_allocator); + int *d_lo_partkey = loadToGPU(h_lo_partkey, LO_LEN, g_allocator); + int *d_lo_suppkey = loadToGPU(h_lo_suppkey, LO_LEN, g_allocator); + int *d_lo_revenue = loadToGPU(h_lo_revenue, LO_LEN, g_allocator); + + int *d_d_datekey = loadToGPU(h_d_datekey, D_LEN, g_allocator); + int *d_d_year = loadToGPU(h_d_year, D_LEN, g_allocator); + + int *d_p_partkey = loadToGPU(h_p_partkey, P_LEN, g_allocator); + int *d_p_brand1 = loadToGPU(h_p_brand1, P_LEN, g_allocator); + int *d_p_category = loadToGPU(h_p_category, P_LEN, g_allocator); + + int *d_s_suppkey = loadToGPU(h_s_suppkey, S_LEN, g_allocator); + int *d_s_region = loadToGPU(h_s_region, S_LEN, g_allocator); + + for (int t = 0; t < num_trials; t++) { + float time_query; + time_query = runQuery(d_lo_orderdate, d_lo_partkey, d_lo_suppkey, + d_lo_revenue, LO_LEN, d_p_partkey, d_p_brand1, + d_p_category, P_LEN, d_d_datekey, d_d_year, D_LEN, + d_s_suppkey, d_s_region, S_LEN, g_allocator); + cout << "{" + << "\"query\":21" + << ",\"time_query\":" << time_query << "}" << endl; + } + + return 0; +} diff --git 
a/crystal-opt/src/ssb/q22.cu b/crystal-opt/src/ssb/q22.cu new file mode 100644 index 0000000..6640329 --- /dev/null +++ b/crystal-opt/src/ssb/q22.cu @@ -0,0 +1,334 @@ +// MIT License + +// Copyright (c) 2023 Jiashen Cao + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include "cub/test/test_util.h" +#include +#include + +#include "crystal.cuh" + +#include "gpu_utils.h" +#include "ssb_utils.h" + +using namespace std; + +/** + * Globals, constants and typedefs + */ +bool g_verbose = false; // Whether to display input/output to console +cub::CachingDeviceAllocator + g_allocator(true); // Caching allocator for device memory + +template +__global__ void probe(int *lo_orderdate, int *lo_partkey, int *lo_suppkey, + int *lo_revenue, int lo_len, int *ht_s, int s_len, + int *ht_p, int p_len, int *ht_d, int d_len, int *res) { + // Load a tile striped across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int brand[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, + items, num_tile_items); + BlockProbeAndPHT_1( + items, selection_flags, ht_s, s_len, num_tile_items); + if (IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_partkey + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, brand, selection_flags, ht_p, p_len, num_tile_items); + if (IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_orderdate + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, year, selection_flags, ht_d, d_len, 19920101, num_tile_items); + if (IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_revenue + tile_offset, revenue, num_tile_items, selection_flags); + +#pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { + if ((threadIdx.x + (BLOCK_THREADS * ITEM)) < num_tile_items) { + if 
(selection_flags[ITEM]) { + int hash = (brand[ITEM] * 7 + (year[ITEM] - 1992)) % + ((1998 - 1992 + 1) * (5 * 5 * 40)); + res[hash * 4] = year[ITEM]; + res[hash * 4 + 1] = brand[ITEM]; + atomicAdd(reinterpret_cast(&res[hash * 4 + 2]), + (long long)(revenue[ITEM])); + } + } + } +} + +template +__global__ void build_hashtable_s(int *filter_col, int *dim_key, int num_tuples, + int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 2, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockBuildSelectivePHT_1( + items, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_p(int *dim_key, int *dim_val, int num_tuples, + int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(dim_val + tile_offset, items, + num_tile_items); + BlockPredGTE( + items, 260, selection_flags, num_tile_items); + BlockPredAndLTE( + items, 267, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items2, items, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_d(int *dim_key, int *dim_val, int num_tuples, + int *hash_table, int num_slots, int val_min) { + int items[ITEMS_PER_THREAD]; + int 
items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, num_slots, val_min, + num_tile_items); +} + +float runQuery(int *lo_orderdate, int *lo_partkey, int *lo_suppkey, + int *lo_revenue, int lo_len, int *p_partkey, int *p_brand1, + int p_len, int *d_datekey, int *d_year, int d_len, + int *s_suppkey, int *s_region, int s_len, + cub::CachingDeviceAllocator &g_allocator) { + SETUP_TIMING(); + + float time_query; + chrono::high_resolution_clock::time_point st, finish; + st = chrono::high_resolution_clock::now(); + + cudaEventRecord(start, 0); + + int *ht_d, *ht_p, *ht_s; + int d_val_len = 19981230 - 19920101 + 1; + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_d, 2 * d_val_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_p, 2 * p_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_s, 2 * s_len * sizeof(int))); + + CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_p, 0, 2 * p_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int))); + + int tile_items = 128 * 4; + build_hashtable_s<128, 4><<<(s_len + tile_items - 1) / tile_items, 128>>>( + s_region, s_suppkey, s_len, ht_s, s_len); + /*CHECK_ERROR();*/ + + build_hashtable_p<128, 4><<<(p_len + tile_items - 1) / tile_items, 128>>>( + p_partkey, p_brand1, p_len, ht_p, p_len); + /*CHECK_ERROR();*/ + + int d_val_min = 19920101; + build_hashtable_d<128, 4><<<(d_len + tile_items - 1) / tile_items, 128>>>( + d_datekey, d_year, d_len, 
ht_d, d_val_len, d_val_min);
+  /*CHECK_ERROR();*/
+
+  // Result array: one 4-int record per (brand, year) slot; ints 2..3 of a
+  // record are read back below as a single 64-bit revenue sum.
+  int *res;
+  int res_size = ((1998 - 1992 + 1) * 1000);
+  int res_array_size = res_size * 4;
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&res, res_array_size * sizeof(int)));
+
+  CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int)));
+
+  // Run
+  probe<128, 4><<<(lo_len + tile_items - 1) / tile_items, 128>>>(
+      lo_orderdate, lo_partkey, lo_suppkey, lo_revenue, lo_len, ht_s, s_len,
+      ht_p, p_len, ht_d, d_val_len, res);
+
+  cudaEventRecord(stop, 0);
+  cudaEventSynchronize(stop);
+  cudaEventElapsedTime(&time_query, start, stop);
+
+  // Kernel launches do not return a status themselves; surface any launch
+  // failure here, after the timed region, instead of printing empty results.
+  CubDebugExit(cudaGetLastError());
+
+  int *h_res = new int[res_array_size];
+  CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int),
+                          cudaMemcpyDeviceToHost));
+
+  finish = chrono::high_resolution_clock::now();
+  std::chrono::duration diff = finish - st;
+
+  cout << "Result:" << endl;
+  int res_count = 0;
+  for (int i = 0; i < res_size; i++) {
+    // A slot is reported only when its first int (the year) was written.
+    if (h_res[4 * i] != 0) {
+      cout << h_res[4 * i] << " " << h_res[4 * i + 1] << " "
+           << reinterpret_cast(&h_res[4 * i + 2])[0]
+           << endl;
+      res_count += 1;
+    }
+  }
+
+  cout << "Res Count: " << res_count << endl;
+  cout << "Time Taken Total: " << diff.count() * 1000 << endl;
+
+  delete[] h_res;
+
+  // runQuery is called once per trial, so every device buffer allocated in
+  // this call has to be handed back, including the result array.
+  CLEANUP(res);
+  CLEANUP(ht_d);
+  CLEANUP(ht_p);
+  CLEANUP(ht_s);
+
+  return time_query;
+}
+
+/**
+ * Main
+ */
+int main(int argc, char **argv) {
+  int num_trials = 3;
+
+  // Initialize command line
+  CommandLineArgs args(argc, argv);
+  args.GetCmdLineArgument("t", num_trials);
+
+  // Print usage
+  if (args.CheckCmdLineFlag("help")) {
+    printf("%s "
+           "[--t=] "
+           "[--v] "
+           "\n",
+           argv[0]);
+    exit(0);
+  }
+
+  // Initialize device
+  CubDebugExit(args.DeviceInit());
+
+  int *h_lo_orderdate = loadColumn("lo_orderdate", LO_LEN);
+  int *h_lo_partkey = loadColumn("lo_partkey", LO_LEN);
+  int *h_lo_suppkey = loadColumn("lo_suppkey", LO_LEN);
+  int *h_lo_revenue = loadColumn("lo_revenue", LO_LEN);
+
+  int *h_p_partkey = loadColumn("p_partkey", P_LEN);
+  int *h_p_brand1 =
loadColumn("p_brand1", P_LEN); + + int *h_d_datekey = loadColumn("d_datekey", D_LEN); + int *h_d_year = loadColumn("d_year", D_LEN); + + int *h_s_suppkey = loadColumn("s_suppkey", S_LEN); + int *h_s_region = loadColumn("s_region", S_LEN); + + int *d_lo_orderdate = loadToGPU(h_lo_orderdate, LO_LEN, g_allocator); + int *d_lo_partkey = loadToGPU(h_lo_partkey, LO_LEN, g_allocator); + int *d_lo_suppkey = loadToGPU(h_lo_suppkey, LO_LEN, g_allocator); + int *d_lo_revenue = loadToGPU(h_lo_revenue, LO_LEN, g_allocator); + + int *d_d_datekey = loadToGPU(h_d_datekey, D_LEN, g_allocator); + int *d_d_year = loadToGPU(h_d_year, D_LEN, g_allocator); + + int *d_p_partkey = loadToGPU(h_p_partkey, P_LEN, g_allocator); + int *d_p_brand1 = loadToGPU(h_p_brand1, P_LEN, g_allocator); + + int *d_s_suppkey = loadToGPU(h_s_suppkey, S_LEN, g_allocator); + int *d_s_region = loadToGPU(h_s_region, S_LEN, g_allocator); + + for (int t = 0; t < num_trials; t++) { + float time_query; + time_query = + runQuery(d_lo_orderdate, d_lo_partkey, d_lo_suppkey, d_lo_revenue, + LO_LEN, d_p_partkey, d_p_brand1, P_LEN, d_d_datekey, d_d_year, + D_LEN, d_s_suppkey, d_s_region, S_LEN, g_allocator); + cout << "{" + << "\"query\":22" + << ",\"time_query\":" << time_query << "}" << endl; + } + + return 0; +} diff --git a/crystal-opt/src/ssb/q23.cu b/crystal-opt/src/ssb/q23.cu new file mode 100644 index 0000000..e9b4273 --- /dev/null +++ b/crystal-opt/src/ssb/q23.cu @@ -0,0 +1,326 @@ +// MIT License + +// Copyright (c) 2023 Jiashen Cao + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright 
notice and this permission notice shall be included in all +// copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include "cub/test/test_util.h" +#include +#include + +#include "crystal.cuh" + +#include "gpu_utils.h" +#include "ssb_utils.h" + +using namespace std; + +/** + * Globals, constants and typedefs + */ +bool g_verbose = false; // Whether to display input/output to console +cub::CachingDeviceAllocator + g_allocator(true); // Caching allocator for device memory + +template +__global__ void probe(int *lo_orderdate, int *lo_partkey, int *lo_suppkey, + int *lo_revenue, int lo_len, int *ht_s, int s_len, + int *ht_p, int p_len, int *ht_d, int d_len, int *res) { + // Load a tile striped across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int brand[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, + items, num_tile_items); + BlockProbeAndPHT_1( + items, selection_flags, ht_s, s_len, num_tile_items); + if (IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_partkey + tile_offset, items, num_tile_items, 
selection_flags); + BlockProbeAndPHT_2( + items, brand, selection_flags, ht_p, p_len, num_tile_items); + if (IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_orderdate + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, year, selection_flags, ht_d, d_len, 19920101, num_tile_items); + if (IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_revenue + tile_offset, revenue, num_tile_items, selection_flags); + +#pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { + if ((threadIdx.x + (BLOCK_THREADS * ITEM)) < num_tile_items) { + if (selection_flags[ITEM]) { + int hash = (brand[ITEM] * 7 + (year[ITEM] - 1992)) % + ((1998 - 1992 + 1) * (5 * 5 * 40)); + res[hash * 4] = year[ITEM]; + res[hash * 4 + 1] = brand[ITEM]; + atomicAdd(reinterpret_cast(&res[hash * 4 + 2]), + (long long)(revenue[ITEM])); + } + } + } +} + +template +__global__ void build_hashtable_s(int *filter_col, int *dim_key, int num_tuples, + int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 3, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockBuildSelectivePHT_1( + items, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_p(int *dim_key, int *dim_val, int num_tuples, + int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == 
num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(dim_val + tile_offset, items, + num_tile_items); + BlockPredEQ(items, 260, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items2, items, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_d(int *dim_key, int *dim_val, int num_tuples, + int *hash_table, int num_slots, int val_min) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, num_slots, val_min, + num_tile_items); +} + +float runQuery(int *lo_orderdate, int *lo_partkey, int *lo_suppkey, + int *lo_revenue, int lo_len, int *p_partkey, int *p_brand1, + int p_len, int *d_datekey, int *d_year, int d_len, + int *s_suppkey, int *s_region, int s_len, + cub::CachingDeviceAllocator &g_allocator) { + SETUP_TIMING(); + + float time_query; + chrono::high_resolution_clock::time_point st, finish; + st = chrono::high_resolution_clock::now(); + + cudaEventRecord(start, 0); + + int *ht_d, *ht_p, *ht_s; + int d_val_len = 19981230 - 19920101 + 1; + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_d, 2 * d_val_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_p, 2 * p_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_s, 2 * s_len * sizeof(int))); + + CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_p, 0, 2 * 
p_len * sizeof(int)));
+  CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int)));
+
+  // Every kernel below is launched with 128 threads and 4 items per thread,
+  // i.e. one block per 512-row tile.
+  int tile_items = 128 * 4;
+  build_hashtable_s<128, 4><<<(s_len + tile_items - 1) / tile_items, 128>>>(
+      s_region, s_suppkey, s_len, ht_s, s_len);
+  /*CHECK_ERROR();*/
+
+  build_hashtable_p<128, 4><<<(p_len + tile_items - 1) / tile_items, 128>>>(
+      p_partkey, p_brand1, p_len, ht_p, p_len);
+  /*CHECK_ERROR();*/
+
+  int d_val_min = 19920101;
+  build_hashtable_d<128, 4><<<(d_len + tile_items - 1) / tile_items, 128>>>(
+      d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min);
+  /*CHECK_ERROR();*/
+
+  // Result array: one 4-int record per (brand, year) slot; ints 2..3 of a
+  // record are read back below as a single 64-bit revenue sum.
+  int *res;
+  int res_size = ((1998 - 1992 + 1) * 1000);
+  int res_array_size = res_size * 4;
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&res, res_array_size * sizeof(int)));
+
+  CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int)));
+
+  // Run
+  probe<128, 4><<<(lo_len + tile_items - 1) / tile_items, 128>>>(
+      lo_orderdate, lo_partkey, lo_suppkey, lo_revenue, lo_len, ht_s, s_len,
+      ht_p, p_len, ht_d, d_val_len, res);
+
+  cudaEventRecord(stop, 0);
+  cudaEventSynchronize(stop);
+  cudaEventElapsedTime(&time_query, start, stop);
+
+  // Kernel launches do not return a status themselves; surface any launch
+  // failure here, after the timed region, instead of printing empty results.
+  CubDebugExit(cudaGetLastError());
+
+  int *h_res = new int[res_array_size];
+  CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int),
+                          cudaMemcpyDeviceToHost));
+  finish = chrono::high_resolution_clock::now();
+  std::chrono::duration diff = finish - st;
+
+  cout << "Result:" << endl;
+  int res_count = 0;
+  for (int i = 0; i < res_size; i++) {
+    // A slot is reported only when its first int (the year) was written.
+    if (h_res[4 * i] != 0) {
+      cout << h_res[4 * i] << " " << h_res[4 * i + 1] << " "
+           << reinterpret_cast(&h_res[4 * i + 2])[0]
+           << endl;
+      res_count += 1;
+    }
+  }
+
+  cout << "Res Count: " << res_count << endl;
+  cout << "Time Taken Total: " << diff.count() * 1000 << endl;
+  delete[] h_res;
+
+  // runQuery is called once per trial; without these frees each trial keeps
+  // its three hash tables and its result array allocated on the device.
+  CubDebugExit(g_allocator.DeviceFree(res));
+  CubDebugExit(g_allocator.DeviceFree(ht_d));
+  CubDebugExit(g_allocator.DeviceFree(ht_p));
+  CubDebugExit(g_allocator.DeviceFree(ht_s));
+
+  return time_query;
+}
+
+/**
+ * Main
+ */
+int main(int argc, char **argv) {
+  int num_trials = 3;
+
+  // Initialize command line
+  CommandLineArgs args(argc, argv);
+  args.GetCmdLineArgument("t", num_trials);
+
+  // Print usage
+  if
(args.CheckCmdLineFlag("help")) { + printf("%s " + "[--t=] " + "[--v] " + "\n", + argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + int *h_lo_orderdate = loadColumn("lo_orderdate", LO_LEN); + int *h_lo_partkey = loadColumn("lo_partkey", LO_LEN); + int *h_lo_suppkey = loadColumn("lo_suppkey", LO_LEN); + int *h_lo_revenue = loadColumn("lo_revenue", LO_LEN); + + int *h_p_partkey = loadColumn("p_partkey", P_LEN); + int *h_p_brand1 = loadColumn("p_brand1", P_LEN); + + int *h_d_datekey = loadColumn("d_datekey", D_LEN); + int *h_d_year = loadColumn("d_year", D_LEN); + + int *h_s_suppkey = loadColumn("s_suppkey", S_LEN); + int *h_s_region = loadColumn("s_region", S_LEN); + + int *d_lo_orderdate = loadToGPU(h_lo_orderdate, LO_LEN, g_allocator); + int *d_lo_partkey = loadToGPU(h_lo_partkey, LO_LEN, g_allocator); + int *d_lo_suppkey = loadToGPU(h_lo_suppkey, LO_LEN, g_allocator); + int *d_lo_revenue = loadToGPU(h_lo_revenue, LO_LEN, g_allocator); + + int *d_d_datekey = loadToGPU(h_d_datekey, D_LEN, g_allocator); + int *d_d_year = loadToGPU(h_d_year, D_LEN, g_allocator); + + int *d_p_partkey = loadToGPU(h_p_partkey, P_LEN, g_allocator); + int *d_p_brand1 = loadToGPU(h_p_brand1, P_LEN, g_allocator); + + int *d_s_suppkey = loadToGPU(h_s_suppkey, S_LEN, g_allocator); + int *d_s_region = loadToGPU(h_s_region, S_LEN, g_allocator); + + for (int t = 0; t < num_trials; t++) { + float time_query; + time_query = + runQuery(d_lo_orderdate, d_lo_partkey, d_lo_suppkey, d_lo_revenue, + LO_LEN, d_p_partkey, d_p_brand1, P_LEN, d_d_datekey, d_d_year, + D_LEN, d_s_suppkey, d_s_region, S_LEN, g_allocator); + cout << "{" + << "\"query\":23" + << ",\"time_query\":" << time_query << "}" << endl; + } + + return 0; +} diff --git a/crystal-opt/src/ssb/q31.cu b/crystal-opt/src/ssb/q31.cu new file mode 100644 index 0000000..70eb4ee --- /dev/null +++ b/crystal-opt/src/ssb/q31.cu @@ -0,0 +1,349 @@ +// MIT License + +// Copyright (c) 2023 Jiashen Cao + +// Permission 
is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include "cub/test/test_util.h" +#include +#include + +#include "crystal.cuh" + +#include "gpu_utils.h" +#include "ssb_utils.h" + +using namespace std; + +/** + * Globals, constants and typedefs + */ +bool g_verbose = false; // Whether to display input/output to console +cub::CachingDeviceAllocator + g_allocator(true); // Caching allocator for device memory + +template +__global__ void probe(int *lo_orderdate, int *lo_custkey, int *lo_suppkey, + int *lo_revenue, int lo_len, int *ht_s, int s_len, + int *ht_c, int c_len, int *ht_d, int d_len, int *res) { + // Load a segment of consecutive items that are blocked across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int c_nation[ITEMS_PER_THREAD]; + int s_nation[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, + items, num_tile_items); + BlockProbeAndPHT_2( + items, s_nation, selection_flags, ht_s, s_len, num_tile_items); + if (IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_custkey + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, c_nation, selection_flags, ht_c, c_len, num_tile_items); + if (IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_orderdate + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, year, selection_flags, ht_d, d_len, 19920101, num_tile_items); + if (IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_revenue + tile_offset, revenue, num_tile_items, selection_flags); + +#pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; 
++ITEM) { + if ((threadIdx.x + (BLOCK_THREADS * ITEM)) < num_tile_items) { + if (selection_flags[ITEM]) { + int hash = (s_nation[ITEM] * 25 * 7 + c_nation[ITEM] * 7 + + (year[ITEM] - 1992)) % + ((1998 - 1992 + 1) * 25 * 25); + res[hash * 6] = year[ITEM]; + res[hash * 6 + 1] = c_nation[ITEM]; + res[hash * 6 + 2] = s_nation[ITEM]; + /*atomicAdd(&res[hash * 6 + 4], revenue[ITEM]);*/ + atomicAdd(reinterpret_cast(&res[hash * 6 + 4]), + (long long)(revenue[ITEM])); + } + } + } +} + +template +__global__ void build_hashtable_s(int *filter_col, int *dim_key, int *dim_val, + int num_tuples, int *hash_table, + int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 2, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_c(int *filter_col, int *dim_key, int *dim_val, + int num_tuples, int *hash_table, + int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 2, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val 
+ tile_offset, items2,
+            num_tile_items);
+  BlockBuildSelectivePHT_2(
+      items, items2, selection_flags, hash_table, num_slots, num_tile_items);
+}
+
+/**
+ * Builds the date hash table for query 31. Each block handles one tile of
+ * TILE_SIZE rows (the last block a partial tile). Rows whose dim_val lies in
+ * [1992, 1997] are selected and their (dim_key, dim_val) pairs are handed to
+ * BlockBuildSelectivePHT_2 together with num_slots and val_min.
+ * NOTE(review): val_min is assumed to be the key offset used for slot
+ * computation -- confirm against join.cuh.
+ */
+template
+__global__ void build_hashtable_d(int *dim_key, int *dim_val, int num_tuples,
+                                  int *hash_table, int num_slots, int val_min) {
+  int items[ITEMS_PER_THREAD];
+  int items2[ITEMS_PER_THREAD];
+  int selection_flags[ITEMS_PER_THREAD];
+
+  int tile_offset = blockIdx.x * TILE_SIZE;
+  int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE;
+  int num_tile_items = TILE_SIZE;
+
+  if (blockIdx.x == num_tiles - 1) {
+    num_tile_items = num_tuples - tile_offset;
+  }
+
+  BlockLoad(dim_val + tile_offset, items,
+            num_tile_items);
+  BlockPredGTE(
+      items, 1992, selection_flags, num_tile_items);
+  // Use the And variant so the upper bound is combined with the lower bound
+  // set just above (same pairing as the q32 date build) rather than a second
+  // plain predicate that would presumably replace those flags.
+  BlockPredAndLTE(
+      items, 1997, selection_flags, num_tile_items);
+
+  BlockLoad(dim_key + tile_offset, items2,
+            num_tile_items);
+  // Forward the caller-supplied minimum instead of a duplicated literal; the
+  // only call site passes 19920101, so results are unchanged.
+  BlockBuildSelectivePHT_2(
+      items2, items, selection_flags, hash_table, num_slots, val_min,
+      num_tile_items);
+}
+
+/**
+ * Runs query 31 once on columns that are already resident on the device.
+ *
+ * Allocates and zeroes the date/supplier/customer hash tables and the result
+ * array, launches the three build kernels and the probe kernel, copies the
+ * result array to the host, prints every non-empty group, and releases all
+ * device buffers allocated here.
+ *
+ * Returns the time in milliseconds between the start and stop events, as
+ * reported by cudaEventElapsedTime.
+ */
+float runQuery(int *lo_orderdate, int *lo_custkey, int *lo_suppkey,
+               int *lo_revenue, int lo_len, int *d_datekey, int *d_year,
+               int d_len, int *s_suppkey, int *s_region, int *s_nation,
+               int s_len, int *c_custkey, int *c_region, int *c_nation,
+               int c_len, cub::CachingDeviceAllocator &g_allocator) {
+  SETUP_TIMING();
+
+  float time_query;
+  chrono::high_resolution_clock::time_point st, finish;
+  st = chrono::high_resolution_clock::now();
+
+  cudaEventRecord(start, 0);
+
+  int *ht_d, *ht_c, *ht_s;
+  int d_val_len = 19981230 - 19920101 + 1;
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&ht_d, 2 * d_val_len * sizeof(int)));
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&ht_s, 2 * s_len * sizeof(int)));
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&ht_c, 2 * c_len * sizeof(int)));
+
+  CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int)));
+  CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int)));
+  // The customer table was the only one left unzeroed. Device allocations are
+  // not guaranteed to be zero-filled, and buffers freed at the end of a trial
+  // may be handed out again by the caching allocator.
+  CubDebugExit(cudaMemset(ht_c, 0, 2 * c_len * sizeof(int)));
+
+  // Every kernel below is launched with 128 threads and 4 items per thread,
+  // i.e. one block per 512-row tile.
+  int tile_items = 128 * 4;
+  build_hashtable_s<128, 4><<<(s_len + tile_items - 1) / tile_items, 128>>>(
+      s_region, s_suppkey, s_nation, s_len, ht_s, s_len);
+  /*CHECK_ERROR();*/
+
+  build_hashtable_c<128, 4><<<(c_len + tile_items - 1) / tile_items, 128>>>(
+      c_region, c_custkey, c_nation, c_len, ht_c, c_len);
+  /*CHECK_ERROR();*/
+
+  int d_val_min = 19920101;
+  build_hashtable_d<128, 4><<<(d_len + tile_items - 1) / tile_items, 128>>>(
+      d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min);
+  /*CHECK_ERROR();*/
+
+  // Result array: one 6-int record per (s_nation, c_nation, year) slot; ints
+  // 4..5 of a record are read back below as a single 64-bit revenue sum.
+  int *res;
+  int res_size = ((1998 - 1992 + 1) * 25 * 25);
+  int res_array_size = res_size * 6;
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&res, res_array_size * sizeof(int)));
+
+  CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int)));
+
+  // Run
+  probe<128, 4><<<(lo_len + tile_items - 1) / tile_items, 128>>>(
+      lo_orderdate, lo_custkey, lo_suppkey, lo_revenue, lo_len, ht_s, s_len,
+      ht_c, c_len, ht_d, d_val_len, res);
+
+  cudaEventRecord(stop, 0);
+  cudaEventSynchronize(stop);
+  cudaEventElapsedTime(&time_query, start, stop);
+
+  // Kernel launches do not return a status themselves; surface any launch
+  // failure here, after the timed region, instead of printing empty results.
+  CubDebugExit(cudaGetLastError());
+
+  int *h_res = new int[res_array_size];
+  CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int),
+                          cudaMemcpyDeviceToHost));
+  finish = chrono::high_resolution_clock::now();
+  std::chrono::duration diff = finish - st;
+
+  cout << "Result:" << endl;
+  int res_count = 0;
+  for (int i = 0; i < res_size; i++) {
+    // A slot is reported only when its first int (the year) was written.
+    if (h_res[6 * i] != 0) {
+      cout << h_res[6 * i] << " " << h_res[6 * i + 1] << " " << h_res[6 * i + 2]
+           << " "
+           << reinterpret_cast(&h_res[6 * i + 4])[0]
+           << endl;
+      res_count += 1;
+    }
+  }
+
+  cout << "Res Count: " << res_count << endl;
+  cout << "Time Taken Total: " << diff.count() * 1000 << endl;
+
+  delete[] h_res;
+
+  // runQuery is called once per trial; without these frees each trial keeps
+  // its three hash tables and its result array allocated on the device.
+  CubDebugExit(g_allocator.DeviceFree(res));
+  CubDebugExit(g_allocator.DeviceFree(ht_d));
+  CubDebugExit(g_allocator.DeviceFree(ht_s));
+  CubDebugExit(g_allocator.DeviceFree(ht_c));
+
+  return time_query;
+}
+
+/**
+ * Main
+ */
+int main(int argc, char **argv) {
+  int num_trials = 1;
+
+  // Initialize command line
+  CommandLineArgs args(argc, argv);
+  args.GetCmdLineArgument("t", num_trials);
+
+  // Print usage
+  if (args.CheckCmdLineFlag("help")) {
+    printf("%s "
+           "[--t=] "
+           "[--v] "
+           "\n",
+
argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + int *h_lo_orderdate = loadColumn("lo_orderdate", LO_LEN); + int *h_lo_custkey = loadColumn("lo_custkey", LO_LEN); + int *h_lo_suppkey = loadColumn("lo_suppkey", LO_LEN); + int *h_lo_revenue = loadColumn("lo_revenue", LO_LEN); + + int *h_d_datekey = loadColumn("d_datekey", D_LEN); + int *h_d_year = loadColumn("d_year", D_LEN); + + int *h_s_suppkey = loadColumn("s_suppkey", S_LEN); + int *h_s_nation = loadColumn("s_nation", S_LEN); + int *h_s_region = loadColumn("s_region", S_LEN); + + int *h_c_custkey = loadColumn("c_custkey", C_LEN); + int *h_c_nation = loadColumn("c_nation", C_LEN); + int *h_c_region = loadColumn("c_region", C_LEN); + + cout << "** LOADED DATA **" << endl; + + int *d_lo_orderdate = loadToGPU(h_lo_orderdate, LO_LEN, g_allocator); + int *d_lo_custkey = loadToGPU(h_lo_custkey, LO_LEN, g_allocator); + int *d_lo_suppkey = loadToGPU(h_lo_suppkey, LO_LEN, g_allocator); + int *d_lo_revenue = loadToGPU(h_lo_revenue, LO_LEN, g_allocator); + + int *d_d_datekey = loadToGPU(h_d_datekey, D_LEN, g_allocator); + int *d_d_year = loadToGPU(h_d_year, D_LEN, g_allocator); + + int *d_s_suppkey = loadToGPU(h_s_suppkey, S_LEN, g_allocator); + int *d_s_region = loadToGPU(h_s_region, S_LEN, g_allocator); + int *d_s_nation = loadToGPU(h_s_nation, S_LEN, g_allocator); + + int *d_c_custkey = loadToGPU(h_c_custkey, C_LEN, g_allocator); + int *d_c_region = loadToGPU(h_c_region, C_LEN, g_allocator); + int *d_c_nation = loadToGPU(h_c_nation, C_LEN, g_allocator); + + cout << "** LOADED DATA TO GPU **" << endl; + + for (int t = 0; t < num_trials; t++) { + float time_query; + time_query = runQuery( + d_lo_orderdate, d_lo_custkey, d_lo_suppkey, d_lo_revenue, LO_LEN, + d_d_datekey, d_d_year, D_LEN, d_s_suppkey, d_s_region, d_s_nation, + S_LEN, d_c_custkey, d_c_region, d_c_nation, C_LEN, g_allocator); + cout << "{" + << "\"query\":31" + << ",\"time_query\":" << time_query << "}" << endl; + } + + 
return 0; +} diff --git a/crystal-opt/src/ssb/q32.cu b/crystal-opt/src/ssb/q32.cu new file mode 100644 index 0000000..cf9f16a --- /dev/null +++ b/crystal-opt/src/ssb/q32.cu @@ -0,0 +1,343 @@ +// MIT License + +// Copyright (c) 2023 Jiashen Cao + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include "cub/test/test_util.h" +#include +#include + +#include "crystal.cuh" + +#include "gpu_utils.h" +#include "ssb_utils.h" + +using namespace std; + +/** + * Globals, constants and typedefs + */ +bool g_verbose = false; // Whether to display input/output to console +cub::CachingDeviceAllocator + g_allocator(true); // Caching allocator for device memory + +template +__global__ void probe(int *lo_orderdate, int *lo_custkey, int *lo_suppkey, + int *lo_revenue, int lo_len, int *ht_s, int s_len, + int *ht_c, int c_len, int *ht_d, int d_len, int *res) { + // Load a segment of consecutive items that are blocked across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int c_nation[ITEMS_PER_THREAD]; + int s_nation[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, + items, num_tile_items); + BlockProbeAndPHT_2( + items, s_nation, selection_flags, ht_s, s_len, num_tile_items); + if (IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_custkey + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, c_nation, selection_flags, ht_c, c_len, num_tile_items); + if (IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_orderdate + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, year, selection_flags, ht_d, d_len, 19920101, num_tile_items); + if (IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_revenue + tile_offset, revenue, num_tile_items, selection_flags); + +#pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; 
++ITEM) { + if ((threadIdx.x + (BLOCK_THREADS * ITEM)) < num_tile_items) { + if (selection_flags[ITEM]) { + int hash = (s_nation[ITEM] * 250 * 7 + c_nation[ITEM] * 7 + + (year[ITEM] - 1992)) % + ((1998 - 1992 + 1) * 250 * 250); + res[hash * 4] = year[ITEM]; + res[hash * 4 + 1] = c_nation[ITEM]; + res[hash * 4 + 2] = s_nation[ITEM]; + atomicAdd(&res[hash * 4 + 3], revenue[ITEM]); + } + } + } +} + +template +__global__ void build_hashtable_s(int *filter_col, int *dim_key, int *dim_val, + int num_tuples, int *hash_table, + int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 24, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_c(int *filter_col, int *dim_key, int *dim_val, + int num_tuples, int *hash_table, + int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 24, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, 
items2, selection_flags, hash_table, num_slots, num_tile_items);
+}
+
+/**
+ * Builds the date hash table for query 32. Each block handles one tile of
+ * TILE_SIZE rows (the last block a partial tile). Rows whose dim_val lies in
+ * [1992, 1997] are selected and their (dim_key, dim_val) pairs are handed to
+ * BlockBuildSelectivePHT_2 together with num_slots and val_min.
+ * NOTE(review): val_min is assumed to be the key offset used for slot
+ * computation -- confirm against join.cuh.
+ */
+template
+__global__ void build_hashtable_d(int *dim_key, int *dim_val, int num_tuples,
+                                  int *hash_table, int num_slots, int val_min) {
+  int items[ITEMS_PER_THREAD];
+  int items2[ITEMS_PER_THREAD];
+  int selection_flags[ITEMS_PER_THREAD];
+
+  int tile_offset = blockIdx.x * TILE_SIZE;
+  int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE;
+  int num_tile_items = TILE_SIZE;
+
+  if (blockIdx.x == num_tiles - 1) {
+    num_tile_items = num_tuples - tile_offset;
+  }
+
+  BlockLoad(dim_val + tile_offset, items,
+            num_tile_items);
+  BlockPredGTE(
+      items, 1992, selection_flags, num_tile_items);
+  BlockPredAndLTE(
+      items, 1997, selection_flags, num_tile_items);
+
+  BlockLoad(dim_key + tile_offset, items2,
+            num_tile_items);
+  // Forward the caller-supplied minimum instead of a duplicated literal; the
+  // only call site passes 19920101, so results are unchanged.
+  BlockBuildSelectivePHT_2(
+      items2, items, selection_flags, hash_table, num_slots, val_min,
+      num_tile_items);
+}
+
+/**
+ * Runs query 32 once on columns that are already resident on the device.
+ *
+ * Allocates and zeroes the date/supplier/customer hash tables and the result
+ * array, launches the three build kernels and the probe kernel, copies the
+ * result array to the host, prints every non-empty group, and releases all
+ * device buffers allocated here.
+ *
+ * Returns the time in milliseconds between the start and stop events, as
+ * reported by cudaEventElapsedTime.
+ */
+float runQuery(int *lo_orderdate, int *lo_custkey, int *lo_suppkey,
+               int *lo_revenue, int lo_len, int *d_datekey, int *d_year,
+               int d_len, int *s_suppkey, int *s_nation, int *s_city, int s_len,
+               int *c_custkey, int *c_nation, int *c_city, int c_len,
+               cub::CachingDeviceAllocator &g_allocator) {
+  SETUP_TIMING();
+
+  float time_query;
+  chrono::high_resolution_clock::time_point st, finish;
+  st = chrono::high_resolution_clock::now();
+
+  cudaEventRecord(start, 0);
+
+  int *ht_d, *ht_c, *ht_s;
+  int d_val_len = 19981230 - 19920101 + 1;
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&ht_d, 2 * d_val_len * sizeof(int)));
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&ht_s, 2 * s_len * sizeof(int)));
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&ht_c, 2 * c_len * sizeof(int)));
+
+  CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int)));
+  CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int)));
+  // The customer table was the only one left unzeroed. Device allocations are
+  // not guaranteed to be zero-filled, and buffers freed at the end of a trial
+  // may be handed out again by the caching allocator.
+  CubDebugExit(cudaMemset(ht_c, 0, 2 * c_len * sizeof(int)));
+
+  // Every kernel below is launched with 128 threads and 4 items per thread,
+  // i.e. one block per 512-row tile.
+  int tile_items = 128 * 4;
+  build_hashtable_s<128, 4><<<(s_len + tile_items - 1) / tile_items, 128>>>(
+      s_nation, s_suppkey, s_city, s_len, ht_s, s_len);
+  /*CHECK_ERROR();*/
+
+  build_hashtable_c<128, 4><<<(c_len + tile_items - 1) / tile_items, 128>>>(
+      c_nation, c_custkey, c_city, c_len, ht_c, c_len);
+  /*CHECK_ERROR();*/
+
+  int d_val_min = 19920101;
+  build_hashtable_d<128, 4><<<(d_len + tile_items - 1) / tile_items, 128>>>(
+      d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min);
+  /*CHECK_ERROR();*/
+
+  // Result array: one 4-int record per slot, printed below in index order.
+  int *res;
+  int res_size = ((1998 - 1992 + 1) * 250 * 250);
+  int res_array_size = res_size * 4;
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&res, res_array_size * sizeof(int)));
+
+  CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int)));
+
+  // Run
+  probe<128, 4><<<(lo_len + tile_items - 1) / tile_items, 128>>>(
+      lo_orderdate, lo_custkey, lo_suppkey, lo_revenue, lo_len, ht_s, s_len,
+      ht_c, c_len, ht_d, d_val_len, res);
+
+  cudaEventRecord(stop, 0);
+  cudaEventSynchronize(stop);
+  cudaEventElapsedTime(&time_query, start, stop);
+
+  // Kernel launches do not return a status themselves; surface any launch
+  // failure here, after the timed region, instead of printing empty results.
+  CubDebugExit(cudaGetLastError());
+
+  int *h_res = new int[res_array_size];
+  CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int),
+                          cudaMemcpyDeviceToHost));
+  finish = chrono::high_resolution_clock::now();
+  std::chrono::duration diff = finish - st;
+
+  cout << "Result:" << endl;
+  int res_count = 0;
+  for (int i = 0; i < res_size; i++) {
+    // A slot is reported only when its first int (the year) was written.
+    if (h_res[4 * i] != 0) {
+      cout << h_res[4 * i] << " " << h_res[4 * i + 1] << " " << h_res[4 * i + 2]
+           << " " << h_res[4 * i + 3] << endl;
+      res_count += 1;
+    }
+  }
+
+  cout << "Res Count: " << res_count << endl;
+  // The host-side wall time was measured but never reported in this query;
+  // print it the same way the q22/q23/q31 drivers do.
+  cout << "Time Taken Total: " << diff.count() * 1000 << endl;
+  delete[] h_res;
+
+  // runQuery is called once per trial; without these frees each trial keeps
+  // its three hash tables and its result array allocated on the device.
+  CubDebugExit(g_allocator.DeviceFree(res));
+  CubDebugExit(g_allocator.DeviceFree(ht_d));
+  CubDebugExit(g_allocator.DeviceFree(ht_s));
+  CubDebugExit(g_allocator.DeviceFree(ht_c));
+
+  return time_query;
+}
+
+/**
+ * Main
+ */
+int main(int argc, char **argv) {
+  int num_trials = 3;
+
+  // Initialize command line
+  CommandLineArgs args(argc, argv);
+  args.GetCmdLineArgument("t", num_trials);
+
+  // Print usage
+  if (args.CheckCmdLineFlag("help")) {
+    printf("%s "
+           "[--t=] "
+           "[--v] "
+           "\n",
+           argv[0]);
+    exit(0);
+  }
+
+  // Initialize device
+  CubDebugExit(args.DeviceInit());
+
+  int *h_lo_orderdate = loadColumn("lo_orderdate", LO_LEN);
+  int *h_lo_custkey =
loadColumn("lo_custkey", LO_LEN); + int *h_lo_suppkey = loadColumn("lo_suppkey", LO_LEN); + int *h_lo_revenue = loadColumn("lo_revenue", LO_LEN); + + int *h_d_datekey = loadColumn("d_datekey", D_LEN); + int *h_d_year = loadColumn("d_year", D_LEN); + + int *h_s_suppkey = loadColumn("s_suppkey", S_LEN); + int *h_s_nation = loadColumn("s_nation", S_LEN); + int *h_s_city = loadColumn("s_city", S_LEN); + + int *h_c_custkey = loadColumn("c_custkey", C_LEN); + int *h_c_nation = loadColumn("c_nation", C_LEN); + int *h_c_city = loadColumn("c_city", C_LEN); + + cout << "** LOADED DATA **" << endl; + + int *d_lo_orderdate = loadToGPU(h_lo_orderdate, LO_LEN, g_allocator); + int *d_lo_custkey = loadToGPU(h_lo_custkey, LO_LEN, g_allocator); + int *d_lo_suppkey = loadToGPU(h_lo_suppkey, LO_LEN, g_allocator); + int *d_lo_revenue = loadToGPU(h_lo_revenue, LO_LEN, g_allocator); + + int *d_d_datekey = loadToGPU(h_d_datekey, D_LEN, g_allocator); + int *d_d_year = loadToGPU(h_d_year, D_LEN, g_allocator); + + int *d_s_suppkey = loadToGPU(h_s_suppkey, S_LEN, g_allocator); + int *d_s_nation = loadToGPU(h_s_nation, S_LEN, g_allocator); + int *d_s_city = loadToGPU(h_s_city, S_LEN, g_allocator); + + int *d_c_custkey = loadToGPU(h_c_custkey, C_LEN, g_allocator); + int *d_c_nation = loadToGPU(h_c_nation, C_LEN, g_allocator); + int *d_c_city = loadToGPU(h_c_city, C_LEN, g_allocator); + + cout << "** LOADED DATA TO GPU **" << endl; + + for (int t = 0; t < num_trials; t++) { + float time_query; + time_query = runQuery(d_lo_orderdate, d_lo_custkey, d_lo_suppkey, + d_lo_revenue, LO_LEN, d_d_datekey, d_d_year, D_LEN, + d_s_suppkey, d_s_nation, d_s_city, S_LEN, d_c_custkey, + d_c_nation, d_c_city, C_LEN, g_allocator); + cout << "{" + << "\"query\":32" + << ",\"time_query\":" << time_query << "}" << endl; + } + + return 0; +} diff --git a/crystal-opt/src/ssb/q33.cu b/crystal-opt/src/ssb/q33.cu new file mode 100644 index 0000000..1048f79 --- /dev/null +++ b/crystal-opt/src/ssb/q33.cu @@ -0,0 +1,317 @@ 
+// Ensure printing of CUDA runtime errors to console
+#define CUB_STDERR
+
+// NOTE(review): the angle-bracket header names in this file were missing from
+// the reviewed text. The ones below are reconstructed from the names the code
+// uses; confirm them against the sibling query sources.
+#include <chrono>
+#include <iostream>
+#include <stdio.h>
+
+#include "cub/test/test_util.h"
+#include <cub/util_allocator.cuh>
+#include <cuda.h>
+
+#include "crystal.cuh"
+
+#include "gpu_utils.h"
+#include "ssb_utils.h"
+
+using namespace std;
+
+/**
+ * Globals, constants and typedefs
+ */
+bool g_verbose = false; // Whether to display input/output to console
+cub::CachingDeviceAllocator
+    g_allocator(true); // Caching allocator for device memory
+
+/**
+ * Probe kernel: filters one tile of lineorder rows through the supplier,
+ * customer and date hash tables and adds each surviving row's revenue to its
+ * group in `res`.
+ *
+ * Launch layout: one block per tile of TILE_SIZE rows, BLOCK_THREADS threads
+ * per block, ITEMS_PER_THREAD rows per thread; the last block takes the
+ * partial tile. A block returns early as soon as IsTerm reports that no row
+ * of its tile is still selected.
+ *
+ * @param lo_orderdate, lo_custkey, lo_suppkey, lo_revenue
+ *               lineorder columns, lo_len entries each
+ * @param ht_s   supplier hash table with s_len slots
+ * @param ht_c   customer hash table with c_len slots
+ * @param ht_d   date hash table with d_len slots
+ * @param res    output, 4 ints per group slot: {year, customer value,
+ *               supplier value, revenue sum}; the caller zeroes it first
+ */
+template <int BLOCK_THREADS, int ITEMS_PER_THREAD>
+__global__ void probe(int *lo_orderdate, int *lo_custkey, int *lo_suppkey,
+                      int *lo_revenue, int lo_len, int *ht_s, int s_len,
+                      int *ht_c, int c_len, int *ht_d, int d_len, int *res) {
+  // Load a segment of consecutive items that are blocked across threads
+  int items[ITEMS_PER_THREAD];
+  int selection_flags[ITEMS_PER_THREAD];
+  // The supplier and customer tables are built with the city columns as
+  // values in this query, so that is what the probes return.
+  int c_city[ITEMS_PER_THREAD];
+  int s_city[ITEMS_PER_THREAD];
+  int year[ITEMS_PER_THREAD];
+  int revenue[ITEMS_PER_THREAD];
+
+  int tile_offset = blockIdx.x * TILE_SIZE;
+  int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE;
+  int num_tile_items = TILE_SIZE;
+
+  // The last tile may be shorter than TILE_SIZE.
+  if (blockIdx.x == num_tiles - 1) {
+    num_tile_items = lo_len - tile_offset;
+  }
+
+  InitFlags<BLOCK_THREADS, ITEMS_PER_THREAD>(selection_flags);
+
+  BlockLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD>(lo_suppkey + tile_offset,
+                                                  items, num_tile_items);
+  BlockProbeAndPHT_2<int, int, BLOCK_THREADS, ITEMS_PER_THREAD>(
+      items, s_city, selection_flags, ht_s, s_len, num_tile_items);
+  if (IsTerm<int, BLOCK_THREADS, ITEMS_PER_THREAD>(selection_flags)) { return; }
+
+  BlockPredLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD>(
+      lo_custkey + tile_offset, items, num_tile_items, selection_flags);
+  BlockProbeAndPHT_2<int, int, BLOCK_THREADS, ITEMS_PER_THREAD>(
+      items, c_city, selection_flags, ht_c, c_len, num_tile_items);
+  if (IsTerm<int, BLOCK_THREADS, ITEMS_PER_THREAD>(selection_flags)) { return; }
+
+  BlockPredLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD>(
+      lo_orderdate + tile_offset, items, num_tile_items, selection_flags);
+  BlockProbeAndPHT_2<int, int, BLOCK_THREADS, ITEMS_PER_THREAD>(
+      items, year, selection_flags, ht_d, d_len, 19920101, num_tile_items);
+  if (IsTerm<int, BLOCK_THREADS, ITEMS_PER_THREAD>(selection_flags)) { return; }
+
+  BlockPredLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD>(
+      lo_revenue + tile_offset, revenue, num_tile_items, selection_flags);
+
+#pragma unroll
+  for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) {
+    if ((threadIdx.x + (BLOCK_THREADS * ITEM)) < num_tile_items) {
+      if (selection_flags[ITEM]) {
+        int hash = (s_city[ITEM] * 250 * 7 + c_city[ITEM] * 7 +
+                    (year[ITEM] - 1992)) %
+                   ((1998 - 1992 + 1) * 250 * 250);
+        // The three key fields are plain stores; only the sum is atomic.
+        // NOTE(review): this relies on every row that maps to a slot carrying
+        // the same key values, i.e. on the codes staying below 250 - confirm.
+        res[hash * 4] = year[ITEM];
+        res[hash * 4 + 1] = c_city[ITEM];
+        res[hash * 4 + 2] = s_city[ITEM];
+        atomicAdd(&res[hash * 4 + 3], revenue[ITEM]);
+      }
+    }
+  }
+}
+
+/**
+ * Builds the supplier hash table from one tile of the supplier dimension.
+ *
+ * Same launch layout as probe. Only rows whose dim_val equals 231 or 235 are
+ * inserted.
+ *
+ * @param dim_key     key column (passed s_suppkey by runQuery)
+ * @param dim_val     value column, also the filter column (passed s_city)
+ * @param num_tuples  number of rows in both columns
+ * @param hash_table  output table; the caller zeroes it before the launch
+ * @param num_slots   number of slots in hash_table
+ */
+template <int BLOCK_THREADS, int ITEMS_PER_THREAD>
+__global__ void build_hashtable_s(int *dim_key, int *dim_val, int num_tuples,
+                                  int *hash_table, int num_slots) {
+  int items[ITEMS_PER_THREAD];
+  int items2[ITEMS_PER_THREAD];
+  int selection_flags[ITEMS_PER_THREAD];
+
+  int tile_offset = blockIdx.x * TILE_SIZE;
+  int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE;
+  int num_tile_items = TILE_SIZE;
+
+  if (blockIdx.x == num_tiles - 1) {
+    num_tile_items = num_tuples - tile_offset;
+  }
+
+  BlockLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD>(dim_val + tile_offset, items,
+                                                  num_tile_items);
+  BlockPredEQ<int, BLOCK_THREADS, ITEMS_PER_THREAD>(items, 231, selection_flags,
+                                                    num_tile_items);
+  BlockPredOrEQ<int, BLOCK_THREADS, ITEMS_PER_THREAD>(
+      items, 235, selection_flags, num_tile_items);
+
+  BlockLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD>(dim_key + tile_offset, items2,
+                                                  num_tile_items);
+  BlockBuildSelectivePHT_2<int, int, BLOCK_THREADS, ITEMS_PER_THREAD>(
+      items2, items, selection_flags, hash_table, num_slots, num_tile_items);
+}
+
+/**
+ * Builds the customer hash table from one tile of the customer dimension.
+ *
+ * Same layout and filter (dim_val equal to 231 or 235) as build_hashtable_s;
+ * runQuery passes c_custkey as dim_key and c_city as dim_val.
+ */
+template <int BLOCK_THREADS, int ITEMS_PER_THREAD>
+__global__ void build_hashtable_c(int *dim_key, int *dim_val, int num_tuples,
+                                  int *hash_table, int num_slots) {
+  int items[ITEMS_PER_THREAD];
+  int items2[ITEMS_PER_THREAD];
+  int selection_flags[ITEMS_PER_THREAD];
+
+  int tile_offset = blockIdx.x * TILE_SIZE;
+  int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE;
+  int num_tile_items = TILE_SIZE;
+
+  if (blockIdx.x == num_tiles - 1) {
+    num_tile_items = num_tuples - tile_offset;
+  }
+
+  BlockLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD>(dim_val + tile_offset, items,
+                                                  num_tile_items);
+  BlockPredEQ<int, BLOCK_THREADS, ITEMS_PER_THREAD>(items, 231, selection_flags,
+                                                    num_tile_items);
+  BlockPredOrEQ<int, BLOCK_THREADS, ITEMS_PER_THREAD>(
+      items, 235, selection_flags, num_tile_items);
+
+  BlockLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD>(dim_key + tile_offset, items2,
+                                                  num_tile_items);
+  BlockBuildSelectivePHT_2<int, int, BLOCK_THREADS, ITEMS_PER_THREAD>(
+      items2, items, selection_flags, hash_table, num_slots, num_tile_items);
+}
+
+/**
+ * Builds the date hash table from one tile of the date dimension.
+ *
+ * Same launch layout as probe. Only rows whose dim_val lies in [1992, 1997]
+ * are inserted.
+ *
+ * @param dim_key     key column (passed d_datekey by runQuery)
+ * @param dim_val     value column, also the filter column (passed d_year)
+ * @param num_tuples  number of rows in both columns
+ * @param hash_table  output table; the caller zeroes it before the launch
+ * @param num_slots   number of slots in hash_table
+ * @param val_min     key offset handed to the build primitive
+ */
+template <int BLOCK_THREADS, int ITEMS_PER_THREAD>
+__global__ void build_hashtable_d(int *dim_key, int *dim_val, int num_tuples,
+                                  int *hash_table, int num_slots, int val_min) {
+  int items[ITEMS_PER_THREAD];
+  int items2[ITEMS_PER_THREAD];
+  int selection_flags[ITEMS_PER_THREAD];
+
+  int tile_offset = blockIdx.x * TILE_SIZE;
+  int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE;
+  int num_tile_items = TILE_SIZE;
+
+  if (blockIdx.x == num_tiles - 1) {
+    num_tile_items = num_tuples - tile_offset;
+  }
+
+  BlockLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD>(dim_val + tile_offset, items,
+                                                  num_tile_items);
+  BlockPredGTE<int, BLOCK_THREADS, ITEMS_PER_THREAD>(
+      items, 1992, selection_flags, num_tile_items);
+  // AND the upper bound into the flags, as the q32 variant of this kernel
+  // does. The plain LTE form used here before presumably replaced the flags
+  // and so dropped the lower bound - verify against pred.cuh.
+  BlockPredAndLTE<int, BLOCK_THREADS, ITEMS_PER_THREAD>(
+      items, 1997, selection_flags, num_tile_items);
+
+  BlockLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD>(dim_key + tile_offset, items2,
+                                                  num_tile_items);
+  // Use the caller-supplied offset instead of repeating the literal; runQuery
+  // passes 19920101, so the table contents are unchanged.
+  BlockBuildSelectivePHT_2<int, int, BLOCK_THREADS, ITEMS_PER_THREAD>(
+      items2, items, selection_flags, hash_table, num_slots, val_min,
+      num_tile_items);
+}
+
+/**
+ * Runs the query once on columns that already live on the device.
+ *
+ * Builds the supplier, customer and date hash tables, probes them with the
+ * lineorder columns, copies the result table back and prints every non-empty
+ * group followed by the wall-clock time.
+ *
+ * @param g_allocator  allocator used for the hash tables and the result table;
+ *                     all of them are handed back before returning
+ * @return elapsed device time in milliseconds between the start and stop
+ *         events (table allocation, builds and probe)
+ */
+float runQuery(int *lo_orderdate, int *lo_custkey, int *lo_suppkey,
+               int *lo_revenue, int lo_len, int *d_datekey, int *d_year,
+               int d_len, int *s_suppkey, int *s_city, int s_len,
+               int *c_custkey, int *c_city, int c_len,
+               cub::CachingDeviceAllocator &g_allocator) {
+  SETUP_TIMING();
+
+  float time_query;
+  chrono::high_resolution_clock::time_point st, finish;
+  st = chrono::high_resolution_clock::now();
+
+  CubDebugExit(cudaEventRecord(start, 0));
+
+  int *ht_d, *ht_c, *ht_s;
+  int d_val_len = 19981230 - 19920101 + 1;
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&ht_d, 2 * d_val_len * sizeof(int)));
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&ht_s, 2 * s_len * sizeof(int)));
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&ht_c, 2 * c_len * sizeof(int)));
+
+  // Every table has to start out zeroed. ht_c used to be skipped, so the probe
+  // could read whatever the allocation happened to contain.
+  CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int)));
+  CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int)));
+  CubDebugExit(cudaMemset(ht_c, 0, 2 * c_len * sizeof(int)));
+
+  int tile_items = 128 * 4;
+  build_hashtable_s<128, 4><<<(s_len + tile_items - 1) / tile_items, 128>>>(
+      s_suppkey, s_city, s_len, ht_s, s_len);
+  CubDebugExit(cudaGetLastError()); // launch errors only, does not synchronise
+
+  build_hashtable_c<128, 4><<<(c_len + tile_items - 1) / tile_items, 128>>>(
+      c_custkey, c_city, c_len, ht_c, c_len);
+  CubDebugExit(cudaGetLastError());
+
+  int d_val_min = 19920101;
+  build_hashtable_d<128, 4><<<(d_len + tile_items - 1) / tile_items, 128>>>(
+      d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min);
+  CubDebugExit(cudaGetLastError());
+
+  // Result table: 4 ints per group slot.
+  int *res;
+  int res_size = ((1998 - 1992 + 1) * 250 * 250);
+  int res_array_size = res_size * 4;
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&res, res_array_size * sizeof(int)));
+
+  CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int)));
+
+  // Run
+  probe<128, 4><<<(lo_len + tile_items - 1) / tile_items, 128>>>(
+      lo_orderdate, lo_custkey, lo_suppkey, lo_revenue, lo_len, ht_s, s_len,
+      ht_c, c_len, ht_d, d_val_len, res);
+  CubDebugExit(cudaGetLastError());
+
+  CubDebugExit(cudaEventRecord(stop, 0));
+  CubDebugExit(cudaEventSynchronize(stop)); // also surfaces in-kernel faults
+  CubDebugExit(cudaEventElapsedTime(&time_query, start, stop));
+
+  int *h_res = new int[res_array_size];
+  CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int),
+                          cudaMemcpyDeviceToHost));
+  finish = chrono::high_resolution_clock::now();
+  std::chrono::duration<double> diff = finish - st;
+
+  cout << "Result:" << endl;
+  int res_count = 0;
+  for (int i = 0; i < res_size; i++) {
+    // A slot whose first field is still 0 was never written by the probe.
+    if (h_res[4 * i] != 0) {
+      cout << h_res[4 * i] << " " << h_res[4 * i + 1] << " " << h_res[4 * i + 2]
+           << " " << h_res[4 * i + 3] << endl;
+      res_count += 1;
+    }
+  }
+
+  cout << "Res Count: " << res_count << endl;
+  cout << "Time Taken Total: " << diff.count() * 1000 << endl;
+
+  delete[] h_res;
+
+  // Hand the per-query buffers back so repeated trials do not accumulate
+  // device allocations.
+  CubDebugExit(g_allocator.DeviceFree(ht_d));
+  CubDebugExit(g_allocator.DeviceFree(ht_s));
+  CubDebugExit(g_allocator.DeviceFree(ht_c));
+  CubDebugExit(g_allocator.DeviceFree(res));
+
+  return time_query;
+}
+
+/**
+ * Main
+ *
+ * Loads the columns the query needs, copies them to the GPU and runs the
+ * query `--t` times (default 3), printing one JSON timing line per trial.
+ */
+int main(int argc, char **argv) {
+  int num_trials = 3;
+
+  // Initialize command line
+  CommandLineArgs args(argc, argv);
+  args.GetCmdLineArgument("t", num_trials);
+
+  // Print usage
+  // NOTE(review): the "<num trials>" placeholder is reconstructed; the
+  // reviewed text showed only "[--t=]".
+  if (args.CheckCmdLineFlag("help")) {
+    printf("%s "
+           "[--t=<num trials>] "
+           "[--v] "
+           "\n",
+           argv[0]);
+    exit(0);
+  }
+
+  // Initialize device
+  CubDebugExit(args.DeviceInit());
+
+  int *h_lo_orderdate = loadColumn<int>("lo_orderdate", LO_LEN);
+  int *h_lo_custkey = loadColumn<int>("lo_custkey", LO_LEN);
+  int *h_lo_suppkey = loadColumn<int>("lo_suppkey", LO_LEN);
+  int *h_lo_revenue = loadColumn<int>("lo_revenue", LO_LEN);
+
+  int *h_d_datekey = loadColumn<int>("d_datekey", D_LEN);
+  int *h_d_year = loadColumn<int>("d_year", D_LEN);
+
+  int *h_s_suppkey = loadColumn<int>("s_suppkey", S_LEN);
+  int *h_s_city = loadColumn<int>("s_city", S_LEN);
+
+  int *h_c_custkey = loadColumn<int>("c_custkey", C_LEN);
+  int *h_c_city = loadColumn<int>("c_city", C_LEN);
+
+  cout << "** LOADED DATA **" << endl;
+
+  int *d_lo_orderdate = loadToGPU<int>(h_lo_orderdate, LO_LEN, g_allocator);
+  int *d_lo_custkey = loadToGPU<int>(h_lo_custkey, LO_LEN, g_allocator);
+  int *d_lo_suppkey = loadToGPU<int>(h_lo_suppkey, LO_LEN, g_allocator);
+  int *d_lo_revenue = loadToGPU<int>(h_lo_revenue, LO_LEN, g_allocator);
+
+  int *d_d_datekey = loadToGPU<int>(h_d_datekey, D_LEN, g_allocator);
+  int *d_d_year = loadToGPU<int>(h_d_year, D_LEN, g_allocator);
+
+  int *d_s_suppkey = loadToGPU<int>(h_s_suppkey, S_LEN, g_allocator);
+  int *d_s_city = loadToGPU<int>(h_s_city, S_LEN, g_allocator);
+
+  int *d_c_custkey = loadToGPU<int>(h_c_custkey, C_LEN, g_allocator);
+  int *d_c_city = loadToGPU<int>(h_c_city, C_LEN, g_allocator);
+
+  cout << "** LOADED DATA TO GPU **" << endl;
+
+  for (int t = 0; t < num_trials; t++) {
+    float time_query;
+    time_query =
+        runQuery(d_lo_orderdate, d_lo_custkey, d_lo_suppkey, d_lo_revenue,
+                 LO_LEN, d_d_datekey, d_d_year, D_LEN, d_s_suppkey, d_s_city,
+                 S_LEN, d_c_custkey, d_c_city, C_LEN, g_allocator);
+    cout << "{"
+         << "\"query\":33"
+         << ",\"time_query\":" << time_query << "}" << endl;
+  }
+
+  return 0;
+}
diff --git a/crystal-opt/src/ssb/q34.cu b/crystal-opt/src/ssb/q34.cu
new file mode 100644
index 0000000..7326075
--- /dev/null
+++ b/crystal-opt/src/ssb/q34.cu
@@ -0,0 +1,365 @@
+// MIT License
+
+// Copyright (c) 2023 Jiashen Cao
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without
limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+// Ensure printing of CUDA runtime errors to console
+#define CUB_STDERR
+
+// NOTE(review): the angle-bracket header names in this file were missing from
+// the reviewed text. The ones below are reconstructed from the names the code
+// uses; confirm them against the sibling query sources.
+#include <chrono>
+#include <iostream>
+#include <stdio.h>
+
+#include "cub/test/test_util.h"
+#include <cub/util_allocator.cuh>
+#include <cuda.h>
+
+#include "crystal.cuh"
+
+#include "gpu_utils.h"
+#include "ssb_utils.h"
+
+using namespace std;
+
+/**
+ * Globals, constants and typedefs
+ */
+bool g_verbose = false; // Whether to display input/output to console
+cub::CachingDeviceAllocator
+    g_allocator(true); // Caching allocator for device memory
+
+/**
+ * Probe kernel: filters one tile of lineorder rows through the supplier,
+ * customer and date hash tables and adds each surviving row's revenue to its
+ * group in `res`.
+ *
+ * Launch layout: one block per tile of TILE_SIZE rows, BLOCK_THREADS threads
+ * per block, ITEMS_PER_THREAD rows per thread; the last block takes the
+ * partial tile. A block returns early as soon as IsTerm reports that no row
+ * of its tile is still selected.
+ *
+ * @param lo_orderdate, lo_custkey, lo_suppkey, lo_revenue
+ *               lineorder columns, lo_len entries each
+ * @param ht_s   supplier hash table with s_len slots
+ * @param ht_c   customer hash table with c_len slots
+ * @param ht_d   date hash table with d_len slots
+ * @param res    output, 4 ints per group slot: {year, customer value,
+ *               supplier value, revenue sum}; the caller zeroes it first
+ */
+template <int BLOCK_THREADS, int ITEMS_PER_THREAD>
+__global__ void probe(int *lo_orderdate, int *lo_custkey, int *lo_suppkey,
+                      int *lo_revenue, int lo_len, int *ht_s, int s_len,
+                      int *ht_c, int c_len, int *ht_d, int d_len, int *res) {
+  // Load a segment of consecutive items that are blocked across threads
+  int items[ITEMS_PER_THREAD];
+  int selection_flags[ITEMS_PER_THREAD];
+  // The supplier and customer tables are built with the city columns as
+  // values in this query, so that is what the probes return.
+  int c_city[ITEMS_PER_THREAD];
+  int s_city[ITEMS_PER_THREAD];
+  int year[ITEMS_PER_THREAD];
+  int revenue[ITEMS_PER_THREAD];
+
+  int tile_offset = blockIdx.x * TILE_SIZE;
+  int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE;
+  int num_tile_items = TILE_SIZE;
+
+  // The last tile may be shorter than TILE_SIZE.
+  if (blockIdx.x == num_tiles - 1) {
+    num_tile_items = lo_len - tile_offset;
+  }
+
+  InitFlags<BLOCK_THREADS, ITEMS_PER_THREAD>(selection_flags);
+
+  BlockLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD>(lo_suppkey + tile_offset,
+                                                  items, num_tile_items);
+  BlockProbeAndPHT_2<int, int, BLOCK_THREADS, ITEMS_PER_THREAD>(
+      items, s_city, selection_flags, ht_s, s_len, num_tile_items);
+  if (IsTerm<int, BLOCK_THREADS, ITEMS_PER_THREAD>(selection_flags)) { return; }
+
+  BlockPredLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD>(
+      lo_custkey + tile_offset, items, num_tile_items, selection_flags);
+  BlockProbeAndPHT_2<int, int, BLOCK_THREADS, ITEMS_PER_THREAD>(
+      items, c_city, selection_flags, ht_c, c_len, num_tile_items);
+  if (IsTerm<int, BLOCK_THREADS, ITEMS_PER_THREAD>(selection_flags)) { return; }
+
+  BlockPredLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD>(
+      lo_orderdate + tile_offset, items, num_tile_items, selection_flags);
+  BlockProbeAndPHT_2<int, int, BLOCK_THREADS, ITEMS_PER_THREAD>(
+      items, year, selection_flags, ht_d, d_len, 19920101, num_tile_items);
+  if (IsTerm<int, BLOCK_THREADS, ITEMS_PER_THREAD>(selection_flags)) { return; }
+
+  BlockPredLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD>(
+      lo_revenue + tile_offset, revenue, num_tile_items, selection_flags);
+
+#pragma unroll
+  for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) {
+    if ((threadIdx.x + (BLOCK_THREADS * ITEM)) < num_tile_items) {
+      if (selection_flags[ITEM]) {
+        int hash = (s_city[ITEM] * 250 * 7 + c_city[ITEM] * 7 +
+                    (year[ITEM] - 1992)) %
+                   ((1998 - 1992 + 1) * 250 * 250);
+        // The three key fields are plain stores; only the sum is atomic.
+        // NOTE(review): this relies on every row that maps to a slot carrying
+        // the same key values, i.e. on the codes staying below 250 - confirm.
+        res[hash * 4] = year[ITEM];
+        res[hash * 4 + 1] = c_city[ITEM];
+        res[hash * 4 + 2] = s_city[ITEM];
+        atomicAdd(&res[hash * 4 + 3], revenue[ITEM]);
+      }
+    }
+  }
+}
+
+/**
+ * Builds the supplier hash table from one tile of the supplier dimension.
+ *
+ * Same launch layout as probe. Only rows whose dim_val equals 231 or 235 are
+ * inserted.
+ *
+ * @param dim_key     key column (passed s_suppkey by runQuery)
+ * @param dim_val     value column, also the filter column (passed s_city)
+ * @param num_tuples  number of rows in both columns
+ * @param hash_table  output table; the caller zeroes it before the launch
+ * @param num_slots   number of slots in hash_table
+ */
+template <int BLOCK_THREADS, int ITEMS_PER_THREAD>
+__global__ void build_hashtable_s(int *dim_key, int *dim_val, int num_tuples,
+                                  int *hash_table, int num_slots) {
+  int items[ITEMS_PER_THREAD];
+  int items2[ITEMS_PER_THREAD];
+  int selection_flags[ITEMS_PER_THREAD];
+
+  int tile_offset = blockIdx.x * TILE_SIZE;
+  int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE;
+  int num_tile_items = TILE_SIZE;
+
+  if (blockIdx.x == num_tiles - 1) {
+    num_tile_items = num_tuples - tile_offset;
+  }
+
+  BlockLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD>(dim_val + tile_offset, items,
+                                                  num_tile_items);
+  BlockPredEQ<int, BLOCK_THREADS, ITEMS_PER_THREAD>(items, 231, selection_flags,
+                                                    num_tile_items);
+  BlockPredOrEQ<int, BLOCK_THREADS, ITEMS_PER_THREAD>(
+      items, 235, selection_flags, num_tile_items);
+
+  BlockLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD>(dim_key + tile_offset, items2,
+                                                  num_tile_items);
+  BlockBuildSelectivePHT_2<int, int, BLOCK_THREADS, ITEMS_PER_THREAD>(
+      items2, items, selection_flags, hash_table, num_slots, num_tile_items);
+}
+
+/**
+ * Builds the customer hash table from one tile of the customer dimension.
+ *
+ * Same layout and filter (dim_val equal to 231 or 235) as build_hashtable_s;
+ * runQuery passes c_custkey as dim_key and c_city as dim_val.
+ */
+template <int BLOCK_THREADS, int ITEMS_PER_THREAD>
+__global__ void build_hashtable_c(int *dim_key, int *dim_val, int num_tuples,
+                                  int *hash_table, int num_slots) {
+  int items[ITEMS_PER_THREAD];
+  int items2[ITEMS_PER_THREAD];
+  int selection_flags[ITEMS_PER_THREAD];
+
+  int tile_offset = blockIdx.x * TILE_SIZE;
+  int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE;
+  int num_tile_items = TILE_SIZE;
+
+  if (blockIdx.x == num_tiles - 1) {
+    num_tile_items = num_tuples - tile_offset;
+  }
+
+  BlockLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD>(dim_val + tile_offset, items,
+                                                  num_tile_items);
+  BlockPredEQ<int, BLOCK_THREADS, ITEMS_PER_THREAD>(items, 231, selection_flags,
+                                                    num_tile_items);
+  BlockPredOrEQ<int, BLOCK_THREADS, ITEMS_PER_THREAD>(
+      items, 235, selection_flags, num_tile_items);
+
+  BlockLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD>(dim_key + tile_offset, items2,
+                                                  num_tile_items);
+  BlockBuildSelectivePHT_2<int, int, BLOCK_THREADS, ITEMS_PER_THREAD>(
+      items2, items, selection_flags, hash_table, num_slots, num_tile_items);
+}
+
+/**
+ * Builds the date hash table from one tile of the date dimension.
+ *
+ * Same launch layout as probe. Only rows whose filter_col equals 199712 are
+ * inserted.
+ *
+ * @param filter_col  column the predicate is evaluated on (passed
+ *                    d_yearmonthnum by runQuery)
+ * @param dim_key     key column (passed d_datekey)
+ * @param dim_val     value column (passed d_year)
+ * @param num_tuples  number of rows in all three columns
+ * @param hash_table  output table; the caller zeroes it before the launch
+ * @param num_slots   number of slots in hash_table
+ * @param val_min     key offset handed to the build primitive
+ */
+template <int BLOCK_THREADS, int ITEMS_PER_THREAD>
+__global__ void build_hashtable_d(int *filter_col, int *dim_key, int *dim_val,
+                                  int num_tuples, int *hash_table,
+                                  int num_slots, int val_min) {
+  int items[ITEMS_PER_THREAD];
+  int items2[ITEMS_PER_THREAD];
+  int selection_flags[ITEMS_PER_THREAD];
+
+  int tile_offset = blockIdx.x * TILE_SIZE;
+  int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE;
+  int num_tile_items = TILE_SIZE;
+
+  if (blockIdx.x == num_tiles - 1) {
+    num_tile_items = num_tuples - tile_offset;
+  }
+
+  BlockLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD>(filter_col + tile_offset,
+                                                  items, num_tile_items);
+  BlockPredEQ<int, BLOCK_THREADS, ITEMS_PER_THREAD>(
+      items, 199712, selection_flags, num_tile_items);
+
+  // `items` is reused for the keys once the predicate has been evaluated.
+  BlockLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD>(dim_key + tile_offset, items,
+                                                  num_tile_items);
+  BlockLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD>(dim_val + tile_offset, items2,
+                                                  num_tile_items);
+  // Use the caller-supplied offset instead of repeating the literal; runQuery
+  // passes 19920101, so the table contents are unchanged.
+  BlockBuildSelectivePHT_2<int, int, BLOCK_THREADS, ITEMS_PER_THREAD>(
+      items, items2, selection_flags, hash_table, num_slots, val_min,
+      num_tile_items);
+}
+
+/**
+ * Runs the query once on columns that already live on the device.
+ *
+ * Builds the supplier, customer and date hash tables, probes them with the
+ * lineorder columns, copies the result table back and prints every non-empty
+ * group followed by the wall-clock time.
+ *
+ * @param g_allocator  allocator used for the hash tables and the result table;
+ *                     all of them are handed back before returning
+ * @return elapsed device time in milliseconds between the start and stop
+ *         events (table allocation, builds and probe)
+ */
+float runQuery(int *lo_orderdate, int *lo_custkey, int *lo_suppkey,
+               int *lo_revenue, int lo_len, int *d_datekey, int *d_year,
+               int *d_yearmonthnum, int d_len, int *s_suppkey, int *s_city,
+               int s_len, int *c_custkey, int *c_city, int c_len,
+               cub::CachingDeviceAllocator &g_allocator) {
+  SETUP_TIMING();
+
+  float time_query;
+  chrono::high_resolution_clock::time_point st, finish;
+  st = chrono::high_resolution_clock::now();
+
+  CubDebugExit(cudaEventRecord(start, 0));
+
+  int *ht_d, *ht_c, *ht_s;
+  int d_val_len = 19981230 - 19920101 + 1;
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&ht_d, 2 * d_val_len * sizeof(int)));
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&ht_s, 2 * s_len * sizeof(int)));
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&ht_c, 2 * c_len * sizeof(int)));
+
+  // Every table has to start out zeroed. ht_c used to be skipped, so the probe
+  // could read whatever the allocation happened to contain.
+  CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int)));
+  CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int)));
+  CubDebugExit(cudaMemset(ht_c, 0, 2 * c_len * sizeof(int)));
+
+  int tile_items = 128 * 4;
+  build_hashtable_s<128, 4><<<(s_len + tile_items - 1) / tile_items, 128>>>(
+      s_suppkey, s_city, s_len, ht_s, s_len);
+  CubDebugExit(cudaGetLastError()); // launch errors only, does not synchronise
+
+  build_hashtable_c<128, 4><<<(c_len + tile_items - 1) / tile_items, 128>>>(
+      c_custkey, c_city, c_len, ht_c, c_len);
+  CubDebugExit(cudaGetLastError());
+
+  int d_val_min = 19920101;
+  build_hashtable_d<128, 4><<<(d_len + tile_items - 1) / tile_items, 128>>>(
+      d_yearmonthnum, d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min);
+  CubDebugExit(cudaGetLastError());
+
+  // Result table: 4 ints per group slot.
+  int *res;
+  int res_size = ((1998 - 1992 + 1) * 250 * 250);
+  int res_array_size = res_size * 4;
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&res, res_array_size * sizeof(int)));
+
+  CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int)));
+
+  // The d_sum scratch buffer that used to be allocated here was never read,
+  // written by a kernel, or freed, so it has been removed.
+
+  // Run
+  probe<128, 4><<<(lo_len + tile_items - 1) / tile_items, 128>>>(
+      lo_orderdate, lo_custkey, lo_suppkey, lo_revenue, lo_len, ht_s, s_len,
+      ht_c, c_len, ht_d, d_val_len, res);
+  CubDebugExit(cudaGetLastError());
+
+  CubDebugExit(cudaEventRecord(stop, 0));
+  CubDebugExit(cudaEventSynchronize(stop)); // also surfaces in-kernel faults
+  CubDebugExit(cudaEventElapsedTime(&time_query, start, stop));
+
+  int *h_res = new int[res_array_size];
+  CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int),
+                          cudaMemcpyDeviceToHost));
+  finish = chrono::high_resolution_clock::now();
+  std::chrono::duration<double> diff = finish - st;
+
+  cout << "Result:" << endl;
+  int res_count = 0;
+  for (int i = 0; i < res_size; i++) {
+    // A slot whose first field is still 0 was never written by the probe.
+    if (h_res[4 * i] != 0) {
+      cout << h_res[4 * i] << " " << h_res[4 * i + 1] << " " << h_res[4 * i + 2]
+           << " " << h_res[4 * i + 3] << endl;
+      res_count += 1;
+    }
+  }
+
+  cout << "Res Count: " << res_count << endl;
+  cout << "Time Taken Total: " << diff.count() * 1000 << endl;
+
+  delete[] h_res;
+
+  // Hand the per-query buffers back so repeated trials do not accumulate
+  // device allocations.
+  CubDebugExit(g_allocator.DeviceFree(ht_d));
+  CubDebugExit(g_allocator.DeviceFree(ht_s));
+  CubDebugExit(g_allocator.DeviceFree(ht_c));
+  CubDebugExit(g_allocator.DeviceFree(res));
+
+  return time_query;
+}
+
+/**
+ * Mixes a 32-bit key with the multiply/rotate/xor-shift sequence below.
+ *
+ * All arithmetic is done on unsigned values so the rotations and right shifts
+ * are real rotations and zero-filling shifts. The previous version operated on
+ * signed ints, where shifting a negative value right sign-fills it and
+ * shifting it left is not portable. Assumes unsigned int is 32 bits wide.
+ *
+ * @param k  key to mix
+ * @return the mixed value, reinterpreted as a signed int
+ */
+int murmur(int k) {
+  unsigned int key = static_cast<unsigned int>(k);
+  unsigned int h = 0xcd2e2c20u;
+  const unsigned int len = 4;
+  key *= 0xcc9e2d51u;
+  key = (key << 15) | (key >> 17); // rotate left by 15
+  key *= 0x1b873593u;
+  h ^= key;
+  h = (h << 13) | (h >> 19); // rotate left by 13
+  h = (h * 5) + 0xe6546b64u;
+  h ^= len;
+  h ^= h >> 16;
+  h *= 0x85ebca6bu;
+  h ^= h >> 13;
+  h *= 0xc2b2ae35u;
+  h ^= h >> 16;
+  return static_cast<int>(h);
+}
+
+/**
+ * Main
+ *
+ * Loads the columns the query needs, copies them to the GPU and runs the
+ * query `--t` times (default 3), printing one JSON timing line per trial.
+ */
+int main(int argc, char **argv) {
+  int num_trials = 3;
+
+  // Initialize command line
+  CommandLineArgs args(argc, argv);
+  args.GetCmdLineArgument("t", num_trials);
+
+  // Print usage
+  // NOTE(review): the "<num trials>" placeholder is reconstructed; the
+  // reviewed text showed only "[--t=]".
+  if (args.CheckCmdLineFlag("help")) {
+    printf("%s "
+           "[--t=<num trials>] "
+           "[--v] "
+           "\n",
+           argv[0]);
+    exit(0);
+  }
+
+  // Initialize device
+  CubDebugExit(args.DeviceInit());
+
+  int *h_lo_orderdate = loadColumn<int>("lo_orderdate", LO_LEN);
+  int *h_lo_custkey = loadColumn<int>("lo_custkey", LO_LEN);
+  int *h_lo_suppkey = loadColumn<int>("lo_suppkey", LO_LEN);
+  int *h_lo_revenue = loadColumn<int>("lo_revenue", LO_LEN);
+
+  int *h_d_datekey = loadColumn<int>("d_datekey", D_LEN);
+  int *h_d_year = loadColumn<int>("d_year", D_LEN);
+  int *h_d_yearmonthnum = loadColumn<int>("d_yearmonthnum", D_LEN);
+
+  int *h_s_suppkey = loadColumn<int>("s_suppkey", S_LEN);
+  int *h_s_city = loadColumn<int>("s_city", S_LEN);
+
+  int *h_c_custkey = loadColumn<int>("c_custkey", C_LEN);
+  int *h_c_city = loadColumn<int>("c_city", C_LEN);
+
+  cout << "** LOADED DATA **" << endl;
+
+  int *d_lo_orderdate = loadToGPU<int>(h_lo_orderdate, LO_LEN, g_allocator);
+  int *d_lo_custkey = loadToGPU<int>(h_lo_custkey, LO_LEN, g_allocator);
+  int *d_lo_suppkey = loadToGPU<int>(h_lo_suppkey, LO_LEN, g_allocator);
+  int *d_lo_revenue = loadToGPU<int>(h_lo_revenue, LO_LEN, g_allocator);
+
+  int *d_d_datekey = loadToGPU<int>(h_d_datekey, D_LEN, g_allocator);
+  int *d_d_year = loadToGPU<int>(h_d_year, D_LEN, g_allocator);
+  int *d_d_yearmonthnum = loadToGPU<int>(h_d_yearmonthnum, D_LEN, g_allocator);
+
+  int *d_s_suppkey = loadToGPU<int>(h_s_suppkey, S_LEN, g_allocator);
+  int *d_s_city = loadToGPU<int>(h_s_city, S_LEN, g_allocator);
+
+  int *d_c_custkey = loadToGPU<int>(h_c_custkey, C_LEN, g_allocator);
+  int *d_c_city = loadToGPU<int>(h_c_city, C_LEN, g_allocator);
+
+  cout << "** LOADED DATA TO GPU **" << endl;
+
+  for (int t = 0; t < num_trials; t++) {
+    float time_query;
+    time_query = runQuery(d_lo_orderdate, d_lo_custkey, d_lo_suppkey,
+                          d_lo_revenue, LO_LEN, d_d_datekey, d_d_year,
+                          d_d_yearmonthnum, D_LEN, d_s_suppkey, d_s_city, S_LEN,
+                          d_c_custkey, d_c_city, C_LEN, g_allocator);
+    cout << "{"
+         << "\"query\":34"
+         << ",\"time_query\":" << time_query << "}" << endl;
+  }
+
+  return 0;
+}
diff --git a/crystal-opt/src/ssb/q41.cu b/crystal-opt/src/ssb/q41.cu
new file mode 100644
index 0000000..7922fcd
--- /dev/null
+++ b/crystal-opt/src/ssb/q41.cu
@@ -0,0 +1,438 @@
+// MIT License
+
+// Copyright (c) 2023 Jiashen Cao
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+ +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include "cub/test/test_util.h" +#include +#include + +#include "crystal.cuh" + +#include "gpu_utils.h" +#include "ssb_utils.h" + +using namespace std; + +/** + * Globals, constants and typedefs + */ +bool g_verbose = false; // Whether to display input/output to console +cub::CachingDeviceAllocator + g_allocator(true); // Caching allocator for device memory + +template +__global__ void probe(int *lo_orderdate, int *lo_partkey, int *lo_custkey, + int *lo_suppkey, int *lo_revenue, int *lo_supplycost, + int lo_len, int *ht_p, int p_len, int *ht_s, int s_len, + int *ht_c, int c_len, int *ht_d, int d_len, int *res) { + // Load a segment of consecutive items that are blocked across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int c_nation[ITEMS_PER_THREAD]; + int s_nation[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, + items, num_tile_items); + BlockProbeAndPHT_1( + items, selection_flags, ht_s, s_len, num_tile_items); + if (IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_custkey + tile_offset, items, 
num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, c_nation, selection_flags, ht_c, c_len, num_tile_items); + if (IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_partkey + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_1( + items, selection_flags, ht_p, p_len, num_tile_items); + if (IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_orderdate + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, year, selection_flags, ht_d, d_len, 19920101, num_tile_items); + if (IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_revenue + tile_offset, revenue, num_tile_items, selection_flags); + BlockPredLoad( + lo_supplycost + tile_offset, items, num_tile_items, selection_flags); + +#pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { + if (threadIdx.x + (BLOCK_THREADS * ITEM) < num_tile_items) { + if (selection_flags[ITEM]) { + int hash = (c_nation[ITEM] * 7 + (year[ITEM] - 1992)) % + ((1998 - 1992 + 1) * 25); + res[hash * 4] = year[ITEM]; + res[hash * 4 + 1] = c_nation[ITEM]; + /*atomicAdd(&res[hash * 4 + 2], (1));*/ + /*atomicAdd(reinterpret_cast(&res[hash * 4 + 2]), + * (long long)(1));*/ + atomicAdd(reinterpret_cast(&res[hash * 4 + 2]), + (long long)(revenue[ITEM] - items[ITEM])); + } + } + } +} + +template +__global__ void build_hashtable_s(int *filter_col, int *dim_key, int num_tuples, + int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 1, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockBuildSelectivePHT_1( + items, 
selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_p(int *filter_col, int *dim_key, int num_tuples, + int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 0, selection_flags, + num_tile_items); + BlockPredOrEQ(items, 1, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockBuildSelectivePHT_1( + items, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_c(int *filter_col, int *dim_key, int *dim_val, + int num_tuples, int *hash_table, + int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 1, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_d(int *dim_key, int *dim_val, int num_tuples, + int *hash_table, int num_slots, int val_min) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + 
TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + InitFlags(selection_flags); + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, num_slots, val_min, + num_tile_items); +} + +float runQuery(int *lo_orderdate, int *lo_custkey, int *lo_partkey, + int *lo_suppkey, int *lo_revenue, int *lo_supplycost, int lo_len, + int *d_datekey, int *d_year, int d_len, int *p_partkey, + int *p_mfgr, int p_len, int *s_suppkey, int *s_region, int s_len, + int *c_custkey, int *c_region, int *c_nation, int c_len, + cub::CachingDeviceAllocator &g_allocator) { + SETUP_TIMING(); + + float time_query; + chrono::high_resolution_clock::time_point st, finish; + st = chrono::high_resolution_clock::now(); + + cudaEventRecord(start, 0); + + int *ht_d, *ht_c, *ht_s, *ht_p; + int d_val_len = 19981230 - 19920101 + 1; + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_d, 2 * d_val_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_s, 2 * s_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_c, 2 * c_len * sizeof(int))); + CubDebugExit( + g_allocator.DeviceAllocate((void **)&ht_p, 2 * p_len * sizeof(int))); + + CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_c, 0, 2 * c_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_p, 0, 2 * p_len * sizeof(int))); + + int tile_items = 128 * 4; + build_hashtable_s<128, 4><<<(s_len + tile_items - 1) / tile_items, 128>>>( + s_region, s_suppkey, s_len, ht_s, s_len); + /*CHECK_ERROR();*/ + + /* int *s_res = new int[s_len * 2]; */ + /* CubDebugExit( */ + /* cudaMemcpy(s_res, ht_s, s_len * 2 * sizeof(int), cudaMemcpyDeviceToHost)); */ + + build_hashtable_c<128, 
4><<<(c_len + tile_items - 1) / tile_items, 128>>>( + c_region, c_custkey, c_nation, c_len, ht_c, c_len); + /*CHECK_ERROR();*/ + + /* int *c_res = new int[c_len * 2]; */ + /* CubDebugExit( */ + /* cudaMemcpy(c_res, ht_c, c_len * 2 * sizeof(int), cudaMemcpyDeviceToHost)); */ + + build_hashtable_p<128, 4><<<(p_len + tile_items - 1) / tile_items, 128>>>( + p_mfgr, p_partkey, p_len, ht_p, p_len); + /*CHECK_ERROR();*/ + + /* int *p_res = new int[p_len * 2]; */ + /* CubDebugExit( */ + /* cudaMemcpy(p_res, ht_p, p_len * 2 * sizeof(int), cudaMemcpyDeviceToHost)); */ + + int d_val_min = 19920101; + build_hashtable_d<128, 4><<<(d_len + tile_items - 1) / tile_items, 128>>>( + d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min); + /*CHECK_ERROR();*/ + +#if 0 + int *h_ht_s = new int[s_len * 2]; + int *h_ht_c = new int[c_len * 2]; + int *h_ht_p = new int[p_len * 2]; + int *h_ht_d = new int[d_val_len * 2]; + + int num_s = 0 , num_c = 0, num_d = 0, num_p = 0; + + CubDebugExit(cudaMemcpy(h_ht_s, ht_s, 2 * s_len * sizeof(int), cudaMemcpyDeviceToHost)); + for (int i=0; i<<<(lo_len + tile_items - 1) / tile_items, 128>>>( + lo_orderdate, lo_partkey, lo_custkey, lo_suppkey, lo_revenue, + lo_supplycost, lo_len, ht_p, p_len, ht_s, s_len, ht_c, c_len, ht_d, + d_val_len, res); + + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&time_query, start, stop); + + int *h_res = new int[res_array_size]; + CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int), + cudaMemcpyDeviceToHost)); + finish = chrono::high_resolution_clock::now(); + std::chrono::duration diff = finish - st; + + cout << "Result:" << endl; + int res_count = 0; + for (int i = 0; i < res_size; i++) { + if (h_res[4 * i] != 0) { + cout << h_res[4 * i] << " " << h_res[4 * i + 1] << " " + << reinterpret_cast(&h_res[4 * i + 2])[0] + << endl; + res_count += 1; + } + } + + cout << "Res Count: " << res_count << endl; + cout << "Time Taken Total: " << diff.count() * 1000 << endl; + + delete[] 
h_res; + + return time_query; +} + +/** + * Main + */ +int main(int argc, char **argv) { + int num_trials = 1; + + // Initialize command line + CommandLineArgs args(argc, argv); + args.GetCmdLineArgument("t", num_trials); + + // Print usage + if (args.CheckCmdLineFlag("help")) { + printf("%s " + "[--t=] " + "[--v] " + "\n", + argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + int *h_lo_orderdate = loadColumn("lo_orderdate", LO_LEN); + int *h_lo_suppkey = loadColumn("lo_suppkey", LO_LEN); + int *h_lo_custkey = loadColumn("lo_custkey", LO_LEN); + int *h_lo_partkey = loadColumn("lo_partkey", LO_LEN); + int *h_lo_revenue = loadColumn("lo_revenue", LO_LEN); + int *h_lo_supplycost = loadColumn("lo_supplycost", LO_LEN); + + int *h_d_datekey = loadColumn("d_datekey", D_LEN); + int *h_d_year = loadColumn("d_year", D_LEN); + int *h_d_yearmonthnum = loadColumn("d_yearmonthnum", D_LEN); + + int *h_s_suppkey = loadColumn("s_suppkey", S_LEN); + int *h_s_region = loadColumn("s_region", S_LEN); + + int *h_p_partkey = loadColumn("p_partkey", P_LEN); + int *h_p_mfgr = loadColumn("p_mfgr", P_LEN); + + int *h_c_custkey = loadColumn("c_custkey", C_LEN); + int *h_c_region = loadColumn("c_region", C_LEN); + int *h_c_nation = loadColumn("c_nation", C_LEN); + + cout << "** LOADED DATA **" << endl; + + int *d_lo_orderdate = loadToGPU(h_lo_orderdate, LO_LEN, g_allocator); + int *d_lo_custkey = loadToGPU(h_lo_custkey, LO_LEN, g_allocator); + int *d_lo_suppkey = loadToGPU(h_lo_suppkey, LO_LEN, g_allocator); + int *d_lo_partkey = loadToGPU(h_lo_partkey, LO_LEN, g_allocator); + int *d_lo_revenue = loadToGPU(h_lo_revenue, LO_LEN, g_allocator); + int *d_lo_supplycost = loadToGPU(h_lo_supplycost, LO_LEN, g_allocator); + + int *d_d_datekey = loadToGPU(h_d_datekey, D_LEN, g_allocator); + int *d_d_year = loadToGPU(h_d_year, D_LEN, g_allocator); + + int *d_p_partkey = loadToGPU(h_p_partkey, P_LEN, g_allocator); + int *d_p_mfgr = loadToGPU(h_p_mfgr, P_LEN, 
g_allocator); + + int *d_s_suppkey = loadToGPU(h_s_suppkey, S_LEN, g_allocator); + int *d_s_region = loadToGPU(h_s_region, S_LEN, g_allocator); + + int *d_c_custkey = loadToGPU(h_c_custkey, C_LEN, g_allocator); + int *d_c_region = loadToGPU(h_c_region, C_LEN, g_allocator); + int *d_c_nation = loadToGPU(h_c_nation, C_LEN, g_allocator); + + cout << "** LOADED DATA TO GPU **" << endl; + + for (int t = 0; t < num_trials; t++) { + float time_query; + time_query = runQuery(d_lo_orderdate, d_lo_custkey, d_lo_partkey, + d_lo_suppkey, d_lo_revenue, d_lo_supplycost, LO_LEN, + d_d_datekey, d_d_year, D_LEN, d_p_partkey, d_p_mfgr, + P_LEN, d_s_suppkey, d_s_region, S_LEN, d_c_custkey, + d_c_region, d_c_nation, C_LEN, g_allocator); + cout << "{" + << "\"query\":41" + << ",\"time_query\":" << time_query << "}" << endl; + } + + return 0; +} diff --git a/crystal-opt/src/ssb/q42.cu b/crystal-opt/src/ssb/q42.cu new file mode 100644 index 0000000..08f8852 --- /dev/null +++ b/crystal-opt/src/ssb/q42.cu @@ -0,0 +1,411 @@ +// MIT License + +// Copyright (c) 2023 Jiashen Cao + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include "cub/test/test_util.h" +#include +#include + +#include "crystal.cuh" + +#include "gpu_utils.h" +#include "ssb_utils.h" + +using namespace std; + +/** + * Globals, constants and typedefs + */ +bool g_verbose = false; // Whether to display input/output to console +cub::CachingDeviceAllocator + g_allocator(true); // Caching allocator for device memory + +template +__global__ void probe(int *lo_orderdate, int *lo_partkey, int *lo_custkey, + int *lo_suppkey, int *lo_revenue, int *lo_supplycost, + int lo_len, int *ht_p, int p_len, int *ht_s, int s_len, + int *ht_c, int c_len, int *ht_d, int d_len, int *res) { + // Load a segment of consecutive items that are blocked across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int category[ITEMS_PER_THREAD]; + int s_nation[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, + items, num_tile_items); + BlockProbeAndPHT_2( + items, s_nation, selection_flags, ht_s, s_len, num_tile_items); + if (IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_custkey + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_1( + items, selection_flags, ht_c, c_len, num_tile_items); + if (IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_partkey + tile_offset, items, 
num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, category, selection_flags, ht_p, p_len, num_tile_items); + if (IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_orderdate + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, year, selection_flags, ht_d, d_len, 19920101, num_tile_items); + if (IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_revenue + tile_offset, revenue, num_tile_items, selection_flags); + BlockPredLoad( + lo_supplycost + tile_offset, items, num_tile_items, selection_flags); + +#pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { + if (threadIdx.x + (BLOCK_THREADS * ITEM) < num_tile_items) { + if (selection_flags[ITEM]) { + /*int hash = (category[ITEM] * 7 * 25 + s_nation[ITEM] * 7 + + * (year[ITEM] - 1992)) % ((1998-1992+1) * 25 * 55);*/ + int hash = ((year[ITEM] - 1992) * 25 * 25 + s_nation[ITEM] * 25 + + category[ITEM]) % + ((1998 - 1992 + 1) * 25 * 25); + res[hash * 6] = year[ITEM]; + res[hash * 6 + 1] = s_nation[ITEM]; + res[hash * 6 + 2] = category[ITEM]; + atomicAdd(reinterpret_cast(&res[hash * 6 + 4]), + (long long)(revenue[ITEM] - items[ITEM])); + } + } + } +} + +template +__global__ void build_hashtable_c(int *filter_col, int *dim_key, int num_tuples, + int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 1, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockBuildSelectivePHT_1( + items, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_p(int *filter_col, int *dim_key, int 
*dim_val, + int num_tuples, int *hash_table, + int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 0, selection_flags, + num_tile_items); + BlockPredOrEQ(items, 1, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_s(int *filter_col, int *dim_key, int *dim_val, + int num_tuples, int *hash_table, + int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 1, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_d(int *dim_key, int *dim_val, int num_tuples, + int *hash_table, int num_slots, int val_min) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + 
int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + InitFlags(selection_flags); + BlockLoad(dim_val + tile_offset, items, + num_tile_items); + BlockPredEQ( + items, 1997, selection_flags, num_tile_items); + BlockPredOrEQ( + items, 1998, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items2, items, selection_flags, hash_table, num_slots, val_min, + num_tile_items); +} +
+/**
+ * Runs SSB Q4.2 once: builds the supplier, customer, part and date hash
+ * tables, probes them with the lineorder columns, copies the aggregate table
+ * back to the host and prints every non-empty slot.
+ *
+ * Every int* argument is handed unchanged to a kernel; each *_len is the row
+ * count of its table. Returns the CUDA-event time in milliseconds measured
+ * from before the hash-table allocations to after the probe kernel.
+ *
+ * All device buffers allocated here are released before returning, so the
+ * function can be called once per trial without growing device memory use.
+ */
+float runQuery(int *lo_orderdate, int *lo_custkey, int *lo_partkey,
+               int *lo_suppkey, int *lo_revenue, int *lo_supplycost, int lo_len,
+               int *d_datekey, int *d_year, int d_len, int *p_partkey,
+               int *p_mfgr, int *p_category, int p_len, int *s_suppkey,
+               int *s_region, int *s_nation, int s_len, int *c_custkey,
+               int *c_region, int c_len,
+               cub::CachingDeviceAllocator &g_allocator) {
+  SETUP_TIMING();
+
+  float time_query;
+  chrono::high_resolution_clock::time_point st, finish;
+  st = chrono::high_resolution_clock::now();
+
+  cudaEventRecord(start, 0);
+
+  // The date table is indexed by (datekey - 19920101), so it needs one slot per
+  // value in the key range rather than one per row.
+  int *ht_d, *ht_c, *ht_s, *ht_p;
+  int d_val_len = 19981230 - 19920101 + 1;
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&ht_d, 2 * d_val_len * sizeof(int)));
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&ht_s, 2 * s_len * sizeof(int)));
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&ht_c, 2 * c_len * sizeof(int)));
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&ht_p, 2 * p_len * sizeof(int)));
+
+  CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int)));
+  CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int)));
+  CubDebugExit(cudaMemset(ht_c, 0, 2 * c_len * sizeof(int)));
+  CubDebugExit(cudaMemset(ht_p, 0, 2 * p_len * sizeof(int)));
+
+  // Each block of 128 threads handles 4 items per thread.
+  int tile_items = 128 * 4;
+  build_hashtable_s<128, 4><<<(s_len + tile_items - 1) / tile_items, 128>>>(
+      s_region, s_suppkey, s_nation, s_len, ht_s, s_len);
+  /*CHECK_ERROR();*/
+
+  build_hashtable_c<128, 4><<<(c_len + tile_items - 1) / tile_items, 128>>>(
+      c_region, c_custkey, c_len, ht_c, c_len);
+  /*CHECK_ERROR();*/
+
+  build_hashtable_p<128, 4><<<(p_len + tile_items - 1) / tile_items, 128>>>(
+      p_mfgr, p_partkey, p_category, p_len, ht_p, p_len);
+  /*CHECK_ERROR();*/
+
+  int d_val_min = 19920101;
+  build_hashtable_d<128, 4><<<(d_len + tile_items - 1) / tile_items, 128>>>(
+      d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min);
+  /*CHECK_ERROR();*/
+
+  // Aggregate table: one 6-int slot per (year, s_nation, category) group.
+  // probe writes ints 0..2 as the group key and adds into a 64-bit sum that
+  // starts at int 4.
+  int *res;
+  int res_size = ((1998 - 1992 + 1) * 25 * 25);
+  int ht_entries = 6;
+  int res_array_size = res_size * ht_entries;
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&res, res_array_size * sizeof(int)));
+
+  CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int)));
+
+  // Run
+  probe<128, 4><<<(lo_len + tile_items - 1) / tile_items, 128>>>(
+      lo_orderdate, lo_partkey, lo_custkey, lo_suppkey, lo_revenue,
+      lo_supplycost, lo_len, ht_p, p_len, ht_s, s_len, ht_c, c_len, ht_d,
+      d_val_len, res);
+
+  cudaEventRecord(stop, 0);
+  cudaEventSynchronize(stop);
+  cudaEventElapsedTime(&time_query, start, stop);
+
+  // A rejected kernel launch is only visible through the last-error state.
+  // Read it once here, after the timed region, so a bad launch stops the run
+  // instead of printing an all-zero result.
+  CubDebugExit(cudaGetLastError());
+
+  int *h_res = new int[res_array_size];
+  CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int),
+                          cudaMemcpyDeviceToHost));
+  finish = chrono::high_resolution_clock::now();
+  // NOTE(review): the template argument was missing in this copy of the patch;
+  // <double> (seconds) is assumed because the value is scaled by 1000 below.
+  std::chrono::duration<double> diff = finish - st;
+
+  cout << "Result:" << endl;
+  int res_count = 0;
+  for (int i = 0; i < res_size; i++) {
+    // A slot is in use when its year field is non-zero.
+    if (h_res[6 * i] != 0) {
+      // NOTE(review): the cast target was missing in this copy of the patch;
+      // unsigned long long is assumed, the only 64-bit integer type atomicAdd
+      // accepts for the sum that probe accumulates at this offset.
+      cout << h_res[6 * i] << " " << h_res[6 * i + 1] << " " << h_res[6 * i + 2]
+           << " "
+           << reinterpret_cast<unsigned long long *>(&h_res[6 * i + 4])[0]
+           << endl;
+      res_count += 1;
+    }
+  }
+
+  cout << "Res Count: " << res_count << endl;
+  cout << "Time Taken Total: " << diff.count() * 1000 << endl;
+
+  delete[] h_res;
+
+  // Hand the per-query device buffers back to the allocator. Without this,
+  // every trial left five allocations behind. Later trials may now be served
+  // from the allocator's cache, which shortens the allocation part of
+  // time_query.
+  CubDebugExit(g_allocator.DeviceFree(res));
+  CubDebugExit(g_allocator.DeviceFree(ht_p));
+  CubDebugExit(g_allocator.DeviceFree(ht_c));
+  CubDebugExit(g_allocator.DeviceFree(ht_s));
+  CubDebugExit(g_allocator.DeviceFree(ht_d));
+
+  // NOTE(review): start/stop come from SETUP_TIMING(), whose definition is not
+  // visible here; if it creates the events they still need cudaEventDestroy.
+
+  return time_query;
+}
+ +/** + * Main + */ +int main(int argc, char **argv) { + int num_trials = 3; + + // Initialize command line + CommandLineArgs args(argc, argv); + args.GetCmdLineArgument("t", num_trials); + + // Print usage + if (args.CheckCmdLineFlag("help")) { +
printf("%s " + "[--t=] " + "[--v] " + "\n", + argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + int *h_lo_orderdate = loadColumn("lo_orderdate", LO_LEN); + int *h_lo_suppkey = loadColumn("lo_suppkey", LO_LEN); + int *h_lo_custkey = loadColumn("lo_custkey", LO_LEN); + int *h_lo_partkey = loadColumn("lo_partkey", LO_LEN); + int *h_lo_revenue = loadColumn("lo_revenue", LO_LEN); + int *h_lo_supplycost = loadColumn("lo_supplycost", LO_LEN); + + int *h_d_datekey = loadColumn("d_datekey", D_LEN); + int *h_d_year = loadColumn("d_year", D_LEN); + int *h_d_yearmonthnum = loadColumn("d_yearmonthnum", D_LEN); + + int *h_s_suppkey = loadColumn("s_suppkey", S_LEN); + int *h_s_region = loadColumn("s_region", S_LEN); + int *h_s_nation = loadColumn("s_nation", S_LEN); + + int *h_p_partkey = loadColumn("p_partkey", P_LEN); + int *h_p_mfgr = loadColumn("p_mfgr", P_LEN); + int *h_p_category = loadColumn("p_category", P_LEN); + + int *h_c_custkey = loadColumn("c_custkey", C_LEN); + int *h_c_region = loadColumn("c_region", C_LEN); + + cout << "** LOADED DATA **" << endl; + + int *d_lo_orderdate = loadToGPU(h_lo_orderdate, LO_LEN, g_allocator); + int *d_lo_custkey = loadToGPU(h_lo_custkey, LO_LEN, g_allocator); + int *d_lo_suppkey = loadToGPU(h_lo_suppkey, LO_LEN, g_allocator); + int *d_lo_partkey = loadToGPU(h_lo_partkey, LO_LEN, g_allocator); + int *d_lo_revenue = loadToGPU(h_lo_revenue, LO_LEN, g_allocator); + int *d_lo_supplycost = loadToGPU(h_lo_supplycost, LO_LEN, g_allocator); + + int *d_d_datekey = loadToGPU(h_d_datekey, D_LEN, g_allocator); + int *d_d_year = loadToGPU(h_d_year, D_LEN, g_allocator); + + int *d_p_partkey = loadToGPU(h_p_partkey, P_LEN, g_allocator); + int *d_p_mfgr = loadToGPU(h_p_mfgr, P_LEN, g_allocator); + int *d_p_category = loadToGPU(h_p_category, P_LEN, g_allocator); + + int *d_s_suppkey = loadToGPU(h_s_suppkey, S_LEN, g_allocator); + int *d_s_region = loadToGPU(h_s_region, S_LEN, g_allocator); + int *d_s_nation = 
loadToGPU(h_s_nation, S_LEN, g_allocator); + + int *d_c_custkey = loadToGPU(h_c_custkey, C_LEN, g_allocator); + int *d_c_region = loadToGPU(h_c_region, C_LEN, g_allocator); + + cout << "** LOADED DATA TO GPU **" << endl; + + for (int t = 0; t < num_trials; t++) { + float time_query; + time_query = runQuery( + d_lo_orderdate, d_lo_custkey, d_lo_partkey, d_lo_suppkey, d_lo_revenue, + d_lo_supplycost, LO_LEN, d_d_datekey, d_d_year, D_LEN, d_p_partkey, + d_p_mfgr, d_p_category, P_LEN, d_s_suppkey, d_s_region, d_s_nation, + S_LEN, d_c_custkey, d_c_region, C_LEN, g_allocator); + cout << "{" + << "\"query\":42" + << ",\"time_query\":" << time_query << "}" << endl; + } + + return 0; +} diff --git a/crystal-opt/src/ssb/q43.cu b/crystal-opt/src/ssb/q43.cu new file mode 100644 index 0000000..bee27c2 --- /dev/null +++ b/crystal-opt/src/ssb/q43.cu @@ -0,0 +1,405 @@ +// MIT License + +// Copyright (c) 2023 Jiashen Cao + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include "cub/test/test_util.h" +#include +#include + +#include "crystal.cuh" + +#include "gpu_utils.h" +#include "ssb_utils.h" + +using namespace std; + +/** + * Globals, constants and typedefs + */ +bool g_verbose = false; // Whether to display input/output to console +cub::CachingDeviceAllocator + g_allocator(true); // Caching allocator for device memory + +template +__global__ void probe(int *lo_orderdate, int *lo_partkey, int *lo_custkey, + int *lo_suppkey, int *lo_revenue, int *lo_supplycost, + int lo_len, int *ht_p, int p_len, int *ht_s, int s_len, + int *ht_c, int c_len, int *ht_d, int d_len, int *res) { + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int brand[ITEMS_PER_THREAD]; + int s_city[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, + items, num_tile_items); + BlockProbeAndPHT_2( + items, s_city, selection_flags, ht_s, s_len, num_tile_items); + if (IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_custkey + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_1( + items, selection_flags, ht_c, c_len, num_tile_items); + if (IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_partkey + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, brand, 
selection_flags, ht_p, p_len, num_tile_items); + if (IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_orderdate + tile_offset, items, num_tile_items, selection_flags); + BlockProbeAndPHT_2( + items, year, selection_flags, ht_d, d_len, 19920101, num_tile_items); + if (IsTerm(selection_flags)) { return; } + + BlockPredLoad( + lo_revenue + tile_offset, revenue, num_tile_items, selection_flags); + BlockPredLoad( + lo_supplycost + tile_offset, items, num_tile_items, selection_flags); + +#pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { + if (threadIdx.x + (BLOCK_THREADS * ITEM) < num_tile_items) { + if (selection_flags[ITEM]) { + /*int hash = (category[ITEM] * 7 * 25 + s_nation[ITEM] * 7 + + * (year[ITEM] - 1992)) % ((1998-1992+1) * 25 * 55);*/ + int hash = ((year[ITEM] - 1992) * 250 * 1000 + s_city[ITEM] * 1000 + + brand[ITEM]) % + ((1998 - 1992 + 1) * 250 * 1000); + res[hash * 4] = year[ITEM]; + res[hash * 4 + 1] = s_city[ITEM]; + res[hash * 4 + 2] = brand[ITEM]; + atomicAdd(&res[hash * 4 + 3], (revenue[ITEM] - items[ITEM])); + } + } + } +} + +template +__global__ void build_hashtable_c(int *filter_col, int *dim_key, int num_tuples, + int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 1, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockBuildSelectivePHT_1( + items, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_p(int *filter_col, int *dim_key, int *dim_val, + int num_tuples, int *hash_table, + int num_slots) { + int items[ITEMS_PER_THREAD]; + int 
items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 3, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_s(int *filter_col, int *dim_key, int *dim_val, + int num_tuples, int *hash_table, + int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, + items, num_tile_items); + BlockPredEQ(items, 24, selection_flags, + num_tile_items); + + BlockLoad(dim_key + tile_offset, items, + num_tile_items); + BlockLoad(dim_val + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items, items2, selection_flags, hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_d(int *dim_key, int *dim_val, int num_tuples, + int *hash_table, int num_slots, int val_min) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + InitFlags(selection_flags); + 
BlockLoad(dim_val + tile_offset, items, + num_tile_items); + BlockPredEQ( + items, 1997, selection_flags, num_tile_items); + BlockPredOrEQ( + items, 1998, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items2, + num_tile_items); + BlockBuildSelectivePHT_2( + items2, items, selection_flags, hash_table, num_slots, val_min, + num_tile_items); +} +
+/**
+ * Runs SSB Q4.3 once: builds the supplier, customer, part and date hash
+ * tables, probes them with the lineorder columns, copies the aggregate table
+ * back to the host and prints every non-empty slot.
+ *
+ * Every int* argument is handed unchanged to a kernel; each *_len is the row
+ * count of its table. Returns the CUDA-event time in milliseconds measured
+ * from before the hash-table allocations to after the probe kernel.
+ *
+ * All device buffers allocated here are released before returning, so the
+ * function can be called once per trial without growing device memory use.
+ */
+float runQuery(int *lo_orderdate, int *lo_custkey, int *lo_partkey,
+               int *lo_suppkey, int *lo_revenue, int *lo_supplycost, int lo_len,
+               int *d_datekey, int *d_year, int d_len, int *p_partkey,
+               int *p_category, int *p_brand1, int p_len, int *s_suppkey,
+               int *s_nation, int *s_city, int s_len, int *c_custkey,
+               int *c_region, int c_len,
+               cub::CachingDeviceAllocator &g_allocator) {
+  SETUP_TIMING();
+
+  float time_query;
+  chrono::high_resolution_clock::time_point st, finish;
+  st = chrono::high_resolution_clock::now();
+
+  cudaEventRecord(start, 0);
+
+  // The date table is indexed by (datekey - 19920101), so it needs one slot per
+  // value in the key range rather than one per row.
+  int *ht_d, *ht_c, *ht_s, *ht_p;
+  int d_val_len = 19981230 - 19920101 + 1;
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&ht_d, 2 * d_val_len * sizeof(int)));
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&ht_s, 2 * s_len * sizeof(int)));
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&ht_c, 2 * c_len * sizeof(int)));
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&ht_p, 2 * p_len * sizeof(int)));
+
+  CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int)));
+  CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int)));
+  CubDebugExit(cudaMemset(ht_c, 0, 2 * c_len * sizeof(int)));
+  CubDebugExit(cudaMemset(ht_p, 0, 2 * p_len * sizeof(int)));
+
+  // Each block of 128 threads handles 4 items per thread.
+  int tile_items = 128 * 4;
+  build_hashtable_s<128, 4><<<(s_len + tile_items - 1) / tile_items, 128>>>(
+      s_nation, s_suppkey, s_city, s_len, ht_s, s_len);
+  /*CHECK_ERROR();*/
+
+  build_hashtable_c<128, 4><<<(c_len + tile_items - 1) / tile_items, 128>>>(
+      c_region, c_custkey, c_len, ht_c, c_len);
+  /*CHECK_ERROR();*/
+
+  build_hashtable_p<128, 4><<<(p_len + tile_items - 1) / tile_items, 128>>>(
+      p_category, p_partkey, p_brand1, p_len, ht_p, p_len);
+  /*CHECK_ERROR();*/
+
+  int d_val_min = 19920101;
+  build_hashtable_d<128, 4><<<(d_len + tile_items - 1) / tile_items, 128>>>(
+      d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min);
+  /*CHECK_ERROR();*/
+
+  // Aggregate table: one 4-int slot per (year, s_city, brand) group. probe
+  // writes ints 0..2 as the group key and adds the profit into int 3.
+  int *res;
+  int res_size = ((1998 - 1992 + 1) * 250 * 1000);
+  int ht_entries = 4;
+  int res_array_size = res_size * ht_entries;
+  CubDebugExit(
+      g_allocator.DeviceAllocate((void **)&res, res_array_size * sizeof(int)));
+
+  CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int)));
+
+  // Run
+  probe<128, 4><<<(lo_len + tile_items - 1) / tile_items, 128>>>(
+      lo_orderdate, lo_partkey, lo_custkey, lo_suppkey, lo_revenue,
+      lo_supplycost, lo_len, ht_p, p_len, ht_s, s_len, ht_c, c_len, ht_d,
+      d_val_len, res);
+
+  cudaEventRecord(stop, 0);
+  cudaEventSynchronize(stop);
+  cudaEventElapsedTime(&time_query, start, stop);
+
+  // A rejected kernel launch is only visible through the last-error state.
+  // Read it once here, after the timed region, so a bad launch stops the run
+  // instead of printing an all-zero result.
+  CubDebugExit(cudaGetLastError());
+
+  int *h_res = new int[res_array_size];
+  CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int),
+                          cudaMemcpyDeviceToHost));
+  finish = chrono::high_resolution_clock::now();
+  // NOTE(review): the template argument was missing in this copy of the patch;
+  // <double> (seconds) is assumed because the value is scaled by 1000 below.
+  std::chrono::duration<double> diff = finish - st;
+
+  // Printed after `finish` is taken so that console output is not part of the
+  // wall-clock interval; the text still precedes the result rows.
+  cout << "Result:" << endl;
+
+  int res_count = 0;
+  for (int i = 0; i < res_size; i++) {
+    // A slot is in use when its year field is non-zero.
+    if (h_res[4 * i] != 0) {
+      cout << h_res[4 * i] << " " << h_res[4 * i + 1] << " " << h_res[4 * i + 2]
+           << " " << h_res[4 * i + 3] << endl;
+      res_count += 1;
+    }
+  }
+
+  cout << "Res Count: " << res_count << endl;
+  cout << "Time Taken Total: " << diff.count() * 1000 << endl;
+
+  delete[] h_res;
+
+  // Hand the per-query device buffers back to the allocator. Without this,
+  // every trial left five allocations behind, the result table alone being
+  // 7,000,000 ints. Later trials may now be served from the allocator's
+  // cache, which shortens the allocation part of time_query.
+  CubDebugExit(g_allocator.DeviceFree(res));
+  CubDebugExit(g_allocator.DeviceFree(ht_p));
+  CubDebugExit(g_allocator.DeviceFree(ht_c));
+  CubDebugExit(g_allocator.DeviceFree(ht_s));
+  CubDebugExit(g_allocator.DeviceFree(ht_d));
+
+  // NOTE(review): start/stop come from SETUP_TIMING(), whose definition is not
+  // visible here; if it creates the events they still need cudaEventDestroy.
+
+  return time_query;
+}
+ +/** + * Main + */ +int main(int argc, char **argv) { + int num_trials = 3; + + // Initialize command line + CommandLineArgs args(argc, argv); + args.GetCmdLineArgument("t", num_trials); + + // Print usage + if (args.CheckCmdLineFlag("help")) { + printf("%s " + "[--t=] " + "[--v] " + "\n", + argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + int *h_lo_orderdate = loadColumn("lo_orderdate",
LO_LEN); + int *h_lo_suppkey = loadColumn("lo_suppkey", LO_LEN); + int *h_lo_custkey = loadColumn("lo_custkey", LO_LEN); + int *h_lo_partkey = loadColumn("lo_partkey", LO_LEN); + int *h_lo_revenue = loadColumn("lo_revenue", LO_LEN); + int *h_lo_supplycost = loadColumn("lo_supplycost", LO_LEN); + + int *h_d_datekey = loadColumn("d_datekey", D_LEN); + int *h_d_year = loadColumn("d_year", D_LEN); + + int *h_s_suppkey = loadColumn("s_suppkey", S_LEN); + int *h_s_nation = loadColumn("s_nation", S_LEN); + int *h_s_city = loadColumn("s_city", S_LEN); + + int *h_p_partkey = loadColumn("p_partkey", P_LEN); + int *h_p_category = loadColumn("p_category", P_LEN); + int *h_p_brand1 = loadColumn("p_brand1", P_LEN); + + int *h_c_custkey = loadColumn("c_custkey", C_LEN); + int *h_c_region = loadColumn("c_region", C_LEN); + + cout << "** LOADED DATA **" << endl; + + int *d_lo_orderdate = loadToGPU(h_lo_orderdate, LO_LEN, g_allocator); + int *d_lo_custkey = loadToGPU(h_lo_custkey, LO_LEN, g_allocator); + int *d_lo_suppkey = loadToGPU(h_lo_suppkey, LO_LEN, g_allocator); + int *d_lo_partkey = loadToGPU(h_lo_partkey, LO_LEN, g_allocator); + int *d_lo_revenue = loadToGPU(h_lo_revenue, LO_LEN, g_allocator); + int *d_lo_supplycost = loadToGPU(h_lo_supplycost, LO_LEN, g_allocator); + + int *d_d_datekey = loadToGPU(h_d_datekey, D_LEN, g_allocator); + int *d_d_year = loadToGPU(h_d_year, D_LEN, g_allocator); + + int *d_p_partkey = loadToGPU(h_p_partkey, P_LEN, g_allocator); + int *d_p_category = loadToGPU(h_p_category, P_LEN, g_allocator); + int *d_p_brand1 = loadToGPU(h_p_brand1, P_LEN, g_allocator); + + int *d_s_suppkey = loadToGPU(h_s_suppkey, S_LEN, g_allocator); + int *d_s_nation = loadToGPU(h_s_nation, S_LEN, g_allocator); + int *d_s_city = loadToGPU(h_s_city, S_LEN, g_allocator); + + int *d_c_custkey = loadToGPU(h_c_custkey, C_LEN, g_allocator); + int *d_c_region = loadToGPU(h_c_region, C_LEN, g_allocator); + + cout << "** LOADED DATA TO GPU **" << endl; + + for (int t = 0; t < 
num_trials; t++) { + float time_query; + time_query = runQuery( + d_lo_orderdate, d_lo_custkey, d_lo_partkey, d_lo_suppkey, d_lo_revenue, + d_lo_supplycost, LO_LEN, d_d_datekey, d_d_year, D_LEN, d_p_partkey, + d_p_category, d_p_brand1, P_LEN, d_s_suppkey, d_s_nation, d_s_city, + S_LEN, d_c_custkey, d_c_region, C_LEN, g_allocator); + cout << "{" + << "\"query\":43" + << ",\"time_query\":" << time_query << "}" << endl; + } + + return 0; +} diff --git a/crystal-opt/src/ssb/ssb_utils.h b/crystal-opt/src/ssb/ssb_utils.h new file mode 100644 index 0000000..a347b14 --- /dev/null +++ b/crystal-opt/src/ssb/ssb_utils.h @@ -0,0 +1,177 @@ +#include +#include +#include + +/*#include */ +/*#include */ + +using namespace std; + +#define SF 10 + +#define LOAD_TYPE 0 + +#define BASE_PATH "" + +#if SF == 1 +#define DATA_DIR BASE_PATH "sf1_column_bin/" +#define LO_LEN 6001171 +#define P_LEN 200000 +#define S_LEN 2000 +#define C_LEN 30000 +#define D_LEN 2556 +#elif SF == 2 +#define DATA_DIR BASE_PATH "sf2_column_bin/" +#define LO_LEN 11998051 +#define P_LEN 400000 +#define S_LEN 4000 +#define C_LEN 60000 +#define D_LEN 2556 +#elif SF == 4 +#define DATA_DIR BASE_PATH "sf4_column_bin/" +#define LO_LEN 23996670 +#define P_LEN 600000 +#define S_LEN 8000 +#define C_LEN 120000 +#define D_LEN 2556 +#elif SF == 8 +#define DATA_DIR BASE_PATH "sf8_column_bin/" +#define LO_LEN 47989129 +#define P_LEN 800000 +#define S_LEN 16000 +#define C_LEN 240000 +#define D_LEN 2556 +#elif SF == 10 +#define DATA_DIR BASE_PATH "/home/ubuntu/fff/gpu/data/ssb/data/s10_columnar/" +#define LO_LEN 59986214 +#define P_LEN 800000 +#define S_LEN 20000 +#define C_LEN 300000 +#define D_LEN 2556 +#elif SF == 16 +#define DATA_DIR BASE_PATH "/home/ubuntu/fff/gpu/data/ssb/data/s1_columnar/" +#define LO_LEN 95988758 +#define P_LEN 1000000 +#define S_LEN 32000 +#define C_LEN 480000 +#define D_LEN 2556 +#elif SF == 32 +#define DATA_DIR BASE_PATH "sf32_column_bin/" +#define LO_LEN 192000754 +#define P_LEN 1200000 +#define S_LEN 
64000 +#define C_LEN 960000 +#define D_LEN 2556 +#elif SF == 64 +#define DATA_DIR BASE_PATH "sf64_column_bin/" +#define LO_LEN 384016864 +#define P_LEN 1400000 +#define S_LEN 128000 +#define C_LEN 1920000 +#define D_LEN 2556 +#elif SF == 128 +#define DATA_DIR BASE_PATH "sf128_column_bin/" +#define LO_LEN 768047048 +#define P_LEN 1600000 +#define S_LEN 256000 +#define C_LEN 3840000 +#define D_LEN 2556 +#else // 20 +#define DATA_DIR BASE_PATH "s20_columnar/" +#define LO_LEN 119994746 +#define P_LEN 1000000 +#define S_LEN 40000 +#define C_LEN 600000 +#define D_LEN 2556 +#endif + +int index_of(string *arr, int len, string val) { + for (int i = 0; i < len; i++) + if (arr[i] == val) + return i; + + return -1; +} + +string lookup(string col_name) { + string lineorder[] = {"lo_orderkey", "lo_linenumber", "lo_custkey", + "lo_partkey", "lo_suppkey", "lo_orderdate", + "lo_orderpriority", "lo_shippriority", "lo_quantity", + "lo_extendedprice", "lo_ordtotalprice", "lo_discount", + "lo_revenue", "lo_supplycost", "lo_tax", + "lo_commitdate", "lo_shipmode"}; + string part[] = {"p_partkey", "p_name", "p_mfgr", "p_category", "p_brand1", + "p_color", "p_type", "p_size", "p_container"}; + string supplier[] = {"s_suppkey", "s_name", "s_address", "s_city", + "s_nation", "s_region", "s_phone"}; + string customer[] = {"c_custkey", "c_name", "c_address", "c_city", + "c_nation", "c_region", "c_phone", "c_mktsegment"}; + string date[] = {"d_datekey", + "d_date", + "d_dayofweek", + "d_month", + "d_year", + "d_yearmonthnum", + "d_yearmonth", + "d_daynuminweek", + "d_daynuminmonth", + "d_daynuminyear", + "d_sellingseason", + "d_lastdayinweekfl", + "d_lastdayinmonthfl", + "d_holidayfl", + "d_weekdayfl"}; + + if (col_name[0] == 'l') { + int index = index_of(lineorder, 17, col_name); + return "LINEORDER" + to_string(index); + } else if (col_name[0] == 's') { + int index = index_of(supplier, 7, col_name); + return "SUPPLIER" + to_string(index); + } else if (col_name[0] == 'c') { + int index = 
index_of(customer, 8, col_name); + return "CUSTOMER" + to_string(index); + } else if (col_name[0] == 'p') { + int index = index_of(part, 9, col_name); + return "PART" + to_string(index); + } else if (col_name[0] == 'd') { + int index = index_of(date, 15, col_name); + return "DDATE" + to_string(index); + } + + return ""; +} + +template T *loadColumn(string col_name, int num_entries) { + T *h_col = new T[num_entries]; + string filename = DATA_DIR + lookup(col_name); + ifstream colData(filename.c_str(), ios::in | ios::binary); + if (!colData) { + return NULL; + } + + colData.read((char *)h_col, num_entries * sizeof(T)); + return h_col; +} + +template +int storeColumn(string col_name, int num_entries, int *h_col) { + string filename = DATA_DIR + lookup(col_name); + ofstream colData(filename.c_str(), ios::out | ios::binary); + if (!colData) { + return -1; + } + + colData.write((char *)h_col, num_entries * sizeof(T)); + return 0; +} + +/*int main() {*/ +// int *h_col = new int[10]; +// for (int i=0; i<10; i++) h_col[i] = i; +// storeColumn("test", 10, h_col); +// int *l_col = loadColumn("test", 10); +// for (int i=0; i<10; i++) cout << l_col[i] << " "; +// cout << endl; +// return 0; +/*}*/ \ No newline at end of file diff --git a/crystal/CMakeLists.txt b/crystal/CMakeLists.txt new file mode 100644 index 0000000..2721300 --- /dev/null +++ b/crystal/CMakeLists.txt @@ -0,0 +1,3 @@ +# Source : ------------------------------------------------------------------------------------------------------------- +add_subdirectory(src) + diff --git a/crystal/LICENSE b/crystal/LICENSE new file mode 100644 index 0000000..beb8041 --- /dev/null +++ b/crystal/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Anil Shanbhag + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, 
merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/crystal/README.md b/crystal/README.md new file mode 100644 index 0000000..47bd715 --- /dev/null +++ b/crystal/README.md @@ -0,0 +1,79 @@ +Crystal GPU Library +================= + +The Crystal library implements a collection of block-wide device functions that can be used to implement high performance implementations of SQL queries on GPUs. 
+ +The package contains: + +* Crystal: `crystal/` contains the block-wide device functions +* Implementations: `src/` contains SQL query operator implementations and implementations of 13 queries of the Star Schema Benchmark + +For full details of the Crystal, see our [paper](http://anilshanbhag.in/static/papers/crystal_sigmod20.pdf) + +``` +@inproceedings{shanbhag2020crystal, + author = {Shanbhag, Anil and Madden, Samuel and Yu, Xiangyao}, + title = {A Study of the Fundamental Performance Characteristics of GPUs and CPUs for Database Analytics}, + year = {2020}, + url = {https://doi.org/10.1145/3318464.3380595}, + doi = {10.1145/3318464.3380595}, + booktitle = {Proceedings of the 2020 ACM SIGMOD International Conference on Management of Data}, + pages = {1617–1632}, + numpages = {16}, + location = {Portland, OR, USA}, + series = {SIGMOD ’20} +} +``` + +Usage +---- + +To use Crystal: + +* Copy out the `crystal` directory into your project. +* Include Crystal +``` +#include "crystal/crystal.cuh" +``` +* Add the crystal directory to your include path + +To run the operator implementations: + +* Compile and run the operator. 
E.g., +``` +make bin/ops/project +./bin/ops/project +``` + +To run the Star Schema Benchmark implementation: + +* Generate the test dataset + +``` +cd test/ + +# Generate the test generator / transformer binaries +cd ssb/dbgen +make +cd ../loader +make +cd ../../ + +# Generate the test data and transform into columnar layout +# Substitute with appropriate scale factor (eg: 1) +python util.py ssb gen +python util.py ssb transform +``` + +* Configure the benchmark settings +``` +cd src/ssb/ +# Edit SF and BASE_PATH in ssb_utils.h +``` + +* To run a query, say run q11 +``` +make bin/ssb/q11 +./bin/ssb/q11 +``` + diff --git a/crystal/src/CMakeLists.txt b/crystal/src/CMakeLists.txt new file mode 100644 index 0000000..466035d --- /dev/null +++ b/crystal/src/CMakeLists.txt @@ -0,0 +1,43 @@ +add_library(crystal STATIC ops/join.cu ops/project.cu) +target_include_directories(crystal PUBLIC ops) +target_include_directories(crystal PUBLIC ssb) +target_include_directories(crystal PUBLIC crystal) + +add_executable(crystal_q11 ssb/q11.cu) +target_link_libraries(crystal_q11 crystal) + +add_executable(crystal_q12 ssb/q12.cu) +target_link_libraries(crystal_q12 crystal) + +add_executable(crystal_q13 ssb/q13.cu) +target_link_libraries(crystal_q13 crystal) + +add_executable(crystal_q21 ssb/q21.cu) +target_link_libraries(crystal_q21 crystal) + +add_executable(crystal_q22 ssb/q22.cu) +target_link_libraries(crystal_q22 crystal) + +add_executable(crystal_q23 ssb/q23.cu) +target_link_libraries(crystal_q23 crystal) + +add_executable(crystal_q31 ssb/q31.cu) +target_link_libraries(crystal_q31 crystal) + +add_executable(crystal_q32 ssb/q32.cu) +target_link_libraries(crystal_q32 crystal) + +add_executable(crystal_q33 ssb/q33.cu) +target_link_libraries(crystal_q33 crystal) + +add_executable(crystal_q34 ssb/q34.cu) +target_link_libraries(crystal_q34 crystal) + +add_executable(crystal_q41 ssb/q41.cu) +target_link_libraries(crystal_q41 crystal) + +add_executable(crystal_q42 ssb/q42.cu) 
+target_link_libraries(crystal_q42 crystal) + +add_executable(crystal_q43 ssb/q43.cu) +target_link_libraries(crystal_q43 crystal) \ No newline at end of file diff --git a/crystal/src/crystal/crystal.cuh b/crystal/src/crystal/crystal.cuh new file mode 100644 index 0000000..ddce5b8 --- /dev/null +++ b/crystal/src/crystal/crystal.cuh @@ -0,0 +1,9 @@ +#pragma once + +// Block-wide functions +#include "load.cuh" +#include "pred.cuh" +#include "store.cuh" +#include "reduce.cuh" +#include "join.cuh" + diff --git a/crystal/src/crystal/join.cuh b/crystal/src/crystal/join.cuh new file mode 100644 index 0000000..eabb74c --- /dev/null +++ b/crystal/src/crystal/join.cuh @@ -0,0 +1,311 @@ +#pragma once + +#define HASH(X,Y,Z) ((X-Z) % Y) + +template +__device__ __forceinline__ void BlockProbeDirectAndPHT_1( + int tid, + K (&items)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + K keys_min + ) { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + if (selection_flags[ITEM]) { + int hash = HASH(items[ITEM], ht_len, keys_min); + + K slot = ht[hash]; + if (slot != 0) { + selection_flags[ITEM] = 1; + } else { + selection_flags[ITEM] = 0; + } + } + } +} + +template +__device__ __forceinline__ void BlockProbeDirectAndPHT_1( + int tid, + K (&items)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + K keys_min, + int num_items + ) { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + if (tid + (ITEM * BLOCK_THREADS) < num_items) { + if (selection_flags[ITEM]) { + int hash = HASH(items[ITEM], ht_len, keys_min); + + K slot = ht[hash]; + if (slot != 0) { + selection_flags[ITEM] = 1; + } else { + selection_flags[ITEM] = 0; + } + } + } + } +} + +template +__device__ __forceinline__ void BlockProbeAndPHT_1( + K (&items)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + K keys_min, + int num_items + ) { + + if ((BLOCK_THREADS * 
ITEMS_PER_THREAD) == num_items) { + BlockProbeDirectAndPHT_1(threadIdx.x, items, selection_flags, ht, ht_len, keys_min); + } else { + BlockProbeDirectAndPHT_1(threadIdx.x, items, selection_flags, ht, ht_len, keys_min, num_items); + } +} + +template +__device__ __forceinline__ void BlockProbeAndPHT_1( + K (&items)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + int num_items + ) { + BlockProbeAndPHT_1(items, selection_flags, ht, ht_len, 0, num_items); +} + +template +__device__ __forceinline__ void BlockProbeDirectAndPHT_2( + int tid, + K (&keys)[ITEMS_PER_THREAD], + V (&res)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + K keys_min + ) { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + if (selection_flags[ITEM]) { + int hash = HASH(keys[ITEM], ht_len, keys_min); + + uint64_t slot = *reinterpret_cast(&ht[hash << 1]); + if (slot != 0) { + res[ITEM] = (slot >> 32); + } else { + selection_flags[ITEM] = 0; + } + } + } +} + +template +__device__ __forceinline__ void BlockProbeDirectAndPHT_2( + int tid, + K (&items)[ITEMS_PER_THREAD], + V (&res)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + K keys_min, + int num_items + ) { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + if (tid + (ITEM * BLOCK_THREADS) < num_items) { + if (selection_flags[ITEM]) { + int hash = HASH(items[ITEM], ht_len, keys_min); + + uint64_t slot = *reinterpret_cast(&ht[hash << 1]); + if (slot != 0) { + res[ITEM] = (slot >> 32); + } else { + selection_flags[ITEM] = 0; + } + } + } + } +} + +template +__device__ __forceinline__ void BlockProbeAndPHT_2( + K (&keys)[ITEMS_PER_THREAD], + V (&res)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + K keys_min, + int num_items + ) { + + if ((BLOCK_THREADS * ITEMS_PER_THREAD) == num_items) { + BlockProbeDirectAndPHT_2(threadIdx.x, keys, res, 
selection_flags, ht, ht_len, keys_min); + } else { + BlockProbeDirectAndPHT_2(threadIdx.x, keys, res, selection_flags, ht, ht_len, keys_min, num_items); + } +} + +template +__device__ __forceinline__ void BlockProbeAndPHT_2( + K (&keys)[ITEMS_PER_THREAD], + V (&res)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + int num_items + ) { + BlockProbeAndPHT_2(keys, res, selection_flags, ht, ht_len, 0, num_items); +} + +template +__device__ __forceinline__ void BlockBuildDirectSelectivePHT_1( + int tid, + K (&keys)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + K keys_min + ) { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + if (selection_flags[ITEM]) { + int hash = HASH(keys[ITEM], ht_len, keys_min); + + K old = atomicCAS(&ht[hash], 0, keys[ITEM]); + } + } +} + +template +__device__ __forceinline__ void BlockBuildDirectSelectivePHT_1( + int tid, + K (&items)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + K keys_min, + int num_items + ) { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + if (tid + (ITEM * BLOCK_THREADS) < num_items) { + if (selection_flags[ITEM]) { + int hash = HASH(items[ITEM], ht_len, keys_min); + + K old = atomicCAS(&ht[hash], 0, items[ITEM]); + } + } + } +} + +template +__device__ __forceinline__ void BlockBuildSelectivePHT_1( + K (&keys)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + K keys_min, + int num_items + ) { + + if ((BLOCK_THREADS * ITEMS_PER_THREAD) == num_items) { + BlockBuildDirectSelectivePHT_1(threadIdx.x, keys, selection_flags, ht, ht_len, keys_min); + } else { + BlockBuildDirectSelectivePHT_1(threadIdx.x, keys, selection_flags, ht, ht_len, keys_min, num_items); + } +} + +template +__device__ __forceinline__ void BlockBuildSelectivePHT_1( + K (&keys)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + 
int ht_len, + int num_items + ) { + BlockBuildSelectivePHT_1(keys, selection_flags, ht, ht_len, 0, num_items); +} + +template +__device__ __forceinline__ void BlockBuildDirectSelectivePHT_2( + int tid, + K (&keys)[ITEMS_PER_THREAD], + V (&res)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + K keys_min + ) { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + if (selection_flags[ITEM]) { + int hash = HASH(keys[ITEM], ht_len, keys_min); + + K old = atomicCAS(&ht[hash << 1], 0, keys[ITEM]); + ht[(hash << 1) + 1] = res[ITEM]; + } + } +} + +template +__device__ __forceinline__ void BlockBuildDirectSelectivePHT_2( + int tid, + K (&keys)[ITEMS_PER_THREAD], + V (&res)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + K keys_min, + int num_items + ) { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + if (tid + (ITEM * BLOCK_THREADS) < num_items) { + if (selection_flags[ITEM]) { + int hash = HASH(keys[ITEM], ht_len, keys_min); + + K old = atomicCAS(&ht[hash << 1], 0, keys[ITEM]); + ht[(hash << 1) + 1] = res[ITEM]; + } + } + } +} + +template +__device__ __forceinline__ void BlockBuildSelectivePHT_2( + K (&keys)[ITEMS_PER_THREAD], + V (&res)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + K keys_min, + int num_items + ) { + + if ((BLOCK_THREADS * ITEMS_PER_THREAD) == num_items) { + BlockBuildDirectSelectivePHT_2( + threadIdx.x, keys, res, selection_flags, ht, ht_len, keys_min); + } else { + BlockBuildDirectSelectivePHT_2( + threadIdx.x, keys, res, selection_flags, ht, ht_len, keys_min, num_items); + } +} + +template +__device__ __forceinline__ void BlockBuildSelectivePHT_2( + K (&keys)[ITEMS_PER_THREAD], + V (&res)[ITEMS_PER_THREAD], + int (&selection_flags)[ITEMS_PER_THREAD], + K* ht, + int ht_len, + int num_items + ) { + BlockBuildSelectivePHT_2(keys, res, selection_flags, ht, ht_len, 0, num_items); +} diff 
--git a/crystal/src/crystal/load.cuh b/crystal/src/crystal/load.cuh new file mode 100644 index 0000000..bf18fe8 --- /dev/null +++ b/crystal/src/crystal/load.cuh @@ -0,0 +1,97 @@ +#pragma once + +template +__device__ __forceinline__ void BlockLoadDirect( + const unsigned int tid, + T* block_itr, + T (&items)[ITEMS_PER_THREAD] + ) { + T* thread_itr = block_itr + tid; + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + items[ITEM] = thread_itr[ITEM * BLOCK_THREADS]; + } +} + +template +__device__ __forceinline__ void BlockLoadDirect( + const unsigned int tid, + T* block_itr, + T (&items)[ITEMS_PER_THREAD], + int num_items + ) { + T* thread_itr = block_itr + tid; + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + if (tid + (ITEM * BLOCK_THREADS) < num_items) { + items[ITEM] = thread_itr[ITEM * BLOCK_THREADS]; + } + } +} + +template +__device__ __forceinline__ void BlockLoad( + T* inp, + T (&items)[ITEMS_PER_THREAD], + int num_items + ) { + T* block_itr = inp; + + if ((BLOCK_THREADS * ITEMS_PER_THREAD) == num_items) { + BlockLoadDirect(threadIdx.x, block_itr, items); + } else { + BlockLoadDirect(threadIdx.x, block_itr, items, num_items); + } +} + +#if 0 + +template +__device__ __forceinline__ void BlockLoadDirect( + int tid, + T* block_itr, + T (&items)[ITEMS_PER_THREAD] + ) { + T* thread_itr = block_itr + tid; + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + items[ITEM] = thread_itr[ITEM * BLOCK_THREADS]; + } +} + +template +__device__ __forceinline__ void BlockLoadDirect( + int tid, + T* block_itr, + T (&items)[ITEMS_PER_THREAD] + int num_items + ) { + T* thread_itr = block_itr + tid; + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + if (tid + (ITEM * BLOCK_THREADS) < num_items) { + items[ITEM] = thread_itr[ITEM * BLOCK_THREADS]; + } + } +} + +template +__device__ __forceinline__ void BlockLoad( + T* inp, + T (&items)[ITEMS_PER_THREAD] + int num_items + ) { + T* 
block_itr = inp + blockIdx.x * blockDim.x; + + if ((BLOCK_THREADS * ITEMS_PER_THREAD) == num_items) { + BlockLoadDirect(threadIdx.x, block_itr, items); + } else { + BlockLoadDirect(threadIdx.x, block_itr, items, num_items); + } +} + +#endif diff --git a/crystal/src/crystal/pred.cuh b/crystal/src/crystal/pred.cuh new file mode 100644 index 0000000..491f96e --- /dev/null +++ b/crystal/src/crystal/pred.cuh @@ -0,0 +1,335 @@ +#pragma once + +template +__device__ __forceinline__ void InitFlags( + int (&selection_flags)[ITEMS_PER_THREAD] + ) { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + selection_flags[ITEM] = 1; + } +} + +template +__device__ __forceinline__ void BlockPredDirect( + int tid, + T (&items)[ITEMS_PER_THREAD], + SelectOp select_op, + int (&selection_flags)[ITEMS_PER_THREAD] + ) { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + selection_flags[ITEM] = select_op(items[ITEM]); + } +} + +template +__device__ __forceinline__ void BlockPredDirect( + int tid, + T (&items)[ITEMS_PER_THREAD], + SelectOp select_op, + int (&selection_flags)[ITEMS_PER_THREAD], + int num_items + ) { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + if (tid + (ITEM * BLOCK_THREADS) < num_items) { + selection_flags[ITEM] = select_op(items[ITEM]); + } + } +} + +template +__device__ __forceinline__ void BlockPred( + T (&items)[ITEMS_PER_THREAD], + SelectOp select_op, + int (&selection_flags)[ITEMS_PER_THREAD], + int num_items + ) { + + if ((BLOCK_THREADS * ITEMS_PER_THREAD) == num_items) { + BlockPredDirect(threadIdx.x, items, select_op, selection_flags); + } else { + BlockPredDirect(threadIdx.x, items, select_op, selection_flags, num_items); + } +} + +template +__device__ __forceinline__ void BlockPredAndDirect( + int tid, + T (&items)[ITEMS_PER_THREAD], + SelectOp select_op, + int (&selection_flags)[ITEMS_PER_THREAD] + ) { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + 
selection_flags[ITEM] = selection_flags[ITEM] && select_op(items[ITEM]); + } +} + +template +__device__ __forceinline__ void BlockPredAndDirect( + int tid, + T (&items)[ITEMS_PER_THREAD], + SelectOp select_op, + int (&selection_flags)[ITEMS_PER_THREAD], + int num_items + ) { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + if (tid + (ITEM * BLOCK_THREADS) < num_items) { + selection_flags[ITEM] = selection_flags[ITEM] && select_op(items[ITEM]); + } + } +} + +template +__device__ __forceinline__ void BlockPredAnd( + T (&items)[ITEMS_PER_THREAD], + SelectOp select_op, + int (&selection_flags)[ITEMS_PER_THREAD], + int num_items + ) { + + if ((BLOCK_THREADS * ITEMS_PER_THREAD) == num_items) { + BlockPredAndDirect(threadIdx.x, items, select_op, selection_flags); + } else { + BlockPredAndDirect(threadIdx.x, items, select_op, selection_flags, num_items); + } +} + +template +__device__ __forceinline__ void BlockPredOrDirect( + int tid, + T (&items)[ITEMS_PER_THREAD], + SelectOp select_op, + int (&selection_flags)[ITEMS_PER_THREAD] + ) { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + selection_flags[ITEM] = selection_flags[ITEM] || select_op(items[ITEM]); + } +} + +template +__device__ __forceinline__ void BlockPredOrDirect( + int tid, + T (&items)[ITEMS_PER_THREAD], + SelectOp select_op, + int (&selection_flags)[ITEMS_PER_THREAD], + int num_items + ) { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + if (tid + (ITEM * BLOCK_THREADS) < num_items) { + selection_flags[ITEM] = selection_flags[ITEM] || select_op(items[ITEM]); + } + } +} + +template +__device__ __forceinline__ void BlockPredOr( + T (&items)[ITEMS_PER_THREAD], + SelectOp select_op, + int (&selection_flags)[ITEMS_PER_THREAD], + int num_items + ) { + + if ((BLOCK_THREADS * ITEMS_PER_THREAD) == num_items) { + BlockPredOrDirect(threadIdx.x, items, select_op, selection_flags); + } else { + BlockPredOrDirect(threadIdx.x, items, select_op, 
selection_flags, num_items); + } +} + +template +struct LessThan +{ + T compare; + + __device__ __forceinline__ + LessThan(T compare) : compare(compare) {} + + __device__ __forceinline__ + bool operator()(const T &a) const { + return (a < compare); + } +}; + +template +struct GreaterThan +{ + T compare; + + __device__ __forceinline__ + GreaterThan(T compare) : compare(compare) {} + + __device__ __forceinline__ + bool operator()(const T &a) const { + return (a > compare); + } +}; + +template +struct LessThanEq +{ + T compare; + + __device__ __forceinline__ + LessThanEq(T compare) : compare(compare) {} + + __device__ __forceinline__ + bool operator()(const T &a) const { + return (a <= compare); + } +}; + +template +struct GreaterThanEq +{ + T compare; + + __device__ __forceinline__ + GreaterThanEq(T compare) : compare(compare) {} + + __device__ __forceinline__ + bool operator()(const T &a) const { + return (a >= compare); + } +}; + +template +struct Eq +{ + T compare; + + __device__ __forceinline__ + Eq(T compare) : compare(compare) {} + + __device__ __forceinline__ + bool operator()(const T &a) const { + return (a == compare); + } +}; + +template +__device__ __forceinline__ void BlockPredLT( + T (&items)[ITEMS_PER_THREAD], + T compare, + int (&selection_flags)[ITEMS_PER_THREAD], + int num_items + ) { + LessThan select_op(compare); + BlockPred, BLOCK_THREADS, ITEMS_PER_THREAD>(items, select_op, selection_flags, num_items); +} + +template +__device__ __forceinline__ void BlockPredAndLT( + T (&items)[ITEMS_PER_THREAD], + T compare, + int (&selection_flags)[ITEMS_PER_THREAD], + int num_items + ) { + LessThan select_op(compare); + BlockPredAnd, BLOCK_THREADS, ITEMS_PER_THREAD>(items, select_op, selection_flags, num_items); +} + +template +__device__ __forceinline__ void BlockPredGT( + T (&items)[ITEMS_PER_THREAD], + T compare, + int (&selection_flags)[ITEMS_PER_THREAD], + int num_items + ) { + GreaterThan select_op(compare); + BlockPred, BLOCK_THREADS, 
ITEMS_PER_THREAD>(items, select_op, selection_flags, num_items); +} + +template +__device__ __forceinline__ void BlockPredAndGT( + T (&items)[ITEMS_PER_THREAD], + T compare, + int (&selection_flags)[ITEMS_PER_THREAD], + int num_items + ) { + GreaterThan select_op(compare); + BlockPredAnd, BLOCK_THREADS, ITEMS_PER_THREAD>(items, select_op, selection_flags, num_items); +} + +template +__device__ __forceinline__ void BlockPredLTE( + T (&items)[ITEMS_PER_THREAD], + T compare, + int (&selection_flags)[ITEMS_PER_THREAD], + int num_items + ) { + LessThanEq select_op(compare); + BlockPred, BLOCK_THREADS, ITEMS_PER_THREAD>(items, select_op, selection_flags, num_items); +} + +template +__device__ __forceinline__ void BlockPredAndLTE( + T (&items)[ITEMS_PER_THREAD], + T compare, + int (&selection_flags)[ITEMS_PER_THREAD], + int num_items + ) { + LessThanEq select_op(compare); + BlockPredAnd, BLOCK_THREADS, ITEMS_PER_THREAD>(items, select_op, selection_flags, num_items); +} + +template +__device__ __forceinline__ void BlockPredGTE( + T (&items)[ITEMS_PER_THREAD], + T compare, + int (&selection_flags)[ITEMS_PER_THREAD], + int num_items + ) { + GreaterThanEq select_op(compare); + BlockPred, BLOCK_THREADS, ITEMS_PER_THREAD>(items, select_op, selection_flags, num_items); +} + +template +__device__ __forceinline__ void BlockPredAndGTE( + T (&items)[ITEMS_PER_THREAD], + T compare, + int (&selection_flags)[ITEMS_PER_THREAD], + int num_items + ) { + GreaterThanEq select_op(compare); + BlockPredAnd, BLOCK_THREADS, ITEMS_PER_THREAD>(items, select_op, selection_flags, num_items); +} + +template +__device__ __forceinline__ void BlockPredEQ( + T (&items)[ITEMS_PER_THREAD], + T compare, + int (&selection_flags)[ITEMS_PER_THREAD], + int num_items + ) { + Eq select_op(compare); + BlockPred, BLOCK_THREADS, ITEMS_PER_THREAD>(items, select_op, selection_flags, num_items); +} + +template +__device__ __forceinline__ void BlockPredAndEQ( + T (&items)[ITEMS_PER_THREAD], + T compare, + int 
(&selection_flags)[ITEMS_PER_THREAD], + int num_items + ) { + Eq select_op(compare); + BlockPredAnd, BLOCK_THREADS, ITEMS_PER_THREAD>(items, select_op, selection_flags, num_items); +} + +template +__device__ __forceinline__ void BlockPredOrEQ( + T (&items)[ITEMS_PER_THREAD], + T compare, + int (&selection_flags)[ITEMS_PER_THREAD], + int num_items + ) { + Eq select_op(compare); + BlockPredOr, BLOCK_THREADS, ITEMS_PER_THREAD>(items, select_op, selection_flags, num_items); +} + diff --git a/crystal/src/crystal/reduce.cuh b/crystal/src/crystal/reduce.cuh new file mode 100644 index 0000000..ff0baca --- /dev/null +++ b/crystal/src/crystal/reduce.cuh @@ -0,0 +1,53 @@ +#pragma once + +template +__device__ __forceinline__ T BlockSum( + T item, + T* shared + ) { + __syncthreads(); + + T val = item; + const int warp_size = 32; + int lane = threadIdx.x % warp_size; + int wid = threadIdx.x / warp_size; + + // Calculate sum across warp + for (int offset = 16; offset > 0; offset /= 2) { + val += __shfl_down_sync(0xffffffff, val, offset); + } + + // Store sum in buffer + if (lane == 0) { + shared[wid] = val; + } + + __syncthreads(); + + // Load the sums into the first warp + val = (threadIdx.x < blockDim.x / warp_size) ? 
shared[lane] : 0; + + // Calculate sum of sums + if (wid == 0) { + for (int offset = 16; offset > 0; offset /= 2) { + val += __shfl_down_sync(0xffffffff, val, offset); + } + } + + return val; +} + +template +__device__ __forceinline__ T BlockSum( + T (&items)[ITEMS_PER_THREAD], + T* shared + ) { + T thread_sum = 0; + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + thread_sum += items[ITEM]; + } + + return BlockSum(thread_sum, shared); +} diff --git a/crystal/src/crystal/store.cuh b/crystal/src/crystal/store.cuh new file mode 100644 index 0000000..a99d5b4 --- /dev/null +++ b/crystal/src/crystal/store.cuh @@ -0,0 +1,98 @@ +#pragma once + +template +__device__ __forceinline__ void BlockStoreDirect( + int tid, + T* block_itr, + T (&items)[ITEMS_PER_THREAD] + ) { + T* thread_itr = block_itr + tid; + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + thread_itr[ITEM * BLOCK_THREADS] = items[ITEM]; + } +} + +template +__device__ __forceinline__ void BlockStoreDirect( + int tid, + T* block_itr, + T (&items)[ITEMS_PER_THREAD], + int num_items + ) { + T* thread_itr = block_itr + tid; + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + if (tid + (ITEM * BLOCK_THREADS) < num_items) { + thread_itr[ITEM * BLOCK_THREADS] = items[ITEM]; + } + } +} + +template +__device__ __forceinline__ void BlockStore( + T* out, + T (&items)[ITEMS_PER_THREAD], + int num_items + ) { + T* block_itr = out; + + if ((BLOCK_THREADS * ITEMS_PER_THREAD) == num_items) { + BlockStoreDirect(threadIdx.x, block_itr, items); + } else { + BlockStoreDirect(threadIdx.x, block_itr, items, num_items); + } +} + +#if 0 + +template +__device__ __forceinline__ void BlockStoreDirect( + int tid, + T* block_itr, + T (&items)[ITEMS_PER_THREAD] + ) { + T* thread_itr = block_itr + tid; + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + items[ITEM] = thread_itr[ITEM * BLOCK_THREADS]; + } +} + +template +__device__ 
__forceinline__ void BlockStoreDirect( + int tid, + T* block_itr, + T (&items)[ITEMS_PER_THREAD] + int num_items + ) { + T* thread_itr = block_itr + tid; + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { + if (tid + (ITEM * BLOCK_THREADS) < num_items) { + items[ITEM] = thread_itr[ITEM * BLOCK_THREADS]; + } + } +} + +template +__device__ __forceinline__ void BlockStore( + T* inp, + T (&items)[ITEMS_PER_THREAD] + int num_items + ) { + T* block_itr = inp + blockIdx.x * blockDim.x; + + if ((BLOCK_THREADS * ITEMS_PER_THREAD) == num_items) { + BlockStoreDirect(threadIdx.x, block_itr, items); + } else { + BlockStoreDirect(threadIdx.x, block_itr, items, num_items); + } +} + +#endif + diff --git a/crystal/src/ops/join.cu b/crystal/src/ops/join.cu new file mode 100644 index 0000000..4944191 --- /dev/null +++ b/crystal/src/ops/join.cu @@ -0,0 +1,220 @@ +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include +#include +#include "cub/test/test_util.h" + +#include "crystal.cuh" + +#include "utils/generator.h" +#include "utils/gpu_utils.h" + +using namespace std; + +#define DEBUG 1 + +template +__global__ void build_kernel(int *dim_key, int *dim_val, int num_tuples, int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + InitFlags(selection_flags); + BlockLoad(dim_key + tile_offset, items, num_tile_items); + BlockLoad(dim_val + tile_offset, items2, num_tile_items); + BlockBuildSelectivePHT_2(items, items2, selection_flags, + hash_table, num_slots, num_tile_items); +} + +template +__global__ void probe_kernel(int *fact_fkey, int *fact_val, int num_tuples, + int *hash_table, int 
num_slots, unsigned long long *res) { + // Load a tile striped across threads + int selection_flags[ITEMS_PER_THREAD]; + int keys[ITEMS_PER_THREAD]; + int vals[ITEMS_PER_THREAD]; + int join_vals[ITEMS_PER_THREAD]; + + unsigned long long sum = 0; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples+ TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + InitFlags(selection_flags); + BlockLoad(fact_fkey + tile_offset, keys, num_tile_items); + BlockLoad(fact_val + tile_offset, vals, num_tile_items); + + BlockProbeAndPHT_2(keys, join_vals, selection_flags, + hash_table, num_slots, num_tile_items); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + if ((threadIdx.x + (BLOCK_THREADS * ITEM) < num_tile_items)) + if (selection_flags[ITEM]) + sum += vals[ITEM] * join_vals[ITEM]; + } + + __syncthreads(); + + static __shared__ long long buffer[32]; + unsigned long long aggregate = BlockSum(sum, (long long*)buffer); + __syncthreads(); + + if (threadIdx.x == 0) { + atomicAdd(res, aggregate); + } +} + +struct TimeKeeper { + float time_build; + float time_probe; + float time_extra; + float time_total; +}; + +TimeKeeper hashJoin(int* d_dim_key, int* d_dim_val, int* d_fact_fkey, int* d_fact_val, int num_dim, int num_fact, cub::CachingDeviceAllocator& g_allocator) { + SETUP_TIMING(); + + int* hash_table = NULL; + unsigned long long* res; + int num_slots = num_dim; + float time_build, time_probe, time_memset, time_memset2; + + ALLOCATE(hash_table, sizeof(int) * 2 * num_dim); + ALLOCATE(res, sizeof(long long)); + + TIME_FUNC(cudaMemset(hash_table, 0, num_slots * sizeof(int) * 2), time_memset); + TIME_FUNC(cudaMemset(res, 0, sizeof(long long)), time_memset2); + + int tile_items = 128*4; + + TIME_FUNC((build_kernel<128, 4><<<(num_dim + tile_items - 1)/tile_items, 128>>>(d_dim_key, d_dim_val, num_dim, hash_table, num_slots)), time_build); + 
TIME_FUNC((probe_kernel<128, 4><<<(num_fact + tile_items - 1)/tile_items, 128>>>(d_fact_fkey, d_fact_val, num_fact, hash_table, num_slots, res)), time_probe);
+
+#if DEBUG
+  // Per-phase timings as one JSON object; every key needs its ':' separator,
+  // otherwise the emitted line cannot be parsed.
+  cout << "{" << "\"time_memset\":" << time_memset
+      << ",\"time_build\":" << time_build
+      << ",\"time_probe\":" << time_probe << "}" << endl;
+#endif
+
+  CLEANUP(hash_table);
+  CLEANUP(res);
+
+  // SETUP_TIMING() creates a new start/stop event pair on every call; destroy
+  // the pair so repeated trials do not accumulate CUDA events.
+  cudaEventDestroy(start);
+  cudaEventDestroy(stop);
+
+  // time_extra carries the hash-table memset; the total is build + probe + that memset.
+  TimeKeeper t = {time_build, time_probe, time_memset, time_build + time_probe + time_memset};
+  return t;
+}
+
+//---------------------------------------------------------------------
+// Globals, constants and typedefs
+//---------------------------------------------------------------------
+
+bool g_verbose = false; // Whether to display input/output to console
+cub::CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory
+
+
+#define CLEANUP(vec) if(vec)CubDebugExit(g_allocator.DeviceFree(vec))
+
+//---------------------------------------------------------------------
+// Main
+//---------------------------------------------------------------------
+int main(int argc, char** argv)
+{
+  int num_fact = 256 * 1<<20;
+  int num_dim = 16 * 1<<20;
+  int num_trials = 3;
+
+  // Initialize command line
+  CommandLineArgs args(argc, argv);
+  args.GetCmdLineArgument("n", num_fact);
+  args.GetCmdLineArgument("d", num_dim);
+  args.GetCmdLineArgument("t", num_trials);
+
+  // Print usage
+  if (args.CheckCmdLineFlag("help"))
+  {
+    printf("%s "
+      "[--n=] "
+      "[--d=] "
+      "[--t=] "
+      "[--device=] "
+      "[--v] "
+      "\n", argv[0]);
+    exit(0);
+  }
+
+  int log2 = 0;
+  int num_dim_dup = num_dim >> 1;
+  while (num_dim_dup) {
+    num_dim_dup >>= 1;
+    log2 += 1;
+  }
+
+  // Initialize device
+  CubDebugExit(args.DeviceInit());
+
+  // Allocate problem device arrays
+  int *d_dim_key = NULL;
+  int *d_dim_val = NULL;
+  int *d_fact_fkey = NULL;
+  int *d_fact_val = NULL;
+  CubDebugExit(g_allocator.DeviceAllocate((void**)&d_dim_key, sizeof(int) * num_dim));
+  CubDebugExit(g_allocator.DeviceAllocate((void**)&d_dim_val,
sizeof(int) * num_dim)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_fact_fkey, sizeof(int) * num_fact)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_fact_val, sizeof(int) * num_fact)); + + int *h_dim_key = NULL; + int *h_dim_val = NULL; + int *h_fact_fkey = NULL; + int *h_fact_val = NULL; + + create_relation_pk(h_dim_key, h_dim_val, num_dim); + create_relation_fk(h_fact_fkey, h_fact_val, num_fact, num_dim); + + CubDebugExit(cudaMemcpy(d_dim_key, h_dim_key, sizeof(int) * num_dim, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemcpy(d_dim_val, h_dim_val, sizeof(int) * num_dim, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemcpy(d_fact_fkey, h_fact_fkey, sizeof(int) * num_fact, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemcpy(d_fact_val, h_fact_val, sizeof(int) * num_fact, cudaMemcpyHostToDevice)); + + for (int j = 0; j < num_trials; j++) { + TimeKeeper t = hashJoin(d_dim_key, d_dim_val, d_fact_fkey, d_fact_val, num_dim, num_fact, g_allocator); + cout<< "{" + << "\"num_dim\":" << num_dim + << ",\"num_fact\":" << num_fact + << ",\"radix\":" << 0 + << ",\"time_partition_build\":" << 0 + << ",\"time_partition_probe\":" << 0 + << ",\"time_partition_total\":" << 0 + << ",\"time_build\":" << t.time_build + << ",\"time_probe\":" << t.time_probe + << ",\"time_extra\":" << t.time_extra + << ",\"time_join_total\":" << t.time_total + << "}" << endl; + } + + CLEANUP(d_dim_key); + CLEANUP(d_dim_val); + CLEANUP(d_fact_fkey); + CLEANUP(d_fact_val); + + return 0; +} + diff --git a/crystal/src/ops/project.cu b/crystal/src/ops/project.cu new file mode 100644 index 0000000..3340db6 --- /dev/null +++ b/crystal/src/ops/project.cu @@ -0,0 +1,176 @@ +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include +#include + +#include +#include +#include "cub/test/test_util.h" + +#include "crystal.cuh" + +#include "utils/gpu_utils.h" + +using namespace std; + + 
+//---------------------------------------------------------------------
+// Implements Projection Operator
+// There are two variants: dot-product and sigmoid
+//---------------------------------------------------------------------
+
+bool g_verbose = false; // Whether to display input/output to console
+cub::CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory
+
+/**
+ * Linear projection: out[i] = 2*in1[i] + 3*in2[i] for i in [0, num_items).
+ *
+ * Each block handles one tile of TILE_SIZE (= BLOCK_THREADS * ITEMS_PER_THREAD)
+ * consecutive items starting at blockIdx.x * TILE_SIZE; the last tile may be
+ * partial. The template header is required because TILE_SIZE expands to the
+ * two template parameters.
+ */
+template<int BLOCK_THREADS, int ITEMS_PER_THREAD>
+__global__ void project(float* in1, float* in2, float* out, int num_items)
+{
+  float items[ITEMS_PER_THREAD];
+  float items2[ITEMS_PER_THREAD];
+  float res[ITEMS_PER_THREAD];
+
+  int tile_offset = blockIdx.x * TILE_SIZE;
+  int num_tiles = (num_items + TILE_SIZE - 1) / TILE_SIZE;
+  int num_tile_items = TILE_SIZE;
+
+  // Only the last tile can hold fewer than TILE_SIZE items.
+  if (blockIdx.x == num_tiles - 1) {
+    num_tile_items = num_items - tile_offset;
+  }
+
+  BlockLoad(in1 + tile_offset, items, num_tile_items);
+  BlockLoad(in2 + tile_offset, items2, num_tile_items);
+
+  #pragma unroll
+  for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) {
+    // Skip slots that fall past the end of a partial tile.
+    if (threadIdx.x + (ITEM * BLOCK_THREADS) < num_tile_items) {
+      res[ITEM] = 2*items[ITEM] + 3*items2[ITEM];
+    }
+  }
+
+  BlockStore(out + tile_offset, res, num_tile_items);
+}
+
+/**
+ * Sigmoid projection: out[i] = 1 / (1 + exp(-2*in1[i] - 3*in2[i])).
+ *
+ * Same tiling as project(): one block per TILE_SIZE items, last tile may be
+ * partial.
+ */
+template<int BLOCK_THREADS, int ITEMS_PER_THREAD>
+__global__ void projectSigmoid(float* in1, float* in2, float* out, int num_items)
+{
+  float items[ITEMS_PER_THREAD];
+  float items2[ITEMS_PER_THREAD];
+  float res[ITEMS_PER_THREAD];
+
+  int tile_offset = blockIdx.x * TILE_SIZE;
+  int num_tiles = (num_items + TILE_SIZE - 1) / TILE_SIZE;
+  int num_tile_items = TILE_SIZE;
+
+  // Only the last tile can hold fewer than TILE_SIZE items.
+  if (blockIdx.x == num_tiles - 1) {
+    num_tile_items = num_items - tile_offset;
+  }
+
+  BlockLoad(in1 + tile_offset, items, num_tile_items);
+  BlockLoad(in2 + tile_offset, items2, num_tile_items);
+
+  #pragma unroll
+  for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) {
+    // Skip slots that fall past the end of a partial tile.
+    if (threadIdx.x + (ITEM * BLOCK_THREADS) < num_tile_items) {
+      res[ITEM] = 1.0f / (1.0f + expf(-2*items[ITEM] -3*items2[ITEM]));
+    }
+  }
+
+  BlockStore(out + tile_offset, res, num_tile_items);
+}
+
+
+float
projectGPU(float* in1, float* in2, float* out, int num_items) {
+  // Nothing to project; a grid of zero blocks is an invalid launch configuration.
+  if (num_items <= 0) return 0.0f;
+
+  SETUP_TIMING();
+
+  float time_proj;
+  int tile_items = 128*4;
+  int num_blocks = (num_items + tile_items - 1)/tile_items;
+  // One block of 128 threads per tile of 128*4 items, matching project<128,4>.
+  TIME_FUNC((project<128,4><<<num_blocks, 128>>>(in1, in2, out, num_items)), time_proj);
+  // Surface launch/execution failures instead of reporting a bogus time.
+  CubDebugExit(cudaGetLastError());
+
+  // SETUP_TIMING() created these events; release them on every call.
+  cudaEventDestroy(start);
+  cudaEventDestroy(stop);
+
+  return time_proj;
+}
+
+/**
+ * Launches projectSigmoid<128,4> over num_items device floats and returns the
+ * elapsed time reported by cudaEventElapsedTime (milliseconds).
+ * Returns 0 without launching when num_items <= 0.
+ */
+float projectSigmoidGPU(float* in1, float* in2, float* out, int num_items) {
+  // Nothing to project; a grid of zero blocks is an invalid launch configuration.
+  if (num_items <= 0) return 0.0f;
+
+  SETUP_TIMING();
+
+  float time_proj;
+  int tile_items = 128*4;
+  int num_blocks = (num_items + tile_items - 1)/tile_items;
+  // One block of 128 threads per tile of 128*4 items, matching projectSigmoid<128,4>.
+  TIME_FUNC((projectSigmoid<128,4><<<num_blocks, 128>>>(in1, in2, out, num_items)), time_proj);
+  // Surface launch/execution failures instead of reporting a bogus time.
+  CubDebugExit(cudaGetLastError());
+
+  // SETUP_TIMING() created these events; release them on every call.
+  cudaEventDestroy(start);
+  cudaEventDestroy(stop);
+
+  return time_proj;
+}
+
+/**
+ * Main
+ */
+int main(int argc, char** argv)
+{
+  int num_items = 1<<28;
+  int num_trials = 1;
+
+  // Initialize command line
+  CommandLineArgs args(argc, argv);
+  args.GetCmdLineArgument("n", num_items);
+  args.GetCmdLineArgument("t", num_trials);
+
+  // Print usage
+  if (args.CheckCmdLineFlag("help"))
+  {
+    printf("%s "
+      "[--n=] "
+      "[--t=] "
+      "[--device=] "
+      "[--v] "
+      "\n", argv[0]);
+    exit(0);
+  }
+
+  // Initialize device
+  CubDebugExit(args.DeviceInit());
+
+  // Allocate problem device arrays
+  float *d_in1 = NULL;
+  CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in1, sizeof(float) * num_items));
+
+  float *d_in2 = NULL;
+  CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in2, sizeof(float) * num_items));
+
+  float *d_out = NULL;
+  CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(float) * num_items));
+
+  // Fill both inputs with uniform random floats; the seed is fixed at 0 so
+  // every run sees the same data.
+  curandGenerator_t generator;
+  int seed = 0;
+  curandCreateGenerator(&generator, CURAND_RNG_PSEUDO_DEFAULT);
+  curandSetPseudoRandomGeneratorSeed(generator,seed);
+  curandGenerateUniform(generator, d_in1, num_items);
+  curandGenerateUniform(generator, d_in2, num_items);
+
+  float time_proj_gpu;
+  float time_proj_sigmoid_gpu;
+
+  for (int t = 0; t < num_trials; t++) {
+    time_proj_gpu = projectGPU(d_in1, d_in2, d_out, num_items);
+    time_proj_sigmoid_gpu =
projectSigmoidGPU(d_in1, d_in2, d_out, num_items);
+
+    // One JSON object per trial.
+    cout<< "{"
+        << "\"time_proj_gpu\":" << time_proj_gpu
+        << ",\"time_proj_sigmoid_gpu\":" << time_proj_sigmoid_gpu
+        << "}" << endl;
+  }
+
+  // Cleanup
+  curandDestroyGenerator(generator);  // pairs with curandCreateGenerator above
+  if (d_in1) CubDebugExit(g_allocator.DeviceFree(d_in1));
+  if (d_in2) CubDebugExit(g_allocator.DeviceFree(d_in2));
+  if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out));
+
+  return 0;
+}
+
diff --git a/crystal/src/ops/utils/generator.h b/crystal/src/ops/utils/generator.h
new file mode 100644
index 0000000..a24039f
--- /dev/null
+++ b/crystal/src/ops/utils/generator.h
@@ -0,0 +1,377 @@
+#pragma once
+
+#include
+#include /* perror */
+#include /* posix_memalign */
+#include
+#include
+using namespace std;
+
+#define RAND_RANGE(N) ((double)rand() / ((double)RAND_MAX + 1) * (N))
+#define RANDR_RANGE(N) ((double)rand_r(&seed) / ((double)RAND_MAX + 1) * (N))
+static int seeded = 0;
+
+/**
+ * Seed rand() exactly once. The seed is the constant 0 (not the current
+ * time), so the generated relations are identical from run to run.
+ */
+static void
+check_seed()
+{
+  if(!seeded) {
+    srand(0);
+    seeded = 1;
+  }
+}
+
+/**
+ * Shuffle tuples of the relation using Knuth shuffle.
+ *
+ * @param arr        array shuffled in place
+ * @param num_tuples number of entries in arr
+ */
+void
+knuth_shuffle(int* arr, int num_tuples)
+{
+  int i;
+  for (i = num_tuples - 1; i > 0; i--) {
+    /* j must be drawn from [0, i] inclusive: RAND_RANGE(N) yields [0, N), and
+       excluding j == i would only ever produce cyclic permutations. */
+    int j = RAND_RANGE(i + 1);
+    int tmp = arr[i];
+    arr[i] = arr[j];
+    arr[j] = tmp;
+  }
+}
+
+
+/**
+ * Generate unique tuple IDs with Knuth shuffling
+ * relation must have been allocated
+ *
+ * Fills arr with 1..num_tuples and then shuffles it.
+ */
+void
+random_unique_gen(int*& arr, int num_tuples)
+{
+  int i;
+
+  for (i = 0; i < num_tuples; i++) {
+    arr[i] = (i+1);
+  }
+
+  /* randomly shuffle elements */
+  knuth_shuffle(arr, num_tuples);
+}
+
+/** Fill arr with arr[i] = i for i in [0, num_tuples). */
+void
+dummy_initialize(int*& arr, int num_tuples) {
+  for (int i = 0; i < num_tuples; i++) {
+    arr[i] = i;
+  }
+}
+
+/**
+ * Allocate and fill a primary-key relation of num_tuples rows:
+ * keys is a shuffled permutation of 1..num_tuples, vals[i] = i.
+ *
+ * @return 0 on success; -1 if allocation fails, in which case both keys and
+ *         vals are released and set to NULL.
+ */
+int
+create_relation_pk(int*& keys, int*& vals, int num_tuples)
+{
+  check_seed();
+
+  keys = (int*)_mm_malloc(num_tuples * sizeof(int), 256);
+  vals = (int*)_mm_malloc(num_tuples * sizeof(int), 256);
+
+  if (!keys || !vals) {
+    perror("out of memory");
+    /* release whichever allocation succeeded so nothing leaks */
+    if (keys) _mm_free(keys);
+    if (vals) _mm_free(vals);
+    keys = NULL;
+    vals = NULL;
+    return -1;
+  }
+
+  random_unique_gen(keys, num_tuples);
+  dummy_initialize(vals, num_tuples);
+
+  return 0;
+}
+
+/**
+ * Allocate and fill a foreign-key relation of num_tuples rows. keys is built
+ * from consecutive chunks of maxid entries, each a shuffled permutation of
+ * 1..maxid, followed by a shuffled permutation of 1..(num_tuples % maxid) when
+ * num_tuples is not a multiple of maxid; vals[i] = i.
+ *
+ * @return 0 on success; -1 if maxid <= 0 (nothing is allocated) or if
+ *         allocation fails (both buffers are released and set to NULL).
+ */
+int create_relation_fk(int*& keys, int*& vals, int num_tuples, const int maxid)
+{
+  int i, iters, remainder;
+
+  /* maxid is used as a divisor below */
+  if (maxid <= 0) {
+    return -1;
+  }
+
+  check_seed();
+  keys = (int*)_mm_malloc(num_tuples * sizeof(int), 256);
+  vals = (int*)_mm_malloc(num_tuples * sizeof(int), 256);
+
+  if (!keys || !vals) {
+    perror("out of memory");
+    /* release whichever allocation succeeded so nothing leaks */
+    if (keys) _mm_free(keys);
+    if (vals) _mm_free(vals);
+    keys = NULL;
+    vals = NULL;
+    return -1;
+  }
+
+  // alternative generation method
+  iters = num_tuples / maxid;
+  for (i = 0; i < iters; i++) {
+    int* tuples = keys + maxid * i;
+    random_unique_gen(tuples, maxid);
+  }
+
+  // if num_tuples is not an exact multiple of maxid
+  remainder = num_tuples % maxid;
+  if (remainder > 0) {
+    int* tuples = keys + maxid * iters;
+    random_unique_gen(tuples, remainder);
+  }
+
+  dummy_initialize(vals, num_tuples);
+  return 0;
+}
+
+/*
+typedef struct rand_state_64 {
+  uint64_t num[313];
+  size_t index;
+} rand64_t;
+
+rand64_t *rand64_init(uint64_t seed)
+{
+  rand64_t *state = malloc(sizeof(rand64_t));
+  uint64_t *n = state->num;
+  size_t i;
+  n[0] = seed;
+  for (i = 0 ; i != 311
; ++i) + n[i + 1] = 6364136223846793005ull * + (n[i] (n[i] >> 62)) + i + 1; + state->index = 312; + return state; +} + +uint64_t rand64_next(rand64_t *state) +{ + uint64_t x, *n = state->num; + if (state->index == 312) { + size_t i = 0; + do { + x = n[i] & 0xffffffff80000000ull; + x |= n[i + 1] & 0x7fffffffull; + n[i] = n[i + 156] (x >> 1); + n[i] = 0xb5026f5aa96619e9ull & -(x & 1); + } while (++i != 156); + n[312] = n[0]; + do { + x = n[i] & 0xffffffff80000000ull; + x |= n[i + 1] & 0x7fffffffull; + n[i] = n[i - 156] (x >> 1); + n[i] = 0xb5026f5aa96619e9ull & -(x & 1); + } while (++i != 312); + state->index = 0; + } + x = n[state->index++]; + x = (x >> 29) & 0x5555555555555555ull; + x = (x << 17) & 0x71d67fffeda60000ull; + x = (x << 37) & 0xfff7eee000000000ull; + x = (x >> 43); + return x; +} + +typedef struct rand_state_32 { + uint32_t num[625]; + size_t index; +} rand32_t; + +rand32_t *rand32_init(uint32_t seed) +{ + rand32_t *state = malloc(sizeof(rand32_t)); + uint32_t *n = state->num; + size_t i; + n[0] = seed; + for (i = 0 ; i != 623 ; ++i) + n[i + 1] = 0x6c078965 * (n[i] (n[i] >> 30)); + state->index = 624; + return state; +} + +uint32_t rand32_next(rand32_t *state) +{ + uint32_t y, *n = state->num; + if (state->index == 624) { + size_t i = 0; + do { + y = n[i] & 0x80000000; + y += n[i + 1] & 0x7fffffff; + n[i] = n[i + 397] (y >> 1); + n[i] = 0x9908b0df & -(y & 1); + } while (++i != 227); + n[624] = n[0]; + do { + y = n[i] & 0x80000000; + y += n[i + 1] & 0x7fffffff; + n[i] = n[i - 227] (y >> 1); + n[i] = 0x9908b0df & -(y & 1); + } while (++i != 624); + state->index = 0; + } + y = n[state->index++]; + y = (y >> 11); + y = (y << 7) & 0x9d2c5680; + y = (y << 15) & 0xefc60000; + y = (y >> 18); + return y; +} + +static int hardware_threads(void) +{ + char name[64]; + struct stat st; + int threads = -1; + do { + sprintf(name, "/sys/devices/system/cpu/cpu%d", ++threads); + } while (stat(name, &st) == 0); + return threads; +} + +static void *mamalloc(size_t size) +{ 
+ void *p = NULL; + return posix_memalign(&p, 64, size) ? NULL : p; +} + +typedef struct { + pthread_t id; + int seed; + int thread; + int threads; + uint32_t hash_factor; + uint32_t invalid_key; + uint32_t *inner; + uint32_t *outer; + volatile uint32_t *table; + size_t inner_size; + size_t outer_size; + size_t table_size; + size_t join_size; + double selectivity; + pthread_barrier_t *barrier; +} info_t; + +static void *run(void *arg) +{ + info_t *d = (info_t*) arg; + assert(pthread_equal(pthread_self(), d->id)); + int thread = d->thread; + int threads = d->threads; + uint32_t hash_factor = d->hash_factor; + uint32_t invalid_key = d->invalid_key; + uint32_t *inner = d->inner; + uint32_t *outer = d->outer; + volatile uint32_t *table = d->table; + size_t i, o, t, h; + size_t inner_size = d->inner_size; + size_t outer_size = d->outer_size; + size_t table_size = d->table_size; + size_t inner_beg = (inner_size / threads) * thread; + size_t inner_end = (inner_size / threads) * (thread + 1); + size_t outer_beg = (outer_size / threads) * thread; + size_t outer_end = (outer_size / threads) * (thread + 1); + size_t table_beg = (table_size / threads) * thread; + size_t table_end = (table_size / threads) * (thread + 1); + if (thread + 1 == threads) { + inner_end = inner_size; + outer_end = outer_size; + table_end = table_size; + } + for (t = table_beg ; t != table_end ; ++t) + table[t] = invalid_key; + pthread_barrier_wait(&d->barrier[0]); + rand32_t *gen = rand32_init(d->seed); + for (i = inner_beg ; i != inner_end ; ++i) { + int new_key_inserted = 0; + uint32_t key; + do { + do { + key = rand32_next(gen); + } while (key == invalid_key); + h = (uint32_t) (key * hash_factor); + h = (h * table_size) >> 32; + for (;;) { + if (table[h] == invalid_key && + __sync_bool_compare_and_swap(&table[h], invalid_key, key)) { + new_key_inserted = 1; + break; + } + if (table[h] == key) break; + if (++h == table_size) h = 0; + } + } while (new_key_inserted == 0); + inner[i] = key; + } + 
pthread_barrier_wait(&d->barrier[1]); + size_t join_size = 0; + uint32_t limit = ~0; + limit *= d->selectivity; + for (o = outer_beg ; o != outer_end ; ++o) { + uint32_t key; + if (rand32_next(gen) <= limit) { + i = rand32_next(gen); + i = (i * inner_size) >> 32; + key = inner[i]; + join_size++; + } else do { + do { + key = rand32_next(gen); + } while (key == invalid_key); + h = (uint32_t) (key * hash_factor); + h = (h * table_size) >> 32; + while (table[h] != invalid_key) { + if (table[h] == key) break; + if (++h == table_size) h = 0; + } + } while (table[h] == key); + outer[o] = key; + } + free(gen); + d->join_size = join_size; + pthread_exit(NULL); +} + +size_t inner_outer(size_t inner_size, size_t outer_size, double selectivity, + uint32_t **inner_p, uint32_t **outer_p) +{ + srand(time(NULL)); + int t, threads = hardware_threads(); + // input arguments + assert(inner_size <= 1000 * 1000 * 1000); + assert(selectivity >= 0.0 && selectivity <= 1.0); + // tables + uint32_t *inner = mamalloc((inner_size + 1) * sizeof(uint32_t)); + uint32_t *outer = mamalloc(outer_size * sizeof(uint32_t)); + size_t table_size = inner_size / 0.7; + uint32_t *table = malloc(table_size * sizeof(uint32_t)); + // constants + uint32_t hash_factor = (rand() << 1) | 1; + uint32_t invalid_key = rand() * rand(); + // barriers + int b, barriers = 2; + pthread_barrier_t barrier[barriers]; + for (b = 0 ; b != barriers ; ++b) + pthread_barrier_init(&barrier[b], NULL, threads); + // run threads + info_t info[threads]; + for (t = 0 ; t != threads ; ++t) { + info[t].seed = rand(); + info[t].thread = t; + info[t].threads = threads; + info[t].hash_factor = hash_factor; + info[t].invalid_key = invalid_key; + info[t].selectivity = selectivity; + info[t].inner = inner; + info[t].outer = outer; + info[t].table = table; + info[t].inner_size = inner_size; + info[t].outer_size = outer_size; + info[t].table_size = table_size; + info[t].barrier = barrier; + pthread_create(&info[t].id, NULL, run, (void*) 
&info[t]); + } + size_t join_size = 0; + for (t = 0 ; t != threads ; ++t) { + pthread_join(info[t].id, NULL); + join_size += info[t].join_size; + } + // cleanup + for (b = 0 ; b != barriers ; ++b) + pthread_barrier_destroy(&barrier[b]); + free(table); + // pass output + inner[inner_size] = invalid_key; + *inner_p = inner; + *outer_p = outer; + return join_size; +} +*/ diff --git a/crystal/src/ops/utils/gpu_utils.h b/crystal/src/ops/utils/gpu_utils.h new file mode 100644 index 0000000..93ad8af --- /dev/null +++ b/crystal/src/ops/utils/gpu_utils.h @@ -0,0 +1,35 @@ +#pragma once + +#define SETUP_TIMING() cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); + +#define TIME_FUNC(f,t) { \ + cudaEventRecord(start, 0); \ + f; \ + cudaEventRecord(stop, 0); \ + cudaEventSynchronize(stop); \ + cudaEventElapsedTime(&t, start,stop); \ +} + +#define CLEANUP(vec) if(vec)CubDebugExit(g_allocator.DeviceFree(vec)) + +#define ALLOCATE(vec,size) CubDebugExit(g_allocator.DeviceAllocate((void**)&vec, size)) + +template +T* loadToGPU(T* src, int numEntries, cub::CachingDeviceAllocator& g_allocator) { + T* dest; + CubDebugExit(g_allocator.DeviceAllocate((void**)&dest, sizeof(T) * numEntries)); + CubDebugExit(cudaMemcpy(dest, src, sizeof(T) * numEntries, cudaMemcpyHostToDevice)); + return dest; +} + +#define TILE_SIZE (BLOCK_THREADS * ITEMS_PER_THREAD) + +#define CHECK_ERROR() { \ + cudaDeviceSynchronize(); \ + cudaError_t error = cudaGetLastError(); \ + if(error != cudaSuccess) \ + { \ + printf("CUDA error: %s\n", cudaGetErrorString(error)); \ + exit(-1); \ + } \ +} diff --git a/crystal/src/ssb/gpu_utils.h b/crystal/src/ssb/gpu_utils.h new file mode 100644 index 0000000..93ad8af --- /dev/null +++ b/crystal/src/ssb/gpu_utils.h @@ -0,0 +1,35 @@ +#pragma once + +#define SETUP_TIMING() cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); + +#define TIME_FUNC(f,t) { \ + cudaEventRecord(start, 0); \ + f; \ + cudaEventRecord(stop, 0); \ + 
cudaEventSynchronize(stop); \ + cudaEventElapsedTime(&t, start,stop); \ +} + +#define CLEANUP(vec) if(vec)CubDebugExit(g_allocator.DeviceFree(vec)) + +#define ALLOCATE(vec,size) CubDebugExit(g_allocator.DeviceAllocate((void**)&vec, size)) + +template +T* loadToGPU(T* src, int numEntries, cub::CachingDeviceAllocator& g_allocator) { + T* dest; + CubDebugExit(g_allocator.DeviceAllocate((void**)&dest, sizeof(T) * numEntries)); + CubDebugExit(cudaMemcpy(dest, src, sizeof(T) * numEntries, cudaMemcpyHostToDevice)); + return dest; +} + +#define TILE_SIZE (BLOCK_THREADS * ITEMS_PER_THREAD) + +#define CHECK_ERROR() { \ + cudaDeviceSynchronize(); \ + cudaError_t error = cudaGetLastError(); \ + if(error != cudaSuccess) \ + { \ + printf("CUDA error: %s\n", cudaGetErrorString(error)); \ + exit(-1); \ + } \ +} diff --git a/crystal/src/ssb/q11.cu b/crystal/src/ssb/q11.cu new file mode 100644 index 0000000..ebec888 --- /dev/null +++ b/crystal/src/ssb/q11.cu @@ -0,0 +1,168 @@ +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include +#include +#include "cub/test/test_util.h" + +#include "crystal.cuh" + +#include "gpu_utils.h" +#include "ssb_utils.h" + +using namespace std; + +/** + * Globals, constants and typedefs + */ +bool g_verbose = false; // Whether to display input/output to console +cub::CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory + +template +__global__ void QueryKernel(int* lo_orderdate, int* lo_discount, int* lo_quantity, int* lo_extendedprice, + int lo_num_entries, unsigned long long* revenue) { + // Load a segment of consecutive items that are blocked across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + + long long sum = 0; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_num_entries + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + 
num_tile_items = lo_num_entries - tile_offset;
+  }
+
+  // lo_orderdate strictly between 19930000 and 19940000
+  BlockLoad(lo_orderdate + tile_offset, items, num_tile_items);
+  BlockPredGT(items, 19930000, selection_flags, num_tile_items);
+  BlockPredAndLT(items, 19940000, selection_flags, num_tile_items);
+
+  // lo_quantity < 25
+  BlockLoad(lo_quantity + tile_offset, items, num_tile_items);
+  BlockPredAndLT(items, 25, selection_flags, num_tile_items);
+
+  // 1 <= lo_discount <= 3; items keeps the discount for the product below
+  BlockLoad(lo_discount + tile_offset, items, num_tile_items);
+  BlockPredAndGTE(items, 1, selection_flags, num_tile_items);
+  BlockPredAndLTE(items, 3, selection_flags, num_tile_items);
+
+  BlockLoad(lo_extendedprice + tile_offset, items2, num_tile_items);
+
+  #pragma unroll
+  for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM)
+  {
+    if ((threadIdx.x + (BLOCK_THREADS * ITEM) < num_tile_items))
+      if (selection_flags[ITEM])
+        // Widen before multiplying: both operands are int, so the product
+        // would otherwise be computed (and could overflow) in 32 bits.
+        sum += (long long)items[ITEM] * items2[ITEM];
+  }
+
+  __syncthreads();
+
+  static __shared__ long long buffer[32];
+  unsigned long long aggregate = BlockSum(sum, (long long*)buffer);
+  __syncthreads();
+
+  // One atomic per block folds the block total into the global result.
+  if (threadIdx.x == 0) {
+    atomicAdd(revenue, aggregate);
+  }
+}
+
+/**
+ * Runs QueryKernel<128,4> once over lo_num_entries rows, prints the revenue
+ * and the wall-clock total, and returns the CUDA-event time (milliseconds)
+ * spanning result allocation, memset and the kernel.
+ */
+float runQuery(int* lo_orderdate, int* lo_discount, int* lo_quantity, int* lo_extendedprice,
+    int lo_num_entries, cub::CachingDeviceAllocator& g_allocator) {
+  SETUP_TIMING();
+
+  float time_query;
+  chrono::high_resolution_clock::time_point st, finish;
+  st = chrono::high_resolution_clock::now();
+
+  cudaEventRecord(start, 0);
+
+  unsigned long long* d_sum = NULL;
+  CubDebugExit(g_allocator.DeviceAllocate((void**)&d_sum, sizeof(long long)));
+
+  CubDebugExit(cudaMemset(d_sum, 0, sizeof(long long)));
+
+  // Run
+  int tile_items = 128*4;
+  int num_blocks = (lo_num_entries + tile_items - 1)/tile_items;
+  // One block of 128 threads per tile of 128*4 rows, matching QueryKernel<128,4>.
+  QueryKernel<128,4><<<num_blocks, 128>>>(lo_orderdate,
+      lo_discount, lo_quantity, lo_extendedprice, lo_num_entries, d_sum);
+  // Catch an invalid launch configuration right away.
+  CubDebugExit(cudaGetLastError());
+
+  cudaEventRecord(stop, 0);
+  cudaEventSynchronize(stop);
+  cudaEventElapsedTime(&time_query, start,stop);
+
+  unsigned long long revenue;
+  CubDebugExit(cudaMemcpy(&revenue, d_sum, sizeof(long long),
cudaMemcpyDeviceToHost));
+
+  finish = chrono::high_resolution_clock::now();
+  // Seconds as a floating-point count; scaled to milliseconds when printed.
+  std::chrono::duration<double> diff = finish - st;
+
+  cout << "Revenue: " << revenue << endl;
+  cout << "Time Taken Total: " << diff.count() * 1000 << endl;
+
+  CLEANUP(d_sum);
+
+  // SETUP_TIMING() created these events; release them on every call.
+  cudaEventDestroy(start);
+  cudaEventDestroy(stop);
+
+  return time_query;
+}
+
+/**
+ * Main
+ */
+int main(int argc, char** argv)
+{
+  int num_trials = 1; // FLS_CHG
+
+  // Initialize command line
+  CommandLineArgs args(argc, argv);
+  args.GetCmdLineArgument("t", num_trials);
+
+  // Print usage
+  if (args.CheckCmdLineFlag("help"))
+  {
+    printf("%s "
+      "[--t=] "
+      "[--v] "
+      "\n", argv[0]);
+    exit(0);
+  }
+
+  // Initialize device
+  CubDebugExit(args.DeviceInit());
+
+  int *h_lo_orderdate = loadColumn("lo_orderdate", LO_LEN);
+  int *h_lo_discount = loadColumn("lo_discount", LO_LEN);
+  int *h_lo_quantity = loadColumn("lo_quantity", LO_LEN);
+  int *h_lo_extendedprice = loadColumn("lo_extendedprice", LO_LEN);
+  int *h_d_datekey = loadColumn("d_datekey", D_LEN);
+  int *h_d_year = loadColumn("d_year", D_LEN);
+
+  cout << "** LOADED DATA **" << endl;
+  cout << "LO_LEN " << LO_LEN << endl;
+
+  int *d_lo_orderdate = loadToGPU(h_lo_orderdate, LO_LEN, g_allocator);
+  int *d_lo_discount = loadToGPU(h_lo_discount, LO_LEN, g_allocator);
+  int *d_lo_quantity = loadToGPU(h_lo_quantity, LO_LEN, g_allocator);
+  int *d_lo_extendedprice = loadToGPU(h_lo_extendedprice, LO_LEN, g_allocator);
+  int *d_d_datekey = loadToGPU(h_d_datekey, D_LEN, g_allocator);
+  int *d_d_year = loadToGPU(h_d_year, D_LEN, g_allocator);
+
+  cout << "** LOADED DATA TO GPU **" << endl;
+
+  // One JSON line per trial.
+  for (int t = 0; t < num_trials; t++) {
+    float time_query;
+    time_query = runQuery(d_lo_orderdate, d_lo_discount, d_lo_quantity, d_lo_extendedprice, LO_LEN, g_allocator);
+    cout<< "{"
+        << "\"query\":11"
+        << ",\"time_query\":" << time_query
+        << "}" << endl;
+  }
+
+  return 0;
+}
+
diff --git a/crystal/src/ssb/q12.cu b/crystal/src/ssb/q12.cu
new file mode 100644
index 0000000..b3a16d1
--- /dev/null
+++ b/crystal/src/ssb/q12.cu
@@
-0,0 +1,167 @@
+// Ensure printing of CUDA runtime errors to console
+#define CUB_STDERR
+
+#include
+#include
+#include
+
+#include
+#include
+#include "cub/test/test_util.h"
+
+#include "crystal.cuh"
+
+#include "gpu_utils.h"
+#include "ssb_utils.h"
+
+using namespace std;
+
+/**
+ * Globals, constants and typedefs
+ */
+bool g_verbose = false; // Whether to display input/output to console
+cub::CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory
+
+/**
+ * Filtered sum: adds lo_discount * lo_extendedprice into *revenue for rows with
+ * 19940101 <= lo_orderdate <= 19940131, 26 <= lo_quantity <= 35 and
+ * 4 <= lo_discount <= 6.
+ *
+ * Each block handles one tile of TILE_SIZE (= BLOCK_THREADS * ITEMS_PER_THREAD)
+ * rows starting at blockIdx.x * TILE_SIZE; the last tile may be partial.
+ */
+template<int BLOCK_THREADS, int ITEMS_PER_THREAD>
+__global__ void DeviceSelectIf(int* lo_orderdate, int* lo_discount, int* lo_quantity, int* lo_extendedprice,
+    int lo_num_entries, unsigned long long* revenue) {
+  // Load a segment of consecutive items that are blocked across threads
+  int items[ITEMS_PER_THREAD];
+  int selection_flags[ITEMS_PER_THREAD];
+  int items2[ITEMS_PER_THREAD];
+
+  long long sum = 0;
+
+  int tile_offset = blockIdx.x * TILE_SIZE;
+  int num_tiles = (lo_num_entries + TILE_SIZE - 1) / TILE_SIZE;
+  int num_tile_items = TILE_SIZE;
+
+  // Only the last tile can hold fewer than TILE_SIZE rows.
+  if (blockIdx.x == num_tiles - 1) {
+    num_tile_items = lo_num_entries - tile_offset;
+  }
+
+  BlockLoad(lo_orderdate + tile_offset, items, num_tile_items);
+  BlockPredGTE(items, 19940101, selection_flags, num_tile_items);
+  BlockPredAndLTE(items, 19940131, selection_flags, num_tile_items);
+
+  BlockLoad(lo_quantity + tile_offset, items, num_tile_items);
+  BlockPredAndGTE(items, 26, selection_flags, num_tile_items);
+  BlockPredAndLTE(items, 35, selection_flags, num_tile_items);
+
+  // items keeps the discount for the product below
+  BlockLoad(lo_discount + tile_offset, items, num_tile_items);
+  BlockPredAndGTE(items, 4, selection_flags, num_tile_items);
+  BlockPredAndLTE(items, 6, selection_flags, num_tile_items);
+
+  BlockLoad(lo_extendedprice + tile_offset, items2, num_tile_items);
+
+  #pragma unroll
+  for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM)
+  {
+    if (threadIdx.x + (BLOCK_THREADS * ITEM) < num_tile_items)
+      if (selection_flags[ITEM])
+        // Widen before multiplying: both operands are int, so the product
+        // would otherwise be computed (and could overflow) in 32 bits.
+        sum += (long long)items[ITEM] * items2[ITEM];
+  }
+
+  __syncthreads();
+
+  static __shared__
long long buffer[32];
+  unsigned long long aggregate = BlockSum(sum, (long long*)buffer);
+  __syncthreads();
+
+  // One atomic per block folds the block total into the global result.
+  if (threadIdx.x == 0) {
+    atomicAdd(revenue, aggregate);
+  }
+}
+
+/**
+ * Runs DeviceSelectIf<128,4> once over lo_num_entries rows, prints the revenue
+ * and the wall-clock total, and returns the CUDA-event time (milliseconds)
+ * spanning result allocation, memset and the kernel.
+ */
+float runQuery(int* lo_orderdate, int* lo_discount, int* lo_quantity, int* lo_extendedprice,
+    int lo_num_entries, cub::CachingDeviceAllocator& g_allocator) {
+  SETUP_TIMING();
+
+  float time_query;
+  chrono::high_resolution_clock::time_point st, finish;
+  st = chrono::high_resolution_clock::now();
+
+  cudaEventRecord(start, 0);
+
+  unsigned long long* d_sum = NULL;
+  CubDebugExit(g_allocator.DeviceAllocate((void**)&d_sum, sizeof(long long)));
+
+  CubDebugExit(cudaMemset(d_sum, 0, sizeof(long long)));
+
+  // Run
+  int tile_items = 128*4;
+  DeviceSelectIf<128,4><<<(lo_num_entries + tile_items - 1)/tile_items, 128>>>(lo_orderdate,
+      lo_discount, lo_quantity, lo_extendedprice, lo_num_entries, d_sum);
+  // Catch an invalid launch configuration right away.
+  CubDebugExit(cudaGetLastError());
+
+  cudaEventRecord(stop, 0);
+  cudaEventSynchronize(stop);
+  cudaEventElapsedTime(&time_query, start,stop);
+
+  unsigned long long revenue;
+  CubDebugExit(cudaMemcpy(&revenue, d_sum, sizeof(long long), cudaMemcpyDeviceToHost));
+
+  finish = chrono::high_resolution_clock::now();
+  // Seconds as a floating-point count; scaled to milliseconds when printed.
+  std::chrono::duration<double> diff = finish - st;
+
+  cout << "Revenue: " << revenue << endl;
+  cout << "Time Taken Total: " << diff.count() * 1000 << endl;
+
+  CLEANUP(d_sum);
+
+  // SETUP_TIMING() created these events; release them on every call.
+  cudaEventDestroy(start);
+  cudaEventDestroy(stop);
+
+  return time_query;
+}
+
+/**
+ * Main
+ */
+int main(int argc, char** argv)
+{
+  int num_trials = 3;
+
+  // Initialize command line
+  CommandLineArgs args(argc, argv);
+  args.GetCmdLineArgument("t", num_trials);
+
+  // Print usage
+  if (args.CheckCmdLineFlag("help"))
+  {
+    printf("%s "
+      "[--t=] "
+      "[--v] "
+      "\n", argv[0]);
+    exit(0);
+  }
+
+  // Initialize device
+  CubDebugExit(args.DeviceInit());
+
+  int *h_lo_orderdate = loadColumn("lo_orderdate", LO_LEN);
+  int *h_lo_discount = loadColumn("lo_discount", LO_LEN);
+  int *h_lo_quantity = loadColumn("lo_quantity", LO_LEN);
+  int *h_lo_extendedprice = loadColumn("lo_extendedprice", LO_LEN);
+  int *h_d_datekey =
loadColumn("d_datekey", D_LEN); + int *h_d_year = loadColumn("d_year", D_LEN); + + cout << "** LOADED DATA **" << endl; + + int *d_lo_orderdate = loadToGPU(h_lo_orderdate, LO_LEN, g_allocator); + int *d_lo_discount = loadToGPU(h_lo_discount, LO_LEN, g_allocator); + int *d_lo_quantity = loadToGPU(h_lo_quantity, LO_LEN, g_allocator); + int *d_lo_extendedprice = loadToGPU(h_lo_extendedprice, LO_LEN, g_allocator); + int *d_d_datekey = loadToGPU(h_d_datekey, D_LEN, g_allocator); + int *d_d_year = loadToGPU(h_d_year, D_LEN, g_allocator); + + cout << "** LOADED DATA TO GPU **" << endl; + + for (int t = 0; t < num_trials; t++) { + float time_query; + time_query = runQuery(d_lo_orderdate, d_lo_discount, d_lo_quantity, d_lo_extendedprice, LO_LEN, g_allocator); + cout<< "{" + << "\"query\":12" + << ",\"time_query\":" << time_query + << "}" << endl; + } + + return 0; +} + diff --git a/crystal/src/ssb/q13.cu b/crystal/src/ssb/q13.cu new file mode 100644 index 0000000..cc73bc9 --- /dev/null +++ b/crystal/src/ssb/q13.cu @@ -0,0 +1,167 @@ +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include +#include +#include "cub/test/test_util.h" + +#include "crystal.cuh" + +#include "gpu_utils.h" +#include "ssb_utils.h" + +using namespace std; + +/** + * Globals, constants and typedefs + */ +bool g_verbose = false; // Whether to display input/output to console +cub::CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory + +template +__global__ void DeviceSelectIf(int* lo_orderdate, int* lo_discount, int* lo_quantity, int* lo_extendedprice, + int lo_num_entries, unsigned long long* revenue) { + // Load a segment of consecutive items that are blocked across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + + long long sum = 0; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_num_entries + TILE_SIZE - 1) / TILE_SIZE; + int 
num_tile_items = TILE_SIZE;
+
+  // Only the last tile can hold fewer than TILE_SIZE rows.
+  if (blockIdx.x == num_tiles - 1) {
+    num_tile_items = lo_num_entries - tile_offset;
+  }
+
+  // 19940204 <= lo_orderdate <= 19940210
+  BlockLoad(lo_orderdate + tile_offset, items, num_tile_items);
+  BlockPredGTE(items, 19940204, selection_flags, num_tile_items);
+  BlockPredAndLTE(items, 19940210, selection_flags, num_tile_items);
+
+  // 26 <= lo_quantity <= 35
+  BlockLoad(lo_quantity + tile_offset, items, num_tile_items);
+  BlockPredAndGTE(items, 26, selection_flags, num_tile_items);
+  BlockPredAndLTE(items, 35, selection_flags, num_tile_items);
+
+  // 5 <= lo_discount <= 7; items keeps the discount for the product below
+  BlockLoad(lo_discount + tile_offset, items, num_tile_items);
+  BlockPredAndGTE(items, 5, selection_flags, num_tile_items);
+  BlockPredAndLTE(items, 7, selection_flags, num_tile_items);
+
+  BlockLoad(lo_extendedprice + tile_offset, items2, num_tile_items);
+
+  #pragma unroll
+  for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM)
+  {
+    if ((threadIdx.x + (BLOCK_THREADS * ITEM) < num_tile_items))
+      if (selection_flags[ITEM])
+        // Widen before multiplying: both operands are int, so the product
+        // would otherwise be computed (and could overflow) in 32 bits.
+        sum += (long long)items[ITEM] * items2[ITEM];
+  }
+
+  __syncthreads();
+
+  static __shared__ long long buffer[32];
+  unsigned long long aggregate = BlockSum(sum, (long long*)buffer);
+  __syncthreads();
+
+  // One atomic per block folds the block total into the global result.
+  if (threadIdx.x == 0) {
+    atomicAdd(revenue, aggregate);
+  }
+}
+
+/**
+ * Runs DeviceSelectIf<128,4> once over lo_num_entries rows, prints the revenue
+ * and the wall-clock total, and returns the CUDA-event time (milliseconds)
+ * spanning result allocation, memset and the kernel.
+ */
+float runQuery(int* lo_orderdate, int* lo_discount, int* lo_quantity, int* lo_extendedprice,
+    int lo_num_entries, cub::CachingDeviceAllocator& g_allocator) {
+  SETUP_TIMING();
+
+  float time_query;
+  chrono::high_resolution_clock::time_point st, finish;
+  st = chrono::high_resolution_clock::now();
+
+  cudaEventRecord(start, 0);
+
+  unsigned long long* d_sum = NULL;
+  CubDebugExit(g_allocator.DeviceAllocate((void**)&d_sum, sizeof(long long)));
+
+  CubDebugExit(cudaMemset(d_sum, 0, sizeof(long long)));
+
+  // Run
+  // Launched directly rather than through TIME_FUNC: that macro re-records
+  // `start`, which would discard the record above and time only the kernel,
+  // unlike the q11/q12 drivers that time allocation + memset + kernel.
+  int tile_items = 128*4;
+  DeviceSelectIf<128,4><<<(lo_num_entries + tile_items - 1)/tile_items, 128>>>(lo_orderdate,
+      lo_discount, lo_quantity, lo_extendedprice, lo_num_entries, d_sum);
+  // Catch an invalid launch configuration right away.
+  CubDebugExit(cudaGetLastError());
+
+  cudaEventRecord(stop, 0);
+  cudaEventSynchronize(stop); +
cudaEventElapsedTime(&time_query, start,stop); + + unsigned long long revenue; + CubDebugExit(cudaMemcpy(&revenue, d_sum, sizeof(long long), cudaMemcpyDeviceToHost)); + + finish = chrono::high_resolution_clock::now(); + std::chrono::duration diff = finish - st; + + cout << "Revenue: " << revenue << endl; + cout << "Time Taken Total: " << diff.count() * 1000 << endl; + + CLEANUP(d_sum); + + return time_query; +} + +/** + * Main + */ +int main(int argc, char** argv) +{ + int num_trials = 3; + + // Initialize command line + CommandLineArgs args(argc, argv); + args.GetCmdLineArgument("t", num_trials); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--t=] " + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + int *h_lo_orderdate = loadColumn("lo_orderdate", LO_LEN); + int *h_lo_discount = loadColumn("lo_discount", LO_LEN); + int *h_lo_quantity = loadColumn("lo_quantity", LO_LEN); + int *h_lo_extendedprice = loadColumn("lo_extendedprice", LO_LEN); + int *h_d_datekey = loadColumn("d_datekey", D_LEN); + int *h_d_year = loadColumn("d_year", D_LEN); + + cout << "** LOADED DATA **" << endl; + + int *d_lo_orderdate = loadToGPU(h_lo_orderdate, LO_LEN, g_allocator); + int *d_lo_discount = loadToGPU(h_lo_discount, LO_LEN, g_allocator); + int *d_lo_quantity = loadToGPU(h_lo_quantity, LO_LEN, g_allocator); + int *d_lo_extendedprice = loadToGPU(h_lo_extendedprice, LO_LEN, g_allocator); + int *d_d_datekey = loadToGPU(h_d_datekey, D_LEN, g_allocator); + int *d_d_year = loadToGPU(h_d_year, D_LEN, g_allocator); + + cout << "** LOADED DATA TO GPU **" << endl; + + for (int t = 0; t < num_trials; t++) { + float time_query; + time_query = runQuery(d_lo_orderdate, d_lo_discount, d_lo_quantity, d_lo_extendedprice, LO_LEN, g_allocator); + cout<< "{" + << "\"query\":13" + << ",\"time_query\":" << time_query + << "}" << endl; + } + + return 0; +} + diff --git a/crystal/src/ssb/q21.cu b/crystal/src/ssb/q21.cu 
new file mode 100644 index 0000000..ac8e560 --- /dev/null +++ b/crystal/src/ssb/q21.cu @@ -0,0 +1,286 @@ +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include +#include +#include "cub/test/test_util.h" + +#include "crystal.cuh" + +#include "gpu_utils.h" +#include "ssb_utils.h" + +using namespace std; + +/** + * Globals, constants and typedefs + */ +bool g_verbose = false; // Whether to display input/output to console +cub::CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory + +template +__global__ void probe(int* lo_orderdate, int* lo_partkey, int* lo_suppkey, int* lo_revenue, int lo_len, + int* ht_s, int s_len, + int* ht_p, int p_len, + int* ht_d, int d_len, + int* res) { + // Load a tile striped across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int brand[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, items, num_tile_items); + BlockProbeAndPHT_1(items, selection_flags, ht_s, s_len, num_tile_items); + + BlockLoad(lo_partkey + tile_offset, items, num_tile_items); + BlockProbeAndPHT_2(items, brand, selection_flags, + ht_p, p_len, num_tile_items); + + BlockLoad(lo_orderdate + tile_offset, items, num_tile_items); + BlockProbeAndPHT_2(items, year, selection_flags, + ht_d, d_len, 19920101, num_tile_items); + + BlockLoad(lo_revenue + tile_offset, revenue, num_tile_items); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { + if ((threadIdx.x + (BLOCK_THREADS * ITEM)) < num_tile_items) { + if (selection_flags[ITEM]) { + int hash = (brand[ITEM] * 7 + (year[ITEM] - 1992)) % ((1998-1992+1) * (5*5*40)); 
+ res[hash * 4] = year[ITEM]; + res[hash * 4 + 1] = brand[ITEM]; + atomicAdd(reinterpret_cast(&res[hash * 4 + 2]), (long long)(revenue[ITEM])); + } + } + } +} + +template +__global__ void build_hashtable_s(int *filter_col, int *dim_key, int num_tuples, int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, items, num_tile_items); + BlockPredEQ(items, 1, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items, num_tile_items); + BlockBuildSelectivePHT_1(items, selection_flags, + hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_p(int *filter_col, int *dim_key, int *dim_val, int num_tuples, int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, items, num_tile_items); + BlockPredEQ(items, 1, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items, num_tile_items); + BlockLoad(dim_val + tile_offset, items2, num_tile_items); + BlockBuildSelectivePHT_2(items, items2, selection_flags, + hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_d(int *dim_key, int *dim_val, int num_tuples, int *hash_table, int num_slots, int val_min) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = 
(num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(dim_key + tile_offset, items, num_tile_items); + BlockLoad(dim_val + tile_offset, items2, num_tile_items); + BlockBuildSelectivePHT_2(items, items2, selection_flags, + hash_table, num_slots, val_min, num_tile_items); +} + +float runQuery(int* lo_orderdate, int* lo_partkey, int* lo_suppkey, int* lo_revenue, int lo_len, + int* p_partkey, int* p_brand1, int* p_category, int p_len, + int *d_datekey, int* d_year, int d_len, + int *s_suppkey, int* s_region, int s_len, + cub::CachingDeviceAllocator& g_allocator) { + SETUP_TIMING(); + + float time_query; + chrono::high_resolution_clock::time_point st, finish; + st = chrono::high_resolution_clock::now(); + + cudaEventRecord(start, 0); + + int *ht_d, *ht_p, *ht_s; + int d_val_len = 19981230 - 19920101 + 1; + CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_d, 2 * d_val_len * sizeof(int))); + CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_p, 2 * p_len * sizeof(int))); + CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_s, 2 * s_len * sizeof(int))); + + CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_p, 0, 2 * p_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int))); + + int tile_items = 128*4; + + build_hashtable_s<128,4><<<(s_len + tile_items - 1)/tile_items, 128>>>(s_region, s_suppkey, s_len, ht_s, s_len); + /*CHECK_ERROR();*/ + + build_hashtable_p<128,4><<<(p_len + tile_items - 1)/tile_items, 128>>>(p_category, p_partkey, p_brand1, p_len, ht_p, p_len); + /*CHECK_ERROR();*/ + + int d_val_min = 19920101; + build_hashtable_d<128,4><<<(d_len + tile_items - 1)/tile_items, 128>>>( + d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min); + /*CHECK_ERROR();*/ + + int *res; + int res_size = ((1998-1992+1) * (5 * 5 * 40)); + int res_array_size = 
res_size * 4; + CubDebugExit(g_allocator.DeviceAllocate((void**)&res, res_array_size * sizeof(int))); + + CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int))); + + probe<128,4><<<(lo_len + tile_items - 1)/tile_items, 128>>>(lo_orderdate, + lo_partkey, lo_suppkey, lo_revenue, lo_len, ht_s, s_len, ht_p, p_len, ht_d, d_val_len, res); + + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&time_query, start,stop); + + int* h_res = new int[res_array_size]; + CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int), cudaMemcpyDeviceToHost)); + finish = chrono::high_resolution_clock::now(); + std::chrono::duration diff = finish - st; + + int res_count = 0; + for (int i=0; i(&h_res[4*i + 2])[0] << "},//" << endl; + res_count += 1; + } + } + + cout << "Res Count: " << res_count << endl; + cout << "Time Taken Total: " << diff.count() * 1000 << endl; + + delete[] h_res; + + CLEANUP(res); + CLEANUP(ht_d); + CLEANUP(ht_p); + CLEANUP(ht_s); + + return time_query; +} + +/** + * Main + */ +int main(int argc, char** argv) +{ + int num_trials = 1; + + // Initialize command line + CommandLineArgs args(argc, argv); + args.GetCmdLineArgument("t", num_trials); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--t=] " + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + int *h_lo_orderdate = loadColumn("lo_orderdate", LO_LEN); + int *h_lo_partkey = loadColumn("lo_partkey", LO_LEN); + int *h_lo_suppkey = loadColumn("lo_suppkey", LO_LEN); + int *h_lo_revenue = loadColumn("lo_revenue", LO_LEN); + + int *h_p_partkey = loadColumn("p_partkey", P_LEN); + int *h_p_brand1 = loadColumn("p_brand1", P_LEN); + int *h_p_category = loadColumn("p_category", P_LEN); + + int *h_d_datekey = loadColumn("d_datekey", D_LEN); + int *h_d_year = loadColumn("d_year", D_LEN); + + int *h_s_suppkey = loadColumn("s_suppkey", S_LEN); + int *h_s_region = loadColumn("s_region", S_LEN); + + 
int *d_lo_orderdate = loadToGPU(h_lo_orderdate, LO_LEN, g_allocator); + int *d_lo_partkey = loadToGPU(h_lo_partkey, LO_LEN, g_allocator); + int *d_lo_suppkey = loadToGPU(h_lo_suppkey, LO_LEN, g_allocator); + int *d_lo_revenue = loadToGPU(h_lo_revenue, LO_LEN, g_allocator); + + int *d_d_datekey = loadToGPU(h_d_datekey, D_LEN, g_allocator); + int *d_d_year = loadToGPU(h_d_year, D_LEN, g_allocator); + + int *d_p_partkey = loadToGPU(h_p_partkey, P_LEN, g_allocator); + int *d_p_brand1 = loadToGPU(h_p_brand1, P_LEN, g_allocator); + int *d_p_category = loadToGPU(h_p_category, P_LEN, g_allocator); + + int *d_s_suppkey = loadToGPU(h_s_suppkey, S_LEN, g_allocator); + int *d_s_region = loadToGPU(h_s_region, S_LEN, g_allocator); + + for (int t = 0; t < num_trials; t++) { + float time_query; + time_query = runQuery( + d_lo_orderdate, d_lo_partkey, d_lo_suppkey, d_lo_revenue, LO_LEN, + d_p_partkey, d_p_brand1, d_p_category, P_LEN, + d_d_datekey, d_d_year, D_LEN, + d_s_suppkey, d_s_region, S_LEN, + g_allocator); + cout<< "{" + << "\"query\":21" + << ",\"time_query\":" << time_query + << "}" << endl; + } + + return 0; +} + diff --git a/crystal/src/ssb/q22.cu b/crystal/src/ssb/q22.cu new file mode 100644 index 0000000..fb9dbb7 --- /dev/null +++ b/crystal/src/ssb/q22.cu @@ -0,0 +1,286 @@ +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include +#include +#include "cub/test/test_util.h" + +#include "crystal.cuh" + +#include "gpu_utils.h" +#include "ssb_utils.h" + +using namespace std; + +/** + * Globals, constants and typedefs + */ +bool g_verbose = false; // Whether to display input/output to console +cub::CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory + + +template +__global__ void probe(int* lo_orderdate, int* lo_partkey, int* lo_suppkey, int* lo_revenue, int lo_len, + int* ht_s, int s_len, + int* ht_p, int p_len, + int* ht_d, int d_len, + int* res) { + // Load a tile striped across 
threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int brand[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, items, num_tile_items); + BlockProbeAndPHT_1(items, selection_flags, ht_s, s_len, num_tile_items); + + BlockLoad(lo_partkey + tile_offset, items, num_tile_items); + BlockProbeAndPHT_2(items, brand, selection_flags, + ht_p, p_len, num_tile_items); + + BlockLoad(lo_orderdate + tile_offset, items, num_tile_items); + BlockProbeAndPHT_2(items, year, selection_flags, + ht_d, d_len, 19920101, num_tile_items); + + BlockLoad(lo_revenue + tile_offset, revenue, num_tile_items); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { + if ((threadIdx.x + (BLOCK_THREADS * ITEM)) < num_tile_items) { + if (selection_flags[ITEM]) { + int hash = (brand[ITEM] * 7 + (year[ITEM] - 1992)) % ((1998-1992+1) * (5*5*40)); + res[hash * 4] = year[ITEM]; + res[hash * 4 + 1] = brand[ITEM]; + atomicAdd(reinterpret_cast(&res[hash * 4 + 2]), (long long)(revenue[ITEM])); + } + } + } +} + +template +__global__ void build_hashtable_s(int *filter_col, int *dim_key, int num_tuples, int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, items, num_tile_items); + BlockPredEQ(items, 2, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items, num_tile_items); + BlockBuildSelectivePHT_1(items, 
selection_flags, + hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_p(int *dim_key, int *dim_val, int num_tuples, int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(dim_val + tile_offset, items, num_tile_items); + BlockPredGTE(items, 260, selection_flags, num_tile_items); + BlockPredAndLTE(items, 267, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items2, num_tile_items); + BlockBuildSelectivePHT_2(items2, items, selection_flags, + hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_d(int *dim_key, int *dim_val, int num_tuples, int *hash_table, int num_slots, int val_min) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(dim_key + tile_offset, items, num_tile_items); + BlockLoad(dim_val + tile_offset, items2, num_tile_items); + BlockBuildSelectivePHT_2(items, items2, selection_flags, + hash_table, num_slots, val_min, num_tile_items); +} + + +float runQuery(int* lo_orderdate, int* lo_partkey, int* lo_suppkey, int* lo_revenue, int lo_len, + int* p_partkey, int* p_brand1, int p_len, + int *d_datekey, int* d_year, int d_len, + int *s_suppkey, int* s_region, int s_len, + cub::CachingDeviceAllocator& g_allocator) { + SETUP_TIMING(); + + float time_query; + chrono::high_resolution_clock::time_point st, finish; + st = 
chrono::high_resolution_clock::now(); + + cudaEventRecord(start, 0); + + int *ht_d, *ht_p, *ht_s; + int d_val_len = 19981230 - 19920101 + 1; + CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_d, 2 * d_val_len * sizeof(int))); + CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_p, 2 * p_len * sizeof(int))); + CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_s, 2 * s_len * sizeof(int))); + + CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_p, 0, 2 * p_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int))); + + int tile_items = 128*4; + build_hashtable_s<128,4><<<(s_len + tile_items - 1)/tile_items, 128>>>(s_region, s_suppkey, s_len, ht_s, s_len); + /*CHECK_ERROR();*/ + + build_hashtable_p<128,4><<<(p_len + tile_items - 1)/tile_items, 128>>>(p_partkey, p_brand1, p_len, ht_p, p_len); + /*CHECK_ERROR();*/ + + int d_val_min = 19920101; + build_hashtable_d<128,4><<<(d_len + tile_items - 1)/tile_items, 128>>>(d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min); + /*CHECK_ERROR();*/ + + int *res; + int res_size = ((1998-1992+1) * 1000); + int res_array_size = res_size * 4; + CubDebugExit(g_allocator.DeviceAllocate((void**)&res, res_array_size * sizeof(int))); + + CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int))); + + // Run + probe<128,4><<<(lo_len + tile_items - 1)/tile_items, 128>>>(lo_orderdate, + lo_partkey, lo_suppkey, lo_revenue, lo_len, ht_s, s_len, ht_p, p_len, ht_d, d_val_len, res); + + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&time_query, start,stop); + + int* h_res = new int[res_array_size]; + CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int), cudaMemcpyDeviceToHost)); + + finish = chrono::high_resolution_clock::now(); + std::chrono::duration diff = finish - st; + + cout << "Result:" << endl; + int res_count = 0; + for (int i=0; i(&h_res[4*i + 2])[0] << endl; + res_count += 1; + } + } + + cout << "Res Count: " << 
res_count << endl; + cout << "Time Taken Total: " << diff.count() * 1000 << endl; + + delete[] h_res; + + CLEANUP(ht_d); + CLEANUP(ht_p); + CLEANUP(ht_s); + + return time_query; +} + +/** + * Main + */ +int main(int argc, char** argv) +{ + int num_trials = 3; + + // Initialize command line + CommandLineArgs args(argc, argv); + args.GetCmdLineArgument("t", num_trials); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--t=] " + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + int *h_lo_orderdate = loadColumn("lo_orderdate", LO_LEN); + int *h_lo_partkey = loadColumn("lo_partkey", LO_LEN); + int *h_lo_suppkey = loadColumn("lo_suppkey", LO_LEN); + int *h_lo_revenue = loadColumn("lo_revenue", LO_LEN); + + int *h_p_partkey = loadColumn("p_partkey", P_LEN); + int *h_p_brand1 = loadColumn("p_brand1", P_LEN); + + int *h_d_datekey = loadColumn("d_datekey", D_LEN); + int *h_d_year = loadColumn("d_year", D_LEN); + + int *h_s_suppkey = loadColumn("s_suppkey", S_LEN); + int *h_s_region = loadColumn("s_region", S_LEN); + + int *d_lo_orderdate = loadToGPU(h_lo_orderdate, LO_LEN, g_allocator); + int *d_lo_partkey = loadToGPU(h_lo_partkey, LO_LEN, g_allocator); + int *d_lo_suppkey = loadToGPU(h_lo_suppkey, LO_LEN, g_allocator); + int *d_lo_revenue = loadToGPU(h_lo_revenue, LO_LEN, g_allocator); + + int *d_d_datekey = loadToGPU(h_d_datekey, D_LEN, g_allocator); + int *d_d_year = loadToGPU(h_d_year, D_LEN, g_allocator); + + int *d_p_partkey = loadToGPU(h_p_partkey, P_LEN, g_allocator); + int *d_p_brand1 = loadToGPU(h_p_brand1, P_LEN, g_allocator); + + int *d_s_suppkey = loadToGPU(h_s_suppkey, S_LEN, g_allocator); + int *d_s_region = loadToGPU(h_s_region, S_LEN, g_allocator); + + for (int t = 0; t < num_trials; t++) { + float time_query; + time_query = runQuery( + d_lo_orderdate, d_lo_partkey, d_lo_suppkey, d_lo_revenue, LO_LEN, + d_p_partkey, d_p_brand1, P_LEN, + d_d_datekey, d_d_year, D_LEN, + 
d_s_suppkey, d_s_region, S_LEN, + g_allocator); + cout<< "{" + << "\"query\":22" + << ",\"time_query\":" << time_query + << "}" << endl; + } + + return 0; +} + diff --git a/crystal/src/ssb/q23.cu b/crystal/src/ssb/q23.cu new file mode 100644 index 0000000..0d00972 --- /dev/null +++ b/crystal/src/ssb/q23.cu @@ -0,0 +1,279 @@ +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include +#include +#include "cub/test/test_util.h" + +#include "crystal.cuh" + +#include "gpu_utils.h" +#include "ssb_utils.h" + +using namespace std; + +/** + * Globals, constants and typedefs + */ +bool g_verbose = false; // Whether to display input/output to console +cub::CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory + + +template +__global__ void probe(int* lo_orderdate, int* lo_partkey, int* lo_suppkey, int* lo_revenue, int lo_len, + int* ht_s, int s_len, + int* ht_p, int p_len, + int* ht_d, int d_len, + int* res) { + // Load a tile striped across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int brand[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, items, num_tile_items); + BlockProbeAndPHT_1(items, selection_flags, ht_s, s_len, num_tile_items); + + BlockLoad(lo_partkey + tile_offset, items, num_tile_items); + BlockProbeAndPHT_2(items, brand, selection_flags, + ht_p, p_len, num_tile_items); + + BlockLoad(lo_orderdate + tile_offset, items, num_tile_items); + BlockProbeAndPHT_2(items, year, selection_flags, + ht_d, d_len, 19920101, num_tile_items); + + BlockLoad(lo_revenue + tile_offset, revenue, num_tile_items); + + #pragma unroll + for (int 
ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { + if ((threadIdx.x + (BLOCK_THREADS * ITEM)) < num_tile_items) { + if (selection_flags[ITEM]) { + int hash = (brand[ITEM] * 7 + (year[ITEM] - 1992)) % ((1998-1992+1) * (5*5*40)); + res[hash * 4] = year[ITEM]; + res[hash * 4 + 1] = brand[ITEM]; + atomicAdd(reinterpret_cast(&res[hash * 4 + 2]), (long long)(revenue[ITEM])); + } + } + } +} + +template +__global__ void build_hashtable_s(int *filter_col, int *dim_key, int num_tuples, int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, items, num_tile_items); + BlockPredEQ(items, 3, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items, num_tile_items); + BlockBuildSelectivePHT_1(items, selection_flags, + hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_p(int *dim_key, int *dim_val, int num_tuples, int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(dim_val + tile_offset, items, num_tile_items); + BlockPredEQ(items, 260, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items2, num_tile_items); + BlockBuildSelectivePHT_2(items2, items, selection_flags, + hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_d(int *dim_key, int *dim_val, int num_tuples, int *hash_table, int num_slots, int val_min) { + int 
items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(dim_key + tile_offset, items, num_tile_items); + BlockLoad(dim_val + tile_offset, items2, num_tile_items); + BlockBuildSelectivePHT_2(items, items2, selection_flags, + hash_table, num_slots, val_min, num_tile_items); +} + +float runQuery(int* lo_orderdate, int* lo_partkey, int* lo_suppkey, int* lo_revenue, int lo_len, + int* p_partkey, int* p_brand1, int p_len, + int *d_datekey, int* d_year, int d_len, + int *s_suppkey, int* s_region, int s_len, + cub::CachingDeviceAllocator& g_allocator) { + SETUP_TIMING(); + + float time_query; + chrono::high_resolution_clock::time_point st, finish; + st = chrono::high_resolution_clock::now(); + + cudaEventRecord(start, 0); + + int *ht_d, *ht_p, *ht_s; + int d_val_len = 19981230 - 19920101 + 1; + CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_d, 2 * d_val_len * sizeof(int))); + CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_p, 2 * p_len * sizeof(int))); + CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_s, 2 * s_len * sizeof(int))); + + CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_p, 0, 2 * p_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int))); + + int tile_items = 128*4; + build_hashtable_s<128,4><<<(s_len + tile_items - 1)/tile_items, 128>>>(s_region, s_suppkey, s_len, ht_s, s_len); + /*CHECK_ERROR();*/ + + build_hashtable_p<128,4><<<(p_len + tile_items - 1)/tile_items, 128>>>(p_partkey, p_brand1, p_len, ht_p, p_len); + /*CHECK_ERROR();*/ + + int d_val_min = 19920101; + build_hashtable_d<128,4><<<(d_len + tile_items - 1)/tile_items, 128>>>(d_datekey, d_year, d_len, ht_d, 
d_val_len, d_val_min); + /*CHECK_ERROR();*/ + + int *res; + int res_size = ((1998-1992+1) * 1000); + int res_array_size = res_size * 4; + CubDebugExit(g_allocator.DeviceAllocate((void**)&res, res_array_size * sizeof(int))); + + CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int))); + + // Run + probe<128,4><<<(lo_len + tile_items - 1)/tile_items, 128>>>(lo_orderdate, + lo_partkey, lo_suppkey, lo_revenue, lo_len, ht_s, s_len, ht_p, p_len, ht_d, d_val_len, res); + + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&time_query, start,stop); + + int* h_res = new int[res_array_size]; + CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int), cudaMemcpyDeviceToHost)); + finish = chrono::high_resolution_clock::now(); + std::chrono::duration diff = finish - st; + + cout << "Result:" << endl; + int res_count = 0; + for (int i=0; i(&h_res[4*i + 2])[0] << endl; + res_count += 1; + } + } + + cout << "Res Count: " << res_count << endl; + cout << "Time Taken Total: " << diff.count() * 1000 << endl; + delete[] h_res; + + return time_query; +} + + +/** + * Main + */ +int main(int argc, char** argv) +{ + int num_trials = 3; + + // Initialize command line + CommandLineArgs args(argc, argv); + args.GetCmdLineArgument("t", num_trials); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--t=] " + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + int *h_lo_orderdate = loadColumn("lo_orderdate", LO_LEN); + int *h_lo_partkey = loadColumn("lo_partkey", LO_LEN); + int *h_lo_suppkey = loadColumn("lo_suppkey", LO_LEN); + int *h_lo_revenue = loadColumn("lo_revenue", LO_LEN); + + int *h_p_partkey = loadColumn("p_partkey", P_LEN); + int *h_p_brand1 = loadColumn("p_brand1", P_LEN); + + int *h_d_datekey = loadColumn("d_datekey", D_LEN); + int *h_d_year = loadColumn("d_year", D_LEN); + + int *h_s_suppkey = loadColumn("s_suppkey", S_LEN); + int *h_s_region = 
loadColumn("s_region", S_LEN); + + int *d_lo_orderdate = loadToGPU(h_lo_orderdate, LO_LEN, g_allocator); + int *d_lo_partkey = loadToGPU(h_lo_partkey, LO_LEN, g_allocator); + int *d_lo_suppkey = loadToGPU(h_lo_suppkey, LO_LEN, g_allocator); + int *d_lo_revenue = loadToGPU(h_lo_revenue, LO_LEN, g_allocator); + + int *d_d_datekey = loadToGPU(h_d_datekey, D_LEN, g_allocator); + int *d_d_year = loadToGPU(h_d_year, D_LEN, g_allocator); + + int *d_p_partkey = loadToGPU(h_p_partkey, P_LEN, g_allocator); + int *d_p_brand1 = loadToGPU(h_p_brand1, P_LEN, g_allocator); + + int *d_s_suppkey = loadToGPU(h_s_suppkey, S_LEN, g_allocator); + int *d_s_region = loadToGPU(h_s_region, S_LEN, g_allocator); + + for (int t = 0; t < num_trials; t++) { + float time_query; + time_query = runQuery( + d_lo_orderdate, d_lo_partkey, d_lo_suppkey, d_lo_revenue, LO_LEN, + d_p_partkey, d_p_brand1, P_LEN, + d_d_datekey, d_d_year, D_LEN, + d_s_suppkey, d_s_region, S_LEN, + g_allocator); + cout<< "{" + << "\"query\":23" + << ",\"time_query\":" << time_query + << "}" << endl; + } + + return 0; +} + diff --git a/crystal/src/ssb/q31.cu b/crystal/src/ssb/q31.cu new file mode 100644 index 0000000..22d7e5b --- /dev/null +++ b/crystal/src/ssb/q31.cu @@ -0,0 +1,296 @@ +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include +#include +#include "cub/test/test_util.h" + +#include "crystal.cuh" + +#include "gpu_utils.h" +#include "ssb_utils.h" + +using namespace std; + +/** + * Globals, constants and typedefs + */ +bool g_verbose = false; // Whether to display input/output to console +cub::CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory + +template +__global__ void probe(int* lo_orderdate, int* lo_custkey, int* lo_suppkey, int* lo_revenue, int lo_len, + int* ht_s, int s_len, + int* ht_c, int c_len, + int* ht_d, int d_len, + int* res) { + // Load a segment of consecutive items that are blocked across threads + int 
items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int c_nation[ITEMS_PER_THREAD]; + int s_nation[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, items, num_tile_items); + BlockProbeAndPHT_2(items, s_nation, selection_flags, + ht_s, s_len, num_tile_items); + + BlockLoad(lo_custkey + tile_offset, items, num_tile_items); + BlockProbeAndPHT_2(items, c_nation, selection_flags, + ht_c, c_len, num_tile_items); + + BlockLoad(lo_orderdate + tile_offset, items, num_tile_items); + BlockProbeAndPHT_2(items, year, selection_flags, + ht_d, d_len, 19920101, num_tile_items); + + BlockLoad(lo_revenue + tile_offset, revenue, num_tile_items); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { + if ((threadIdx.x + (BLOCK_THREADS * ITEM)) < num_tile_items) { + if (selection_flags[ITEM]) { + int hash = (s_nation[ITEM] * 25 * 7 + c_nation[ITEM] * 7 + (year[ITEM] - 1992)) % ((1998-1992+1) * 25 * 25); + res[hash * 6] = year[ITEM]; + res[hash * 6 + 1] = c_nation[ITEM]; + res[hash * 6 + 2] = s_nation[ITEM]; + /*atomicAdd(&res[hash * 6 + 4], revenue[ITEM]);*/ + atomicAdd(reinterpret_cast(&res[hash * 6 + 4]), (long long)(revenue[ITEM])); + } + } + } +} + +template +__global__ +void build_hashtable_s(int *filter_col, int *dim_key, int *dim_val, int num_tuples, int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + 
BlockLoad(filter_col + tile_offset, items, num_tile_items); + BlockPredEQ(items, 2, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items, num_tile_items); + BlockLoad(dim_val + tile_offset, items2, num_tile_items); + BlockBuildSelectivePHT_2(items, items2, selection_flags, + hash_table, num_slots, num_tile_items); +} + +template +__global__ +void build_hashtable_c(int *filter_col, int *dim_key, int* dim_val, int num_tuples, int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, items, num_tile_items); + BlockPredEQ(items, 2, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items, num_tile_items); + BlockLoad(dim_val + tile_offset, items2, num_tile_items); + BlockBuildSelectivePHT_2(items, items2, selection_flags, + hash_table, num_slots, num_tile_items); +} + +template +__global__ +void build_hashtable_d(int *dim_key, int *dim_val, int num_tuples, int *hash_table, int num_slots, int val_min) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(dim_val + tile_offset, items, num_tile_items); + BlockPredGTE(items, 1992, selection_flags, num_tile_items); + BlockPredLTE(items, 1997, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items2, num_tile_items); + BlockBuildSelectivePHT_2(items2, items, selection_flags, + hash_table, num_slots, 19920101, num_tile_items); +} + 
+/*
+ * Runs SSB query 3.1 on device-resident columns and returns the elapsed time
+ * reported by cudaEventElapsedTime for the build + probe phase.
+ * Steps: allocate and zero the date/supplier/customer hash tables, launch the
+ * three build kernels, allocate and zero the result array, launch probe.
+ * All pointer arguments are passed straight to kernels, so they must be
+ * device pointers.
+ */
+float runQuery(int* lo_orderdate, int* lo_custkey, int* lo_suppkey, int* lo_revenue, int lo_len,
+    int *d_datekey, int* d_year, int d_len,
+    int *s_suppkey, int* s_region, int* s_nation, int s_len,
+    int *c_custkey, int* c_region, int* c_nation, int c_len,
+    cub::CachingDeviceAllocator& g_allocator) {
+  SETUP_TIMING();
+
+  float time_query;
+  chrono::high_resolution_clock::time_point st, finish;
+  st = chrono::high_resolution_clock::now();
+
+  cudaEventRecord(start, 0);
+
+  int *ht_d, *ht_c, *ht_s;
+  int d_val_len = 19981230 - 19920101 + 1;
+  CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_d, 2 * d_val_len * sizeof(int)));
+  CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_s, 2 * s_len * sizeof(int)));
+  CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_c, 2 * c_len * sizeof(int)));
+
+  CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int)));
+  CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int)));
+  /* ht_c has to start zeroed like ht_d and ht_s: DeviceAllocate does not
+   * clear the buffer it hands back, so stale words would look like entries. */
+  CubDebugExit(cudaMemset(ht_c, 0, 2 * c_len * sizeof(int)));
+
+  int tile_items = 128*4;
+  build_hashtable_s<128,4><<<(s_len + tile_items - 1)/tile_items, 128>>>(s_region, s_suppkey, s_nation, s_len, ht_s, s_len);
+  /*CHECK_ERROR();*/
+
+  build_hashtable_c<128,4><<<(c_len + tile_items - 1)/tile_items, 128>>>(c_region, c_custkey, c_nation, c_len, ht_c, c_len);
+  /*CHECK_ERROR();*/
+
+  int d_val_min = 19920101;
+  build_hashtable_d<128,4><<<(d_len + tile_items - 1)/tile_items, 128>>>(d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min);
+  /*CHECK_ERROR();*/
+
+  int *res;
+  int res_size = ((1998-1992+1) * 25 * 25);
+  int res_array_size = res_size * 6;
+  CubDebugExit(g_allocator.DeviceAllocate((void**)&res, res_array_size * sizeof(int)));
+
+  CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int)));
+
+  // Run
+  probe<128,4><<<(lo_len + tile_items - 1)/tile_items, 128>>>(lo_orderdate,
+      lo_custkey, lo_suppkey, lo_revenue, lo_len, ht_s, s_len, ht_c, c_len, ht_d, d_val_len, res);
+
+  cudaEventRecord(stop, 0);
+  cudaEventSynchronize(stop);
+  cudaEventElapsedTime(&time_query, start,stop);
+
+  int* h_res = new
int[res_array_size]; + CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int), cudaMemcpyDeviceToHost)); + finish = chrono::high_resolution_clock::now(); + std::chrono::duration diff = finish - st; + + cout << "Result:" << endl; + int res_count = 0; + for (int i=0; i(&h_res[6*i + 4])[0] << endl; + res_count += 1; + } + } + + cout << "Res Count: " << res_count << endl; + cout << "Time Taken Total: " << diff.count() * 1000 << endl; + + delete[] h_res; + + return time_query; +} + +/** + * Main + */ +int main(int argc, char** argv) +{ + int num_trials = 1; + + // Initialize command line + CommandLineArgs args(argc, argv); + args.GetCmdLineArgument("t", num_trials); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--t=] " + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + int *h_lo_orderdate = loadColumn("lo_orderdate", LO_LEN); + int *h_lo_custkey = loadColumn("lo_custkey", LO_LEN); + int *h_lo_suppkey = loadColumn("lo_suppkey", LO_LEN); + int *h_lo_revenue = loadColumn("lo_revenue", LO_LEN); + + int *h_d_datekey = loadColumn("d_datekey", D_LEN); + int *h_d_year = loadColumn("d_year", D_LEN); + + int *h_s_suppkey = loadColumn("s_suppkey", S_LEN); + int *h_s_nation = loadColumn("s_nation", S_LEN); + int *h_s_region = loadColumn("s_region", S_LEN); + + int *h_c_custkey = loadColumn("c_custkey", C_LEN); + int *h_c_nation = loadColumn("c_nation", C_LEN); + int *h_c_region = loadColumn("c_region", C_LEN); + + cout << "** LOADED DATA **" << endl; + + int *d_lo_orderdate = loadToGPU(h_lo_orderdate, LO_LEN, g_allocator); + int *d_lo_custkey = loadToGPU(h_lo_custkey, LO_LEN, g_allocator); + int *d_lo_suppkey = loadToGPU(h_lo_suppkey, LO_LEN, g_allocator); + int *d_lo_revenue = loadToGPU(h_lo_revenue, LO_LEN, g_allocator); + + int *d_d_datekey = loadToGPU(h_d_datekey, D_LEN, g_allocator); + int *d_d_year = loadToGPU(h_d_year, D_LEN, g_allocator); + + int *d_s_suppkey = 
loadToGPU(h_s_suppkey, S_LEN, g_allocator); + int *d_s_region = loadToGPU(h_s_region, S_LEN, g_allocator); + int *d_s_nation = loadToGPU(h_s_nation, S_LEN, g_allocator); + + int *d_c_custkey = loadToGPU(h_c_custkey, C_LEN, g_allocator); + int *d_c_region = loadToGPU(h_c_region, C_LEN, g_allocator); + int *d_c_nation = loadToGPU(h_c_nation, C_LEN, g_allocator); + + cout << "** LOADED DATA TO GPU **" << endl; + + for (int t = 0; t < num_trials; t++) { + float time_query; + time_query = runQuery( + d_lo_orderdate, d_lo_custkey, d_lo_suppkey, d_lo_revenue, LO_LEN, + d_d_datekey, d_d_year, D_LEN, + d_s_suppkey, d_s_region, d_s_nation, S_LEN, + d_c_custkey, d_c_region, d_c_nation, C_LEN, + g_allocator); + cout<< "{" + << "\"query\":31" + << ",\"time_query\":" << time_query + << "}" << endl; + } + + return 0; +} + diff --git a/crystal/src/ssb/q32.cu b/crystal/src/ssb/q32.cu new file mode 100644 index 0000000..7bbe156 --- /dev/null +++ b/crystal/src/ssb/q32.cu @@ -0,0 +1,290 @@ +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include +#include +#include "cub/test/test_util.h" + +#include "crystal.cuh" + +#include "gpu_utils.h" +#include "ssb_utils.h" + +using namespace std; + +/** + * Globals, constants and typedefs + */ +bool g_verbose = false; // Whether to display input/output to console +cub::CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory + +template +__global__ void probe(int* lo_orderdate, int* lo_custkey, int* lo_suppkey, int* lo_revenue, int lo_len, + int* ht_s, int s_len, + int* ht_c, int c_len, + int* ht_d, int d_len, + int* res) { + // Load a segment of consecutive items that are blocked across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int c_nation[ITEMS_PER_THREAD]; + int s_nation[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = 
(lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, items, num_tile_items); + BlockProbeAndPHT_2(items, s_nation, selection_flags, + ht_s, s_len, num_tile_items); + + BlockLoad(lo_custkey + tile_offset, items, num_tile_items); + BlockProbeAndPHT_2(items, c_nation, selection_flags, + ht_c, c_len, num_tile_items); + + BlockLoad(lo_orderdate + tile_offset, items, num_tile_items); + BlockProbeAndPHT_2(items, year, selection_flags, + ht_d, d_len, 19920101, num_tile_items); + + BlockLoad(lo_revenue + tile_offset, revenue, num_tile_items); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { + if ((threadIdx.x + (BLOCK_THREADS * ITEM)) < num_tile_items) { + if (selection_flags[ITEM]) { + int hash = (s_nation[ITEM] * 250 * 7 + c_nation[ITEM] * 7 + (year[ITEM] - 1992)) % ((1998-1992+1) * 250 * 250); + res[hash * 4] = year[ITEM]; + res[hash * 4 + 1] = c_nation[ITEM]; + res[hash * 4 + 2] = s_nation[ITEM]; + atomicAdd(&res[hash * 4 + 3], revenue[ITEM]); + } + } + } +} + +template +__global__ void build_hashtable_s(int *filter_col, int *dim_key, int *dim_val, int num_tuples, int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, items, num_tile_items); + BlockPredEQ(items, 24, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items, num_tile_items); + BlockLoad(dim_val + tile_offset, items2, num_tile_items); + BlockBuildSelectivePHT_2(items, items2, selection_flags, + hash_table, num_slots, num_tile_items); +} + 
+template +__global__ void build_hashtable_c(int *filter_col, int *dim_key, int* dim_val, int num_tuples, int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, items, num_tile_items); + BlockPredEQ(items, 24, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items, num_tile_items); + BlockLoad(dim_val + tile_offset, items2, num_tile_items); + BlockBuildSelectivePHT_2(items, items2, selection_flags, + hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_d(int *dim_key, int *dim_val, int num_tuples, int *hash_table, int num_slots, int val_min) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(dim_val + tile_offset, items, num_tile_items); + BlockPredGTE(items, 1992, selection_flags, num_tile_items); + BlockPredAndLTE(items, 1997, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items2, num_tile_items); + BlockBuildSelectivePHT_2(items2, items, selection_flags, + hash_table, num_slots, 19920101, num_tile_items); +} + +float runQuery(int* lo_orderdate, int* lo_custkey, int* lo_suppkey, int* lo_revenue, int lo_len, + int *d_datekey, int* d_year, int d_len, + int *s_suppkey, int* s_nation, int* s_city, int s_len, + int *c_custkey, int* c_nation, int* c_city, int c_len, + cub::CachingDeviceAllocator& g_allocator) { + SETUP_TIMING(); + + float time_query; + 
chrono::high_resolution_clock::time_point st, finish;
+  st = chrono::high_resolution_clock::now();
+
+  cudaEventRecord(start, 0);
+
+  int *ht_d, *ht_c, *ht_s;
+  int d_val_len = 19981230 - 19920101 + 1;
+  CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_d, 2 * d_val_len * sizeof(int)));
+  CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_s, 2 * s_len * sizeof(int)));
+  CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_c, 2 * c_len * sizeof(int)));
+
+  CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int)));
+  CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int)));
+  /* ht_c has to start zeroed like ht_d and ht_s: DeviceAllocate does not
+   * clear the buffer it hands back, so stale words would look like entries. */
+  CubDebugExit(cudaMemset(ht_c, 0, 2 * c_len * sizeof(int)));
+
+  int tile_items = 128*4;
+  build_hashtable_s<128,4><<<(s_len + tile_items - 1)/tile_items, 128>>>(s_nation, s_suppkey, s_city, s_len, ht_s, s_len);
+  /*CHECK_ERROR();*/
+
+  build_hashtable_c<128,4><<<(c_len + tile_items - 1)/tile_items, 128>>>(c_nation, c_custkey, c_city, c_len, ht_c, c_len);
+  /*CHECK_ERROR();*/
+
+  int d_val_min = 19920101;
+  build_hashtable_d<128,4><<<(d_len + tile_items - 1)/tile_items, 128>>>(d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min);
+  /*CHECK_ERROR();*/
+
+  int *res;
+  int res_size = ((1998-1992+1) * 250 * 250);
+  int res_array_size = res_size * 4;
+  CubDebugExit(g_allocator.DeviceAllocate((void**)&res, res_array_size * sizeof(int)));
+
+  CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int)));
+
+  // Run
+  probe<128,4><<<(lo_len + tile_items - 1)/tile_items, 128>>>(lo_orderdate,
+      lo_custkey, lo_suppkey, lo_revenue, lo_len, ht_s, s_len, ht_c, c_len, ht_d, d_val_len, res);
+
+  cudaEventRecord(stop, 0);
+  cudaEventSynchronize(stop);
+  cudaEventElapsedTime(&time_query, start,stop);
+
+  int* h_res = new int[res_array_size];
+  CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int), cudaMemcpyDeviceToHost));
+  finish = chrono::high_resolution_clock::now();
+  std::chrono::duration diff = finish - st;
+
+  cout << "Result:" << endl;
+  int res_count = 0;
+  for (int i=0; i] "
+        "[--v] "
+        "\n", argv[0]);
+    exit(0);
+  }
+
+  // Initialize device
+ CubDebugExit(args.DeviceInit()); + + int *h_lo_orderdate = loadColumn("lo_orderdate", LO_LEN); + int *h_lo_custkey = loadColumn("lo_custkey", LO_LEN); + int *h_lo_suppkey = loadColumn("lo_suppkey", LO_LEN); + int *h_lo_revenue = loadColumn("lo_revenue", LO_LEN); + + int *h_d_datekey = loadColumn("d_datekey", D_LEN); + int *h_d_year = loadColumn("d_year", D_LEN); + + int *h_s_suppkey = loadColumn("s_suppkey", S_LEN); + int *h_s_nation = loadColumn("s_nation", S_LEN); + int *h_s_city = loadColumn("s_city", S_LEN); + + int *h_c_custkey = loadColumn("c_custkey", C_LEN); + int *h_c_nation = loadColumn("c_nation", C_LEN); + int *h_c_city = loadColumn("c_city", C_LEN); + + cout << "** LOADED DATA **" << endl; + + int *d_lo_orderdate = loadToGPU(h_lo_orderdate, LO_LEN, g_allocator); + int *d_lo_custkey = loadToGPU(h_lo_custkey, LO_LEN, g_allocator); + int *d_lo_suppkey = loadToGPU(h_lo_suppkey, LO_LEN, g_allocator); + int *d_lo_revenue = loadToGPU(h_lo_revenue, LO_LEN, g_allocator); + + int *d_d_datekey = loadToGPU(h_d_datekey, D_LEN, g_allocator); + int *d_d_year = loadToGPU(h_d_year, D_LEN, g_allocator); + + int *d_s_suppkey = loadToGPU(h_s_suppkey, S_LEN, g_allocator); + int *d_s_nation = loadToGPU(h_s_nation, S_LEN, g_allocator); + int *d_s_city = loadToGPU(h_s_city, S_LEN, g_allocator); + + int *d_c_custkey = loadToGPU(h_c_custkey, C_LEN, g_allocator); + int *d_c_nation = loadToGPU(h_c_nation, C_LEN, g_allocator); + int *d_c_city = loadToGPU(h_c_city, C_LEN, g_allocator); + + cout << "** LOADED DATA TO GPU **" << endl; + + for (int t = 0; t < num_trials; t++) { + float time_query; + time_query = runQuery( + d_lo_orderdate, d_lo_custkey, d_lo_suppkey, d_lo_revenue, LO_LEN, + d_d_datekey, d_d_year, D_LEN, + d_s_suppkey, d_s_nation, d_s_city, S_LEN, + d_c_custkey, d_c_nation, d_c_city, C_LEN, + g_allocator); + cout<< "{" + << "\"query\":32" + << ",\"time_query\":" << time_query + << "}" << endl; + } + + return 0; +} + diff --git a/crystal/src/ssb/q33.cu 
b/crystal/src/ssb/q33.cu new file mode 100644 index 0000000..e639f7f --- /dev/null +++ b/crystal/src/ssb/q33.cu @@ -0,0 +1,291 @@ +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include +#include +#include "cub/test/test_util.h" + +#include "crystal.cuh" + +#include "gpu_utils.h" +#include "ssb_utils.h" + +using namespace std; + +/** + * Globals, constants and typedefs + */ +bool g_verbose = false; // Whether to display input/output to console +cub::CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory + +template +__global__ void probe(int* lo_orderdate, int* lo_custkey, int* lo_suppkey, int* lo_revenue, int lo_len, + int* ht_s, int s_len, + int* ht_c, int c_len, + int* ht_d, int d_len, + int* res) { + // Load a segment of consecutive items that are blocked across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int c_nation[ITEMS_PER_THREAD]; + int s_nation[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, items, num_tile_items); + BlockProbeAndPHT_2(items, s_nation, selection_flags, + ht_s, s_len, num_tile_items); + + BlockLoad(lo_custkey + tile_offset, items, num_tile_items); + BlockProbeAndPHT_2(items, c_nation, selection_flags, + ht_c, c_len, num_tile_items); + + BlockLoad(lo_orderdate + tile_offset, items, num_tile_items); + BlockProbeAndPHT_2(items, year, selection_flags, + ht_d, d_len, 19920101, num_tile_items); + + BlockLoad(lo_revenue + tile_offset, revenue, num_tile_items); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { + if ((threadIdx.x + (BLOCK_THREADS * ITEM)) < num_tile_items) { + if 
(selection_flags[ITEM]) { + int hash = (s_nation[ITEM] * 250 * 7 + c_nation[ITEM] * 7 + (year[ITEM] - 1992)) % ((1998-1992+1) * 250 * 250); + res[hash * 4] = year[ITEM]; + res[hash * 4 + 1] = c_nation[ITEM]; + res[hash * 4 + 2] = s_nation[ITEM]; + atomicAdd(&res[hash * 4 + 3], revenue[ITEM]); + } + } + } +} + +template +__global__ +void build_hashtable_s(int *dim_key, int *dim_val, int num_tuples, int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(dim_val + tile_offset, items, num_tile_items); + BlockPredEQ(items, 231, selection_flags, num_tile_items); + BlockPredOrEQ(items, 235, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items2, num_tile_items); + BlockBuildSelectivePHT_2(items2, items, selection_flags, + hash_table, num_slots, num_tile_items); +} + +template +__global__ +void build_hashtable_c(int *dim_key, int* dim_val, int num_tuples, int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(dim_val + tile_offset, items, num_tile_items); + BlockPredEQ(items, 231, selection_flags, num_tile_items); + BlockPredOrEQ(items, 235, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items2, num_tile_items); + BlockBuildSelectivePHT_2(items2, items, selection_flags, + hash_table, num_slots, num_tile_items); +} + +template +__global__ +void build_hashtable_d(int *dim_key, int *dim_val, 
int num_tuples, int *hash_table, int num_slots, int val_min) {
+  int items[ITEMS_PER_THREAD];
+  int items2[ITEMS_PER_THREAD];
+  int selection_flags[ITEMS_PER_THREAD];
+
+  int tile_offset = blockIdx.x * TILE_SIZE;
+  int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE;
+  int num_tile_items = TILE_SIZE;
+
+  /* The last block handles whatever is left after the full tiles. */
+  if (blockIdx.x == num_tiles - 1) {
+    num_tile_items = num_tuples - tile_offset;
+  }
+
+  BlockLoad(dim_val + tile_offset, items, num_tile_items);
+  BlockPredGTE(items, 1992, selection_flags, num_tile_items);
+  /* And-variant, as q32's build_hashtable_d uses for the same range, so the
+   * upper bound is combined with the >= 1992 flags rather than evaluated on
+   * its own. */
+  BlockPredAndLTE(items, 1997, selection_flags, num_tile_items);
+
+  /* NOTE(review): val_min is never read; the key offset is the literal
+   * 19920101, the same literal probe passes for ht_d. */
+  BlockLoad(dim_key + tile_offset, items2, num_tile_items);
+  BlockBuildSelectivePHT_2(items2, items, selection_flags,
+      hash_table, num_slots, 19920101, num_tile_items);
+}
+
+/*
+ * Runs SSB query 3.3 on device-resident columns: allocates and zeroes the
+ * date/supplier/customer hash tables, then launches the build kernels.
+ * All pointer arguments are passed straight to kernels, so they must be
+ * device pointers.
+ */
+float runQuery(int* lo_orderdate, int* lo_custkey, int* lo_suppkey, int* lo_revenue, int lo_len,
+    int *d_datekey, int* d_year, int d_len,
+    int *s_suppkey, int* s_city, int s_len,
+    int *c_custkey, int* c_city, int c_len,
+    cub::CachingDeviceAllocator& g_allocator) {
+  SETUP_TIMING();
+
+  float time_query;
+  chrono::high_resolution_clock::time_point st, finish;
+  st = chrono::high_resolution_clock::now();
+
+  cudaEventRecord(start, 0);
+
+  int *ht_d, *ht_c, *ht_s;
+  int d_val_len = 19981230 - 19920101 + 1;
+  CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_d, 2 * d_val_len * sizeof(int)));
+  CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_s, 2 * s_len * sizeof(int)));
+  CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_c, 2 * c_len * sizeof(int)));
+
+  CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int)));
+  CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int)));
+  /* ht_c has to start zeroed like ht_d and ht_s: DeviceAllocate does not
+   * clear the buffer it hands back, so stale words would look like entries. */
+  CubDebugExit(cudaMemset(ht_c, 0, 2 * c_len * sizeof(int)));
+
+  int tile_items = 128*4;
+  build_hashtable_s<128,4><<<(s_len + tile_items - 1)/tile_items, 128>>>(s_suppkey, s_city, s_len, ht_s, s_len);
+  /*CHECK_ERROR();*/
+
+  build_hashtable_c<128,4><<<(c_len + tile_items - 1)/tile_items, 128>>>(c_custkey, c_city, c_len, ht_c, c_len);
+  /*CHECK_ERROR();*/
+
+  int d_val_min = 19920101;
+
build_hashtable_d<128,4><<<(d_len + tile_items - 1)/tile_items, 128>>>(d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min); + /*CHECK_ERROR();*/ + + int *res; + int res_size = ((1998-1992+1) * 250 * 250); + int res_array_size = res_size * 4; + CubDebugExit(g_allocator.DeviceAllocate((void**)&res, res_array_size * sizeof(int))); + + CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int))); + + // Run + probe<128,4><<<(lo_len + tile_items - 1)/tile_items, 128>>>(lo_orderdate, + lo_custkey, lo_suppkey, lo_revenue, lo_len, ht_s, s_len, ht_c, c_len, ht_d, d_val_len, res); + + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&time_query, start,stop); + + int* h_res = new int[res_array_size]; + CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int), cudaMemcpyDeviceToHost)); + finish = chrono::high_resolution_clock::now(); + std::chrono::duration diff = finish - st; + + cout << "Result:" << endl; + int res_count = 0; + for (int i=0; i] " + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + int *h_lo_orderdate = loadColumn("lo_orderdate", LO_LEN); + int *h_lo_custkey = loadColumn("lo_custkey", LO_LEN); + int *h_lo_suppkey = loadColumn("lo_suppkey", LO_LEN); + int *h_lo_revenue = loadColumn("lo_revenue", LO_LEN); + + int *h_d_datekey = loadColumn("d_datekey", D_LEN); + int *h_d_year = loadColumn("d_year", D_LEN); + + int *h_s_suppkey = loadColumn("s_suppkey", S_LEN); + int *h_s_city = loadColumn("s_city", S_LEN); + + int *h_c_custkey = loadColumn("c_custkey", C_LEN); + int *h_c_city = loadColumn("c_city", C_LEN); + + cout << "** LOADED DATA **" << endl; + + int *d_lo_orderdate = loadToGPU(h_lo_orderdate, LO_LEN, g_allocator); + int *d_lo_custkey = loadToGPU(h_lo_custkey, LO_LEN, g_allocator); + int *d_lo_suppkey = loadToGPU(h_lo_suppkey, LO_LEN, g_allocator); + int *d_lo_revenue = loadToGPU(h_lo_revenue, LO_LEN, g_allocator); + + int *d_d_datekey = loadToGPU(h_d_datekey, 
D_LEN, g_allocator); + int *d_d_year = loadToGPU(h_d_year, D_LEN, g_allocator); + + int *d_s_suppkey = loadToGPU(h_s_suppkey, S_LEN, g_allocator); + int *d_s_city = loadToGPU(h_s_city, S_LEN, g_allocator); + + int *d_c_custkey = loadToGPU(h_c_custkey, C_LEN, g_allocator); + int *d_c_city = loadToGPU(h_c_city, C_LEN, g_allocator); + + cout << "** LOADED DATA TO GPU **" << endl; + + for (int t = 0; t < num_trials; t++) { + float time_query; + time_query = runQuery( + d_lo_orderdate, d_lo_custkey, d_lo_suppkey, d_lo_revenue, LO_LEN, + d_d_datekey, d_d_year, D_LEN, + d_s_suppkey, d_s_city, S_LEN, + d_c_custkey, d_c_city, C_LEN, + g_allocator); + cout<< "{" + << "\"query\":33" + << ",\"time_query\":" << time_query + << "}" << endl; + } + + return 0; +} + diff --git a/crystal/src/ssb/q34.cu b/crystal/src/ssb/q34.cu new file mode 100644 index 0000000..1003dc0 --- /dev/null +++ b/crystal/src/ssb/q34.cu @@ -0,0 +1,316 @@ +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include +#include +#include "cub/test/test_util.h" + +#include "crystal.cuh" + +#include "gpu_utils.h" +#include "ssb_utils.h" + +using namespace std; + +/** + * Globals, constants and typedefs + */ +bool g_verbose = false; // Whether to display input/output to console +cub::CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory + +template +__global__ void probe(int* lo_orderdate, int* lo_custkey, int* lo_suppkey, int* lo_revenue, int lo_len, + int* ht_s, int s_len, + int* ht_c, int c_len, + int* ht_d, int d_len, + int* res) { + // Load a segment of consecutive items that are blocked across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int c_nation[ITEMS_PER_THREAD]; + int s_nation[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = 
TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, items, num_tile_items); + BlockProbeAndPHT_2(items, s_nation, selection_flags, + ht_s, s_len, num_tile_items); + + BlockLoad(lo_custkey + tile_offset, items, num_tile_items); + BlockProbeAndPHT_2(items, c_nation, selection_flags, + ht_c, c_len, num_tile_items); + + BlockLoad(lo_orderdate + tile_offset, items, num_tile_items); + BlockProbeAndPHT_2(items, year, selection_flags, + ht_d, d_len, 19920101,num_tile_items); + + BlockLoad(lo_revenue + tile_offset, revenue, num_tile_items); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { + if ((threadIdx.x + (BLOCK_THREADS * ITEM)) < num_tile_items) { + if (selection_flags[ITEM]) { + int hash = (s_nation[ITEM] * 250 * 7 + c_nation[ITEM] * 7 + (year[ITEM] - 1992)) % ((1998-1992+1) * 250 * 250); + res[hash * 4] = year[ITEM]; + res[hash * 4 + 1] = c_nation[ITEM]; + res[hash * 4 + 2] = s_nation[ITEM]; + atomicAdd(&res[hash * 4 + 3], revenue[ITEM]); + } + } + } +} + +template +__global__ +void build_hashtable_s(int *dim_key, int *dim_val, int num_tuples, int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(dim_val + tile_offset, items, num_tile_items); + BlockPredEQ(items, 231, selection_flags, num_tile_items); + BlockPredOrEQ(items, 235, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items2, num_tile_items); + BlockBuildSelectivePHT_2(items2, items, selection_flags, + hash_table, num_slots, num_tile_items); +} + +template +__global__ +void build_hashtable_c(int *dim_key, int* dim_val, int 
num_tuples, int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(dim_val + tile_offset, items, num_tile_items); + BlockPredEQ(items, 231, selection_flags, num_tile_items); + BlockPredOrEQ(items, 235, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items2, num_tile_items); + BlockBuildSelectivePHT_2(items2, items, selection_flags, + hash_table, num_slots, num_tile_items); +} + +template +__global__ +void build_hashtable_d(int* filter_col, int *dim_key, int *dim_val, int num_tuples, int *hash_table, int num_slots, int val_min) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, items, num_tile_items); + BlockPredEQ(items, 199712, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items, num_tile_items); + BlockLoad(dim_val + tile_offset, items2, num_tile_items); + BlockBuildSelectivePHT_2(items, items2, selection_flags, + hash_table, num_slots, 19920101, num_tile_items); +} + +float runQuery(int* lo_orderdate, int* lo_custkey, int* lo_suppkey, int* lo_revenue, int lo_len, + int *d_datekey, int* d_year, int *d_yearmonthnum, int d_len, + int *s_suppkey, int* s_city, int s_len, + int *c_custkey, int* c_city, int c_len, + cub::CachingDeviceAllocator& g_allocator) { + SETUP_TIMING(); + + float time_query; + chrono::high_resolution_clock::time_point st, finish; + st = 
chrono::high_resolution_clock::now();
+
+  cudaEventRecord(start, 0);
+
+  int *ht_d, *ht_c, *ht_s;
+  int d_val_len = 19981230 - 19920101 + 1;
+  CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_d, 2 * d_val_len * sizeof(int)));
+  CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_s, 2 * s_len * sizeof(int)));
+  CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_c, 2 * c_len * sizeof(int)));
+
+  CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int)));
+  CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int)));
+  /* ht_c has to start zeroed like ht_d and ht_s: DeviceAllocate does not
+   * clear the buffer it hands back, so stale words would look like entries. */
+  CubDebugExit(cudaMemset(ht_c, 0, 2 * c_len * sizeof(int)));
+
+  int tile_items = 128*4;
+  build_hashtable_s<128,4><<<(s_len + tile_items - 1)/tile_items, 128>>>(s_suppkey, s_city, s_len, ht_s, s_len);
+  /*CHECK_ERROR();*/
+
+  build_hashtable_c<128,4><<<(c_len + tile_items - 1)/tile_items, 128>>>(c_custkey, c_city, c_len, ht_c, c_len);
+  /*CHECK_ERROR();*/
+
+  int d_val_min = 19920101;
+  build_hashtable_d<128,4><<<(d_len + tile_items - 1)/tile_items, 128>>>(d_yearmonthnum, d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min);
+  /*CHECK_ERROR();*/
+
+  int *res;
+  int res_size = ((1998-1992+1) * 250 * 250);
+  int res_array_size = res_size * 4;
+  CubDebugExit(g_allocator.DeviceAllocate((void**)&res, res_array_size * sizeof(int)));
+
+  CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int)));
+
+  int* d_sum = NULL;
+  CubDebugExit(g_allocator.DeviceAllocate((void**)&d_sum, sizeof(int)));
+
+  /* Checked like every other runtime call in this function. */
+  CubDebugExit(cudaMemset(d_sum, 0, sizeof(int)));
+
+  // Run
+  probe<128,4><<<(lo_len + tile_items - 1)/tile_items, 128>>>(lo_orderdate,
+      lo_custkey, lo_suppkey, lo_revenue, lo_len, ht_s, s_len, ht_c, c_len, ht_d, d_val_len, res);
+
+  cudaEventRecord(stop, 0);
+  cudaEventSynchronize(stop);
+  cudaEventElapsedTime(&time_query, start,stop);
+
+  int* h_res = new int[res_array_size];
+  CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int), cudaMemcpyDeviceToHost));
+  finish = chrono::high_resolution_clock::now();
+  std::chrono::duration diff = finish - st;
+
+  cout << "Result:" << endl;
+  int res_count = 0;
+  for (int i=0; i>
17); + k *= 0x1b873593; + h ^= k; + h = (h << 13) | (h >> 19); + h = (h * 5) + 0xe6546b64; + h ^= len; + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + return h; +} + +/** + * Main + */ +int main(int argc, char** argv) +{ + int num_trials = 3; + + // Initialize command line + CommandLineArgs args(argc, argv); + args.GetCmdLineArgument("t", num_trials); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--t=] " + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + int *h_lo_orderdate = loadColumn("lo_orderdate", LO_LEN); + int *h_lo_custkey = loadColumn("lo_custkey", LO_LEN); + int *h_lo_suppkey = loadColumn("lo_suppkey", LO_LEN); + int *h_lo_revenue = loadColumn("lo_revenue", LO_LEN); + + int *h_d_datekey = loadColumn("d_datekey", D_LEN); + int *h_d_year = loadColumn("d_year", D_LEN); + int *h_d_yearmonthnum = loadColumn("d_yearmonthnum", D_LEN); + + int *h_s_suppkey = loadColumn("s_suppkey", S_LEN); + int *h_s_city = loadColumn("s_city", S_LEN); + + int *h_c_custkey = loadColumn("c_custkey", C_LEN); + int *h_c_city = loadColumn("c_city", C_LEN); + + cout << "** LOADED DATA **" << endl; + + int *d_lo_orderdate = loadToGPU(h_lo_orderdate, LO_LEN, g_allocator); + int *d_lo_custkey = loadToGPU(h_lo_custkey, LO_LEN, g_allocator); + int *d_lo_suppkey = loadToGPU(h_lo_suppkey, LO_LEN, g_allocator); + int *d_lo_revenue = loadToGPU(h_lo_revenue, LO_LEN, g_allocator); + + int *d_d_datekey = loadToGPU(h_d_datekey, D_LEN, g_allocator); + int *d_d_year = loadToGPU(h_d_year, D_LEN, g_allocator); + int *d_d_yearmonthnum = loadToGPU(h_d_yearmonthnum, D_LEN, g_allocator); + + int *d_s_suppkey = loadToGPU(h_s_suppkey, S_LEN, g_allocator); + int *d_s_city = loadToGPU(h_s_city, S_LEN, g_allocator); + + int *d_c_custkey = loadToGPU(h_c_custkey, C_LEN, g_allocator); + int *d_c_city = loadToGPU(h_c_city, C_LEN, g_allocator); + + cout << "** LOADED DATA TO GPU **" << 
endl; + + for (int t = 0; t < num_trials; t++) { + float time_query; + time_query = runQuery( + d_lo_orderdate, d_lo_custkey, d_lo_suppkey, d_lo_revenue, LO_LEN, + d_d_datekey, d_d_year, d_d_yearmonthnum, D_LEN, + d_s_suppkey, d_s_city, S_LEN, + d_c_custkey, d_c_city, C_LEN, + g_allocator); + cout<< "{" + << "\"query\":34" + << ",\"time_query\":" << time_query + << "}" << endl; + } + + return 0; +} + diff --git a/crystal/src/ssb/q41.cu b/crystal/src/ssb/q41.cu new file mode 100644 index 0000000..93fcbe0 --- /dev/null +++ b/crystal/src/ssb/q41.cu @@ -0,0 +1,371 @@ +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include +#include +#include "cub/test/test_util.h" + +#include "crystal.cuh" + +#include "gpu_utils.h" +#include "ssb_utils.h" + +using namespace std; + +/** + * Globals, constants and typedefs + */ +bool g_verbose = false; // Whether to display input/output to console +cub::CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory + +template +__global__ void probe(int* lo_orderdate, int* lo_partkey, int* lo_custkey, int* lo_suppkey, int* lo_revenue, int* lo_supplycost, int lo_len, + int* ht_p, int p_len, + int* ht_s, int s_len, + int* ht_c, int c_len, + int* ht_d, int d_len, + int* res) { + // Load a segment of consecutive items that are blocked across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int c_nation[ITEMS_PER_THREAD]; + // int s_nation[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, items, num_tile_items); + BlockProbeAndPHT_1(items, selection_flags, ht_s, s_len, num_tile_items); + + BlockLoad(lo_custkey + 
tile_offset, items, num_tile_items); + BlockProbeAndPHT_2(items, c_nation, selection_flags, + ht_c, c_len, num_tile_items); + + BlockLoad(lo_partkey + tile_offset, items, num_tile_items); + BlockProbeAndPHT_1(items, selection_flags, ht_p, p_len, num_tile_items); + + BlockLoad(lo_orderdate + tile_offset, items, num_tile_items); + BlockProbeAndPHT_2(items, year, selection_flags, + ht_d, d_len, 19920101, num_tile_items); + + BlockLoad(lo_revenue + tile_offset, revenue, num_tile_items); + BlockLoad(lo_supplycost + tile_offset, items, num_tile_items); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { + if (threadIdx.x + (BLOCK_THREADS * ITEM) < num_tile_items) { + if (selection_flags[ITEM]) { + int hash = (c_nation[ITEM] * 7 + (year[ITEM] - 1992)) % ((1998-1992+1) * 25); + res[hash * 4] = year[ITEM]; + res[hash * 4 + 1] = c_nation[ITEM]; + /*atomicAdd(&res[hash * 4 + 2], (1));*/ + /*atomicAdd(reinterpret_cast(&res[hash * 4 + 2]), (long long)(1));*/ + atomicAdd(reinterpret_cast(&res[hash * 4 + 2]), (long long)(revenue[ITEM] - items[ITEM])); + } + } + } +} + +template +__global__ void build_hashtable_s(int *filter_col, int *dim_key, int num_tuples, int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, items, num_tile_items); + BlockPredEQ(items, 1, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items, num_tile_items); + BlockBuildSelectivePHT_1(items, selection_flags, + hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_p(int *filter_col, int *dim_key, int num_tuples, int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + 
+ int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, items, num_tile_items); + BlockPredEQ(items, 0, selection_flags, num_tile_items); + BlockPredOrEQ(items, 1, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items, num_tile_items); + BlockBuildSelectivePHT_1(items, selection_flags, + hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_c(int* filter_col, int *dim_key, int* dim_val, int num_tuples, int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, items, num_tile_items); + BlockPredEQ(items, 1, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items, num_tile_items); + BlockLoad(dim_val + tile_offset, items2, num_tile_items); + BlockBuildSelectivePHT_2(items, items2, selection_flags, + hash_table, num_slots, num_tile_items); +} + +template +__global__ void build_hashtable_d(int *dim_key, int *dim_val, int num_tuples, int *hash_table, int num_slots, int val_min) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + InitFlags(selection_flags); + BlockLoad(dim_key + tile_offset, items, num_tile_items); + BlockLoad(dim_val + tile_offset, items2, 
num_tile_items); + BlockBuildSelectivePHT_2(items, items2, selection_flags, + hash_table, num_slots, val_min, num_tile_items); +} + +float runQuery(int* lo_orderdate, int* lo_custkey, int* lo_partkey, int* lo_suppkey, int* lo_revenue, int* lo_supplycost, int lo_len, + int *d_datekey, int* d_year, int d_len, + int *p_partkey, int* p_mfgr, int p_len, + int *s_suppkey, int* s_region, int s_len, + int *c_custkey, int* c_region, int* c_nation, int c_len, + cub::CachingDeviceAllocator& g_allocator) { + SETUP_TIMING(); + + float time_query; + chrono::high_resolution_clock::time_point st, finish; + st = chrono::high_resolution_clock::now(); + + cudaEventRecord(start, 0); + + int *ht_d, *ht_c, *ht_s, *ht_p; + int d_val_len = 19981230 - 19920101 + 1; + CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_d, 2 * d_val_len * sizeof(int))); + CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_s, 2 * s_len * sizeof(int))); + CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_c, 2 * c_len * sizeof(int))); + CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_p, 2 * p_len * sizeof(int))); + + CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_c, 0, 2 * c_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_p, 0, 2 * p_len * sizeof(int))); + + int tile_items = 128*4; + build_hashtable_s<128,4><<<(s_len + tile_items - 1)/tile_items, 128>>>(s_region, s_suppkey, s_len, ht_s, s_len); + /*CHECK_ERROR();*/ + + // int* s_res = new int[s_len * 2]; + // CubDebugExit(cudaMemcpy(s_res, ht_s, s_len * 2 * sizeof(int), cudaMemcpyDeviceToHost)); + + build_hashtable_c<128,4><<<(c_len + tile_items - 1)/tile_items, 128>>>(c_region, c_custkey, c_nation, c_len, ht_c, c_len); + /*CHECK_ERROR();*/ + + // int* c_res = new int[c_len * 2]; + // CubDebugExit(cudaMemcpy(c_res, ht_c, c_len * 2 * sizeof(int), cudaMemcpyDeviceToHost)); + + build_hashtable_p<128,4><<<(p_len + tile_items - 1)/tile_items, 
128>>>(p_mfgr, p_partkey, p_len, ht_p, p_len); + /*CHECK_ERROR();*/ + + // int* p_res = new int[p_len * 2]; + // CubDebugExit(cudaMemcpy(p_res, ht_p, p_len * 2 * sizeof(int), cudaMemcpyDeviceToHost)); + + int d_val_min = 19920101; + build_hashtable_d<128,4><<<(d_len + tile_items - 1)/tile_items, 128>>>(d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min); + /*CHECK_ERROR();*/ + +#if 0 + int *h_ht_s = new int[s_len * 2]; + int *h_ht_c = new int[c_len * 2]; + int *h_ht_p = new int[p_len * 2]; + int *h_ht_d = new int[d_val_len * 2]; + + int num_s = 0 , num_c = 0, num_d = 0, num_p = 0; + + CubDebugExit(cudaMemcpy(h_ht_s, ht_s, 2 * s_len * sizeof(int), cudaMemcpyDeviceToHost)); + for (int i=0; i<<<(lo_len + tile_items - 1)/tile_items, 128>>>(lo_orderdate, lo_partkey, + lo_custkey, lo_suppkey, lo_revenue, lo_supplycost, lo_len, ht_p, p_len, ht_s, s_len, ht_c, c_len, ht_d, d_val_len, res); + + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&time_query, start,stop); + + int* h_res = new int[res_array_size]; + CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int), cudaMemcpyDeviceToHost)); + finish = chrono::high_resolution_clock::now(); + std::chrono::duration diff = finish - st; + + cout << "Result:" << endl; + int res_count = 0; + for (int i=0; i(&h_res[4*i + 2])[0] << endl; + res_count += 1; + } + } + + cout << "Res Count: " << res_count << endl; + cout << "Time Taken Total: " << diff.count() * 1000 << endl; + + delete[] h_res; + + return time_query; +} + +/** + * Main + */ +int main(int argc, char** argv) +{ + int num_trials = 1; + + // Initialize command line + CommandLineArgs args(argc, argv); + args.GetCmdLineArgument("t", num_trials); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--t=] " + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + int *h_lo_orderdate = loadColumn("lo_orderdate", LO_LEN); + int *h_lo_suppkey = 
loadColumn("lo_suppkey", LO_LEN); + int *h_lo_custkey = loadColumn("lo_custkey", LO_LEN); + int *h_lo_partkey = loadColumn("lo_partkey", LO_LEN); + int *h_lo_revenue = loadColumn("lo_revenue", LO_LEN); + int *h_lo_supplycost = loadColumn("lo_supplycost", LO_LEN); + + int *h_d_datekey = loadColumn("d_datekey", D_LEN); + int *h_d_year = loadColumn("d_year", D_LEN); + int *h_d_yearmonthnum = loadColumn("d_yearmonthnum", D_LEN); + + int *h_s_suppkey = loadColumn("s_suppkey", S_LEN); + int *h_s_region = loadColumn("s_region", S_LEN); + + int *h_p_partkey = loadColumn("p_partkey", P_LEN); + int *h_p_mfgr = loadColumn("p_mfgr", P_LEN); + + int *h_c_custkey = loadColumn("c_custkey", C_LEN); + int *h_c_region = loadColumn("c_region", C_LEN); + int *h_c_nation = loadColumn("c_nation", C_LEN); + + cout << "** LOADED DATA **" << endl; + + int *d_lo_orderdate = loadToGPU(h_lo_orderdate, LO_LEN, g_allocator); + int *d_lo_custkey = loadToGPU(h_lo_custkey, LO_LEN, g_allocator); + int *d_lo_suppkey = loadToGPU(h_lo_suppkey, LO_LEN, g_allocator); + int *d_lo_partkey = loadToGPU(h_lo_partkey, LO_LEN, g_allocator); + int *d_lo_revenue = loadToGPU(h_lo_revenue, LO_LEN, g_allocator); + int *d_lo_supplycost = loadToGPU(h_lo_supplycost, LO_LEN, g_allocator); + + int *d_d_datekey = loadToGPU(h_d_datekey, D_LEN, g_allocator); + int *d_d_year = loadToGPU(h_d_year, D_LEN, g_allocator); + + int *d_p_partkey = loadToGPU(h_p_partkey, P_LEN, g_allocator); + int *d_p_mfgr = loadToGPU(h_p_mfgr, P_LEN, g_allocator); + + int *d_s_suppkey = loadToGPU(h_s_suppkey, S_LEN, g_allocator); + int *d_s_region = loadToGPU(h_s_region, S_LEN, g_allocator); + + int *d_c_custkey = loadToGPU(h_c_custkey, C_LEN, g_allocator); + int *d_c_region = loadToGPU(h_c_region, C_LEN, g_allocator); + int *d_c_nation = loadToGPU(h_c_nation, C_LEN, g_allocator); + + cout << "** LOADED DATA TO GPU **" << endl; + + for (int t = 0; t < num_trials; t++) { + float time_query; + time_query = runQuery( + d_lo_orderdate, d_lo_custkey, 
d_lo_partkey, d_lo_suppkey, d_lo_revenue, d_lo_supplycost, LO_LEN, + d_d_datekey, d_d_year, D_LEN, + d_p_partkey, d_p_mfgr, P_LEN, + d_s_suppkey, d_s_region, S_LEN, + d_c_custkey, d_c_region, d_c_nation, C_LEN, + g_allocator); + cout<< "{" + << "\"query\":41" + << ",\"time_query\":" << time_query + << "}" << endl; + } + + return 0; +} + diff --git a/crystal/src/ssb/q42.cu b/crystal/src/ssb/q42.cu new file mode 100644 index 0000000..ce8f956 --- /dev/null +++ b/crystal/src/ssb/q42.cu @@ -0,0 +1,346 @@ +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include +#include +#include "cub/test/test_util.h" + +#include "crystal.cuh" + +#include "gpu_utils.h" +#include "ssb_utils.h" + +using namespace std; + +/** + * Globals, constants and typedefs + */ +bool g_verbose = false; // Whether to display input/output to console +cub::CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory + +template +__global__ void probe(int* lo_orderdate, int* lo_partkey, int* lo_custkey, int* lo_suppkey, int* lo_revenue, int* lo_supplycost, int lo_len, + int* ht_p, int p_len, + int* ht_s, int s_len, + int* ht_c, int c_len, + int* ht_d, int d_len, + int* res) { + // Load a segment of consecutive items that are blocked across threads + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int category[ITEMS_PER_THREAD]; + int s_nation[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, items, num_tile_items); + BlockProbeAndPHT_2(items, s_nation, selection_flags, + ht_s, s_len, num_tile_items); + + BlockLoad(lo_custkey + tile_offset, items, num_tile_items); + 
BlockProbeAndPHT_1(items, selection_flags, ht_c, c_len, num_tile_items); + + BlockLoad(lo_partkey + tile_offset, items, num_tile_items); + BlockProbeAndPHT_2(items, category, selection_flags, + ht_p, p_len, num_tile_items); + + + BlockLoad(lo_orderdate + tile_offset, items, num_tile_items); + BlockProbeAndPHT_2(items, year, selection_flags, + ht_d, d_len, 19920101, num_tile_items); + + BlockLoad(lo_revenue + tile_offset, revenue, num_tile_items); + BlockLoad(lo_supplycost + tile_offset, items, num_tile_items); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { + if (threadIdx.x + (BLOCK_THREADS * ITEM) < num_tile_items) { + if (selection_flags[ITEM]) { + /*int hash = (category[ITEM] * 7 * 25 + s_nation[ITEM] * 7 + (year[ITEM] - 1992)) % ((1998-1992+1) * 25 * 55);*/ + int hash = ((year[ITEM] - 1992) * 25 * 25 + s_nation[ITEM] * 25 + category[ITEM]) % ((1998-1992+1) * 25 * 25); + res[hash * 6] = year[ITEM]; + res[hash * 6 + 1] = s_nation[ITEM]; + res[hash * 6 + 2] = category[ITEM]; + atomicAdd(reinterpret_cast(&res[hash * 6 + 4]), (long long)(revenue[ITEM] - items[ITEM])); + } + } + } +} + +template +__global__ +void build_hashtable_c(int *filter_col, int *dim_key, int num_tuples, int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, items, num_tile_items); + BlockPredEQ(items, 1, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items, num_tile_items); + BlockBuildSelectivePHT_1(items, selection_flags, + hash_table, num_slots, num_tile_items); +} + +template +__global__ +void build_hashtable_p(int *filter_col, int *dim_key, int* dim_val, int num_tuples, int *hash_table, int num_slots) { + int 
items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, items, num_tile_items); + BlockPredEQ(items, 0, selection_flags, num_tile_items); + BlockPredOrEQ(items, 1, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items, num_tile_items); + BlockLoad(dim_val + tile_offset, items2, num_tile_items); + BlockBuildSelectivePHT_2(items, items2, selection_flags, + hash_table, num_slots, num_tile_items); +} + +template +__global__ +void build_hashtable_s(int* filter_col, int *dim_key, int* dim_val, int num_tuples, int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, items, num_tile_items); + BlockPredEQ(items, 1, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items, num_tile_items); + BlockLoad(dim_val + tile_offset, items2, num_tile_items); + BlockBuildSelectivePHT_2(items, items2, selection_flags, + hash_table, num_slots, num_tile_items); +} + +template +__global__ +void build_hashtable_d(int *dim_key, int *dim_val, int num_tuples, int *hash_table, int num_slots, int val_min) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = 
num_tuples - tile_offset; + } + + InitFlags(selection_flags); + BlockLoad(dim_val + tile_offset, items, num_tile_items); + BlockPredEQ(items, 1997, selection_flags, num_tile_items); + BlockPredOrEQ(items, 1998, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items2, num_tile_items); + BlockBuildSelectivePHT_2(items2, items, selection_flags, + hash_table, num_slots, val_min, num_tile_items); +} + +float runQuery(int* lo_orderdate, int* lo_custkey, int* lo_partkey, int* lo_suppkey, int* lo_revenue, int* lo_supplycost, int lo_len, + int *d_datekey, int* d_year, int d_len, + int *p_partkey, int* p_mfgr, int* p_category, int p_len, + int *s_suppkey, int* s_region, int* s_nation, int s_len, + int *c_custkey, int* c_region, int c_len, + cub::CachingDeviceAllocator& g_allocator) { + SETUP_TIMING(); + + float time_query; + chrono::high_resolution_clock::time_point st, finish; + st = chrono::high_resolution_clock::now(); + + cudaEventRecord(start, 0); + + int *ht_d, *ht_c, *ht_s, *ht_p; + int d_val_len = 19981230 - 19920101 + 1; + CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_d, 2 * d_val_len * sizeof(int))); + CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_s, 2 * s_len * sizeof(int))); + CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_c, 2 * c_len * sizeof(int))); + CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_p, 2 * p_len * sizeof(int))); + + CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_c, 0, 2 * c_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_p, 0, 2 * p_len * sizeof(int))); + + int tile_items = 128*4; + build_hashtable_s<128,4><<<(s_len + tile_items - 1)/tile_items, 128>>>(s_region, s_suppkey, s_nation, s_len, ht_s, s_len); + /*CHECK_ERROR();*/ + + build_hashtable_c<128,4><<<(c_len + tile_items - 1)/tile_items, 128>>>(c_region, c_custkey, c_len, ht_c, c_len); + /*CHECK_ERROR();*/ + + 
build_hashtable_p<128,4><<<(p_len + tile_items - 1)/tile_items, 128>>>(p_mfgr, p_partkey, p_category, p_len, ht_p, p_len); + /*CHECK_ERROR();*/ + + int d_val_min = 19920101; + build_hashtable_d<128,4><<<(d_len + tile_items - 1)/tile_items, 128>>>(d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min); + /*CHECK_ERROR();*/ + + int *res; + int res_size = ((1998-1992+1) * 25 * 25); + int ht_entries = 6; + int res_array_size = res_size * ht_entries; + CubDebugExit(g_allocator.DeviceAllocate((void**)&res, res_array_size * sizeof(int))); + + CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int))); + + // Run + probe<128,4><<<(lo_len + tile_items - 1)/tile_items, 128>>>(lo_orderdate, lo_partkey, + lo_custkey, lo_suppkey, lo_revenue, lo_supplycost, lo_len, ht_p, p_len, ht_s, s_len, ht_c, c_len, ht_d, d_val_len, res); + + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&time_query, start,stop); + + int* h_res = new int[res_array_size]; + CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int), cudaMemcpyDeviceToHost)); + finish = chrono::high_resolution_clock::now(); + std::chrono::duration diff = finish - st; + + cout << "Result:" << endl; + int res_count = 0; + for (int i=0; i(&h_res[6*i + 4])[0] << endl; + res_count += 1; + } + } + + cout << "Res Count: " << res_count << endl; + cout << "Time Taken Total: " << diff.count() * 1000 << endl; + + delete[] h_res; + + return time_query; +} + +/** + * Main + */ +int main(int argc, char** argv) +{ + int num_trials = 3; + + // Initialize command line + CommandLineArgs args(argc, argv); + args.GetCmdLineArgument("t", num_trials); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--t=] " + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + int *h_lo_orderdate = loadColumn("lo_orderdate", LO_LEN); + int *h_lo_suppkey = loadColumn("lo_suppkey", LO_LEN); + int *h_lo_custkey = loadColumn("lo_custkey", 
LO_LEN); + int *h_lo_partkey = loadColumn("lo_partkey", LO_LEN); + int *h_lo_revenue = loadColumn("lo_revenue", LO_LEN); + int *h_lo_supplycost = loadColumn("lo_supplycost", LO_LEN); + + int *h_d_datekey = loadColumn("d_datekey", D_LEN); + int *h_d_year = loadColumn("d_year", D_LEN); + int *h_d_yearmonthnum = loadColumn("d_yearmonthnum", D_LEN); + + int *h_s_suppkey = loadColumn("s_suppkey", S_LEN); + int *h_s_region = loadColumn("s_region", S_LEN); + int *h_s_nation = loadColumn("s_nation", S_LEN); + + int *h_p_partkey = loadColumn("p_partkey", P_LEN); + int *h_p_mfgr = loadColumn("p_mfgr", P_LEN); + int *h_p_category = loadColumn("p_category", P_LEN); + + int *h_c_custkey = loadColumn("c_custkey", C_LEN); + int *h_c_region = loadColumn("c_region", C_LEN); + + cout << "** LOADED DATA **" << endl; + + int *d_lo_orderdate = loadToGPU(h_lo_orderdate, LO_LEN, g_allocator); + int *d_lo_custkey = loadToGPU(h_lo_custkey, LO_LEN, g_allocator); + int *d_lo_suppkey = loadToGPU(h_lo_suppkey, LO_LEN, g_allocator); + int *d_lo_partkey = loadToGPU(h_lo_partkey, LO_LEN, g_allocator); + int *d_lo_revenue = loadToGPU(h_lo_revenue, LO_LEN, g_allocator); + int *d_lo_supplycost = loadToGPU(h_lo_supplycost, LO_LEN, g_allocator); + + int *d_d_datekey = loadToGPU(h_d_datekey, D_LEN, g_allocator); + int *d_d_year = loadToGPU(h_d_year, D_LEN, g_allocator); + + int *d_p_partkey = loadToGPU(h_p_partkey, P_LEN, g_allocator); + int *d_p_mfgr = loadToGPU(h_p_mfgr, P_LEN, g_allocator); + int *d_p_category = loadToGPU(h_p_category, P_LEN, g_allocator); + + int *d_s_suppkey = loadToGPU(h_s_suppkey, S_LEN, g_allocator); + int *d_s_region = loadToGPU(h_s_region, S_LEN, g_allocator); + int *d_s_nation = loadToGPU(h_s_nation, S_LEN, g_allocator); + + int *d_c_custkey = loadToGPU(h_c_custkey, C_LEN, g_allocator); + int *d_c_region = loadToGPU(h_c_region, C_LEN, g_allocator); + + cout << "** LOADED DATA TO GPU **" << endl; + + for (int t = 0; t < num_trials; t++) { + float time_query; + time_query = 
runQuery( + d_lo_orderdate, d_lo_custkey, d_lo_partkey, d_lo_suppkey, d_lo_revenue, d_lo_supplycost, LO_LEN, + d_d_datekey, d_d_year, D_LEN, + d_p_partkey, d_p_mfgr, d_p_category, P_LEN, + d_s_suppkey, d_s_region, d_s_nation, S_LEN, + d_c_custkey, d_c_region, C_LEN, + g_allocator); + cout<< "{" + << "\"query\":42" + << ",\"time_query\":" << time_query + << "}" << endl; + } + + return 0; +} + diff --git a/crystal/src/ssb/q43.cu b/crystal/src/ssb/q43.cu new file mode 100644 index 0000000..6b2367d --- /dev/null +++ b/crystal/src/ssb/q43.cu @@ -0,0 +1,343 @@ +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include +#include +#include "cub/test/test_util.h" + +#include "crystal.cuh" + +#include "gpu_utils.h" +#include "ssb_utils.h" + +using namespace std; + +/** + * Globals, constants and typedefs + */ +bool g_verbose = false; // Whether to display input/output to console +cub::CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory + +template +__global__ void probe(int* lo_orderdate, int* lo_partkey, int* lo_custkey, int* lo_suppkey, int* lo_revenue, int* lo_supplycost, int lo_len, + int* ht_p, int p_len, + int* ht_s, int s_len, + int* ht_c, int c_len, + int* ht_d, int d_len, + int* res) { + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + int brand[ITEMS_PER_THREAD]; + int s_city[ITEMS_PER_THREAD]; + int year[ITEMS_PER_THREAD]; + int revenue[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (lo_len + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = lo_len - tile_offset; + } + + InitFlags(selection_flags); + + BlockLoad(lo_suppkey + tile_offset, items, num_tile_items); + BlockProbeAndPHT_2(items, s_city, selection_flags, + ht_s, s_len, num_tile_items); + + BlockLoad(lo_custkey + tile_offset, items, num_tile_items); + BlockProbeAndPHT_1(items, selection_flags, 
ht_c, c_len, num_tile_items); + + BlockLoad(lo_partkey + tile_offset, items, num_tile_items); + BlockProbeAndPHT_2(items, brand, selection_flags, + ht_p, p_len, num_tile_items); + + BlockLoad(lo_orderdate + tile_offset, items, num_tile_items); + BlockProbeAndPHT_2(items, year, selection_flags, + ht_d, d_len, 19920101, num_tile_items); + + BlockLoad(lo_revenue + tile_offset, revenue, num_tile_items); + BlockLoad(lo_supplycost + tile_offset, items, num_tile_items); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { + if (threadIdx.x + (BLOCK_THREADS * ITEM) < num_tile_items) { + if (selection_flags[ITEM]) { + /*int hash = (category[ITEM] * 7 * 25 + s_nation[ITEM] * 7 + (year[ITEM] - 1992)) % ((1998-1992+1) * 25 * 55);*/ + int hash = ((year[ITEM] - 1992) * 250 * 1000 + s_city[ITEM] * 1000 + brand[ITEM]) % ((1998-1992+1) * 250 * 1000); + res[hash * 4] = year[ITEM]; + res[hash * 4 + 1] = s_city[ITEM]; + res[hash * 4 + 2] = brand[ITEM]; + atomicAdd(&res[hash * 4 + 3], (revenue[ITEM] - items[ITEM])); + } + } + } +} + +template +__global__ +void build_hashtable_c(int *filter_col, int *dim_key, int num_tuples, int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, items, num_tile_items); + BlockPredEQ(items, 1, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items, num_tile_items); + BlockBuildSelectivePHT_1(items, selection_flags, + hash_table, num_slots, num_tile_items); +} + +template +__global__ +void build_hashtable_p(int *filter_col, int *dim_key, int* dim_val, int num_tuples, int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int 
selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, items, num_tile_items); + BlockPredEQ(items, 3, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items, num_tile_items); + BlockLoad(dim_val + tile_offset, items2, num_tile_items); + BlockBuildSelectivePHT_2(items, items2, selection_flags, + hash_table, num_slots, num_tile_items); +} + +template +__global__ +void build_hashtable_s(int* filter_col, int *dim_key, int* dim_val, int num_tuples, int *hash_table, int num_slots) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + BlockLoad(filter_col + tile_offset, items, num_tile_items); + BlockPredEQ(items, 24, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items, num_tile_items); + BlockLoad(dim_val + tile_offset, items2, num_tile_items); + BlockBuildSelectivePHT_2(items, items2, selection_flags, + hash_table, num_slots, num_tile_items); +} + +template +__global__ +void build_hashtable_d(int *dim_key, int *dim_val, int num_tuples, int *hash_table, int num_slots, int val_min) { + int items[ITEMS_PER_THREAD]; + int items2[ITEMS_PER_THREAD]; + int selection_flags[ITEMS_PER_THREAD]; + + int tile_offset = blockIdx.x * TILE_SIZE; + int num_tiles = (num_tuples + TILE_SIZE - 1) / TILE_SIZE; + int num_tile_items = TILE_SIZE; + + if (blockIdx.x == num_tiles - 1) { + num_tile_items = num_tuples - tile_offset; + } + + InitFlags(selection_flags); + BlockLoad(dim_val + tile_offset, items, num_tile_items); + 
BlockPredEQ(items, 1997, selection_flags, num_tile_items); + BlockPredOrEQ(items, 1998, selection_flags, num_tile_items); + + BlockLoad(dim_key + tile_offset, items2, num_tile_items); + BlockBuildSelectivePHT_2(items2, items, selection_flags, + hash_table, num_slots, val_min, num_tile_items); +} + +float runQuery(int* lo_orderdate, int* lo_custkey, int* lo_partkey, int* lo_suppkey, int* lo_revenue, int* lo_supplycost, int lo_len, + int *d_datekey, int* d_year, int d_len, + int *p_partkey, int* p_category, int* p_brand1, int p_len, + int *s_suppkey, int* s_nation, int* s_city, int s_len, + int *c_custkey, int* c_region, int c_len, + cub::CachingDeviceAllocator& g_allocator) { + SETUP_TIMING(); + + float time_query; + chrono::high_resolution_clock::time_point st, finish; + st = chrono::high_resolution_clock::now(); + + cudaEventRecord(start, 0); + + int *ht_d, *ht_c, *ht_s, *ht_p; + int d_val_len = 19981230 - 19920101 + 1; + CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_d, 2 * d_val_len * sizeof(int))); + CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_s, 2 * s_len * sizeof(int))); + CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_c, 2 * c_len * sizeof(int))); + CubDebugExit(g_allocator.DeviceAllocate((void**)&ht_p, 2 * p_len * sizeof(int))); + + CubDebugExit(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_s, 0, 2 * s_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_c, 0, 2 * c_len * sizeof(int))); + CubDebugExit(cudaMemset(ht_p, 0, 2 * p_len * sizeof(int))); + + int tile_items = 128*4; + build_hashtable_s<128,4><<<(s_len + tile_items - 1)/tile_items, 128>>>(s_nation, s_suppkey, s_city, s_len, ht_s, s_len); + /*CHECK_ERROR();*/ + + build_hashtable_c<128,4><<<(c_len + tile_items - 1)/tile_items, 128>>>(c_region, c_custkey, c_len, ht_c, c_len); + /*CHECK_ERROR();*/ + + build_hashtable_p<128,4><<<(p_len + tile_items - 1)/tile_items, 128>>>(p_category, p_partkey, p_brand1, p_len, ht_p, p_len); + /*CHECK_ERROR();*/ + + int 
d_val_min = 19920101; + build_hashtable_d<128,4><<<(d_len + tile_items - 1)/tile_items, 128>>>(d_datekey, d_year, d_len, ht_d, d_val_len, d_val_min); + /*CHECK_ERROR();*/ + + int *res; + int res_size = ((1998-1992+1) * 250 * 1000); + int ht_entries = 4; + int res_array_size = res_size * ht_entries; + CubDebugExit(g_allocator.DeviceAllocate((void**)&res, res_array_size * sizeof(int))); + + CubDebugExit(cudaMemset(res, 0, res_array_size * sizeof(int))); + + // Run + probe<128,4><<<(lo_len + tile_items - 1)/tile_items, 128>>>(lo_orderdate, lo_partkey, + lo_custkey, lo_suppkey, lo_revenue, lo_supplycost, lo_len, ht_p, p_len, ht_s, s_len, ht_c, c_len, ht_d, d_val_len, res); + + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&time_query, start,stop); + + cout << "Result:" << endl; + + int* h_res = new int[res_array_size]; + CubDebugExit(cudaMemcpy(h_res, res, res_array_size * sizeof(int), cudaMemcpyDeviceToHost)); + finish = chrono::high_resolution_clock::now(); + std::chrono::duration diff = finish - st; + + int res_count = 0; + for (int i=0; i] " + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + int *h_lo_orderdate = loadColumn("lo_orderdate", LO_LEN); + int *h_lo_suppkey = loadColumn("lo_suppkey", LO_LEN); + int *h_lo_custkey = loadColumn("lo_custkey", LO_LEN); + int *h_lo_partkey = loadColumn("lo_partkey", LO_LEN); + int *h_lo_revenue = loadColumn("lo_revenue", LO_LEN); + int *h_lo_supplycost = loadColumn("lo_supplycost", LO_LEN); + + int *h_d_datekey = loadColumn("d_datekey", D_LEN); + int *h_d_year = loadColumn("d_year", D_LEN); + + int *h_s_suppkey = loadColumn("s_suppkey", S_LEN); + int *h_s_nation = loadColumn("s_nation", S_LEN); + int *h_s_city = loadColumn("s_city", S_LEN); + + int *h_p_partkey = loadColumn("p_partkey", P_LEN); + int *h_p_category = loadColumn("p_category", P_LEN); + int *h_p_brand1 = loadColumn("p_brand1", P_LEN); + + int *h_c_custkey = 
loadColumn("c_custkey", C_LEN); + int *h_c_region = loadColumn("c_region", C_LEN); + + cout << "** LOADED DATA **" << endl; + + int *d_lo_orderdate = loadToGPU(h_lo_orderdate, LO_LEN, g_allocator); + int *d_lo_custkey = loadToGPU(h_lo_custkey, LO_LEN, g_allocator); + int *d_lo_suppkey = loadToGPU(h_lo_suppkey, LO_LEN, g_allocator); + int *d_lo_partkey = loadToGPU(h_lo_partkey, LO_LEN, g_allocator); + int *d_lo_revenue = loadToGPU(h_lo_revenue, LO_LEN, g_allocator); + int *d_lo_supplycost = loadToGPU(h_lo_supplycost, LO_LEN, g_allocator); + + int *d_d_datekey = loadToGPU(h_d_datekey, D_LEN, g_allocator); + int *d_d_year = loadToGPU(h_d_year, D_LEN, g_allocator); + + int *d_p_partkey = loadToGPU(h_p_partkey, P_LEN, g_allocator); + int *d_p_category = loadToGPU(h_p_category, P_LEN, g_allocator); + int *d_p_brand1 = loadToGPU(h_p_brand1, P_LEN, g_allocator); + + int *d_s_suppkey = loadToGPU(h_s_suppkey, S_LEN, g_allocator); + int *d_s_nation = loadToGPU(h_s_nation, S_LEN, g_allocator); + int *d_s_city = loadToGPU(h_s_city, S_LEN, g_allocator); + + int *d_c_custkey = loadToGPU(h_c_custkey, C_LEN, g_allocator); + int *d_c_region = loadToGPU(h_c_region, C_LEN, g_allocator); + + cout << "** LOADED DATA TO GPU **" << endl; + + for (int t = 0; t < num_trials; t++) { + float time_query; + time_query = runQuery( + d_lo_orderdate, d_lo_custkey, d_lo_partkey, d_lo_suppkey, d_lo_revenue, d_lo_supplycost, LO_LEN, + d_d_datekey, d_d_year, D_LEN, + d_p_partkey, d_p_category, d_p_brand1, P_LEN, + d_s_suppkey, d_s_nation, d_s_city, S_LEN, + d_c_custkey, d_c_region, C_LEN, + g_allocator); + cout<< "{" + << "\"query\":43" + << ",\"time_query\":" << time_query + << "}" << endl; + } + + return 0; +} + diff --git a/crystal/src/ssb/ssb_utils.h b/crystal/src/ssb/ssb_utils.h new file mode 100644 index 0000000..77f24ef --- /dev/null +++ b/crystal/src/ssb/ssb_utils.h @@ -0,0 +1,107 @@ +#include +#include +#include + +/*#include */ +/*#include */ + +using namespace std; + +#define SF 10 + 
+#define BASE_PATH "" + +#if SF == 1 +#define DATA_DIR BASE_PATH "/home/ubuntu/fff/gpu/data/ssb/data/s1_columnar/" +#define LO_LEN 6001171 +#define P_LEN 200000 +#define S_LEN 2000 +#define C_LEN 30000 +#define D_LEN 2556 +#elif SF == 10 +#define DATA_DIR BASE_PATH "/home/ubuntu/fff/gpu/data/ssb/data/s10_columnar/" +#define LO_LEN 59986214 +#define P_LEN 800000 +#define S_LEN 20000 +#define C_LEN 300000 +#define D_LEN 2556 +#else // 20 +#define DATA_DIR BASE_PATH "s20_columnar/" +#define LO_LEN 119994746 +#define P_LEN 1000000 +#define S_LEN 40000 +#define C_LEN 600000 +#define D_LEN 2556 +#endif + + + +int index_of(string* arr, int len, string val) { + for (int i=0; i +T* loadColumn(string col_name, int num_entries) { + T* h_col = new T[num_entries]; + string filename = DATA_DIR + lookup(col_name); + ifstream colData (filename.c_str(), ios::in | ios::binary); + if (!colData) { + return NULL; + } + + colData.read((char*)h_col, num_entries * sizeof(T)); + return h_col; +} + +template +int storeColumn(string col_name, int num_entries, int* h_col) { + string filename = DATA_DIR + lookup(col_name); + ofstream colData (filename.c_str(), ios::out | ios::binary); + if (!colData) { + return -1; + } + + colData.write((char*)h_col, num_entries * sizeof(T)); + return 0; +} + +/*int main() {*/ + //int *h_col = new int[10]; + //for (int i=0; i<10; i++) h_col[i] = i; + //storeColumn("test", 10, h_col); + //int *l_col = loadColumn("test", 10); + //for (int i=0; i<10; i++) cout << l_col[i] << " "; + //cout << endl; + //return 0; +/*}*/ diff --git a/data/README.md b/data/README.md new file mode 100644 index 0000000..bad12d5 --- /dev/null +++ b/data/README.md @@ -0,0 +1,17 @@ +- how to generate data for `sf=10` + ```shell + pip3 install duckdb + make -C ssb/dbgen/ + make -C ssb/loader/ + python3 util.py ssb 10 gen + chmod 777 ssb/data/s10/date.tbl + python3 util.py ssb 10 transform + python3 util.py ssb 10 sort + echo "change BASE_PATH in crystal_ssb_utils.h and ssb_utils to the 
right path" + ``` + +- temp + ```shell + python3 util.py ssb 10 sort_other_way // experimental + + ``` \ No newline at end of file diff --git a/data/result_of_queries/q11 b/data/result_of_queries/q11 new file mode 100644 index 0000000..ae202bf --- /dev/null +++ b/data/result_of_queries/q11 @@ -0,0 +1 @@ +4468236714181 \ No newline at end of file diff --git a/data/result_of_queries/q21 b/data/result_of_queries/q21 new file mode 100644 index 0000000..1cc6850 --- /dev/null +++ b/data/result_of_queries/q21 @@ -0,0 +1,281 @@ +1992 40 6418103797 +1993 40 6382034658 +1994 40 6525665927 +1995 40 6563646251 +1996 40 6578456138 +1997 40 6379340748 +1998 40 3698210388 +1992 41 7215774624 +1993 41 7235770546 +1994 41 7093880636 +1995 41 7156912379 +1996 41 7158257544 +1997 41 6890416180 +1998 41 4204327203 +1992 42 6601010229 +1993 42 6661380481 +1994 42 6805618335 +1995 42 6539366376 +1996 42 6832348876 +1997 42 6571540214 +1998 42 3978744872 +1992 43 7197665271 +1993 43 6945853876 +1994 43 6916324003 +1995 43 6871377174 +1996 43 6832025455 +1997 43 7045010342 +1998 43 4258248122 +1992 44 6727625998 +1993 44 6312893492 +1994 44 6562962487 +1995 44 6229499393 +1996 44 6564940527 +1997 44 6510029432 +1998 44 3893760987 +1992 45 6461380401 +1993 45 6558772163 +1994 45 6456432352 +1995 45 6239560871 +1996 45 6519111073 +1997 45 6390521469 +1998 45 3755042585 +1992 46 7012618906 +1993 46 6783420789 +1994 46 6677778187 +1995 46 7063055884 +1996 46 6717830860 +1997 46 6825894334 +1998 46 4019728654 +1992 47 6730883299 +1993 47 6673551973 +1994 47 6889765819 +1995 47 6873007289 +1996 47 6691029370 +1997 47 6656963454 +1998 47 3860509210 +1992 48 6125452526 +1993 48 6387782986 +1994 48 6220999101 +1995 48 6266773366 +1996 48 6086559287 +1997 48 6096975918 +1998 48 3424123719 +1992 49 6656732408 +1993 49 6439163794 +1994 49 7086198960 +1995 49 6801494826 +1996 49 6380057064 +1997 49 6624824324 +1998 49 4162459164 +1992 50 6960570696 +1993 50 7237748553 +1994 50 6871669861 +1995 50 
7141121473 +1996 50 7137131058 +1997 50 6771705645 +1998 50 4259523518 +1992 51 6574132099 +1993 51 6358522943 +1994 51 6572086846 +1995 51 6401898562 +1996 51 6410252673 +1997 51 6426732319 +1998 51 3553327368 +1992 52 7047940337 +1993 52 6944255619 +1994 52 6773948949 +1995 52 6714537523 +1996 52 6831573122 +1997 52 6734349931 +1998 52 3804080515 +1992 53 6783046496 +1993 53 6764674340 +1994 53 6740138960 +1995 53 7013631699 +1996 53 6488850668 +1997 53 6762927970 +1998 53 3958631518 +1992 54 7105405127 +1993 54 7036373470 +1994 54 7019061940 +1995 54 6646909733 +1996 54 6546458610 +1997 54 6747693662 +1998 54 3959735030 +1992 55 7292018911 +1993 55 6919737436 +1994 55 6782433544 +1995 55 6603309817 +1996 55 6796414799 +1997 55 6930467621 +1998 55 3936336506 +1992 56 7416913901 +1993 56 7023034684 +1994 56 6886859642 +1995 56 7204223670 +1996 56 7307030629 +1997 56 7278012359 +1998 56 4195358018 +1992 57 7072285707 +1993 57 6769724436 +1994 57 7157349757 +1995 57 6786320672 +1996 57 7083167031 +1997 57 6867387556 +1998 57 3959867848 +1992 58 6668044014 +1993 58 6985920856 +1994 58 6596737151 +1995 58 6659827925 +1996 58 6454616521 +1997 58 6778311943 +1998 58 3972101307 +1992 59 6326657255 +1993 59 6484432568 +1994 59 6408868609 +1995 59 6616633932 +1996 59 6679260631 +1997 59 6372927264 +1998 59 3846617983 +1992 60 7136768663 +1993 60 6878576876 +1994 60 6741527996 +1995 60 6769490915 +1996 60 6903254888 +1997 60 6960241189 +1998 60 4130144930 +1992 61 6738756530 +1993 61 6636715879 +1994 61 6763648338 +1995 61 6617746150 +1996 61 6892805375 +1997 61 6536932124 +1998 61 3835627586 +1992 62 6164211920 +1993 62 6342711015 +1994 62 6300859287 +1995 62 6732584121 +1996 62 6226736904 +1997 62 6431206336 +1998 62 3948296506 +1992 63 7049023811 +1993 63 7257074782 +1994 63 7201434704 +1995 63 7143627518 +1996 63 7218551955 +1997 63 7122881926 +1998 63 4229058658 +1992 64 6918049898 +1993 64 6501113968 +1994 64 7009023813 +1995 64 6718571799 +1996 64 6763605438 +1997 64 
7104613185 +1998 64 3979838975 +1992 65 7152976211 +1993 65 7458190031 +1994 65 6937644159 +1995 65 6997669629 +1996 65 6758440512 +1997 65 6673821228 +1998 65 4322914592 +1992 66 6395458610 +1993 66 6409727300 +1994 66 6143698484 +1995 66 6290853513 +1996 66 6562894079 +1997 66 6580940135 +1998 66 3835167462 +1992 67 6844550759 +1993 67 7336259130 +1994 67 7471700197 +1995 67 7040096938 +1996 67 7077296627 +1997 67 7573027340 +1998 67 4445654176 +1992 68 6586538461 +1993 68 6329817914 +1994 68 6495180880 +1995 68 6424478604 +1996 68 6541879729 +1997 68 6614661298 +1998 68 3725370328 +1992 69 6437149944 +1993 69 6642939280 +1994 69 6493295161 +1995 69 6642164323 +1996 69 6502125649 +1997 69 6745438347 +1998 69 3683114400 +1992 70 6956560451 +1993 70 7000267344 +1994 70 6510307841 +1995 70 6337688211 +1996 70 7034861207 +1997 70 6184992923 +1998 70 3792560046 +1992 71 7503528393 +1993 71 7311857458 +1994 71 7877750677 +1995 71 7548275489 +1996 71 7299486342 +1997 71 7130260446 +1998 71 4364145775 +1992 72 6773151840 +1993 72 6705723103 +1994 72 6576032819 +1995 72 6874053112 +1996 72 6405666522 +1997 72 6755654898 +1998 72 4000181003 +1992 73 6642879253 +1993 73 6637877324 +1994 73 6954631030 +1995 73 6454765835 +1996 73 6598056575 +1997 73 6785666627 +1998 73 3680466597 +1992 74 7338251519 +1993 74 7059280620 +1994 74 7061358044 +1995 74 7044215380 +1996 74 7332374720 +1997 74 7010120097 +1998 74 4382878336 +1992 75 6993738734 +1993 75 7079353328 +1994 75 7136558061 +1995 75 7044955465 +1996 75 6832731514 +1997 75 6839863219 +1998 75 4144643700 +1992 76 7216305524 +1993 76 7243703041 +1994 76 7274388343 +1995 76 7233806943 +1996 76 6971700893 +1997 76 7041104465 +1998 76 3945668122 +1992 77 5945520673 +1993 77 6149061528 +1994 77 5791875920 +1995 77 5953806237 +1996 77 6094857618 +1997 77 6161765944 +1998 77 3589143954 +1992 78 6082662735 +1993 78 6384605378 +1994 78 6458393784 +1995 78 6165718089 +1996 78 6742644418 +1997 78 6420560847 +1998 78 3695789321 +1992 79 
7126061027 +1993 79 7055353878 +1994 79 7101978837 +1995 79 6781640340 +1996 79 7257992096 +1997 79 6791270791 +1998 79 4070644777 +Res Count: 280 \ No newline at end of file diff --git a/data/result_of_queries/q31 b/data/result_of_queries/q31 new file mode 100644 index 0000000..0f82ec5 --- /dev/null +++ b/data/result_of_queries/q31 @@ -0,0 +1,151 @@ +1992 8 8 53840255574 +1993 8 8 53166216941 +1994 8 8 53437240310 +1995 8 8 53396799768 +1996 8 8 54110132821 +1997 8 8 53398173290 +1992 9 8 56083363742 +1993 9 8 55223660082 +1994 9 8 55339397030 +1995 9 8 54949301113 +1996 9 8 55903082845 +1997 9 8 54769022116 +1992 12 8 52837317579 +1993 12 8 53383468103 +1994 12 8 52207914158 +1995 12 8 52862670951 +1996 12 8 52829409093 +1997 12 8 53020471016 +1992 18 8 53363391476 +1993 18 8 52946193531 +1994 18 8 52997321941 +1995 18 8 53672123936 +1996 18 8 53436907487 +1997 18 8 54352229494 +1992 21 8 53796356168 +1993 21 8 54304168176 +1994 21 8 53974392943 +1995 21 8 53857720297 +1996 21 8 54093512752 +1997 21 8 53598437998 +1992 8 9 55334149561 +1993 8 9 55669527348 +1994 8 9 54838930433 +1995 8 9 55981258937 +1996 8 9 56549465183 +1997 8 9 55451474341 +1992 9 9 57493556858 +1993 9 9 58025342779 +1994 9 9 57308767649 +1995 9 9 57866394299 +1996 9 9 58676834632 +1997 9 9 57151657961 +1992 12 9 55470271862 +1993 12 9 55450422145 +1994 12 9 55166732599 +1995 12 9 55756628069 +1996 12 9 55295862862 +1997 12 9 53377511976 +1992 18 9 56294215648 +1993 18 9 56167494867 +1994 18 9 55456868802 +1995 18 9 55888788272 +1996 18 9 56240855720 +1997 18 9 55624174081 +1992 21 9 56528084092 +1993 21 9 57031719413 +1994 21 9 56459028335 +1995 21 9 57672132145 +1996 21 9 56293030145 +1997 21 9 56215096026 +1992 8 12 51104583944 +1993 8 12 52291194128 +1994 8 12 52149700327 +1995 8 12 51756734585 +1996 8 12 52743929158 +1997 8 12 53618521846 +1992 9 12 54393567369 +1993 9 12 53410059754 +1994 9 12 53932060476 +1995 9 12 54260687958 +1996 9 12 55113622290 +1997 9 12 53884139975 +1992 12 12 
52781570092 +1993 12 12 52683527061 +1994 12 12 50283319443 +1995 12 12 51809888688 +1996 12 12 52500376734 +1997 12 12 50815598125 +1992 18 12 52836119396 +1993 18 12 52943818670 +1994 18 12 52153400982 +1995 18 12 51839229204 +1996 18 12 53030051819 +1997 18 12 53281809182 +1992 21 12 53377618064 +1993 21 12 53970340911 +1994 21 12 54078621677 +1995 21 12 53174393671 +1996 21 12 52256511400 +1997 21 12 53064919288 +1992 8 18 51758985311 +1993 8 18 52173652875 +1994 8 18 52604990324 +1995 8 18 52587898615 +1996 8 18 51780518836 +1997 8 18 51906203038 +1992 9 18 53887104795 +1993 9 18 53920040836 +1994 9 18 53898996978 +1995 9 18 54122679431 +1996 9 18 54303106396 +1997 9 18 54430180840 +1992 12 18 51465172557 +1993 12 18 51406709327 +1994 12 18 52099528581 +1995 12 18 51320895827 +1996 12 18 51326040782 +1997 12 18 52052860907 +1992 18 18 53202304966 +1993 18 18 52197097507 +1994 18 18 52525946124 +1995 18 18 52421548431 +1996 18 18 53671108592 +1997 18 18 52788981021 +1992 21 18 53635069027 +1993 21 18 53806768582 +1994 21 18 52744648993 +1995 21 18 52784240366 +1996 21 18 53641429016 +1997 21 18 52632344235 +1992 8 21 49640993819 +1993 8 21 50661144654 +1994 8 21 50362372598 +1995 8 21 50516483322 +1996 8 21 51123449982 +1997 8 21 51125299004 +1992 9 21 51006397394 +1993 9 21 51824859693 +1994 9 21 51996233504 +1995 9 21 51968286051 +1996 9 21 53096102262 +1997 9 21 51759284236 +1992 12 21 49650941206 +1993 12 21 50057832135 +1994 12 21 50097922236 +1995 12 21 48627408805 +1996 12 21 50197634124 +1997 12 21 49149703784 +1992 18 21 50875757574 +1993 18 21 50618892442 +1994 18 21 50420152423 +1995 18 21 50255208143 +1996 18 21 50798876344 +1997 18 21 50981837552 +1992 21 21 49991192802 +1993 21 21 51428673225 +1994 21 21 49946254861 +1995 21 21 51328670072 +1996 21 21 50456326144 +1997 21 21 50401588878 +Res Count: 150 diff --git a/data/result_of_queries/q41 b/data/result_of_queries/q41 new file mode 100644 index 0000000..4d0188b --- /dev/null +++ 
b/data/result_of_queries/q41 @@ -0,0 +1,36 @@ +1992 1 103719745491 +1993 1 104804149905 +1994 1 102680809322 +1995 1 104521470391 +1996 1 105409529511 +1997 1 103520208117 +1998 1 60245313373 +1992 2 106246161239 +1993 2 106198050501 +1994 2 106093079488 +1995 2 107568611750 +1996 2 106880639017 +1997 2 106690124662 +1998 2 61912349455 +1992 3 106647931375 +1993 3 107048690889 +1994 3 104514167652 +1995 3 105315997395 +1996 3 105586646448 +1997 3 106924659923 +1998 3 62738136949 +1992 17 104134609838 +1993 17 104651610426 +1994 17 104257308810 +1995 17 104390879969 +1996 17 105890415529 +1997 17 104161057567 +1998 17 62706700969 +1992 24 105245006839 +1993 24 104166556157 +1994 24 107595107297 +1995 24 104996502880 +1996 24 104859848521 +1997 24 105030361725 +1998 24 62169336083 +Res Count: 35 \ No newline at end of file diff --git a/data/ssb/.gitignore b/data/ssb/.gitignore new file mode 100644 index 0000000..5d04e56 --- /dev/null +++ b/data/ssb/.gitignore @@ -0,0 +1,4 @@ +data/ +dbgen/*.o +dbgen/dbgen +loader/loader diff --git a/data/ssb/SSB.md b/data/ssb/SSB.md new file mode 100644 index 0000000..2b58538 --- /dev/null +++ b/data/ssb/SSB.md @@ -0,0 +1,372 @@ +Star Schema Benchmark Queries +============================= + + +Queries +------- + +q11 + +select sum(lo_extendedprice * lo_discount) as revenue +from lineorder,date +where lo_orderdate = d_datekey +and d_year = 1993 and lo_discount>=1 +and lo_discount<=3 +and lo_quantity<25; + +q11.m + +select sum(lo_extendedprice * lo_discount) as revenue +from lineorder +where lo_orderdate >= 19930101 and lo_orderdate <= 19931231 and lo_discount>=1 +and lo_discount<=3 +and lo_quantity<25; + +q12 + +select sum(lo_extendedprice * lo_discount) as revenue +from lineorder,date +where lo_orderdate = d_datekey +and d_yearmonthnum = 199401 +and lo_discount>=4 +and lo_discount<=6 +and lo_quantity>=26 +and lo_quantity<=35; + +q12.m + +select sum(lo_extendedprice * lo_discount) as revenue +from lineorder +where lo_orderdate >= 
19940101 and lo_orderdate <= 19940131 +and lo_discount>=4 and lo_discount<=6 +and lo_quantity>=26 +and lo_quantity<=35; + +q13 + +select sum(lo_extendedprice * lo_discount) as revenue +from lineorder,date +where lo_orderdate = d_datekey +and d_weeknuminyear = 6 +and d_year = 1994 +and lo_discount>=5 +and lo_discount<=7 +and lo_quantity>=26 +and lo_quantity<=35; + +q13.m + +select sum(lo_extendedprice * lo_discount) as revenue +from lineorder +where lo_orderdate >= 19940204 +and lo_orderdate <= 19940210 +and lo_discount>=5 +and lo_discount<=7 +and lo_quantity>=26 +and lo_quantity<=35; + +q21 + +select sum(lo_revenue),d_year,p_brand1 +from lineorder,part,supplier,date +where lo_orderdate = d_datekey +and lo_partkey = p_partkey +and lo_suppkey = s_suppkey +and p_category = 'MFGR#12' +and s_region = 'AMERICA' +group by d_year,p_brand1 +order by d_year,p_brand1; + +q21.m + +select sum(lo_revenue),d_year,p_brand1 +from lineorder,part,supplier,ddate +where lo_orderdate = d_datekey +and lo_partkey = p_partkey +and lo_suppkey = s_suppkey +and p_category = 1 +and s_region = 1 +group by d_year,p_brand1 +order by d_year,p_brand1; + +q22 + +select sum(lo_revenue),d_year,p_brand1 +from lineorder, part, supplier,date +where lo_orderdate = d_datekey +and lo_partkey = p_partkey +and lo_suppkey = s_suppkey +and p_brand1 >= 'MFGR#2221' +and p_brand1 <= 'MFGR#2228' +and s_region = 'ASIA' +group by d_year,p_brand1 +order by d_year,p_brand1; + +q22.m + +select sum(lo_revenue),d_year,p_brand1 +from lineorder, part, supplier,ddate +where lo_orderdate = d_datekey +and lo_partkey = p_partkey +and lo_suppkey = s_suppkey +and p_brand1 >= 260 +and p_brand1 <= 267 +and s_region = 2 +group by d_year,p_brand1 +order by d_year,p_brand1; + +q23 + +select sum(lo_revenue),d_year,p_brand1 +from lineorder,part,supplier,date +where lo_orderdate = d_datekey +and lo_partkey = p_partkey +and lo_suppkey = s_suppkey +and p_brand1 = 'MFGR#2239' +and s_region = 'EUROPE' +group by d_year,p_brand1 +order by 
d_year,p_brand1; + +q23.m + +select sum(lo_revenue),d_year,p_brand1 +from lineorder,part,supplier,ddate +where lo_orderdate = d_datekey +and lo_partkey = p_partkey +and lo_suppkey = s_suppkey +and p_brand1 = 278 +and s_region = 3 +group by d_year,p_brand1 +order by d_year,p_brand1; + +Dictionary Encoding +America => 1 +Asia => 2 +Europe => 3 + +q31 [Aggregates greater than int] + +select c_nation,s_nation,d_year,sum(lo_revenue) as revenue +from lineorder,customer, supplier,date +where lo_custkey = c_custkey +and lo_suppkey = s_suppkey +and lo_orderdate = d_datekey +and c_region = 'ASIA' +and s_region = 'ASIA' +and d_year >= 1992 and d_year <= 1997 +group by c_nation,s_nation,d_year +order by d_year asc,revenue desc; + +q31.m + +select c_nation,s_nation,d_year,sum(lo_revenue) as revenue +from lineorder,customer, supplier,ddate +where lo_custkey = c_custkey +and lo_suppkey = s_suppkey +and lo_orderdate = d_datekey +and c_region = 2 +and s_region = 2 +and d_year >= 1992 and d_year <= 1997 +group by c_nation,s_nation,d_year +order by d_year asc,revenue desc; + +q32 + +select c_city,s_city,d_year,sum(lo_revenue) as revenue +from lineorder,customer,supplier,date +where lo_custkey = c_custkey +and lo_suppkey = s_suppkey +and lo_orderdate = d_datekey +and c_nation = 'UNITED STATES' +and s_nation = 'UNITED STATES' +and d_year >=1992 and d_year <= 1997 +group by c_city,s_city,d_year +order by d_year asc,revenue desc; + +q32.m + +select c_city,s_city,d_year,sum(lo_revenue) as revenue +from lineorder,customer,supplier,ddate +where lo_custkey = c_custkey +and lo_suppkey = s_suppkey +and lo_orderdate = d_datekey +and c_nation = 24 +and s_nation = 24 +and d_year >=1992 and d_year <= 1997 +group by c_city,s_city,d_year +order by d_year asc,revenue desc; + +q33 + +select c_city,s_city,d_year,sum(lo_revenue) as revenue +from lineorder,customer,supplier,ddate +where lo_custkey = c_custkey +and lo_suppkey = s_suppkey +and lo_orderdate = d_datekey +and (c_city = 'UNITED KI1' or c_city 
= 'UNITED KI5') +and (s_city = 'UNITED KI1' or s_city = 'UNITED KI5') +and d_year >=1992 and d_year <= 1997 +group by c_city,s_city,d_year +order by d_year asc,revenue desc; + +q33.m + +select c_city,s_city,d_year,sum(lo_revenue) as revenue +from lineorder,customer,supplier,ddate +where lo_custkey = c_custkey +and lo_suppkey = s_suppkey +and lo_orderdate = d_datekey +and (c_city = 231 or c_city = 235) +and (s_city = 231 or s_city = 235) +and d_year >=1992 and d_year <= 1997 +group by c_city,s_city,d_year +order by d_year asc,revenue desc; + +q34 + +select c_city,s_city,d_year,sum(lo_revenue) as revenue +from lineorder,customer,supplier,date +where lo_custkey = c_custkey +and lo_suppkey = s_suppkey +and lo_orderdate = d_datekey +and (c_city = 'UNITED KI1' or c_city = 'UNITED KI5') +and (s_city = 'UNITED KI1' or s_city = 'UNITED KI5') +and d_yearmonth = 'Dec1997' +group by c_city,s_city,d_year +order by d_year asc,revenue desc; + +q34.m + +select c_city,s_city,d_year,sum(lo_revenue) as revenue +from lineorder,customer,supplier,ddate +where lo_custkey = c_custkey +and lo_suppkey = s_suppkey +and lo_orderdate = d_datekey +and (c_city = 231 or c_city = 235) +and (s_city = 231 or s_city = 235) +and d_yearmonthnum = 199712 +group by c_city,s_city,d_year +order by d_year asc,revenue desc; + +ASIA => 2 +UNITED STATES => 24 +UNITED KI1 => 231 +UNITED KI5 => 235 + +q41 [Aggregates greater than int] + +select d_year,c_nation,sum(lo_revenue-lo_supplycost) as profit +from lineorder,supplier,customer,part, date +where lo_custkey = c_custkey +and lo_suppkey = s_suppkey +and lo_partkey = p_partkey +and lo_orderdate = d_datekey +and c_region = 'AMERICA' +and s_region = 'AMERICA' +and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2') +group by d_year,c_nation +order by d_year,c_nation; + +q41.m + +select d_year,c_nation,sum(lo_revenue-lo_supplycost) as profit +from lineorder,supplier,customer,part,ddate +where lo_custkey = c_custkey +and lo_suppkey = s_suppkey +and lo_partkey = p_partkey +and 
lo_orderdate = d_datekey +and c_region = 1 +and s_region = 1 +and (p_mfgr = 0 or p_mfgr = 1) +group by d_year,c_nation +order by d_year,c_nation; + +q42 + +select d_year,s_nation,p_category,sum(lo_revenue-lo_supplycost) as profit +from lineorder,customer,supplier,part,date +where lo_custkey = c_custkey +and lo_suppkey = s_suppkey +and lo_partkey = p_partkey +and lo_orderdate = d_datekey +and c_region = 'AMERICA' +and s_region = 'AMERICA' +and (d_year = 1997 or d_year = 1998) +and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2') +group by d_year,s_nation, p_category +order by d_year,s_nation, p_category; + +q42.m + +select d_year,s_nation,p_category,sum(lo_revenue-lo_supplycost) as profit +from lineorder,customer,supplier,part,ddate +where lo_custkey = c_custkey +and lo_suppkey = s_suppkey +and lo_partkey = p_partkey +and lo_orderdate = d_datekey +and c_region = 1 +and s_region = 1 +and (d_year = 1997 or d_year = 1998) +and (p_mfgr = 0 or p_mfgr = 1) +group by d_year,s_nation, p_category +order by d_year,s_nation, p_category; + +q43 + +select d_year,s_city,p_brand1,sum(lo_revenue-lo_supplycost) as profit +from lineorder,supplier,customer,part,date +where lo_custkey = c_custkey +and lo_suppkey = s_suppkey +and lo_partkey = p_partkey +and lo_orderdate = d_datekey +and c_region = 'AMERICA' +and s_nation = 'UNITED STATES' +and (d_year = 1997 or d_year = 1998) +and p_category = 'MFGR#14' +group by d_year,s_city,p_brand1 +order by d_year,s_city,p_brand1; + +q43.m + +select d_year,s_city,p_brand1,sum(lo_revenue-lo_supplycost) as profit +from lineorder,supplier,customer,part,ddate +where lo_custkey = c_custkey +and lo_suppkey = s_suppkey +and lo_partkey = p_partkey +and lo_orderdate = d_datekey +and c_region = 1 +and s_nation = 24 +and (d_year = 1997 or d_year = 1998) +and p_category = 3 +group by d_year,s_city,p_brand1 +order by d_year,s_city,p_brand1; + +AMERICA => 1 +MFGR#1 => 0 +MFGR#2 => 1 +UNITED STATES => 24 + +Data Generation +---------------- + +SF 1: +./gpuDBLoaderM 
--lineorder ../../test/dbgen/lineorder.tbl --ddate ../../test/dbgen/date.tbl --customer ../../test/dbgen/customer.tbl.p --supplier ../../test/dbgen/supplier.tbl.p --part ../../test/dbgen/part.tbl.p --datadir ../../dataM1/ + +SF 10: +./gpuDBLoaderM --customer ../../data-raw10/customer.tbl.p --supplier ../../data-raw10/supplier.tbl.p --part ../../data-raw10/part.tbl.p --datadir ../../dataM10/ + +SF 20: +./gpuDBLoaderM --lineorder ../../data-raw20/lineorder.tbl --ddate ../../data-raw20/date.tbl --customer ../../data-raw20/customer.tbl.p --supplier ../../data-raw20/supplier.tbl.p --part ../../data-raw20/part.tbl.p --datadir ../../dataM20/ + + +python convert.py + +Inefficiencies +------------- + +* Hash function (eg: for q23) + +Hyper +--- + +./bin/driver /big_fast_drive/anil/dbops/test/ssb/schema.sql /big_fast_drive/anil/dbops/test/ssb/load.sql --store ssb_transformed.dump diff --git a/data/ssb/dbgen/.gitignore b/data/ssb/dbgen/.gitignore new file mode 100644 index 0000000..665a9df --- /dev/null +++ b/data/ssb/dbgen/.gitignore @@ -0,0 +1,6 @@ +*.o +*.tbl +qgen +dbgen + +.vscode diff --git a/data/ssb/dbgen/BUGS b/data/ssb/dbgen/BUGS new file mode 100644 index 0000000..1f1b2ab --- /dev/null +++ b/data/ssb/dbgen/BUGS @@ -0,0 +1,987 @@ +# @(#) BUGS 2.1.8.20@(#) +# The following is a list of the various DBGEN/QGEN bugs that have been +# and are being fixed. 
Each entry is of the form: +# +# Problem #xx: STATUS -- MR ID and OPEN/closed +# followed by a detailed explanation +# TYPE: -- classification of the bug or issue +# SPEC FIX: -- details of any change to the spec +# DBGEN FIX: -- details of any change needed to QGEN/DBGEN +# ANSWER SETS: -- any effect on answer sets +# WORKAROUND: -- temporary fix, if available +# HELP NEEDED: -- any work/assistance required +# AUDITORS NOTIFIED: -- date auditors were notified, if appropriate +# OPENED AGAINST: -- date and affected versions +# CLOSED IN: -- date and fixed version +# +# OPEN BUGS +# ========== +# Problem #33: Parallel load doesn't work under NT +# +# OPEN Feature Requests +# ================= +# Problem #9: would like to include answer set formatting in query templates +# Problem #37: need way to validate DBGEN without large storage requirement +# Problem #58: Need way to track changes from one release to the next +# +# OPEN Documentation Errors +# ================= +# None +#--------------------------------------------------------------------- +#Complete Bug List +#================== +Problem #1: closed +Summary: Q10 returns no rows + Since orders can only be returned (l_returnflag = 'R') after they + have been received, and can't be received in the future, the + number of permissible orders for query 10 tails off early in + 1995. If you are lucky enough to get a parameter substitution + after February '95 (allowed in 2.12.3), things can go "quickly". +SEVERITY: +SPEC FIX: replace 2.12.3 (1) with "DATE is the first day in a + randomly selected month between the first month of 1993 and the + last month of 1994" +DBGEN FIX: change permissible substitution range for query 10, + parameter 1 +ANSWER SETS: not affected. 
+WORKAROUND: use a different seed for qgen parameter substitution +HELP NEEDED: +AUDITORS NOTIFIED: +OPENED AGAINST: 1.0 +CLOSED IN: 1.0.1 (dbgen and qgen) + +Problem #2: closed +Summary: parallelism in load to gen differing data sets +the parallel load code was based on extensible data sets; since + each "extension" made an assumption of scale factor, the data + could end up clustered. Further, since the RNG is + self-modifying, different numbers of extension led to different + final data sets. +SEVERITY: +SPEC FIX: none. +DBGEN FIX: remove -E(xtensible) option and implement pure parallel +load with a known scale factor; rebuild seed files +ANSWER SETS: not affected. (parallelism not implemented for SF <= 1) +WORKAROUND: don't use the parallel load (-C) option to DBGEN +HELP NEEDED: testers needed. +AUDITORS NOTIFIED: yes. +OPENED AGAINST: 1.0 +CLOSED IN: 1.0.1 + +Problem #3: closed +Summary: some arithmetic tends to overflow at large SF +retailprice tends to SF/10 as SF increases. this can lead to + data corruption in extendedprice and aggregate calculations +SEVERITY: +SPEC FIX: will need rework of 1.3 wrt retailprice calculation +DBGEN FIX: modification to second term of rpb_routine() calculation +to limit contribution of second term to the maximum seen at + SF=.1 +ANSWER SETS: not affected +WORKAROUND: code retail/extended price calculations as long long; +build smaller data sets +HELP NEEDED: +AUDITORS NOTIFIED: +OPENED AGAINST: 1.0 +CLOSED IN: 1.0.1 + +Problem #4: closed +Summary: dbgen not ported to NT +SEVERITY: +SPEC FIX: none +DBGEN FIX: need to roll in changes supplied by IBM +ANSWER SETS: not affected +WORKAROUND: N/A +HELP NEEDED: N/A +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0 +CLOSED IN: 1.1.0 + +Problem #5: closed +Summary: QGEN seed init inconsistent +A prior fix assured that parameter values were query order + independent when a seed was provided on the command line. 
need + to make this true when no seed is provided +SEVERITY: +SPEC FIX: none +DBGEN FIX: rework seed init loop in qgen.c +ANSWER SETS: not affected +WORKAROUND: supply seeds on command line +HELP NEEDED: none +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0 +CLOSED IN: 1.0.1 + +Problem #6: closed +Summary: command line options with abutting arguments mishandled +SEVERITY: +SPEC FIX: none +DBGEN FIX: minor fix to getopt routine in bm_utils.c +ANSWER SETS: not affected +WORKAROUND: separate options and arguments with a space +HELP NEEDED: none +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0 +CLOSED IN: 1.0.1 + +Problem #7: closed +Summary: '-O f' asking for new file names twice +SEVERITY: +SPEC FIX: none +DBGEN FIX: rework of set_files() in driver.c +ANSWER SETS: not affected +WORKAROUND: none +HELP NEEDED: none +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0 +CLOSED IN: 1.0.1 + +Problem #8: closed +Summary: Seed generation taking too long +SEVERITY: +SPEC FIX: N/A +DBGEN FIX: implement "skip and trudge" as discussed +ANSWER SETS: not affected +WORKAROUND: none +HELP NEEDED: +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0 +CLOSED IN: 1.0.1 + +Problem #9: OPEN +Summary: would like to include answer set formatting in query templates +SEVERITY: feature request +SPEC FIX: none +DBGEN FIX: additional flag in qgen() +ANSWER SETS: not affected +WORKAROUND: N/A +HELP NEEDED: asked for reproduction info 25 Oct 95 +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0 +CLOSED IN: + +Problem #10: closed +Summary: need to re-introduce ability to do incremental, flat file builds +SEVERITY: feature request +SPEC FIX: none +DBGEN FIX: add -S(tep) option to build one of many partial data sets +ANSWER SETS: not affected +WORKAROUND: N/A +HELP NEEDED: +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0 +CLOSED IN: 1.0.1 + +Problem #11: closed +Summary: Row count for first delete at 10/100 is incorrect +SEVERITY: Error +SPEC FIX: None +DBGEN FIX: +ANSWER SETS: No Effect +WORKAROUND: hand edit of first delete file 
+HELP NEEDED: +AUDITORS NOTIFIED: No +OPENED AGAINST: 1.0.1 +CLOSED IN: 2.0.0 (not sure of precise release) +CLOSED BY: jms@gradientsystems.com + +Problem #12: closed +Summary: Bad default rowcount generated for query 17 +SEVERITY: Error +SPEC FIX: None +DBGEN FIX: corrected rowcnt[] entries to be 1-based +ANSWER SETS: N/A +WORKAROUND: hand edit query or add explicit row count to template +HELP NEEDED: +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0.0 +CLOSED IN: 1.1.0 + +Problem #13: closed +Summary: Bad expansion of SET_OUTPUT for Teradata +SEVERITY: Error +SPEC FIX: N/A +DBGEN FIX: new macro in tpcd.h +ANSWER SETS: N/A +WORKAROUND: Hand edit query or hardcode output directive in templates +HELP NEEDED: +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0.1 +CLOSED IN: 1.1.0 + +Problem #14: closed +Summary: Badly formed range deletes +SEVERITY: Error +SPEC FIX: N/A +DBGEN FIX: TBD +ANSWER SETS: N/A +WORKAROUND: hand edit delete files +HELP NEEDED: asked for reproduction info 25 Oct 95 +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0.1 +CLOSED IN: 2.0.0 (not sure of precise release) +CLOSED BY: jms@gradientsystems.com + +Problem #15: closed +Summary: in a multi-stage load, parent tables are not properly named +when parent and child are built simultaneously +SEVERITY: Error +SPEC FIX: N/A +DBGEN FIX: reworked tdef[].name in pr_X_Y routines for master/detail +tables +ANSWER SETS: N/A +WORKAROUND: Build master/detail tables separately +HELP NEEDED: +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0.1 +CLOSED IN: 1.1.0 + +Problem #16: closed +Summary: update generation at large scale factors produced the wrong number +of rows due to overflow of 32-bit integer +SEVERITY: BUG +SPEC FIX: N/A +DBGEN FIX: corrected order of operations in row count calculation in +driver.c +ANSWER SETS: N/A +WORKAROUND: use 64 bit integers +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0.1 +CLOSED IN: 1.1.0 + +Problem #17: closed +Summary: comment fields may be truncated when using columnar 
output, due to +rounding/truncation in the length calculation +SEVERITY: BUG +SPEC FIX: N/A +DBGEN FIX: add ceil() calls around all PR_VSTR() calls in print.c +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0.1 +CLOSED IN: 1.1.0 + +Problem #18: closed +Summary: the output format for identifier fields in columnar output is +unnecessarily large, and is inconsistent +SEVERITY: minor +SPEC FIX: N/A +DBGEN FIX: revised PR_BCD2 macro +ANSWER SETS: N/A +WORKAROUND: avoid columnar output, or rework macro +HELP NEEDED: none +AUDITORS NOTIFIED: no +OPENED AGAINST: 1.1.0 +OPENED BY: jenn@torolab2.vnet.ibm.com +CLOSED IN: 1.1.0A +CLOSED BY: jms@informix.com + +Problem #19: closed +Summary: the case statement used to decipher substitution points in the +query template allowed extraneous :'s to re-initialize the +parameter substitution +SEVERITY: bug +SPEC FIX: N/A +DBGEN FIX: rework flag switch in qgen.c to explicitly call out numerics +ANSWER SETS: N/A +WORKAROUND: be sure that there are no "unknown" flags in the template +HELP NEEDED: none +AUDITORS NOTIFIED: yes +OPENED AGAINST: 1.0.1 +OPENED BY: jenn@torolab2.vnet.ibm.com +CLOSED IN: 1.1.0A +CLOSED BY: jms@informix.com + +Problem #20: closed +Summary: parameter substitution values were not affected by small changes +in seed values +SEVERITY: bug +SPEC FIX: N/A +DBGEN FIX: add UnifInt() calls to RNG init in qgen.c +ANSWER SETS: N/A +WORKAROUND: be sure seed values provide sufficient randomness in EQT +HELP NEEDED: none +AUDITORS NOTIFIED: yes +OPENED AGAINST: 1.1.0 +OPENED BY: alain_crolotte@elsegundoca.attgis.com +CLOSED IN: 1.1.0B +CLOSED BY: jms@informix.com + +Problem #21: closed +Summary: parameter logging doesn't properly handle the variable length of +the substitution list +SEVERITY: bug +SPEC FIX: N/A +DBGEN FIX: assure null termination of param list and bound the output +loop that logs parameter usage +ANSWER SETS: N/A +WORKAROUND: none +HELP NEEDED: none +AUDITORS 
NOTIFIED: N/A +OPENED AGAINST: 1.1.0B +OPENED BY: +CLOSED IN: 1.1.0C +CLOSED BY: jms@informix.com + +Problem #22: closed +Summary: parameter output for Q11 can overflow default formatting at very +large volumes +SEVERITY: bug +SPEC FIX: N/A +DBGEN FIX: expand format string to %11.10f +ANSWER SETS: N/A +WORKAROUND: hand code queries for large volumes +HELP NEEDED: none +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.1.0B +OPENED BY: francois@ip.com +CLOSED IN: 1.1.0C +CLOSED BY: jms@informix.com + +Problem #23: closed +Summary: typos in variant 14c +SEVERITY: +SPEC FIX: N/A +DBGEN FIX: corrected query template +ANSWER SETS: N/A +WORKAROUND: none +HELP NEEDED: none +AUDITORS NOTIFIED: no +OPENED AGAINST: 1.1.0B +OPENED BY: francois@ip.com +CLOSED IN: 1.1.0C +CLOSED BY: jms@informix.com + +Problem #24: closed +Summary: macro PR_DATE was hard-coded to print t->alpha even though a +target was passed in as a parameter +SEVERITY: minor +SPEC FIX: N/A +SOURCE FIX: re-worked macro to properly use its arguments +ANSWER SETS: N/A +WORKAROUND: none +HELP NEEDED: none +AUDITORS NOTIFIED: no +OPENED AGAINST: 1.1.0A +OPENED BY: Robert.Lane@eng.sun.com +CLOSED IN: dbgen 1.1.0B +CLOSED BY: jms@informix.com + +Problem #25: closed +Summary: typos in variant 10a +SEVERITY: +SPEC FIX: N/A +DBGEN FIX: corrected query template +ANSWER SETS: N/A +WORKAROUND: none +HELP NEEDED: none +AUDITORS NOTIFIED: no +OPENED AGAINST: 1.1.0B +OPENED BY: francois@ip.com +CLOSED IN: 1.1.0C +CLOSED BY: jms@informix.com + +Problem #26: closed +Summary: the version numbers for QGEN and DBGEN do not match +SEVERITY: minor +SPEC FIX: N/A +SOURCE FIX: unified version numbers starting with 1.1.0C +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: none +AUDITORS NOTIFIED: no +OPENED AGAINST: 1.1.0B (or 1.1.0C, depending) +OPENED BY: Robert.Lane@eng.sun.com +CLOSED IN: 1.1.0C +CLOSED BY: jms@informix.com + +Problem #27: closed +Summary: correcting typos in 7, 9, 13 +SEVERITY: minor +SPEC FIX: N/A +SOURCE FIX: fixed them 
+ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: none +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.1.0C (pre-release) +OPENED BY: tblank@vnet.ibm.com +CLOSED IN: 1.1.0C +CLOSED BY: jms@informix.com + +Problem #28: closed +Summary: Seed generation fails with SF > 1000 due to 32 bit integer +arithmetic used to verify "divisible-ness" of data set +SEVERITY: bug +SPEC FIX: N/A +SOURCE FIX: TBD +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: none +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.1.0C +OPENED BY: alain_colotte@elsegundoca.ncr.com +CLOSED IN: 1.3.0 +CLOSED BY: jms@gradientsystems.com + +Problem #29: closed +Summary: Compile time errors on Solaris 2.5.1 and SunOS +SEVERITY: bug +SPEC FIX: N/A +SOURCE FIX: Solaris fixed by renaming lineitem field from extended to + eprice; SunOS problem documented in Porting.Notes +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: N/A +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.1.0D +OPENED BY: jms@informix.com +CLOSED IN: 1.2.0 +CLOSED BY: jms@informix.com + +Problem #30: closed +Summary: Cryptic comments in dists.dss +SEVERITY: flaw +SPEC FIX: N/A +SOURCE FIX: Cleaned up the comments in the file +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: N/A +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.2.0 +OPENED BY: francois@sizing.com +CLOSED IN: 1.2.3 ALPHA 1 +CLOSED BY: jms@informix.com + +Problem #31: closed +Summary: Inconsistant handling of fopen() failures +SEVERITY: bug +SPEC FIX: N/A +SOURCE FIX: introduced OPEN_CHECK macro (defined in dss.h) +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: none +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.2.0 +OPENED BY: schiefer@ca.ibm.com +CLOSED IN: 1.3.0 +CLOSED BY: jms@gradientsystems.com + +Problem #32: closed +Summary: Path separators were hard-coded +SEVERITY: bug +SPEC FIX: N/A +SOURCE FIX: introduced PATH_SEP in config.h +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: none +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.2.0 +OPENED BY: +CLOSED IN: 1.3.0 +CLOSED BY: jms@gradientsystems.com + 
+Problem #33: OPEN +Summary: Parallel load doesn't work under NT +SEVERITY: bug +SPEC FIX: N/A +SOURCE FIX: +ANSWER SETS: N/A +WORKAROUND: use -S option to build each step independently +HELP NEEDED: none +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.1.0 +OPENED BY: +CLOSED IN: +CLOSED BY: + +Problem #34: closed +Summary: P_NAME not properly populated +SEVERITY: bug +SPEC FIX: N/A +SOURCE FIX: Corrected color selection logic in agg_str() +ANSWER SETS: NFI for 1.x since it affects answer sets +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.2.3 +OPENED BY: schiefer@ca.ibm.com +CLOSED IN: 2.0.0 +CLOSED BY: jms@gradientsystems.com + +Problem #35: closed +Summary: mk_sparse() returning bad orderkeys +SEVERITY: bug +SPEC FIX: N/A +SOURCE FIX: corrected logic in mk_sparse() and bcd2_bin() +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.3.0 +OPENED BY: jennc@ca.ibm.com +CLOSED IN: 1.3.1 +CLOSED BY: jms@gradientsystems.com + +Problem #36: closed +Summary: a_rnd() doesn't mask properly, uses small 'alphabet' +SEVERITY: bug +SPEC FIX: Corrected 4.2.2.6 to reflect 64 character set +SOURCE FIX: changed mask in a_rnd() from 067 to 077 +ANSWER SETS: NFI for 1.x since answers would be affected +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.2.3 +OPENED BY: pek@elsegundoca.ncr.com +CLOSED IN: 2.0.0 +CLOSED BY: jms@gradientsystems.com + +Problem #37: OPEN +Summary: need way to validate DBGEN without large storage requirement +SEVERITY: Feature Request +SPEC FIX: N/A +SOURCE FIX: Provide vrf_xxx routine to generate checksums +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.2.3 +OPENED BY: jms@gradientsystems.com +CLOSED IN: +CLOSED BY: + +Problem #38: closed +Summary: need to be able to generate specific update set +SEVERITY: Feature Request +SPEC FIX: N/A +SOURCE FIX: Update update generation to use -S option +ANSWER SETS: N/A 
+WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.3.1 +OPENED BY: jennc@ca.ibm.com +CLOSED IN: 2.0.0 (not certain of fix version) +CLOSED BY: jms@gradientsystems.com + +Problem #39: closed +Summary: README for dbgen is out of date +SEVERITY: Documentation error +SPEC FIX: N/A +SOURCE FIX: Rewrite of README +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 2.0.0.6b +OPENED BY: jennc@ca.ibm.com +CLOSED IN: 2.0.0 +CLOSED BY: jms@gradientsystems.com + +Problem #40: closed +Summary: O_CUSTKEY is generated out of range at 10GB +SEVERITY: Bug +SPEC FIX: N/A +SOURCE FIX: Correction of CUST_MORTALITY calculation +ANSWER SETS: Unknown +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 2.0.0.7 +OPENED BY: wayne.smith@intel.com +CLOSED IN: 2.0.0.8 +CLOSED BY: jms@gradientsystems.com + +Problem #41: closed +Summary: V2 appears slower than V1 +SEVERITY: Bug +SPEC FIX: +SOURCE FIX: Used NthElement() in row_stop() +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 2.0.0.8 +OPENED BY: jennc@ca.ibm.com +CLOSED IN: 2.01a +CLOSED BY: jms@gradientsystems.com + +Problem #42: closed +Summary: Dual declaration of articles causes C++ compilation error +SEVERITY: Bug +SPEC FIX: N/A +SOURCE FIX: Duplicate declaration removed +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 2.0.0 +OPENED BY: jpm@informix.com +CLOSED IN: 2.0.0a +CLOSED BY: jms@gradientsystems.com + +Problem #43: closed +Summary: Subselect wild card not consistant with spec +SEVERITY: Bug +SPEC FIX: N/A +SOURCE FIX: Query templates corrected +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 2.0.0 +OPENED BY: jpm@informix.com +CLOSED IN: 2.0.0a +CLOSED BY: jms@gradientsystems.com + +Problem #44: closed +Summary: small money values incorrect +SEVERITY: Bug +SPEC FIX: N/A +SOURCE FIX: reworked PR_xxx 
macros +ANSWER SETS: new answer included for Q22 +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 2.0.0 +OPENED BY: ac4@elsegundoca.ncr.com +CLOSED IN: 1.0.1 +CLOSED BY: jms@gradientsystems.com + +Problem #45: closed +Summary: L_ORDERKEY/O_ORDERKEY incorrect +SEVERITY: Bug +SPEC FIX: N/A +SOURCE FIX: corrected pointer arithmetic in print.c +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0.1 +OPENED BY: jpm@informix.com +CLOSED IN: 1.0.1a +CLOSED BY: jms@gradientsystems.com + +Problem #46: closed +Summary: L_ORDERKEY/O_ORDERKEY incorrect +SEVERITY: Dup (see #45) +SPEC FIX: N/A +SOURCE FIX: N/A +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0.1 +OPENED BY: jennc@ca.ibm.com +CLOSED IN: 1.0.1a +CLOSED BY: jms@gradientsystems.com + +Problem #47: closed +Summary: QGEN parameter substitution not random +SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: corrected varsub RANDOM usage to reflect seed file removal +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0.1 +OPENED BY: wayne.smith@intel.com +CLOSED IN: 1.0.1a +CLOSED BY: jms@gradientsystems.com + +Problem #48: closed +Summary: QGEN parameter substitution not random for Q21 +SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: corrected varsub to only reference nations2 distribution +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0.1a +OPENED BY: wayne.smith@intel.com +CLOSED IN: 1.0.1b +CLOSED BY: jms@gradientsystems.com + +Problem #49: closed +Summary: Extraneous trailing separator in delete files +SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: special-cased the handling of deletes using PR_KEY +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0.1b +OPENED BY: wayne.smith@intel.com +CLOSED IN: 1.0.1c +CLOSED BY: jms@gradientsystems.com + +Problem #50: closed +Summary: qgen 
not generating valid parameter log files for defaults +SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: corrected params/default reference +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0.1 +OPENED BY: cta@elsegundoca.ncr.com +CLOSED IN: 1.0.1d +CLOSED BY: jms@gradientsystems.com + +Problem #51: closed +Summary: inconsistent/invariant substitutions in Q16, Q17, Q19 +SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: corrected "brand" selection to make order irrelevant +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0.1 +OPENED BY: jennc@ca.ibm.com +CLOSED IN: 1.0.1d +CLOSED BY: jms@gradientsystems.com + +Problem #52: closed +Summary: qgen seeds make parameter substitutions position dependent + The current scheme uses an individual RNG stream for each query, and seeds + all streams identically. Accordingly, two queries that use the same domain + for the same parameter will always have the same value (e.g., q9 and q20). 
+SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: seed the individual streams with the sequence of random + numbers produced by the global seed value +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 990708 +OPENED BY: jennc@ca.ibm.com +CLOSED IN: 1.0.1a/1.1.0a (990727) +CLOSED BY: jms@gradientsystems.com +CHECKED BY: qa52 + +Problem #53: closed +Summary: number of lineitems in update files no longer varies + The RNG is not being set at the start of update generation; accordingly + the original data (including rowcounts) is being "regenerated" +SEVERITY: +SPEC FIX: N/A +SOURCE FIX: +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 990708 +OPENED BY: jennc@ca.ibm.com +CLOSED IN: 990810 +CLOSED BY: jms@gradientsystems.com +CHECKED BY: qa53 + +Problem #54: closed +Summary: segmented update files fail when rows per file is small + A round off error could cause the wrong number of rows to be output to a + given update file +SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: correction to driver.c and print.c to use division and modulo + to produce comparably sized files regardless of divisor +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 990708 +OPENED BY: v-larryk@microsoft.com +CLOSED IN: 1.0.1a/1.1.0a (990727) +CLOSED BY: jms@gradientsystems.com (using code from larry) +CHECKED BY: + +Problem #55: closed +Summary: -S generates bad data when used with updates + The RNG is not being properly set +SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: added the appropriate offset to the RNG, and simplified the + update generation code +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 990708 +OPENED BY: jms@gradientsystems.com +CLOSED IN: 990816 +CLOSED BY: jms@gradientsystems.com (using code from larry) +CHECKED BY: qa55 + +Problem #56: closed +Summary: Need way to specify dists.dss location on the command line 
+SEVERITY: FEATURE +SPEC FIX: N/A +SOURCE FIX: added -b switch to driver.c and qgen.c +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 990708 +OPENED BY: clevine@microsoft.com +CLOSED IN: 990830 +CLOSED BY: jms@gradientsystems.com +CHECKED BY: N/A + +Problem #57: closed +Summary: Need way to remove all DBGEN output unless there is an error +SEVERITY: FEATURE +SPEC FIX: N/A +SOURCE FIX: added -q switch to driver.c and changed verbose if's +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 990708 +OPENED BY: clevine@microsoft.com +CLOSED IN: 990830 +CLOSED BY: jms@gradientsystems.com +CHECKED BY: N/A + +Problem #00058: OPEN +Summary: Need way to track changes from one release to the next +SEVERITY: FEATURE +SPEC FIX: N/A +SOURCE FIX: reintroduce and automate the CHANGES file. Require MRs for + all source code changes +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 990708 +OPENED BY: mpoess@us.oracle.com +CLOSED IN: +CLOSED BY: +CHECKED BY: + +Problem #00059: closed +Summary: extra comma in Q2 template +SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: Template corrected +ANSWER SETS: N/A +WORKAROUND: None. +HELP NEEDED: None. 
+AUDITORS NOTIFIED: N/A +OPENED AGAINST: 990830 +OPENED BY: jpm@informix.com +CLOSED ON: 990908 +CLOSED BY: jms@gradientsystems.com +CHECKED BY: N/A + + +Problem #00060: closed +Summary: segmented inserts/deletes creating an extra file +SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: Adding in missed change from original roll-in +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: N/A +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 990830 +OPENED BY: larryk@microsoft.com +CLOSED ON: 990111 +CLOSED BY: jms@gradientsystems.com +CHECKED BY: N/A + +Problem #00061: closed +Summary: 64-bit support under DigUnix leads to math errors +SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: Calculation of dRange in rnd.c now uses double cast +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: N/A +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 990830 +OPENED BY: nramesh@us.oracle.com +CLOSED ON: 000131 +CLOSED BY: jms@gradientsystems.com +CHECKED BY: N/A + +Problem #00062: closed +Summary: bad update rollover after 1000 refreshes + This test uses tpcH scale 0.01. We've encountered + an situation in which dbgen doesn't generate + the correct data for delete files delete.1000 and + above. In particular, file delete.1000 contains + keys to be deleted that have never been loaded. + Because of this problem, keys that should have been + deleted never are causing duplicate unique values + to appear in the incremental loads after we cycle + from the 4000th incremental update back around starting + again with the 1st one. 
+SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: N/A +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: N/A +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 19991101 +OPENED BY: Roger.McNicol@sybase.com +CLOSED ON: 20000509 +CLOSED BY: jms +CHECKED BY: N/A + +Problem #00063: closed +Summary: update copyright notice + N/A +SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: N/A +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: N/A +AUDITORS NOTIFIED: N/A +OPENED AGAINST: N/A +OPENED BY: jms@gradientsystems.com +CLOSED ON: 20000131 +CLOSED BY: jms@gradientsystems.com +CHECKED BY: N/A + +Problem #00064: closed +Summary: permute() introduce 0 selection in [1..50] for q16 + N/A +SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: rework permute() to be 1-based +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: N/A +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 20000413 +OPENED BY: lorna@permetrics.com +CLOSED ON: 20000414 +CLOSED BY: jms@gradientsystems.com +CHECKED BY: N/A + +Problem #00065: OPEN +Summary: permute correction caused dataset changes + initial fix for #64 caused qa failures due to data set changes. 
New fix + is limited to query parameter substitution changes and has passed qa +SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: N/A +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: N/A +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 20000511 +OPENED BY: jms +CLOSED ON: N/A +CLOSED BY: N/A +CHECKED BY: N/A diff --git a/data/ssb/dbgen/CHANGES b/data/ssb/dbgen/CHANGES new file mode 100644 index 0000000..e7d1247 --- /dev/null +++ b/data/ssb/dbgen/CHANGES @@ -0,0 +1,33 @@ +# @(#)CHANGES 2.1.8.18 +08 Dec 1998 Release 2.0.0 +15 Mar 1998 Release 2.0.0 pre-release +06 Feb 1998 Release 1.3.1 +15 Dec 1996 Release 1.2.0 +08 Aug 1996 Release 1.1.0D +01 May 1996 Release 1.1.0C +29 Jan 1996 Release 1.1.0B +23 Jan 1996 Release 1.1.0A +19 Dec 1995 Release 1.1.0 +11 Sep 1995 Release 1.0.1 +13 Mar 1995 Release 1.0 + + +Changes between 990830 and 991011 +File Bug ID +---- ------ +s.2.sql 00059 Removing extra comma +s.dss.h 00061 +s.config.h 00061 +s.driver.c 00060 adding missed change from Larry +s.makefile 00058 +s.rnd.c 00061 +s.HISTORY 00061 +s.history.html 00061 +s.mr.sh 00058 miscelaneous corrections +s.bug.template 00058 removing extraneous spaces +s.bug.template changed titles + + +Changes between 199910 and 000511 +File Bug ID +---- ------ diff --git a/data/ssb/dbgen/HISTORY b/data/ssb/dbgen/HISTORY new file mode 100644 index 0000000..8258af8 --- /dev/null +++ b/data/ssb/dbgen/HISTORY @@ -0,0 +1,535 @@ +# @(#)HISTORY 2.1.8.3 +Changes as of 10/11/99 + -- versions: TPCH 1.2.0a, TPCR 1.1.0a + -- Correction to segmented updates that was causing extra file to be + generated + -- Porting changes for DigUnix +Changes as of 08/28/99 + -- versions: TPCH 1.2.0, TPCR 1.1.0 + -- reduced parameter substitution range for Q18 + -- added new option to specify location of dists file (-b) + -- added DBGEN option to suppress all output (-q) +Changes as of 08/16/99 + -- versions: TPCH 1.1.0a, TPCR 1.0.1e + -- prevent "reuse" of original data in update files + -- correction to lint target in makefile.suite + 
-- removal of vestigial l_partkey predicate from 21.sql + -- reorder lineitem/order join in q5 + -- removal of table aliases from 2.sql + -- randomize seeding of qgen RNG to close bug 52 + -- correct possible round off error in segmented update files + -- corrected soft copy answer set for Q22 + -- corrected precision of answer set for Q19 +Changes as of 07/08/99 + -- versions: TPCH 1.1.0, TPCR 1.0.1 + -- WORKLOAD must be set to either TPCH or TPCR in the makefile + -- unneeded reference to part table removed from q21 template +Changes as of 06/04/99 + -- version 1.0.1d + -- Restarted version numbering to match specification revisions for + TPC-H and TPC-R + -- Corrected answer set for Q13 + -- Corrected parameter substitutions for Q16, Q17, Q19, Q20, Q21, Q22 + -- Corrected RNG initialization in qgen.c + -- added adhoc.c adhoc.h to code base to support randomized data sets; + currently disabled + -- replaced calls to UnifInt() row_stop with call to NthElement() + -- Corrected a problem that caused small negative money values to print as + a positive value + -- Simplification of PR_xxx macros + -- QGEN building correct parameter logs again + +****************** +* NOTE NOTE NOTE * +****************** +Below this line the file refers to TPC-D which was retired in favor of +TPC-H and TPC-R. Since the new specifications are numbered from 1.0.0 +the program version was reset. 
+****************** +* NOTE NOTE NOTE * +****************** + +Changes as of 01/05/99 + -- version 2.0.1 + -- added 1999 to the copyright notice + -- corrected C++ compilation problem + -- sub-select phrasing corrected in Q4, Q21, Q22 + -- added support for segmenting update files (contributed by Larry Kemp, HP) +Changes as of 12/08/98 + -- version 2.0.0 + -- removed permute.h from clean target in makefile +Changes as of 11/17/98 + -- version 2.0.0 Alpha 8 + -- corrected o_custkey overrun bug + -- removed upper bound on -C command option + -- added static permute.h to distribution to match the specification +Changes as of 10/23/98 + -- version 2.0.0 Alpha 7 + -- removed references to DSS_SEED and SEED_TAG + -- minor query template cleanup + -- V2 answer sets added + -- correction to hd_sparse for SF > 300 + -- added static declaration to row types in gen_tbl to fix update problem + -- permuted params to Q22 +Changes as of 5/19/98 + -- version 2.0.0 Alpha6b + -- removed trailing apostrophe from dists.dss nouns for Tandem loader + -- corrected mk_sparse() problem with alpha6 + -- added 64b support for NCR/Metaware + -- corrected revision problem with 2.0.0.6 +Changes as of 5/7/98 + -- version 2.0.0 Alpha6 + -- corrected generation of parent/child tables in parallel + -- renamed ORDER table to ORDERS table + -- revision of DBGEN synced with revision of 2.0 specification + -- portability changes to process termination provided by John Matzka + -- portability changes for Watcom C provided by Andrew Eisenberg + -- indentation of specifications/templates now matches + -- queries now include a consistant header format +Changes as of 4/28/98 + -- version 2.0.0 Alpha5 + -- NO RELEASE OF ALPHA 5 ; skipped to sync spec/DBGEN revision levels +Changes as of 4/6/98 + -- version 2.0.0 Alpha4 + -- corrected parallel table generation + -- minor corrections to query templates + -- portability changes for HP +Changes as of 3/24/98 + -- version 2.0.0 Alpha3 + -- include substitution 
parameters for Q22 + -- correct substitution parameters for Q16 under AIX + -- include permute.h until unix/NT makefile fix + -- correct orderkey generation +Changes as of 3/20/98 + -- version 2.0.0 Alpha2 + -- correct runtime malloc error from bad INIT_HUGE macro + -- improve pseudo text distribution in comments + -- fix problem with parallelism of data gen + -- re-enable generation of parent/child tables + -- remove recombinaton code for parallel flat files +Changes as of 3/11/98 + -- version 2.0.0 Alpha1 + -- removed the TIME table + -- removed the need for seed files + -- made 1GB the validation database size + -- add pseudo text support in comments + -- correct character selection in a_rnd() + -- correct population of P_NAME + -- removed unclaimed variants + -- added new queries 18-22, replaced Q13 +Changes as of 2/6/98 + -- version 1.3.1 + -- Revised 64 bit support to clean up bcd2_bin()and mk_sparse() + -- Add 64b support for NT +Changes as of 12/31/97 + -- version 1.3.0 + -- support for seed generation > 1TB (data gen still to be tested) + -- rework of 64b support + -- added bcd support for subtraction, comparison, modulo + -- added 1998 to the copyright notice + -- clarified comments in dists.dss + -- corrected substitution problem in Q11 + -- standardized fopen() error messages with OPEN_CHECK() + -- introduced PATH_SEP in config.h to allow changes in path separators +Changes as of 12/15/96 + -- version 1.2.0 + -- corrected typos in queries 8a, 8c, 8d, 11a, 12F and 14F, 17a + -- added variant 15c + -- defined MAX_SCALE and MIN_SCALE; issued error messages for SF > 1000 + since implementation is incomplete + -- seed file generation can now be resumed with dbgen -R ... 
-- corrected slight compile bug under Solaris 2.5.1 + -- documented compile problems under SunOS +Changes as of 8/1/96 + -- version 1.1.0D + -- included new variants for queries 8 and 15 + -- re-introduced answer sets in the source tree +Changes as of 5/1/96 + -- version 1.1.0C + -- unified version numbering of DBGEN and QGEN + -- updated BUGS list + -- removed FAQ from soft appendix; web site will keep the current + version of the FAQ + -- added 1996 to the copyright notice + -- corrected bug in PR_DATE macro; NO CHANGE TO DATA SET + -- properly initialize param values for cleaner logging + -- adjusted output format of Q11 param to allow scaling to 1TB + -- corrected typos in variant 14c + -- corrected data type for YEAR in variant 8c + -- corrected typos in variant 10a + -- added variant 8d +Changes as of 1/23/96 + -- qgen version 1.1.0B + -- include support for ANSI semantics + -- improved patch for seed sensitivity +Changes as of 1/23/96 + -- updated BUGS list + -- dbgen version 1.1.0A + -- patch to limit BCD2 fields to 12 characters for columnar output + -- qgen version 1.1.0A + -- patch to fix the "unknown flag" problem + -- patch to fix the seed sensitivity problem +Changes as of 12/19/95 + -- updated BUGS list + -- dbgen version 1.1.0 + -- upped default value of MAX_CHILDREN to 1000 + -- corrected naming of detail tables in incremental load + -- corrected range delete output + -- forced delete files to truncate existing files + -- removed fixed size tables from seed generation + -- corrected overflow problem with large scale seed generation + -- allow date generation as MM-DD-YY based on config.h #define + -- correct truncation problem with columnar output in PR_VSTR() + -- added support for Windows NT + -- added PLATFORM macro to makefile, removed platform defines from + config.h + -- removed MAX_CHILDREN define from config.h (set to 1000 in dss.h) + -- qgen version 1.1.0 + -- correct SET_OUTPUT macro to TDAT + -- use %ld in output for q17; portability 
-- add support for SQLSERVER database dialect + -- add support for SYBASE database dialect + -- adjust parameter ranges for Q1, Q3, Q6 + -- add -T/-t option to usage summary + -- added support for Windows NT +Changes as of 09/01/95 + -- qgen version 1.0.1 + -- formalized version numbering + -- -p now generates correct query permutations + -- added separate version number for qgen + -- corrected Q3 substitution problem + -- updated permissible range for Q10 + -- corrected rowcount_dflt and the MAX row indicator (-1) + -- expanded param logging to include all possible parameters + -- allowed qgen's -d option to be used at all scale factors + -- made parameter substitution permutation-independent + -- added qgen support for END_TRAN (-E) and DFLT_NUM (-N) + -- correct handling of :n directive + -- added more complete explanation of QGEN to README + -- rename of random to rndm, for portability + -- dbgen version 1.0.1 + -- formalized version numbering + -- inclusion of SF=1 seed file + -- correct typo in usage() update example + -- patch to driver.c to allow correct updates + -- documentation change to README to clarify seed/stage/update + interaction + -- corrected minor glitch in "open failed" error msg in print.c + -- added missing line continuation to makefile.suite + -- seed files are now based on scale factor and number of generators + -- seed files now hold seeds for one "step" of a given build + -- clean up of parallel load routines + -- inclusion of faster seed generation routines from Susanne Englert + -- removed the -E(xisting) option + -- assure proper scaling of O_CUSTKEY + -- corrected default update percentage + -- proper handling of child tables with '-O f' + -- removed seed files from the distribution + -- modified rpb_routine() to limit contribution of partkey in + retailprice + -- added '-S(tep)' option to allow multi-stage loads + -- roll in of 32 bit speed_seed routines from Dick Shelton + -- miscellaneous typo corrections in the documentation + 
-- cleanup of usage output +Changes as of 05/08/95 + -- version 1.0 + -- add Teradata defines to tpcd.h for QGEN + -- add :c to query templates for database CONNECT syntax + -- add examples of DBGEN and QGEN usage to README + -- add -T option to qgen to allow time able usage + -- query template names only require .sql suffix, rest is arbitrary +Changes as of 03/13/95 + -- version 9.1 + -- surround DBNAME with ifndef in config.h + -- remove -DDBNAME from makefile.suite + -- sync varchar handling with 9.1 draft +Changes as of 02/21/95 + -- version 9.0a + -- fixed bug in qgen that incorrectly included rnd.h + -- included revised DDL with changes for char/varchar and l_quantity + -- updated DBGEN help message to include new single table options for + order/lineitem and part/partsupp + -- included handling for multi-set seed files TPCDSEED.xxx + -- generated seeds up through 400GB; headed to 1TB! + -- ANSI lint cleanup; more needed + -- UF2 now defaults to key lists; use "-O r" to generate key ranges + also note, this routine does NOT use the BCD2_* + routines. As a result, it WILL fail if the keys being deleted + exceed 32 bits. 
Since this would require ~660 update iterations, + this seems an acceptable oversight +Changes as of 01/19/95 + -- version 9.0 + -- allowed command line seeding of RNG for QGEN + -- order and number of params in QGEN now matches + presentation in spec + -- fixed bug in time table format of O_ORDERDATE + -- changed l_QUANTITY to FLOAT in dss.ddl + -- reworked QGEN options to be more useful + -- allowed creation of sparse keys beyond 32 bits (for 1TB) + -- removed unused '#ifdef' and associated code + -- allowed independent generation of master/detail tables + (eg, order/lineitem) +Changes as of 12/06/94 + -- version 8.6 + -- fixed renaming of flat files for child tables + -- various documentation fixes + -- added naming convention section to Porting.Notes + -- added -DIBM flag to config.h + -- synced up QGEN with draft 8.1 +Changes as of 10/25/94 + -- version 8.5a + -- corrected bug in columnar output of pr_supp + -- added pr_drange to generate a list of order keys to be + deleted instead of generating SQL + -- added '-O d' to generate range delete as SQL + -- updated default values for QGEN to sync with spec 8.1 + -- corrected MK_SPARSE to reflect groups of 8 + -- corrected a bug in o_orderstatus + -- regenerated seed files for SF in [1,10] + -- ANSI cleanup (primarily function declarations) +Changes as of 10/11/94 + -- version 8.5 + -- remove deletes/inserts to other than order/lineitem + -- increased cardinality for part.type part.container + -- '-r' argument is now integer; percentage in basis points + -- initial roll-in of new update scheme + -- added BBB comments to supplier table +Changes as of 9/27/94 + -- version 8.4 + -- all money calculations now use integer math. This should + bring everyone's data sets into exact aggreement. +Changes as of 9/21/94 + -- version 8.3b + -- fixed handling of MAX_STREAM + -- added floor function to RPRICE bridge + -- misc lint cleanup (type fixes, new prototypes, etc.) 
+ -- MONEY format becomes lf for DOS + -- further cleanup of PR_VSTR and its length argument + -- change to parameter generation for Q6 to allow for float + discount +Changes as of 9/15/94 + -- version 8.3a + -- isolated MONEY format for Unisys (Lf) using DOS + -- make sure all arguments to MAKE_MONEY were double's + -- rolled in NEW_PTEXT to allow Berni to experiment +Changes as of 9/12/94 + -- version 8.3 + -- added -T n and -T r to usage to match getopt() and README + -- changed PR_MONEY to remove leading blanks + -- included revised DDL from Berni + -- included some MVS portability fixes in re malloc.h + -- cleaned up error messages in qgen and made #define ofp usage + universal + -- additional DOS portability changes + -- added {c,a}len to provide specific length for columnar + output of varchar + -- added PR_VSTR to handle varchar printing under MVS + -- fixed bit masking in a_rnd and cleaned up prototype match + with V_STR + -- PR_MONEY now used %Lf + -- added revised pseudo text under NEW_PTEXT ifdef for + experiments +Changes as of 9/09/94 + -- version 8.2 + -- l_discount and l_tax are now fractional (per teleconference) + -- money calculations moved to scaled integer math to clean up + answer sets + -- changed PR_FLT() to PR_MONEY to clarify usage + -- portability changes for SYBASE: dbname --> db_name + STATUS --> DBGEN_STATUS + -- added nations2 to dists.dss to handle qgen needs for now + -- reintroduced #ifndef DOS + -- reintroduced U2200 define to control kill_load() + -- broke out nation and region separately in -T option + -- updated dss.ddl based on mail from Berni +Changes as of 8/31/94 + -- version 8.1 + -- scaling for clerks needed to be 1000 (was 100) + -- added qgen parameter for scale + -- changed qgen parameter from s)tream to p)ermutation + -- synced qgen paramter values with 8.0 spec + -- corrected duplications in dists.dss +Changes as of 8/24/94 + -- version 8.0 + -- added sparse keys to lineitem/order + -- added varchar generation for 
comments/addresses + -- added variable lineitems/orders + -- removed ifdef for normalized code_tables + -- included code for parameter generation and template->EQT + routines + -- updated README and Porting.Notes to reflect QGEN + -- included DDL and RI examples from Berni +Changes as of 6/15/94 + -- version 7.0b (numbers now match spec revsion) + -- rework of code tables to properly map nation/region; when + compiled with -DCODE_TABLES distributions are taken from + code.dss and two additional fields are generated for + customers and suppliers, [cs]_ncode and [cs]_rcode, + immediately following [cs]_region + -- replaced ifdef's around DEAD_DATA with opposites. DEAD_DATA + is now the default + -- worked through code to see that it conformed to 7.0 + specification + -- adjusted scale factors/rowcounts for 1 GB == sf1 + -- brought help message in line with current code + -- fixed order per customer at 10 + -- make suppkey scalable in lineitem/partsupp +Changes as of 4/25/94 + -- version 1.5 + -- added the customers with no orders; Compile with -DDEAD_DATA + to activate the change. + -- added the code table for nation and region; + Compile with -DCODE_TABLES to activate the change. +Changes as of 3/17/94 + -- version 1.41 + -- completed implementation of JULIAN_DAY after talks with Berni + -- misc cleanup in usage/README files + -- removed all tabs and capped line length at 75 + -- added -n option to allowing naming of inline-loaded database +Changes as of 3/16/94 + -- version 1.4 + -- prottyped julian day/month for query re-write work. 
Compile + with -DJULIAN_DAY to enable + -- removed gen_times() from driver.c + -- added VMS ifdef to config.h to clean up fork/signal issues + -- added ICL ifdef to config.h to clean up getopt() issues + -- changed header file references to config.h from machine.h +Changes as of 3/2/94 + -- version 1.31 + -- corrected format of C_NAME to match S_NAME and O_CLERK + -- re-allowed fractional scale factors < 1 (updates not + contiguous) + -- added DSS_CONFIG environment variable + -- reworked read_dist() to look for DSS_DIST in DSS_CONFIG + -- updated the README file +Changes as of 2/16/94 + -- version 1.3 + -- added command line options for parallel load and data set + expansion + -- changed dists.dss delimiter to | for portability + -- limited scale factors to integer values + -- added command line option for seed file generation + -- added all seed files to distribution for SFs 1 - 10 + -- moved machine.h to config.h and added MAX_CHILDREN define + -- added 'f' flag to options to allow renaming of output files + -- added generation of SQL delete statements to match updates + (Note: updates are still single-threaded; -C is cleared + by -U) + -- corrected field sizing in dsstypes.h typedefs to match v 6.4 + -- update percentage default set to 1% +Changes as of 12/3/93 + -- version 1.2 + -- added command line option to adjust update percentage + -- fixed update generation for proper primary key ordering + -- renamed UUSR/PRC to RUSSIA/CHINA in dists.dss + -- cleaned up phone number generation to be consistent regard- + less of order of evaluation + -- adjusted size of lineitem comment to bring data in line with + 100 MB == SF=1 +Changes as of 10/15/93 + -- added command line option for update data creation + -- miscellaneous porting and cleanup changes + -- reworked table generation to allow reuse for updates + -- added comment field to tdefs structure + -- added load_state and store_state to sync data gen and + update gen +Changes as of 7/26/93 + -- combined loader and 
header stubs in load_stubs.c + -- separated Revision History (this file) from README + -- simplified makefile + -- removed redundancies from colors distribution + -- added getopt() for portability + -- created Porting.Notes + -- adjusted scaling rules + -- added help option to the command line +Changes as of 2/26/93 + -- combined all typedefs in one header: dsstypes.h + -- combined flat file generation in print.ec + -- combined typedef population in build.ec + -- added -P to control rowcnt scaling (P for percentage) + -- added -D option for Direct data generation and added + appropriate hooks in tdefs[] structure + -- added -F option for flat file generation + -- reused -T option (use -P 0.1 to build test size database) + now accepts suboptions c,o,p,s for single table builds. + -- dropped -M option (scaling is now by rowcount) + -- added -O option for optional controls. Currently defined: + -O t -- generate optional time table a join fields in + order/lineitem + -O h -- generate headers for flat file output + -O m -- generate fixed column-length output + -- removed dynamic memory allocation, redundant calls to + UnifInt, etc to improve performance +Changes as of 1/12/92 + -- julian() changed to handle orders->orderdate correctly + -- rflag distributions corrected in dists.dss + -- sea, gold removed from color distribution to clean up substring + problems + -- part->number and supplier-> adjusted for 1-based indexing + -- time->day changed to be day of month, not day of year + -- t.week changed to be week in year, not day of week +Changes as of 11/18/92 + -- checked line length and tab for transmission + -- another chapter in the portability wars. added #include + "machine.h" to dss.h (which is included by everyone else). Any + machine particular porting changes should go here. 
+ -- fixed fixed-field formats to prevent double printing + -- expanded PR_FLT formats to %010.2 +Changes as of 10/21/92 + -- added fixed format and column header handling; users of headers + will have to define the header functions to be called in + int (*tdefs.header)() +Changes as of 10/09/92: + -- added ansi prototypes and recompiled with gcc -ansi. users may + need to change the CC definition in the makefile and the contents + of CFLAGS to reflect their particular ansi compiler. + -- replaced all int references with long + -- replaced all float references with double + -- found and fixed odate/julian problem TS mentioned in 10/09 phone + call + +Changes as of 9/09/92: + -- Park/Miller random number generator included + -- clerk scaling changed to 100 * scale + -- parts.name always built from 5 selections from colors set + -- test scaling changed to ~60MB (TEST_SCALING == 10) + -- logarithmic scaling removed + -- mfgcost removed and retail/supplier cost bounds adjusted + -- agg_str memory leak fixed + -- independent RNG streams on a per column basis + +This is the revised data generator for DSS. + +The rewrite tried to accomplish three things: (1) identify and isolate +all the implicit assumptions about limits, bounds, ranges, distribu- +tions, etc.; (2) standardize the way any given table was generated/ +printed to ease understanding and maintenance; (3) bring the generator +in line with the current work of the committee and the excellent spec +the Indira put together; (4) provide an easy way to adjust distribu- +tions, string contents and to facilitate experimentation to get a +better idea of the impact of data population changes. 
+ +The files included are: + +driver.c ------- main and the calling routines for the generators +dist.c ------- should really be named dss_util.c; misc routines +customer.c ------- generation and print routines for customer table +orders.c ------- "" "" order table +parts.c ------- "" "" parts/partsupp +suppliers.c ------- "" "" suppliers table +time.c ------- "" "" time table +customer.h ------- associate header files; contain structure + definitions +dss.h dss.h holds the large number of assumptions and +orders.h values that have been used as IFDEFs. +parts.h +suppliers.h +time.h +dists.dss ------- string selections and weights; used to build + distributions + +Running make will create an executable (using the compiler flags in +CFLAGS, the ld flags in LDFLAGS and the libraries in LIBS [-O, -s, +and -lm by default]) which will create flat files suitable for dbload. +t + diff --git a/data/ssb/dbgen/PORTING.NOTES b/data/ssb/dbgen/PORTING.NOTES new file mode 100644 index 0000000..2916d20 --- /dev/null +++ b/data/ssb/dbgen/PORTING.NOTES @@ -0,0 +1,220 @@ +# @(#)PORTING.NOTES 2.1.8.1 + +Table of Contents +================== +1. General Program Structure +2. Naming Conventions and Variable Usage +3. Porting Procedures +4. Compilation Options +5. Customizing QGEN +6. Further Enhancements +7. Known Porting Problems +8. Reporting Problems + +1. General Program Structure + +The code provided with TPC-H and TPC-R benchmarks includes a database +population generator (DBGEN) and a query template translator(QGEN). It +is written in ANSI-C, and is meant to be easily portable to a broad variety +of platforms. The program is composed of five source files and some +support and header files. The main modules are: + + build.c: each table in the database schema is represented by a + routine mk_XXXX, which populates a structure + representing one row in table XXXX. 
+ See Also: dss_types.h, bm_utils.c, rnd.* + print.c: each table in the database schema is represented by a + routine pr_XXXX, which prints the contents of a + structure representing one row in table XXX. + See Also: dss_types.h, dss.h + driver.c: this module contains the main control functions for + DBGEN, including command line parsing, distribution + management, database scaling and the calls to mk_XXXX + and pr_XXXX for each table generated. + qgen.c: this module contains the main control functions for + QGEN, including query template parsing. + varsub.c: each query template includes one or more parameter + substitution points; this routine handles the + parameter generation for the TPC-H/TPC-R benchmark. + +The support utilities provide a generalized set of functions for data +generation and include: + + bm_utils.c: data type generators, string management and + portability routines. + + rnd.*: a general purpose random number generator used + throughout the code. + + dss.h: + shared.h: a set of '#defines' for limits, formats and fixed + values + dsstypes.h: structure definitions for each table definition + +2. Naming Conventions and Variable Usage + +Since DBGEN will be maintained by a large number of people, it is +particularly important to observe the coding, variable naming and usage +conventions detailed here. + + #define + -------- + All #define directives are found in header files (*.h). In general, + the header files segregate variables and macros as follows: + rnd.h -- anything exclusively referenced by rnd.c + dss.h -- general defines for the benchmark, including *all* + extern declarations (see below). + shared.h -- defines related to the tuple definitions in + dsstypes.h. Isolated to ease automatic processing needed by many + direct load routines (see below). + dsstypes.h -- structure definitons and typedef directives to + detail the contents of each table's tuples. 
+ config.h -- any porting and configuration related defines should + go here, to localize the changes necessary to move the suite + from one machine to another. + tpcd.h -- defines related to QGEN, rather than DBGEN + + extern + ------ + DBGEN and QGEN make extensive use of extern declarations. This could + probably stand to be changed at some point, but has made the rapid + turnaround of prototypes easier. In order to be sure that each + declaration was matched by exactly one definition per executable, + they are all declared as EXTERN, a macro dependent on DECLARER. In + any module that defines DECLARER, all variables declared EXTERN will + be defined as globals. DECLARER should be declared only in modules + containing a main() routine. + + Naming Conventions + ------------------ + defines + o All defines use upper case + o All defines use a table prefix, if appropriate: + O_* relates to orders table + L_* relates to lineitem table + P_* relates to part table + PS_* relates to partsupplier table + C_* relates to customer table + S_* relates to supplier table + N_* relates to nation table + R_* relates to region table + T_* relates to time table + o All defines have a usage prefix, if appropriate: + *_TAG environment variable name + *_DFLT environment variable default + *_MAX upper bound + *_MIN lower bound + *_LEN average length + *_SD random number seed (see rnd.*) + *_FMT printf format string + *_SCL divisor (for scaled arithmetic) + *_SIZE tuple length + +3. Porting Procedures + +The code provided should be easily portable to any machine providing an +ANSI C compiler. + -- Copy makefile.suite to makefile + -- Edit the makefile to match the name of your C compiler + and to include appropriate compilation options in the CFLAGS + definition + -- make. + +Special care should be taken in modifying any of the monetary calcu- +lations in DBGEN. These have proven to be particularly sensitive to +portability problems. 
If you decide to create the routines for inline +data load (see below), be sure to compare the resulting data to that +generated by a flat file data generation to be sure that all numeric +conversions have been correct. + +If the compile generates errors, refer to "Compilation Options", below. +The problem you are encountering may already have been addressed in the +code. + +If the compile is successful, but QGEN is not generating the appropriate +query syntax for your environment, refer to "Customizing QGEN", below. + +For other problems, refer to "Reporting Problems" at the end of this +document. + +4. Compilation Options + +config.h and makefile.suite contain a number of compile time options intended +to make the process of porting the code provided with TPC-H/TPC-R as easy as +possible on a broad range of platforms. Most ports should consist of reviewing +the possible settings described in config.h and modifying the makefile +to employ them appropriately. + +5. Customizing QGEN + +QGEN relies on a number of vendor-specific conventions to generate +appropriate query syntax. These are controlled by #defines in tpcd.h, +and enabled by a #define in config.h. If you find that the syntax +generated by QGEN is not sufficient for your environment you will need +to modify these two files. It is strongly recommended that you not change +the general organization of the files. + +Currently defined options are: + +VTAG -- marks a variable substitution point [:] +QDIR_TAG -- environment variable which points to query templates + [DSS_QUERY] +GEN_QUERY_PLAN -- syntax to generate a query plan ["Set Explain On;"] +START_TRAN -- syntax to begin a transaction ["Begin Work;"] +END_TRAN -- syntax to end a transaction ["Commit Work;"] +SET_OUTPUT -- syntax to redirect query output ["Output to"] +SET_ROWCOUNT -- syntax to set the number of rows returned + ["{return %d rows}"] +SET_DBASE -- syntax to connect to a database + +6. 
Further Enhancements + +load_stub.c provides entry points for two likely enhancements. + +The ld_XXXX routines make it possible to load the +database directly from DBGEN without first writing the database +population out to the filesystem. This may prove particularly useful +when loading larger database populations. Be particularly careful about +monetary amounts. To assure portability, all monetary calculations are +done using long integers (which hold money amounts as a number of +pennies). These will need to be scaled to dollars and cents (by dividing +by 100), before the values are presented to the DBMS. + +The hd_XXXX routines allow header information to be written before the +creation of the flat files. This should allow systems which require +formatting information in database load files to use DBGEN with only +a small amount of custom code. + +qgen.c defines the translation table for query templates in the +routine qsub(). + +varsub.c defines the parameter substitutions in the routine varsub(). + +If you are porting DBGEN to a machine that supports a native word +size larger than 32 bits, you may wish to modify the default values for +BITS_PER_LONG and MAX_LONG. These values are used in the generation of +the sparse primary keys in the order and lineitem tables. The code has +been structured to run on any machine supporting a 32 bit long, but +may be slightly more efficient on machines that are able to make use of +a larger native type. + +7. Known Porting Problems + +The current codeline will not compile under SunOS 4.1. Solaris 2.4 and later +are supported, and anyone wishing to use DBGEN on a Sun platform is +encouraged to use one of these OS releases. + + +8. 
Reporting Problems + +The code provided with TPC-H/TPC-R has been written to be easily portable, +and has been tested on a wide variety of platforms, If you have any +trouble porting the code to your platform, please help us to correct +the problem in a later release by sending the following information +to the TPC D subcommittee: + + Computer Make and Model + Compiler Type and Revision Number + Brief Description of the problem + Suggested modification to correct the problem + diff --git a/data/ssb/dbgen/README b/data/ssb/dbgen/README new file mode 100644 index 0000000..477bfb7 --- /dev/null +++ b/data/ssb/dbgen/README @@ -0,0 +1,88 @@ +Note: In our research paper we use the SSB instead of SSBM +Version of 2/28/10: +Cardinality of supplier fixed to follow benchmark spec: now 2000*SF + (previously was 10000*SF, in error): line 226, driver.c +Type of time value changed from long to time_t (now 64 bits on Windows): + line 688, build.c +Building in Visual Studio 2008: + Use Win32 console project, not using precompiled headers, + in Properties>C/C++>CommandLine, additional options: + /D "SSBM" /D "DBNAME" /D "DB2" (for DB2) +Building using makefile_win: set for DB2 build: + nmake -f makefile_win + (Change DATABASE symbol for other database) + +SSBM dbgen readme: + +SSBM is based on TPC-H dbgen source. The coding style and architecture +follows the TPCH dbgen. The original TPCH dbgen code stays untouched and +all new code related to SSBM dbgen follow the "#ifdef SSBM" statements. + +For original detailed TPC-H documentation, please refer TPCH_README +document under the same directory. Here we just list few things that +are specific to SSBM. + + +1. How is SSBM DBGEN built? + +Same idea as TPCH dbgen setup, which requires user to create an +appropriate makefile, using makefile.suite as a basis. Make sure to +use "SSBM" for the workload variable. + +Type "make" to compile and to generate the SSBM dbgen executable. 
+Please refer to Porting.Notes for more details and for +suggested compile time options. + +Note: If you want to generate the data files to a different directory, you should +copy the dbgen executable as well as the dists.dss file to that directory. + +2. How to generate SSBM data files? +To generate the dimension tables: + +(customer.tbl) +dbgen -s 1 -T c + +(part.tbl) +dbgen -s 1 -T p + +(supplier.tbl) +dbgen -s 1 -T s + +(date.tbl) +dbgen -s 1 -T d + +(fact table lineorder.tbl) +dbgen -s 1 -T l + +(for all SSBM tables) +dbgen -s 1 -T a + +To generate the refresh (insert/delete) data set: +(create delete.[1-4] and lineorder.tbl.u[1-4] with refreshing factor 0.05%) +dbgen -s 1 -r 5 -U 4 + + where "-r 5" specifies refreshing factor n/10000 + "-U 4" specifies 4 segments for deletes and inserts + +At this moment there is no QGEN for SSBM. So +the command line options related to those features won't apply. + +3. What are the changes upon TPC-H dbgen + +changes made upon original TPC-H dbgen + +1. removed snowflake tables such as nation and region (done) +2. removed the partsupply table (done) +3. removed the order table (done) +4. renamed the fact table as Lineorder and added/removed many fields +( done) +5. added the date dimension table (done) +6. adding and removing fields in dimension tables (done) +7. have data cross reference for supplycost, revenue in lineorder (done) +8. apply the refreshing only to lineorder table (done) + +The command line option keeps the same as TPC-H dbgen (The -T options +are changed to reflect different set of tables) + +===================== End of README ======================================== + diff --git a/data/ssb/dbgen/TPCH_README b/data/ssb/dbgen/TPCH_README new file mode 100644 index 0000000..9c8225f --- /dev/null +++ b/data/ssb/dbgen/TPCH_README @@ -0,0 +1,425 @@ +# @(#)README 2.1.8.1 + +Table of Contents +=================== + 0. What is this document? + 1. What is DBGEN? + 2. What will DBGEN create? + 3. How is DBGEN built? + 4. 
Command Line Options for DBGEN + 5. Building Large Data Sets with DBGEN + 6. DBGEN limitations and compliant usage + 7. Sample DBGEN executions + 8. What is QGEN? + 9. What will QGEN create? +10. How is QGEN built? +11. Command Line Options for QGEN +12. Query Template Syntax +13. Sample QGEN executions and Query Templates +14. Environment variable +15. Version Numbering in DBGEN and QGEN + +0. What is this document? + +This is the general README file for DBGEN and QGEN, the data- +base population and executable query text generation programs +used in the TPC-H and TPC-R benchmarks. It covers the proper use +of DBGEN and QGEN. For information on porting the utility to your +particular platform see Porting.Notes. + +1. What is DBGEN? + +DBGEN is a database population program for use with the TPC-H and +TPC-R benchmarks. It is written in ANSI 'C' for portability, and has +been successfully ported to over a dozen different systems. While the +TPC-H and TPC-R specifications allow an implementor to use any utility +to populate the benchmark database, the resultant population must exactly +match the output of DBGEN. The source code has been provided to make the +process of building a compliant database population as simple as possible. + +2. What will DBGEN create? + +Without any command line options, DBGEN will generate 8 separate ascii +files. Each file will contain pipe-delimited load data for one of the +tables defined in the TPC-H and TPC-R database schemas. The default tables +will contain the load data required for a scale factor 1 database. By +default the file will be created in the current directory and be +named .tbl. As an example, customer.tbl will contain the +load data for the customer table. + +When invoked with the '-U' flag, DBGEN will create the data sets to be +used in the update functions and the SQL syntax required to delete the +data sets. The update files will be created in the same directory as +the load data files and will be named "u_
.set". The delete +syntax will be written to "delete.set". For instance, the data set to +be used in the third query set to update the lineitem table will be +named "u_lineitem.tbl.3", and the SQL to remove those rows will be +found in "delete.3". The size of the update files can be controlled +with the '-r' flag. + +3. How is DBGEN built? + +Create an appropriate makefile, using makefile.suite as a basis, +and type make. Refer to Porting.Notes for more details and for +suggested compile time options. + +4. Command Line Options for DBGEN + +DBGEN's output is controlled by a combination of command line options +and environment variables. Command line options are assumed to be single +letter flags preceded by a minus sign. They may be followed by an +optional argument. + +option argument default action +------ -------- ------- ------ +-h Display a usage summary + +-f none Force. Existing data files will be + overwritten. + +-F none yes Flat file output. + +-D none Direct database load. ld_XXXX() routines + must be defined in load_stub.c + +-s 1 Scale of the database population. Scale + 1.0 represents ~1 GB of data + +-T
Generate the data for a particular table + ONLY. Arguments: p -- part/partuspp, + c -- customer, s -- supplier, + o -- orders/lineitem, n -- nation, r -- region, + l -- code (same as n and r), + O -- orders, L -- lineitem, P -- part, + S -- partsupp + +-O d Generate SQL for delete function + instead of key ranges + +-O f Allow over-ride of default output file + names + +-O h Generate headers in flat ascii files. + hd_XXX routines must be defined in + load_stub.c + +-O m Flat files generate fixed length records + +-O r Generate key ranges for the UF2 update + function + +-O v Verify data set without generating it. + +-r 10 Scale each udpate file to the given + percentage (expressed in basis points) + of the data set + +-v none Verbose. Progress messages are + displayed as data is generated. + +-n Use database for in-line load + +-C Use separate processes to + generate data + +-S Generate the th part of a multi-part load + or update set + +-U Create a specified number of data sets + in flat files for the update/delete + functions + +-i Split the inserted rows in an refresh pair + between files + +-d Split the deleted rows in an refresh pair + between files + +5. DBGEN limitations and compliant usage + +DBGEN is meant to be a robust population generator for use with the +TPC-H and TPC-R benchmarks. It is hoped that DBGEN will make it easier +to experiment with and become proficient in the execution of TPC decision +support benchmarks. As a result, it includes a number of command line +options which are not, strictly speaking, necessary to generate a compliant +data set for a TPC-D run. In addition, some command line options will accept +arguments which result in the generation of NON-COMPLIANT data sets. Options +which should be used with care include: + +-s -- scale factor. TPC-H/TPC-R runs are only compliant when run against SF's + of 1, 10, 30, 100, 300, 1000 .... +-r -- refresh percentage. TPC-H/TPC-R runs are only compliant when run with + -r 10, the default. 
+ +6. Sample DBGEN executions + +DBGEN has been built to allow as much flexibility as possible, but is +fundementally intended to generate two things: a database population +against which the queries in TPC-H and TPC-R can be run, and the updates +that are used during the update functions in TPC-H and TPC-R. Here are +some sample uses of DBGEN. + + 1. To generate the database population for the qualification database + dbgen -s 1 + 2. To generate the lineitem table only, for a scale factor 10 database, + and over-write any existing flat files: + dbgen -s 10 -f -T L + 4. To geterate a 100GB data set in 1GB pieces, generate only the part and + partsupplier tables, and include some progress reports along the way: + dbgen -s 100 -S 1 -C 100 -T p -v (to generate the first 1GB file) + dbgen -s 100 -S 2 -C 100 -T p -v (to generate the second 1GB file) + (and so on, incrementing the argument to -S each time) + 5. To generate the update files needed for a 4 stream run of the throughput + test at 100 GB, using an existing set of seed files from an 8 process + load: + dbgen -s 100 -U 4 -C 8 + + +7. What is QGEN? + +QGEN is a query generation program for use with the TPC-H and TPC-R benchmarks. +It is written in ANSI 'C' for portability, and has been successfully +ported to over a dozen different systems. While the benchmark specifications +allow an implementor to use any utility to create the benchmark query +sets, QGEN has been provided to make the process of building +a benchmark implementation as simple as possible. + +8. What will QGEN create? + +QGEN is a filter, triggered by :'s. It does line-at-a-time reads of its +input (more on that later), scanning for :foo, where foo determines the +substitution that occurs. 
Including: + +: replace with the appropriate value for parameter +:b replace with START_TRAN (from tpcd.h) +:c replace with SET_DBASE (from tpcd.h) +:n replace with SET_ROWCOUNT() (from tpcd.h) +:o replace with SET_OUTPUT (from tpcd.h) +:q replace with query number +:s replace with stream number +:x replace with GEN_QUERY_PLAN (from tpcd.h) + +Qgen takes an assortment of command line options, controlling which of these +options should be active during the translation from template to EQT, and a +list of query "names". It then translates the template found in +$DSS_QUERY/.sql and puts the result of stdout. + +Here is a sample query template: + +{ Sccsid: @(#)1.sql 9.1.1.1 1/25/95 10:51:56 } +:n 0 +:o +select + l_returnflag, + l_linestatus, + sum(l_quantity) as sum_qty, + sum(l_extendedprice) as sum_base_price, + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + avg(l_quantity) as avg_qty, + avg(l_extendedprice) as avg_price, + avg(l_discount) as avg_disc, + count(*) as count_order +from lineitem +where l_shipdate <= date '1998-12-01' - interval :1 day +group by l_returnflag, l_linestatus +order by l_returnflag, l_linestatus; + +And here is what is generated: +$ qgen -d 1 + +{return 0 rows} + +select + l_returnflag, + l_linestatus, + sum(l_quantity) as sum_qty, + sum(l_extendedprice) as sum_base_price, + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + avg(l_quantity) as avg_qty, + avg(l_extendedprice) as avg_price, + avg(l_discount) as avg_disc, + count(*) as count_order +from lineitem +where l_shipdate <= date('1998-12-01') - interval (90) day to day +group by l_returnflag, l_linestatus +order by l_returnflag, l_linestatus; + +See "Query Template Syntax" below for more detail on converting your prefered query +phrasing for use with QGEN. + +9. How is QGEN built? 
+ +QGEN is built by the same makefile that creates DBGEN. If the makefile +is successfully creating DBGEN, no further compilation modifications +should be necessary. You may need to modify some of the options which +allow QGEN to integrate with your preferred query tool. Refer to +Porting.Notes for more detail. + +10. Command Line Options for QGEN + +Like DBGEN, QGEN is controlled by a combination of command line options +and environment variables (See "Environment Variables", below for more +detail). Command line options are assumed to be single +letter flags preceded by a minus sign. They may be followed by an +optional argument. + +option argument default action +------ -------- ------- ------ +-c none Retain comments in translation of template to + EQT + +-d none Default. Use the parameter substitutions + required for query validation + +-h Display a usage summary + +-i Use contents of to init a query stream + +-l Save query parameters to + +-n Use database for queries + +-N Always use default rowcount, and ignore :n directives + +-o Save query n's output in /n. + Uses -p option, and uses :o tag + +-p Use the query permutation defined for + stream . If this option is + omited, EQT will be generated for the + queries named on the command line. + +-r Seed the rnadom number generator with + +-s Set scale to for parameter + substitutions. + +-t Use contents of to complete a query + stream + +-T none Use time table format for date substitution + +-v none Verbose. Progress messages are + displayed as data is generated. + +-x none Generate a query plan as part of query + execution. + +11. Query Template Syntax + +QGEN is a simple ASCII text filter, meant to translate query generalized +query syntax("query template") into the executable query text(EQT) re- +quired by the benchmarks. It provides a number of shorthands and syntactic +extensions that allow the automatic generation of query parameters and some +control over the operation of the benchmark implementation. 
+ +QGEN first strips all comments from the query template, recognizing both +{comment} and --comment styles. Next it traverses the query template +one line at a time, locating required substitution points, called +parameter tags. The values substituted for a given tag are summarized +below. QGEN does not support nested substitutions. That is, if +the text substituted for tag itself contains a valid tag the second tag +will not be expanded. + +Tag Converted To Based on +=== ============ ======== +:c database ;(1) -n from the command line +:x set explain on;(1) -x from the command line +: paremeter +:s stream number +:o output to outpath/qnum.stream;(1) + -o from command line, -s from + command line +:b BEGIN WORK;(1) -a from comand line +:e COMMIT WORK(1) -a from command line +:q query number +:n sets rowcount to be returned + to , unless -N appears on the command line + +Notes: + (1) This is Informix-specific syntax. Refer to Porting.Notes for + tailoring the generated text to your database environment. + +12. Sample QGEN executions and Query Templates + +QGEN translates generic query templates into valid SQL. In addition, it +allows conditional inclusion of the commands necessary to connect to a +database, produce diagnostic output, etc. Here are some sample of QGEN +usage, and the way that command line parameters and the query templates +interact to produce valid SQL. + + Template, in $DSS_QUERY/1.sql: + :c + :o + select count(*) from foo; + :x + select count(*) from lineitem + where l_orderdate < ':1'; + + 1. "qgen 1", would produce: + select count(*) from foo; + select count(*) from lineitem + where l_orderdate < '1997-01-01'; + Assuming that 1 January 1997 was a valid substitution for parameter 1. + + 2. "qgen -d -c dss1 1, would produce: + database dss1; + select count(*) from foo; + select count(*) from lineitem + where l_orderdate < '1995-07-18'; + Assuming that 18 July 1995 was the default substitution for parameter 1, + and using Informix syntax. + + 3. 
"qgen -d -c dss1 -x -o somepath 1", would produce: + database dss1; + output to "somepath/1.0" + select count(*) from foo; + set explain on; + select count(*) from lineitem + where l_orderdate < '1995-07-18'; + Assuming that 18 July 1995 was the default substitution for parameter 1, + and using Informix syntax. + + +13. Environment Variables + +Environment variables are used to control features of DBGEN and QGEN +which are unlikely to change from one execution to another. + +Variable Default Action +------- ------- ------ +DSS_PATH . Directory in which to build flat files +DSS_CONFIG . Directory in which to find configuration files +DSS_DIST dists.dss Name of distribution definition file +DSS_QUERY . Directory in which to find query templates + +14. Version Numbering in DBGEN and QGEN + +DBGEN and QGEN use a common version numbering algorithm. Each executable +is stamped with a version number which is displayed in the usage messages +available with the '-h' option. A version number is of the form: + + V.R.P.M + | | | | + | | | | + | | | | + | | | -- modification: alphabetic, incremented for any trivial changes + | | | to the source (e.g., porting ifdef's) + | | ---- patch level: numeric, incremented for any minor bug fix + | | (e.g., qgen parameter range) + | ------- release: numeric, incremented for each minor revision of the + | specification + |-------- version: numeric, incremented for each major revision of the + specification + +An implementation of TPC-H or TPC-R is valid only if it conforms to the +following version usage rules: + + -- The Version of DBGEN and QGEN must match the integer portion of the + current specification revision + +The current revisions are: + DBGEN: 1.0.1 + QGEN: 1.0.1 diff --git a/data/ssb/dbgen/bcd2.c b/data/ssb/dbgen/bcd2.c new file mode 100644 index 0000000..30038ba --- /dev/null +++ b/data/ssb/dbgen/bcd2.c @@ -0,0 +1,237 @@ +/* @(#)bcd2.c 2.1.8.1 */ +/* + * bcd.c: conversion routines for multi-byte arithmetic + * + * defined 
routines: + * bin_bcd2(long binary, long *low_res, long *high_res) + * bcd2_bin(long *dest, long bcd) + * bcd2_add(long *bcd_low, long *bcd_high, long addend) + * bcd2_sub(long *bcd_low, long *bcd_high, long subend) + * bcd2_mul(long *bcd_low, long *bcd_high, long multiplier) + * bcd2_div(long *bcd_low, long *bcd_high, long divisor) + * long bcd2_mod(long *bcd_low, long *bcd_high, long modulo) + * long bcd2_cmp(long *bcd_low, long *bcd_high, long compare) + */ +#include +#include "bcd2.h" /* for function prototypes */ + +#define DIGITS_PER_LONG 7 +#define WORD_DIVISOR 10000000 +#define GET_DIGIT(num, low, high) \ + ((num) >= DIGITS_PER_LONG)? \ + (high & (0xF << (4 * ((num) - DIGITS_PER_LONG)))) \ + >> (((num) - DIGITS_PER_LONG) * 4): \ + (low & (0xF << (4 * (num)))) >> ((num) * 4) +#define SET_DIGIT(value, num, low, high) \ + if ((num) >= DIGITS_PER_LONG) \ + { \ + *high &= \ + (0xFFFFFFF ^ (0xF << (4 * ((num) - DIGITS_PER_LONG)))); \ + *high |= (value << (4 * ((num) - DIGITS_PER_LONG))); \ + } \ + else \ + { \ + *low = (*low & (0xFFFFFFF ^ (0xF << (4 * (num))))); \ + *low |= (value << (4 * (num))); \ + } +int +bin_bcd2(long binary, long *low_res, long *high_res) +{ + char number[15], + *current; + int count; + long *dest; + + *low_res = *high_res = 0; + sprintf(number, "%014ld", binary); + for (current = number, count=13; *current; current++, count--) + { + dest = (count < DIGITS_PER_LONG)?low_res:high_res; + *dest = *dest << 4; + *dest |= *current - '0'; + } + return(0); +} + +int +bcd2_bin(long *dest, long bcd) +{ + int count; + long mask; + + count = DIGITS_PER_LONG - 1; + mask = 0xF000000; + *dest = 0; + while (mask) + { + *dest *= 10; + *dest += (bcd & mask) >> (4 * count); + mask = mask >> 4; + count -= 1; + } + return(0); +} + +int +bcd2_add(long *bcd_low, long *bcd_high, long addend) +{ + long tmp_lo, tmp_hi, carry, res; + int digit; + + bin_bcd2(addend, &tmp_lo, &tmp_hi); + carry = 0; + for (digit=0; digit < 14; digit++) + { + res = GET_DIGIT(digit, 
*bcd_low, *bcd_high); + res += GET_DIGIT(digit, tmp_lo, tmp_hi); + res += carry; + carry = res / 10; + res %= 10; + SET_DIGIT(res, digit, bcd_low, bcd_high); + } + return(carry); +} +/* bcd2_sub: subtract subend from the 14-digit BCD pair in place, digit by digit; returns the final borrow (1 if the result went negative) */ +int +bcd2_sub(long *bcd_low, long *bcd_high, long subend) +{ + long tmp_lo, tmp_hi, carry, res; + int digit; + + bin_bcd2(subend, &tmp_lo, &tmp_hi); + carry = 0; + for (digit=0; digit < 14; digit++) + { + res = GET_DIGIT(digit, *bcd_low, *bcd_high); + res -= GET_DIGIT(digit, tmp_lo, tmp_hi); + res -= carry; + carry = (res < 0) ? 1 : 0; /* recompute the borrow for every digit; it must be cleared when this digit does not underflow */ + if (carry) + { + res += 10; /* wrap the digit back into 0..9 */ + } + SET_DIGIT(res, digit, bcd_low, bcd_high); + } + return(carry); +} + +int +bcd2_mul(long *bcd_low, long *bcd_high, long multiplier) +{ + long tmp_lo, tmp_hi, carry, m_lo, m_hi, m1, m2; + int udigit, ldigit, res; + + tmp_lo = *bcd_low; + tmp_hi = *bcd_high; + bin_bcd2(multiplier, &m_lo, &m_hi); + *bcd_low = 0; + *bcd_high = 0; + carry = 0; + for (ldigit=0; ldigit < 14; ldigit++) + { + m1 = GET_DIGIT(ldigit, m_lo, m_hi); + carry = 0; + for (udigit=0; udigit < 14; udigit++) + { + m2 = GET_DIGIT(udigit, tmp_lo, tmp_hi); + res = m1 * m2; + res += carry; + if (udigit + ldigit < 14) + { + carry = GET_DIGIT(udigit + ldigit, *bcd_low, *bcd_high); + res += carry; + } + carry = res / 10; + res %= 10; + if (udigit + ldigit < 14) + SET_DIGIT(res, udigit + ldigit, bcd_low, bcd_high); + } + } + return(carry); +} + +int +bcd2_div(long *bcd_low, long *bcd_high, long divisor) +{ + long tmp_lo, tmp_hi, carry, d1, res, digit; + + + carry = 0; + tmp_lo = *bcd_low; + tmp_hi = *bcd_high; + *bcd_low = *bcd_high = 0; + for (digit=13; digit >= 0; digit--) + { + d1 = GET_DIGIT(digit, tmp_lo, tmp_hi); + d1 += 10 * carry; + res = d1 / divisor; + carry = d1 % divisor; + SET_DIGIT(res, digit, bcd_low, bcd_high); + } + return(carry); +} + +long +bcd2_mod(long *bcd_low, long *bcd_high, long modulo) +{ + long tmp_low, tmp_high; + + tmp_low = *bcd_low; + tmp_high = *bcd_high; + while (tmp_high || tmp_low > modulo) + bcd2_sub(&tmp_low, &tmp_high, modulo); + 
return(tmp_low); +} + +long +bcd2_cmp(long *low1, long *high1, long comp) +{ + long temp = 0; + + bcd2_bin(&temp, *high1); + if (temp > 214) + return(1); + bcd2_bin(&temp, *low1); + return(temp - comp); +} + +#ifdef TEST_BCD +#include + +main() +{ +long bin, low_bcd, high_bcd; +int i; + +bin = MAXINT; +printf("%ld\n", bin); +bin_bcd2(bin, &low_bcd, &high_bcd); +printf("%ld %ld\n", high_bcd, low_bcd); +bin = 0; +bcd2_bin(&bin, high_bcd); +bcd2_bin(&bin, low_bcd); +printf( "%ld\n", bin); +for (i=9; i >= 0; i--) + printf("%dth digit in %d is %d\n", + i, bin, GET_DIGIT(i, low_bcd, high_bcd)); +bcd2_add(&low_bcd, &high_bcd, MAXINT); +bin = 0; +bcd2_bin(&bin, high_bcd); +high_bcd = bin; +bin = 0; +bcd2_bin(&bin, low_bcd); +low_bcd = bin; +printf( "%ld%07ld\n", high_bcd, low_bcd); +bin_bcd2(14, &low_bcd, &high_bcd); +bcd2_mul(&low_bcd, &high_bcd, 23L); +bin = 0; +bcd2_bin(&bin, high_bcd); +bcd2_bin(&bin, low_bcd); +printf( "%ld\n", bin); +bcd2_div(&low_bcd, &high_bcd, 10L); +bin = 0; +bcd2_bin(&bin, high_bcd); +bcd2_bin(&bin, low_bcd); +printf( "%ld\n", bin); +} +#endif /* TEST */ diff --git a/data/ssb/dbgen/bcd2.h b/data/ssb/dbgen/bcd2.h new file mode 100644 index 0000000..6ea92a1 --- /dev/null +++ b/data/ssb/dbgen/bcd2.h @@ -0,0 +1,11 @@ +/* + * Sccsid: @(#)bcd2.h 2.1.8.1 + */ +int bin_bcd2(long binary, long *low_res, long *high_res); +int bcd2_bin(long *dest, long bcd); +int bcd2_add(long *bcd_low, long *bcd_high, long addend); +int bcd2_sub(long *bcd_low, long *bcd_high, long subend); +int bcd2_mul(long *bcd_low, long *bcd_high, long multiplier); +int bcd2_div(long *bcd_low, long *bcd_high, long divisor); +long bcd2_mod(long *bcd_low, long *bcd_high, long modulo); +long bcd2_cmp(long *bcd_low, long *bcd_high, long compare); diff --git a/data/ssb/dbgen/bcd2.o b/data/ssb/dbgen/bcd2.o new file mode 100644 index 0000000000000000000000000000000000000000..469699c95d4bc145902e4505694f744398e5787d GIT binary patch literal 4536 
zcmbtWeQX>@6`%E8>~m`G9S|j!646V1Ew)0Bb8NsVsJ*rC>UU)WhT5g54#vK7Vk`a# z_8oFc9|a91N60rjqEhUi_RRbF(z{o{j{{ zk7ScyhKwF4N4|}GvG$U&SO)wCc=cP5-Uzu@+}?9pV>Se*pnMv5DHu7$ds;b?P0c_W z?ImElX+2djPD;V}W6h(xF9-wV?3pSA&(exU>t5h*>07gqFFj(aU*z7+(wAoBszz3ORz#}Z`=|sSzr#k|^^RE?)#dD@z z+Og?^h(PSj-aTfU-y7wZ`AseVdMy4zj8E#-DHh8w>07U&gRx9SDJXvf{-@ZxbC6ps zK?-t(`=NG1JNyo6_o5I~N12{!cw(1wTd%zt$?frWskqm~io z8imralVoqfX0KkDyYlhPxmH_Su&lW>qfj}P;nR?A6It0HY9dvM3GqL=TkFtvYu{Z- z%O?wNKLs}t@M+@|-SZ;4XDPbc8KOn*wBJ*eX`;;LABymo`9l60#BT$C8>-tGiRIs$dkz;e6~86c2%o`t#%G|qE#SdW&27T8 zVKGMdMY9^Ikg*Dt35Z{UzKe81Ah)oMPv;7E*5)9dlQq99{vW-10M&BkV(ldq7iweW zmvuhC<}$*ScQoS#%-9=l1D(cvQQkgKlV8DasrSS;aTn+=?r#gvt$6H5k0boeC|`)s zk~q4eprZW7{5L35fOm~q)!w5N6U^2lR%L6Q7a%?XeOc4-!nWMvowd_&%9un)sAh6_ z3>9NZ^&aU!`^FMF^6#Z$u{l%Wx5vhZvC+K0&MOl}vGUNfMiKcgBcBKrdP^4tRJ%TQ zq_!^yDz_4-?Q=b=N=aOEG5#K2aWOulr@x@5@3yY*sWM8RcBX;8<5l=QUx6QSojp${ zZN8b`pfunao7A_?qG#sK>TZRf#@=0AuKrjxR{vEfpN#Q(I9eQi5R5l5HnSL;IX7fl z6rc<0H>6A)D^y0e;M&T-QU<(!{xqL9(;%*G^K}BA|Ik&P#)l<>)rZdtr(65zOllqM zyfXGV$n?{}IEZWfpZNHgCj+a%1IEFCkKx0x^U9%@9jCR-`P2x@RpKbFHtwNE(H+2G z;aR*6=`RTb<8*Ps;jac>!f|X9Y42f#ToE-~^5lB8N>hEB>e22H z`a$yEE5|U79g^Nn8-GCcohWHit1_-8)qmWrsWr#eX==zQji{}z`<3--2q8`NBTJL& zYa+#@<7w>FwEw9?T~mjSvabmZjD3y%Q_zpc$&VA{N5Bi9aQN zP8k#NM{J+OtHgE3n20rEtUI3A5sb5--W+1xeS@7eXPuoR`-l4mlY2Vjz58gE?27Lr zx^qugU%!y)L}rudN+e8TB-L#a11ZUq==*_0h9pmXU_V)9JH?mL`4ngKeY~)#S?7w* zEN#uge$A|W?4IgQ@8N2jH8o()P~#n56k#E`Cqi*cK#P-+@1W_WZVfaxB0U{ z#@)%^V#A5F{6Fv!_>XM3Jq~`9rAFJ6TNHD;52#tVRgd2j=n8&a&YyAMRz1!-aH}30 zW&ADuRz3DRaH}3qIdH2US#adQK(ebF$Kc-Pj4Y z-Z{dk{n=sHL``0crjhPp()mf{>rj^L@4#+vmVKuiayvqX_`}!<3t_jn87WBq*_%^7 zhKm+v=UZ|bmWxhax=)Ff>UdR?f%dNgSK@EM-$#OUI_3RRKI<@O<{Gb5mi#z9sMvmJ K4$EJs{C@#d@HHI( literal 0 HcmV?d00001 diff --git a/data/ssb/dbgen/bm_utils.c b/data/ssb/dbgen/bm_utils.c new file mode 100644 index 0000000..5da29a0 --- /dev/null +++ b/data/ssb/dbgen/bm_utils.c @@ -0,0 +1,589 @@ +/* @(#)bm_utils.c 2.1.8.2 */ +/* + * + * Various routines that handle distributions, value selections 
and + * seed value management for the DSS benchmark. Current functions: + * env_config -- set config vars with optional environment override + * yes_no -- ask simple yes/no question and return boolean result + * a_rnd(min, max) -- random alphanumeric within length range + * pick_str(size, set) -- select a string from the set of size + * read_dist(file, name, distribution *) -- read named dist from file + * tbl_open(path, mode) -- std fopen with lifenoise + * julian(date) -- julian date correction + * rowcnt(tbl) -- proper scaling of given table + * e_str(set, min, max) -- build an embedded str + * agg_str() -- build a string from the named set + * dsscasecmp() -- version of strcasecmp() + * dssncasecmp() -- version of strncasecmp() + * getopt() + * set_state() -- initialize the RNG + */ + +/*this has to be put on top...*/ +#ifdef LINUX +/* turn on GNU extensions, incl O_DIRECT */ +/* O_LARGEFILE is defined in fcntl.h*/ +#define _GNU_SOURCE +#endif + +#include "dss.h" +#include +#include +#include +#include + +#ifdef HP +#include +#endif /* HP */ +#include +#include +#ifndef _POSIX_SOURCE +#include +#endif /* POSIX_SOURCE */ + +#include + +#ifdef IBM +#include +#endif /* IBM */ +#include +#include +/* Lines added by Chuck McDevitt for WIN32 support */ +#if (defined(WIN32)||defined(DOS)) +#ifndef _POSIX_ +#include +#ifndef S_ISREG + +#define S_ISREG(m) ( ((m) & _S_IFMT) == _S_IFREG ) +#define S_ISFIFO(m) ( ((m) & _S_IFMT) == _S_IFIFO ) + +#endif +#endif +#ifndef stat +#define stat _stat +#endif +#ifndef fdopen +#define fdopen _fdopen +#endif +#ifndef open +#define open _open +#endif +#ifndef O_RDONLY +#define O_RDONLY _O_RDONLY +#endif +#ifndef O_WRONLY +#define O_WRONLY _O_WRONLY +#endif +#ifndef O_CREAT +#define O_CREAT _O_CREAT +#endif +#endif +/* End of lines added by Chuck McDevitt for WIN32 support */ +#include "dsstypes.h" + + +static char alpha_num[65] = +"0123456789abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ,"; + +#if defined(__STDC__) || 
defined(__cplusplus) +#define PROTO(s) s +#else +#define PROTO(s) () +#endif + +char *getenv PROTO((const char *name)); +void usage(); +long *permute_dist(distribution *d, long stream); +extern long Seed[]; + +/* + * env_config: look for a environmental variable setting and return its + * value; otherwise return the default supplied + */ +char * +env_config(char *var, char *dflt) +{ + static char *evar; + + if ((evar = getenv(var)) != NULL) + return (evar); + else + return (dflt); +} + +/* + * return the answer to a yes/no question as a boolean + */ +long +yes_no(char *prompt) +{ + char reply[128]; + +#ifdef WIN32 +/* Disable warning about conditional expression is constant */ +#pragma warning(disable:4127) +#endif + + while (1) + { +#ifdef WIN32 +#pragma warning(default:4127) +#endif + printf("%s [Y/N]: ", prompt); + gets(reply); + switch (*reply) + { + case 'y': + case 'Y': + return (1); + case 'n': + case 'N': + return (0); + default: + printf("Please answer 'yes' or 'no'.\n"); + } + } +} + +/* + * generate a random string with length randomly selected in [min, max] + * and using the characters in alphanum (currently includes a space + * and comma) + */ +int +a_rnd(int min, int max, int column, char *dest) +{ + long i, + len, + char_int; + + RANDOM(len, min, max, column); + for (i = 0; i < len; i++) + { + if (i % 5 == 0) + RANDOM(char_int, 0, MAX_LONG, column); + *(dest + i) = alpha_num[char_int & 077]; + char_int >>= 6; + } + *(dest + len) = '\0'; + return (len); +} + +/* + * embed a randomly selected member of distribution d in alpha-numeric + * noise of a length rendomly selected between min and max at a random + * position + */ +void +e_str(distribution *d, int min, int max, int stream, char *dest) +{ + char strtmp[MAXAGG_LEN + 1]; + long loc; + int len; + + a_rnd(min, max, stream, dest); + pick_str(d, stream, strtmp); + len = strlen(strtmp); + RANDOM(loc, 0, (strlen(dest) - 1 - len), stream); + strncpy(dest + loc, strtmp, len); + + return; +} + + +/* + * 
return the string associate with the LSB of a uniformly selected + * long in [1, max] where max is determined by the distribution + * being queried + */ +int +pick_str(distribution *s, int c, char *target) +{ + long i = 0; + long j; + + RANDOM(j, 1, s->list[s->count - 1].weight, c); + while (s->list[i].weight < j) + i++; + strcpy(target, s->list[i].text); + return(i); +} + +/* + * unjulian (long date) -- return(date - STARTDATE) + */ +long +unjulian(long date) +{ + int i; + long res = 0; + + for (i = STARTDATE / 1000; i < date / 1000; i++) + res += 365 + LEAP(i); + res += date % 1000 - 1; + + return(res); +} + +long +julian(long date) +{ + long offset; + long result; + long yr; + long yend; + + offset = date - STARTDATE; + result = STARTDATE; + +#ifdef WIN32 +/* Disable warning about conditional expression is constant */ +#pragma warning(disable:4127) +#endif + + while (1) + { +#ifdef WIN32 +#pragma warning(default:4127) +#endif + yr = result / 1000; + yend = yr * 1000 + 365 + LEAP(yr); + if (result + offset > yend) /* overflow into next year */ + { + offset -= yend - result + 1; + result += 1000; + continue; + } + else + break; + } + return (result + offset); +} + +/* +* load a distribution from a flat file into the target structure; +* should be rewritten to allow multiple dists in a file +*/ +void +read_dist(char *path, char *name, distribution *target) +{ +FILE *fp; +char line[256], + token[256], + *c; +long weight, + count = 0, + name_set = 0; + + if (d_path == NULL) + { + sprintf(line, "%s%c%s", + env_config(CONFIG_TAG, CONFIG_DFLT), PATH_SEP, path); + fp = fopen(line, "r"); + OPEN_CHECK(fp, line); + } + else + { + fp = fopen(d_path, "r"); + OPEN_CHECK(fp, d_path); + } + while (fgets(line, sizeof(line), fp) != NULL) + { + if ((c = strchr(line, '\n')) != NULL) + *c = '\0'; + if ((c = strchr(line, '#')) != NULL) + *c = '\0'; + if (*line == '\0') + continue; + + if (!name_set) + { + if (dsscasecmp(strtok(line, "\n\t "), "BEGIN")) + continue; + if 
(dsscasecmp(strtok(NULL, "\n\t "), name)) + continue; + name_set = 1; + continue; + } + else + { + if (!dssncasecmp(line, "END", 3)) + { + fclose(fp); + return; + } + } + + if (sscanf(line, "%[^|]|%ld", token, &weight) != 2) + continue; + + if (!dsscasecmp(token, "count")) + { + target->count = weight; + target->list = + (set_member *) + malloc((size_t)(weight * sizeof(set_member))); + MALLOC_CHECK(target->list); + target->max = 0; + continue; + } + target->list[count].text = + (char *) malloc((size_t)(strlen(token) + 1)); + MALLOC_CHECK(target->list[count].text); + strcpy(target->list[count].text, token); + target->max += weight; + target->list[count].weight = target->max; + + count += 1; + } /* while fgets() */ + + if (count != target->count) + { + fprintf(stderr, "Read error on dist '%s'\n", name); + fclose(fp); + exit(1); + } + target->permute = (long *)NULL; + fclose(fp); + return; +} + +/* + * standard file open with life noise + */ + +FILE * +tbl_open(int tbl, char *mode) +{ + char prompt[256]; + char fullpath[256]; + FILE *f; + struct stat fstats; + int retcode; + + + if (*tdefs[tbl].name == PATH_SEP) + strcpy(fullpath, tdefs[tbl].name); + else + sprintf(fullpath, "%s%c%s", + env_config(PATH_TAG, PATH_DFLT), PATH_SEP, tdefs[tbl].name); + + retcode = stat(fullpath, &fstats); + if (retcode && (errno != ENOENT)) + { + fprintf(stderr, "stat(%s) failed.\n", fullpath); + exit(-1); + } + if (S_ISREG(fstats.st_mode) && !force && *mode != 'r' ) + { + sprintf(prompt, "Do you want to overwrite %s ?", fullpath); + if (!yes_no(prompt)) + exit(0); + } + + if (S_ISFIFO(fstats.st_mode)) + { + retcode = + open(fullpath, ((*mode == 'r')?O_RDONLY:O_WRONLY)|O_CREAT); + f = fdopen(retcode, mode); + } + else{ + +#ifdef LINUX + /* allow large files on Linux */ + /*use open to first to get the in fd and apply regular fdopen*/ + + /*cheng: Betty mentioned about write mode problem here, added 066*/ + retcode = + open(fullpath, ((*mode == 
'r')?O_RDONLY:O_WRONLY)|O_CREAT|O_LARGEFILE,0644); + f = fdopen(retcode, mode); +#else + f = fopen(fullpath, mode); +#endif + + } + OPEN_CHECK(f, fullpath); + if (header && columnar && tdefs[tbl].header != NULL) + tdefs[tbl].header(f); + + return (f); +} + + +/* + * agg_str(set, count) build an aggregated string from count unique + * selections taken from set + */ +void +agg_str(distribution *set, long count, long col, char *dest) +{ + distribution *d; + int i; + + + d = set; + *dest = '\0'; + for (i=0; i < count; i++) + { + strcat(dest, DIST_MEMBER(set,*permute_dist(d, col))); + + strcat(dest, " "); + d = (distribution *)NULL; + } + *(dest + strlen(dest) - 1) = '\0'; + return; +} + + +long +dssncasecmp(char *s1, char *s2, int n) +{ + for (; n > 0; ++s1, ++s2, --n) + if (tolower(*s1) != tolower(*s2)) + return ((tolower(*s1) < tolower(*s2)) ? -1 : 1); + else if (*s1 == '\0') + return (0); + return (0); +} + +long +dsscasecmp(char *s1, char *s2) +{ + for (; tolower(*s1) == tolower(*s2); ++s1, ++s2) + if (*s1 == '\0') + return (0); + return ((tolower(*s1) < tolower(*s2)) ? 
-1 : 1); +} + +#ifndef STDLIB_HAS_GETOPT +int optind = 0; +int opterr = 0; +char *optarg = NULL; +/* getopt: minimal stand-in for the C library routine; returns the next option letter from av, '?' for an unknown option or missing argument, -1 when options are exhausted; an option's argument is copied into optarg */ +int +getopt(int ac, char **av, char *opt) +{ + static char *nextchar = NULL; + char *cp; + char hold; + + if (optarg == NULL) + { + optarg = (char *)malloc(BUFSIZ); + MALLOC_CHECK(optarg); + } + + if (!nextchar || *nextchar == '\0') + { + optind++; + if (optind == ac) + return(-1); + nextchar = av[optind]; + if (*nextchar != '-') + return(-1); + nextchar +=1; + } + + if (nextchar && *nextchar == '-') /* -- termination */ + { + optind++; + return(-1); + } + else /* found an option */ + { + cp = strchr(opt, *nextchar); + nextchar += 1; + if (cp == NULL) /* not defined for this run */ + return('?'); + if (*(cp + 1) == ':') /* option takes an argument */ + { + if (*nextchar) + { + hold = *cp; + cp = optarg; + while (*nextchar) + *cp++ = *nextchar++; + *cp = '\0'; /* terminate the copied argument and keep it terminated */ + cp = &hold; /* return(*cp) below must still yield the option letter, without writing it over optarg's terminator */ + } + else /* white space separated, use next arg */ + { + if (++optind == ac) + return('?'); + strcpy(optarg, av[optind]); + } + nextchar = NULL; + } + return(*cp); + } +} +#endif /* STDLIB_HAS_GETOPT */ + +char ** +mk_ascdate(void) +{ + char **m; + dss_time_t t; + int i; + + m = (char**) malloc((size_t)(TOTDATE * sizeof (char *))); + MALLOC_CHECK(m); + for (i = 0; i < TOTDATE; i++) + { + m[i] = (char *)malloc(DATE_LEN * sizeof(char)); + MALLOC_CHECK(m[i]); + mk_time((long)(i + 1), &t); + strcpy(m[i], t.alpha); + } + + return(m); +} + +/* + * set_state() -- initialize the RNG so that + * appropriate data sets can be generated. + * For each table that is to be generated, calculate the number of rows/child, and send that to the + * seed generation routine in speed_seed.c. Note: assumes that tables are completely independent. + * Returns the number of rows to be generated by the named step. 
+ */ +long +set_state(int table, long sf, long procs, long step, long *extra_rows) +{ + int i; + long rowcount, remainder, result; + + if (sf == 0 || step == 0) + return(0); + + rowcount = tdefs[table].base / procs; + if ((sf / procs) > (int)MAX_32B_SCALE) + INTERNAL_ERROR("SCALE OVERFLOW. RE-RUN WITH MORE CHILDREN."); + rowcount *= sf; + remainder = (tdefs[table].base % procs) * sf; + rowcount += remainder / procs; + result = rowcount; + for (i=0; i < step - 1; i++) + { + if (table == LINE) /* special case for shared seeds */ + tdefs[table].gen_seed(1, rowcount); + else + tdefs[table].gen_seed(0, rowcount); + /* need to set seeds of child in case there's a dependency */ + /* NOTE: this assumes that the parent and child have the same base row count */ + if (tdefs[table].child != NONE) + tdefs[tdefs[table].child].gen_seed(0,rowcount); + } + *extra_rows = remainder % procs; + if (step > procs) /* moving to the end to generate updates */ + tdefs[table].gen_seed(*extra_rows); + + return(result); +} + + + + + + + + diff --git a/data/ssb/dbgen/bm_utils.o b/data/ssb/dbgen/bm_utils.o new file mode 100644 index 0000000000000000000000000000000000000000..fccd1821b3a60a2bf2debaf0a5b010ce40c3db48 GIT binary patch literal 12856 zcmbuE4|rSEb-=G|nHUo!(;DiL6%>$(O%q{-1Z;2`KU=o1Kz7pDNq{6ZiY&(_w&Y1q zBxcUPPj;3!MZg%aY=e)jbX{B80UcjR0!?h3zx)ju2{2X)loo3KP`@M6|J@?#u&%O6)U7fE!$6{eBS=e`3`bkm7MrWk+)goKXE@6f2bNdx% znz`}$$H@4=Ah&ylmkcA-7HTaEoB4)gTkwtXWgAaeOG-t)-?<6y?m=I&1n%(ix4xv% z&yD}|8Nb_w25iGOrdH##^n-n?)matNtE)omTvbi_+^XAS*KR|3o_J7bZR6i564k`q ze8X<6d&x3wskUsms#h=e8J`K2BM%FWBQKh_VWDB$otHD2KH!9rfiPxO7p z6A>tDb|csf&dN_YD|S6Ns(8t=VIIge>!}5mt50Bm7^g{9hSfQ8pU^+@WU4)>4zKVU zUm2&2BgQK}V|ZN4N@2_zL&hhfj4#;*-DiAa9QPR?nMB&n>vL+{}zwCs=Xv+!h-g~#_UBj5c)UN>ADAD-X0)zGi=D$iDT!$TOz)v^_p!zqAK)(w@#k zALqgxDtrCbq$ht&L>XW5uO8cSc5Ih_g@t)jtTx*i5r4zC=tKQw6HI>;?2 zyvd>pUird0D>n`f^RiJGsupkAAm6YLb&vFmcw$Dvf68IZ_!C4+GN}5UTY$O;d0?N^ zZso~K(R^RRnWL?*a$aZezYhJJoQ;$x=itp8D8?X9KJUCA-^P#SYc1OL>x76HZaXlL 
zi$O2?%N%y`-4Gdmr%DMnFsR!^CSH+aA9xoOz)&kLVJ8@y!`3rq?>{}B6(YL@l&`-z zQf+@|Z?%2v$@t!CrF6#+;Z?dyV;?u(H7hrU`M^8&{(51rXa>{WJT7z7c;Hi>^!yxR z6@&=W2Qa^Liuu4Hy#NNe&^dO9CrqEECWC#T95q5T*svR2jMHK`Wes`(78PN^Q3e5x zCpVieGpi6D;t63y+Ui{(D1)lB&pvQ5pqF_(YC`p1uMiNBwd%KT_9&xDI9XnVh8aYA}Tjt z+G~O3{N&n3uW{CEeDO=PR1@|#CfXsW>ZzqobeA`wThpORhOG8|)?5dS%sm!ZJ-n5l z#&)Ynwbzq7hKm}+YFwMeqK2jn99p|X>>6-U`9d?^Fvm@z++%zo{Ak}gIhWgap7tcH zJK^4!#}ZXLwaU?0QU3w`8k~B(@?h+3AvIL2W>iZv1fx_8_{ z+3&0{XTCZ1_`sNclbqzHEyf-kDg79Iwdb(|_&$i>nDLphz-PvSqa`@}vxJ8(XF0}( z<}9HBmQv9cvqq^yE@K=Fz<7bf^)F$x^Q6yNu5Hi#+Cxp-egP?y5Od^zIZAP9EFbR` zh?61`nkEkn#>yl}PbUsR2Zm^&64C4r?yj?M{g5Y@f;Qumofs6s-5no= zc-tw8KQBebHw>1EK4+11ws4FhXigphJFK!+1uXSMwl8tXEE$0Eb=Wr8Ct$K0ahgXioujI-Nlp7 zq%~-XI%_U$RWzf{>P;-n=Ucd8y`0BkZgNoQ} z@4h-%f3Tm-Js_?IIFC}RL98B@0i1Otw(x`xwo~Ve_ex#5PSIj9opJGwAFC#dw#QlF z9^CcuJmod$kmWF2o`Z%lukqn7Y@6vZ!}F_A?5|a@1f_-+Z0^u~zZx}Va7Vu3%d;0U z7Tbec^NMs^!rb^6=3uo5U)r|AU)yuqwztk(Mp!UWqsN_qxKZT9p^)zrM~H!w_Wl-7 zt6|{$4?b|f-d~5coU?@izvsYyV?Y0UehMNw>;z7j**Um3TGztuPoBlsxa|j#)9?&C z^@HMcWrF1N3iGrQAGO%;{b%s24|cX<>n?+!H?YT-bguUfeQ0g8{BcC%W_;OkEy$W{ zAlWg=PB|`GEJm6(_V6IsJ~9`lCUIKNMXRj5wzC2BXs)VaSBS(_GBOJk8Cm z^R*VvN}?r!k|>Mne!a9LTIOi?cXkEaTm{S%cJzj0j#d7U?$E=I@ZG`4sz|3EbU-C@ z+4AnznBLhHbp_bGo?u88bF@Ph@ZPV(^`??G&;s?;&TFdoHn0dQxX{6B>KeRFtggw! 
zN|t=TZ&_bSR~rk2Vg`bf_mS)#V*H!x)}==HaQl1Tezcf-hw(uE%*973+kF&Abg%w?w)!5tl2l*SW)R; z9%u`;ujuH!Yh_n=DBSZGk*FTKd)4aRdmLI#t*5TOfqQSc)z{cGZ~pfdG`B2VwD`8$ z@0fm$>W13dn;fMJmqWW^4!7Gi%T+$Z9TSi4wKHeB%4d~HEQ(@`#(0H0vxcUH4C2xt zGF&S#6`40?xn}{htS+=nnLH^U_w&WT|6IPi4DWTe!g$VnTRvnfJRfph{d6WNTl{rDV{h5t&?R!Doyn_yLv{YbKExX+jDQE)WYu^H{z zh<=G%!6kV)OKkh|YHSDdg81-$fp893DVzgEJn(x^>PSHTfAr7%oX|w>nFU4vOX}%C zJ-3XvC8t?92df?4`*J;+w=s8f4s(KN5!w!E5niZI9XVd*(%axwm6Ur82h;icvB9=J zFDGu>1YLW7-aYV;yDcYMx3Y3jhlf&3U*ILfkJwExvT#_XJm&+kmt`5@zw_GzDsop@ zCUP9scb?Z% zk7aO-2dMwi4BjO9PcnFmF!gHhrgTT?w zLvY6=Vl~|N!fzt|FJ<9JvT*#&gZfX&DRU5>F?|HTiS*;a0_FF~Ier3#A^j146Ul#> zh2!B7^*k#VKo!sPB=5t;5t54OlO?|j7f?tduEYIO;1kt5B?~Xh!d+Q-VHQ3&3&(F- z6WP<4g)hp&16lZ8S@`NKd`%V}2af*VB}1QB776 zaAUnYr2HaNoV_S{A)J0NiM0&wZ)DN)P8R-t7XEP-emo2RA`8!haX(T2PR_!=2^`z| zYg~XJiFFk2S7edDM#{e-j>YLcNO>0d8?$h_efM|ubog6Cv2Nz_)t2MMjhC5txgIaG z@G=`OH{j()yj0+&5-)Bc?G{3AA>}re2nBAT!7U29MM<|P>aJwL(A}+paHzd=1zQo+ z;fcW+r8N|0t*t$g&XC^T8t7QbdSZGM2pmrWD-pEf`GomfBcV1HT^U@(+M?0ch(FX8 z?q<#K?1h6wPbbJm^#~klB7vS>7E}*i!4R?#!dU38SXZY%#PsD|tzkIAFda7jqApTB z9*dxQIDLe|t#CT=>z&~cYi|<;I^bXujKJxoE7l$IM}(PJp)foO>LSS6iMrtGPMyIS zDG+4-6)S{UJ;6wKOb@o=xkeZa6+)Xr0XV${x_dxDK=17dw(8-oFr4Jj2z;+zN5BTN zY+GxOU+)0z*cND|9$v}X!LxX_iv&8*l+I{4P+8fEr9E4YKZ)t^R1B z&995B-q{^w(V&iw2huK1e$2l-9MN%{%4rCDEXFQ}-&gQ;2~#!XF~P8d-9nn*f)EmN zTy2$nD?&)92hTBzo9nyDzfJfrAdd;SujJ^rKO_9_hdK8^61S@@lV&mr_hU#8l%p72Wu-%L1~ zrsV&Y@NW>lMRKf{&g)+gc{;BT5P3S@4ib6VuD6IhZPythPsfi1D}#i7qvPjd!s+<= zmgHzZ9Y0eEr{f1c3yYLFZf6r-4HC+4RfJP}IN{Wu1(Ijlvy^aZPY2QSO;T?+k*^_q z6_GC@ypPD^w-IIkCL&MuKS1QE{>KQXdUg?BOY|Qm+(Y;$S$L8Bu!R1pBJvf4*AYHn za_p~5313L$shwSf*Aw}_CcJ_0M~Hqp4j(7{G9v#Z;k4hMA^Pb!+(|g?_m>E#@#iSv z)c%Vx=tDxkmB3H+@6D2Dj^`S}sr^laQ~Pft`lgnpy`oRWo?6YhmF%FlBNr~Z#gp6UNS!m0l^5l;Q^ z0MSqV@Cf0wy+0v(Fstl*if~%*^MuoSha^Y;(0X4YoYs4gaO&sFF*rg(KTL<8vS)1; z{t)5Ro*xljO!PblEKw?or4)%eZOI@Xm7XfUk-OTu#0<#%0+@*ewQ9xtuq&#%Kgl>JQ`)L zNEjbb-0gxdFZ2;UteB7L?g~V8fPrv#cMv{{xI$q)=z_l&XNa#U@_9ul=IV*SM}vso 
z3k9*#U`K0v#19_;{@=*~8ql?JTb|NaEpr`moT(SG8`LAiy*G~8>J%|Eg?RXfG%Dv4 z4r44Ux0B+3QG9=m?GrR3g|<+T(sGmebs#qf;fanDOrDQ=n}srt;P^t0k^ z@B+)1D*<^gm*vsFG@&deb?+v5@j_mf%JP(9UX@dP4P++4FSGr3%JL2+DDTQ{g&%?p z>dY*^UY0M;7#Nx5?}xGzjo*CPaP>blj^D}fQ{^#sW16UL +#include +#ifdef SSBM +#include +#endif +#ifndef VMS +#include +#endif +#if defined(SUN) +#include +#endif + +#if defined(LINUX) +#include +#endif + +#include + +#include "dss.h" +#include "dsstypes.h" +#include "bcd2.h" +#ifdef ADHOC +#include "adhoc.h" +extern adhoc_t adhocs[]; +#endif /* ADHOC */ + +#define LEAP_ADJ(yr, mnth) \ +((LEAP(yr) && (mnth) >= 2) ? 1 : 0) +#define JDAY_BASE 8035 /* start from 1/1/70 a la unix */ +#define JMNTH_BASE (-70 * 12) /* start from 1/1/70 a la unix */ +#define JDAY(date) ((date) - STARTDATE + JDAY_BASE + 1) +#define PART_SUPP_BRIDGE(tgt, p, s) \ + { \ + long tot_scnt = tdefs[SUPP].base * scale; \ + tgt = (p + s * (tot_scnt / SUPP_PER_PART + \ + (long) ((p - 1) / tot_scnt))) % tot_scnt + 1; \ + } +#define RPRICE_BRIDGE(tgt, p) tgt = rpb_routine(p) +#define V_STR(avg, sd, tgt) a_rnd((int)(avg * V_STR_LOW), \ +(int)(avg * V_STR_HGH), sd, tgt) +#define TEXT(avg, sd, tgt) \ +dbg_text(tgt, (int)(avg * V_STR_LOW),(int)(avg * V_STR_HGH), sd) +static void gen_phone PROTO((long ind, char *target, long seed)); + +#ifdef SSBM +static void gen_category PROTO((char *target, long seed)); +int gen_city PROTO((char *cityName, char *nationName)); +int gen_season PROTO((char * dest,int month,int day)); +int is_last_day_in_month PROTO((int year,int month,int day)); +int gen_holiday_fl PROTO((char * dest, int month, int day)); +int gen_city PROTO((char *cityName, char *nationName)); +int gen_color PROTO((char * source, char * dest)); +#endif + + +long +rpb_routine(long p) + { + long price; + price = 90000; + price += (p/10) % 20001; /* limit contribution to $200 */ + price += (p % 1000) * 100; + + return(price); + } + +static void +gen_phone(long ind, char *target, long seed) + { + long acode, + exchg, + number; 
+ + RANDOM(acode, 100, 999, seed); + RANDOM(exchg, 100, 999, seed); + RANDOM(number, 1000, 9999, seed); + sprintf(target, "%02d", 10 + (ind % NATIONS_MAX)); + sprintf(target + 3, "%03d", acode); + sprintf(target + 7, "%03d", exchg); + sprintf(target + 11, "%04d", number); + target[2] = target[6] = target[10] = '-'; + return; +} + +static void +gen_category(char *target, long seed){ + long num1,num2; + RANDOM(num1,1,5,seed); + RANDOM(num2,1,5,seed); + strcpy(target,"MFGR"); + sprintf(target + 4, "%01d", num1); + sprintf(target + 5, "%01d", num2); + return; +} + +#ifdef SSBM +long mk_cust(long n_cust, customer_t *c) +{ + long i; + c->custkey = n_cust; + sprintf(c->name, C_NAME_FMT, C_NAME_TAG, n_cust); + c->alen = V_STR(C_ADDR_LEN, C_ADDR_SD, c->address); + RANDOM(i, 0, nations.count-1, C_NTRG_SD); + strcpy(c->nation_name,nations.list[i].text); + strcpy(c->region_name,regions.list[nations.list[i].weight].text); + gen_city(c->city,c->nation_name); + gen_phone(i, c->phone, (long)C_PHNE_SD); + pick_str(&c_mseg_set, C_MSEG_SD, c->mktsegment); + return (0); + } + +#else +long +mk_cust(long n_cust, customer_t *c) + { + long i; + + c->custkey = n_cust; + sprintf(c->name, C_NAME_FMT, C_NAME_TAG, n_cust); + c->alen = V_STR(C_ADDR_LEN, C_ADDR_SD, c->address); + RANDOM(i, 0, (nations.count - 1), C_NTRG_SD); + c->nation_code = i; + gen_phone(i, c->phone, (long)C_PHNE_SD); + RANDOM(c->acctbal, C_ABAL_MIN, C_ABAL_MAX, C_ABAL_SD); + pick_str(&c_mseg_set, C_MSEG_SD, c->mktsegment); + c->clen = TEXT(C_CMNT_LEN, C_CMNT_SD, c->comment); + + return (0); + } +#endif + + /* + * generate the numbered order and its associated lineitems +*/ +void +mk_sparse (long i, DSS_HUGE *ok, long seq) + { +#ifndef SUPPORT_64BITS + if (scale < MAX_32B_SCALE) +#endif + ez_sparse(i, ok, seq); +#ifndef SUPPORT_64BITS + else + hd_sparse(i, ok, seq); +#endif + return; + } + + /* + * the "simple" version of mk_sparse, used on systems with 64b support + * and on all systems at SF <= 300G where 32b support is 
sufficient +*/ +void +ez_sparse(long i, DSS_HUGE *ok, long seq) + { + long low_bits; + + LONG2HUGE(i, ok); + low_bits = (long)(i & ((1 << SPARSE_KEEP) - 1)); + *ok = *ok >> SPARSE_KEEP; + *ok = *ok << SPARSE_BITS; + *ok += seq; + *ok = *ok << SPARSE_KEEP; + *ok += low_bits; + + + return; + } + +#ifndef SUPPORT_64BITS +void +hd_sparse(long i, DSS_HUGE *ok, long seq) + { + long low_mask, seq_mask; + static int init = 0; + static DSS_HUGE *base, *res; + + if (init == 0) + { + INIT_HUGE(base); + INIT_HUGE(res); + init = 1; + } + + low_mask = (1 << SPARSE_KEEP) - 1; + seq_mask = (1 << SPARSE_BITS) - 1; + bin_bcd2(i, base, base + 1); + HUGE_SET (base, res); + HUGE_DIV (res, 1 << SPARSE_KEEP); + HUGE_MUL (res, 1 << SPARSE_BITS); + HUGE_ADD (res, seq, res); + HUGE_MUL (res, 1 << SPARSE_KEEP); + HUGE_ADD (res, *base & low_mask, res); + bcd2_bin (&low_mask, *res); + bcd2_bin (&seq_mask, *(res + 1)); + *ok = low_mask; + *(ok + 1) = seq_mask; + return; + } +#endif + +#ifdef SSBM +long +mk_order(long index, order_t *o, long upd_num) + { + long lcnt; + long rprice; + long ocnt; + long tmp_date; + long c_date; + long clk_num; + long supp_num; + static char **asc_date = NULL; + char tmp_str[2]; + char **mk_ascdate PROTO((void)); + int delta = 1; + + if (asc_date == NULL) + asc_date = mk_ascdate(); + + RANDOM(tmp_date, O_ODATE_MIN, O_ODATE_MAX, O_ODATE_SD); + strcpy(o->odate, asc_date[tmp_date - STARTDATE]); + + mk_sparse (index, o->okey, + (upd_num == 0) ? 
0 : 1 + upd_num / (10000 / refresh)); + RANDOM(o->custkey, O_CKEY_MIN, O_CKEY_MAX, O_CKEY_SD); + while (o->custkey % CUST_MORTALITY == 0) + { + o->custkey += delta; + o->custkey = MIN(o->custkey, O_CKEY_MAX); + delta *= -1; + } + pick_str(&o_priority_set, O_PRIO_SD, o->opriority); + RANDOM(clk_num, 1, MAX((scale * O_CLRK_SCL), O_CLRK_SCL), O_CLRK_SD); + o->spriority = 0; + + o->totalprice = 0; + ocnt = 0; + + RANDOM(o->lines, O_LCNT_MIN, O_LCNT_MAX, O_LCNT_SD); + for (lcnt = 0; lcnt < o->lines; lcnt++) + { + + HUGE_SET(o->okey, o->lineorders[lcnt].okey); + o->lineorders[lcnt].linenumber = lcnt + 1; + o->lineorders[lcnt].custkey = o->custkey; + RANDOM(o->lineorders[lcnt].partkey, L_PKEY_MIN, L_PKEY_MAX, L_PKEY_SD); + RANDOM(o->lineorders[lcnt].suppkey, L_SKEY_MIN, L_SKEY_MAX, L_SKEY_SD); + + RANDOM(o->lineorders[lcnt].quantity, L_QTY_MIN, L_QTY_MAX, L_QTY_SD); + RANDOM(o->lineorders[lcnt].discount, L_DCNT_MIN, L_DCNT_MAX, L_DCNT_SD); + RANDOM(o->lineorders[lcnt].tax, L_TAX_MIN, L_TAX_MAX, L_TAX_SD); + + strcpy(o->lineorders[lcnt].orderdate,o->odate); + + strcpy(o->lineorders[lcnt].opriority,o->opriority); + o->lineorders[lcnt].ship_priority = o->spriority; + + RANDOM(c_date, L_CDTE_MIN, L_CDTE_MAX, L_CDTE_SD); + c_date += tmp_date; + strcpy(o->lineorders[lcnt].commit_date, asc_date[c_date - STARTDATE]); + + pick_str(&l_smode_set, L_SMODE_SD, o->lineorders[lcnt].shipmode); + + RPRICE_BRIDGE( rprice, o->lineorders[lcnt].partkey); + o->lineorders[lcnt].extended_price = rprice * o->lineorders[lcnt].quantity; + o->lineorders[lcnt].revenue = o->lineorders[lcnt].extended_price * ((long)100-o->lineorders[lcnt].discount)/(long)PENNIES; + + //round off problem with linux if use 0.6 + o->lineorders[lcnt].supp_cost = 6 * rprice /10; + + o->totalprice += + ((o->lineorders[lcnt].extended_price * + ((long)100 - o->lineorders[lcnt].discount)) / (long)PENNIES ) * + ((long)100 + o->lineorders[lcnt].tax) + / (long)PENNIES; + } + + for (lcnt = 0; lcnt < o->lines; lcnt++) + { + 
o->lineorders[lcnt].order_totalprice = o->totalprice; + } + return (0); + } +#else +long +mk_order(long index, order_t *o, long upd_num) + { + long lcnt; + long rprice; + long ocnt; + long tmp_date; + long s_date; + long r_date; + long c_date; + long clk_num; + long supp_num; + static char **asc_date = NULL; + char tmp_str[2]; + char **mk_ascdate PROTO((void)); + int delta = 1; + + if (asc_date == NULL) + asc_date = mk_ascdate(); + mk_sparse (index, o->okey, + (upd_num == 0) ? 0 : 1 + upd_num / (10000 / refresh)); + RANDOM(o->custkey, O_CKEY_MIN, O_CKEY_MAX, O_CKEY_SD); + while (o->custkey % CUST_MORTALITY == 0) + { + o->custkey += delta; + o->custkey = MIN(o->custkey, O_CKEY_MAX); + delta *= -1; + } + + + RANDOM(tmp_date, O_ODATE_MIN, O_ODATE_MAX, O_ODATE_SD); + strcpy(o->odate, asc_date[tmp_date - STARTDATE]); + + pick_str(&o_priority_set, O_PRIO_SD, o->opriority); + RANDOM(clk_num, 1, MAX((scale * O_CLRK_SCL), O_CLRK_SCL), O_CLRK_SD); + sprintf(o->clerk, O_CLRK_FMT, + O_CLRK_TAG, + clk_num); + o->clen = TEXT(O_CMNT_LEN, O_CMNT_SD, o->comment); +#ifdef DEBUG + if (o->clen > O_CMNT_MAX) fprintf(stderr, "comment error: O%d\n", index); +#endif /* DEBUG */ + o->spriority = 0; + + o->totalprice = 0; + o->orderstatus = 'O'; + ocnt = 0; + + RANDOM(o->lines, O_LCNT_MIN, O_LCNT_MAX, O_LCNT_SD); + for (lcnt = 0; lcnt < o->lines; lcnt++) + { + HUGE_SET(o->okey, o->l[lcnt].okey); + o->l[lcnt].lcnt = lcnt + 1; + RANDOM(o->l[lcnt].quantity, L_QTY_MIN, L_QTY_MAX, L_QTY_SD); + RANDOM(o->l[lcnt].discount, L_DCNT_MIN, L_DCNT_MAX, L_DCNT_SD); + RANDOM(o->l[lcnt].tax, L_TAX_MIN, L_TAX_MAX, L_TAX_SD); + pick_str(&l_instruct_set, L_SHIP_SD, o->l[lcnt].shipinstruct); + pick_str(&l_smode_set, L_SMODE_SD, o->l[lcnt].shipmode); + o->l[lcnt].clen = TEXT(L_CMNT_LEN, L_CMNT_SD, o->l[lcnt].comment); + RANDOM(o->l[lcnt].partkey, L_PKEY_MIN, L_PKEY_MAX, L_PKEY_SD); + RPRICE_BRIDGE( rprice, o->l[lcnt].partkey); + RANDOM(supp_num, 0, 3, L_SKEY_SD); + PART_SUPP_BRIDGE( o->l[lcnt].suppkey, 
o->l[lcnt].partkey, supp_num); + o->l[lcnt].eprice = rprice * o->l[lcnt].quantity; + + o->totalprice += + ((o->l[lcnt].eprice * + ((long)100 - o->l[lcnt].discount)) / (long)PENNIES ) * + ((long)100 + o->l[lcnt].tax) + / (long)PENNIES; + + RANDOM(s_date, L_SDTE_MIN, L_SDTE_MAX, L_SDTE_SD); + s_date += tmp_date; + RANDOM(c_date, L_CDTE_MIN, L_CDTE_MAX, L_CDTE_SD); + c_date += tmp_date; + RANDOM(r_date, L_RDTE_MIN, L_RDTE_MAX, L_RDTE_SD); + r_date += s_date; + + + strcpy(o->l[lcnt].sdate, asc_date[s_date - STARTDATE]); + strcpy(o->l[lcnt].cdate, asc_date[c_date - STARTDATE]); + strcpy(o->l[lcnt].rdate, asc_date[r_date - STARTDATE]); + + + if (julian(r_date) <= CURRENTDATE) + { + pick_str(&l_rflag_set, L_RFLG_SD, tmp_str); + o->l[lcnt].rflag[0] = *tmp_str; + } + else + o->l[lcnt].rflag[0] = 'N'; + + if (julian(s_date) <= CURRENTDATE) + { + ocnt++; + o->l[lcnt].lstatus[0] = 'F'; + } + else + o->l[lcnt].lstatus[0] = 'O'; + } + + if (ocnt > 0) + o->orderstatus = 'P'; + if (ocnt == o->lines) + o->orderstatus = 'F'; + + return (0); +} +#endif + +#ifdef SSBM +long mk_part(long index, part_t *p) +{ + long mfgr,cat,brnd; + + p->partkey = index; + + agg_str(&colors, (long)P_NAME_SCL, (long)P_NAME_SD, p->name); + + /*extract color from substring of p->name*/ + p->clen =gen_color(p->name,p->color); + + + RANDOM(mfgr, P_MFG_MIN, P_MFG_MAX, P_MFG_SD); + sprintf(p->mfgr, "%s%d", "MFGR#", mfgr); + + RANDOM(cat, P_CAT_MIN, P_CAT_MAX, P_CAT_SD); + sprintf(p->category, "%s%d", p->mfgr,cat); + + + RANDOM(brnd, P_BRND_MIN, P_BRND_MAX, P_BRND_SD); + sprintf(p->brand,"%s%d",p->category,brnd); + + p->tlen = pick_str(&p_types_set, P_TYPE_SD, p->type); + p->tlen = strlen(p_types_set.list[p->tlen].text); + RANDOM(p->size, P_SIZE_MIN, P_SIZE_MAX, P_SIZE_SD); + + pick_str(&p_cntr_set, P_CNTR_SD, p->container); + + + return (0); +} +#else +long +mk_part(long index, part_t *p) + { + long temp; + long snum; + long brnd; + + p->partkey = index; + agg_str(&colors, (long)P_NAME_SCL, (long)P_NAME_SD, 
p->name); + RANDOM(temp, P_MFG_MIN, P_MFG_MAX, P_MFG_SD); + sprintf(p->mfgr, P_MFG_FMT, P_MFG_TAG, temp); + RANDOM(brnd, P_BRND_MIN, P_BRND_MAX, P_BRND_SD); + sprintf(p->brand, P_BRND_FMT, + P_BRND_TAG, + (temp * 10 + brnd)); + p->tlen = pick_str(&p_types_set, P_TYPE_SD, p->type); + p->tlen = strlen(p_types_set.list[p->tlen].text); + RANDOM(p->size, P_SIZE_MIN, P_SIZE_MAX, P_SIZE_SD); + pick_str(&p_cntr_set, P_CNTR_SD, p->container); + RPRICE_BRIDGE( p->retailprice, index); + p->clen = TEXT(P_CMNT_LEN, P_CMNT_SD, p->comment); + + for (snum = 0; snum < SUPP_PER_PART; snum++) + { + p->s[snum].partkey = p->partkey; + PART_SUPP_BRIDGE( p->s[snum].suppkey, index, snum); + RANDOM(p->s[snum].qty, PS_QTY_MIN, PS_QTY_MAX, PS_QTY_SD); + RANDOM(p->s[snum].scost, PS_SCST_MIN, PS_SCST_MAX, PS_SCST_SD); + p->s[snum].clen = TEXT(PS_CMNT_LEN, PS_CMNT_SD, p->s[snum].comment); + } + return (0); + } +#endif + + +#ifdef SSBM +long +mk_supp(long index, supplier_t *s) +{ + long i, + bad_press, + noise, + offset, + type; + s->suppkey = index; + sprintf(s->name, S_NAME_FMT, S_NAME_TAG, index); + s->alen = V_STR(S_ADDR_LEN, S_ADDR_SD, s->address); + RANDOM(i, 0, nations.count-1, S_NTRG_SD); + strcpy(s->nation_name,nations.list[i].text); + strcpy(s->region_name,regions.list[nations.list[i].weight].text); + gen_city(s->city,s->nation_name); + gen_phone(i, s->phone, (long)C_PHNE_SD); + return (0); +} +#else +long +mk_supp(long index, supplier_t *s) + { + long i, + bad_press, + noise, + offset, + type; + + s->suppkey = index; + sprintf(s->name, S_NAME_FMT, S_NAME_TAG, index); + s->alen = V_STR(S_ADDR_LEN, S_ADDR_SD, s->address); + RANDOM(i, 0, nations.count - 1, S_NTRG_SD); + s->nation_code= i; + gen_phone(i, s->phone, S_PHNE_SD); + RANDOM(s->acctbal, S_ABAL_MIN, S_ABAL_MAX, S_ABAL_SD); + + s->clen = TEXT(S_CMNT_LEN, S_CMNT_SD, s->comment); + /* these calls should really move inside the if stmt below, + * but this will simplify seedless parallel load + */ + RANDOM(bad_press, 1, 10000, 
BBB_CMNT_SD); + RANDOM(type, 0, 100, BBB_TYPE_SD); + RANDOM(noise, 0, (s->clen - BBB_CMNT_LEN), BBB_JNK_SD); + RANDOM(offset, 0, (s->clen - (BBB_CMNT_LEN + noise)), + BBB_OFFSET_SD); + if (bad_press <= S_CMNT_BBB) + { + type = (type < BBB_DEADBEATS) ?0:1; + memcpy(s->comment + offset, BBB_BASE, BBB_BASE_LEN); + if (type == 0) + memcpy(s->comment + BBB_BASE_LEN + offset + noise, + BBB_COMPLAIN, BBB_TYPE_LEN); + else + memcpy(s->comment + BBB_BASE_LEN + offset + noise, + BBB_COMMEND, BBB_TYPE_LEN); + } + + return (0); + } +#endif + +struct + { + char *mdes; + long days; + long dcnt; + } +months[] = + + { + {NULL, 0, 0}, + {"JAN", 31, 31}, + {"FEB", 28, 59}, + {"MAR", 31, 90}, + {"APR", 30, 120}, + {"MAY", 31, 151}, + {"JUN", 30, 181}, + {"JUL", 31, 212}, + {"AUG", 31, 243}, + {"SEP", 30, 273}, + {"OCT", 31, 304}, + {"NOV", 30, 334}, + {"DEC", 31, 365} + }; + +long +mk_time(long index, dss_time_t *t) + { + long m = 0; + long y; + long d; + + t->timekey = index + JDAY_BASE; + y = julian(index + STARTDATE - 1) / 1000; + d = julian(index + STARTDATE - 1) % 1000; + while (d > months[m].dcnt + LEAP_ADJ(y, m)) + m++; + PR_DATE(t->alpha, y, m, + d - months[m - 1].dcnt - ((LEAP(y) && m > 2) ? 
1 : 0)); + t->year = 1900 + y; + t->month = m + 12 * y + JMNTH_BASE; + t->week = (d + T_START_DAY - 1) / 7 + 1; + t->day = d - months[m - 1].dcnt - LEAP_ADJ(y, m-1); + + return (0); + } + + int + mk_nation(long index, code_t *c) + { + c->code = index - 1; + c->text = nations.list[index - 1].text; + c->join = nations.list[index - 1].weight; + c->clen = TEXT(N_CMNT_LEN, N_CMNT_SD, c->comment); + return(0); + } + + int + mk_region(long index, code_t *c) + { + + c->code = index - 1; + c->text = regions.list[index - 1].text; + c->join = 0; /* for completeness */ + c->clen = TEXT(R_CMNT_LEN, R_CMNT_SD, c->comment); + return(0); + } + + +#ifdef SSBM + /*bug!*/ +int gen_city(char *cityName, char *nationName){ + int i=0; + long randomPick; + int clen = strlen(cityName); + int nlen = strlen(nationName); + + strncpy(cityName,nationName,CITY_FIX-1); + + if(nlen < CITY_FIX-1){ + for(i = nlen ; i< CITY_FIX-1;i++) + cityName[i] = ' '; + } + RANDOM(randomPick, 0, 9, 98); + + sprintf(cityName+CITY_FIX-1,"%d",randomPick); + cityName[CITY_FIX] = '\0'; + return 0; +} + + +/* +P_NAME is as long as 55 bytes in TPC-H, which is un¬reasonably large. +We reduce it to 22 by limiting to a concatena¬tion of two colors (see [TPC-H], pg 94). +We also add a new column named P_COLOR that could be used in queries where currently a +color must be chosen by substring from P_NAME. 
+*/ +int gen_color(char * source, char * dest){ + int i = 0,j=0; + int clen=0; + + while(source[i]!= ' ' ){ + dest[i]=source[i]; + i++; + } + dest[i]='\0'; + + i++; + while(source[i] != '\0'){ + source[j] = source[i]; + j++; + i++; + } + + source[j] = '\0'; + + clen = strlen(dest); + return clen; +} + + + +/*Following functions are related to date table generation*/ +int days_in_a_month[12]={31,28,31,30,31,30,31,31,30,31,30,31}; +int days_in_a_month_l[12]={31,29,31,30,31,30,31,31,30,31,30,31}; +season seasons[]={ + {"Christmas",1,11,31,12}, + {"Summer",1,5,31,8}, + {"Winter",1,1,31,3}, + {"Spring",1,4,30,4}, + {"Fall",1,9,31,10} +}; +holiday holidays[]={ + {"Christmas",12,24}, + {"New Years Day", 1,1}, + {"holiday1", 2,20}, + {"Easter Day",4,20}, + {"holiday2", 5,20}, + {"holiday3",7,20}, + {"holiday4",8,20}, + {"holiday5",9,20}, + {"holiday6",10,20}, + {"holiday7",11,20} +}; + +char * month_names[]={"January","February","March","April", + "May","June","July","Augest", + "September","Octorber","November","December"}; + +char * weekday_names[]={"Sunday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday"}; + +/*make the date table, it takes the continuous index , and add index*60*60*24 to + *numeric representation 1/1/1992 01:01:01, + *then convert the final numeric date time to tm structure, and thus extract other field + *for date_t structure */ +long +mk_date(long index,date_t *d) +{ + long espan = (index-1)*60*60*24; + + time_t numDateTime = D_STARTDATE + espan; + + struct tm *localTime = localtime(&numDateTime); + + /*make Sunday be the first day of a week */ + d->daynuminweek=((long)localTime->tm_wday+1)%7+1; + d->monthnuminyear=(long)localTime->tm_mon+1; + strncpy(d->dayofweek, weekday_names[d->daynuminweek-1],D_DAYWEEK_LEN+1); + strncpy(d->month,month_names[d->monthnuminyear-1],D_MONTH_LEN+1); + d->year=(long)localTime->tm_year + 1900; + d->daynuminmonth=(long)localTime->tm_mday; + d->yearmonthnum=d->year * 100 + d->monthnuminyear; + + 
sprintf(d->yearmonth,"%.3s%d",d->month,d->year); + sprintf(d->date,"%s %d, %d",d->month,d->daynuminmonth,d->year); + + d->datekey = d->year*10000+d->monthnuminyear*100+ d->daynuminmonth; + + d->daynuminyear=(int)localTime->tm_yday+1; + d->weeknuminyear = d->daynuminyear/7 + 1; + + if(d->daynuminweek ==7){ + d->lastdayinweekfl[0]='1'; + } + else{ + d->lastdayinweekfl[0]='0'; + } + d->lastdayinweekfl[1]='\0'; + + if(is_last_day_in_month(d->year,d->monthnuminyear,d->daynuminmonth)==1){ + d->lastdayinmonthfl[0]= '0'; + }else{ + d->lastdayinmonthfl[0]= '1'; + } + d->lastdayinmonthfl[1]='\0'; + + if(d->daynuminweek!=1 && d->daynuminweek!=7){ + d->weekdayfl[0]='1'; + } + else{ + d->weekdayfl[0]='0'; + } + + d->weekdayfl[1]='\0'; + + gen_season(d->sellingseason,d->monthnuminyear,d->daynuminmonth); + d->slen = strlen(d->sellingseason); + gen_holiday_fl(d->holidayfl,d->monthnuminyear,d->daynuminmonth); + return (0); +} + +int gen_holiday_fl(char * dest, int month, int day){ + int i; + for(i = 0; i< NUM_HOLIDAYS; i++){ + if(holidays[i].month == month && holidays[i].day == day){ + strcpy(dest,"1"); + return 0; + } + } + strcpy(dest,"0"); + return 0; +} + + +int +is_last_day_in_month(int year,int month,int day){ + int * days; + if(LEAP(year)) + days = days_in_a_month_l; + else + days = days_in_a_month; + if(day == days[month-1]) return 1; + return 0; +} + +int gen_season(char * dest,int month,int day) +{ + int i; + for(i =0;i=seas->start_month && month<=seas->end_month && + day >= seas->start_day && day <= seas->end_day){ + strcpy(dest, seas->name); + return 0; + } + } + strcpy(dest,""); + + return 0; +} + +#endif + + + + + + + + + + + + + + + + + + + diff --git a/data/ssb/dbgen/build.o b/data/ssb/dbgen/build.o new file mode 100644 index 0000000000000000000000000000000000000000..f33ad4a89afbbae63d8f617f72fe59d7c8a73fd5 GIT binary patch literal 23320 zcmdU13w%_?xj&l(vI5Pnh?@FbU{e!;xCw}fMa{Aa%*ui(NqG25mfa1Bd3GNNf*QLC 
za$FW|eU#p6>#h2$R$Hx?TC%o~2Tf308?nCcRcckT;)9~{s>uDmIWxOoPI9XE@%wr2 z{5WT3{_~%2zWL@od&12Xo&^~ulOml-xl~EVB&sM|m!-pHJXoeoR!&x&)iqTt`hkt9 z$C>(xTm6fl#j^79fx*;rdu~zBL5p=9^6M>y+_&831NoM&Yv9e)eN25Ju`g3mcKCra zjKt9yR`Lv0m?6HwZuY2u{n9K;cortISTT#`JSb%tisGfPM0D1x-@|`Q9cG8$XT8U= zFw}eGGFH&Xn)Y1D3SRQWD<-h`H&81!*mnZl4v+|HzqK7PO;CJ&rYDwVXYpBxF*Ql4 zBY4@KSh?N&&lnStVI8B$w44FggjoZw@$h8KV!+Pzvl*JfEnOX`Ky;Vws%xBA_jh+f zT|MgC+_F2-#@kVOP3f&n&9W!I|HX+DUq%CsllLMof4U|PO}G<{IZ_nV6TSiJL=J8DfzKQDf8ff=d`4RQn| zWH!p+EyiXGx(*(zOnJi{y>gg<22EApWnZ>U+C$KVt zsZkKj8UtkN`=fZHGRxAeYY2{%5B*o@JU4Id=m+L{%f~$jndN-0FH9r6 z$a>2KyVp$6l#cE)_w&&HPce^kxpSGbMs$Oc>3C=-KiVuFf}5ofx+^vpmjyRKnby zFYV8>yJH?`@Wiqj&541S9)$6%rL43!a-}E6jmaC%6W2P#&Qvbn05kQvRS@M-ce^c* zci za1z+gXM)l`X7LPRBdqA2IHHB&v)V_T!g z##^=&VVR(49Rs7)u|%Hx2X_1?jP?S|c&W)teGz7wm$>ADAy&}Kz&Ir=dep?CZ=7K2 z)piH#dc(5i_3@yRHX1-N8}dO5Uj2iXlQ-D&lP$pDmSEbh72bb% zXWNP0F0DF)X&x$`ypiY8I+W#c%~y{m@}W3hIL+aP9OmRZB!_xb7Q`A)5Ni*DwT~oH zhNeQW*cHi*upGclnE<;f%u?Q`84sGLu6qVR37ivo`MiS3hmanH)a>vi^c^Nk=P#hE zxxqA*$W5XNaH1(eB`a|fzI_cq$zpgNw5)(P`YW^SFbm9tjzt^2LDjte%KeBAuj?<_5A1oGK3%fpdY>J)5CNtGK0$2*GbEVl8&ni@19d57=#9f=f zwzEJCbi|Wq+F?mL1fA_8%WWG0(spQoy@ShCo?zLLm0q~Gh3YN&*hnw~K92lS2$$KE{9uzhNyuO2$zTWX^Vs-yP%hixja}*K>>n+Rrk}rRbvj-n2 zZgtQjCIffxz6{h91@Y++bPwBerCL2yC80z<$gO@6%Hp*ErQTjM+gKq%s@^3OdJU9$ zQPVgzYGMC?0*-jpqaZfRo=6Sdkuyuh9mxxME#y3wnw@(8ngL-BCsHFKJ4SHbRhB}i^7s)!uizZj1FY8 z(sH{aya;Drn_}q<9>bmj5Q`3**tdaX>FfY3H5Iu_!*}W+paL>RW8ah9TJhAluS)G43;>$1`p^ zsiG62_knbG{6;gJvdxulHMtnH)6S&Yx{s@C`grJ$ciNj_v2?2yxr^dieirZS+73+v zaT|*-${1_gqS=nc=bM0)Pc@ryHK-Nc1<&ye%a*B76spvj3jyBulBYmODu*5A>rznD zrtANJskBaB(C=2SamWsDCLJF8dKI2G;|pOU2+PsFtkiL!yyOT|7o-nhq?Klfwd`4EI+aSmbxr(1?^>Q=$T*zVzHL-&Ww5>0SJO3+)Bc~(iod0mE zhv1OxR{uESfMkXPl6siy<5vHWI0@u>koXpdoYCXvo2R>dFTp4}g~h8+F?E}PQC^T_ zre_Bf8=sR(ha#WCls^s!BnU$)`##5%w7;Y4k`=M zSZtnsc;W}JF2wItQIB&F9I?TJJy6=@3rCo|Uw3L@KCO2~KQ<*&wbD01@(kF1{qMHI zw1f?H*SEsF(XG%8JWJQV!CT)7Q&9puS`)HStx)+DTfzU&TA?g~dofsJWGj3(-3ndr 
zYpe5r%i1aJM_Id=S{oM}hW-7p<*Ac9j@iQ3a{i|3>=RDdz^el|cr8y=n@&JtWOyL! zBE=amGqJ6fr>2-X_k`zh`eI*?+q8QVu(S+LNL4!b!1lv;XdtUdSTG7VGfv)JxLNl< zsd;bku_v!+O7uf7ALkwzdaa;mGLeUk0tc9pT{0fJRBj1WmxsmpK&fYz)(>IR$|E<} zBIod%Eo^TkfukfnY?Dty6gI*i;@+6kVQ<`QA9>fr8wj?uxMe>Zg+TbL?2n&?4f}4~ zu%qgbsrrwwKYjr=?5~RbF%VXK1JmGC3%4t{KYj*v|3d;nZMTa0j%-M{;_$?q>>jwL zfB>e0Nw}Ewk=vtw%+0O!t+Z7>1FP^OfV4_(cM;?_u!`=)@U>e7RW-IsF3gWJ&?>o$ z;ig|$#4NMGd^x5?j)O(`OeHO1>xNB_lD<9RCY1eT#L3S8_4gB0RkKJwg!KIcwuI|v zqeX0FSDC)ifgECd#;YoKz>#>w&{AE^aKS`ZmbKc=116+cKUF|GhwRQ*I!Rr^n{PZGd<(&#P(IwixBYonL+yD5c`$5G1 zlVCcv!lh~0TIz^=Qd*f6`KW#{yGYqT4m^jp&C`C_(1vu`p~1|^Oy(P8jXmdf?f(uO zS@P7Qy+_Y2*xl6QFC9#Y=Odq4DtckYEvk$AwEv z+b7-pacTRM4eys$ns0u$w9>j^FJBOR&#?Fuw-g?mGsL^UtfaYrAe*Q=3}`G)%vHGts`z)<4%+6DyxVEtj>6|-J~{o znC}ImF7&`N`F$JL+Y=AvE%v=5(l3wA*~`;BVn^$YRZJ<5Z^8prxw^;!(MhVS+}YIY zHw~+l8z&cXR z^TB$d8BF-93yzlSo6|D!uupa)lVy)cZ?}1VtSfbJ!dGy~L?5@4)Td#Y29yyFIk-0+ zi#ryU=P33od5+omn&VeiMH-v^g+67mx4F5^XRG%%HV6DRZ`hU>x;W22K`AQ54DwO( z{E90Q3b(Zcg430}P@bdIuT(|a+nXCxK(k+2ykOx{o(zQ)&JIDQypS!=KNB8cxB9*7 zl*Mh_tBwRh7+oIlw`!s4hDeY@3xbW9QsoUtf*82m+Zyo(*C`7EtAaeR*c74^gh*=uUd`(iXJmCC6jrJN?cqSnsz6Y=!WV7}Vqi(z8VxBA__TnlA=nrSw|GO! 
zlEC%0l>u)sWGe@04Qevwk)4TS?iiWSSm*)lLk1}>C=xiWAO)VKxAr7Ul31vyF; z=-awlSpfD{E_W_b7F5hv7CVwUswyg#D_qsek}H-eb>Y)}>LkB~;$^p&X&i?b z90x9g101@WQ+XGE&YgR>C!x39iQQ@3I$E>0Vj~3ljTwLgwQy^fT1=|d753tP_ zE~!zVU-lp-+gSD)(+x`%(^{+P%t_hiJ3uD~aBv<*cN%u)`8n3j8P1&CjhW6ITQsXY zrz|sWnvi1y$eClEpJR4T#BwSz130NLVt$S&$~57lY}syq!ul^5EqkR-HjcH&ME*JG zvmC2`0+tF#YKZO@X>sh7An*2!`8l~;GUw;m;#u=^@;8rj<~TNvcjlBtv&(ayFl9EK zG$E%1FlSCaaDWT-%!meHg& zTnR)wMB1y^DZ$QhtjA{TV;iv^(Q)NDb(uS|CgkJ;1bt056ZWg$iM%KW9b!WQTf`A< zPJBmTb%?FWvDSeRvCn)iaNfV6Y-1Z9&tc`zrfp!(O=Fr9C6DRQOj*j`PmGnAIby{~ zGcrfav=rxO|1Ei(Xmi*&X%MiBXHS2Wy%JDFO`7szbEjk zb@(QMYkH*L;oUhO|Nv~{ScIWB#kP{Lk9R02KWvGe82#I1@Otx z<77XU>&V{(ezzF2(;$xSLwJz?TY>ZS1hB*KjHUk<26#3M)Uo(h1N@r?IDSMpmYf*| zc(DOqW`I`!j`i&n1DdaeP^T*l_%(vRUGVw(2>fde_)P|Q$N*nwfNwIuzi)v5!~lQ9 z0RN2vjvptXeJb$he{g*L2EWgM{|w+KE8~?O@X?hBe+Yb(Q*^S1uh2Z@b->4J-?t6$ z{X&i^*Fl$yn|6ijZ@L*mTSJ?=8T`;wOcCAvRUt!?IkFP$voUW@L4swYI&X4ek~i zTN}fLvz1leP@u3_2?j!iMT$4%tMz-sfkKB;=yCB^35JUBRgACM_?m;S3-L7xZi&Oah`33i|`WAe2>4GzhG13@a^7Qf|oSZ4O|(EeK?;SJ2nK zjw>4s)Ppe_6cAG%Qrc=kN?WioyskDB2rKQ4pnfO}7bjYkTy1cWLqTA9G`!MW8)|9u z2ROO8ZFMoGg&SJ}%5{ih68K4uwY;{CTU(s z!8Z4S22izNa{y}?Yz1vmh!3v66mM;?)vx$!TS9@=ykJNO!8Mqcv8}l+h*Yp`Sn**j zqia(W_foMQW`_G&2aw&U2I(~GzqVZ0+A5L zyko~}o4FdHfHwqH(!3B>LDWFAS{pP7H`DcjKog2@^|l1SOlU0;<|83bj0YGd{7wTt zrQvQC!Ew(i@g)MshCy99p^~WC+BtK8!sHctK3k~q)L=G;wQjWaW(93y}$eBv`uNmMbR1h4>q5R1N zp9X2N+&2Y|9^P@_ISAc;!v7Y;CI6s~kG4ZMPF#>+xs*Ri;8H%OqdQZF%Q?G5hfDe8 zL_VIeq&|xb_)B%TEVq@&$tQ9m2K*@D7ZCn!2K;*n|9rx~&w&3Z;Zylf8u0m#hagwl zQ`+ZQB8QgyvVoj834aFB^IZe}CkFiUg<(-2JY&jsDIz$wt;8=O_)LP&7dU!2w@AK+ z;DrQVN#xLS>j+NEZ6-J^_q#+sEq6V^X}KK)pGNe#o5*(%{62!?7FE{kVIs#)@Fxk5 zbFbvTN915r{6oycpAnqShsOy{=fi2^ATu~jrt@=&z-7OZS+zN^m^0eGU)0pA($g`Duac>-D|?{}92sd|}UJGBYaw~CW6y?-9&KI|A;8}XGA`2m!}CI^^|v<{RF3W_%p$&K5r8Fc;_VRRf9jI zfW!Jugh%25fn$B~Y#?#`DHxr;pR6S~)w6@(RL>s~`E;FnjPPl@ZzDLB^PGX4Hx2kn zf>S-M;sOinOZCzIh;sCJi3>j30gsH*KEEJxXgfYh__SU4pRx#h(th$M!bd$Hg9qJ9 z1gCZwHYoQ1;nQ-D5S*5K7G98o(~l#&z_Gqm&P;++Ih6)-RuDdwQ%7(rCu$()9)i<( 
z=@A2*e?<$r>-Fz5D0d%`L)-B<;p03k+x-+&8XVe<>N8#7SZ*milAllbbUrB}_;kWQ z4F@MUz5eqEj{QXL2bK_=_LC}sqn;PTBlT$^eB1*{{2qeSe*P4}sr+7o)A6-Ohf6)* z6S!WVgM^PksZWN;h<2cQo=0%1r^^7pPT*KB9q$_mpVsSEg4254sly)wdFXyFaJ~F) z9beApy#%M_zG)!m0O3=8ju4#6ITa5r;Pm>OCvdbUm2&~XsT_}ioNEc6%Bd$fl@m3P zhw`bMSpwI~xqt-S0({kq$oR)hD!KvM91dj4)e+Uqq_J_NPoLr*ky#)Uz!5<}ZYy|Ho zIPDM56FIa$yi9P~ABG7|>!pa_9k5=sKUf5gc1FJJ4|c+*{b4rY)BaFl!1oaT9FYGQ zJm{(kPTM6w4)033nOXS!HzMJr=JzpX?wdX-1 zhuSj>2QxUd8@1=@1gG{qOW=BY<`X`(X9>ZnJ+C1+wdXnm{AU8!+w*sXPwn<1;oG2Y z(w?sqKDFCEf>XO?;iLu*%cXWZP2hUF6%d@-t&qr}{qSPKr*^xX;M8ty1gCb38Q|Ll zuD9Fsgir1EHsRZ#&eCoN34aN}4-@`ef@fpr28Z@RK9-NpDsYUK!z1ONLHM*?&LcQ& z7Z;I3+ogu^X@71YIBk~}BIjbF&pN`V?V=K#w#%;wPTS>K1N;+#>)Yid@w+qH?Fyn# zj=+&m?L3>{(+K}kBIi4RV?K0i2wnwoY5)5S@CONAP58Y8UqzmV`TDEV#!{;vtX zl<+?wcn!fnC3rc($KhZAhx$~)Bg>shaN6$3MQ4Hs^K`(2PF#bf@LM%F^$mgKZxOhd z4paPl1a1?)ryX?%in!% zI)0sqJ9K!Xh?nVbDR+eq4~lrZ4#)4!(Bb!G=+NW+U&Qa%;kSzTt2%tMh#%15cZj$d zCk${HMfrCL+@`~~3cN&z%X_d&9WM35|6UHAl>eZR)1kv37Wh^j{+Pg@(BV%A9KS!J z^0x_muMXcXaGOTw^Q53~1lIy|h~Xo@OOoSD;_|!94M+wjars@Q?9URH-!J}Aqo?@t zJH@kvUXm}r=j+tr^7}k#SIL*(+xfO@5$sJYG5)x^7}BQFtn~E>|F(ZIH>szG87CndkgueK!xye zj8`dK6$&YZ!8VK_y$C+^D&!=NK&TM@SkBv=;`l;g_%2IifCzrEMb8YyfKwzUU>@RgA!JJ3*DAB2x^(3<~qX9E$~l;UT+nCmVoeOe%a4uvaG-KaSw%Iefd2$Eyoej|Fqa>{mUR>Ec*@P z<`rBeCW1rzVH-*P<^BTqKv+KRnW@`8U5iPcOq2PgzX}q@%0DFXTV+P!%eeHheK5a6 z$J;CNWBX8tTy)Z3N5VWoye8RcEFG5lB%(qf8y=mR6|2Z^lSJXmdQ13UA%HUV`5huZ z(>1I<|GkhGo2U#P>Po}}FV_uhKl~{}=9ldL}_Qo!)&yPPd>Qn>- g{?17>NTx1PZ$G@2*HL)>VeDAo9@V_mTc7_w0f-_I?EnA( literal 0 HcmV?d00001 diff --git a/data/ssb/dbgen/config.h b/data/ssb/dbgen/config.h new file mode 100644 index 0000000..fa505ec --- /dev/null +++ b/data/ssb/dbgen/config.h @@ -0,0 +1,179 @@ +/* + * Sccsid: @(#)config.h 2.1.8.2 + * + * this file allows the compilation of DBGEN to be tailored to specific + * architectures and operating systems. Some options are grouped + * together to allow easier compilation on a given vendor's hardware. 
+ * + * The following #defines will effect the code: + * TPCH -- make will create TPCH (set in makefile) + * TPCR -- make will create TPCR (set in makefile) + * KILL(pid) -- how to terminate a process in a parallel load + * SPAWN -- name of system call to clone an existing process + * SET_HANDLER(proc) -- name of routine to handle signals in parallel load + * WAIT(res, pid) -- how to await the termination of a child + * SEPARATOR -- character used to separate fields in flat files + * DBNAME -- default name of database to be loaded + * STDLIB_HAS_GETOPT -- to prevent confilcts with gloabal getopt() + * MDY_DATE -- generate dates as MM-DD-YY + * WIN32 -- support for WindowsNT + * SUPPORT_64BITS -- compiler defines a 64 bit datatype + * DSS_HUGE -- 64 bit data type + * HUGE_FORMAT -- printf string for 64 bit data type + * HUGE_COUNT -- number of objects in DSS_HUGE + * EOL_HANDLING -- flat files don't need final column separator + * + * OS defines + * ========== + * ATT -- getopt() handling + * DIGITAL -- changes for DigUnix 64-bit support + * DOS -- disable all multi-user functionality/dependency + * HP -- posix source inclusion differences + * IBM -- posix source inclusion differences + * ICL -- getopt() handling + * MVS -- special handling of varchar format + * SGI -- getopt() handling + * SUN -- getopt() handling + * LINUX -- getopt() handling + * TANDEM -- EOL handling + * U2200 -- death of parent kills children automatically + * VMS -- signal/fork handing differences + * + * Database defines + * ================ + * DB2 -- use DB2 dialect in QGEN + * INFORMIX -- use Informix dialect in QGEN + * SQLSERVER -- use SQLSERVER dialect in QGEN + * SYBASE -- use Sybase dialect in QGEN + * TDAT -- use Teradata dialect in QGEN + */ + +#ifdef DOS +#define DSS_PROC 1 +#define PATH_SEP '\\' +#else + + +#ifdef ATT +#define STDLIB_HAS_GETOPT +#ifdef SQLSERVER +#define WIN32 +#else +/* the 64 bit defines are for the Metaware compiler */ +#define SUPPORT_64BITS +#define DSS_HUGE 
long long +#define HUGE_COUNT 1 +#define HUGE_FORMAT "%LLd" +#endif /* SQLSERVER or MP/RAS */ +#endif /* ATT */ + +#ifdef DIGITAL +#define DOUBLE_CAST (double)(int) +#endif + +#ifdef HP +#define _INCLUDE_POSIX_SOURCE +#define STDLIB_HAS_GETOPT +#endif /* HP */ + +#ifdef IBM +#define _POSIX_SOURCE +/* + * if the C compiler is 3.1 or later, then uncomment the + * lines for 64 bit seed generation + */ +/* #define SUPPORT_64BITS*/ +/* #define DSS_HUGE long long*/ +/* #define HUGE_COUNT 1 */ +#define STDLIB_HAS_GETOPT +#endif /* IBM */ + +#ifdef ICL +#define STDLIB_HAS_GETOPT +#endif /* ICL */ + +#ifdef SUN +#define STDLIB_HAS_GETOPT +#endif /* SUN */ + +#ifdef LINUX +#define STDLIB_HAS_GETOPT +#endif /* LINUX */ + +#ifdef SGI +#define STDLIB_HAS_GETOPT +#define SUPPORT_64BITS +#define DSS_HUGE __uint64_t +#define HUGE_COUNT 1 +#endif /* SGI */ + +#ifdef TANDEM +#define EOL_HANDLING +#endif /* TANDEM */ + +#ifdef VMS +#define SPAWN vfork +#define KILL(pid) kill(SIGQUIT, pid) +#define SET_HANDLER(proc) signal(SIGQUIT, proc) +#define WAIT(res, pid) wait(res) +#define SIGS_DEFINED +#endif /* VMS */ + +#if (defined(WIN32)&&!defined(_POSIX_)) +#define pid_t int +#define SET_HANDLER(proc) signal(SIGINT, proc) +#define KILL(pid) \ + TerminateProcess(OpenProcess(PROCESS_TERMINATE,FALSE,pid),3) +#if (defined (__WATCOMC__)) +#define SPAWN() spawnv(P_NOWAIT, spawn_args[0], spawn_args) +#define WAIT(res, pid) cwait(res, pid, WAIT_CHILD) +#else +#define SPAWN() _spawnv(_P_NOWAIT, spawn_args[0], spawn_args) +#define WAIT(res, pid) _cwait(res, pid, _WAIT_CHILD) +#define getpid _getpid +#endif /* WATCOMC */ +#define SIGS_DEFINED +#define PATH_SEP '\\' +#ifndef TEST_32B +#define SUPPORT_64BITS +#define DSS_HUGE __int64 +#define HUGE_COUNT 1 +#define HUGE_FORMAT "%I64d" +#endif /* TEST_32B */ +/* need to define process termination codes to match UNIX */ +/* these are copied from Linux/GNU and need to be verified as part of a rework of */ +/* process handling under NT (29 Apr 98) */ 
+#define WIFEXITED(s) ((s & 0xFF) == 0) +#define WIFSIGNALED(s) (((unsigned int)((status)-1) & 0xFFFF) < 0xFF) +#define WIFSTOPPED(s) (((s) & 0xff) == 0x7f) +#define WTERMSIG(s) ((s) & 0x7f) +#define WSTOPSIG(s) (((s) & 0xff00) >> 8) +#endif /* WIN32 */ + +#ifndef SIGS_DEFINED +#define KILL(pid) kill(SIGUSR1, pid) +#define SET_HANDLER(proc) signal(SIGUSR1, proc) +#define SPAWN fork +#define WAIT(res, pid) wait(res) +#endif /* DEFAULT */ + +#define DSS_PROC getpid() +#endif /* DOS */ + +#ifndef DBNAME +#define DBNAME "dss" +#endif /* DBNAME */ + +#ifndef PATH_SEP +#define PATH_SEP '/' +#endif /* PATH_SEP */ + +#ifndef DSS_HUGE +#define DSS_HUGE long +#define HUGE_COUNT 2 +#endif + +#ifndef DOUBLE_CAST +#define DOUBLE_CAST (double) +#endif /* DOUBLE_CAST */ + diff --git a/data/ssb/dbgen/dists.dss b/data/ssb/dbgen/dists.dss new file mode 100644 index 0000000..72157ef --- /dev/null +++ b/data/ssb/dbgen/dists.dss @@ -0,0 +1,817 @@ +# Sccsid: @(#)dists.dss 2.1.8.1 +# +# distributions have the following format: +# +# | # comment +# +# Distributions are used to bias the selection of a token +# based on its associated weight. The list of tokens and values +# between the keywords BEGIN and END define the distribution named after +# the BEGIN. A uniformly random value from [0, sum(weights)] +# will be chosen and the first token whose cumulative weight is greater than +# or equal to the result will be returned. In essence, the weights for each +# token represent its relative weight within a distribution. +# +# one special token is defined: count (number of data points in the +# distribution). It MUST be defined for each named distribution. 
+#----------------------------------------------------------------------- +# currently defined distributions and their use: +# NAME FIELD/NOTES +# ======== ============== +# category parts.category +# container parts.container +# instruct shipping instructions +# msegmnt market segment +# names parts.name +# nations must be ordered along with regions +# nations2 stand alone nations set for use with qgen +# o_prio order priority +# regions must be ordered along with nations +# rflag lineitems.returnflag +# types parts.type +# colors embedded string creation; CANNOT BE USED FOR pick_str(), agg_str() perturbs order +# articles comment generation +# nouns +# verbs +# adverbs +# auxillaries +# prepositions +# terminators +# grammar sentence formation +# np +# vp +### +# category +### +BEGIN category +COUNT|5 +FURNITURE|1 +STORAGE EQUIP|1 +TOOLS|1 +MACHINE TOOLS|1 +OTHER|1 +END category +### +# container +### +begin p_cntr +count|40 +SM CASE|1 +SM BOX|1 +SM BAG|1 +SM JAR|1 +SM PACK|1 +SM PKG|1 +SM CAN|1 +SM DRUM|1 +LG CASE|1 +LG BOX|1 +LG BAG|1 +LG JAR|1 +LG PACK|1 +LG PKG|1 +LG CAN|1 +LG DRUM|1 +MED CASE|1 +MED BOX|1 +MED BAG|1 +MED JAR|1 +MED PACK|1 +MED PKG|1 +MED CAN|1 +MED DRUM|1 +JUMBO CASE|1 +JUMBO BOX|1 +JUMBO BAG|1 +JUMBO JAR|1 +JUMBO PACK|1 +JUMBO PKG|1 +JUMBO CAN|1 +JUMBO DRUM|1 +WRAP CASE|1 +WRAP BOX|1 +WRAP BAG|1 +WRAP JAR|1 +WRAP PACK|1 +WRAP PKG|1 +WRAP CAN|1 +WRAP DRUM|1 +end p_cntr +### +# instruct +### +begin instruct +count|4 +DELIVER IN PERSON|1 +COLLECT COD|1 +TAKE BACK RETURN|1 +NONE|1 +end instruct +### +# msegmnt +### +begin msegmnt +count|5 +AUTOMOBILE|1 +BUILDING|1 +FURNITURE|1 +HOUSEHOLD|1 +MACHINERY|1 +end msegmnt +### +# names +### +begin p_names +COUNT|4 +CLEANER|1 +SOAP|1 +DETERGENT|1 +EXTRA|1 +end p_names +### +# nations +# NOTE: this is a special case; the weights here are adjustments to +# map correctly into the regions table, and are *NOT* cummulative +# values to mimic a distribution +### +begin nations +count|25 +ALGERIA|0 
+ARGENTINA|1 +BRAZIL|0 +CANADA|0 +EGYPT|3 +ETHIOPIA|-4 +FRANCE|3 +GERMANY|0 +INDIA|-1 +INDONESIA|0 +IRAN|2 +IRAQ|0 +JAPAN|-2 +JORDAN|2 +KENYA|-4 +MOROCCO|0 +MOZAMBIQUE|0 +PERU|1 +CHINA|1 +ROMANIA|1 +SAUDI ARABIA|1 +VIETNAM|-2 +RUSSIA|1 +UNITED KINGDOM|0 +UNITED STATES|-2 +end nations +### +# nations2 +### +begin nations2 +count|25 +ALGERIA|1 +ARGENTINA|1 +BRAZIL|1 +CANADA|1 +EGYPT|1 +ETHIOPIA|1 +FRANCE|1 +GERMANY|1 +INDIA|1 +INDONESIA|1 +IRAN|1 +IRAQ|1 +JAPAN|1 +JORDAN|1 +KENYA|1 +MOROCCO|1 +MOZAMBIQUE|1 +PERU|1 +CHINA|1 +ROMANIA|1 +SAUDI ARABIA|1 +VIETNAM|1 +RUSSIA|1 +UNITED KINGDOM|1 +UNITED STATES|1 +end nations2 +### +# regions +### +begin regions +count|5 +AFRICA|1 +AMERICA|1 +ASIA|1 +EUROPE|1 +MIDDLE EAST|1 +end regions +### +# o_prio +### +begin o_oprio +count|5 +1-URGENT|1 +2-HIGH|1 +3-MEDIUM|1 +4-NOT SPECIFIED|1 +5-LOW|1 +end o_oprio +### +# rflag +### +begin rflag +count|2 +R|1 +A|1 +end rflag +### +# smode +### +begin smode +count|7 +REG AIR|1 +AIR|1 +RAIL|1 +TRUCK|1 +MAIL|1 +FOB|1 +SHIP|1 +end smode +### +# types +### +begin p_types +COUNT|150 +STANDARD ANODIZED TIN|1 +STANDARD ANODIZED NICKEL|1 +STANDARD ANODIZED BRASS|1 +STANDARD ANODIZED STEEL|1 +STANDARD ANODIZED COPPER|1 +STANDARD BURNISHED TIN|1 +STANDARD BURNISHED NICKEL|1 +STANDARD BURNISHED BRASS|1 +STANDARD BURNISHED STEEL|1 +STANDARD BURNISHED COPPER|1 +STANDARD PLATED TIN|1 +STANDARD PLATED NICKEL|1 +STANDARD PLATED BRASS|1 +STANDARD PLATED STEEL|1 +STANDARD PLATED COPPER|1 +STANDARD POLISHED TIN|1 +STANDARD POLISHED NICKEL|1 +STANDARD POLISHED BRASS|1 +STANDARD POLISHED STEEL|1 +STANDARD POLISHED COPPER|1 +STANDARD BRUSHED TIN|1 +STANDARD BRUSHED NICKEL|1 +STANDARD BRUSHED BRASS|1 +STANDARD BRUSHED STEEL|1 +STANDARD BRUSHED COPPER|1 +SMALL ANODIZED TIN|1 +SMALL ANODIZED NICKEL|1 +SMALL ANODIZED BRASS|1 +SMALL ANODIZED STEEL|1 +SMALL ANODIZED COPPER|1 +SMALL BURNISHED TIN|1 +SMALL BURNISHED NICKEL|1 +SMALL BURNISHED BRASS|1 +SMALL BURNISHED STEEL|1 +SMALL BURNISHED COPPER|1 +SMALL PLATED 
TIN|1 +SMALL PLATED NICKEL|1 +SMALL PLATED BRASS|1 +SMALL PLATED STEEL|1 +SMALL PLATED COPPER|1 +SMALL POLISHED TIN|1 +SMALL POLISHED NICKEL|1 +SMALL POLISHED BRASS|1 +SMALL POLISHED STEEL|1 +SMALL POLISHED COPPER|1 +SMALL BRUSHED TIN|1 +SMALL BRUSHED NICKEL|1 +SMALL BRUSHED BRASS|1 +SMALL BRUSHED STEEL|1 +SMALL BRUSHED COPPER|1 +MEDIUM ANODIZED TIN|1 +MEDIUM ANODIZED NICKEL|1 +MEDIUM ANODIZED BRASS|1 +MEDIUM ANODIZED STEEL|1 +MEDIUM ANODIZED COPPER|1 +MEDIUM BURNISHED TIN|1 +MEDIUM BURNISHED NICKEL|1 +MEDIUM BURNISHED BRASS|1 +MEDIUM BURNISHED STEEL|1 +MEDIUM BURNISHED COPPER|1 +MEDIUM PLATED TIN|1 +MEDIUM PLATED NICKEL|1 +MEDIUM PLATED BRASS|1 +MEDIUM PLATED STEEL|1 +MEDIUM PLATED COPPER|1 +MEDIUM POLISHED TIN|1 +MEDIUM POLISHED NICKEL|1 +MEDIUM POLISHED BRASS|1 +MEDIUM POLISHED STEEL|1 +MEDIUM POLISHED COPPER|1 +MEDIUM BRUSHED TIN|1 +MEDIUM BRUSHED NICKEL|1 +MEDIUM BRUSHED BRASS|1 +MEDIUM BRUSHED STEEL|1 +MEDIUM BRUSHED COPPER|1 +LARGE ANODIZED TIN|1 +LARGE ANODIZED NICKEL|1 +LARGE ANODIZED BRASS|1 +LARGE ANODIZED STEEL|1 +LARGE ANODIZED COPPER|1 +LARGE BURNISHED TIN|1 +LARGE BURNISHED NICKEL|1 +LARGE BURNISHED BRASS|1 +LARGE BURNISHED STEEL|1 +LARGE BURNISHED COPPER|1 +LARGE PLATED TIN|1 +LARGE PLATED NICKEL|1 +LARGE PLATED BRASS|1 +LARGE PLATED STEEL|1 +LARGE PLATED COPPER|1 +LARGE POLISHED TIN|1 +LARGE POLISHED NICKEL|1 +LARGE POLISHED BRASS|1 +LARGE POLISHED STEEL|1 +LARGE POLISHED COPPER|1 +LARGE BRUSHED TIN|1 +LARGE BRUSHED NICKEL|1 +LARGE BRUSHED BRASS|1 +LARGE BRUSHED STEEL|1 +LARGE BRUSHED COPPER|1 +ECONOMY ANODIZED TIN|1 +ECONOMY ANODIZED NICKEL|1 +ECONOMY ANODIZED BRASS|1 +ECONOMY ANODIZED STEEL|1 +ECONOMY ANODIZED COPPER|1 +ECONOMY BURNISHED TIN|1 +ECONOMY BURNISHED NICKEL|1 +ECONOMY BURNISHED BRASS|1 +ECONOMY BURNISHED STEEL|1 +ECONOMY BURNISHED COPPER|1 +ECONOMY PLATED TIN|1 +ECONOMY PLATED NICKEL|1 +ECONOMY PLATED BRASS|1 +ECONOMY PLATED STEEL|1 +ECONOMY PLATED COPPER|1 +ECONOMY POLISHED TIN|1 +ECONOMY POLISHED NICKEL|1 +ECONOMY POLISHED BRASS|1 
+ECONOMY POLISHED STEEL|1 +ECONOMY POLISHED COPPER|1 +ECONOMY BRUSHED TIN|1 +ECONOMY BRUSHED NICKEL|1 +ECONOMY BRUSHED BRASS|1 +ECONOMY BRUSHED STEEL|1 +ECONOMY BRUSHED COPPER|1 +PROMO ANODIZED TIN|1 +PROMO ANODIZED NICKEL|1 +PROMO ANODIZED BRASS|1 +PROMO ANODIZED STEEL|1 +PROMO ANODIZED COPPER|1 +PROMO BURNISHED TIN|1 +PROMO BURNISHED NICKEL|1 +PROMO BURNISHED BRASS|1 +PROMO BURNISHED STEEL|1 +PROMO BURNISHED COPPER|1 +PROMO PLATED TIN|1 +PROMO PLATED NICKEL|1 +PROMO PLATED BRASS|1 +PROMO PLATED STEEL|1 +PROMO PLATED COPPER|1 +PROMO POLISHED TIN|1 +PROMO POLISHED NICKEL|1 +PROMO POLISHED BRASS|1 +PROMO POLISHED STEEL|1 +PROMO POLISHED COPPER|1 +PROMO BRUSHED TIN|1 +PROMO BRUSHED NICKEL|1 +PROMO BRUSHED BRASS|1 +PROMO BRUSHED STEEL|1 +PROMO BRUSHED COPPER|1 +end p_types +### +# colors +# NOTE: This distribution CANNOT be used by pick_str(), since agg_str() perturbs its order +### +begin colors +COUNT|92 +almond|1 +antique|1 +aquamarine|1 +azure|1 +beige|1 +bisque|1 +black|1 +blanched|1 +blue|1 +blush|1 +brown|1 +burlywood|1 +burnished|1 +chartreuse|1 +chiffon|1 +chocolate|1 +coral|1 +cornflower|1 +cornsilk|1 +cream|1 +cyan|1 +dark|1 +deep|1 +dim|1 +dodger|1 +drab|1 +firebrick|1 +floral|1 +forest|1 +frosted|1 +gainsboro|1 +ghost|1 +goldenrod|1 +green|1 +grey|1 +honeydew|1 +hot|1 +indian|1 +ivory|1 +khaki|1 +lace|1 +lavender|1 +lawn|1 +lemon|1 +light|1 +lime|1 +linen|1 +magenta|1 +maroon|1 +medium|1 +metallic|1 +midnight|1 +mint|1 +misty|1 +moccasin|1 +navajo|1 +navy|1 +olive|1 +orange|1 +orchid|1 +pale|1 +papaya|1 +peach|1 +peru|1 +pink|1 +plum|1 +powder|1 +puff|1 +purple|1 +red|1 +rose|1 +rosy|1 +royal|1 +saddle|1 +salmon|1 +sandy|1 +seashell|1 +sienna|1 +sky|1 +slate|1 +smoke|1 +snow|1 +spring|1 +steel|1 +tan|1 +thistle|1 +tomato|1 +turquoise|1 +violet|1 +wheat|1 +white|1 +yellow|1 +end colors +################ +################ +## psuedo text distributions +################ +################ +### +# nouns +### +BEGIN nouns +COUNT|45 +packages|40 +requests|40 
+accounts|40 +deposits|40 +foxes|20 +ideas|20 +theodolites|20 +pinto beans|20 +instructions|20 +dependencies|10 +excuses|10 +platelets|10 +asymptotes|10 +courts|5 +dolphins|5 +multipliers|1 +sauternes|1 +warthogs|1 +frets|1 +dinos|1 +attainments|1 +somas|1 +Tiresias|1 +patterns|1 +forges|1 +braids|1 +frays|1 +warhorses|1 +dugouts|1 +notornis|1 +epitaphs|1 +pearls|1 +tithes|1 +waters|1 +orbits|1 +gifts|1 +sheaves|1 +depths|1 +sentiments|1 +decoys|1 +realms|1 +pains|1 +grouches|1 +escapades|1 +hockey players|1 +END nouns +### +# verbs +### +BEGIN verbs +COUNT|40 +sleep|20 +wake|20 +are|20 +cajole|20 +haggle|20 +nag|10 +use|10 +boost|10 +affix|5 +detect|5 +integrate|5 +maintain|1 +nod|1 +was|1 +lose|1 +sublate|1 +solve|1 +thrash|1 +promise|1 +engage|1 +hinder|1 +print|1 +x-ray|1 +breach|1 +eat|1 +grow|1 +impress|1 +mold|1 +poach|1 +serve|1 +run|1 +dazzle|1 +snooze|1 +doze|1 +unwind|1 +kindle|1 +play|1 +hang|1 +believe|1 +doubt|1 +END verbs +### +# adverbs +## +BEGIN adverbs +COUNT|28 +sometimes|1 +always|1 +never|1 +furiously|50 +slyly|50 +carefully|50 +blithely|40 +quickly|30 +fluffily|20 +slowly|1 +quietly|1 +ruthlessly|1 +thinly|1 +closely|1 +doggedly|1 +daringly|1 +bravely|1 +stealthily|1 +permanently|1 +enticingly|1 +idly|1 +busily|1 +regularly|1 +finally|1 +ironically|1 +evenly|1 +boldly|1 +silently|1 +END adverbs +### +# articles +## +BEGIN articles +COUNT|3 +the|50 +a|20 +an|5 +END articles +### +# prepositions +## +BEGIN prepositions +COUNT|47 +about|50 +above|50 +according to|50 +across|50 +after|50 +against|40 +along|40 +alongside of|30 +among|30 +around|20 +at|10 +atop|1 +before|1 +behind|1 +beneath|1 +beside|1 +besides|1 +between|1 +beyond|1 +by|1 +despite|1 +during|1 +except|1 +for|1 +from|1 +in place of|1 +inside|1 +instead of|1 +into|1 +near|1 +of|1 +on|1 +outside|1 +over|1 +past|1 +since|1 +through|1 +throughout|1 +to|1 +toward|1 +under|1 +until|1 +up|1 +upon|1 +whithout|1 +with|1 +within|1 +END prepositions +### +# auxillaries +## +BEGIN auxillaries 
+COUNT|18 +do|1 +may|1 +might|1 +shall|1 +will|1 +would|1 +can|1 +could|1 +should|1 +ought to|1 +must|1 +will have to|1 +shall have to|1 +could have to|1 +should have to|1 +must have to|1 +need to|1 +try to|1 +END auxiallaries +### +# terminators +## +BEGIN terminators +COUNT|6 +.|50 +;|1 +:|1 +?|1 +!|1 +--|1 +END terminators +### +# adjectives +## +BEGIN adjectives +COUNT|29 +special|20 +pending|20 +unusual|20 +express|20 +furious|1 +sly|1 +careful|1 +blithe|1 +quick|1 +fluffy|1 +slow|1 +quiet|1 +ruthless|1 +thin|1 +close|1 +dogged|1 +daring|1 +brave|1 +stealthy|1 +permanent|1 +enticing|1 +idle|1 +busy|1 +regular|50 +final|40 +ironic|40 +even|30 +bold|20 +silent|10 +END adjectives +### +# grammar +# first level grammar. N=noun phrase, V=verb phrase, +# P=prepositional phrase, T=setence termination +## +BEGIN grammar +COUNT|5 +N V T|3 +N V P T|3 +N V N T|3 +N P V N T|1 +N P V P T|1 +END grammar +### +# NP +# second level grammar. Noun phrases. N=noun, A=article, +# J=adjective, D=adverb +## +BEGIN np +COUNT|4 +N|10 +J N|20 +J, J N|10 +D J N|50 +END np +### +# VP +# second level grammar. Verb phrases. 
V=verb, X=auxiallary, +# D=adverb +## +BEGIN vp +COUNT|4 +V|30 +X V|1 +V D|40 +X V D|1 +END vp +### +# Q13 +# Substitution parameters for Q13 +## +BEGIN Q13a +COUNT|4 +special|20 +pending|20 +unusual|20 +express|20 +END Q13a +BEGIN Q13b +COUNT|4 +packages|40 +requests|40 +accounts|40 +deposits|40 +END Q13b diff --git a/data/ssb/dbgen/driver.c b/data/ssb/dbgen/driver.c new file mode 100644 index 0000000..4f4c903 --- /dev/null +++ b/data/ssb/dbgen/driver.c @@ -0,0 +1,1144 @@ +/* @(#)driver.c 2.1.8.4 */ +/* main driver for dss banchmark */ + +#define DECLARER /* EXTERN references get defined here */ +#define NO_FUNC (int (*) ()) NULL /* to clean up tdefs */ +#define NO_LFUNC (long (*) ()) NULL /* to clean up tdefs */ + +#include "config.h" +#include +#if (defined(_POSIX_)||!defined(WIN32)) /* Change for Windows NT */ +#ifndef DOS +#include +#include +#endif + +#endif /* WIN32 */ +#include /* */ +#include +#include +#include +#include +#include +#include +#ifdef HP +#include +#endif +#if (defined(WIN32)&&!defined(_POSIX_)) +#include +#pragma warning(disable:4201) +#pragma warning(disable:4214) +#pragma warning(disable:4514) +#define WIN32_LEAN_AND_MEAN +#define NOATOM +#define NOGDICAPMASKS +#define NOMETAFILE +#define NOMINMAX +#define NOMSG +#define NOOPENFILE +#define NORASTEROPS +#define NOSCROLL +#define NOSOUND +#define NOSYSMETRICS +#define NOTEXTMETRIC +#define NOWH +#define NOCOMM +#define NOKANJI +#define NOMCX + +#include "windows.h" + +#pragma warning(default:4201) +#pragma warning(default:4214) +#endif + +#include "dss.h" +#include "dsstypes.h" +#include "bcd2.h" + +/* +* Function prototypes +*/ +void usage (void); +int prep_direct (char *); +int close_direct (void); +void kill_load (void); +int pload (int tbl); +void gen_tbl (int tnum, long start, long count, long upd_num); +int pr_drange (int tbl, long min, long cnt, long num); +int set_files (int t, int pload); +int partial (int, int); + + +extern int optind, opterr; +extern char *optarg; +long rowcnt = 
0, minrow = 0, upd_num = 0; +double flt_scale; +#if (defined(WIN32)&&!defined(_POSIX_)) +char *spawn_args[25]; +#endif + + +/* +* general table descriptions. See dss.h for details on structure +* NOTE: tables with no scaling info are scaled according to +* another table +* +* +* the following is based on the tdef structure defined in dss.h as: +* typedef struct +* { +* char *name; -- name of the table; +* flat file output in .tbl +* long base; -- base scale rowcount of table; +* 0 if derived +* int (*header) (); -- function to prep output +* int (*loader[2]) (); -- functions to present output +* long (*gen_seed) (); -- functions to seed the RNG +* int (*verify) (); -- function to verfiy the data set without building it +* int child; -- non-zero if there is an associated detail table +* unsigned long vtotal; -- "checksum" total +* } tdef; +* +*/ + +/* +* flat file print functions; used with -F(lat) option +*/ +#ifdef SSBM +int pr_cust (customer_t * c, int mode); +int pr_part (part_t * p, int mode); +int pr_supp (supplier_t * s, int mode); +int pr_line (order_t * o, int mode); +#else +int pr_cust (customer_t * c, int mode); +int pr_line (order_t * o, int mode); +int pr_order (order_t * o, int mode); +int pr_part (part_t * p, int mode); +int pr_psupp (part_t * p, int mode); +int pr_supp (supplier_t * s, int mode); +int pr_order_line (order_t * o, int mode); +int pr_part_psupp (part_t * p, int mode); +int pr_nation (code_t * c, int mode); +int pr_region (code_t * c, int mode); +#endif + +/* +* inline load functions; used with -D(irect) option +*/ +#ifdef SSBM +int ld_cust (customer_t * c, int mode); +int ld_part (part_t * p, int mode); +int ld_supp (supplier_t * s, int mode); + +/*todo: get rid of ld_order*/ +int ld_line (order_t * o, int mode); +int ld_order (order_t * o, int mode); + +#else +int ld_cust (customer_t * c, int mode); +int ld_line (order_t * o, int mode); +int ld_order (order_t * o, int mode); +int ld_part (part_t * p, int mode); +int ld_psupp (part_t * 
p, int mode); +int ld_supp (supplier_t * s, int mode); +int ld_order_line (order_t * o, int mode); +int ld_part_psupp (part_t * p, int mode); +int ld_nation (code_t * c, int mode); +int ld_region (code_t * c, int mode); +#endif + +/* +* seed generation functions; used with '-O s' option +*/ +#ifdef SSBM +long sd_cust (int child, long skip_count); +long sd_part (int child, long skip_count); +long sd_supp (int child, long skip_count); + +long sd_line (int child, long skip_count); +long sd_order (int child, long skip_count); + +#else +long sd_cust (int child, long skip_count); +long sd_line (int child, long skip_count); +long sd_order (int child, long skip_count); +long sd_part (int child, long skip_count); +long sd_psupp (int child, long skip_count); +long sd_supp (int child, long skip_count); +long sd_order_line (int child, long skip_count); +long sd_part_psupp (int child, long skip_count); +#endif + +/* +* header output functions); used with -h(eader) option +*/ +#ifdef SSBM +int hd_cust (FILE * f); +int hd_part (FILE * f); +int hd_supp (FILE * f); +int hd_line (FILE * f); + +#else +int hd_cust (FILE * f); +int hd_line (FILE * f); +int hd_order (FILE * f); +int hd_part (FILE * f); +int hd_psupp (FILE * f); +int hd_supp (FILE * f); +int hd_order_line (FILE * f); +int hd_part_psupp (FILE * f); +int hd_nation (FILE * f); +int hd_region (FILE * f); +#endif + +/* +* data verfication functions; used with -O v option +*/ +#ifdef SSBM +int vrf_cust (customer_t * c, int mode); +int vrf_part (part_t * p, int mode); +int vrf_supp (supplier_t * s, int mode); +int vrf_line (order_t * o, int mode); +int vrf_order (order_t * o, int mode); +int vrf_date (date_t,int mode); +#else +int vrf_cust (customer_t * c, int mode); +int vrf_line (order_t * o, int mode); +int vrf_order (order_t * o, int mode); +int vrf_part (part_t * p, int mode); +int vrf_psupp (part_t * p, int mode); +int vrf_supp (supplier_t * s, int mode); +int vrf_order_line (order_t * o, int mode); +int vrf_part_psupp 
(part_t * p, int mode); +int vrf_nation (code_t * c, int mode); +int vrf_region (code_t * c, int mode); +#endif + + +#ifdef SSBM +tdef tdefs[] = +{ + + {"part.tbl", "part table", 200000, hd_part, + {pr_part, ld_part}, sd_part, vrf_part, PSUPP, 0}, + {0,0,0,0,{0,0}, 0,0,0,0}, + {"supplier.tbl", "suppliers table", 2000, hd_supp, + {pr_supp, ld_supp}, sd_supp, vrf_supp, NONE, 0}, + + {"customer.tbl", "customers table", 30000, hd_cust, + {pr_cust, ld_cust}, sd_cust, vrf_cust, NONE, 0}, + {"date.tbl","date table",2556,0,{pr_date,ld_date}, 0,vrf_date, NONE,0}, + /*line order is SF*1,500,000, however due to the implementation + the base here is 150,000 instead if 1500,000*/ + {"lineorder.tbl", "lineorder table", 150000, hd_line, + {pr_line, ld_line}, sd_line, vrf_line, NONE, 0}, + {0,0,0,0,{0,0}, 0,0,0,0}, + {0,0,0,0,{0,0}, 0,0,0,0}, + {0,0,0,0,{0,0}, 0,0,0,0}, + {0,0,0,0,{0,0}, 0,0,0,0}, +}; + +#else + +tdef tdefs[] = +{ + {"part.tbl", "part table", 200000, hd_part, + {pr_part, ld_part}, sd_part, vrf_part, PSUPP, 0}, + {"partsupp.tbl", "partsupplier table", 200000, hd_psupp, + {pr_psupp, ld_psupp}, sd_psupp, vrf_psupp, NONE, 0}, + {"supplier.tbl", "suppliers table", 10000, hd_supp, + {pr_supp, ld_supp}, sd_supp, vrf_supp, NONE, 0}, + {"customer.tbl", "customers table", 150000, hd_cust, + {pr_cust, ld_cust}, sd_cust, vrf_cust, NONE, 0}, + {"orders.tbl", "order table", 150000, hd_order, + {pr_order, ld_order}, sd_order, vrf_order, LINE, 0}, + {"lineitem.tbl", "lineitem table", 150000, hd_line, + {pr_line, ld_line}, sd_line, vrf_line, NONE, 0}, + {"orders.tbl", "orders/lineitem tables", 150000, hd_order_line, + {pr_order_line, ld_order_line}, sd_order, vrf_order_line, LINE, 0}, + {"part.tbl", "part/partsupplier tables", 200000, hd_part_psupp, + {pr_part_psupp, ld_part_psupp}, sd_part, vrf_part_psupp, PSUPP, 0}, + {"nation.tbl", "nation table", NATIONS_MAX, hd_nation, + {pr_nation, ld_nation}, NO_LFUNC, vrf_nation, NONE, 0}, + {"region.tbl", "region table", NATIONS_MAX, 
hd_region, + {pr_region, ld_region}, NO_LFUNC, vrf_region, NONE, 0}, +}; +#endif +int *pids; + + +/* +* routines to handle the graceful cleanup of multi-process loads +*/ + +void +stop_proc (int signum) +{ + exit (0); +} + +void +kill_load (void) +{ + int i; + +#if !defined(U2200) && !defined(DOS) + for (i = 0; i < children; i++) + if (pids[i]) + KILL (pids[i]); +#endif /* !U2200 && !DOS */ + return; +} + +/* +* re-set default output file names +*/ +int +set_files (int i, int pload) +{ + char line[80], *new_name; + + if (table & (1 << i)) +child_table: + { + if (pload != -1) + sprintf (line, "%s.%d", tdefs[i].name, pload); + else + { + printf ("Enter new destination for %s data: ", + tdefs[i].name); + if (fgets (line, sizeof (line), stdin) == NULL) + return (-1);; + if ((new_name = strchr (line, '\n')) != NULL) + *new_name = '\0'; + if (strlen (line) == 0) + return (0); + } + new_name = (char *) malloc (strlen (line) + 1); + MALLOC_CHECK (new_name); + strcpy (new_name, line); + tdefs[i].name = new_name; + if (tdefs[i].child != NONE) + { + i = tdefs[i].child; + tdefs[i].child = NONE; + goto child_table; + } + } + + return (0); +} + + + +/* +* read the distributions needed in the benchamrk +*/ +void +load_dists (void) +{ + read_dist (env_config (DIST_TAG, DIST_DFLT), "p_cntr", &p_cntr_set); + read_dist (env_config (DIST_TAG, DIST_DFLT), "colors", &colors); + read_dist (env_config (DIST_TAG, DIST_DFLT), "p_types", &p_types_set); + read_dist (env_config (DIST_TAG, DIST_DFLT), "nations", &nations); + read_dist (env_config (DIST_TAG, DIST_DFLT), "regions", ®ions); + read_dist (env_config (DIST_TAG, DIST_DFLT), "o_oprio", + &o_priority_set); + read_dist (env_config (DIST_TAG, DIST_DFLT), "instruct", + &l_instruct_set); + read_dist (env_config (DIST_TAG, DIST_DFLT), "smode", &l_smode_set); + read_dist (env_config (DIST_TAG, DIST_DFLT), "category", + &l_category_set); + read_dist (env_config (DIST_TAG, DIST_DFLT), "rflag", &l_rflag_set); + read_dist (env_config (DIST_TAG, 
DIST_DFLT), "msegmnt", &c_mseg_set); + + /* load the distributions that contain text generation */ + read_dist (env_config (DIST_TAG, DIST_DFLT), "nouns", &nouns); + read_dist (env_config (DIST_TAG, DIST_DFLT), "verbs", &verbs); + read_dist (env_config (DIST_TAG, DIST_DFLT), "adjectives", &adjectives); + read_dist (env_config (DIST_TAG, DIST_DFLT), "adverbs", &adverbs); + read_dist (env_config (DIST_TAG, DIST_DFLT), "auxillaries", &auxillaries); + read_dist (env_config (DIST_TAG, DIST_DFLT), "terminators", &terminators); + read_dist (env_config (DIST_TAG, DIST_DFLT), "articles", &articles); + read_dist (env_config (DIST_TAG, DIST_DFLT), "prepositions", &prepositions); + read_dist (env_config (DIST_TAG, DIST_DFLT), "grammar", &grammar); + read_dist (env_config (DIST_TAG, DIST_DFLT), "np", &np); + read_dist (env_config (DIST_TAG, DIST_DFLT), "vp", &vp); + +} + +/* +* generate a particular table +*/ +void +gen_tbl (int tnum, long start, long count, long upd_num) +{ + static order_t o; + supplier_t supp; + customer_t cust; + part_t part; +#ifdef SSBM + date_t dt; +#else + code_t code; +#endif + static int completed = 0; + static int init = 0; + long i; + + int rows_per_segment=0; + int rows_this_segment=-1; + int residual_rows=0; + + if (insert_segments) + { + rows_per_segment = count / insert_segments; + residual_rows = count - (rows_per_segment * insert_segments); + } + + if (init == 0) + { + INIT_HUGE(o.okey); + for (i=0; i < O_LCNT_MAX; i++) +#ifdef SSBM + INIT_HUGE(o.lineorders[i].okey); +#else + INIT_HUGE(o.l[i].okey); +#endif + init = 1; + } + + for (i = start; count; count--, i++) + { + LIFENOISE (1000, i); + row_start(tnum); + + switch (tnum) + { + case LINE: +#ifdef SSBM +#else + case ORDER: + case ORDER_LINE: +#endif + mk_order (i, &o, upd_num % 10000); + + if (insert_segments && (upd_num > 0)) + if((upd_num / 10000) < residual_rows) + { + if((++rows_this_segment) > rows_per_segment) + { + rows_this_segment=0; + upd_num += 10000; + } + } + else + { + 
if((++rows_this_segment) >= rows_per_segment) + { + rows_this_segment=0; + upd_num += 10000; + } + } + + if (set_seeds == 0) + if (validate) + tdefs[tnum].verify(&o, 0); + else + tdefs[tnum].loader[direct] (&o, upd_num); + break; + case SUPP: + mk_supp (i, &supp); + if (set_seeds == 0) + if (validate) + tdefs[tnum].verify(&supp, 0); + else + tdefs[tnum].loader[direct] (&supp, upd_num); + break; + case CUST: + mk_cust (i, &cust); + if (set_seeds == 0) + if (validate) + tdefs[tnum].verify(&cust, 0); + else + tdefs[tnum].loader[direct] (&cust, upd_num); + break; +#ifdef SSBM + case PART: +#else + case PSUPP: + case PART: + case PART_PSUPP: +#endif + mk_part (i, &part); + if (set_seeds == 0) + if (validate) + tdefs[tnum].verify(&part, 0); + else + tdefs[tnum].loader[direct] (&part, upd_num); + break; +#ifdef SSBM + case DATE: + mk_date (i, &dt); + if (set_seeds == 0) + if (validate) + tdefs[tnum].verify(&dt, 0); + else + tdefs[tnum].loader[direct] (&dt, 0); + break; +#else + case NATION: + mk_nation (i, &code); + if (set_seeds == 0) + if (validate) + tdefs[tnum].verify(&code, 0); + else + tdefs[tnum].loader[direct] (&code, 0); + break; + case REGION: + mk_region (i, &code); + if (set_seeds == 0) + if (validate) + tdefs[tnum].verify(&code, 0); + else + tdefs[tnum].loader[direct] (&code, 0); + break; +#endif + } + row_stop(tnum); + if (set_seeds && (i % tdefs[tnum].base) < 2) + { + printf("\nSeeds for %s at rowcount %ld\n", tdefs[tnum].comment, i); + dump_seeds(tnum); + } + } + completed |= 1 << tnum; +} + + + +void +usage (void) +{ +#ifdef SSBM + fprintf (stderr, "%s\n%s\n\t%s\n%s %s\n\n", + "USAGE:", + "dbgen [-{vfFD}] [-O {fhmsv}][-T {pcsdla}]", + "[-s ][-C ][-S ]", + "dbgen [-v] [-O {dfhmr}] [-s ]", + "[-U ] [-r ]"); + +#else + fprintf (stderr, "%s\n%s\n\t%s\n%s %s\n\n", + "USAGE:", + "dbgen [-{vfFD}] [-O {fhmsv}][-T {pcsoPSOL}]", + "[-s ][-C ][-S ]", + "dbgen [-v] [-O {dfhmr}] [-s ]", + "[-U ] [-r ]"); +#endif + fprintf (stderr, "-b -- load distributions for \n"); + 
fprintf (stderr, "-C -- use processes to generate data\n"); + fprintf (stderr, " [Under DOS, must be used with -S]\n"); + fprintf (stderr, "-D -- do database load in line\n"); + fprintf (stderr, "-d -- split deletes between files\n"); + fprintf (stderr, "-f -- force. Overwrite existing files\n"); + fprintf (stderr, "-F -- generate flat files output\n"); + fprintf (stderr, "-h -- display this message\n"); + fprintf (stderr, "-i -- split inserts between files\n"); + fprintf (stderr, "-n -- inline load into database \n"); + fprintf (stderr, "-O d -- generate SQL syntax for deletes\n"); + fprintf (stderr, "-O f -- over-ride default output file names\n"); + fprintf (stderr, "-O h -- output files with headers\n"); + fprintf (stderr, "-O m -- produce columnar output\n"); + fprintf (stderr, "-O r -- generate key ranges for deletes.\n"); + fprintf (stderr, "-O v -- Verify data set without generating it.\n"); + fprintf (stderr, "-q -- enable QUIET mode\n"); + fprintf (stderr, "-r -- updates refresh (n/100)%% of the\n"); + fprintf (stderr, " data set\n"); + fprintf (stderr, "-s -- set Scale Factor (SF) to \n"); + fprintf (stderr, "-S -- build the th step of the data/update set\n"); + +#ifdef SSBM + fprintf (stderr, "-T c -- generate cutomers dimension table ONLY\n"); + fprintf (stderr, "-T p -- generate parts dimension table ONLY\n"); + fprintf (stderr, "-T s -- generate suppliers dimension table ONLY\n"); + fprintf (stderr, "-T d -- generate date dimension table ONLY\n"); + fprintf (stderr, "-T l -- generate lineorder fact table ONLY\n"); +#else + fprintf (stderr, "-T c -- generate cutomers ONLY\n"); + fprintf (stderr, "-T l -- generate nation/region ONLY\n"); + fprintf (stderr, "-T L -- generate lineitem ONLY\n"); + fprintf (stderr, "-T n -- generate nation ONLY\n"); + fprintf (stderr, "-T o -- generate orders/lineitem ONLY\n"); + fprintf (stderr, "-T O -- generate orders ONLY\n"); + fprintf (stderr, "-T p -- generate parts/partsupp ONLY\n"); + fprintf (stderr, "-T P -- 
generate parts ONLY\n"); + fprintf (stderr, "-T r -- generate region ONLY\n"); + fprintf (stderr, "-T s -- generate suppliers ONLY\n"); + fprintf (stderr, "-T S -- generate partsupp ONLY\n"); +#endif + + fprintf (stderr, "-U -- generate update sets\n"); + fprintf (stderr, "-v -- enable VERBOSE mode\n"); + fprintf (stderr, + "\nTo generate the SF=1 (1GB), validation database population, use:\n"); + fprintf (stderr, "\tdbgen -vfF -s 1\n"); + fprintf (stderr, "\nTo generate updates for a SF=1 (1GB), use:\n"); + fprintf (stderr, "\tdbgen -v -U 1 -s 1\n"); +} + +/* +* pload() -- handle the parallel loading of tables +*/ +/* +* int partial(int tbl, int s) -- generate the s-th part of the named tables data +*/ +int +partial (int tbl, int s) +{ + long rowcnt; + long extra; + + if (verbose > 0) + { + fprintf (stderr, "\tStarting to load stage %d of %d for %s...", + s, children, tdefs[tbl].comment); + } + + if (direct == 0) + set_files (tbl, s); + + rowcnt = set_state(tbl, scale, children, s, &extra); + + if (s == children) + gen_tbl (tbl, rowcnt * (s - 1) + 1, rowcnt + extra, upd_num); + else + gen_tbl (tbl, rowcnt * (s - 1) + 1, rowcnt, upd_num); + + if (verbose > 0) + fprintf (stderr, "done.\n"); + + return (0); +} + +#ifndef DOS + +int +pload (int tbl) +{ + int c = 0, i, status; + + if (verbose > 0) + { + fprintf (stderr, "Starting %d children to load %s", + children, tdefs[tbl].comment); + } + for (c = 0; c < children; c++) + { + pids[c] = SPAWN (); + if (pids[c] == -1) + { + perror ("Child loader not created"); + kill_load (); + exit (-1); + } + else if (pids[c] == 0) /* CHILD */ + { + SET_HANDLER (stop_proc); + verbose = 0; + partial (tbl, c+1); + exit (0); + } + else if (verbose > 0) /* PARENT */ + fprintf (stderr, "."); + } + + if (verbose > 0) + fprintf (stderr, "waiting..."); + + c = children; + while (c) + { + i = WAIT (&status, pids[c - 1]); + if (i == -1 && children) + { + if (errno == ECHILD) + fprintf (stderr, "\nCould not wait on pid %d\n", pids[c - 1]); + 
else if (errno == EINTR) + fprintf (stderr, "\nProcess %d stopped abnormally\n", pids[c - 1]); + else if (errno == EINVAL) + fprintf (stderr, "\nProgram bug\n"); + } + if (! WIFEXITED(status)) { + (void) fprintf(stderr, "\nProcess %d: ", i); + if (WIFSIGNALED(status)) { + (void) fprintf(stderr, "rcvd signal %d\n", + WTERMSIG(status)); + } else if (WIFSTOPPED(status)) { + (void) fprintf(stderr, "stopped, signal %d\n", + WSTOPSIG(status)); + } + + } + c--; + } + + if (verbose > 0) + fprintf (stderr, "done\n"); + return (0); +} +#endif + + +void +process_options (int count, char **vector) +{ + int option; + + while ((option = getopt (count, vector, + "b:C:Dd:Ffi:hn:O:P:qr:s:S:T:U:v")) != -1) + switch (option) + { + case 'b': /* load distributions from named file */ + d_path = (char *)malloc(strlen(optarg) + 1); + MALLOC_CHECK(d_path); + strcpy(d_path, optarg); + break; + case 'q': /* all prompts disabled */ + verbose = -1; + break; + case 'i': + insert_segments = atoi (optarg); + break; + case 'd': + delete_segments = atoi (optarg); + break; + case 'S': /* generate a particular STEP */ + step = atoi (optarg); + break; + case 'v': /* life noises enabled */ + verbose = 1; + break; + case 'f': /* blind overwrites; Force */ + force = 1; + break; + case 'T': /* generate a specifc table */ + switch (*optarg) + { +#ifdef SSBM + case 'c': /* generate customer ONLY */ + table = 1 << CUST; + break; + case 'p': /* generate part ONLY */ + table = 1 << PART; + break; + case 's': /* generate partsupp ONLY */ + table = 1 << SUPP; + break; + case 'd': /* generate date ONLY */ + table = 1 << DATE; + break; + case 'l': /* generate lineorder table ONLY */ + table = 1 << LINE; + break; + case 'a': + table = 1 << CUST; + table |= 1 << PART; + table |= 1 << SUPP; + table |= 1 << DATE; + table |= 1 << LINE; + break; +#else + case 'c': /* generate customer ONLY */ + table = 1 << CUST; + break; + case 'L': /* generate lineitems ONLY */ + table = 1 << LINE; + break; + case 'l': /* generate 
code table ONLY */ + table = 1 << NATION; + table |= 1 << REGION; + break; + case 'n': /* generate nation table ONLY */ + table = 1 << NATION; + break; + case 'O': /* generate orders ONLY */ + table = 1 << ORDER; + break; + case 'o': /* generate orders/lineitems ONLY */ + table = 1 << ORDER_LINE; + break; + case 'P': /* generate part ONLY */ + table = 1 << PART; + break; + case 'p': /* generate part/partsupp ONLY */ + table = 1 << PART_PSUPP; + break; + case 'r': /* generate region table ONLY */ + table = 1 << REGION; + break; + case 'S': /* generate partsupp ONLY */ + table = 1 << PSUPP; + break; + case 's': /* generate suppliers ONLY */ + table = 1 << SUPP; + break; +#endif + default: + fprintf (stderr, "Unknown table name %s\n", + optarg); + usage (); + exit (1); + } + break; + case 's': /* scale by Percentage of base rowcount */ + case 'P': /* for backward compatibility */ + flt_scale = atof (optarg); + if (flt_scale < MIN_SCALE) + { + int i; + + scale = 1; + for (i = PART; i < REGION; i++) + { + tdefs[i].base *= flt_scale; + if (tdefs[i].base < 1) + tdefs[i].base = 1; + } + } + else + scale = (long) flt_scale; + if (scale > MAX_SCALE) + { + fprintf (stderr, "%s %5.0f %s\n\t%s\n\n", + "NOTE: Data generation for scale factors >", + MAX_SCALE, + "GB is still in development,", + "and is not yet supported.\n"); + fprintf (stderr, + "Your resulting data set MAY NOT BE COMPLIANT!\n"); + } + break; + case 'O': /* optional actions */ + switch (tolower (*optarg)) + { + case 'd': /* generate SQL for deletes */ + gen_sql = 1; + break; + case 'f': /* over-ride default file names */ + fnames = 1; + break; + case 'h': /* generate headers */ + header = 1; + break; + case 'm': /* generate columnar output */ + columnar = 1; + break; + case 'r': /* generate key ranges for delete */ + gen_rng = 1; + break; + case 's': /* calibrate the RNG usage */ + set_seeds = 1; + break; + case 'v': /* validate the data set */ + validate = 1; + break; + default: + fprintf (stderr, "Unknown 
option name %s\n", + optarg); + usage (); + exit (1); + } + break; + case 'D': /* direct load of generated data */ + direct = 1; + break; + case 'F': /* generate flat files for later loading */ + direct = 0; + break; + case 'U': /* generate flat files for update stream */ + updates = atoi (optarg); + break; + case 'r': /* set the refresh (update) percentage */ + refresh = atoi (optarg); + break; +#ifndef DOS + case 'C': + children = atoi (optarg); + break; +#endif /* !DOS */ + case 'n': /* set name of database for direct load */ + db_name = (char *) malloc (strlen (optarg) + 1); + MALLOC_CHECK (db_name); + strcpy (db_name, optarg); + break; + default: + printf ("ERROR: option '%c' unknown.\n", + *(vector[optind] + 1)); + case 'h': /* something unexpected */ + fprintf (stderr, + "%s Population Generator (Version %d.%d.%d%s)\n", + NAME, VERSION, RELEASE, + MODIFICATION, PATCH); + fprintf (stderr, "Copyright %s %s\n", TPC, C_DATES); + usage (); + exit (1); + } + +#ifndef DOS + if (children != 1 && step == -1) + { + pids = malloc(children * sizeof(pid_t)); + MALLOC_CHECK(pids) + } +#else + if (children != 1 && step < 0) + { + fprintf(stderr, "ERROR: -C must be accompanied by -S on this platform\n"); + exit(1); + } +#endif /* DOS */ + + return; +} + +/* +* MAIN +* +* assumes the existance of getopt() to clean up the command +* line handling +*/ +int +main (int ac, char **av) +{ + int i; + + table = (1 << CUST) | + (1 << SUPP) | + (1 << NATION) | + (1 << REGION) | + (1 << PART_PSUPP) | + (1 << ORDER_LINE); + force = 0; + insert_segments=0; + delete_segments=0; + insert_orders_segment=0; + insert_lineitem_segment=0; + delete_segment=0; + verbose = 0; + columnar = 0; + set_seeds = 0; + header = 0; + direct = 0; + scale = 1; + flt_scale = 1.0; + updates = 0; + refresh = UPD_PCT; + step = -1; +#ifdef SSBM + tdefs[LINE].base *= + ORDERS_PER_CUST; /* have to do this after init */ +#else + tdefs[ORDER].base *= + ORDERS_PER_CUST; /* have to do this after init */ + 
tdefs[LINE].base *= + ORDERS_PER_CUST; /* have to do this after init */ + tdefs[ORDER_LINE].base *= + ORDERS_PER_CUST; /* have to do this after init */ +#endif + fnames = 0; + db_name = NULL; + gen_sql = 0; + gen_rng = 0; + children = 1; + d_path = NULL; + +#ifdef NO_SUPPORT + signal (SIGINT, exit); +#endif /* NO_SUPPORT */ + process_options (ac, av); +#if (defined(WIN32)&&!defined(_POSIX_)) + for (i = 0; i < ac; i++) + { + spawn_args[i] = malloc ((strlen (av[i]) + 1) * sizeof (char)); + MALLOC_CHECK (spawn_args[i]); + strcpy (spawn_args[i], av[i]); + } + spawn_args[ac] = NULL; +#endif + + if (verbose >= 0) + { + fprintf (stderr, + "%s Population Generator (Version %d.%d.%d%s)\n", + NAME, VERSION, RELEASE, MODIFICATION, PATCH); + fprintf (stderr, "Copyright %s %s\n", TPC, C_DATES); + } + + load_dists (); + /* have to do this after init */ + tdefs[NATION].base = nations.count; + tdefs[REGION].base = regions.count; + + /* + * updates are never parallelized + */ + if (updates) + { + /* + * set RNG to start generating rows beyond SF=scale + */ + double fix1; + +#ifdef SSBM + set_state (LINE, scale, 1, 2, (long *)&i); + fix1 = (double)tdefs[LINE].base / (double)10000; /*represent the %% percentage (n/100)%*/ +#else + set_state (ORDER, scale, 1, 2, (long *)&i); + fix1 = (double)tdefs[ORDER_LINE].base / (double)10000; +#endif + rowcnt = (int)(fix1 * scale * refresh); + if (step > 0) + { + /* + * adjust RNG for any prior update generation + */ + sd_order(0, rowcnt * (step - 1)); + sd_line(0, rowcnt * (step - 1)); + upd_num = step - 1; + } + else + upd_num = 0; + + while (upd_num < updates) + { + if (verbose > 0) +#ifdef SSBM + fprintf (stderr, + "Generating update pair #%d for %s [pid: %d]", + upd_num + 1, tdefs[LINE].comment, DSS_PROC); +#else + fprintf (stderr, + "Generating update pair #%d for %s [pid: %d]", + upd_num + 1, tdefs[ORDER_LINE].comment, DSS_PROC); + +#endif + insert_orders_segment=0; + insert_lineitem_segment=0; + delete_segment=0; + minrow = upd_num * 
rowcnt + 1; +#ifdef SSBM + gen_tbl (LINE, minrow, rowcnt, upd_num + 1); +#else + gen_tbl (ORDER_LINE, minrow, rowcnt, upd_num + 1); +#endif + if (verbose > 0) + fprintf (stderr, "done.\n"); +#ifdef SSBM + pr_drange (LINE, minrow, rowcnt, upd_num + 1); +#else + pr_drange (ORDER_LINE, minrow, rowcnt, upd_num + 1); +#endif + upd_num++; + } + + exit (0); + } + + /** + ** actual data generation section starts here + **/ +/* + * open database connection or set all the file names, as appropriate + */ + if (direct) + prep_direct ((db_name) ? db_name : DBNAME); + else if (fnames) + for (i = PART; i <= REGION; i++) + { + if (table & (1 << i)) + if (set_files (i, -1)) + { + fprintf (stderr, "Load aborted!\n"); + exit (1); + } + } + +/* + * traverse the tables, invoking the appropriate data generation routine for any to be built + */ + for (i = PART; i <= REGION; i++) + if (table & (1 << i)) + { + if (children > 1 && i < NATION) + if (step >= 0) + { + if (validate) + { + INTERNAL_ERROR("Cannot validate parallel data generation"); + } + else + partial (i, step); + } +#ifdef DOS + else + { + fprintf (stderr, + "Parallel load is not supported on your platform.\n"); + exit (1); + } +#else + else + { + if (validate) + { + INTERNAL_ERROR("Cannot validate parallel data generation"); + } + else + pload (i); + } +#endif /* DOS */ + else + { + minrow = 1; + if (i < NATION) + rowcnt = tdefs[i].base * scale; + else + rowcnt = tdefs[i].base; +#ifdef SSBM + if(i==PART){ + rowcnt = tdefs[i].base * (floor(1+log((double)(scale))/(log(2)))); + } + if(i==DATE){ + rowcnt = tdefs[i].base; + } +#endif + if (verbose > 0) + fprintf (stderr, "%s data for %s [pid: %ld]", + (validate)?"Validating":"Generating", tdefs[i].comment, DSS_PROC); + gen_tbl (i, minrow, rowcnt, upd_num); + if (verbose > 0) + fprintf (stderr, "done.\n"); + } + if (validate) + printf("Validation checksum for %s at %d GB: %0x\n", + tdefs[i].name, scale, tdefs[i].vtotal); + } + + if (direct) + close_direct (); + + return (0); +} + + 
+ + + + + + + + + diff --git a/data/ssb/dbgen/driver.o b/data/ssb/dbgen/driver.o new file mode 100644 index 0000000000000000000000000000000000000000..11ddc431f6fe96551e2cb1815bf6424d62eaf0c3 GIT binary patch literal 41400 zcmeI5dwdkt`Tr*nFhtA-MH>a}RD%XfV!}0v6cY%rXn>SkQJ^90Zb&5Ajk_Bz-T*_; zZ3s}WShZ4%g?i=d712Tv6uf*DuSKPbmr^3Ah*m*SzRz>!IlCVYnOOU~yuN?*yx5(Y z_j%6eJm)fJ&di)F^M{v?=#Z3T>5ya%u@Xm`TGrg#6Z=JKx5zr#>TZ4EIpdqM$=-&2 zDI1yY;V)+1hF|cmb#>nOCwaT~1aHIUByYj#3ZCX|>cl%u{Z}-875)3hRN7Zb_r5h{ z^JY$8{Hx+g#S@DsOdLuBS6 zK?lkkZ}Z$FXD9ZIx3R^u=pkkKsW-OO!M=A$^A1kuZSJ1o-Sk27s92xw#Z6PY7d3o) z#Ixu@4hVPGf8mnm!Vn;P~PxdwsV6~0xQoev77W>rOteV^# zdrxX|t9T$q6-ssb9|P*%|EUMvZ~pi9hlVXFQ>K}t)xv#M9cUo>&Krwnri_ZcF)H>l zj9!Z}CsS-*OOray$)qQJGH49!;q|@bjcxbd>iznV=fV9YvDV(7$4WCZXuRtwLuoW~ zbmJ$UMZsjt8XeoWwIs7U4<%mTGo{V_7mV7pJGu2vI;l6NQw7RNrA9^?xT5)T7M3>u zaRntun~oem(z|f;8cH{+U}yaYTU9b1ANDRSUf0_0(4jI)j2gx3_Wc-hZ|rUFrf)9r zZu)o#jiVo2-^asHdn)b=lp9rS+vG07dzdz%hk z;O+gixBAueb9jI}$hxtoWysM(jjq=&qZ^NSmhlBnNPVX4corQdx`944ZYIXcBWehh zi-cC`p$FL54)3iOetl(Ly0_t*B+t@jYVc7F`;t719-%#NQwep8=Bb^m(%v8Hi==1S zf$xtT(R$x?nzJglM~{-e__C1zdd)1__B}Z<2el4|kD|_#4ek5}_*OQ#L zx&|BHph#Uur+b^9QdX);)uqF!%hm^#gcC`5I+q{kQss?j*22y=N@kq%F_~4FnX%O| znGKc9IMOwTDW_^6m;T539%$IFJXx4g~j9KWz>LM~mZ)mUUqIixeT4y6RnY^(LPU0s|3 zrO6q^Xky?@%IZ(_F5JSS9?T8Yz~D>-_fZxqVVT~RI#ZES2kY~uy@`qRf%Uzp2dMd) zbV_HSb2`X+o^rTG_GvHjto9<${kJ3A_4cR#wrtkFoieArl$q^Cp4ndH&)bX4YcH~A zo00!YPdKT~l+qJ=w--6Ey~uXGZMXXRwkO+e*K9Yk-L84Yzn$mM_EL6hFS6YeiLWom zo`MZqGMs6Wb32zF`!4ncOp54=nAeiH7G6gh@uNjg(NU@I!%?warLp(rM#wAEC%H?D zJzaV}pK6w85nsfcBQz=c%1g5-Z^N@mGIHoUI(=DM_B*mv7vuOa=4o6*%(?P3Y)R+A zY_r!#w=2o&u2BZm(x%c5DW%TsYwUA@PLa*Ksn&sXQH{N;hf+TBc1+HU`*#5RquGhJ zo6L67kvo$|$JA$Q30bI-@0dH~jN;gLZZ$iT4L)-SmZ+9+X3hZJ$3-STO< z`TwQ&Jj*^()vAX3*wcK=?BOe?kEULI8V{4(&Et;HT&%P?v+*Du$qjm`a|tSSZW>$9 z=OoT_OO;Z6Z6tH2(E5TEe`w%!W~`JmwS7{F)MMg#1Uz~0Ed3?P$kPY%X@$~`4X3W4 zd~Ql!(QxocbRo@&SLi}ry;1txMn3;lg1z_Y24W|UX&@0(70`Hurx5XOH<8Q6b3o{J zK4QAtWxzF5`_wh7GmJ`}4%ii13jc?j^qcuvk8hfiW_Ho+xmFo 
z*yiKaiDSNpYrTb!+nZykT1?#49#ilGo^Gn(2R!NOPO#E*-u5~34|qL&TBP8Wp3hq7 z_}mtXa_VUD&nVZUC%%;2=|b+kg)3iqqtDScry#A*@U%WrLo&}UZC7}*{2Hokf_-7mkg_XW(`>)FuF+hjE* zFK^i6Z#a13oVl@+lWKQWGvqhm;ZkggMO*n0Hg+jRJ2 z8V{#)=OP )-GK|-V#+5Brm$q9NWY5n|LdQQCV z_KrV#H1Ro|s;T4|%d?>@Nnb9xE2*oD)24DucBxhT1lL8ey2<-j#;qT5*JhkY`m||1 zg^H#JiiU$p(Js#A@krD|PnhYMTKoyKw|Q_Qd*`?uA8_k;6tBCl-HI`CR-?W%`LvIk z9%azP)9DZ|vRdR+lT! z6k;ZpD%(W7Q_V70nK#cll=w`kII#*6FJz86IIUOr8{RB#N zIsIn9nf1wAo7TUmc2!-t?Eh)!q^_3UdMZ_AX)`?+dxBiF);rDMJP%ik`i9 zZ#dM&vuqUE#dg4(#^~QRko=L@%c^>5_V?fV+MGrL$4osF(+KCLwehRDgT#sYt|HR9 znyXE2`LfzlQF-ZPT_oAw$PcICF4&wLa+tHD(?y9BY^|0(x?s`tvx)mHpo3704 zuCh(o-axjx0vlJ+fmvBE5>povtgp94AnEEZ4Tp~NEd4Su9V2s#*|LeMJ=J8&>)RB+ z<&OQ8`oop%wR-s8_?)Nl_mqUrwNH57GAo@fDK(iX`V4(q`KwRS1D?sMtd*Xu9f_G* z*0xH|gy-qF7YUJ4&ik|b@zjTAYR+Vc;Yj&RD86e(`l$ z$WyYdw5jAIdU&d;rR^K5)?$^9aUzt;{X^?mj`dMjrW$SL@=}&5oFyxJ}`Kfb(V)c6TjUAZq94O_lYO(-0pqv*W4 zK*S1GM5{x=h!qaZRNGK_s4iR`vZ{lTXt>@NwIa14f57rpL<2KJ;dxfLvZi9DRT~M+ ztPMu3V5pwbk}ESJR)zo4fGY2|wT4wy*ZA$4P=!Abwu7Ol?F$FU5x+I3f=Uvc znUj-ar49?#lM%-uG(*N)1;P#LtIpQM*U3Z$QOu2D5}u5!oJylJ5oI}SW%;F zBhgS@UBKT58GDgxO$^QohUNtAs48HvqBg)aV^swGc6CJMJ1-EmBlUH4p)mPIZjT(w zF_9>>h+Q4D{ejtmnowOWRaqaZVY}CWoZL#*;CfEzhw8X-?05#l$Bi31uFw{IPA}g% zcD>^YWifvI(9w3*_-IAg9`CCP)K=I-13_OEHR>#Sf}B@S(BICs`{w56TEjwh^TO3L ztGL4|$GNLcs;H@^MxiRSMh1d`u(MlA6>V3{P{sIJD!Zy0t}+znv{R^B3+-Mt{%KYn zb*r4{j2gA$RE)(HP*Y8(3UhrBkw{-X)o-mt>H{6geM_a{9Z67A9SnrR)a)uEek8H# zBpn{4j&277bEtx;uAS4%7H8RUYGn?o;{ySIL`Tqx8xGC!(b+^5QbV1DdU(Vh9Pw4u z1cpqTl0A&NcR1vuzCI;;Jn?9tj`}$Vr{_+|zJmI0$yIzRjJ2<*tg4O7zKU!o*jLo~ zBL13+s~qcu{cJJylWka~OXSD8*)u4cA$$&TN4I%A30KdkS7)m7nqqkd&8`KNU40~= zcG*$t&;hE*kWHr*w_`xH8QUjLzbO-gd;*n>9pA^UrIxT~1Z>jq+jFX;Rd)9HX>4v~ zm#7rvhd-o5W>io<$|vf?+yu(Z%JyqNBGlkfs{NV(4Gtt4ofDvQo1LtrL6Xi96|yU} zF13v>kYkUfA!|;!np_RcrNUAtJlcMQwvQKyM#89LV~6Uab@eomkiS*hj;gG#redBQ zt)kPemTIJ8Ci$9OeYBr6&IZEKcKjsQopz`Wa%pwZscyM9soIdHaLP5-_LB!xUvWRj z|FYDM%nL><=BkF1n$h(z*5;ZZ`w&$?cDUM4Z561jsIQ?xOO;9$Q4IiGchtSOglwOP 
zjW}&q72qM!sbR9Kb?m4=`0ITEn=U-{wLuyh5|zdEHP#Lr{>}=_v%?j^nbdlG-iW_6 zyl^1<*^d3BK)AYco--KGD6U)}XX5K&sK=)6%h@QO}nL5@J9-nNL=c zR?eR1T(_Jnk42}b&nUmIUX4mL&Q;Us9O2>D8Rf^0DZMzJ-vqnPNXO%!Ya+@gV%R5! z!*(*FbY_221EO0Jvad1nNsN<~RDniW_(JaV?|6UZ<3t_F37gHWj@R@_!^aIBJASxa z)vfHyb$nbWGAl5Lh7&$bjvl|b4pS$<;KYqJe#9^G?5w4c}&=)=QQp)|x%Ix-_s-L~Q=#dwM?e05Z#3-us1j76d}FwoVW`v`qImkORks1P@& zC?8Ke!Zeuaq`e|)HX-u|>S=1WKo`qkC|pZ3!+ELJn6VRv7uqFU2U0QIpEwhBaiN(b z_vwf|#L3ROI8+~|QzJs>Kc7LW^7srGU3{@kTK3T4_OP*|%SuNTkC{MMW4;`h#cReM z=?wRLlF;d?x^6E&O<3t)uSjpMvI%r%M<`GBf+j-jI_Fas2$N5>6+s#(XzCR7Ro6HU z%_tmJSmG}nQCVGB6)YTESXOvhxG+*UzHma}#KPH^7mC|3VbFc2uA(|@pB1lBJyW2G z0*7H0L7tpRO9yN^d8onyHBN~WRi*Xy+=SewnT>B&q`p>~O3k|c_Q;_$4auD=-9jh( z&-Q;o4P1F%_Ea}FicX;CV?QGB9rpjvu>H6nmm5DyX)i@?eS5LT#{H1q;V#FfX^;Gn z-u_Yi;P77$Kcx4sEB(hZQ3Lzc(>}}UUgWU-7pQH~GUxcZ%|)LTeDnR#oZSw$a)&u~ zmc{KsVZJ`LMh+WRXlG5FLAN&bc3xgi|D4?Hyn1z*cXi*sIl29N3sy<^1th<8=V@!}^A%4T?4dU0AxNg9c9T)v92 ztRmiyO-pa+5K2oa-fWf9;RI$Bp5T*vL|Xc?4nxy2mUbMPW;Z7fP0L!+sW>fnap&T+ z{D$L7(pDyQY)VQ^%O_TxmPIyXJ2WkQD5)J!$+PIQias3v$z7TJSlppFEu*1hNt)ep zPlvROV&xe{ao*eM!=VQkxc$6GrmgJM@vCI_yt(|mDw0E2t~762x;HJuOZk4$F)hW) zB~&Ewyl#T?Tf)9A?pT~=Hzb#&<#xC`HO(eMJ~}1xl37&J^yNlq-^Y@)vW}bFs)y^j zMYO*9;qsQG6?Occit8PkR@ULt<7c`Syohu&WZ0)3gGC#tMS<0j2R@{qqd2!!NSi;AhlO4 zoYw_n7@-b2S@8*-L|!D^NtZ}bCY<{xhKcH+ljX!GVlELrIf2yPbm7ic9jLEL;Z+Hw z_MB;HC+jkUhsA!5!RHEJVDPJjFE;oh;Y$tv8{sz^e7SI5euv>^b5I&f5+hKg@0)97U8=M{;cp;gTJVbJF9UPqh1yJ&c-RZ zL+n?gG3~!A-0VLe39mEkKUT+`tWONSU-*{>|4JQqwze7kuy7|`BC><>yK{W})-fr< z`Nao@G~sg%-d*?_gP$gRhrw;(e4)i~ws70veT0`8ysz*D2G1A1*5E^g?=pCa@Dv%Z zv`_rkh!%}LxdwOYrL#5N;FFYnCo5BJwTfj;6`d7^{S2`$Hteg!ew$$*6n@a)&b5;K z)B~+#eYx0I8GM0o{xc#Bi-m7D_!8mfrQ~|yxqMxPutK==>MjxSd*O2pdwvVa^}NR5 z_bA@UQuBs{jI~zm_Z#+)iTzBqO(b|q?Cs78#(PhT{gsCOHnA@=>|YXIXYkiWXSJd8 zw(#u+-zB`k;Jbz2Zt#7=9e)#LI3WB}!~US~cMbl%@D4`*PgcjBtuo_Mo~k&{Fpm!D z!c&gM)+vg2vQpGG5!qAh(-TPTohdxS;8~*cQ$r_5c%i}jtK-hra-&@wyH3_5!@fxD zTMYXVig&X3&tNc=3OC1@al+Rd_Wb98Ja4(f;J;SKovr;wzJ9TP(y+f&>}{i6>coDl 
zVLw~!cNq3pDSo_VW1(n&k=V~Q?3)ztWbIJfM5613A29f>>X^=7LuaMp#~*XuT&;N5 zgs|*AAojUN%%h5TvUaL%BJwHW|1kJw;Yr3g`8@FxX=bWJciDNJxY;*3>MgO?c12?U zA@OcBzjfwWmZi^weZs{o&Odk2|4Q^HNCI^)$ou3s>n{+l^*bw`c=%yU`MT1!E&oq* z;XiZXSuXrM7e3g94|U;Q7e3a7PjcaY7hX@C+xG;jR}Si4n6?XC?3;*lJ?DzO9*1sr zv0o|n&iUuu<8wZ%g_jvR4+~#TTMkH~ z!~PQC+nwr<*Z&NyW7r3T?>Fpu{>1&NgUmP7a}%0dp$nf?_%FT*BjNohW+C%_8VRJRu}%V3;(MN|A08x=jrm|LOnMk z-}boJe1~ZW~>1U5K+kSIYwh^~{MhJ;b%GWT7^v zxbV|m_?a#|%Y|pV@FB$6&$TBy-l*qUv^&Pdej;(MAFnvC%S(T_-e$RYU2t3Fy;O9Z z=PQZlKbH%iEc#UKD$IA$SuFNhVyFG#1+VPq8iW5V+g$V?6np1+Ny5*^h1VJS z8-%Yk_%p&+82lySRl;?7-xPkGVgIi1rwseu!lxVdpAv7Y{{Kmw+x<^6psMGMl=d6Z z$v5=V<$UjN@GRlE1|LMct$c@xy)E{-{x5W~AMe7axbPXGFU5+_m#SUtBQAWN3%|;R z|Hg%1@4|oQ!tZk7_qgzfUHFqOe6tIG!G*u(!gspxk6idZ7yhLSKkULg(fvSM{pti4 zezFTc!-b#i!gE~sKo@=iaUOrx8Rx|a;dREmE3p)|I!McX`dXo!JgX)cs*cd2)lh96 zuLASuixUU-|r1z+ZVz5|t)TCCXE&@>H@sl`c;u z%u^}zRMI?^wy#Rt*U3+%?W@xERcZUGw0%|DzA9~BmA0=++fUi_Q#SpSPCup7&v9Jo z^iyg3skHr6+I}i+e`Via+4ooW{hg93&HhTWze?L*rR}fM_E%{KsI&uA+5sx<0F`!t zN;^QM9iY+Z4N?k&oK#97U!^TjBws1yEBkz9pRaWCm1e$5o3AwUowO=dfuo?( z7O1oZ%DzA;6exuPm9{{oEl_Fm@>GJnTxFG)tITL2nbU8SKwhp&m6xlM<>ji}@^YQz zxsqIEm`6R0SMQYb;z^4ZAV#fOv`)927YJE;*^fm_T_cKHv?8Zmt!A{;$}lTROZFmG zd3l7^ZU>{4<-V#}q!+FU1T9_%M4EKutD8qAM_Kr*gI49tKs2J_tHQLQ6?|bbs{}jJ zjaK+(v1&Q5G`HBTa<#Y1{@yqtWa)#PGeEH_t?$1XYN73Z|{a-NE= zDc5VyPKpgCarQV=LGCubYvcTA@6{h3P!A5% zUo3E|&|gMa5x;Ipe>o@T;?o&t(O)f)1*ItWt5vC%T8mESz)Z_mL#?W1s<_su3CU1x zYvh=8)$#0brL#kZs_n^8ZOKq=$xvKtUKwq&TbWT>`ms0~!FAp^DL2(?2B zswkwOwq&5TY@l{{r61L+oRajCSI-f6K79p!=nrjG=pol*zTLlyKJ*u-D(LsPnrnOg z9+x@4>(tzNpX=NY@FEe-7tuZkwqHme&7Jp{4!;KYO4{eZ_I&HA?O$O?f&FiwkLGVN zq`;i>()kI{*-vS>E9QA(%I@g1}^Zegwmu|p)po98B;HWMwKAcb?}ve%=T=&hvbS^G`Qv{kuU2_4RW`Zb$TAzc1lr zH-XM3@V`6oXI=DP0gn3bfDWI_+RqPw^O_dT_4@<%6aClkIZXc#gU&6W(^2jdIQkUe z`n|HLf0Edn`lo}=3ed>}{uAK6K?nWs4;<@f2UkUJ-M!*3qc3s_^H5Wfez}=29EmtJf8#C^KJCe`qu(K4fqn!LH%2Rqy8U2hu5xX{WZY(+NAmY zpo9ABfTRA?pmPW4JO{id@RvXb_1^-H`X7SM??LBd;HLxs40KTc8{nv)%pH~j*Z-aL 
z(e-dV@H2p?3+D~$_W+Liy+CIr=7(<_10B>K3>@`GfX*L4huEW-Nlb5U3A_s?AJ)X zdw}CS?K9vwPx}@)&eLcKjtcAs&eKwX<2>yr!nqzW-!p(?zUKnRe9r@Y%(nnI<~tE| zxC)&0NtQJo_}RpD{nr4;d}j+c%exlrxv6w}tpoc%gZ>7v=YFE^Q?>)Y2l!jS?*-1! zT{-A}rStt7_%#&2Mds7r3upiDqmM3cGCjBCz&e<38gSOp_C>(&C$9Y;Bb@bl?WMM# z3ie$8n(qdV^>zR_=6eYEgP?P=^lQ#{E$|+~&2nV{$NC%wob!F1J{-youKlkBj{aOD zoc;MP`e^&>z#i-QCgA9gUT@9*vp;&B;EQ0-T*}r z=QF_10Y6_C&i-Eu{2kzr0RIehxSy|*@%+?OI-tNhc;A{Ooa;Y}KHC3W;Jtz813wq| z1;Bp}d<5`5z()hm2Co00m;Gr1?h||VAIITKK?m)x0GF;MamZ>Z|~M9@yXE zqO%I@QRh+MxuBzd9*&N=T`~_q;Q63)my6Dy!5($i11|ubjV?ORf<5Z|6*xcJ)aBag zqVplxqt2JW&j+1DE;?#5M|S4;(5)*$;k*_^`*VtLvmc%g_UKOz@LzyVKNp=sut%MX zfDZwkG8dg+f&B$w?*oqdwV<;E?4vIBjlhe*{x_h5``YU z@S&iy#YN`@ut%Mpz=wg(`z|_rz#erD0WSfa?_G2{@y!DTvmc%Ud^pAHe%M2}*$>YI zd-SIt@DZSs@1k=7*rQGv@R6W1(M4w}*rQG@a4+bDU3BJwJ?i`h_$bg>=Av^m*rU#B z;1`02S-{IczYcV8Jii=xGuZzLbg+x1^iqMI_!@=?;ZsEO<=zj>@nXRz<&kye*+z?huvV0baMV8t`pWl>L1N$++{lG5)j`i#pZr1ZG7yB!L2S9%z=-{~R z{1~=VZ(KjRAFcqMYl!pM$6*)f{5Kuzdj1F4V>=#yib|&16;?@%X0IM)yQ^E0r=`DZq8)ENLe zi%3hi%MjprefNUS8(=>X>=y%{3HIE_wSEvdUf(YVj{RW~=;L^}6gal~O`!88}P?_Vc<2uJ7&-U1@0%c^wIU~ z0gm~eD%{NX4B&WvWdX-}&INtEjt>To{m=`X=OOxjWG3iie+z;=-bX9|j{WLd;MlKj z0FL#rLbzEEcLB%pJ_9;9uhk1XMsPr0>|-uC2$E=vz8-iHaGZCI23`mD6M$oXs1?rj z_BXJPfIa%N5IFkZ1Um16&TqjUb?yNB_rQKN*kgGg0?zGnqdd3S1RVRr3!uLQ^j`;i z)PD~+mTNcgU7)ic^xp^mHQ1wmhn{3af%^&aF2J!JPXUhO+gZSOkd_`lvw^<_d=T)r zfe!_a{)_^?lk9aroC16o@M_`Qe^vvp1AFxUYT(#j%R%Qc(76-%6~O-lI+p=|2smHA z^mzLzaGtm5avcW!1;CFxoen5)d!e5hz{3=$bxs$~^??4L104O&1|2*v1_8g9w6y=l zpz}E7I~wfK&tC(NfV~fN(9bI1AAo%jbkLu`ZnC}kM6g(fPEDBoxpLvdN1&LuzwQxY~Z_qqfY8i$%q2i z1NwQk3(pnK{=Y{b?f(d{$9f(Ed=A)81U?sd0QfxMmjTEAe1&lK2d^*Jy4c?e_Snxi zf<3n58(@$3O{xw_4?^_gHP+?fbIsrLG&{W-dXPRdm8-iQygF${C8rXY4E>@eU`!R z68k;||C8`sgWn^(zri06o^SAngbz0OW5SCJzFv5V!8Zu^8r&o0Dm8e<$qwK-9tYiC z&x_6k2gmtK!Y3R2o0A+c)!=ia0jC?hi|F_b{;u#UgMT2r#^Ae!*BSg{;ZcL@=SOo5 zen9N6H27D-7a05-;foCZz3}S{-a$I-5`%XZzTDuc!f!PA6shMG20u~kZ#TFt^}o{K zr-}V4gFh(tYYcv-*xzUHQsHY2ev0TkV(=WXUuW>X!k;quK;bP0FBHDT;BQL(Z!`E% 
zv47FvBZO}^_+hE%*A4y$(cfY4RdOEfH25UZ-(~Pignwl4)lzSJ3_e5b_ZhrW_>-T2|4ZcS5J#6r&gws!qsi4P)>B5r@exvXdgKrbw)!;7*PdE5>;oS}Xy6_Bx z?-1V8;5&ue2G{TZ^xxU&a%~a&EW>_}@ID6D?+f+sRBE05V&C7eUnSi69rDC@drvz3xEkw+JsW?7PS~?=|>5J^mZq7OsC!Q|m7jKEbfh7e3kGG2v4U-Yk5&!G9~< zZ}6Lh-oT3ga29VuQd1q;R_6|->WS$_@iQfoxz_F zuKylU*Yg9y<+QUnSu%ZkbaY6HL};%6&RN<#yvdb) z=6cLwln=aB_Ia~X_Sv5OTC11{j3&$^!dxt%)`Q`gZ-DFqt6$P{wHwV&zXM(d^zZ_{#ByC!r=Tn z$Q;%H|Jp(7do6HXw;Zt!_#v=w0sb!VZNR?)Zp*$tUv$)9gUfJeO*Z%hIoADC>*(Ks ze%-LwzvF9O7xeGZn&Ykh-C14FT3`QO>=r4P=KA+pdorM)x&Hl5DQf)XJe>Q>w6XX3&39oJ0SeDIKtR9{tqdQHS))ZYqsqA_vNvaNN`v ziL#+1LkIi|g7i~`{Ci?n4*jQufgJjeud>xouF3JtV0{k#R2TiYS9G4J2CB*{=|7PU zIHmeO|F%hjt4?nB;(|r8ccL9fYDcO$4*9Vv_v@lKaduk9Ios>bceW?ehSR1vhwt>nmLvMlscDZj`^Jg0qtoAJ2(v8* zy)BY`Grg6`R@Co|{Pm$u&*u&AOE={tDkO8kDBP$GokLaEva2>@>AS7^_wq|N3e)BO zp6mZO`sn;KIFdpIeK-rFnAXZ{j-2g%msnfrRl`zTx{+QV>+NDmFWJSd)45|%kY;cW=$rH6pts9lUq#5* zU`daR_jNkW@1z}W7qk6~B)zQ-Wvk1r@#D0^nr8axlHU1$Ftl}|{xo@rcG_ycmftxR z`^AvkhwDt|ulrA1d2^C={rH=qc>H RrC)oSlV}s9 +#include +#ifdef SSBM +#include +#endif + +#define NONE -1 +#define PART 0 +#define PSUPP 1 +#define SUPP 2 +#define CUST 3 +#define ORDER 4 +#define LINE 5 +#define ORDER_LINE 6 +#define PART_PSUPP 7 +#define NATION 8 +#define REGION 9 +#define UPDATE 10 +#define MAX_TABLE 11 +#define ONE_STREAM 1 +#define ADD_AT_END 2 + +#ifdef MAX +#undef MAX +#endif +#ifdef MIN +#undef MIN +#endif +#define MAX(a,b) ((a > b )?a:b) +#define MIN(A,B) ( (A) < (B) ? 
(A) : (B)) + +#define INTERNAL_ERROR(p) {fprintf(stderr,"%s", p);abort();} +#define LN_CNT 4 +static char lnoise[4] = {'|', '/', '-', '\\' }; +#define LIFENOISE(n, var) \ + if (verbose > 0) fprintf(stderr, "%c\b", lnoise[(var%LN_CNT)]) + +#define MALLOC_CHECK(var) \ + if ((var) == NULL) \ + { \ + fprintf(stderr, "Malloc failed at %s:%d\n", \ + __FILE__, __LINE__); \ + exit(1);\ + } +#define OPEN_CHECK(var, path) \ + if ((var) == NULL) \ + { \ + fprintf(stderr, "Open failed for %s at %s:%d\n", \ + path, __FILE__, __LINE__); \ + exit(1);\ + } +#ifndef MAX_CHILDREN +#define MAX_CHILDREN 1000 +#endif + +/* + * macros that control sparse keys + * + * refer to Porting.Notes for a complete explanation + */ +#ifndef BITS_PER_LONG +#define BITS_PER_LONG 32 +#define MAX_LONG 0x7FFFFFFF +#endif /* BITS_PER_LONG */ +#define SPARSE_BITS 2 +#define SPARSE_KEEP 3 +#define MK_SPARSE(key, seq) \ + (((((key>>3)<<2)|(seq & 0x0003))<<3)|(key & 0x0007)) + +#define RANDOM(tgt, lower, upper, stream) dss_random(&tgt, lower, upper, stream) +#ifdef SSBM +typedef struct{ + char * name; + int start_day; + int start_month; + int end_day; + int end_month; +} season; +typedef struct { + char * name; + int month; + int day; +} holiday; + + +#endif + + +typedef struct +{ + long weight; + char *text; +} set_member; + +typedef struct +{ + int count; + int max; + set_member *list; + long *permute; +} distribution; + +/* + * some handy access functions + */ +#define DIST_SIZE(d) d->count +#define DIST_MEMBER(d, i) ((set_member *)((d)->list + i))->text + +typedef struct +{ + char *name; + char *comment; + long base; + int (*header) (); + int (*loader[2]) (); + long (*gen_seed)(); + int (*verify) (); + int child; + unsigned long vtotal; +} tdef; + +typedef struct SEED_T { + long table; + long value; + long usage; + long boundary; + } seed_t; + + +#if defined(__STDC__) +#define PROTO(s) s +#else +#define PROTO(s) () +#endif + +/* bm_utils.c */ +char *env_config PROTO((char *var, char *dflt)); +long 
yes_no PROTO((char *prompt)); +int a_rnd PROTO((int min, int max, int column, char *dest)); +int tx_rnd PROTO((long min, long max, long column, char *tgt)); +long julian PROTO((long date)); +long unjulian PROTO((long date)); +FILE *tbl_open PROTO((int tbl, char *mode)); +long dssncasecmp PROTO((char *s1, char *s2, int n)); +long dsscasecmp PROTO((char *s1, char *s2)); +int pick_str PROTO((distribution * s, int c, char *target)); +void agg_str PROTO((distribution *set, long count, long col, char *dest)); +void read_dist PROTO((char *path, char *name, distribution * target)); +void embed_str PROTO((distribution *d, int min, int max, int stream, char *dest)); +#ifndef STDLIB_HAS_GETOPT +int getopt PROTO((int arg_cnt, char **arg_vect, char *oprions)); +#endif /* STDLIB_HAS_GETOPT */ +long set_state PROTO((int t, long scale, long procs, long step, long *e)); + +/* rnd.c */ +long NextRand PROTO((long nSeed)); +long UnifInt PROTO((long nLow, long nHigh, long nStream)); +double UnifReal PROTO((double dLow, double dHigh, long nStream)); +double Exponential PROTO((double dMean, long nStream)); +void dss_random(long *tgt, long min, long max, long seed); +void row_start(int t); +void row_stop(int t); +void dump_seeds(int t); + +/* text.c */ +#define MAX_GRAMMAR_LEN 12 /* max length of grammar component */ +#define MAX_SENT_LEN 256 /* max length of populated sentence */ +#define RNG_PER_SENT 27 /* max number of RNG calls per sentence */ + +int dbg_text PROTO((char * t, int min, int max, int s)); + +#ifdef DECLARER +#define EXTERN +#else +#define EXTERN extern +#endif /* DECLARER */ + + +EXTERN distribution nations; +EXTERN distribution nations2; +EXTERN distribution regions; +EXTERN distribution o_priority_set; +EXTERN distribution l_instruct_set; +EXTERN distribution l_smode_set; +EXTERN distribution l_category_set; +EXTERN distribution l_rflag_set; +EXTERN distribution c_mseg_set; +EXTERN distribution colors; +EXTERN distribution p_types_set; +EXTERN distribution p_cntr_set; 
+ +/* distributions that control text generation */ +EXTERN distribution articles; +EXTERN distribution nouns; +EXTERN distribution adjectives; +EXTERN distribution adverbs; +EXTERN distribution prepositions; +EXTERN distribution verbs; +EXTERN distribution terminators; +EXTERN distribution auxillaries; +EXTERN distribution np; +EXTERN distribution vp; +EXTERN distribution grammar; + + +EXTERN long scale; +EXTERN int refresh; +EXTERN int resume; +EXTERN long verbose; +EXTERN long force; +EXTERN long header; +EXTERN long columnar; +EXTERN long direct; +EXTERN long updates; +EXTERN long table; +EXTERN long children; +EXTERN long fnames; +EXTERN int gen_sql; +EXTERN int gen_rng; +EXTERN char *db_name; +EXTERN int step; +EXTERN int set_seeds; +EXTERN int validate; +EXTERN char *d_path; + +/* added for segmented updates */ +EXTERN int insert_segments; +EXTERN int delete_segments; +EXTERN int insert_orders_segment; +EXTERN int insert_lineitem_segment; +EXTERN int delete_segment; + + +#ifndef DECLARER +extern tdef tdefs[]; + +#endif /* DECLARER */ + + +/***************************************************************** + ** table level defines use the following naming convention: t_ccc_xxx + ** with: t, a table identifier + ** ccc, a column identifier + ** xxx, a limit type + **************************************************************** + */ + +/* + * defines which control the parts table + */ +#define P_SIZE 126 +#ifdef SSBM +#define P_NAME_SCL 3 /*5 change to 3 according to the new schema*/ +#else +#define P_NAME_SCL 5 +#endif +#define P_MFG_TAG "Manufacturer#" +#define P_MFG_FMT "%s%01d" +#define P_MFG_MIN 1 +#define P_MFG_MAX 5 +#define P_BRND_TAG "Brand#" +#define P_BRND_FMT "%s%02d" +#define P_BRND_MIN 1 + +/*#ifdef SSBM +#define P_BRND_MAX 5 +#else*/ +#define P_BRND_MAX 40 +/*#endif*/ + +#define P_SIZE_MIN 1 +#define P_SIZE_MAX 50 +#define P_MCST_MIN 100 +#define P_MCST_MAX 99900 +#define P_MCST_SCL 100.0 +#define P_RCST_MIN 90000 +#define P_RCST_MAX 200000 
+#define P_RCST_SCL 100.0 +/* + * defines which control the suppliers table + */ +#define S_SIZE 145 +#define S_NAME_TAG "Supplier#" +#define S_NAME_FMT "%s%09ld" +#define S_ABAL_MIN -99999 +#define S_ABAL_MAX 999999 +#define S_CMNT_MAX 101 +#define S_CMNT_BBB 10 /* number of BBB comments/SF */ +#define BBB_DEADBEATS 50 /* % that are complaints */ +#define BBB_BASE "Customer " +#define BBB_COMPLAIN "Complaints" +#define BBB_COMMEND "Recommends" +#define BBB_CMNT_LEN 19 +#define BBB_BASE_LEN 9 +#define BBB_TYPE_LEN 10 + +/* + * defines which control the partsupp table + */ +#define PS_SIZE 145 +#define PS_SKEY_MIN 0 +#define PS_SKEY_MAX ((tdefs[SUPP].base - 1) * scale) +#define PS_SCST_MIN 100 +#define PS_SCST_MAX 100000 +#define PS_QTY_MIN 1 +#define PS_QTY_MAX 9999 +/* + * defines which control the customers table + */ +#define C_SIZE 165 +#define C_NAME_TAG "Customer#" +#define C_NAME_FMT "%s%09d" +#define C_MSEG_MAX 5 +#define C_ABAL_MIN -99999 +#define C_ABAL_MAX 999999 +/* + * defines which control the order table + */ +#define O_SIZE 109 +#define O_CKEY_MIN 1 +#define O_CKEY_MAX (long)(tdefs[CUST].base * scale) +#define O_ODATE_MIN STARTDATE +#define O_ODATE_MAX (STARTDATE + TOTDATE - \ + (L_SDTE_MAX + L_RDTE_MAX) - 1) +#define O_CLRK_TAG "Clerk#" +#define O_CLRK_FMT "%s%09d" +#define O_CLRK_SCL 1000 +#define O_LCNT_MIN 1 +#define O_LCNT_MAX 7 + +/* + * defines which control the lineitem table + */ +#define L_SIZE 144L +#define L_QTY_MIN 1 +#define L_QTY_MAX 50 +#define L_TAX_MIN 0 +#define L_TAX_MAX 8 +#define L_DCNT_MIN 0 +#define L_DCNT_MAX 10 +#define L_PKEY_MIN 1 + +#ifdef SSBM +/*part table log based*/ +#define L_PKEY_MAX (tdefs[PART].base * (floor(log((double)scale))+1)) +#else +#define L_PKEY_MAX (tdefs[PART].base * scale) +#endif + +#define L_SDTE_MIN 1 +#define L_SDTE_MAX 121 +#define L_CDTE_MIN 30 +#define L_CDTE_MAX 90 +#define L_RDTE_MIN 1 +#define L_RDTE_MAX 30 +/* + * defines which control the time table + */ +#define T_SIZE 30 +#define 
T_START_DAY 3 /* wednesday ? */ +#define LEAP(y) ((!(y % 4) && (y % 100))?1:0) + +/******************************************************************* + ******************************************************************* + *** + *** general or inter table defines + *** + ******************************************************************* + *******************************************************************/ +#define SUPP_PER_PART 4 +#define ORDERS_PER_CUST 10 /* sync this with CUST_MORTALITY */ +#define CUST_MORTALITY 3 /* portion with have no orders */ +#define NATIONS_MAX 90 /* limited by country codes in phone numbers */ +#define PHONE_FMT "%02d-%03d-%03d-%04d" +#define STARTDATE 92001 +#define CURRENTDATE 95168 +#define ENDDATE 98365 +#define TOTDATE 2557 +#define UPD_PCT 10 +#define MAX_STREAM 47 +#define V_STR_LOW 0.4 +#define PENNIES 100 /* for scaled int money arithmetic */ +#define Q11_FRACTION (double)0.0001 +/* + * max and min SF in GB; Larger SF will require changes to the build routines + */ +#define MIN_SCALE 1.0 +#define MAX_SCALE 1000.0 +/* + * beyond this point we need to allow for BCD calculations + */ +#define MAX_32B_SCALE 1000.0 +#define INIT_HUGE(v) { \ + v = (DSS_HUGE *)malloc(sizeof(DSS_HUGE) * HUGE_COUNT); \ + MALLOC_CHECK(v); \ + } +#define FREE_HUGE(v) free(v) +#ifdef SUPPORT_64BITS +#define LONG2HUGE(src, dst) *dst = (DSS_HUGE)src +#define HUGE2LONG(src, dst) *dst = (long)src +#define HUGE_SET(src, dst) *dst = *src +#define HUGE_MUL(op1, op2) *op1 *= op2 +#define HUGE_DIV(op1, op2) *op1 /= op2 +#define HUGE_ADD(op1, op2, dst) *dst = *op1 + op2 +#define HUGE_SUB(op1, op2, dst) *dst = *op1 - op2 +#define HUGE_MOD(op1, op2) *op1 % op2 +/* HUGE_CMP: three-way compare of the values op1 and op2 point to; yields 0 when equal, -1 when *op1 < *op2, 1 otherwise. Fully parenthesized so it can be used inside a larger expression. */ +#define HUGE_CMP(op1, op2) ((*(op1) == *(op2))?0:((*(op1) < *(op2))?-1:1)) +#else +#define LONG2HUGE(src, dst) {*dst = src; *(dst + 1) = 0;} +#define HUGE2LONG(src, dst) { dst=0 ; \ + bcd2_bin(dst, (src + 1)); \ + bcd2_bin(dst, src); } +#define HUGE_SET(src, dst) { *dst = *src ; *(dst + 1) = *(src + 1); } +#define
HUGE_MUL(op1,op2) bcd2_mul(op1, (op1 + 1), op2) +#define HUGE_DIV(op1,op2) bcd2_div(op1, (op1 + 1), op2) +#define HUGE_ADD(op1,op2,d) { \ + HUGE_SET(op1, d); \ + bcd2_add(d, (d + 1), op2); \ + } +#define HUGE_SUB(op1,op2,d) { \ + HUGE_SET(op1, d); \ + bcd2_sub(d, (d + 1), op2); \ + } +#define HUGE_MOD(op1, op2) bcd2_mod(op1, (op1 + 1), op2) +#define HUGE_CMP(op1, op2) (bcd2_cmp(op1, (op1 + 1), op2) == 0)?0:\ + ((bcd2_cmp(op1, (op1 + 1), op2) < 0)?-1:1) +#endif /* SUPPORT_64BITS */ + +/******** environmental variables and defaults ***************/ +#define DIST_TAG "DSS_DIST" /* environment var to override ... */ +#define DIST_DFLT "dists.dss" /* default file to hold distributions */ +#define PATH_TAG "DSS_PATH" /* environment var to override ... */ +#define PATH_DFLT "." /* default directory to hold tables */ +#define CONFIG_TAG "DSS_CONFIG" /* environment var to override ... */ +#define CONFIG_DFLT "." /* default directory to config files */ +#define ADHOC_TAG "DSS_ADHOC" /* environment var to override ... 
*/ +#define ADHOC_DFLT "adhoc.dss" /* default file name for adhoc vars */ + +/******* output macros ********/ +#ifndef SEPARATOR +#define SEPARATOR '|' /* field spearator for generated flat files */ +#endif +/* Data type flags for a single print routine */ +#define DT_STR 0 +#ifndef MVS +#define DT_VSTR DT_STR +#else +#define DT_VSTR 1 +#endif /* MVS */ +#define DT_INT 2 +#define DT_HUGE 3 +#define DT_KEY 4 +#define DT_MONEY 5 +#define DT_CHR 6 + +int dbg_print(int dt, FILE *tgt, void *data, int len, int eol); +#define PR_STR(f, str, len) dbg_print(DT_STR, f, (void *)str, len, 1) +#define PR_VSTR(f, str, len) dbg_print(DT_VSTR, f, (void *)str, len, 1) +#define PR_VSTR_LAST(f, str, len) dbg_print(DT_VSTR, f, (void *)str, len, 0) +#define PR_INT(f, str) dbg_print(DT_INT, f, (void *)str, 0, 1) +#define PR_HUGE(f, str) dbg_print(DT_HUGE, f, (void *)str, 0, 1) +#define PR_KEY(f, str) dbg_print(DT_KEY, f, (void *)str, 0, -1) +#define PR_MONEY(f, str) dbg_print(DT_MONEY, f, (void *)str, 0, 1) +#define PR_CHR(f, str) dbg_print(DT_CHR, f, (void *)str, 0, 1) +#define PR_STRT(fp) /* any line prep for a record goes here */ +#define PR_END(fp) fprintf(fp, "\n") /* finish the record here */ + +#ifdef SSBM +#define PR_DATE(tgt, yr, mn, dy) \ + sprintf(tgt, "19%02d%02d%02d", yr, mn, dy) +#else +#ifdef MDY_DATE +#define PR_DATE(tgt, yr, mn, dy) \ + sprintf(tgt, "%02d-%02d-19%02d", mn, dy, yr) +#else +#define PR_DATE(tgt, yr, mn, dy) \ +sprintf(tgt, "19%02d-%02d-%02d", yr, mn, dy) +#endif /* DATE_FORMAT */ +#endif +/* + * verification macros + */ +#define VRF_STR(t, d) {char *xx = d; while (*xx) tdefs[t].vtotal += *xx++;} +#define VRF_INT(t,d) tdefs[t].vtotal += d +#ifdef SUPPORT_64BITS +#define VRF_HUGE(t,d) tdefs[t].vtotal = *((long *)&d) + *((long *)(&d + 1)) +#else +#define VRF_HUGE(t,d) tdefs[t].vtotal += d[0] + d[1] +#endif /* SUPPORT_64BITS */ +/* assume float is a 64 bit quantity */ +#define VRF_MONEY(t,d) tdefs[t].vtotal = *((long *)&d) + *((long *)(&d + 1)) +#define 
VRF_CHR(t,d) tdefs[t].vtotal += d +#define VRF_STRT(t) +#define VRF_END(t) + +/*********** distribuitons currently defined *************/ +#define UNIFORM 0 + +/* + * seed indexes; used to separate the generation of individual columns + */ +#define P_MFG_SD 0 +#define P_BRND_SD 1 +#define P_TYPE_SD 2 +#define P_SIZE_SD 3 +#define P_CNTR_SD 4 +#define P_RCST_SD 5 +#define PS_QTY_SD 7 +#define PS_SCST_SD 8 +#define O_SUPP_SD 10 +#define O_CLRK_SD 11 +#define O_ODATE_SD 13 +#define L_QTY_SD 14 +#define L_DCNT_SD 15 +#define L_TAX_SD 16 +#define L_SHIP_SD 17 +#define L_SMODE_SD 18 +#define L_PKEY_SD 19 +#define L_SKEY_SD 20 +#define L_SDTE_SD 21 +#define L_CDTE_SD 22 +#define L_RDTE_SD 23 +#define L_RFLG_SD 24 +#define C_NTRG_SD 27 +#define C_PHNE_SD 28 +#define C_ABAL_SD 29 +#define C_MSEG_SD 30 +#define S_NTRG_SD 33 +#define S_PHNE_SD 34 +#define S_ABAL_SD 35 +#define P_NAME_SD 37 +#define O_PRIO_SD 38 +#define HVAR_SD 39 +#define O_CKEY_SD 40 +#define N_CMNT_SD 41 +#define R_CMNT_SD 42 +#define O_LCNT_SD 43 +#define BBB_JNK_SD 44 +#define BBB_TYPE_SD 45 +#define BBB_CMNT_SD 46 +#define BBB_OFFSET_SD 47 + +#endif /* DSS_H */ + + + + + + + + + + + + + + + + + diff --git a/data/ssb/dbgen/dss.ri b/data/ssb/dbgen/dss.ri new file mode 100644 index 0000000..fb4c002 --- /dev/null +++ b/data/ssb/dbgen/dss.ri @@ -0,0 +1,100 @@ +-- Sccsid: @(#)dss.ri 2.1.8.1 +-- TPCD Benchmark Version 8.0 + +CONNECT TO TPCD; + +--ALTER TABLE TPCD.REGION DROP PRIMARY KEY; +--ALTER TABLE TPCD.NATION DROP PRIMARY KEY; +--ALTER TABLE TPCD.PART DROP PRIMARY KEY; +--ALTER TABLE TPCD.SUPPLIER DROP PRIMARY KEY; +--ALTER TABLE TPCD.PARTSUPP DROP PRIMARY KEY; +--ALTER TABLE TPCD.ORDERS DROP PRIMARY KEY; +--ALTER TABLE TPCD.LINEITEM DROP PRIMARY KEY; +--ALTER TABLE TPCD.CUSTOMER DROP PRIMARY KEY; + + +-- For table REGION +ALTER TABLE TPCD.REGION +ADD PRIMARY KEY (R_REGIONKEY); + +-- For table NATION +ALTER TABLE TPCD.NATION +ADD PRIMARY KEY (N_NATIONKEY); + +ALTER TABLE TPCD.NATION +ADD FOREIGN KEY 
NATION_FK1 (N_REGIONKEY) references TPCD.REGION; + +COMMIT WORK; + +-- For table PART +ALTER TABLE TPCD.PART +ADD PRIMARY KEY (P_PARTKEY); + +COMMIT WORK; + +-- For table SUPPLIER +ALTER TABLE TPCD.SUPPLIER +ADD PRIMARY KEY (S_SUPPKEY); + +ALTER TABLE TPCD.SUPPLIER +ADD FOREIGN KEY SUPPLIER_FK1 (S_NATIONKEY) references TPCD.NATION; + +COMMIT WORK; + +-- For table PARTSUPP +ALTER TABLE TPCD.PARTSUPP +ADD PRIMARY KEY (PS_PARTKEY,PS_SUPPKEY); + +COMMIT WORK; + +-- For table CUSTOMER +ALTER TABLE TPCD.CUSTOMER +ADD PRIMARY KEY (C_CUSTKEY); + +ALTER TABLE TPCD.CUSTOMER +ADD FOREIGN KEY CUSTOMER_FK1 (C_NATIONKEY) references TPCD.NATION; + +COMMIT WORK; + +-- For table LINEITEM +ALTER TABLE TPCD.LINEITEM +ADD PRIMARY KEY (L_ORDERKEY,L_LINENUMBER); + +COMMIT WORK; + +-- For table ORDERS +ALTER TABLE TPCD.ORDERS +ADD PRIMARY KEY (O_ORDERKEY); + +COMMIT WORK; + +-- For table PARTSUPP +ALTER TABLE TPCD.PARTSUPP +ADD FOREIGN KEY PARTSUPP_FK1 (PS_SUPPKEY) references TPCD.SUPPLIER; + +COMMIT WORK; + +ALTER TABLE TPCD.PARTSUPP +ADD FOREIGN KEY PARTSUPP_FK2 (PS_PARTKEY) references TPCD.PART; + +COMMIT WORK; + +-- For table ORDERS +ALTER TABLE TPCD.ORDERS +ADD FOREIGN KEY ORDERS_FK1 (O_CUSTKEY) references TPCD.CUSTOMER; + +COMMIT WORK; + +-- For table LINEITEM +ALTER TABLE TPCD.LINEITEM +ADD FOREIGN KEY LINEITEM_FK1 (L_ORDERKEY) references TPCD.ORDERS; + +COMMIT WORK; + +ALTER TABLE TPCD.LINEITEM +ADD FOREIGN KEY LINEITEM_FK2 (L_PARTKEY,L_SUPPKEY) references + TPCD.PARTSUPP; + +COMMIT WORK; + + diff --git a/data/ssb/dbgen/dsstypes.h b/data/ssb/dbgen/dsstypes.h new file mode 100644 index 0000000..ce2b7d8 --- /dev/null +++ b/data/ssb/dbgen/dsstypes.h @@ -0,0 +1,312 @@ + /* + * Sccsid: @(#)dsstypes.h 2.1.8.1 + * + * general definitions and control information for the DSS data types + * and function prototypes + * Modified for SSBM prototype + */ + +/* + * typedefs + */ +#ifdef SSBM +typedef struct +{ + long custkey; + char name[C_NAME_LEN + 1]; + int nlen; + char address[C_ADDR_MAX + 
1]; + int alen; + char city[CITY_FIX+1]; + int nation_key; + char nation_name[C_NATION_NAME_LEN+1]; + int region_key; + char region_name[C_REGION_NAME_LEN+1]; + char phone[PHONE_LEN + 1]; + char mktsegment[MAXAGG_LEN + 1]; +} customer_t; +#else +typedef struct +{ + long custkey; + char name[C_NAME_LEN + 1]; + char address[C_ADDR_MAX + 1]; + int alen; + long nation_code; + char phone[PHONE_LEN + 1]; + long acctbal; + char mktsegment[MAXAGG_LEN + 1]; + char comment[C_CMNT_MAX + 1]; + int clen; +} customer_t; +#endif + +/* customers.c */ +long mk_cust PROTO((long n_cust, customer_t * c)); +int pr_cust PROTO((customer_t * c, int mode)); +int ld_cust PROTO((customer_t * c, int mode)); + +#ifdef SSBM + +typedef struct +{ + DSS_HUGE *okey; /*for clustering line items*/ + int linenumber; /*integer, constrain to max of 7*/ + long custkey; + long partkey; + long suppkey; + char orderdate[DATE_LEN]; + char opriority[MAXAGG_LEN + 1]; + long ship_priority; + long quantity; + long extended_price; + long order_totalprice; + long discount; + long revenue; + long supp_cost; + long tax; + char commit_date[DATE_LEN] ; + char shipmode[O_SHIP_MODE_LEN + 1]; +} lineorder_t; +#else +typedef struct +{ + DSS_HUGE *okey; + long partkey; + long suppkey; + long lcnt; + long quantity; + long eprice; + long discount; + long tax; + char rflag[1]; + char lstatus[1]; + char cdate[DATE_LEN]; + char sdate[DATE_LEN]; + char rdate[DATE_LEN]; + char shipinstruct[MAXAGG_LEN + 1]; + char shipmode[MAXAGG_LEN + 1]; + char comment[L_CMNT_MAX + 1]; + int clen; +} line_t; +#endif + +#ifdef SSBM +typedef struct +{ + DSS_HUGE *okey; + long custkey; + int totalprice; + char odate[DATE_LEN]; + char opriority[MAXAGG_LEN + 1]; + char clerk[O_CLRK_LEN + 1]; + int spriority; + long lines; + lineorder_t lineorders[O_LCNT_MAX]; +} order_t; +#else +typedef struct +{ + DSS_HUGE *okey; + long custkey; + char orderstatus; + long totalprice; + char odate[DATE_LEN]; + char opriority[MAXAGG_LEN + 1]; + char clerk[O_CLRK_LEN + 
1]; + long spriority; + long lines; + char comment[O_CMNT_MAX + 1]; + int clen; + line_t l[O_LCNT_MAX]; +} order_t; +#endif + +/* order.c */ +long mk_order PROTO((long index, order_t * o, long upd_num)); +int pr_order PROTO((order_t * o, int mode)); +int ld_order PROTO((order_t * o, int mode)); +void ez_sparse PROTO((long index, DSS_HUGE *ok, long seq)); +#ifndef SUPPORT_64BITS +void hd_sparse PROTO((long index, DSS_HUGE *ok, long seq)); +#endif + +#ifdef SSBM +/*SSBM removes the part supplier table*/ +#else +typedef struct +{ + long partkey; + long suppkey; + long qty; + long scost; + char comment[PS_CMNT_MAX + 1]; + int clen; +} partsupp_t; +#endif + +#ifdef SSBM +typedef struct +{ + long partkey; + char name[P_NAME_LEN + 1]; + int nlen; + char mfgr[P_MFG_LEN + 1]; + char category[P_CAT_LEN + 1]; + char brand[P_BRND_LEN + 1]; + char color[P_COLOR_MAX + 1]; + int clen; + char type[P_TYPE_MAX + 1]; + int tlen; + long size; + char container[P_CNTR_LEN + 1]; +} part_t; +#else +typedef struct +{ + long partkey; + char name[P_NAME_LEN + 1]; + int nlen; + char mfgr[P_MFG_LEN + 1]; + char brand[P_BRND_LEN + 1]; + char type[P_TYPE_LEN + 1]; + int tlen; + long size; + char container[P_CNTR_LEN + 1]; + long retailprice; + char comment[P_CMNT_MAX + 1]; + int clen; + partsupp_t s[SUPP_PER_PART]; +} part_t; +#endif + +/* parts.c */ +long mk_part PROTO((long index, part_t * p)); +int pr_part PROTO((part_t * part, int mode)); +int ld_part PROTO((part_t * part, int mode)); + +#ifdef SSBM +typedef struct +{ + long suppkey; + char name[S_NAME_LEN + 1]; + char address[S_ADDR_MAX + 1]; + int alen; + char city[CITY_FIX +1]; + int nation_key; + char nation_name[S_NATION_NAME_LEN+1]; + int region_key; + char region_name[S_REGION_NAME_LEN+1]; + char phone[PHONE_LEN + 1]; +} supplier_t; +#else +typedef struct +{ + long suppkey; + char name[S_NAME_LEN + 1]; + char address[S_ADDR_MAX + 1]; + int alen; + long nation_code; + char phone[PHONE_LEN + 1]; + long acctbal; + char comment[S_CMNT_MAX 
+ 1]; + int clen; +} supplier_t; +#endif + +/* supplier.c */ +long mk_supp PROTO((long index, supplier_t * s)); +int pr_supp PROTO((supplier_t * supp, int mode)); +int ld_supp PROTO((supplier_t * supp, int mode)); + +#ifdef SSBM +/*todo: add new date table*/ + +typedef struct +{ + long datekey; + char date[D_DATE_LEN+1]; + char dayofweek[D_DAYWEEK_LEN+1] ; + char month[D_MONTH_LEN+1]; + int year; + int yearmonthnum; + char yearmonth[D_YEARMONTH_LEN+1]; + int daynuminweek; + int daynuminmonth; + int daynuminyear; + int monthnuminyear; + int weeknuminyear; + char sellingseason[D_SEASON_LEN + 1]; + int slen; + char lastdayinweekfl[2]; + char lastdayinmonthfl[2]; + char holidayfl[2]; + char weekdayfl[2]; +} date_t; + +/* date.c */ + +long mk_date PROTO((long index, date_t * d)); +int pr_date PROTO((date_t * date, int mode)); +int ld_date PROTO((date_t * date, int mode)); + +#endif + +typedef struct +{ + long timekey; + char alpha[DATE_LEN]; + long year; + long month; + long week; + long day; +} dss_time_t; + +/* time.c */ +long mk_time PROTO((long index, dss_time_t * t)); + + + +/* + * this assumes that N_CMNT_LEN >= R_CMNT_LEN + */ +typedef struct +{ + long code; + char *text; + long join; + char comment[N_CMNT_MAX + 1]; + int clen; +} code_t; + +/* code table */ +int mk_nation PROTO((long i, code_t * c)); +int pr_nation PROTO((code_t * c, int mode)); +int ld_nation PROTO((code_t * c, int mode)); +int mk_region PROTO((long i, code_t * c)); +int pr_region PROTO((code_t * c, int mode)); +int ld_region PROTO((code_t * c, int mode)); + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/data/ssb/dbgen/history.html b/data/ssb/dbgen/history.html new file mode 100644 index 0000000..fb6ed32 --- /dev/null +++ b/data/ssb/dbgen/history.html @@ -0,0 +1,586 @@ + + + + + + +DBGEN Release Notes + + + + + +

Release notes for DBGEN and QGEN

+
+These notes are taken from the History file which is distributed with the TPC-D soft appendix.

+ +

    +
  • Changes as of 5/11/00
      +
    • versions: TPCH 1.3.0, TPCR 1.3.0 +
    • Corrected update range rollover after 1000 update segments +
    • Corrected problem in permute.c causing invalid substitutions in Q16 +
    +
  • Changes as of 10/11/99
      +
    • versions: TPCH 1.2.0a, TPCR 1.1.0a +
    • Corrected range setting of segmented updates that was causing extra file to be generated +
    • Porting corrections for DigUnix +
    +
  • Changes as of 08/30/99
      +
    • versions: TPCH 1.2.0, TPCR 1.1.0 +
    • reduced parameter substitution range for Q18 +
    • added new option to specify location of dists file (-b) +
    • added DBGEN option to suppress all output (-q) +
    +
  • Changes as of 08/16/99
      +
    • versions: TPCH 1.1.0a, TPCR 1.0.1e +
    • prevent "reuse" of original data in update files +
    • correction to lint target in makefile.suite +
    • removal of vestigial l_partkey predicate from 21.sql +
    • reorder lineitem/order join in q5 +
    • removal of table aliases from 2.sql +
    • randomize seeding of qgen RNG to close bug 52 +
    • correct possible round off error in segmented update files +
    • corrected soft copy answer set for Q22 +
    • corrected precision of answer set for Q19 +
    +
  • Changes as of 07/08/99
      +
    • versions: TPCH 1.1.0, TPCR 1.0.1 +
    • WORKLOAD must be set to either TPCH or TPCR in the makefile +
    • unneeded reference to part table removed from q21 template +
    +
  • Changes as of 06/04/99
      +
    • version 1.0.1d +
    • Restarted version numbering to match specification revisions for + TPC-H and TPC-R +
    • Corrected answer set for Q13 +
    • Corrected parameter substitutions for Q16, Q17, Q19, Q20, Q21, Q22 +
    • Corrected RNG initialization in qgen.c +
    • added adhoc.c adhoc.h to code base to support randomized data sets; + currently disabled +
    • replaced calls to UnifInt() row_stop with call to NthElement() +
    • Corrected a problem that caused small negative money values to + print as a positive value +
    • Simplification of PR_xxx macros +
    • QGEN building correct parameter logs again +
    + +
+

+ +******************
+* NOTE NOTE NOTE *
+******************
+Below this line the file refers to TPC-D which was retired in favor of +TPC-H and TPC-R. Since the new specifications are numbered from 1.0.0 +the program version was reset.
+******************
+* NOTE NOTE NOTE *
+******************
+
+

+

    + +
  • Changes as of 01/05/99
      +
    • version 2.0.1 +
    • added 1999 to the copyright notice +
    • corrected C++ compilation problem +
    • sub-select phrasing corrected in Q4, Q21, Q22 +
    • added support for segmenting update files (contributed by Larry Kemp, HP) +
    +
  • Changes as of 12/08/98
      +
    • version 2.0.0 +
    • Removed permute.h from clean target in makefile.suite +
    +
  • Changes as of 11/17/98
      +
    • version 2.0.0 Alpha 8 +
    • corrected o_custkey overrun bug +
    • removed upper bound on -C command option +
    • added static permute.h to distribution to match the specification +
    +
  • Changes as of 10/23/98
      +
    • version 2.0.0 Alpha 7 +
    • removed references to DSS_SEED and SEED_TAG +
    • minor query template cleanup +
    • V2 answer sets added +
    • correction to hd_sparse for SF > 300 +
    • added static declaration to row types in gen_tbl to fix update problem +
    • permuted params to Q22 +
    +
  • Changes as of 5/20/98
      +
    • version 2.0.0 Alpha6b +
    • removed trailing apostrophe from dists.dss nouns for Tandem loader +
    • corrected mk_sparse() problem with alpha6 +
    • added 64b support for NCR/Metaware +
    • corrected generation of parent/child tables in parallel +
    • renamed ORDER table to ORDERS table +
    • revision of DBGEN synced with revision of 2.0 specification +
    • portability changes to process termination provided by John Matzka +
    • portability changes for Watcom C provided by Andrew Eisenberg +
    • standardized query template format +
    • queries now include a consistent header format +
    +
  • Changes as of 4/28/98
      +
    • version 2.0.0 Alpha5 +
    • NO RELEASE OF ALPHA 5 ; skipped to sync spec/DBGEN revision levels
    +
  • Changes as of 7 April 98
      +
    • version 2.0.0 Alpha4 +
    • Query template corrections for Q9, Q12, Q15a, Q22 +
    • Parallel generation of parent/child tables fixed +
    • Minor corrections to dists.dss +
    • Portability changes for HPUX +
    + +
  • Changes as of 3/24/98
      +
    • version 2.0.0 Alpha3 +
    • include substitution parameters for Q22 +
    • correct substitution parameters for Q16 under AIX +
    • include permute.h until unix/NT makefile fix +
    • correct orderkey generation +
    +
  • Changes as of 3/20/98
      +
    • version 2.0.0 Alpha2 +
    • correct runtime malloc error from bad INIT_HUGE macro +
    • improve pseudo text distribution in comments +
    • fix problem with parallelism of data gen +
    • re-enable generation of parent/child tables +
    • remove recombination code for parallel flat files +
    +
  • Changes as of 3/11/98
      +
    • version 2.0.0 Alpha1 +
    • removed the TIME table +
    • removed the need for seed files +
    • made 1GB the validation database size +
    • add pseudo text support in comments +
    • correct character selection in a_rnd() +
    • correct population of P_NAME +
    • removed unclaimed variants +
    • added new queries 18-22, replaced Q13 +
  • Changes as of 2/6/98
      +
    • version 1.3.1 +
    • Revised 64 bit support to clean up bcd2_bin()and mk_sparse() +
    • Add 64b support for NT +
  • Changes as of 12/31/97
      +
    • version 1.3.0 +
    • support for seed generation > 1TB (data gen still to be tested) +
    • rework of 64b support +
    • added bcd support for subtraction, comparison, modulo +
    • added 1998 to the copyright notice +
    • clarified comments in dists.dss +
    • corrected substitution problem in Q11 +
    • standardized fopen() error messages with OPEN_CHECK() +
    • introduced PATH_SEP in config.h to allow changes in path separators +
  • Changes as of 12/15/96
      +
    • version 1.2.0 +
    • corrected typos in queries 8a, 8c, 8d, 11a, 12F and 14F, 17a +
    • added variant 15c +
    • defined MAX_SCALE and MIN_SCALE; issued error messages for SF > 1000 +since implementation is incomplete +
    • seed file generation can now be resumed with dbgen -R < n > ... +
    • corrected slight compile bug under Solaris 2.5.1 +
    • documented compile problems under SunOS +
  • Changes as of 8/1/96
      +
    • version 1.1.0D +
    • included new variants for queries 8 and 15 +
    • re-introduced answer sets in the source tree +
  • Changes as of 5/1/96
      +
    • version 1.1.0C +
    • unified version numbering of DBGEN and QGEN +
    • updated BUGS list +
    • removed FAQ from soft appendix; web site will keep the current +version of the FAQ +
    • added 1996 to the copyright notice +
    • corrected bug in PR_DATE macro; NO CHANGE TO DATA SET +
    • properly initialize param values for cleaner logging +
    • adjusted output format of Q11 param to allow scaling to 1TB +
    • corrected typos in variant 14c +
    • corrected data type for YEAR in variant 8c +
    • corrected typos in variant 10a +
    • added variant 8d +
  • Changes as of 1/23/96
      +
    • qgen version 1.1.0B +
    • include support for ANSI semantics +
    • improved patch for seed sensitivity +
  • Changes as of 1/23/96
      +
    • updated BUGS list +
    • dbgen version 1.1.0A +
    • patch to limit BCD2 fields to 12 characters for columnar output +
    • qgen version 1.1.0A +
    • patch to fix the "unknown flag" problem +
    • patch to fix the seed sensitivity problem +
  • Changes as of 12/19/95
      +
    • updated BUGS list +
    • dbgen version 1.1.0 +
    • upped default value of MAX_CHILDREN to 1000 +
    • corrected naming of detail tables in incremental load +
    • corrected range delete output +
    • forced delete files to truncate existing files +
    • removed fixed size tables from seed generation +
    • corrected overflow problem with large scale seed generation +
    • allow date generation as MM-DD-YY based on config.h #define +
    • correct truncation problem with columnar output in PR_VSTR() +
    • added support for Windows NT +
    • added PLATFORM macro to makefile, removed platform defines from +config.h +
    • removed MAX_CHILDREN define from config.h (set to 1000 in dss.h) +
    • qgen version 1.1.0 +
    • correct SET_OUTPUT macro to TDAT +
    • use %ld in output for q17; portability +
    • add support for SQLSERVER database dialect +
    • add support for SYBASE database dialect +
    • adjust parameter ranges for Q1, Q3, Q6 +
    • add -T/-t option to usage summary +
    • added support for Windows NT +
  • Changes as of 09/01/95
      +
    • qgen version 1.0.1 +
    • formalized version numbering +
    • -p now generates correct query permutations +
    • added separate version number for qgen +
    • corrected Q3 substitution problem +
    • updated permissible range for Q10 +
    • corrected rowcount_dflt and the MAX row indicator (-1) +
    • expanded param logging to include all possible parameters +
    • allowed qgen's -d option to be used at all scale factors +
    • made parameter substitution permutation-independent +
    • added qgen support for END_TRAN (-E) and DFLT_NUM (-N) +
    • correct handling of :n directive +
    • added more complete explanation of QGEN to README +
    • rename of random to rndm, for portability +
    • dbgen version 1.0.1 +
    • formalized version numbering +
    • inclusion of SF=1 seed file +
    • correct typo in usage() update example +
    • patch to driver.c to allow correct updates +
    • documentation change to README to clarify seed/stage/update +interaction +
    • corrected minor glitch in "open failed" error msg in print.c +
    • added missing line continuation to makefile.suite +
    • seed files are now based on scale factor and number of generators +
    • seed files now hold seeds for one "step" of a given build +
    • clean up of parallel load routines +
    • inclusion of faster seed generation routines from Susanne Englert +
    • removed the -E(xisting) option +
    • assure proper scaling of O_CUSTKEY +
    • corrected default update percentage +
    • proper handling of child tables with '-O f' +
    • removed seed files from the distribution +
    • modified rpb_routine() to limit contribution of partkey in +retailprice +
    • added '-S(tep)' option to allow multi-stage loads +
    • roll in of 32 bit speed_seed routines from Dick Shelton +
    • miscellaneous typo corrections in the documentation +
    • cleanup of usage output +
  • Changes as of 05/08/95
      +
    • version 1.0 +
    • add Teradata defines to tpcd.h for QGEN +
    • add :c to query templates for database CONNECT syntax +
    • add examples of DBGEN and QGEN usage to README +
    • add -T option to qgen to allow time table usage +
    • query template names only require .sql suffix, rest is arbitrary +
  • Changes as of 03/13/95
      +
    • version 9.1 +
    • surround DBNAME with ifndef in config.h +
    • remove -DDBNAME from makefile.suite +
    • sync varchar handling with 9.1 draft +
  • Changes as of 02/21/95
      +
    • version 9.0a +
    • fixed bug in qgen that incorrectly included rnd.h +
    • included revised DDL with Changes for char/varchar and l_quantity +
    • updated DBGEN help message to include new single table options for +order/lineitem and part/partsupp +
    • included handling for multi-set seed files TPCDSEED.xxx +
    • generated seeds up through 400GB; headed to 1TB! +
    • ANSI lint cleanup; more needed +
    • UF2 now defaults to key lists; use "-O r" to generate key ranges +also note, this routine does NOT use the BCD2_* +routines. As a result, it WILL fail if the keys being deleted +exceed 32 bits. Since this would require ~660 update iterations, +this seems an acceptable oversight +
  • Changes as of 01/19/95
      +
    • version 9.0 +
    • allowed command line seeding of RNG for QGEN +
    • order and number of params in QGEN now matches +presentation in spec +
    • fixed bug in time table format of O_ORDERDATE +
    • changed l_QUANTITY to FLOAT in dss.ddl +
    • reworked QGEN options to be more useful +
    • allowed creation of sparse keys beyond 32 bits (for 1TB) +
    • removed unused '#ifdef' and associated code +
    • allowed independent generation of master/detail tables +(eg, order/lineitem) +
  • Changes as of 12/06/94
      +
    • version 8.6 +
    • fixed renaming of flat files for child tables +
    • various documentation fixes +
    • added naming convention section to Porting.Notes +
    • added -DIBM flag to config.h +
    • synced up QGEN with draft 8.1 +
  • Changes as of 10/25/94
      +
    • version 8.5a +
    • corrected bug in columnar output of pr_supp +
    • added pr_drange to generate a list of order keys to be +deleted instead of generating SQL +
    • added '-O d' to generate range delete as SQL +
    • updated default values for QGEN to sync with spec 8.1 +
    • corrected MK_SPARSE to reflect groups of 8 +
    • corrected a bug in o_orderstatus +
    • regenerated seed files for SF in [1,10] +
    • ANSI cleanup (primarily function declarations) +
  • Changes as of 10/11/94
      +
    • version 8.5 +
    • remove deletes/inserts to other than order/lineitem +
    • increased cardinality for part.type part.container +
    • '-r' argument is now integer; percentage in basis points +
    • initial roll-in of new update scheme +
    • added BBB comments to supplier table +
  • Changes as of 9/27/94
      +
    • version 8.4 +
    • all money calculations now use integer math. This should +bring everyone's data sets into exact agreement. +
  • Changes as of 9/21/94
      +
    • version 8.3b +
    • fixed handling of MAX_STREAM +
    • added floor function to RPRICE bridge +
    • misc lint cleanup (type fixes, new prototypes, etc.) +
    • MONEY format becomes lf for DOS +
    • further cleanup of PR_VSTR and its length argument +
    • change to parameter generation for Q6 to allow for float +discount +
  • Changes as of 9/15/94
      +
    • version 8.3a +
    • isolated MONEY format for Unisys (Lf) using DOS +
    • make sure all arguments to MAKE_MONEY were double's +
    • rolled in NEW_PTEXT to allow Berni to experiment +
  • Changes as of 9/12/94
      +
    • version 8.3 +
    • added -T n and -T r to usage to match getopt() and README +
    • changed PR_MONEY to remove leading blanks +
    • included revised DDL from Berni +
    • included some MVS portability fixes in re malloc.h +
    • cleaned up error messages in qgen and made #define ofp usage +universal +
    • additional DOS portability changes +
    • added {c,a}len to provide specific length for columnar +output of varchar +
    • added PR_VSTR to handle varchar printing under MVS +
    • fixed bit masking in a_rnd and cleaned up prototype match +with V_STR +
    • PR_MONEY now used %Lf +
    • added revised pseudo text under NEW_PTEXT ifdef for +experiments +
  • Changes as of 9/09/94
      +
    • version 8.2 +
    • l_discount and l_tax are now fractional (per teleconference) +
    • money calculations moved to scaled integer math to clean up +answer sets +
    • changed PR_FLT() to PR_MONEY to clarify usage +
    • portability changes for SYBASE: dbname -> db_name, +STATUS -> DBGEN_STATUS +
    • added nations2 to dists.dss to handle qgen needs for now +
    • reintroduced #ifndef DOS +
    • reintroduced U2200 define to control kill_load() +
    • broke out nation and region separately in -T option +
    • updated dss.ddl based on mail from Berni +
  • Changes as of 8/31/94
      +
    • version 8.1 +
    • scaling for clerks needed to be 1000 (was 100) +
    • added qgen parameter for scale +
    • changed qgen parameter from s)tream to p)ermutation +
    • synced qgen parameter values with 8.0 spec +
    • corrected duplications in dists.dss +
  • Changes as of 8/24/94
      +
    • version 8.0 +
    • added sparse keys to lineitem/order +
    • added varchar generation for comments/addresses +
    • added variable lineitems/orders +
    • removed ifdef for normalized code_tables +
    • included code for parameter generation and template->EQT +routines +
    • updated README and Porting.Notes to reflect QGEN +
    • included DDL and RI examples from Berni +
  • Changes as of 6/15/94
      +
    • version 7.0b (numbers now match spec revision) +
    • rework of code tables to properly map nation/region; when +compiled with -DCODE_TABLES distributions are taken from +code.dss and two additional fields are generated for +customers and suppliers, [cs]_ncode and [cs]_rcode, +immediately following [cs]_region +
    • replaced ifdef's around DEAD_DATA with opposites. DEAD_DATA +is now the default +
    • worked through code to see that it conformed to 7.0 +specification +
    • adjusted scale factors/rowcounts for 1 GB == sf1 +
    • brought help message in line with current code +
    • fixed order per customer at 10 +
    • make suppkey scalable in lineitem/partsupp +
  • Changes as of 4/25/94
      +
    • version 1.5 +
    • added the customers with no orders; Compile with -DDEAD_DATA +to activate the change. +
    • added the code table for nation and region; +Compile with -DCODE_TABLES to activate the change. +
  • Changes as of 3/17/94
      +
    • version 1.41 +
    • completed implementation of JULIAN_DAY after talks with Berni +
    • misc cleanup in usage/README files +
    • removed all tabs and capped line length at 75 +
    • added -n option to allowing naming of inline-loaded database +
  • Changes as of 3/16/94
      +
    • version 1.4 +
    • prototyped julian day/month for query re-write work. Compile +with -DJULIAN_DAY to enable +
    • removed gen_times() from driver.c +
    • added VMS ifdef to config.h to clean up fork/signal issues +
    • added ICL ifdef to config.h to clean up getopt() issues +
    • changed header file references to config.h from machine.h +
  • Changes as of 3/2/94
      +
    • version 1.31 +
    • corrected format of C_NAME to match S_NAME and O_CLERK +
    • re-allowed fractional scale factors < 1 (updates not +contiguous) +
    • added DSS_CONFIG environment variable +
    • reworked read_dist() to look for DSS_DIST in DSS_CONFIG +
    • updated the README file +
  • Changes as of 2/16/94
      +
    • version 1.3 +
    • added command line options for parallel load and data set +expansion +
    • changed dists.dss delimiter to | for portability +
    • limited scale factors to integer values +
    • added command line option for seed file generation +
    • added all seed files to distribution for SFs 1 - 10 +
    • moved machine.h to config.h and added MAX_CHILDREN define +
    • added 'f' flag to options to allow renaming of output files +
    • added generation of SQL delete statements to match updates +(Note: updates are still single-threaded; -C is cleared +by -U) +
    • corrected field sizing in dsstypes.h typedefs to match v 6.4 +
    • update percentage default set to 1% +
  • Changes as of 12/3/93
      +
    • version 1.2 +
    • added command line option to adjust update percentage +
    • fixed update generation for proper primary key ordering +
    • renamed UUSR/PRC to RUSSIA/CHINA in dists.dss +
    • cleaned up phone number generation to be consistent regard- +less of order of evaluation +
    • adjusted size of lineitem comment to bring data in line with +100 MB == SF=1 +
  • Changes as of 10/15/93
      +
    • added command line option for update data creation +
    • miscellaneous porting and cleanup changes +
    • reworked table generation to allow reuse for updates +
    • added comment field to tdefs structure +
    • added load_state and store_state to sync data gen and +update gen +
  • Changes as of 7/26/93
      +
    • combined loader and header stubs in load_stubs.c +
    • separated Revision History (this file) from README +
    • simplified makefile +
    • removed redundancies from colors distribution +
    • added getopt() for portability +
    • created Porting.Notes +
    • adjusted scaling rules +
    • added help option to the command line +
  • Changes as of 2/26/93
      +
    • combined all typedefs in one header: dsstypes.h +
    • combined flat file generation in print.ec +
    • combined typedef population in build.ec +
    • added -P to control rowcnt scaling (P for percentage) +
    • added -D option for Direct data generation and added +appropriate hooks in tdefs[] structure +
    • added -F option for flat file generation +
    • reused -T option (use -P 0.1 to build test size database) +now accepts suboptions c,o,p,s for single table builds. +
    • dropped -M option (scaling is now by rowcount) +
    • added -O option for optional controls. Currently defined: +-O t - generate optional time table and join fields in +order/lineitem; +-O h - generate headers for flat file output; +-O m - generate fixed column-length output +
    • removed dynamic memory allocation, redundant calls to +UnifInt, etc to improve performance +
  • Changes as of 1/12/92
      +
    • julian() changed to handle orders -> orderdate correctly +
    • rflag distributions corrected in dists.dss +
    • sea, gold removed from color distribution to clean up substring +problems +
    • part-> number and supplier-> adjusted for 1-based indexing +
    • time-> day changed to be day of month, not day of year +
    • t.week changed to be week in year, not day of week +
  • Changes as of 11/18/92
      +
    • checked line length and tab for transmission +
    • another chapter in the portability wars. added #include +"machine.h" to dss.h (which is included by everyone else). Any +machine particular porting changes should go here. +
    • fixed fixed-field formats to prevent double printing +
    • expanded PR_FLT formats to %010.2 +
  • Changes as of 10/21/92
      +
    • added fixed format and column header handling; users of headers +will have to define the header functions to be called in +int (*tdefs.header)() +
  • Changes as of 10/09/92:
      +
    • added ansi prototypes and recompiled with gcc -ansi. users may +need to change the CC definition in the makefile and the contents +of CFLAGS to reflect their particular ansi compiler. +
    • replaced all int references with long +
    • replaced all float references with double +
    • found and fixed odate/julian problem TS mentioned in 10/09 phone +call + +
  • Changes as of 9/09/92:
      +
    • Park/Miller random number generator included +
    • clerk scaling changed to 100 * scale +
    • parts.name always built from 5 selections from colors set +
    • test scaling changed to ~60MB (TEST_SCALING == 10) +
    • logarithmic scaling removed +
    • mfgcost removed and retail/supplier cost bounds adjusted +
    • agg_str memory leak fixed +
    • independent RNG streams on a per column basis +
    +
+This is the revised data generator for DSS. + +The rewrite tried to accomplish four things: +
    +
  1. identify and isolate +all the implicit assumptions about limits, bounds, ranges, distributions, etc.; +
  2. standardize the way any given table was generated/ +printed to ease understanding and maintenance; +
  3. bring the generator +in line with the current work of the committee and the excellent spec +that Indira put together; +
  4. provide an easy way to adjust distributions, string contents and to facilitate experimentation to get a +better idea of the impact of data population changes. +

+ +The files included are:

+

+
driver.c +
main and the calling routines for the generator +
dist.c +
should really be named dss_util.c; misc routines +
customer.c
generation and print routines for customer table +
orders.c
"" "" order table +
parts.c
"" "" parts/partsupp +
suppliers.c
"" "" suppliers table +
time.c
"" "" time table +
customer.h
associate header files; contain structure +definitions +
dss.h
dss.h holds the large number of assumptions and +
orders.h
values that have been used as IFDEFs. +
parts.h
+
suppliers.h
+
time.h
+
dists.dss
string selections and weights; used to build +distributions + +
+

+Running make will create an executable (using the compiler flags in +CFLAGS, the ld flags in LDFLAGS and the libraries in LIBS [-O, -s, +and -lm by default]) which will create flat files suitable for dbload. + + + + + + diff --git a/data/ssb/dbgen/load_stub.c b/data/ssb/dbgen/load_stub.c new file mode 100644 index 0000000..e3339b5 --- /dev/null +++ b/data/ssb/dbgen/load_stub.c @@ -0,0 +1,281 @@ +/***************************************************************** + * Title: load_stub.c + * Sccsid: @(#)load_stub.c 2.1.8.1 + * Description: + * stub routines for: + * inline load of dss benchmark + * header creation for dss benchmark + * + ***************************************************************** + */ + +#include +#include "config.h" +#include "dss.h" +#include "dsstypes.h" + +int +close_direct(void) +{ + /* any post load cleanup goes here */ + return(0); +} + +int +prep_direct(void) +{ + /* any preload prep goes here */ + return(0); +} + +int +hd_cust (FILE *f) +{ + static int count = 0; + + if (! count++) + printf("No header has been defined for the customer table\n"); + + return(0); +} + +int +ld_cust (customer_t *cp, int mode) +{ + static int count = 0; + + if (! count++) + printf("%s %s\n", + "No load routine has been defined", + "for the customer table"); + + return(0); +} + +int +hd_part (FILE *f) +{ + static int count = 0; + + if (! count++) + printf("No header has been defined for the part table\n"); + + return(0); +} + +int +ld_part (part_t *pp, int mode) +{ + static int count = 0; + + if (! count++) + printf("No load routine has been defined for the part table\n"); + + return(0); +} + +int +ld_psupp (part_t *pp, int mode) +{ + static int count = 0; + + if (! count++) + printf("%s %s\n", + "No load routine has been defined for the", + "psupp table\n"); + + return(0); + +} + + +int +hd_supp (FILE *f) +{ + static int count = 0; + + if (! 
count++) + printf("No header has been defined for the supplier table\n"); + + return(0); +} + +int +ld_supp (supplier_t *sp, int mode) +{ + static int count = 0; + + if (! count++) + printf("%s %s\n", + "No load routine has been defined", + "for the supplier table\n"); + + return(0); +} + + +int +hd_order (FILE *f) +{ + static int count = 0; + + if (! count++) + printf("No header has been defined for the order table\n"); + + return(0); +} + +int +ld_order (order_t *p, int mode) +{ + static int count = 0; + + if (! count++) + printf("%s %s\n", + "No load routine has been defined", + "for the order table"); + + return(0); +} + +ld_line (order_t *p, int mode) +{ + static int count = 0; + + if (! count++) + printf("%s %s\n", + "No load routine has been defined", + "for the line table"); + + return(0); +} + + + +int +hd_psupp (FILE *f) +{ + static int count = 0; + + if (! count++) + printf("%s %s\n", + "No header has been defined for the", + "part supplier table"); + + return(0); +} + + +int +hd_line (FILE *f) +{ + static int count = 0; + + if (! count++) + printf("No header has been defined for the lineitem table\n"); + + return(0); +} + +int +hd_nation (FILE *f) +{ + static int count = 0; + + if (! count++) + printf("No header has been defined for the nation table\n"); + + return(0); +} + +#ifdef SSBM +#else +int +ld_nation (code_t *cp, int mode) +{ + static int count = 0; + + if (! count++) + printf("%s %s\n", + "No load routine has been defined", + "for the nation table"); + + return(0); +} + +int +hd_region (FILE *f) +{ + static int count = 0; + + if (! count++) + printf("No header has been defined for the region table\n"); + + return(0); +} + +int +ld_region (code_t *cp, int mode) +{ + static int count = 0; + + if (! 
count++) + printf("%s %s\n", + "No load routine has been defined", + "for the region table"); + + return(0); +} + +int +ld_order_line (order_t *p, int mode) +{ + ld_order(p, mode); + ld_line (p, mode); + + return(0); +} + +int +hd_order_line (FILE *f) +{ + hd_order(f); + hd_line (f); + + return(0); +} + +int +ld_part_psupp (part_t *p, int mode) +{ + ld_part(p, mode); + ld_psupp (p, mode); + + return(0); +} + +int +hd_part_psupp (FILE *f) +{ + hd_part(f); + hd_psupp(f); + + return(0); +} +#endif + +#ifdef SSBM +int +ld_date (date_t *d, int mode) +{ + /*do nothing for now*/ + return(0); +} + +#endif + + + + + + diff --git a/data/ssb/dbgen/load_stub.o b/data/ssb/dbgen/load_stub.o new file mode 100644 index 0000000000000000000000000000000000000000..a30638600e6732f5376411c496494e6482852588 GIT binary patch literal 6680 zcmds*e~4676vywVyRK&IW|pR=uhLv8hnaPCS0P*3%E8QEu_P(&&d$7THg{*HnK!ap znPpLF3mZ`o2Kk49^iTaI1^rmcKZHUe41x%X{wXjpBMJ%ZeD6DF_Uv_Dt0n{;c=z1< zdFPyW?tAy$cV?cxcjJANVlg8UGi%M*NT@N}Zyf7)*lvfJX&TLMGZ&tJALGx)_YTzo z4sVIQbOSoij|TOjS?a)L_O-v6TnuHICxUZg8r7Ex~Dn}XGCHWj>_pU(OvFPAQRy}qCKvVLEG-p_h{g_0NK zd@oZe2Zceh2-3X+zV)(x1BGijQ1Oza6!_GcpOr|<{W&2vEnZ;jv^ znY;7KM(`(@%l+l_YATpcF~44@b#ijeiU7|TPkOvyPmK|m=O`8^AHeTqbK#4aH#;0- z*e`ZC;(~h)Z)YxRO5%4gpX=DCn9KSU`z_3y9Q*CeXE?lvd4t1q%q6bqB6BvUnS?TA zl`Wd&l+{n!wtgJL;8{!R4Cbxn1v{uWb2OxWxc2KzkHcTF<2p0q@Yn6Q-Yj(ZVYWZ% z@FUFsbokqLTyO4h_&aQ$;590z-(&ku0j!TRf5_qCJ6dlZbNHurO!IZ(hk4hT7ae|z z?MEGscYPMEI1zc5eqb(R(Z4X4b!FdGV!zWNYl^y6IZr*rzn{ql)|2Rxwd`jShy818 zC-LVn|H9#mm`^4Rk@(HbA7CzV@H`OzImf=8`6s72*vTkL) zf^{__j}s=ovC}q5^jgrYK+}q5C7M-e+R(J4S&b$cx>z5|62y8Xty|LiB`KGILfP-m z_LuxjV2UNbDE(Zv8^xa~R)Vq_;6ZnHvDBXr`nof@U4|_1oyW9sAK!ob?U#N2wg@i!7VNL15zLokA ziQgO!&BLr+UQm1%^(Fpp#jjU9+-Jl1cz4A<+)u-NmMR|ZgJbzD=D0^$OFqIsP#iyB zCH}|E-T8i{?6Jf>9)BCb&nk{@t;EN^199U|qxT0QdCGTlRs>(F_#E<+_;JPOD!x*2 zeLwGJj(qg}d{EhAzK8|c8NnY{{3f!Ke0Hn&di@+!_J}Y0^Q#g3nBrL568~Mr_4+xk zxSsDQb9cU{l|Ay2z5crheopcEDxW`8d_CW5@L);g`AXd~JAyA$9BV=5n^0WOw@q<9 
z-*wF0`F1IL%(nr_kvtN?%ZlGjc9PFt#dSWE`m>GBb@f=kn7H1j`(^#c*-7Kv0oO!*D8LCvhQSm zQGBw2ARkWIj$3Uz#^pPmayaf0q{p^n_EHxOIb7Yr~MF7=Pz zS1~WCe;U;L2rl)H=WwZiP(MNB{z(0^#oYVgS z{y;h&_(Oq-)8{&E;=SdviI)l(z&shJl0*-wAjuR42mL&G~hqZ zN(bH-EgkIdX-IvUzu^7UK}^@bhy6#K3%dSKkZ;ZP!yDCp_Q(3co+kdXerjEt%*= 300) +# and make the appropriate change in gen_schema() of runit.sh +CFLAGS = -DDBNAME=\"dss\" -D$(MACHINE) -D$(DATABASE) -D$(WORKLOAD) +LDFLAGS = +# The OBJ,EXE and LIB macros will need to be changed for compilation under +# Windows NT +OBJ = .o +EXE = +LIBS = -lm +# +# NO CHANGES SHOULD BE NECESSARY BELOW THIS LINE +############### +TREE_ROOT=/tmp/tree +# +PROG1 = dbgen$(EXE) +PROG2 = qgen$(EXE) +PROGS = $(PROG1) $(PROG2) +# +HDR1 = dss.h rnd.h config.h dsstypes.h shared.h bcd2.h +HDR2 = tpcd.h permute.h +HDR = $(HDR1) $(HDR2) +# +SRC1 = build.c driver.c bm_utils.c rnd.c print.c load_stub.c bcd2.c \ + speed_seed.c text.c permute.c +SRC2 = qgen.c varsub.c +SRC = $(SRC1) $(SRC2) +# +OBJ1 = build$(OBJ) driver$(OBJ) bm_utils$(OBJ) rnd$(OBJ) print$(OBJ) \ + load_stub$(OBJ) bcd2$(OBJ) speed_seed$(OBJ) text$(OBJ) permute$(OBJ) +OBJ2 = build$(OBJ) bm_utils$(OBJ) qgen$(OBJ) rnd$(OBJ) varsub$(OBJ) \ + text$(OBJ) bcd2$(OBJ) permute$(OBJ) speed_seed$(OBJ) +OBJS = $(OBJ1) $(OBJ2) +# +SETS = dists.dss +DOC=README HISTORY PORTING.NOTES BUGS +DDL = dss.ddl dss.ri +OTHER=makefile.suite $(SETS) $(DDL) +# case is *important* in TEST_RES +TEST_RES = O.res L.res c.res s.res P.res S.res n.res r.res +# +DBGENSRC=$(SRC1) $(HDR1) $(OTHER) $(DOC) $(SRC2) $(HDR2) $(SRC3) +QD=1.sql 2.sql 3.sql 4.sql 5.sql 6.sql 7.sql 8.sql 9.sql 10.sql \ + 11.sql 12.sql 13.sql 14.sql 15.sql 16.sql 17.sql 18.sql \ + 19.sql 20.sql 21.sql 22.sql +VARIANTS= 8a.sql 12a.sql 13a.sql 14a.sql 15a.sql +ANS = 1.ans 2.ans 3.ans 4.ans 5.ans 6.ans 7.ans 8.ans 9.ans 10.ans 11.ans \ + 12.ans 13.ans 14.ans 15.ans 16.ans 17.ans 18.ans 19.ans 20.ans \ + 21.ans 22.ans +QSRC = $(FQD) $(VARIANTS) +ALLSRC=$(DBGENSRC) +TREE_DOC=tree.readme tree.changes 
appendix.readme appendix.version answers.readme queries.readme variants.readme +JUNK = +# +all: $(PROGS) +$(PROG1): $(OBJ1) $(SETS) + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJ1) $(LIBS) +$(PROG2): permute.h $(OBJ2) + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJ2) $(LIBS) +clean: + rm -f $(PROGS) $(OBJS) $(JUNK) +lint: + lint $(CFLAGS) -u -x -wO -Ma -p $(SRC1) + lint $(CFLAGS) -u -x -wO -Ma -p $(SRC2) + +tar: $(DBGENSRC) + tar cvhf $(PROG1).tar $(DBGENSRC) +dbgenshar: $(DBGENSRC) + shar -o dbgen.shar $(DBGENSRC) +zip: $(DBGENSRC) + zip dbgen $(DBGENSRC) +tree: $(DBGENSRC) $(FQD) $(VARIANTS) $(TREE_DOC) $(ANS) + rm -rf $(TREE_ROOT) + mkdir $(TREE_ROOT) + mkdir $(TREE_ROOT)/appendix + mkdir $(TREE_ROOT)/appendix/queries + mkdir $(TREE_ROOT)/appendix/variants + mkdir $(TREE_ROOT)/appendix/dbgen + mkdir $(TREE_ROOT)/appendix/answers + cp tree.readme $(TREE_ROOT)/README + cp appendix.readme $(TREE_ROOT)/appendix/README + cp answers.readme $(TREE_ROOT)/appendix/answers/README + cp queries.readme $(TREE_ROOT)/appendix/queries/README + cp variants.readme $(TREE_ROOT)/appendix/variants/README + cp tree.changes $(TREE_ROOT)/CHANGES + cp appendix.version $(TREE_ROOT)/appendix/VERSION + cp $(FQD) $(TREE_ROOT)/appendix/queries + cp $(VARIANTS) $(TREE_ROOT)/appendix/variants + cp $(DBGENSRC) $(TREE_ROOT)/appendix/dbgen + cp $(ANS) $(TREE_ROOT)/appendix/answers + (cd $(TREE_ROOT); tar chf - .) |compress > tree.tar.Z + (cd $(TREE_ROOT); zip -r - . 
) > tree.zip + date > tree.update +portable: + @ for f in $(SRC) $(HDR) ; \ + do \ + expand $$f > /tmp/$$f; \ + awk 'length > 72 { print FILENAME ":" NR " too long " }' /tmp/$$f ; \ + rm /tmp/$$f ; \ + done +release: + @chkout $(SRC) $(HDR) + @ for f in $(SRC) $(HDR) ; \ + do \ + expand $$f > /tmp/$$f ; \ + mv /tmp/$$f $$f ; \ + done + @chkin $(SRC) $(HDR) + +rnd$(OBJ): rnd.h +$(OBJ1): $(HDR1) +$(OBJ2): dss.h tpcd.h config.h +$(QSRC) $(ALLSRC): + get -r`cat .version` ./SCCS/s.$@ diff --git a/data/ssb/dbgen/makefile.suite b/data/ssb/dbgen/makefile.suite new file mode 100644 index 0000000..5ab13d1 --- /dev/null +++ b/data/ssb/dbgen/makefile.suite @@ -0,0 +1,127 @@ +# @(#)makefile.suite 2.1.8.1 +################ +## CHANGE NAME OF ANSI COMPILER HERE +################ +CC = +# Current values for DATABASE are: INFORMIX, DB2, TDAT (Teradata) +# SQLSERVER, SYBASE +# Current values for MACHINE are: ATT, DOS, HP, IBM, ICL, MVS, +# SGI, SUN, U2200, VMS, LINUX +# Current values for WORKLOAD are: SSBM, TPCH, TPCR +DATABASE= +MACHINE = +WORKLOAD = +# +# add -EDTERABYTE if orderkey will execeed 32 bits (SF >= 300) +# and make the appropriate change in gen_schema() of runit.sh +CFLAGS = -O -DDBNAME=\"dss\" -D$(MACHINE) -D$(DATABASE) -D$(WORKLOAD) +LDFLAGS = -O +# The OBJ,EXE and LIB macros will need to be changed for compilation under +# Windows NT +OBJ = .o +EXE = +LIBS = -lm +# +# NO CHANGES SHOULD BE NECESSARY BELOW THIS LINE +############### +TREE_ROOT=/tmp/tree +# +PROG1 = dbgen$(EXE) +PROG2 = qgen$(EXE) +PROGS = $(PROG1) $(PROG2) +# +HDR1 = dss.h rnd.h config.h dsstypes.h shared.h bcd2.h +HDR2 = tpcd.h permute.h +HDR = $(HDR1) $(HDR2) +# +SRC1 = build.c driver.c bm_utils.c rnd.c print.c load_stub.c bcd2.c \ + speed_seed.c text.c permute.c +SRC2 = qgen.c varsub.c +SRC = $(SRC1) $(SRC2) +# +OBJ1 = build$(OBJ) driver$(OBJ) bm_utils$(OBJ) rnd$(OBJ) print$(OBJ) \ + load_stub$(OBJ) bcd2$(OBJ) speed_seed$(OBJ) text$(OBJ) permute$(OBJ) +OBJ2 = build$(OBJ) bm_utils$(OBJ) qgen$(OBJ) 
rnd$(OBJ) varsub$(OBJ) \ + text$(OBJ) bcd2$(OBJ) permute$(OBJ) speed_seed$(OBJ) +OBJS = $(OBJ1) $(OBJ2) +# +SETS = dists.dss +DOC=README HISTORY PORTING.NOTES BUGS +DDL = dss.ddl dss.ri +OTHER=makefile.suite $(SETS) $(DDL) +# case is *important* in TEST_RES +TEST_RES = O.res L.res c.res s.res P.res S.res n.res r.res +# +DBGENSRC=$(SRC1) $(HDR1) $(OTHER) $(DOC) $(SRC2) $(HDR2) $(SRC3) +QD=1.sql 2.sql 3.sql 4.sql 5.sql 6.sql 7.sql 8.sql 9.sql 10.sql \ + 11.sql 12.sql 13.sql 14.sql 15.sql 16.sql 17.sql 18.sql \ + 19.sql 20.sql 21.sql 22.sql +VARIANTS= 8a.sql 12a.sql 13a.sql 14a.sql 15a.sql +ANS = 1.ans 2.ans 3.ans 4.ans 5.ans 6.ans 7.ans 8.ans 9.ans 10.ans 11.ans \ + 12.ans 13.ans 14.ans 15.ans 16.ans 17.ans 18.ans 19.ans 20.ans \ + 21.ans 22.ans +QSRC = $(FQD) $(VARIANTS) +ALLSRC=$(DBGENSRC) +TREE_DOC=tree.readme tree.changes appendix.readme appendix.version answers.readme queries.readme variants.readme +JUNK = +# +all: $(PROGS) +$(PROG1): $(OBJ1) $(SETS) + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJ1) $(LIBS) +$(PROG2): permute.h $(OBJ2) + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJ2) $(LIBS) +clean: + rm -f $(PROGS) $(OBJS) $(JUNK) +lint: + lint $(CFLAGS) -u -x -wO -Ma -p $(SRC1) + lint $(CFLAGS) -u -x -wO -Ma -p $(SRC2) + +tar: $(DBGENSRC) + tar cvhf $(PROG1).tar $(DBGENSRC) +dbgenshar: $(DBGENSRC) + shar -o dbgen.shar $(DBGENSRC) +zip: $(DBGENSRC) + zip dbgen $(DBGENSRC) +tree: $(DBGENSRC) $(FQD) $(VARIANTS) $(TREE_DOC) $(ANS) + rm -rf $(TREE_ROOT) + mkdir $(TREE_ROOT) + mkdir $(TREE_ROOT)/appendix + mkdir $(TREE_ROOT)/appendix/queries + mkdir $(TREE_ROOT)/appendix/variants + mkdir $(TREE_ROOT)/appendix/dbgen + mkdir $(TREE_ROOT)/appendix/answers + cp tree.readme $(TREE_ROOT)/README + cp appendix.readme $(TREE_ROOT)/appendix/README + cp answers.readme $(TREE_ROOT)/appendix/answers/README + cp queries.readme $(TREE_ROOT)/appendix/queries/README + cp variants.readme $(TREE_ROOT)/appendix/variants/README + cp tree.changes $(TREE_ROOT)/CHANGES + cp appendix.version 
$(TREE_ROOT)/appendix/VERSION + cp $(FQD) $(TREE_ROOT)/appendix/queries + cp $(VARIANTS) $(TREE_ROOT)/appendix/variants + cp $(DBGENSRC) $(TREE_ROOT)/appendix/dbgen + cp $(ANS) $(TREE_ROOT)/appendix/answers + (cd $(TREE_ROOT); tar chf - .) |compress > tree.tar.Z + (cd $(TREE_ROOT); zip -r - . ) > tree.zip + date > tree.update +portable: + @ for f in $(SRC) $(HDR) ; \ + do \ + expand $$f > /tmp/$$f; \ + awk 'length > 72 { print FILENAME ":" NR " too long " }' /tmp/$$f ; \ + rm /tmp/$$f ; \ + done +release: + @chkout $(SRC) $(HDR) + @ for f in $(SRC) $(HDR) ; \ + do \ + expand $$f > /tmp/$$f ; \ + mv /tmp/$$f $$f ; \ + done + @chkin $(SRC) $(HDR) + +rnd$(OBJ): rnd.h +$(OBJ1): $(HDR1) +$(OBJ2): dss.h tpcd.h config.h +$(QSRC) $(ALLSRC): + get -r`cat .version` ./SCCS/s.$@ diff --git a/data/ssb/dbgen/makefile_win b/data/ssb/dbgen/makefile_win new file mode 100644 index 0000000..1712a11 --- /dev/null +++ b/data/ssb/dbgen/makefile_win @@ -0,0 +1,85 @@ +VC="c:/Program Files/Microsoft Visual Studio 9.0/VC" +WIN_INC="C:\Program Files\Microsoft SDKs\Windows\v6.0A\Include" +WIN_LIB="C:\Program Files\Microsoft SDKs\Windows\v6.0A\Lib" +#VC = "C:\Program Files\Microsoft Visual Studio .NET 2003\Vc7" +VCLIB = $(VC)\LIB +# @(#)makefile.suite 2.1.8.1 +################ +## CHANGE NAME OF ANSI COMPILER HERE +################ +CC =cl.exe +# Current values for DATABASE are: INFORMIX, DB2, TDAT (Teradata) +# SQLSERVER, SYBASE +# Current values for MACHINE are: ATT, DOS, WIN32 HP, IBM, ICL, MVS, +# SGI, SUN, U2200, VMS, LINUX +# Current values for WORKLOAD are: SSBM, TPCH, TPCR +DATABASE=DB2 +MACHINE =WIN32 +WORKLOAD =SSBM +# +# add -EDTERABYTE if orderkey will execeed 32 bits (SF >= 300) +# and make the appropriate change in gen_schema() of runit.sh +CFLAGS = -DDBNAME=\"dss\" -D$(MACHINE) -D$(DATABASE) -D$(WORKLOAD) /I$(VC)\include /I$(WIN_INC) + +#LDFLAGS = -O +# The OBJ,EXE and LIB macros will need to be changed for compilation under +# Windows NT +OBJ = .obj +EXE = .exe +LIBS 
=$(VCLIB)\libcmt.lib $(VCLIB)\oldnames.lib $(VCLIB)\oldnames.lib $(WIN_LIB)\kernel32.lib +# +# NO CHANGES SHOULD BE NECESSARY BELOW THIS LINE +############### +# +PROG1 = dbgen$(EXE) +PROG2 = qgen$(EXE) +PROGS = $(PROG1) $(PROG2) +# +HDR1 = dss.h rnd.h config.h dsstypes.h shared.h bcd2.h +HDR2 = tpcd.h permute.h +HDR = $(HDR1) $(HDR2) +# +SRC1 = build.c driver.c bm_utils.c rnd.c print.c load_stub.c bcd2.c \ + speed_seed.c text.c permute.c +SRC2 = qgen.c varsub.c +SRC = $(SRC1) $(SRC2) +# +OBJ1 = build$(OBJ) driver$(OBJ) bm_utils$(OBJ) rnd$(OBJ) print$(OBJ) \ + load_stub$(OBJ) bcd2$(OBJ) speed_seed$(OBJ) text$(OBJ) permute$(OBJ) +OBJ2 = build$(OBJ) bm_utils$(OBJ) qgen$(OBJ) rnd$(OBJ) varsub$(OBJ) \ + text$(OBJ) bcd2$(OBJ) permute$(OBJ) speed_seed$(OBJ) +OBJS = $(OBJ1) $(OBJ2) +# +SETS = dists.dss +DOC=README HISTORY PORTING.NOTES BUGS +DDL = dss.ddl dss.ri +OTHER=makefile.suite $(SETS) $(DDL) +# case is *important* in TEST_RES +TEST_RES = O.res L.res c.res s.res P.res S.res n.res r.res +# +DBGENSRC=$(SRC1) $(HDR1) $(OTHER) $(DOC) $(SRC2) $(HDR2) $(SRC3) +QD=1.sql 2.sql 3.sql 4.sql 5.sql 6.sql 7.sql 8.sql 9.sql 10.sql \ + 11.sql 12.sql 13.sql 14.sql 15.sql 16.sql 17.sql 18.sql \ + 19.sql 20.sql 21.sql 22.sql +VARIANTS= 8a.sql 12a.sql 13a.sql 14a.sql 15a.sql +ANS = 1.ans 2.ans 3.ans 4.ans 5.ans 6.ans 7.ans 8.ans 9.ans 10.ans 11.ans \ + 12.ans 13.ans 14.ans 15.ans 16.ans 17.ans 18.ans 19.ans 20.ans \ + 21.ans 22.ans +QSRC = $(FQD) $(VARIANTS) +ALLSRC=$(DBGENSRC) +TREE_DOC=tree.readme tree.changes appendix.readme appendix.version answers.readme queries.readme variants.readme +JUNK = +# +all: $(PROGS) + +$(PROG1): $(OBJ1) $(SETS) + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJ1) $(LIBS) + +$(PROG2): permute.h $(OBJ2) + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJ2) $(LIBS) + +clean: + del /F $(PROGS) $(OBJS) $(JUNK) + +$(OBJ1): $(HDR1) +$(OBJ2): dss.h tpcd.h config.h diff --git a/data/ssb/dbgen/permute.c b/data/ssb/dbgen/permute.c new file mode 100644 index 0000000..b34f04c --- 
/dev/null +++ b/data/ssb/dbgen/permute.c @@ -0,0 +1,175 @@ +/* @(#)permute.c 2.1.8.3 */ +/* +* permute.c -- a permutation generator for the query +* sequences in TPC-H and TPC-R +*/ + +#ifdef TEST +#define DECLARER +#endif +#include "config.h" +#include "dss.h" +#ifdef TEST +#include +#if (defined(_POSIX_)||!defined(WIN32)) /* Change for Windows NT */ +#include +#include +#endif /* WIN32 */ +#include /* */ +#include +#include +#include +#include +#include +#include +#ifdef HP +#include +#endif +#if (defined(WIN32)&&!defined(_POSIX_)) +#include +#pragma warning(disable:4201) +#pragma warning(disable:4214) +#pragma warning(disable:4514) +#define WIN32_LEAN_AND_MEAN +#define NOATOM +#define NOGDICAPMASKS +#define NOMETAFILE +#define NOMINMAX +#define NOMSG +#define NOOPENFILE +#define NORASTEROPS +#define NOSCROLL +#define NOSOUND +#define NOSYSMETRICS +#define NOTEXTMETRIC +#define NOWH +#define NOCOMM +#define NOKANJI +#define NOMCX +#include +#pragma warning(default:4201) +#pragma warning(default:4214) +#endif +#endif + +long NextRand(long seed); +long *permute(long *set, int cnt, long stream); +long *permute_dist(distribution *d, long stream); +long seed; +char *eol[2] = {" ", "},"}; +extern seed_t Seed[]; +#ifdef TEST +tdef tdefs = { NULL }; +#endif + + +#define MAX_QUERY 22 +#define ITERATIONS 1000 +#define UNSET 0 + +long * +permute(long *a, int c, long s) + { + int i; + static long source; + static long *set, temp; + + if (a != (long *)NULL) + { + set = a; + for (i=0; i < c; i++) + *(a + i) = i; + for (i=0; i < c; i++) + { + RANDOM(source, 0L, (long)(c - 1), s); + temp = *(a + source); + *(a + source) = *(a + i) ; + *(a + i) = temp; + source = 0; + } + } + else + source += 1; + + if (source >= c) + source -= c; + + return(set + source); + } + +long * +permute_dist(distribution *d, long stream) + { + static distribution *dist = NULL; + int i; + + if (d != NULL) + { + if (d->permute == (long *)NULL) + { + d->permute = (long *)malloc(sizeof(long) * DIST_SIZE(d)); 
+ MALLOC_CHECK(d->permute); + for (i=0; i < DIST_SIZE(d); i++) + *(d->permute + i) = i; + } + dist = d; + return(permute(dist->permute, DIST_SIZE(dist), stream)); + } + + + if (dist != NULL) + return(permute(NULL, DIST_SIZE(dist), stream)); + else + INTERNAL_ERROR("Bad call to permute_dist"); + } + + +#ifdef TEST + +main(int ac, char *av[]) + { + long *sequence, + i, + j, + streams = UNSET, + *a; + char sep; + int index = 0; + + set_seeds = 0; + sequence = (long *)malloc(MAX_QUERY * sizeof(long)); + a = sequence; + for (i=0; i < MAX_QUERY; i++) + *(sequence + i) = i; + if (ac < 3) + goto usage; + Seed[0].value = (long)atoi(av[1]); + streams = atoi(av[2]); + if (Seed[0].value == UNSET || streams == UNSET) + goto usage; + + index = 0; + printf("long permutation[%d][%d] = {\n", streams, MAX_QUERY); + for (j=0; j < streams; j++) + { + sep = '{'; + printf("%s\n", eol[index]); + for (i=0; i < MAX_QUERY; i++) + { + printf("%c%2d", sep, *permute(a, MAX_QUERY, 0) + 1); + a = (long *)NULL; + sep = ','; + } + a = sequence; + index=1; + } + printf("}\n};\n"); + return(0); + +usage: + printf("Usage: %s \n",av[0]); + printf(" uses to start the generation of permutations of [1..%d]\n", MAX_QUERY); + return(-1); + + } +#endif /* TEST */ diff --git a/data/ssb/dbgen/permute.h b/data/ssb/dbgen/permute.h new file mode 100644 index 0000000..bf5e8c4 --- /dev/null +++ b/data/ssb/dbgen/permute.h @@ -0,0 +1,47 @@ +/* + * @(#)permute.h 2.1.8.1 + */ +long permutation[41][22] = +{ + {14, 2, 9,20, 6,17,18, 8,21,13, 3,22,16, 4,11,15, 1,10,19, 5, 7,12}, + {21, 3,18, 5,11, 7, 6,20,17,12,16,15,13,10, 2, 8,14,19, 9,22, 1, 4}, + { 6,17,14,16,19,10, 9, 2,15, 8, 5,22,12, 7,13,18, 1, 4,20, 3,11,21}, + { 8, 5, 4, 6,17, 7, 1,18,22,14, 9,10,15,11,20, 2,21,19,13,16,12, 3}, + { 5,21,14,19,15,17,12, 6, 4, 9, 8,16,11, 2,10,18, 1,13, 7,22, 3,20}, + {21,15, 4, 6, 7,16,19,18,14,22,11,13, 3, 1, 2, 5, 8,20,12,17,10, 9}, + {10, 3,15,13, 6, 8, 9, 7, 4,11,22,18,12, 1, 5,16, 2,14,19,20,17,21}, + {18, 8,20,21, 2, 
4,22,17, 1,11, 9,19, 3,13, 5, 7,10,16, 6,14,15,12}, + {19, 1,15,17, 5, 8, 9,12,14, 7, 4, 3,20,16, 6,22,10,13, 2,21,18,11}, + { 8,13, 2,20,17, 3, 6,21,18,11,19,10,15, 4,22, 1, 7,12, 9,14, 5,16}, + { 6,15,18,17,12, 1, 7, 2,22,13,21,10,14, 9, 3,16,20,19,11, 4, 8, 5}, + {15,14,18,17,10,20,16,11, 1, 8, 4,22, 5,12, 3, 9,21, 2,13, 6,19, 7}, + { 1, 7,16,17,18,22,12, 6, 8, 9,11, 4, 2, 5,20,21,13,10,19, 3,14,15}, + {21,17, 7, 3, 1,10,12,22, 9,16, 6,11, 2, 4, 5,14, 8,20,13,18,15,19}, + { 2, 9, 5, 4,18, 1,20,15,16,17, 7,21,13,14,19, 8,22,11,10, 3,12, 6}, + {16, 9,17, 8,14,11,10,12, 6,21, 7, 3,15, 5,22,20, 1,13,19, 2, 4,18}, + { 1, 3, 6, 5, 2,16,14,22,17,20, 4, 9,10,11,15, 8,12,19,18,13, 7,21}, + { 3,16, 5,11,21, 9, 2,15,10,18,17, 7, 8,19,14,13, 1, 4,22,20, 6,12}, + {14, 4,13, 5,21,11, 8, 6, 3,17, 2,20, 1,19,10, 9,12,18,15, 7,22,16}, + { 4,12,22,14, 5,15,16, 2, 8,10,17, 9,21, 7, 3, 6,13,18,11,20,19, 1}, + {16,15,14,13, 4,22,18,19, 7, 1,12,17, 5,10,20, 3, 9,21,11, 2, 6, 8}, + {20,14,21,12,15,17, 4,19,13,10,11, 1,16, 5,18, 7, 8,22, 9, 6, 3, 2}, + {16,14,13, 2,21,10,11, 4, 1,22,18,12,19, 5, 7, 8, 6, 3,15,20, 9,17}, + {18,15, 9,14,12, 2, 8,11,22,21,16, 1, 6,17, 5,10,19, 4,20,13, 3, 7}, + { 7, 3,10,14,13,21,18, 6,20, 4, 9, 8,22,15, 2, 1, 5,12,19,17,11,16}, + {18, 1,13, 7,16,10,14, 2,19, 5,21,11,22,15, 8,17,20, 3, 4,12, 6, 9}, + {13, 2,22, 5,11,21,20,14, 7,10, 4, 9,19,18, 6, 3, 1, 8,15,12,17,16}, + {14,17,21, 8, 2, 9, 6, 4, 5,13,22, 7,15, 3, 1,18,16,11,10,12,20,19}, + {10,22, 1,12,13,18,21,20, 2,14,16, 7,15, 3, 4,17, 5,19, 6, 8, 9,11}, + {10, 8, 9,18,12, 6, 1, 5,20,11,17,22,16, 3,13, 2,15,21,14,19, 7, 4}, + { 7,17,22, 5, 3,10,13,18, 9, 1,14,15,21,19,16,12, 8, 6,11,20, 4, 2}, + { 2, 9,21, 3, 4, 7, 1,11,16, 5,20,19,18, 8,17,13,10,12,15, 6,14,22}, + {15,12, 8, 4,22,13,16,17,18, 3, 7, 5, 6, 1, 9,11,21,10,14,20,19, 2}, + {15,16, 2,11,17, 7, 5,14,20, 4,21, 3,10, 9,12, 8,13, 6,18,19,22, 1}, + { 1,13,11, 3, 4,21, 6,14,15,22,18, 9, 7, 5,10,20,12,16,17, 8,19, 2}, + {14,17,22,20, 8,16, 5,10, 
1,13, 2,21,12, 9, 4,18, 3, 7, 6,19,15,11}, + { 9,17, 7, 4, 5,13,21,18,11, 3,22, 1, 6,16,20,14,15,10, 8, 2,12,19}, + {13,14, 5,22,19,11, 9, 6,18,15, 8,10, 7, 4,17,16, 3, 1,12, 2,21,20}, + {20, 5, 4,14,11, 1, 6,16, 8,22, 7, 3, 2,12,21,19,17,13,10,15,18, 9}, + { 3, 7,14,15, 6, 5,21,20,18,10, 4,16,19, 1,13, 9, 8,17,11,12,22, 2}, + {13,15,17, 1,22,11, 3, 4, 7,20,14,21, 9, 8, 2,18,16, 6,10,12, 5,19} +}; diff --git a/data/ssb/dbgen/permute.o b/data/ssb/dbgen/permute.o new file mode 100644 index 0000000000000000000000000000000000000000..0f4af181e3b8728699c965d1dc722fffd073600e GIT binary patch literal 3248 zcmbVNO>7%g5Ppu`)}&?Qv_*>AQY}b@RM3^3=0I9N-lS<>CASi^Mo4KJHujoW^-r{3 zLrO&=a1y$5j3RO2f)H0u6{o_1hK8R50`SxY07u`AJ*oD^b`#aIu!+q3^~^A$5~o=KnP3(cPS_rS=# ze@}Qnaqk6mT(~zo+c?$qHm;(^y<^`nURw$Ap&Qi=?yaR7?hn!0olq)sGv(b*c{i`2 z+b!YSK;L?d*SB*o%Dr9uyDf+#@!D!MHFUT7J+D6|^tv$dM&!o$IrFS}&OE=$YXS@L z%snp6L*LH*$Pcb_PgvHji}JB_?=MmP5~lY4;kCIKbN6uXaX zPgYBARZk?0K_h-BQ59-pc3{AW4<3}Th>pNJ1`qas>{Oa{F}8wO7#^hEK4YP`US^@0 zSZME#ZP8_D$Mb;ilQz+V<67+fu3@dO5gyj`rHHBZFK#ilcx`Lfztm-FeT!l6twjpmTA$f+m!6v${+S5-v$@%V-+m#L4WcSy$0+gJnj6?bil_u;Q0=CsRLf^fL{Wr4kULA4^0^Zwmvn!kiedy=#m(A|!zLzj z(@wtR<|Z;z7nnVhcUkUDC-2(Knk+jmv&#imvF)rVbib?@=0?m6&C9D2$DOtVp8Ie( zAWci0{ONn<131PY)qRF@`B46k132zqq)&bJz@PGb-U0tPfYW5$kvQs@o9ge9BZau8 z;DZv!I`Nyx|EPjLqTpi+-lO0zDfw{^P@ZuG$C)I2Qptm77S;cT;;+{KuHvt*`+dtl{`BY{38Y5rQje6LNo5>qrUp!JRn@$3SiTv9s7n4(Q8s5oaUDqm5W8!ngs4T zKA$4Vu?v>r+A}UQvX*NxW3p0VhEqlb-4g~3gK!cENCwB^rzUcaRkW$x|IKxXq@Jirl2Z8ujS2-+wij&p&y$J| zidW=O85H2VuO&FlR9SmUPr*95Gs-^aKd#IzWKr z6rT*>v_A1su%jGYKQ6zU&QVGe)RSOn*T0o*eneabV*hYIQ~lJxG+?+s`GF`XDc8sT zs6=%WKMzKdRlkCaS22`>zIe}IsX_N?3Fu%`iRve85d_Q`j9-?(Wpb1}82=97?fSRg zrhj-2Q9Sjp-P*WRpZuY;fAOqs7msIZ@ZV#6zuf=xs3;WdAD*S{;&oZ*vJ#78F#bQv C$)b(` literal 0 HcmV?d00001 diff --git a/data/ssb/dbgen/print.c b/data/ssb/dbgen/print.c new file mode 100644 index 0000000..932a576 --- /dev/null +++ b/data/ssb/dbgen/print.c @@ -0,0 +1,1006 @@ +/* @(#)print.c 2.1.8.2 */ +/* generate flat files for 
data load */ +#include +#ifndef VMS +#include +#endif + +#if defined(SUN) +#include +#endif + +#if defined(LINUX) +#include +#endif /*LINUX*/ + +#include + +#include "dss.h" +#include "dsstypes.h" +#include + +#include +#include + + +/* + * Function Prototypes + */ +FILE *print_prep PROTO((int table, int update)); +int pr_drange PROTO((int tbl, long min, long cnt, long num)); + +FILE * +print_prep(int table, int update) +{ + char upath[128]; + FILE *res; + + if (updates) + { + if (update > 0) /* updates */ + if ( insert_segments ) + { + int this_segment; + if(strcmp(tdefs[table].name,"orders.tbl")) + this_segment=++insert_orders_segment; + else + this_segment=++insert_lineitem_segment; + sprintf(upath, "%s%c%s.u%d.%d", + env_config(PATH_TAG, PATH_DFLT), + PATH_SEP, tdefs[table].name, update%10000,this_segment); + } + else + { + sprintf(upath, "%s%c%s.u%d", + env_config(PATH_TAG, PATH_DFLT), + PATH_SEP, tdefs[table].name, update); + } + else /* deletes */ + if ( delete_segments ) + { + ++delete_segment; + sprintf(upath, "%s%cdelete.u%d.%d", + env_config(PATH_TAG, PATH_DFLT), PATH_SEP, -update%10000, + delete_segment); + } + else + { + sprintf(upath, "%s%cdelete.%d", + env_config(PATH_TAG, PATH_DFLT), PATH_SEP, -update); + } + return(fopen(upath, "w")); + } + res = tbl_open(table, "w"); + OPEN_CHECK(res, tdefs[table].name); + return(res); +} + +int +dbg_print(int format, FILE *target, void *data, int len, int sep) +{ + int dollars, + cents; + + switch(format) + { + case DT_STR: + if (columnar) + fprintf(target, "%-*s", len, (char *)data); + else + fprintf(target, "%s", (char *)data); + break; +#ifdef MVS + case DT_VSTR: + /* note: only used in MVS, assumes columnar output */ + fprintf(target, "%c%c%-*s", + (len >> 8) & 0xFF, len & 0xFF, len, (char *)data); + break; +#endif /* MVS */ + case DT_INT: + if (columnar) + fprintf(target, "%12ld", (long)data); + else + fprintf(target, "%ld", (long)data); + break; + case DT_HUGE: +#ifndef SUPPORT_64BITS + if (*(long *)((long 
*)data + 1) == 0) \ + if (columnar) fprintf(target, "%12ld", *(long *)data); + else fprintf(target, "%ld", *(long *)data); + else + if (columnar) fprintf(target, "%5ld%07ld", + *(long *)((long *)data + 1), *(long *)data); + else fprintf(target,"%ld%07ld", + *(long *)((long *)data + 1), *(long *)data); +#else + fprintf(target, HUGE_FORMAT, *(DSS_HUGE *)data); +#endif /* SUPPORT_64BITS */ + break; + case DT_KEY: + fprintf(target, "%ld", (long)data); + break; + case DT_MONEY: + cents = (long)data; + if (cents < 0) + { + fprintf(target, "-"); + cents = -cents; + } + dollars = cents / 100; + cents %= 100; + if (columnar) + fprintf(target, "%12ld.%02ld", dollars, cents); + else + fprintf(target, "%ld.%02ld", dollars, cents); + break; + case DT_CHR: + if (columnar) + fprintf(target, "%c ", (char)data); + else + fprintf(target, "%c", (char)data); + break; + } + +#ifdef EOL_HANDLING + if (sep) +#endif /* EOL_HANDLING */ + if (!columnar && (sep != -1)) + fprintf(target, "%c", SEPARATOR); + + return(0); +} + +#ifdef SSBM +int +pr_cust(customer_t *c, int mode) +{ +static FILE *fp = NULL; + + if (fp == NULL) + fp = print_prep(CUST, 0); + + PR_STRT(fp); + PR_INT(fp, c->custkey); + PR_VSTR(fp, c->name, C_NAME_LEN); + PR_VSTR(fp, c->address, + (columnar)?(long)(ceil(C_ADDR_LEN * V_STR_HGH)):c->alen); + PR_STR(fp, c->city,CITY_FIX); + PR_STR(fp, c->nation_name, C_NATION_NAME_LEN); + PR_STR(fp, c->region_name, C_REGION_NAME_LEN); + PR_STR(fp, c->phone, PHONE_LEN); + PR_STR(fp, c->mktsegment,MAXAGG_LEN); + PR_END(fp); + + return(0); +} + +#else +int +pr_cust(customer_t *c, int mode) +{ +static FILE *fp = NULL; + + if (fp == NULL) + fp = print_prep(CUST, 0); + + PR_STRT(fp); + PR_INT(fp, c->custkey); + PR_VSTR(fp, c->name, C_NAME_LEN); + PR_VSTR(fp, c->address, + (columnar)?(long)(ceil(C_ADDR_LEN * V_STR_HGH)):c->alen); + PR_INT(fp, c->nation_code); + PR_STR(fp, c->phone, PHONE_LEN); + PR_MONEY(fp, c->acctbal); + PR_STR(fp, c->mktsegment, C_MSEG_LEN); + PR_VSTR_LAST(fp, c->comment, + 
(columnar)?(long)(ceil(C_CMNT_LEN * V_STR_HGH)):c->clen); + PR_END(fp); + + return(0); +} +#endif + +/* + * print the numbered order + */ +#ifdef SSBM + +#else +int +pr_order(order_t *o, int mode) +{ + static FILE *fp_o = NULL; + static int last_mode = 0; + + if (fp_o == NULL || mode != last_mode) + { + if (fp_o) + fclose(fp_o); + fp_o = print_prep(ORDER, mode); + last_mode = mode; + } + PR_STRT(fp_o); + PR_HUGE(fp_o, o->okey); + PR_INT(fp_o, o->custkey); + PR_CHR(fp_o, o->orderstatus); + PR_MONEY(fp_o, o->totalprice); + PR_STR(fp_o, o->odate, DATE_LEN); + PR_STR(fp_o, o->opriority, O_OPRIO_LEN); + PR_STR(fp_o, o->clerk, O_CLRK_LEN); + PR_INT(fp_o, o->spriority); + PR_VSTR_LAST(fp_o, o->comment, + (columnar)?(long)(ceil(O_CMNT_LEN * V_STR_HGH)):o->clen); + PR_END(fp_o); + + return(0); +} +#endif + +/* + * print an order's lineitems + */ +#ifdef SSBM +int +pr_line(order_t *o, int mode) +{ + + static FILE *fp_l = NULL; + static int last_mode = 0; + long i; + int days; + char buf[100]; + + if (fp_l == NULL || mode != last_mode) + { + if (fp_l) + fclose(fp_l); + fp_l = print_prep(LINE, mode); + last_mode = mode; + } + + for (i = 0; i < o->lines; i++) + { + PR_STRT(fp_l); + PR_HUGE(fp_l, o->lineorders[i].okey); + PR_INT(fp_l, o->lineorders[i].linenumber); + PR_INT(fp_l, o->lineorders[i].custkey); + PR_INT(fp_l, o->lineorders[i].partkey); + PR_INT(fp_l, o->lineorders[i].suppkey); + PR_STR(fp_l, o->lineorders[i].orderdate, DATE_LEN); + PR_STR(fp_l, o->lineorders[i].opriority, O_OPRIO_LEN); + PR_INT(fp_l, o->lineorders[i].ship_priority); + PR_INT(fp_l, o->lineorders[i].quantity); + PR_INT(fp_l, o->lineorders[i].extended_price); + PR_INT(fp_l, o->lineorders[i].order_totalprice); + PR_INT(fp_l, o->lineorders[i].discount); + PR_INT(fp_l, o->lineorders[i].revenue); + PR_INT(fp_l, o->lineorders[i].supp_cost); + PR_INT(fp_l, o->lineorders[i].tax); + PR_STR(fp_l, o->lineorders[i].commit_date, DATE_LEN); + PR_STR(fp_l, o->lineorders[i].shipmode, O_SHIP_MODE_LEN); + PR_END(fp_l); + 
} + + return(0); +} +#else +int +pr_line(order_t *o, int mode) +{ + static FILE *fp_l = NULL; + static int last_mode = 0; + long i; + int days; + char buf[100]; + + if (fp_l == NULL || mode != last_mode) + { + if (fp_l) + fclose(fp_l); + fp_l = print_prep(LINE, mode); + last_mode = mode; + } + + for (i = 0; i < o->lines; i++) + { + PR_STRT(fp_l); + PR_HUGE(fp_l, o->l[i].okey); + PR_INT(fp_l, o->l[i].partkey); + PR_INT(fp_l, o->l[i].suppkey); + PR_INT(fp_l, o->l[i].lcnt); + PR_INT(fp_l, o->l[i].quantity); + PR_MONEY(fp_l, o->l[i].eprice); + PR_MONEY(fp_l, o->l[i].discount); + PR_MONEY(fp_l, o->l[i].tax); + PR_CHR(fp_l, o->l[i].rflag[0]); + PR_CHR(fp_l, o->l[i].lstatus[0]); + PR_STR(fp_l, o->l[i].sdate, DATE_LEN); + PR_STR(fp_l, o->l[i].cdate, DATE_LEN); + PR_STR(fp_l, o->l[i].rdate, DATE_LEN); + PR_STR(fp_l, o->l[i].shipinstruct, L_INST_LEN); + PR_STR(fp_l, o->l[i].shipmode, L_SMODE_LEN); + PR_VSTR_LAST(fp_l, o->l[i].comment, + (columnar)?(long)(ceil(L_CMNT_LEN * + V_STR_HGH)):o->l[i].clen); + PR_END(fp_l); + } + + return(0); +} +#endif + +/* + * print the numbered order *and* its associated lineitems + */ +#ifdef SSBM +#else +int +pr_order_line(order_t *o, int mode) +{ + tdefs[ORDER].name = tdefs[ORDER_LINE].name; + pr_order(o, mode); + pr_line(o, mode); + + return(0); +} +#endif + +/* + * print the given part + */ +#ifdef SSBM +int +pr_part(part_t *part, int mode) +{ + static FILE *p_fp = NULL; + + if (p_fp == NULL) + p_fp = print_prep(PART, 0); + + PR_STRT(p_fp); + PR_INT(p_fp, part->partkey); + PR_VSTR(p_fp, part->name, + (columnar)?(long)P_NAME_LEN:part->nlen); + PR_STR(p_fp, part->mfgr, P_MFG_LEN); + PR_STR(p_fp, part->category, P_CAT_LEN); + PR_STR(p_fp, part->brand, P_BRND_LEN); + + /*need to handle color*/ + PR_VSTR(p_fp, part->color,(columnar)?(long)P_COLOR_LEN:part->clen); + PR_VSTR(p_fp, part->type, + (columnar)?(long)P_TYPE_LEN:part->tlen); + PR_INT(p_fp, part->size); + PR_STR(p_fp, part->container, P_CNTR_LEN); + PR_END(p_fp); + return(0); +} + +#else 
+int +pr_part(part_t *part, int mode) +{ +static FILE *p_fp = NULL; + + if (p_fp == NULL) + p_fp = print_prep(PART, 0); + + PR_STRT(p_fp); + PR_INT(p_fp, part->partkey); + PR_VSTR(p_fp, part->name, + (columnar)?(long)P_NAME_LEN:part->nlen); + PR_STR(p_fp, part->mfgr, P_MFG_LEN); + PR_STR(p_fp, part->brand, P_BRND_LEN); + PR_VSTR(p_fp, part->type, + (columnar)?(long)P_TYPE_LEN:part->tlen); + PR_INT(p_fp, part->size); + PR_STR(p_fp, part->container, P_CNTR_LEN); + PR_MONEY(p_fp, part->retailprice); + PR_VSTR_LAST(p_fp, part->comment, + (columnar)?(long)(ceil(P_CMNT_LEN * V_STR_HGH)):part->clen); + PR_END(p_fp); + + return(0); +} +#endif + +/* + * print the given part's suppliers + */ +#ifdef SSBM +/*SSBM don't have partsupplier table*/ +#else +int +pr_psupp(part_t *part, int mode) +{ + static FILE *ps_fp = NULL; + long i; + + if (ps_fp == NULL) + ps_fp = print_prep(PSUPP, mode); + + for (i = 0; i < SUPP_PER_PART; i++) + { + PR_STRT(ps_fp); + PR_INT(ps_fp, part->s[i].partkey); + PR_INT(ps_fp, part->s[i].suppkey); + PR_INT(ps_fp, part->s[i].qty); + PR_MONEY(ps_fp, part->s[i].scost); + PR_VSTR_LAST(ps_fp, part->s[i].comment, + (columnar)?(long)(ceil(PS_CMNT_LEN * V_STR_HGH)):part->s[i].clen); + PR_END(ps_fp); + } + + return(0); +} +#endif + +/* + * print the given part *and* its suppliers + */ +#ifdef SSBM +/*SSBM don't have partsupplier table*/ +#else +int +pr_part_psupp(part_t *part, int mode) +{ + tdefs[PART].name = tdefs[PART_PSUPP].name; + pr_part(part, mode); + pr_psupp(part, mode); + + return(0); +} +#endif + + +#ifdef SSBM +int +pr_supp(supplier_t *supp, int mode) +{ + static FILE *fp = NULL; + + if (fp == NULL) + fp = print_prep(SUPP, mode); + + PR_STRT(fp); + PR_INT(fp, supp->suppkey); + PR_STR(fp, supp->name, S_NAME_LEN); + + PR_VSTR(fp, supp->address, + (columnar)?(long)(ceil(S_ADDR_LEN * V_STR_HGH)):supp->alen); + PR_STR(fp, supp->city, CITY_FIX); + PR_STR(fp, supp->nation_name, C_NATION_NAME_LEN); + PR_STR(fp, supp->region_name, C_REGION_NAME_LEN); + 
PR_STR(fp, supp->phone, PHONE_LEN); + PR_END(fp); + + return(0); +} +#else +int +pr_supp(supplier_t *supp, int mode) +{ +static FILE *fp = NULL; + + if (fp == NULL) + fp = print_prep(SUPP, mode); + + PR_STRT(fp); + PR_INT(fp, supp->suppkey); + PR_STR(fp, supp->name, S_NAME_LEN); + PR_VSTR(fp, supp->address, + (columnar)?(long)(ceil(S_ADDR_LEN * V_STR_HGH)):supp->alen); + PR_INT(fp, supp->nation_code); + PR_STR(fp, supp->phone, PHONE_LEN); + PR_MONEY(fp, supp->acctbal); + PR_VSTR_LAST(fp, supp->comment, + (columnar)?(long)(ceil(S_CMNT_LEN * V_STR_HGH)):supp->clen); + PR_END(fp); + + return(0); +} +#endif + +#ifdef SSBM +#else +int +pr_nation(code_t *c, int mode) +{ +static FILE *fp = NULL; + + if (fp == NULL) + fp = print_prep(NATION, mode); + + PR_STRT(fp); + PR_INT(fp, c->code); + PR_STR(fp, c->text, NATION_LEN); + PR_INT(fp, c->join); + PR_VSTR_LAST(fp, c->comment, + (columnar)?(long)(ceil(N_CMNT_LEN * V_STR_HGH)):c->clen); + PR_END(fp); + + return(0); +} + +int +pr_region(code_t *c, int mode) +{ +static FILE *fp = NULL; + + if (fp == NULL) + fp = print_prep(REGION, mode); + + PR_STRT(fp); + PR_INT(fp, c->code); + PR_STR(fp, c->text, REGION_LEN); + PR_VSTR_LAST(fp, c->comment, + (columnar)?(long)(ceil(R_CMNT_LEN * V_STR_HGH)):c->clen); + PR_END(fp); + + return(0); +} +#endif + +/* + * NOTE: this routine does NOT use the BCD2_* routines. As a result, + * it WILL fail if the keys being deleted exceed 32 bits. 
Since this + * would require ~660 update iterations, this seems an acceptable + * oversight + */ +int +pr_drange(int tbl, long min, long cnt, long num) +{ + static int last_num = 0; + static FILE *dfp = NULL; + int child = -1; + long start, last, new; + + static int rows_per_segment=0; + static int rows_this_segment=0; + static int residual_rows=0; + + if (last_num != num) + { + if (dfp) + fclose(dfp); + dfp = print_prep(tbl, -num); + if (dfp == NULL) + return(-1); + last_num = num; + rows_this_segment=0; + } + + start = MK_SPARSE(min, (num - 1)/ (10000 / refresh)); + last = start - 1; + for (child=min; cnt > 0; child++, cnt--) + { + new = MK_SPARSE(child, (num - 1) / (10000 / refresh)); + if (gen_rng == 1 && new - last == 1) + { + last = new; + continue; + } + if (gen_sql) + { + fprintf(dfp, + "delete from %s where %s between %ld and %ld;\n", + tdefs[ORDER].name, "o_orderkey", start, last); + fprintf(dfp, + "delete from %s where %s between %ld and %ld;\n", + tdefs[LINE].name, "l_orderkey", start, last); + fprintf(dfp, "commit work;\n"); + } + else + if (gen_rng) + { + PR_STRT(dfp); + PR_INT(dfp, start); + PR_INT(dfp, last); + PR_END(dfp); + } + else + { + if (delete_segments) + { + if(rows_per_segment==0) + { + rows_per_segment = (cnt / delete_segments); + residual_rows = (cnt % delete_segments); + rows_per_segment++; + } + if(delete_segment <= residual_rows) + { + if((++rows_this_segment) > rows_per_segment) + { + fclose(dfp); + dfp = print_prep(tbl, -num); + if (dfp == NULL) return(-1); + last_num = num; + rows_this_segment=1; + } + } + else + { + if((++rows_this_segment) >= rows_per_segment) + { + fclose(dfp); + dfp = print_prep(tbl, -num); + if (dfp == NULL) return(-1); + last_num = num; + rows_this_segment=1; + } + } + } + PR_STRT(dfp); + PR_KEY(dfp, new); + PR_END(dfp); + } + start = new; + last = new; + } + if (gen_rng) + { + PR_STRT(dfp); + PR_INT(dfp, start); + PR_INT(dfp, last); + PR_END(dfp); + } + + return(0); +} + +#ifdef SSBM +int pr_date(date_t *d, 
int mode){ + static FILE *d_fp = NULL; + + if (d_fp == NULL) + d_fp = print_prep(DATE, 0); + + PR_STRT(d_fp); + PR_INT(d_fp, d->datekey); + PR_STR(d_fp, d->date,D_DATE_LEN); + PR_STR(d_fp, d->dayofweek,D_DAYWEEK_LEN); + PR_STR(d_fp, d->month,D_MONTH_LEN); + PR_INT(d_fp, d->year); + PR_INT(d_fp, d->yearmonthnum); + PR_STR(d_fp, d->yearmonth,D_YEARMONTH_LEN); + PR_INT(d_fp, d->daynuminweek); + PR_INT(d_fp, d->daynuminmonth); + PR_INT(d_fp, d->daynuminyear); + PR_INT(d_fp, d->monthnuminyear); + PR_INT(d_fp, d->weeknuminyear); + PR_VSTR(d_fp, + d->sellingseason,(columnar)?(long)D_SEASON_LEN:d->slen); + PR_STR(d_fp,d->lastdayinweekfl,2); + PR_STR(d_fp,d->lastdayinmonthfl,2); + PR_STR(d_fp,d->holidayfl,2); + PR_STR(d_fp,d->weekdayfl,2); + + PR_END(d_fp); + return(0); + +} + +#endif +/* + * verify functions: routines which replace the pr_routines and generate a pseudo checksum + * instead of generating the actual contents of the tables. Meant to allow large scale data + * validation without requiring a large amount of storage + */ +#ifdef SSBM +int +vrf_cust(customer_t *c, int mode) +{ + VRF_STRT(CUST); + VRF_INT(CUST, c->custkey); + VRF_STR(CUST, c->name); + VRF_STR(CUST, c->address); + VRF_STR(CUST, c->city); + VRF_STR(CUST, c->nation_name); + VRF_STR(CUST, c->region_name); + VRF_STR(CUST, c->phone); + VRF_STR(CUST, c->mktsegment); + VRF_END(CUST); + + return(0); +} + +#else +int +vrf_cust(customer_t *c, int mode) +{ + VRF_STRT(CUST); + VRF_INT(CUST, c->custkey); + VRF_STR(CUST, c->name); + VRF_STR(CUST, c->address); + VRF_INT(CUST, c->nation_code); + VRF_STR(CUST, c->phone); + VRF_MONEY(CUST, c->acctbal); + VRF_STR(CUST, c->mktsegment); + VRF_STR(CUST, c->comment); + VRF_END(CUST); + + return(0); +} +#endif + +/* + * print the numbered order + */ +#ifdef SSBM +#else +int +vrf_order(order_t *o, int mode) +{ + VRF_STRT(ORDER); + VRF_HUGE(ORDER, o->okey); + VRF_INT(ORDER, o->custkey); + VRF_CHR(ORDER, o->orderstatus); + VRF_MONEY(ORDER, o->totalprice); + VRF_STR(ORDER, 
o->odate); + VRF_STR(ORDER, o->opriority); + VRF_STR(ORDER, o->clerk); + VRF_INT(ORDER, o->spriority); + VRF_STR(ORDER, o->comment); + VRF_END(ORDER); + + return(0); +} +#endif + +/* + * print an order's lineitems + */ +#ifdef SSBM +int +vrf_line(order_t *o, int mode) +{ + int i; + + for (i = 0; i < o->lines; i++) + { + VRF_STRT(LINE); + VRF_HUGE(LINE, o->lineorders[i].okey); + VRF_INT(LINE, o->lineorders[i].linenumber); + VRF_INT(LINE, o->lineorders[i].custkey); + VRF_INT(LINE, o->lineorders[i].partkey); + VRF_INT(LINE, o->lineorders[i].suppkey); + VRF_STR(LINE, o->lineorders[i].orderdate); + VRF_STR(LINE, o->lineorders[i].opriority); + VRF_INT(LINE, o->lineorders[i].ship_priority); + VRF_INT(LINE, o->lineorders[i].quantity); + VRF_INT(LINE, o->lineorders[i].extended_price); + VRF_INT(LINE, o->lineorders[i].order_totalprice); + VRF_INT(LINE, o->lineorders[i].discount); + VRF_INT(LINE, o->lineorders[i].revenue); + VRF_INT(LINE, o->lineorders[i].supp_cost); + VRF_INT(LINE, o->lineorders[i].tax); + VRF_STR(LINE, o->lineorders[i].commit_date); + VRF_STR(LINE, o->lineorders[i].shipmode); + VRF_END(LINE); + } + + return(0); +} + +#else +int +vrf_line(order_t *o, int mode) +{ + int i; + + for (i = 0; i < o->lines; i++) + { + VRF_STRT(LINE); + VRF_HUGE(LINE, o->l[i].okey); + VRF_INT(LINE, o->l[i].partkey); + VRF_INT(LINE, o->l[i].suppkey); + VRF_INT(LINE, o->l[i].lcnt); + VRF_INT(LINE, o->l[i].quantity); + VRF_MONEY(LINE, o->l[i].eprice); + VRF_MONEY(LINE, o->l[i].discount); + VRF_MONEY(LINE, o->l[i].tax); + VRF_CHR(LINE, o->l[i].rflag[0]); + VRF_CHR(LINE, o->l[i].lstatus[0]); + VRF_STR(LINE, o->l[i].sdate); + VRF_STR(LINE, o->l[i].cdate); + VRF_STR(LINE, o->l[i].rdate); + VRF_STR(LINE, o->l[i].shipinstruct); + VRF_STR(LINE, o->l[i].shipmode); + VRF_STR(LINE, o->l[i].comment); + VRF_END(LINE); + } + + return(0); +} +#endif + +/* + * print the numbered order *and* its associated lineitems + */ +#ifdef SSBM +#else +int +vrf_order_line(order_t *o, int mode) +{ + vrf_order(o, 
mode); + vrf_line(o, mode); + + return(0); +} +#endif + +/* + * print the given part + */ +#ifdef SSBM +int +vrf_part(part_t *part, int mode) +{ + + VRF_STRT(PART); + VRF_INT(PART, part->partkey); + VRF_STR(PART, part->name); + VRF_STR(PART, part->mfgr); + VRF_STR(PART, part->brand); + VRF_STR(PART, part->type); + VRF_INT(PART, part->size); + VRF_STR(PART, part->container); + VRF_STR(PART, part->category); + VRF_END(PART); + + return(0); +} + +#else +int +vrf_part(part_t *part, int mode) +{ + + VRF_STRT(PART); + VRF_INT(PART, part->partkey); + VRF_STR(PART, part->name); + VRF_STR(PART, part->mfgr); + VRF_STR(PART, part->brand); + VRF_STR(PART, part->type); + VRF_INT(PART, part->size); + VRF_STR(PART, part->container); + VRF_MONEY(PART, part->retailprice); + VRF_STR(PART, part->comment); + VRF_END(PART); + + return(0); +} +#endif + +/* + * print the given part's suppliers + */ +#ifdef SSBM +#else +int +vrf_psupp(part_t *part, int mode) +{ + long i; + + for (i = 0; i < SUPP_PER_PART; i++) + { + VRF_STRT(PSUPP); + VRF_INT(PSUPP, part->s[i].partkey); + VRF_INT(PSUPP, part->s[i].suppkey); + VRF_INT(PSUPP, part->s[i].qty); + VRF_MONEY(PSUPP, part->s[i].scost); + VRF_STR(PSUPP, part->s[i].comment); + VRF_END(PSUPP); + } + + return(0); +} +#endif + +/* + * print the given part *and* its suppliers + */ +#ifdef SSBM +#else +int +vrf_part_psupp(part_t *part, int mode) +{ + vrf_part(part, mode); + vrf_psupp(part, mode); + + return(0); +} +#endif + +#ifdef SSBM +int +vrf_supp(supplier_t *supp, int mode) +{ + VRF_STRT(SUPP); + VRF_INT(SUPP, supp->suppkey); + VRF_STR(SUPP, supp->name); + + VRF_STR(CUST, supp->address); + VRF_INT(CUST, supp->nation_key); + VRF_STR(CUST, supp->nation_name); + VRF_INT(CUST, supp->region_key); + VRF_STR(CUST, supp->region_name); + VRF_STR(CUST, supp->phone); + VRF_END(SUPP); + + return(0); +} + +#else +int +vrf_supp(supplier_t *supp, int mode) +{ + VRF_STRT(SUPP); + VRF_INT(SUPP, supp->suppkey); + VRF_STR(SUPP, supp->name); + VRF_STR(SUPP, 
supp->address); + VRF_INT(SUPP, supp->nation_code); + VRF_STR(SUPP, supp->phone); + VRF_MONEY(SUPP, supp->acctbal); + VRF_STR(SUPP, supp->comment); + VRF_END(SUPP); + + return(0); +} +#endif + +#ifdef SSBM +#else +int +vrf_nation(code_t *c, int mode) +{ + VRF_STRT(NATION); + VRF_INT(NATION, c->code); + VRF_STR(NATION, c->text); + VRF_INT(NATION, c->join); + VRF_STR(NATION, c->comment); + VRF_END(NATION); + + return(0); +} + +int +vrf_region(code_t *c, int mode) +{ + VRF_STRT(REGION); + VRF_INT(REGION, c->code); + VRF_STR(REGION, c->text); + VRF_STR(REGION, c->comment); + VRF_END(fp); + + return(0); +} +#endif + + +#ifdef SSBM +int vrf_date(date_t * d, int mode) +{ + VRF_STRT(DATE); + VRF_INT(DATE, d->datekey); + VRF_STR(DATE, d->date); + VRF_STR(DATE, d->dayofweek); + VRF_STR(DATE, d->month); + VRF_INT(DATE, d->year); + VRF_INT(DATE, d->yearmonthnum); + VRF_STR(DATE, d->yearmonth); + VRF_INT(DATE, d->daynuminweek); + VRF_INT(DATE, d->daynuminmonth); + VRF_INT(DATE, d->daynuminyear); + VRF_INT(DATE, d->monthnuminyear); + VRF_INT(DATE, d->weeknuminyear); + VRF_STR(DATE, d->sellingseason); + VRF_STR(DATE, d->lastdayinweekfl); + VRF_STR(DATE, d->lastdayinmonthfl); + VRF_STR(DATE, d->weekdayfl); + VRF_END(DATE); + return(0); + +} +#endif + diff --git a/data/ssb/dbgen/print.o b/data/ssb/dbgen/print.o new file mode 100644 index 0000000000000000000000000000000000000000..6f3f5d4fe452bcb611c5ec0ed707adcd5ab68540 GIT binary patch literal 19760 zcmbuG4}4VBmB-&C0RqBIT2WCD223#ylf)rl?^YH#7Nak*zkqV-y*mxh8() zUAoL?Za8)~VC$QJnJg$Ng{*09EWQ(t6}@KS`=W+x#ilxNE-K{+~04e z3U->i4i%WGGf;0Pr{<`eLG9{ zWh~iIh7s?+2ciEZb9HZnw+9EfJ|)IKJx@l%JP}hy44au0lOQvCJ8>|DIq{XAvexGd ztUZbT;jTNbz>%|Fx1bmH)bkf?Hy!)t^&e7mH#WJ>IxwIW>yu0iePe`ycZK~vynr(G zOzTa)-v8j}%2?WrqdvTMg{y!29#x^|qhN3#;^Ss(AbMK#qo0|-tc zpJ|;^7F`=JfKm5ugD;~8J}_vfn@O>F?Sw^7E+Z*lp_H60Bz8Hbw#kmW9>JS59UGv(`Y*T2_-dQvhrs|o27`X@SQ&w$-K z;M~}muc&E>%eij{T<7ZIth2thK9g6n+1(!-Gh4(eHc{BI-r0#&L|)dm&PAhjiCc;r z4A)&ghtJ$PnenhbTXpt6Y}h9muo} 
zW*f{gQc@^-qs<)Xo}4Fg^4aCo{qkJRL;b2D%BzPcSCf&3m$u(9M13_`x%w+})=&9b zWw{7pmWv@S%d=lB`b;$wFFDK&yB`N{>Sl+E8(f>Wf79QeS-}Qh|2GVM{r8-SV@IxU zH(bPpwqMzXyjZ$C9IIvN(nYd-1XDgeSC@Nm?o(&#!I^oCGd*I7AfAUnEJt*(XGveD zY#EwJd!CsLDPLDE&y>OJ%LbaH7k=Lza@iQT92X2d9GT$ve7og%F4UO9LF6mB`bNtS1?ug_q)){?uATxJieWp=suYVR_!c~2Z0XWddfy3YE%&icJ+ zjV?C*_6}?I-dGJ;g@@UJPfjSd-Z}A35~|RVFmZbxhiPT6#jgkx_6#T}u1|ILZ^I4u z11H4x%=GWAw~m_+m?yqW9Lle^K7yTjzPaoDJago}`c&~tSg+nXk~lP?ZseZ2)OE%E zmv{HO?s^PnQN8ue0>>wPOJRgf>p*?k5i@bp;o3L{&eaTEE<#MWg z9ynUZ6%I%vOf)$luI?A%1UK`3INBPV zEk1BljoCRcB~0sxm=czVXS#28h1qrwH|Im8?47?0i{yM2J*`g`7uQ;cnYp?T0wNzE z?a2zgH~Myhhu_}Dy;*JCOnMo2!H|u^eXsX=?+xC@-2*0H&bdjp0_mwcXl{xN{pf?+ zpzHlde{3I8_Tl!yx2ak%L>w4moBCboooW!Wa{8ovDjbf)#CnH*s{#3gH{mKVCVE-KvE z8#~iXyy!4pJ9=f2Yir)d-uOpmYDuYUtG6_#!UTnxQ?UXRD{?AApa|tutOLcmT#9?} zvB$M_10HytxGHyxP$X`{M-p+nN^M>bwM6Xdti5K^d%#RK6q@-(X1-YQOnc!o{oX_H z`-6P6G4s7gGTGyq>`9q*ZFPVnXPefmY;?zXU0bWi@b11qm1N=AHE`j8Igbm+;_xSn(~2j-}En@VLD(pBA2hlRbZhimxaaEX1=`@wCsbwrj}UnFE&lbE;C&QflgcZ zFK!@bsl`j7t))5IN-b^$g}joO9^VLx8*|zSgCfi;>;W;1jz}OFiF#tqA;V+T`h1NI z@N2qJ5-stUL_P76fTtut&O|m43?p zmqgu7G5DTS5*THaOuH~@lvGrPz&L(ph5{w!v+OgO!9q_-xs<4!-)(d>3ZGX8*BPNq z*5A?I-WGGO?TD6ICEI0pOQfS6U0d54j07>$9E`0Eg6H6tyD1#NBidUufBqbI z>9Xc{I2LzTRCs21%BNMt#c9P4D=R(aGs+~E4daco2tGK$7G1H-fO~T+6f?{&Hp1&f@|IP!kt+~%lp z_Bw8G?sa&b`y4@l?W3G-ka?X&HO@lsX?Y7l;{^?tt%dwbnQw;te3=LHQ{l5owznWl zd--b}qcT=)-~L1DSKxMffwL%)mp=|JLhw_v;(a%D(0cmr_R#@4> zIF!or@=W_A6Uaav4CI6nrACcUv@n1;3MB`6LevQVTmK@_j)4z(7VaClALjK<+ z-=Ohbl6PzT70EYi{5O){ukqI<|GCEBl>8};zbpB_X#9W0xf<0>;Zdm})w<5S7^ z5{B6m@cAyxAeF@03@3Sj)Erc0cpSy>a~SSsIR4csoBi)I9M{8a`l}g!9m8V`-^lQL z8U7H%A7%Kn4ByJ|SAgTVsPPc@Fo^$uGx~!JKg96E(*8-gV2FDj*#B7a2DyZadmZpE z7(3r6J8~+D`yJ?q!MczB-yt_G;ywp_jO70&7aDPY1AZ3pY~wqL;TJI6!|;oNqd))P zg2o_;`yU+VNj?^TF@mJ-dy6EWbh^#ey{|#?JLN`4-T$tUe2sV zgK9t7(&?!vH(ELyL!Q}2s3{t2Z0`sJJyk|$BNWYo63@jp569a*GmQX%8AhaIZL~4g z+7@k$23NHQVP`tsW}U%E8dVyRV6-g|ZwfV{-cw-&&}O;esh=;tDlk`xU(@kx7JkjZ 
zubKFDF@9Cy*KGU(FY#N{sSuSaM6C)@qe4`v5Opeqp$bv8Le#Akbt^0EU36n-B-m-h zI|EIzVAN;}M}v`=ayn|n0>PH35sgLs?VYO7-pwnG5e%6s4X09iv`=$)$F~oI*l3w&kcB1XcYCM;6fwVU{_lVTZ=XMS7Td^ z_zYkKnpZ(@;K{)7cZA~Y;id=-SUeUL-+t(Bqdy*v;Ss$tTKu7oXb?|3n<4^7fQOsb3 z?fDCTzz_+hqk;KT_^T_Vc{avs9d^CCBCDLc+TwNBb8M zo?!To2>%Yz+xrT*F9E|#RkdsHEA0Ic?)eq}E#xtw9xq|V-$w`u{hS6L#g8L|gnGQR z75^L|B-G;G@h3cPWyWm;k3WYB}ac~e?vr%OOWzsJ<-$t zs&yLuFNcpB_xp*R3S#GH41b((>i^S(Q~#frT=!o+PoO`zR4V_~Glf20qfrP6547FW z38(FjlU#4NlIYI@TdLiKL{H;(HPO>~axKwU5<6W)k9&P(|2D$ueE1RJXA}Jv!lx7d zh~(I>8H7JU_)Nl|Cj4T;_Y?bQOZoW$;Z?xZJULGIIYgf)e+bb1aS@KsKFa=h$VWOw)KEvq6+Y8{I-rtvropXu* z2N*ja68%J?|ANuy;ebHGcB!9ZCD+GmJkitmT*vSh!Y4r+D*j!PJ(NjNn5I&jM*~i#m-45W(a&M@wT%86M(=0z>ll54(LccG zA7%7CjD9zxf0NOF!05kZ^!f5RT#x_RlI!t5kI~O&^z#}0HH_ZR=+`m&1fzd|(Lc)Q zdl>z0M*k|K{~giO^&x+xJv2Ax(U&v&g^a#| z(JyE8O+-)U&sxIIhw)9X|BRi#)A)HfsUSV2@nRbr+lYO--}o(~KfvfeBzhYEQ-tF< zs&6G#}JGWg7uzf+7P9IrXm-!llO z{hA~>_6zr7NN*^Yi{{Hp@!fE^y47W6{+TEgY75@h%M?dI# z@+9GOJ$at7vq$4cBqr^b~(=Mr8E z`t-OHj&_uvS4xh4()(gQobf1J?|#~=M6 zsdy^;MUv}rIG@p%Gx~*$zJbxVF#1kLe>Jf1J?|#~-`1 z#b5G4@n`hqjD9ZB)BT>0aGd{Y+^=WsL>c|9jQ(Cm|M!gkIY$2iqko;zzs=}BWb_{s zJ-r?a@Q)3UF#dF1I!$u@dYMc(-uGw7`~F;p-@x$o4F5-l?_v01hCAioztMl{=VcmK z?}%N+@RbZ-&G0o0-@x#nGW-RG{};pGVEB6sKgRGA49}A{4i!%ow=oPK$MEwQeksG7 z8U7QE*TOhsdWPZq8UFtaKU2OBq57-rmoj`l!-EV@X#7LledA$<_i9{Scl#OsM~3$^ z{2ck7hw87gKbPTuqj7u>2~(Wm_c2`ji!Rht?J7I3G5Y-sKg94;3?FlraCcC&;_B~_ z^0uG0Gg{h_msgrEmVBITF3n$-+^unWYTT`H^?q5e#?^aahcvF<3mYxxsq+7%?C4aD ztM|X^HLl+O`nkr{`&oN5uHMf&pmFtn)-jE%_p?M`pbyGV^?ue^1ca;ivkXskU3;vl z8IG}teQZ^^NHEmoi3Pi2h6i4(X)-*`(Wv2xbl?ffE8q<)j}X~xU>Up)g)j5NyIO`P z+yVb+0I$za6R#7=^Ht%vr!xX?AVp&9q@7@EV@sr|J!t#-*L=I7GKOflW~5r4za!_n zDwg3?rw0Bl9H&-wn%J3g{P6&iIoUqqV;I&|^IpwQ+!G;}(;sqnny3T{$fMr=RN!b+ zukV)i8?^eWeT6F^gElJ_kVjSj4UMbz)o}roV_r_1wECoMUp-f<0(lv#QxUgp@#|V> zOCFa>j317X@?VALa$wo~ABQiAK=Rm!^tY<7{J#!LFkhZl*qMd0KE{_ORZksp3aUKS zMPz--Ft2Ol!0@*cJ$_xXz8lJDQvNHJf(+W!>))^HYXy4!JE3m2_`4U`EmTQ|G5)wV 
ztNJSb+1kdM+wj7GR49K4!pCNu>w5i>!1TEb$mpGd--fYT^Iwl2K6YfQzrqyNjr;6F I`dhF6XE<_@r~m)} literal 0 HcmV?d00001 diff --git a/data/ssb/dbgen/qgen b/data/ssb/dbgen/qgen new file mode 100755 index 0000000000000000000000000000000000000000..2a11803792d98fb9b5136bfb27cf16ef12225fc6 GIT binary patch literal 78240 zcmeFa3wRVo)<4_{Gz26jDuZ}sK%+qkhN}cb5@4V~5+WfAt_UF+NHiodnII@P9ioiG zD7%Ww>Z+)ycw13H0}3R7NmSIJsPVEoUUy<}HzK}9Wu5=;RMqL}X%cY%pZk2@_dbqh zs_Wd(sj5@e)iup>*OUpdF)*lJ^&X)BjTaoh|1})1RIFI3-FhG#_H4ex!+ijcF=$N;5|Q__}qg(oCt8c zpS}>(9;7dDzzt#c#qz>#{ z)B)Vxft&|BfIrp&d`buKdpdy6?g0Kk2keBi{N z{qqk1+Vl5&z)#Rl)zYcM^#Nc7imkP-a~8~tbnHx!(58hwKJBl6x<&+~w~1MHL=*`Si?j#ib?g>3MUD-P-Kg^B0ts%&zd{m3wB-mZa?x&MPi0E!T=m z=Yu7GQQqu%MJ0K~Mc2EvdGp<#3a!FZo?l4#{JdgM(E_)&Ag>q|4al;h0-*CeU_{2x zSGpGD6_sf7$|^ni_%1Ih@yrvo3N6o5I*-1J=&M+pS6b#SAu*+89&KKJacPBHC@Wh` zg{Sly4T!vQVU}lcnS1u!*`&9^Q&3t7hIz%66@?lEm0ssACytVQ$yp4k^9oqFyJVqO zq0GpeTUzcR0q*j05?)k7_Lt`i4*)AEB@KC=qS6x0y{L%%Dyk^WPfMF!QIVfl0za-R zFY-`m;dTBsv-1nDnLRJBs91yd3ut;9PFMNzYT}ehW5>-NJaF*9VVZfJW?l|4uP!vN zhD3lvBfz0({aF0hNqom*_M#g9*I8ZZs*J(R#<*CmtK~Wt{W5}y(Yje`P=OCg_Ms2b zJerEX%2SKFQE$Fi!F+$b3)7~qJ%&1cNY@<-NA;76isCiM-K=0Ii+WdWhpIm(_`7Hg zs@_xJJ8N&N`euRer2SphrT$o(YoABQt(Sr)+TeW@+-ZX+D|oUE zo~q!fHh7wXkFdeVD0sRJ?ox2W2A`zh**18Vg3qwQa}<1z4St1!7uw(j3SMS|FHrDB zHuypXUtxnUSMXIf_{|Ew#s&{4_&OVWt%9$&!S7e_jW+m31>a0tRPae2;=B z+u-#Io@l!dGSq%pZyS8Ff;W94$1Cmmbds0iJ7|G-wZKCbcrOdQ)dD}y0&laxQ!H@4 zAEbRQx~Jq1-`^3O_MG^`_jd$OWgtqvPb4_)Q}Jj0$MTA7mn|gg8!hlyCPul*0_S^6 zLRVYhq=P>-7C6O@KMfW*&uxTjw7_xWV1Ak`@a`rQ_q7)IaTa*U0`Fmgw_4!1(J()4 z7WnZd6c?&pC_V`mc$@{EXn`kM;3rt%y)AI-{lC)!KgmL$Y=P6hC4W*aa0dfXjL4aeJt=5 z7P!*_UuA*!wZPX{;QcJ{br$$p7WjG#{A>$+qXnL1fp4vU;Z9}-y`sQ1b&ae|E>spmvH7!hW9|6;n&~ascA-4jVCtTXn6O; z?G}!Phh4ox(+-A{u0fH|Mp~$WY%dxcCfjxS*L~A!R8apIxWBsHpiHCT6i67{`MF4mlj;`->lO@3;vsRT42F{ zvrY>u_;1!}K?VQKIxVE&zgedRJN!56w4j3jW}OyN@ZYS{0t)_{by_&Vf3r>tCirjG zX`uxF%{ncR;J;a?g%SKW>wQH1+kdnFPEr5VtkVJr{+o4L_`rX&P75CRZ`Ns{1OLrB 
zEpXt!S*L{!{O3A)U*>k|ed9IJ|4{!D+a1>W-);4GZ1vY|^_Ok+?Y8Ay^;}y$(^hxc>Z5J-VYYgTt)677pJA&zZ1s3s zUANW$y)C*uzqi%DvDLq{)&Fj*zhkSvZmYj+t8cf}w^-|gzM16Pca`D$%aUiuW1QDgyZsA};pA|&6 zTa}>SD(HtjOGW=*3}35J`_C~(ZCfWJrq0;+i>D_9xRijnaB!aJ_smcEcg5)K;MFS6 z&o#WG`+-t3d|!Hc82-`cpc2}8I2;ZYAeq(aYe5$?3%sU&;%~kVNV>=cpW#~?GU{Dz zYi7r2hCef8Y@0t%ehgoO@x#}KuMxJz8nr);F?_EY{TmHmt+8!%Vd_tMVECz4n&Svx z`_mYrDQfJSMP@ZnCC1ezCBOMY&gT!`v&U)W>T3AjFlv7RSF7RMWfbiSVfHZk*OD+& z9gH<%8jV2DDvBaq3J6JbwawaXdf`2g4o{j4UnLC8Z74AOos;PP$MCt@2Gtn;+y*1y z3L3tB+x`LhI|>APNIcWVI?PGz(EVY*cScDrn2@UKg7p?DE!hNqGLmRf&SryDK)~d2Lnf z8lc(=jPqqYJ$mSrpTpr-tdURU$R8PXhVy=5jY4i0NLN$6 zmN{YKw7T@pXduY&=QbfQh$}ghc?$`O1g1;GWunq0)wwpRAPhgkn@fJOdE*o#!dDqQ z!W+>%@CNb0Wnd8&QRG}L4641E3?kFyRh?|G5@-~}-kM|(ZabBtK(qPIF8B)V!^8~7 zg1)ii{d&KzHO(7}G4e%Nww^|$_!9wp%teHbt{{OpB#dBZ8Vde&n2;MZyt`(IXuKCX z<402HI8zYDzt?P~Z!rAYk(|Yf^Yb=kY9M=tGTAVE?^1K!jrEHF1&qa{ejBYPWbX~# zAR`|%be{n3+Chs6#36@F0Q29m*bu>hrPPgjieZUv^cECGbt5MN--ZE2 zZC=8^UO8waoY!zLAyH36d(KO^pmyOYkOhj&R#*cL8b%?qsik*2h{u6gV^Ku;4y_9% zO~NS>rJB@R6KB+eT(}$Z1~I@yq6vz~Awi>ssp%Ct!pV8b3Z?GRWW*dv`m&N75kRDE z5z;lkI-&XX-T%f*KhYTt0JHQ~VSW0tSq@|HGYEnr1z$>JENCitPGNmntw0hkE zdNK0rU7@bTR_|&>4kjbJkk~*rdU5>EUl9a>lg*hlo2rg41yuh^6eo(n>-$8-@fH={ zUEswi@Wnz}4M741Ao>6)B}C!ZD@CCc7K(}^ zkBTUTQwbs!ju$kTWu(3jg!%fw2*Fvnv|5xb_4N>Zx-JT(ukA}=%C}TR>3dHgsMCau zzUW{28w8LB?50bpe3n%HjG`79SPbOqt4OfEju6z>{fROI3*t~a0bju#0xO2&1{UibeTs2$c(H7h4 z%3`z~dGoOp!H|9~9Vox!+b^ha)gX6uGE(A39VeCn7<~TR^@vYTmB9KPLWlJ(U8i}R z{?S{|v8ofH;V(agjeJ@xtTX;%;5v{U)1uRoiA1h9{Nx|37Ad4mWNXJvku~+Jsc$Ac zjW9z^;QF|&$;9N_C1$bE$*`?uyp)k0=Xbqitdrvy?o-elCrK%1ODT<#5{DWx$;A+1zLE?brBXpa%mLv7HN=6Dy%KNg3(*kmuxzRYzycYQ>YNR zLd-E}=trqn71hv^Wt&*Hq%|4|nKZvNZl!x(eg+SY3+83>d6I2BFWS zZ>3G*ZC&wYGWQ_ndRG&XHBap#*+pd07iiiR%Q&oVu@bD2D>zhIAOVpDHqat& zzlue4%M3Z{T}|Ga7|g-PepWtZT&8~=+Okui3p6_O8XF2fzyP*4q7O%28KhC zYdy6Y*+bhT+XmyywCIGR3s@f|AZ{z|t8D8|0`Mm~Y9snv$KhDBiO4J#P?r=*)D&>L z5C9tk2JV|WawXdmhD0^m5B^Imc!;y_Q8>#Y`r{R6wc_-rUu5Jr9IG?wSa`$NIE=c6 
zt+dm#?FxL!WfF$!8>2)7AO;x@f%ds@>G=02!tn-M&O@K<$i+1^q{fUD28YOLJ%p{l zlF31-gf_E~N`?ecz`irY;xBW3hniy=&^ZuaLH)?Lv}(Zg*V;08UCeq^54aT50Dj|L+H+Mnswx90k8F$}w#|T05`zWhWY5 zS5u7W)c*8zBhA&6u=;+w!%?$M%Y}qP9DdC(T(2T$7rEp`iYbOp^zF%z1Ge=|YKM2Z zSTBiTnFwnrQ)2AZZcBp99b%aq(C7bMj-?*BNf9)G?299`V)d02CroJcjh%u-5u~3r zXxns>!8gqhUnrP^(1jWN`m?D=1@y&>NHpDeR*EZiLDrTD=9;yl;-QmDrHL%Ns*DsrUXMz z3t(^gM?_N}dI_@vN%4&>M&c_VD2V;(#aK~U?sGx8=Id}+bd5$|HfTkx%eGRCqi3ZX zhD#Y}ibbTbvsBnuDZB&<+imi6$hX`mdJj1NZ?*ysE+oIGMdX!=LVk;gLJ&svqyFK9 zQI@TMIy|F56tER=kzYUggQ%^5zC?;Y5w81&5uxKSez1ZNw6hg}7-M7Pz0irxq|k8^ zWa0K|Zxnq)yITPTinA%COqJ^qGC5ss1>BBULTwO}=C!2Wx)o4{c5XMWA;^o0t2vY~ zkVC>Ty73FF$?qeiH_8QJlcCR1cLHhDM<|1~zk_Jmn?yuNmdP2$qU9)@*kn{Ymhw)& z4Lg@|hfnNW0@$J>31M3%DxcGxh-l)a50k}1arfzJ^#X`Hn?GPEw01a`_}(;X!`SR> zBYv?kZf}3Hzu5k)ddqV>?r1_W%eq$62yD@1c(Ha)bG0Sho{w!@C=TJK29qw&HKuK! zZXnxwl5K5b-?r|*YGtg=%Kqqu-$Emj3+B&M;`@!4r(EsbfJE+V|6|EjMfD+BEGT}9vU)-f*0&;x*j))#S#vY+Jlt_{C^KHDi zGp%ip0<3hOk13Z*Z%yy5{n6vLRp4s}W-T;SeQ5tBBs#a%chhD-{v)db?dh~y1*P@r zw7jRLmj;}NO0)qv$1otZgDJY+^@Wm*o6-7^@df4)TBD+GqM9bE2gD49+_9}286j8C zJ3bGGy-N?oE$`=d9q_rnXzA&9eL*|4lw)EV>olj=^+k+#=@)U!zQT0dx^*tuOajb& z{m%=OOSD7?OztN8{+ZX3j>rbK>q{||n-KZ#bsbDk55SnHUf|^Esor&n{KHcmSsbRK zZ2lN#0r6kUXTJ`^BJ|nVZZBelv^FOq6j+ekQddgxRiE?L=%P#{fwD; z{ZYfDB;_`JieUQCi_nw&>AkUz2pBOPNSnndZVi;VgnkOp$a2R^6pAfE42|~`S@@t6mt%y_7KEn zBpWgyRIKipCoaR+D%dKmY(8d7Pc3SsJvU{*H8lm&1_(kD5?Rf$WIY4;DW|{J95=d_2aj=~LBmsZkqp*0$-seYHRK>ECco zLjg4MutbNfFc&)D3g=2AO}F? 
z!IpBBtfJOL0f%-kREjMY+7NE@r}u#5B^mO%C$5(dlh<)+uHcgX@;VXM%g>-|+Ab#p zKuA6D(JibuDj#9 zr86Y8S|r?fU_B)m-}XYR`Os^kA8uC>l(I}fKYR_5si(@flafGHnZPR8&wwR7FJK8f zYP@ZWJ*VJnc|UJkiKiF7miP9yWqEpd+h$fCFISztMJH@I0n#6n(nED0g~KhkAUi}o z(bFK>Vly7e`<8y>t8DV+?l*jy_Gj{b*M8i#(?puz=y!c&EZjkrFhQH#mmw4``j~$JQIz z1NsYp#p#siJIAb|wR1@90nudRb+yvcF^JW7Yr^XJ6jd>k(KG>0b854NrTIf^SaQ&1 zXP`G!`zkm4ayR)hpPb@zt+zI5h|ViyuJ`9Y>95@6cWp***@&ZAMj-d^{>&$#t6DBj zycC&$>+h6QDxt&Y+Us{cY2-J7WIX}r0O-%%YvjL0%YT}4eDIDIJ@1=gn2LPucY&r6 zT)-d_V2;7o0tu!CN@(i`^09fydI+E=WE=ijZPD!wznXlNXze3UV5s(#tqvuyQmp@) z{F=&DyjV@7_A)mb{&o)C_;Bo*D{|i}$7I^BB_COYM$keB?m5IJOZa`!WN7K=jyn+^ zS{zX)VyOFJWpyQrmP?RPWQI!-3#nsexTticFQ6#^&xc%X<`M}u&{>;f-Or}gEvY&h zWu*H@-M3O(i&HmIXJqJn3_ z{SR+^qXI}zi^To8xQV1?Dct}sJrmxv8Ryf;tcfxD!=D>nA-a2C(|jHV=NdFKE%J#z zGGpWzHy$X&78G*n#($$b__nMU#oBc!?8C6WssT{y2u6KXBVYRUy4L`qr|(Y@Dxm*u z9touU`-r%z4{117BJNcKLQ9}32yWc{ZpG~YIEMKook*>riPSG837)A}3Y6%uZ#0pV zsv7Vb-tAOq302R*L(CJ;){~&MBsZh_H0oi2_=|`&Ff83F{Db%S;eIPg^XuKn_<-a1 zh?@EBshJ|wI6iz8)P8+Dv}IH~rJ8r#Le2YB@LW@2s_~ID>CFy$*h8u4`*(#SP0|r6 zG+Iwp(N!G@6Y<$`mLjh^IyvSqavS#+dh^57mfr26aVl?!L~*7tPDF7XOp~`=h5(A~ zv4dT~hbdQY&L;^0Jz3&*!UQekRxJLi^<*+7!uLWuO_o$*kv#$t(VCq?gfMCw!Ul2i zpBJEziTa%`xWsgX2UM~t5s=OAgu~6t1Y4k*RG=kAk{3eIhK~wPWF?=B~;x)`u$ZT6zSeywT>>3B?Uz63P8O>1enqCk+q-{rzv179UIAh z83#Qk2!)Q%^2w`VeXUllfDiK6$Xz5(s;b98?rCYs(h$^Ge_MGz^PF^%F<#4{TUG-0CO;43+wS&ZFfDx?&)H%Ut4&oE`j8nk$4u z;1#G!w>68~=H`mo0|$c+Dbf`+t&bM6tkXK>%LepQ-}Z~hdf%2*6t+C+uM#+{A+X|B zCpKyO3GoW(-RsHt&UBdYq6;e1j!OG609%R3a#CT zr4{ZYV?7t3ea_tll{|psFBDQE?K2bnJ24-HL#0sRuPTI$Bn_+E(b@`};++Kny)e?K zTVIM^Me(Irm3zjSGF4KD|K}X?Eh74OVY9@d>0kd&^rgz~GKpz+b=Y64yevvZ*KHXX ztT+*qjzR@U3<4RGi%r+j{m3xChy#{W&=v!v;j1zLZkZV^b*q%R)gpD;2tu|m z#`RWpJ?E%B>KbGhv6BO?;YX#+5R|mfE@Ht(gx_KSmLb-IkMF-pCPj}i?|~7tcE<53 zj3GKjmRWZHI2eH5t@bcmW(&A~1`;Fa==MY=ts7lruR7q#%j6n`d>8Q`Ih1fF zlX{E>dENjw{7$;r5|3eMGm|zrk$hZ@%kN^2-o!!M--%W!#Dig738V-e6* zbk>$L^*4_2V;ey5gU)e9 z{C)UHx<3iJh#$J}<4Ch+*plnc&{Ql)3c!utuo2ILHfmyaes(*izZ1m@!Z98voMbX0G 
zLQ>sJs($4ULUo)~^?XzHQmBS7n)snHca!Rmt=P)$NC#1Qcf~2nDSMH2T^nLf z_SF+^8-oT#s5hefM~x~k2&03E8|`dtKv>5VZndis!A8x~6-A9!*!}- zFHqzPb*%DBWN?d0x;3YxhDZ3jBbut^fhH9fNQ>U-aEja^ zvM)BX^<7}ZJYkU59ZO*#wRy~|FoSMey-V9P&sih`S(J8=T2<#ReTB|P-w0;G2lW0#B-Vp{#Z`T%;A;`kAS644b(sywCPB;Hd5om2h8ivO7&+qC zzoy|GaD00?i4zOMRN)~8A)e8T^@*d7XmEZ5j2YE;3M1ohC5FJT7X_cXpPOJBc_n1{ z^<~6IDY=R?V(GeNl^8cZ{mB<`q0?Jx{JDEXM?qUWgCBtUU3-$WbnzR2m48JF@;hz> zDU@IF0!fH36#63Tt+(rO!&yOu#2816l?fm}j#NWtA1ty^rT0o1K37xoi6BMWi%m_h zs}bGSp^;>>8ZQn9g%u!$ z6?nR&Km0rp0mmzsQ4znJ3d>sYWYR=NG}CLM!RighVYq?OaW>9!gWoOq<8QMueq6_l zqaHN+E$pK4=$$YdoJOSG%nXx%A8LTMbQtzn)|kA)>-qp~q3^oR&e zqKb}zHIO{|obZB{@B&`-D8v7@!e8X{H2596aOMl1+_OcP^H?qlO9*rG-v*4xg1MxiX;a*sdhK5mV;rR?V3;JY-QEwS+ z_FD4bYS@E^10$gh(b*vEQPH`7kBZJ3;xI5QP&Vxlq-b|1p?yHmiXx%v4RHhS_~1DT zB(2>=1WzL(zrFz}ftD;gC|Odc8A5PMp~dnL3#PXE(0#iF@t;Wqs>?wG)z+o*S3?xg9B4RLuAe8GfbM(K#g+2GPa*^Q$t?h;NnE7FE4LRy&K~S05^t)nTeSOIEW~ zHA`0KtLhk8U8Ab0vbt4O`^xHPs(PZV9!Dt%vF#+Q=d0?WTH#TOs(vY}SE=f|s7C)d z%ZX>7bhp#|d1vb1hrOXJZ`*kZx7>+pRQd};1gF36NSsW6`u$rpZN=VLpMDoEn(qZ0 zddZGwq;ZdsdRjEY(5uN%pPmCUP)wmhZGde8^;Ws_>p1&TzyrfX!Bb%nQsg}PKP1EN zm`&pSdK}4+z4IH~Q#Cuu1^swHSL}`R={hc&dkEN17#rS(7@z(vE}GjgC4?$4Phh!* zs|SQoy}PpcAVjgv&y)Q={S$COUlSF|=EIv{^LdnO0*abC1xv~-cCy~n`g1*JZZ}%Bmn!^R47}@$d*)T%NMxG zf-Rz8Th>F0jGyqw@8~0K(MX20<*A)y%i9>Q-iCCa{wgk-_tAWz@5JSby?{MWux)6O zVsB@kUPf~K`n`&9tu*u(l0mPG2kIt;8VNPxXE<==E6GJKNyRUAYUmd`HTVUo-U~Tl z;YDCtAgpwBL48UXxKq2;44&Vp;gd8KTajBJ(Si%+aoPn z4Lp=L%hC236)K9qJcTIclDz>(c_tOZM8Q!U2u>Nr@#LW2@e~-KO(tBKEvc&7vUQq`KFkB@~S>`v&olg@)bt-4t*y4>uK`I ztNKuu$=BQD8xi4qN%DPkDaS=#;YA20pJwuPi}0c$Yc~u|!_#=)>=n0PJdx{G2 zoFe&(O+I;5A9}>(Yc%;bM)-P4zM&?cys8hCn0(bH-@*vrN1uwg9L{F{}L}qd>bU+9+OXA)rWqe6B43duQK^gitrVKPYjjlN7Dw9vv2!L|0doon^4$x1F@$} zP%&=x`_X~vhw)P(7Qar^e?Yxn&%if*jC%c4zVxppE`OC2OuP4~KTsYy1q9w;B9`Xk zkY@Tk1}=IzRKPK2lBP||5DQGT)}+)2#O}#d44W?eRXO0gt+2?uCo$CZFbUm*e&7wp z(Bk%OF!}Z807%lXqCgt#Cl?O|VDasiDP&zWo_IXH8_Oo2J^%*6k|ZjsY8K;>sj$(| z!#KPHd=H7&sqpCw?}Ecsd*B*b*D?mw`2f*WGMb);`t<2k_3N1^{A-DjPQ%~?dhMq= 
zoEa{NJD=c{GQ8V~qSrEP{^$>h&P)m50h)R_-CYD*TM__$PC`R3zCbSTLzhaTe}n)| zuf+64IzOdu7I4%|BzBRz!(cXi@)4nTJ3ai=DyJdKzd)3JJ@_as>>n4?LwpXg1%|B@ z+$zoq;Fl>mi6ZWIlz_v)V=eMas(2FS;DHLsR)`&R|4I?5B@|kq-miakr?7O@F6i>> zsgRN;x<+|jQHQ}LO48A(WH2L1{ zzuPZZt&@%Hv~m*}rVNK8bVdcgp3HySvgap%A57m3=0p|H;}$`=d@A^^@GJhU3FK1b zcC!oQ`5jXp2Q*McG%^@xe*-nLX{AY{q?fDDM{jCg0_P*0r+EW88o&1ra`$sxNRyq@q%ctl83nkmre8Qb5N75*D}=a7!FyH2I-+@f7Kw^_|jyo+VK0Nc4D<; zN6Y1rAhnEDgGkzjwUXh2rIwah@;uqUYopEnyCc$hkZr+U2gs5%ryMnpz!MCD=LD&aTLjy8Kiwiah^+T0#gSL z9WQQOazTbCh**@>2G#3p9{3}9g`G1xy&i?Rg)qJzv6k1G;C7Uc|MuZCzDmwaqOR|Z zEP%8u&-7lf?875(XYoUaE|p#DT?aZ7;nD+LMTNMOvw&_fWkzrWuf$wuBGr`0A@{Q`Y{eI|MXrP9ApEn4HPrbc**&2#uBx4&Nz|azRoAKTyA6d z^mZVS;_!9?3x;g_gfRZjG5$=5dWD8d?N^w7;&%#okK>L0yx-Or50AKw5^h`b3$`61 zOKIv*mUhmyXrV=Ho{$!4ipo?QfngFH7AxCvnB&a|mT;vZR7(s zyYcJ^FD;{ASLpQw!^13eZ+e~_62CH|gVOrF3WHt^jQ&W35JzJ0LJRn-_RSX`2mC_9 ztH)W9^$`3`=4Z4RK=^UY(k5*YJ-KVTU>V-j<7%SLDHM-#1((s25ezc248w@zi$R8K zs-I8#YGTaWK&+R@QtZ>211_1Ypq_|P`3pji!HQixUMqAbEZRMq=TJcMSmY+g?*#B* z!d2px4{h|yhZ<>?IF8tgJ`&ZY$z)0hUq}yh2B2LpvCPBi{Dgt7;Pt&C*OlGHbu%VD zS_l%w1+L)2GsU&=kgkGCL87aTt|%TBuLmm+$m_TxyzWWZBfS1)^^smXUIB1kLNZ>} zbk(a$?Um%$OvC}PRyNIWwMAZ%5fz72VJ1QlLa&rm`0~7(aN%dP&=P5xzptU_mZ#abJh6@`iRR zQlL5P_rlaVJ?SA3H-B)P)FWgJCK<^0T4;eO!)MB>TibCVTDRuk=h(Hax zR|>rWI_mVf!iAHS3jxR54=A!%71?xsn>i&d6vdxF$epr$o%>~4t|JrGx4hiT1r0g_~Yl%nt5NFFS3%1 zsiB2rEG;9wyKw9Vv<*hlE0VU^x$no?wm!Z(Uqk;^Z_O;`K5`vk|*K zMJCOG$6B*PnG|sB1BnP!9BxP``qOX<$c#W9Svw0~p#U~*5CK>qbUZIsOr#?b0!4tbC^s zUvJ`j6Wdg>hqsI)EoAt6R=%%AdyNx@x5Y`r1CGte9)k9@yJc{Oy(yxwMs_j>Bnb4A z3f&9nG}lL-V4c1QIIa09Q&bU)TA^sa#-SJ4r7i2GURE^;jWezqqxbA`lmM8^uKvW zojaD>;u4t&s-aU0UG+Fw{sKT+GyRrP20NK8AA{u{`TiJA%OJnTpNpYOd3HWpmgdt) zIz=P*F;x^Q6)~jZXT+^e-!D{jGgZ6{Ng_B`Disr;0!^Ws`y94lv_hW?v}o>j#>=c9 zWYE-jxjBPfoulj!u2u*;_6j?0?82^&W;>>U$ zq*JUW;aWxM$?qve;G7O;{|M@j5`~2zZotHHQN5htl`$NETdmseeUzJve!nPs)Z>4a zo%Rr&RSD=HszLhs!(yu2OI115tubdhIm^*HHB<*Qel>C{$#4a+9OwkI>DEI1BrCXs zCSn>CLrz7B$p}Y?X#GY%lf?Te>Gqc}`fa4}g&Nq4SHJF-hvLqi`HCd 
zHDnaV@6HkU3jIvyn8jq2X^QV;72gZu>-189wC1y|G9oSef{@W}qyJT+j&x@KXm)0; z%mdPyjcCet!j;>xhLIgCHRv*{u3oPT4v zFaJr(y#R9S^j?UZ*8Cd2r21Ph#6=&zPm#q5vVQ@riG4%F&%25wh#?H9)^Y{0ieFU* zcbozMSzXo(r5M89F@)uzSaTGM@gYX>3}JR;6eq)8+bHh3Y`yCLE$14zzs431{n9fK zf3}VHqX)1_*dId#Gac?M(rHuW6jZ_V41!GiS#DgXti^S}(MxcPUALSEsqg8liO8>4 zDOw!Dcf=5_Umpye8P)X9AwumZBdHknDHR;%JgAn_)O94wuhaTPTA42BA|4J0DRLiV zY0W?3%e)Ix8$f$;`kW%Bwv5}^XtnMaH{C)8;`WG~6&s|k|4hK2g#bDstFOLW_?Kie zxN1Ef15CB#VH!d~bWZ$iICf=?XQB=DGWA<}8-hMc3f!MYk%~lMC?uKW=J!;Ru`l@oCr8C#h_E2nR;jZG`36xhux zC(=F|TR{A1`{NNLml|6>)_#UW|64>~hO6OM#=G!l2$J#kOhLC7jrD7p&u3=8W4+j6 z5xwur`w##+;`Vl|3mVQ$N%aJl>P^XQDKnh>cWE~ia=5$#MNQv7=F zPb9K0L@u_9Tw)dZG)SyFUpPRH?Fjm%w)H6--X~x0YCxRO<8xc+2ii3KbrO5B^`UQe zp|1yAEd~yTHNQ(lk$o!l;|Fis4A1%aT0Yp@Hr~_K+jf~Jq2(ON2-5}~PMNiwg34gA z&xlG7RNSh9{*YVk&)kfqLzCz|p{3A=Uw#vSet{;FBHwM$O+|fZ#=U|>qGDXtxQIc( z=!X(uBcsq`d!b&9SGY7NSRzo>^eT8)HO>o1koB%c;W{0N3pn1nkSrEWHWk)qkd}rpG5dZ5uN&wzt$4rZ#sx@mS-&9W3YUZw{5cLOnfaL<88~J zSdaId-ZBIc9U^)L!b^dl#}x`Z^$*bs6!-zEa%A8;!;E7D{*!xtRp3P%aJ((8aJ0Z* zIou5VR)o|N_<#P=Zs7A_OY|lbx;7p3KB8;yr}pJeD|+}I(Ze%DDC$GU;gF8%;X!fW z)!L1)n=5)0&a{4McCLh#9-1a3M!a?6GS7MF_6fJszuM|;o1U<8H+0g)?UaD%4HRd2 z+p;|uHvbbzr;VN( zK%(!;?MM*qCq9t;k(c1%4Sk0J>w)m&RdR^C8W8ns^6e#^*h zSe4j32Pkw~R|D=bybJc@u3^7@G-g4krBmE^Ae-9Bx4i+ZzAby`du=t(ZahZ)NL}HG0j{u>g`YZ$i$*+q6c?>VK+scLy#EEJnocx}trg^fs#eg*2A)an zD1NEw-A=zA))prlI8z{&SGC))f--+xbHxzZvdL(-(06~PxCjmYwPa2IMqkwzDE&g# zC~>_s%LuF`RiGpvRqFZ&cqJJ{USMPuOSBl_8q?G7H+*Y}6-lvWykAH)*43@lNh&6U zcBK1+bTUQCVkT1)Su1*+j8EFGgw;f?bfWeA%7*B-R{RliEsZBrX#<`%))>CrYI|eR zuVCt_t!Nxp%Xis;5NSvJ(YjT5zO-9{waoKaTg2Vmu!ri-1i<5j4)iGaw(v@geNQ z;$HggpNJ>Wll&8;wM*MNdlwvv!4ru#t@2S}U(^FCS|D^7^8g+{1n^J9P{8GEoki`c zer<`6lDo|ZCAwsv|}`ehVRgwaV9l9QoEB+rK76jX^KW zY@d*xm74!JP}K2?{&JzuKjeZ5Phy5>eaoAEBK z?y?Lyqgp1VBeFn)$N~y$W+vo}YK(*m_~0~ ze8TOxyTdw^0bQ1j1}!3-hHehM1cEyL`FP{=RJUch0f= zjY^29{@avB$OFcr5_ zv~+5ohV(>-JGv|z3KY(Hh=^DR2=&wuda4C&_L=ht2tV!gK}8?kQCi_vWAyq*&dl;h z(O8a%!!KryUh_$o;pNq031YFp(leu)p++_X@=B_On!-soA32p?3M#~nnvVSFNNj@? 
z3rZmtlD53UXRhRplPWNA(`>fmE0PIpD(qCc6phop=xg zv5C;ws%}UUzE>JY(+(29SexE=hJI!x{ED8-kZI>r&X4ZN*S2d5-jqqEAZ#HmLF^q0 zsrYxps?39aElzoIEb?0sEa;FI>A;;YmQIP`dKBKBiQ9t1Fi>lL2w!+}2yGaygB=Ju zJxAL?IBcTKhG+`pXYZ1=dNx&~WXtgDV}NK!Gq8@+1y2vU>(`(F_Pi>ZEVdnbj352b zN>Uk%Gw>hdiarsu6INe}6=BPXAbXj}#1HNCZMdYDpU^s3e~RE~yOyIn=noJg7YOkh zMW6mh!r_drD0D9s@~BAE&6v|{cp2$^FhRB-{<#WlOjps2LICtZ+PcIG7I}Is}+AT_@lRNW$^v_#l-bP6UJ+3Sv;f+q&g}~PT6)X1fgr*AY|0-40j@H(JjQl zSWLf+k0D~}(J?gY^tpl(Lk)MFw9v%u@4lt22v{p#D-l|Lu!VAGtH|I82kzy>6XiBF zq935-FOsg~f$MNdFN2W#9k(cqNIh`XM3gWKsiBqb^7{zeK`-;`uUrqg=vlYXV=4UJ z3u@4_eR>8i{Ekti^;mD_t(@)GPZWf-s*tbnrB#Jq+ooQ`{FmCV&%}}_*a_P!u0!;m zUHV;r-9qu?BQaqI4W8a4ArPO^4~6J0)i6j4U3!biRY4lJRW-{_f-|^ApVI#fc8oT( z$_5akRle*?tg2N)Vy4#1iOABHa|M%V%SR;F(^jr$<@}CuguFx7a>f(dscB^J>6r@k zSEP#Q9z^Txb?AO;U|t!?JcRm?dE~LN z9b^q7#R0-IA(;*eHbzdigt{Y4q6+ardWI7NB6+TIW+nk#_?O786o(LTxH3`y+9_pN z4jYE*ZjQ)kC8F0s#4-rrqBDz;bD3@QqsXnGspvsSab~r#1Euj6u7Igif96H<;WZapF~SlZEkfEZjB5R&de>-1N^|iGQHL=h{Sqf+T3Llg$VU zby5&H9)B`W!tQp>XeI7LeVDt|Vg|V!|2|O6Z8Jk0(AbEkehM^b>P?ivCULPQVSko@GJ zx)NIu7&p=XYVhYb+44L8f}W?T6rP^cFAM%8fAmt|eU)9VaBVYih0p=9>Z&vjZ5>gQMEUZcRF}{P7yHJj`lS{$gSd=s(XU zckN~sL8uq=0=5ZzFMC}X*k#!^7#N!$G4QLorWqF!$I=5al?c8tesaY4)7bcc<50wK zyG>)y%b$>bU)zOMAtHG4@7vbh(P@jXh8&88G-(d ze3TMqN1%%$w37fmcFm88Sz1y> zQfgX(Hn*~rfXSv^X7QhIBifG*~EWZ z66`4~FRJh?$g6<#1+YuIyr{&3BB!jpsARr20aj^Q?(3X0-Ff8|&hdGRwZhWkqJq4| zgEUuO1t_hs!HgWj7eo1C7+(zMiwm{Mc_o#3<%_im?z!dSA~UZ%zfjA7>SDxWu{OD~ z#Eq}w#ac$?e0POM%W;=^+zaNyzNz`1(sH`UDqScc$E|LZgsX$1QDx66L1MnBs>gl(q%X;e=5d2~01zEF3 zI<@R#cV30tnO9PA9YS&LVt2*4&Qe^Il%6|K8>o%X$(cQFYSx5F6RElLlPa`wZSI2E zm7b#F3bgsuGIt5L_&l&daHQe8cXMiET@xo|X|AmCTGAh`S~6=%652e!w6et0VRxD4 z&MR=b%gZ6Hw8U9}wsoGHRB>*1VM=z!bVIA~*SP7<=~Kz#Q3S}$oHX5e`P6Bbi~L0_&MXzXbCqJH18|AVbD#mTvb|m1MR@9c)tE` z>d(m;o9Rr>@#K{|bMgz_3-X*}-6i>j3-ZdZ=}$36%Vg)3=Phtk8p)}gTj444U}&J< zCNCU_=0$_ye{|LENL*<{ohi=2si~=&HoY9Hatxvcc_sO7 
z=Qs?Kd}NVvrDcoDi{=+nI>rC=C_>Y6b228nMiS(r(lU}#ak29c&TA{(z0z&ts8X%jPZ=DoJr0Kcv0dh%CDfV-YsRW^P&n*`Nc$1T#A85@0JaFQ`O*q&`Rq8LjEgLmQUhCz7PE+~lipW0odH^u+=+ zb4evlOHO1ycX=MBCg*iUP%9a`rJ$!NeZnMbBO>4kZizEz0!5w}x}_`>rXdZ_Evr5@*eOw;K9@Ey~}#IXd%pe-(@&RgJK=q@fTqb{vF+T|J3vLIX@GLZl_OJ3UXhHOH+N)yL1EFzl95@(BTGk? zjVvEoG14<~;UeujBn-}|aQSJ_s9lCng_z=DbjF)5Fqlrvo`o1I_cDaYJijHsaISzP9>&`Ly4G|RzS zIff$Al~y{t6ob4}D=I+`t<3jm6$?rW+*&?z$NW;PsLE-+(iT*>=P$ro;nG1v^5_eD zz~Y0^GCg}7<-d;7?`7`t3K|1ZIathxBGY0S;>^RyL?@w9P$~w(w5*9Ty+-KTOIj7m z2+Mk^dydOB9*c;KEK18H^AAY{vj)oCHCoK`Nd+UjL90?AMXXSgRM9#nEma#dOdC5b zBPU05KP+=2?#2c{0!1`pLH zOv}g`=h7y+re$Vi%_NCeOu~XX(=~0C!45NlCBgRrnIcw-JEz>oA zQf{U*d+L-)IfiSzHZ)BeG*lZrL<4S6nl?lmGF(g3BsriUcaEJpLmPrQQUV7JRu@Bt z;-4HIl&TF*)i4HWB|R=9D`R|ymXnb?ev&g|TELNw1MVyE+^$ z#NYW_!{NK}m$EG!{v-Y-zZed$#oy^Kg~Q|U_b&d@wui&D`0KPI9KH*G{qgIkOYy_4 z5Ab&;e!13$pABBOI~=Ar&SyY2{gFK><4$k+Z1|~Tu)c=}6-by^79l6n;)NnacZx$1 zL#MWMfF`FT_fgi6ArIHc}j6OydGr@${LiVrQvW8<z+$&bE)@|4x#@OYGyaA>>`ds)SQ`#+LfHpDq}h-1d6aD^JFP>X>8fd0-VMDdhus6cD4W(}otL zyPpStH_YXA;72L`wG5PfUI9PKp|61-<+ppmkMe!IjHLmk8!xm8p{&PyjN&kS-gyiB zC?~!P{U~Fbz>l&Dm{i(8?MId$ZDL(c98sE~Ul%Fvu} zSTGV0AJXT+Ul8yz;#VKiwHkkY3+N|m__&Twyg7DUe3#hmut?Jg|0MqEfxjO1nfOlE z#t;GF_u{WF#>g8eO?>RjCjS9|-voYf6n~k?PwxasE)0i@qWI^S`~!f$7WhY_@b{Vc zDZsY@|1|l<_IG+DmhCGB{uYeQApD8o--~1XYTze&!r|{wM(|&o_B{#ssSCs58BzGD zCVnsA)2>5K#2An8kLbkm4**|*F@G1(CO$S`+TR0qHv@lT6n>G39{~Jhu=h(8u-tUb8d*@FE{z01b+02aQLPu{9+To7x?>u ze>e*NR}+5#__LA2##-@`UG$$GuxkSFo2~fRCrthUz<&Vz$x-n?Uc}!Co+-fZy9w)4 zE5FOzDfTrXLg*+1&COmMDYDYU)|fih0Dr;CaJVT7|E7uG1pGgM?_#wf_TK{U1WzOI zuOlC)BF~y_YZ6-9hd}c_a&Q64D4HD66dK9tjXZw}eo|0_Ymrp+bjlWE zi$sg@51O9%8Cq2QV;fDs=>@T`1OJdUHl$~*j19?11a8V-!r>~bEuEH_V~pbZ4Cb8y zRvDen5(0=n8}RM-W36M2ZKr-BuukC1fN#M3^rn@c+Gj_^hC6|)*cc8k0KMsJY=Q8D zbZiE`2KbAt_zD?M!tV$EBj7hi`EbAK!y(`cFel<5szv?+Q+`j(vD<*hxm*kWS`$AM z_?}N-PL0BUXyT^>-voS|Rex;Q#Cw3Rc`_XSI*R{O6MrZ0w>}jPKTf~$H1p^K2}B_L zC;eC>&b7+#G}H8NKk%3B=s3?G0{-cp;jkmBFZD3f zNB0B&1J)uhM%n+YY5yVM|Ae)PH_E>2P5XLce)=Bxj_e-_d>+;`3!>x~nDVCs|2G@} 
z&4|K}F!3JX-^5y|qyBp*@GZbMN7)xN?b{6e=6%QoQS!4*`TK$Y25YS8QSrOPjNc*P z`@VztM)B(=e^0E{60kPAElPitsedT&OM%}Sh2Lc2rvu;Xy>R%YDExCK-UIxzz~i`i zq<_bjnD{$^?}oMD3sLyLnfT4XuLu6eD158H6X^HPZy14dq^z1GSyGpwk5652{SI}} zL!vd!DKF?3bo6<0gDg*xg|_G;d2yPmqf3gVtUGZW4XR12W{T!D5A)EsAi>fy&N5ZPkcD1T{p~C;D>SFFxvVY5B zm1-Be%v7g@q^mMpm2*^CrpgtnT%*eMs@$Z?8dWx`GN{T{Rcd`T@kvyrQ*a#W~*|JD$7*4LX~S& zDW(1T@3>(5v`a2Xxw3tXg!Vx}V~I-YbJo&&%{Y)meTe1^{Iy5h9xRIaD3z->NhHaj z50@nh&f}jSpqAU{uaIooLIvmXv`E3Px6zArIB@hY&e4bIx5%n?lZ}4Ag5PX|f2ZKL z+T@%NBN4aR=;@!UqtBmg^jQjimyKSmEhLNhep2CyoKgwJiE{1p7C7ZXf>YwyZh;dI z!6}99vA}UVApEEDH4B__6w!0;#HGgiNhjs5MhiX3|5(*yaZNe$AF58WseVw^$wsRG ztm>36sowb+eBnd3Q$1eQ$rh^jQgyP4>U~t5VnOv}RZmrAnkwm^Q=^Ydm6KGNrOF&t zUZKhYRW4BFLRBtT<;|)LsB*0;?^oqURX(jssa?~KH9k`Z4IVOd*zgNSq~*=c#|HHL z!lJ9MDPB-gT6S%Dg{N}ib&D2X@5~rGZoF&4L}SwAOQvLIP0hY^TF&&`%PyZW^NIni zr=#*%`Z2GFPXPo_jJa+vDb{wFwNCt8=WBqpL2vsd64AMhv)4RWK}y+l{_!< zJk9F~o~PBcOaFwE_)veNkD6{Zx`m>T8fIpnK1qVj^#jk_ynbPQtcUI4^#;plKDM9z zVLwA3Vl5F6=MIi|yoivA^sO`^)-SFY~irmc#m39_wYhSU>y8`q&Tlm+j}c z*zMssaQrx499O%Zj{IPKET8qUUF;|O!~U^9te5R!f7uR>3;VvY%`x+r|3X9_DBL><8;(|5!fT$NE_x z+r@TsoY+6MhvUKVVgEQDTxYp#H|t}&SuXRjAM797$#%0pte@>=JK2A>pZ(@|u>EW| z$Cc%>Jxs@VmdAFnJuHXqV0+mg*2{5V|Jh#lm*d6ub9~r-jvxEYa#=6q*&de9{;)mF z&vIEm`^9#$|8{%WZ?>2H;rMX8IG!92w!==(df0B(&;EBLkNsym*njqe?d15dpX@)! 
zgX6&Qvir+&*&nu(<=EwO99Td5%YLxEte5Rzzga)Wh5h6>vVR;;wwvu_xpsS5FUw^; zESLRYxvYo%U_I1Lj{bfJcevT8{$$qdr z*30r(KJ&5NESKf5opygY4y>Q;W4&xA`_2AwTsU4FAC3d-V|!UI%VRxud3HH$pS^w9 zFOCD-$9{7h*e~{n_6MV{&F1HU$&p)$#yaySc+r{>?9c(w-!~U{<_K*GL z_^`bk7xtI^WWQNI+rxIU9QKF(V7V-Z?O=KAH`~qrvOjDG+imxY?d5o|UmO?aXFE9_ zte5>@{cJzm%YLyvtdH&Ic(C2=)Z>x6^JH$A{y{_H(>gKGQLt?PtAgKgWaP$ab<{>_7X#_OL#-oBd~h*nhT@yVSm{kjt9q!<7D^8j%WMWZkEITupYLX<*^;?KikjoW&hbv zjz7nN{bu{wZ}yYxc0Ftl>*01_ee4hW&wj8zyB(~b?dLdfoH!orH^+hPWdFEiz3eC3 z$@J_O+r@HOKl{ge*gp1)?c%tw-y8?FpZ#R}*?(>?#xp(3WjmRU<+FXPm+fKy*e|x9 z^|GHF7xtIq!*;WMcE35UY%klvaF)k*+3m2)W&c<|+s}5eo%XnJ9M~`RpWBDy#r|^1 zd~7G{v)jRP*&aBt&FU#dP zvpsA-`_F!HT-Yy;2iwW<;5f1zwwL)>F5AcPXFFIQ%VE9jFZ;pz*|2aPF zH~Y)>vmMOO_OV{3W4qWtwu9|vy{wP*+x_Rbu>I^O`^WaNy&NC*lS{UfyAc5{5#KaKIMWw(p zi;sr?>9}t^R(zT2Xe{TB56_uo#j{+-ul>_r*HQ7Wd$=nIvL;2${8c~QIi@{+?jJ+O zE#fXC$hcoJ{=(VE_qFod@neU_MaAEaKlA$+Ii&5%Z`wOH`DpSt4z0iDX!!e=eX`_e z_$_@F799=m|MFkMj)vcS*JvKfg3RvUhlh5ZcQpRXN-oK;;-688fb+pAv#N$#@jUJs z-?g}5h855KgYh+6ALk=Gf{fu@KEJ1dQ?{K?O>3^*Epin~|T__yq7R>MrSnttaf)x8w^n68RdOTtS9La;lM@FaMd632k~<$Kf#C`s zHEAfAwct2#J8-|{cH{oTouBgt_X}?S?n>ydjTa|-9@M-Z;Ca5MlJ{%l%IAam+%TUH zq_c_i;q!dIHvZh%lO(%#jw;VpB`5n|+fD-&-vz2nRplU6Mh!j%qvC%{TrDm|T`qbt zs`{H%ZcuWUsdD9Sq{px1-1!^HV~6kmjpec2$Cccte`EP9Z<~_4<2RCDtK>BNM)Eig zjY{5os{G_PlK(%yksP*@%ftUAxg6K7FR`ZIraWHBJ4uz?KmMOe@2%uHe`9<5|Hksz z&!PV%xnq=ELzOdB`3F`0pX#nXNUo~PpMnM_9Qb|D-5_`e!q})gONlMN4$K<`#*KvFQ|1p0z`(OMs*$M4GNV^YH zYX3jN_+ylhQ$9%<+WG$;m-Bn_48QgfU!!%Dnijqvl6NX=R<67}(|h$G?l>II;A)x0 z+4+|&7M`m6o& zJ>b&LZ@AaDoBItqf;bZtD%Vie#a;9_I{kTOM$m3s zyXZfP`8ZWORA|TWoTKaZ4&vK_c77`wR>t|@YVRP6H#6|USlZ!muib>&mrx0SEqT!;6Vx7_IsxVwfB%yiLT*aeq+A7^3SWw;J(qd%Ky|E=8jcN5=A{NXD+ z!0t48X9wrT!2gK)xa!qCH`2}?;4YAAhe`boY%Rcqi>24D5f3__km_dcfh> z$J^eH{vG5IXWwJV|Gy7BlLQ!ku8aOjXCLS1pnrcw{Qh9w_?^S&nu)eIyoBRw#%t03 z)A9L;IEyv21jl&j3+B-r;)eEh{$50UB50pW9FCRXptr2+*fNJ>{o&`J!z_3u?X-jS z?U!B7Ig7fq&pOzVb)&M%8(u;CS38`MgowJ}AEZAfXonlR=*!(*Q>}Xs(9YqYeIDjK 
zZ3pYdSExV6ayQY}hly_s{P`yFGI8CHjsWjLKbi~J;k0v<`sW9B+Q6mVUU#2Ye2Dsg z=i0f)WZvw7zb1Zy`g>y!@Yrfj(takW*Keu6d)Vu{JKhjH4GznAyD0GIJmOCW^?EaK z@pH##y&ZR#7lQMte|lhNDfRE}^ZM=%N(7fv|Chmh>8JiHJTHCCXERq3-@MiX-zNTk z+Q|gtHlqI7w|K>Gup8b;{kwzl&~*AVnh-YumwpV-Cm*Ms@xcFkoSh!?2_D?seS@&~ z8RD<-T(_TP$bGlco-^68PY{>8)&zfLod@LpLwOz{{-E`7cV8lcj}w1@^X`yOHb;mb zt$Sb@@qZ)!y<0tSg7~w*Wnbvwy4pkhdFs!8zX#l1jtKt5>CZDg!8rN3(`V*{c$N0Y z+8)&PdDcun4%@V&>szafopY%FBF`b}XGZPNf3YHGm z{)YG*o=dg9^2G!h|Ho+Or}SqgaB1gF!FW59cspp%InE9Uo2>=ey0JDl7}*x6E&$->rBVqu+!u<%k|ES&wpF$HN*%Oc z&@9yLP^CVYtJs1tkJ#Ms2rg5A?4Dyk^7I zcGX&@uDx=zv1`_ET(Nw;-MD%+=yL3q!uJ?#H*qmvZI}(M@~TQv$Z?e21&jvE2A5@^~L2WA0VBW@{-D=q?h^9K(zygvR z1P@OK5Ff1tTU@Pfr4+H0N(8%1E#DYTVP!DP6`RKPZyB(Yb8XAON||X}gw_KTa|O^a zrEyh6f(Q$$UL z>jte0wSsN91kf;*Qa-?Vj3JM?$Pp|ZRMQ@cg@VWIDi*hc3((du&13IIb-03F(FyC^ zL|-?eLdtky(B;AMBiu{sjm0~Wr`;M_47Af9r`fS?Rr6vegzl1r?`qv#py@}MQ9NB#WrZ3n_IbY~rF zph1`AnhCFD5y*|kk~_!zB}dPe9+}n-!ML}Hl?h!nIeQi@l2yDaTf=a*Xq*#nLyPOS z46ogM6PhwCzs+id3J8T1n52fQwVce;jj&sQ17HrrIwRylE?Km&U|~YlO3{#Mp=H5g znQFGG<0MT%*jF2-RKnU?PT|%@aXr6usg)(KHd(vNup0wu5+GbV=ttL7akwOe~hhbIU4e z=}asqkyRrNtomR?L@$*p^?G{z!1X9)2{v%Haea`_3u74^z`~l+8Nvw*Y=vgEU5$BG zStVSiXg*JTwnGY)-)Wa6?>a|?;tj&+)WR1rt-kshr+?msDc#?-ySe^ct#c8;K^(rrHa&}O*_)c{f zq3N+5r~AvTx=8raUVa%?tP1hYeR25`_a zSvOs?a%_io9o?2Q*P8$H0Evi6T5LaG>oEh)db53eRW`Ij)iN_{`1dGzy?#@`v_FEhW&i&T-a^L~Nx z{vf}8pH5|ldMd*F??b|`aQC^^U+;%kxr7<{@wW3lD7BYE03QuI^*=%XRo=}4rH!PW zwf_3PB?2Z{wEQtDs6636=0d|iLDa~f)9_Wrj{ka}yGqO-_o)9`j>CnP30TpznEdM)5kn%OZe!od&OA}}yEdN-L|C-0VqROZ6elq#!_)+XX5D*tR%sSs^9r+ip z5$bDPZWnaHo-pB?*O=Kme?%GlNN^?P4?nO_@5)(?5KTy4iM0+aUZ +#include +#if (defined(_POSIX_)||!defined(WIN32)) +/* +#include +*/ +#else +#include "process.h" +#endif /* WIN32 */ +#include +#include +#include "config.h" +#include "dss.h" +#include "tpcd.h" +#include "permute.h" + + +#define LINE_SIZE 512 + +/* + * Function Protoypes + */ +void varsub PROTO((int qnum, int vnum, int flags)); +int strip_comments PROTO((char *line)); +void usage PROTO((void)); 
+int process_options PROTO((int cnt, char **args)); +int setup PROTO((void)); +void qsub PROTO((char *qtag, int flags)); + + + +extern char *optarg; +extern int optind; +char **mk_ascdate(void); +extern seed_t Seed[]; + +char **asc_date; +int snum = -1; +char *prog; +tdef tdefs = { NULL }; +long rndm; +double flt_scale; +distribution q13a, q13b; +int qnum; + + +/* + * FUNCTION strip_comments(line) + * + * remove all comments from 'line'; recognizes both {} and -- comments + */ +int +strip_comments(char *line) +{ + static int in_comment = 0; + char *cp1, *cp2; + + cp1 = line; + + while (1) /* traverse the entire string */ + { + if (in_comment) + { + if ((cp2 = strchr(cp1, '}')) != NULL) /* comment ends */ + { + strcpy(cp1, cp2 + 1); + in_comment = 0; + continue; + } + else + { + *cp1 = '\0'; + break; + } + } + else /* not in_comment */ + { + if ((cp2 = strchr(cp1, '-')) != NULL) + { + if (*(cp2 + 1) == '-') /* found a '--' comment */ + { + *cp2 = '\0'; + break; + } + } + if ((cp2 = strchr(cp1, '{')) != NULL) /* comment starts */ + { + in_comment = 1; + *cp2 = ' '; + continue; + } + else break; + } + } + return(0); +} + +/* + * FUNCTION qsub(char *qtag, int flags) + * + * based on the settings of flags, and the template file $QDIR/qtag.sql + * make the following substitutions to turn a query template into EQT + * + * String Converted to Based on + * ====== ============ =========== + * first line database ; -n from command line + * second line set explain on; -x from command line + * : parameter + * :k set number + * :o output to outpath/qnum.snum + * -o from command line, SET_OUTPUT + * :s stream number + * :b BEGIN WORK; -a from command line, START_TRAN + * :e COMMIT WORK; -a from command line, END_TRAN + * :q query number + * :n sets rowcount to be returned + */ +void +qsub(char *qtag, int flags) +{ +static char *line = NULL, + *qpath = NULL; +FILE *qfp; +char *cptr, + *mark, + *qroot = NULL; + + qnum = atoi(qtag); + if (line == NULL) + { + line = malloc(BUFSIZ); + 
qpath = malloc(BUFSIZ); + MALLOC_CHECK(line); + MALLOC_CHECK(qpath); + } + + qroot = env_config(QDIR_TAG, QDIR_DFLT); + sprintf(qpath, "%s%c%s.sql", + qroot, PATH_SEP, qtag); + qfp = fopen(qpath, "r"); + OPEN_CHECK(qfp, qpath); + + rowcnt = rowcnt_dflt[qnum]; + varsub(qnum, 0, flags); /* set the variables */ + if (flags & DFLT_NUM) + fprintf(ofp, SET_ROWCOUNT, rowcnt); + while (fgets(line, BUFSIZ, qfp) != NULL) + { + if (!(flags & COMMENT)) + strip_comments(line); + mark = line; + while ((cptr = strchr(mark, VTAG)) != NULL) + { + *cptr = '\0'; + cptr++; + fprintf(ofp,"%s", mark); + switch(*cptr) + { + case 'b': + case 'B': + if (!(flags & ANSI)) + fprintf(ofp,"%s\n", START_TRAN); + cptr++; + break; + case 'c': + case 'C': + if (flags & DBASE) + fprintf(ofp, SET_DBASE, db_name); + cptr++; + break; + case 'e': + case 'E': + if (!(flags & ANSI)) + fprintf(ofp,"%s\n", END_TRAN); + cptr++; + break; + case 'n': + case 'N': + if (!(flags & DFLT_NUM)) + { + rowcnt=atoi(++cptr); + while (isdigit(*cptr) || *cptr == ' ') cptr++; + fprintf(ofp, SET_ROWCOUNT, rowcnt); + } + continue; + case 'o': + case 'O': + if (flags & OUTPUT) + fprintf(ofp,"%s '%s/%s.%d'", SET_OUTPUT, osuff, + qtag, (snum < 0)?0:snum); + cptr++; + break; + case 'q': + case 'Q': + fprintf(ofp,"%s", qtag); + cptr++; + break; + case 's': + case 'S': + fprintf(ofp,"%d", (snum < 0)?0:snum); + cptr++; + break; + case 'X': + case 'x': + if (flags & EXPLAIN) + fprintf(ofp, "%s\n", GEN_QUERY_PLAN); + cptr++; + break; + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + varsub(qnum, atoi(cptr), flags & DFLT); + while (isdigit(*++cptr)); + break; + default: + fprintf(stderr, "-- unknown flag '%c%c' ignored\n", + VTAG, *cptr); + cptr++; + break; + } + mark=cptr; + } + fprintf(ofp,"%s", mark); + } + fclose(qfp); + fflush(stdout); + return; +} + +void +usage(void) +{ +printf("%s Parameter Substitution (v. 
%d.%d.%d%s)\n", + NAME, VERSION,RELEASE, + MODIFICATION,PATCH); +printf("Copyright %s %s\n", TPC, C_DATES); +printf("USAGE: %s [ queries ]\n", prog); +printf("Options:\n"); +printf("\t-a\t\t-- use ANSI semantics.\n"); +printf("\t-b \t-- load distributions from \n"); +printf("\t-c\t\t-- retain comments found in template.\n"); +printf("\t-d\t\t-- use default substitution values.\n"); +printf("\t-h\t\t-- print this usage summary.\n"); +printf("\t-i \t-- use the contents of file to begin a query.\n"); +printf("\t-l \t-- log parameters to .\n"); +printf("\t-n \t-- connect to database .\n"); +printf("\t-N\t\t-- use default rowcounts and ignore :n directive.\n"); +printf("\t-o \t-- set the output file base path to .\n"); +printf("\t-p \t\t-- use the query permutation for stream \n"); +printf("\t-r \t\t-- seed the random number generator with \n"); +printf("\t-s \t\t-- base substitutions on an SF of \n"); +printf("\t-v\t\t-- verbose.\n"); +printf("\t-t \t-- use the contents of file to complete a query\n"); +printf("\t-x\t\t-- enable SET EXPLAIN in each query.\n"); +} + +int +process_options(int cnt, char **args) +{ + int flag; + + while((flag = getopt(cnt, args, "ab:cdhi:n:Nl:o:p:r:s:t:vx")) != -1) + switch(flag) + { + case 'a': /* use ANSI semantics */ + flags |= ANSI; + break; + case 'b': /* load distributions from named file */ + d_path = (char *)malloc(strlen(optarg) + 1); + MALLOC_CHECK(d_path); + strcpy(d_path, optarg); + break; + case 'c': /* retain comments in EQT */ + flags |= COMMENT; + break; + case 'd': /* use default substitution values */ + flags |= DFLT; + break; + case 'h': /* just generate the usage summary */ + usage(); + exit(0); + break; + case 'i': /* set stream initialization file name */ + ifile = malloc(strlen(optarg) + 1); + MALLOC_CHECK(ifile); + strcpy(ifile, optarg); + flags |= INIT; + break; + case 'l': /* log parameter usages */ + lfile = malloc(strlen(optarg) + 1); + MALLOC_CHECK(lfile); + strcpy(lfile, optarg); + flags |= LOG; + break; + 
case 'N': /* use default rowcounts */ + flags |= DFLT_NUM; + break; + case 'n': /* set database name */ + db_name = malloc(strlen(optarg) + 1); + MALLOC_CHECK(db_name); + strcpy(db_name, optarg); + flags |= DBASE; + break; + case 'o': /* set the output path */ + osuff = malloc(strlen(optarg) + 1); + MALLOC_CHECK(osuff); + strcpy(osuff, optarg); + flags |=OUTPUT; + break; + case 'p': /* permutation for a given stream */ + snum = atoi(optarg); + break; + case 'r': /* set random number seed for parameter gen */ + flags |= SEED; + rndm = atol(optarg); + break; + case 's': /* scale of data set to run against */ + flt_scale = atof(optarg); + if (scale > MAX_SCALE) + fprintf(stderr, "%s %5.0f %s\n%s\n", + "WARNING: Support for scale factors >", + MAX_SCALE, + "GB is still in development.", + "Data set integrity is not guaranteed.\n"); + break; + case 't': /* set termination file name */ + tfile = malloc(strlen(optarg) + 1); + MALLOC_CHECK(tfile); + strcpy(tfile, optarg); + flags |= TERMINATE; + break; + case 'v': /* verbose */ + flags |= VERBOSE; + break; + case 'x': /* set explain in the queries */ + flags |= EXPLAIN; + break; + default: + printf("unknown option '%s' ignored\n", args[optind]); + usage(); + exit(1); + break; + } + return(0); +} + +int +setup(void) +{ + + asc_date = mk_ascdate(); + + read_dist(env_config(DIST_TAG, DIST_DFLT), "p_cntr", &p_cntr_set); + read_dist(env_config(DIST_TAG, DIST_DFLT), "colors", &colors); + read_dist(env_config(DIST_TAG, DIST_DFLT), "p_types", &p_types_set); + read_dist(env_config(DIST_TAG, DIST_DFLT), "nations", &nations); + read_dist(env_config(DIST_TAG, DIST_DFLT), "nations2", &nations2); + read_dist(env_config(DIST_TAG, DIST_DFLT), "regions", ®ions); + read_dist(env_config(DIST_TAG, DIST_DFLT), "o_oprio", + &o_priority_set); + read_dist(env_config(DIST_TAG, DIST_DFLT), "instruct", + &l_instruct_set); + read_dist(env_config(DIST_TAG, DIST_DFLT), "smode", &l_smode_set); + read_dist(env_config(DIST_TAG, DIST_DFLT), "category", + 
&l_category_set); + read_dist(env_config(DIST_TAG, DIST_DFLT), "rflag", &l_rflag_set); + read_dist(env_config(DIST_TAG, DIST_DFLT), "msegmnt", &c_mseg_set); + read_dist(env_config(DIST_TAG, DIST_DFLT), "Q13a", &q13a); + read_dist(env_config(DIST_TAG, DIST_DFLT), "Q13b", &q13b); + + return(0); +} + + +main(int ac, char **av) +{ + int i; + FILE *ifp; + char line[LINE_SIZE]; + + prog = av[0]; + flt_scale = (double)1.0; + flags = 0; + d_path = NULL; + process_options(ac, av); + if (flags & VERBOSE) + fprintf(ofp, + "-- TPC %s Parameter Substitution (Version %d.%d.%d%s)\n", + NAME, VERSION, RELEASE, MODIFICATION, PATCH); + + setup(); + + if (!(flags & DFLT)) /* perturb the RNG */ + { + if (!(flags & SEED)) + rndm = (long)((unsigned)time(NULL) * DSS_PROC); + if (rndm < 0) + rndm += 2147483647; + Seed[0].value = rndm; + for (i=1; i <= QUERIES_PER_SET; i++) + { + Seed[0].value = NextRand(Seed[0].value); + Seed[i].value = Seed[0].value; + } + printf("-- using %ld as a seed to the RNG\n", rndm); + } + else + printf("-- using default substitutions\n"); + + if (flags & INIT) /* init stream with ifile */ + { + ifp = fopen(ifile, "r"); + OPEN_CHECK(ifp, ifile); + while (fgets(line, LINE_SIZE, ifp) != NULL) + fprintf(stdout, "%s", line); + } + + if (snum >= 0) + if (optind < ac) + for (i=optind; i < ac; i++) + { + char qname[10]; + sprintf(qname, "%d", SEQUENCE(snum, atoi(av[i]))); + qsub(qname, flags); + } + else + for (i=1; i <= QUERIES_PER_SET; i++) + { + char qname[10]; + sprintf(qname, "%d", SEQUENCE(snum, i)); + qsub(qname, flags); + } + else + if (optind < ac) + for (i=optind; i < ac; i++) + qsub(av[i], flags); + else + for (i=1; i <= QUERIES_PER_SET; i++) + { + char qname[10]; + sprintf(qname, "%d", i); + qsub(qname, flags); + } + + if (flags & TERMINATE) /* terminate stream with tfile */ + { + ifp = fopen(tfile, "r"); + if (ifp == NULL) + OPEN_CHECK(ifp, tfile); + while (fgets(line, LINE_SIZE, ifp) != NULL) + fprintf(stdout, "%s", line); + } + + return(0); +} + diff 
--git a/data/ssb/dbgen/qgen.o b/data/ssb/dbgen/qgen.o new file mode 100644 index 0000000000000000000000000000000000000000..ff21d7d38bed3b98af34b0177c8983e8ecf347e1 GIT binary patch literal 33552 zcmeHweSDPVneH11X+kwY7cH${(>AQ2CKA3@kYWac3^hU|C|c1l%uJGzWG2oBLaP=H zs1c%e*Pd$i*Lqgl`m=6jyIqUcVhZBN*0a*qT30<=YP&}wRFPWi*UGu?=e@7w;xS{j z=bv-VZ{MGp_kFMTey;nv?&oZ;V}0-Re$Ih~>D#W5jac8W_vn6X<3MrjqoaeZx^v-I$FU_9YMhVc1FqJ{m;!UjL}HzEdScmq*Wwo*P|uZfmS}--KA- zoZ4vLA0HemaoglCaANvg#+WIs*>Im6i1p9VR%q0CLfm*Bi}gM-PVCfdcuv}chhm9e#rmC)P31R*ikkXf9v0cu zw`XF|7zBMywUsp+|EIKT?w=^H!QkrALGW-8F*C8g9kH-EW1+*1&lgV9v>T5Xz0s|g zOY2yF=%N28%(fwZAG`bnwJb8woGjvxLTSwhqhvyN@W_U>4yj&eCNG`2d&^v z;^4KTf*|^*oq?)kPGcpPGkOYa^Q_MUST+f=!?Tq4VA?jIei*|zJArgO?Fi?7Uy z6kgucTzgpT{k=A9Yw%jhBF;vcv$ZbizRFE*ENmQjP&>RPw!NiUsKzyiaOvSNOss=< z@4c~2;R&*7PHiYv%ROFlgvHzVglkX}dwc|u!wr5b8bo8cX?$9C|9p2z&z5oNUUhtO zw)DPO*7qo#X;#B=vEH3!QoaSj8%^`1e*5=k7u0MUcwn~6_P&8nPQ}yS`Ec47=4tMG zds`6EQ8IjK&`Z-w9T1gL(hZ&espC>FT{8AvmA1K5MxbY?#NREKKaaYp=KlF*(f-D# z<}9qa_5wLaYwjOdlqlcOT(*1e!QIV$2h9*A?tNs!g%?FvL@$b79Btd7S3J2!jk7CB zOY{L15p|9pkiL=PI=gmtb7v~sFgWbM3>jEq9JYd2g)jS?R{B zT#@w}wRfO7Jc*k7{uK6X|Jt%}qwg`hi0eoh+uZktgZJJjU8$$>N4g4I!b-VSG|QOk zb@|H%8)e(EKCDjM)`yex3CJo~WDK=84sLMij{3;yOmU+q zjta$Jl)ZAW!eu}HBWM4M(v2GYnp{qzTWddZI=V#Cr7FC;49*i$t44Vcj2vo1aPVk} zbq^wZ$vaSUgu9Wn*StD#AXamd?VRe?HP^iOu9P<%rgr~7P&3XQ`+&hK7Q=x!c&9f1 zP$!2!8Y9xTDQO+uI_o1>EF50r1}94UaJzi)ld>I4gvvK9+bQRsyP|uS&hSJU((f(@ zxGuY^h8ZItSzBQ@Uh|N6HkN+=jEa{Mv7fwCd+X$1^pC%-fBY{d@9NvZm$ECu zdi3roi`^1SJRg%N`D=~!m$#g~sr=FAzQHhBB*?9tZsNG|ycC{#y&`4Se-?WOuU4_) z+nJV20z6%5^AEVH$ZY4Cdva2KCL zJ7s6Ae?_gnoh+Ob(|3&8Yr}7|Yc^c14MRu3_dXMSV1vm1i{6_3NDU1=2X$%JOjIbA z_$hj4S1kSWuy@|B?R)em@0~FDK>zf=?H_+&@-FF#{_*Rz3*;|)_NJztQu^Nv_d~4j z)!~Xjb@=2h?}K{ZIlB0@a9l-dHvW@l-4vcW$=J!^xm2?;BF@4y@Ty1jWW$A69*~Cy z-W3gQ(5*OR(8(a&S!p>poo;=O%WcqqOw_=F4yXHI*q)FRi1gQbosuE%vr%C>%-67IeKb^He$ zLwmGRF5Q+@P^~YrNdLK|%GS!KOGUVnPnyEzNLyFh+PzJY?eJ+k2;?c;El1m0o?6`^ zPd{$?xUFk!t(~oldeh~v!d2j|wzB;(+ds_K$cH@8gLv?x9rVKwe1H%7pa(vn2X%p8 
z)C)RMFZhKY_(gu?MSJ805AuNzd7%sW;S>4b1Ad_&b@BC}4yYgMg}VBAO8G!O@FO2| z!6*E{AN(LM^uRB4pf2!(dciOBqaJ97yl9Vns6X1HU*Hcq;UD>+6Z+r-e&HWFQ5Vz! zI^h#Kp$qw;2kns`K9CRozz==Mk9^Pt-KZ1%K@aMI`oJIRfqn2oH}XL@c+n0%;14>X z8-9=-AN7HL)DOPF zi@Z1vJ>Z8Q=s|n%B0qdVC;a<*;2V132lYX{P*2nYI{bL#fo|l7|5AML4;}CiAJB>V zz$g5p9;gHA<@*IM{6Hsoe16mc`QaBnpci?e1HO?Tb%9UR5&lq5=!Q=4`g)NUyvPGy z_y8~Rzz6ccH}pb3{J}qbqaWb|{eXPfhaS`y`jH=b!3Ukl4WI3c9rQsr;^7B;&+I3x1&=^?+~aM1Mj*{G%Sw2|dsWe#9X^^dm3yAP@8-4|G8f{6Y`v z3!kV1{G)#Gh4zRC4|GE>^dmob;1@dK2l=1}df^ZLp#y$V2l$14)Dt?<4#(jGy1|2X z&;|X_0o~98zsL`N@QeCDFX{rn@Co0@4?WNc9{7O|@PY?Azz5&Z4ZrXM9nkIjf?m`E zzEBsmhfdT3dEo>3p&xqT3wn?b`cV()hF|!E4%7qsp%cEKAAHb@{5TH%z8>g?F64tA z_=PX%^>zBXP#@G0`cW_NBM!%*A9I2_6jy&*-c<2B>{D233;2Zf%>4i_!2Y#Uk^+3H) zC*O~M9QvReJn(}&&<#H5fPd&mec>NIQGe6{zM&t!;S>9Q9_T?H^aJw25B$Ri^7%TD zANo-T)Cu)~Z`1)g;SX!%g-_^2JbXbHc#$9ekO%tU3%XDj_(mO|A3mWU{?T7Jj(G4w zC*r^leaH(v@CRSekG$}Sy1*~$1KrT)`$k=%7dn8054wCEJ}>+sKlDQfbozBc9pDT8 z(Lbmc{9=uE(20D$4)8)R{DK$xpwIUS{iq-Gqh6>F`Wd=WN908u@`4ZTz=wWDUf(Zx zQD^9Ze)xwk)CIm!59mZaP)G1UFWP|@`cQx9KtAvwFZ{v>^20CcfI2}B{G&eb4ZqM2 z9cT}I$cs4Wf0yDLm%3K zAMxOUKj?x_=!akU@^v9Ee8Ly%0^iV!I>EP1!ws!2?MP+n6T#W>uC8n%(jHHDrIL|& zAu=&Pdt!2IP#-k4wzi#f?t-Nk1QYWU6BG0G`AfTkT(G1kl|guWHYf4as;=&{tqYb# zmM&S|+IHrGW%FYZ7*LNv(x=F4CIlOm)X2^HP~aXLmfedUDV(Yu5BgU1aK%DN};^*`9T| zbVsKum;c6!z(P8k$f%*Zb#;+qJ{5^BZe0|~3r(hwPUP$5*cx|iO)9r4n@`mT z3+F}B`AEKy?&^x9Gm&I!O{y!~)1Ar`>Q&st8TC`zm0mLxGpjS%%QBIy%cR0jjHEj< z*<4D@##hZwBs#0F5)uEPxY+JS`m(B+1OumpSCJI5mJDW@eiFhH^kgdiCf^!vQdw`R6WOx?u6L$b$1*nxl&rM_Lz0TUujFmPM8< zJ{6+pyDOsVYdLxt8)Ju0u2g-jF^OJ2rF3ZI; z`FO%bwB)jhR6d{1bVOQGIT=0DABj|CezurNq`N`|=e9-{E|{&IIVY@des1K#$fd

mgSyi|AeLi#9%Mwx+sZLyLIm6UGI6uVc+Xp}2jDi<$E)63G5 zHf&f`CnL%7IZ51!`G}0$cqY<%W<)CQ8dTL4hU@m{`$xu6PnVnv=pxa%)_F^1;;W=1 z^|Zt3CLNiICpw4wQhhZ^f4KU|ut{~~(uH+;A+y|uR zMU+-K>k7Se@j^TEhT=v#De~&*zn1^+D$saiW9VuB8NtH&^JhmUox4h|55-7BL;dvn zDRm7+x7%>#)T#ATrcXB5HPPoCeOA%uk%f!T)$c0}D2C@DJpYt^acNMtc3jyphkdN_ zCV7aOATh_vpDw-nvY>ig@2HNk)s@kYuaMo-6qDtElAT#S?#5B`swdnqdR}#;zkFWx zq|IZZ)l;sih*qE0`?03#mQhXB=QmZalvT^Awr*T6<==v zp1ZZL6<_P&$~t=J+hn>IF12noCO83rttT7)X%7$Yzhi=vJ$#zQ&-CzceBmj2_@Mg* zkBnwKzvIX}V<0oc0P@YNoEj^RBXzTEJuJp3ZVZ}9Mh;kSEu+U?_A!|=hZ z;W|cnDY~68fjyoL9r~i--yA~1jrDF-5yU+F8n-_tc*w&yTl`uN|67ZH+{16N_`5y) z8y5d>9)6eMFM9YrZoeYf<>6Zl_lNrZhEK6`63s$C6~Rgm-)`~Od3fljB6!@x|JCC6 zd-&6a>(}3TdCu?_58rM0bsqk|4A(EM^RmzIH$D7-!^Z^gajK8uQxpB z;nNMj+{0%ZzR|-Q4d3kHXBz%@9^P#DZ63bV@H;&`tmByAUJt+6;(y@bNy8uV@JkHe z>ES(w|J1|R82+?}hjkefJm=y1Qvki|zv$uN`7kDU$-{4O`&If2(L^L!+p!%N`fOW?r3D> z1lRiDx{Ch)T#5LzT>NmS!bzBwgQK3cZuFIMd z@%oE<)wv}M8yfG+OT_DST={p}E!#bFO50mX#DCNHUpIc#=UXM>?=6AwQIiJ?r6*8s6gJKNWnWarkqKk9qOW7=Dwd=LN$b_u^kH!9OUt*7M``80?v@tsehq8G`B;Z)7Ie@oK?G@-@NY8*S(!{shA#9)7anE3E?&Kh5w)51#`b z4{tJDrRoBnR>OD8n(@!Lz_Pn5u)bNuDOO5<;_^=`v= z+rD)}@RH&CJ^c5EziBwmy90(xDck~{GTG2^7(Qp|d$(};q~Vob{HcZ?=HX`;KF-6J z7#{KP^9`@{@KuI~&soEIIt)L}i_aQ9$HOl(ywPyf;Y!0}9)7Lixc(x(-*7yC0{^Pv zD?Ogu4e#{uZyDa>;rAN8*2BMV_<9e2(D3UFmtD8~#Be+}0)N8rJ1idO#m@{M@bIS% z-|gYQG8{h`1J7>_kH~dS7vS$1-ss`48otuQ-!*)_hnLHJQOD;U9$sVkfQKJt_-+pm zY{Dv?X|vDn>!$`?=}fA=A-GgOX0D$S)HlzcsxQ;@WxBr1&=+^0LAPe=%irkBY5LL- z9&v{nT+0U6w86D)aE)iVK-aLrwQLBRI(%xF(-BN{1XCTsR7Wt?5lnS$r@FRNUE8Uy z?KIbRnrk~PlRA_DDj&V;b|KppTq>X02dT^&iOsa9JA$^hyqh^`Z%cHp4%)La%NXc1 zM5fS|Z0{=AcCaR%Q<82sYbjjGR4ylN+o^48ZCWPA3Q3uZ2$HMXGBQ;q5ec1IYg^SO zlbu0cb7%9#_V%D%rYrJ6dq+xN6J0VD7__%{74w~eo4k>R#N3n1b_DXGb67GH7f#Yi zQ`ro3u1qHAdSt?~Lp^qh{$!hu%OI^YIzg9v&1I6^VF{Abq|oL*zz+)Um@;OPfy|{8 zdxGxOZSj0UCTmiGtlQj1I9u2z0Wu>PPqyiVmp6Cl4t~J=p}5KF9D|GNYQsdL?Yiki z+s34#?YgN&x1DIy2}ZYhsm?IUi*`sl6ST^NL9jTrwy;#DWdfSc2?}ZH1gTU{I;lNf 
zh$mKSKey?$b0D4Ap4XztY)_Ea8POoWI(1o)OzFgmBme8l{q|Ak`xxGi|v{hg<7}ce{?Ce30l&cO`Sur!sja zomuSBwt1P-)d?}}gJO3ko(npqTT(e0zPUsyu(@UFE13dJ$*NtpI)e_GJe3K-U`mY=(s^z!IG5_l=F{OAizk&UAHmyGxo5 z{|;9#oARg-^vd;EmrsQ&_grw6Y|B5_;g%VM>t~U`F%b-&^~ST9;+dzt1b(UEKF@b3 zUOAEXA1Hna;rOf_JYOEL;SaQ$=%c=-8R_&lP|X28SG#lqiF>l7F8w}?mU zS+KmL>AtO>)gnIpJS*hUw-&&Uuz2VPPxyIOsAnm~pF;6mhna-8Qv4Z&FC)C!aFwl} zJws25;`K9Z;0205kMJugUh8nV)q$U1IQ~X29)9m3{4nDGN5b_HA9{XFxPJBxe7oT) z<9x!OB>V!x|BZM!FP@LIUpVhRis!s<5I^S)lt32M!+G`d6kT-1dG+%YU3A6cN&v;jAZ0JUmWfgzI;ikarp3Um*Nq;^#VCLh(FKvK0S$;_0V& zeIJLsw-e5JzfC-xcN@iDNIW|U*Y9S5|7V0V|MMmI-=KK??iF~(s7YD$F!R(H?)S@7 z!uh(V@7r}z9`2VY#p`E!$lF2j++TUZS&zOy*TvU!8^!A!68OJQ_{D@jKsfi;0O5Z{ z@lO)YdY&=d*Yn#F@%t#gjriZ8c-DWI63e3X94~+HHGy!}bDZJ6o;ehM6!DxzIP;ua zf~T8s?sweBR3~5Oalh4OeBItiJUkxmFa&ex3r;^Fh?e^5O8`X$A4{eMUC z+7-Aj3{w0`(lg3FA61%e^E|@i#D7rdgTisIK( zJdgid37@sSeoCsO=pDE^cZ z@zW^&vlM??iTE=pzLw&fO2nT<@e?V&r9}L4ivMehUr{1HLGdS2d}oRHEX7Zv_(F;J zFH!ttieFzM{#uGZiQ+exi2pl^KbhihDG~ooia&+o?cyyuYS+uIGPHJo|cwaIW(x`ygA#1J~Joc}y_Hxy}PG z?-0)Q8B--im&LD74dGm$qYc;b#&tNJ;<*kd63%sKARd0-F_+@G4owuFmfRR8%L(T? 
zTueCE;S$2R4h6&gI$TLO*WpIu;X2$*@%;S%b&BWrFW(|OExvG_e~@tIf7o!<$^4H} zd{&O(y!#d5%<}^A^icfoC|*C)1J7%O=Lmm`@F|2>jFlZ(beuHEA9xNYd@A8b5zaiv z60YCf1kYy)pHBFxgmeGSGTiUq1r&b?@tjTZJf6=ZoX7J@;?d7%p|hLfc|6PSirk|0 zNy{Jly^nAn&o>dyN+3&Xv_x)}moX7M1#G`s}er+#-KSg*!;^Fss!u2y};4d4l zdXj{{N;u!I-y)oK9{q7?EQ|82kw5SsXSmk$GQw*KpGkN<;eSK;OyX}LypiJ9626e| z(+F=So^^yTC!G1?#Pdap?<9N{;oZc;{A&qk{;P@SOT^Ph_-w*&ARgwwm2l?2lXxyC zo_h#Co$&jJhxs2QocSLio-2swr-aWTd>8RB|MP@1|9=qAmBh1;@Xrzc8u2jyyM!}; zg`Ui^==l7y{9)W4PWW8Hk2GAzIrAS!IP;%KJnM<4p71jWpGG{)KbLUkj}gyR#IuC( zM#7g75A(MX&irZOxte%-2#*q8Bp&8pPdM{mPdvTE^A*D95&qwZhxz|I;mrSC;@Lnv z-zR)N;XfoE=HE^@^FK*E8;R!^gf|iXEb%b^ZwY7q-xJR@#Pd4g3kZLUc$oildUKLR z$9YQrFwUC@Ka=pY2IC= z{CxGD68QH@;Qvhcb&^;0>hh3>*M@NLFCLEgrwrHqKKX;rXG+8m63+Vf6TXmm4ibNo z@G;f0Ba8O0))RS;BwU|w@%(TC;ryJiknrn?rU=8Un+ZRi z__KsBBzzI!&BW7A`1usCw=(2yBfOjNB=PXP_Em(hr1-B857*&Vis#Ss@1}UYSb{C2|mIr&M${rW#c_yxrCBJuFJeT8ryw|^p>$8A&xlPsE-$LB)B{qt)H z#q;>Qs6_lNg!6g#Bf@$7?#$4RH*{y51I&f{bq@$fjmisE^ke}(XsQX`DpI|%P5 zTsGaJ<3#^1RAI8%%3+GP8DJm(4B-jGwN|=-N58kLi*@f1zS?jd1G*qyzt5|SUITRj zo;O_Qs&oOa-}BWa;$DYw%#YT3_;!n*}YJ$$9<-{9e&vH0se{3?sz?BU`6xJ&*&t{z@*JU4szRKsua@R^3+?%^1> zcX;?*i@(dm=NW#tho5Qqy&nE~!?$?&V#9yn;Y$tQ>fxA2f5^kXVDSSUezDM@Jozmmxt@`IdpkCTn&wroW(!u;cECrKV|b&-v(NA*58rQi%)?(dyxGIwG`z*b-!*)h zhX?j~6MpXko#lqF@Zu{CU+Lk88J_g;afWw#_;|y+JUnIdmpvX{G#uYMh0cFB9KWXl z{+i*JdpwgY?|KhE#qbRtKE?3sJp6DQZ<{?lV)#uSKF4@&_HcYIe2a&F!+37@@R;%3 z;o-Ga;rPGH!@p+u-5x&H#>2fHe!lT<@$hLD{{s(C7{1lRpELX+569=x10KH2@;>6> zJ%&H-;oymQ*Eib@=6HCo?ceU<_#Li)|HAKQ{p&4$2O9D8;P-|>eSTfHeCuD<^2-&s z?!;Cu)fKNV$PXTZdigy-Jg8qK|F~OyE~`5l-XOnCI%HRVO5@suAw$O!`GOp=U(E#d z@{d2I>gC_csdGP+u>Foqv0i?ABEPCAtTUEWXIs1eOKd37|Is=kt+fR9^^+m4M%%i} z-gn@Dl?%Ipem6~TIgLX|xN*~B18T#yzWvp+6x>=9XiJA>E1` zx*MD#`^xFJAE*xlH{1OX%_7*ab%U%`m(D@*g7*77!`Q{TQ5ucZe#?}Q4ez7C4gYAr z%-ZYqj+Y5KX(r2)2n=_J-q+c#`wr=d_0NS7kK6gH_18-&@=q{=6|&Y8R&GP(|IDb9 zWU`D4S3|F8kF|bwpzY1>P~4l=Ui*g^*uh$5Vw>Y?buw;*#gDyOV$_#kze(0U;&DJX z;IGSua{BFK)*kmG#QW{PDhEcY|M^#hEpD=gTK~i45A9L^k#cL3fz$Lg2)0-U=<4(G 
p=eO6}f7>`)jSI_ekycnEt!a(AIwa|2HA8)Bykh literal 0 HcmV?d00001 diff --git a/data/ssb/dbgen/rnd.c b/data/ssb/dbgen/rnd.c new file mode 100644 index 0000000..a159446 --- /dev/null +++ b/data/ssb/dbgen/rnd.c @@ -0,0 +1,262 @@ +/* @(#)rnd.c 2.1.8.2 + * + * + * RANDOM.C -- Implements Park & Miller's "Minimum Standard" RNG + * + * (Reference: CACM, Oct 1988, pp 1192-1201) + * + * NextRand: Computes next random integer + * UnifInt: Yields an long uniformly distributed between given bounds + * UnifReal: ields a real uniformly distributed between given bounds + * Exponential: Yields a real exponentially distributed with given mean + * + */ + +#include "config.h" +#include +#include +#include "dss.h" +#include "rnd.h" + +char *env_config PROTO((char *tag, char *dflt)); +void NthElement(long, long *); + +void +dss_random(long *tgt, long lower, long upper, long stream) +{ + *tgt = UnifInt((long)lower, (long)upper, (long)stream); + Seed[stream].usage += 1; + + return; +} + +void +row_start(int t) \ +{ + int i; + for (i=0; i <= MAX_STREAM; i++) + Seed[i].usage = 0 ; + + return; +} + +void +row_stop(int t) \ + { + int i; + + /* need to allow for handling the master and detail together */ + if (t == ORDER_LINE) + t = ORDER; + if (t == PART_PSUPP) + t = PART; + + for (i=0; i <= MAX_STREAM; i++) + if ((Seed[i].table == t) || (Seed[i].table == tdefs[t].child)) + { + if (set_seeds && (Seed[i].usage > Seed[i].boundary)) + { + fprintf(stderr, "\nSEED CHANGE: seed[%d].usage = %d\n", + i, Seed[i].usage); + Seed[i].boundary = Seed[i].usage; + } + else + { + NthElement((Seed[i].boundary - Seed[i].usage), &Seed[i].value); + } + } + return; + } + +void +dump_seeds(int tbl) +{ + int i; + + for (i=0; i <= MAX_STREAM; i++) + if (Seed[i].table == tbl) + printf("%d:\t%ld\n", i, Seed[i].value); + return; +} + +/****************************************************************** + + NextRand: Computes next random integer + +*******************************************************************/ + +/* + * 
long NextRand( long nSeed ) + */ +long +NextRand(long nSeed) + +/* + * nSeed is the previous random number; the returned value is the + * next random number. The routine generates all numbers in the + * range 1 .. nM-1. + */ + +{ + + /* + * The routine returns (nSeed * nA) mod nM, where nA (the + * multiplier) is 16807, and nM (the modulus) is + * 2147483647 = 2^31 - 1. + * + * nM is prime and nA is a primitive element of the range 1..nM-1. + * This * means that the map nSeed = (nSeed*nA) mod nM, starting + * from any nSeed in 1..nM-1, runs through all elements of 1..nM-1 + * before repeating. It never hits 0 or nM. + * + * To compute (nSeed * nA) mod nM without overflow, use the + * following trick. Write nM as nQ * nA + nR, where nQ = nM / nA + * and nR = nM % nA. (For nM = 2147483647 and nA = 16807, + * get nQ = 127773 and nR = 2836.) Write nSeed as nU * nQ + nV, + * where nU = nSeed / nQ and nV = nSeed % nQ. Then we have: + * + * nM = nA * nQ + nR nQ = nM / nA nR < nA < nQ + * + * nSeed = nU * nQ + nV nU = nSeed / nQ nV < nU + * + * Since nA < nQ, we have nA*nQ < nM < nA*nQ + nA < nA*nQ + nQ, + * i.e., nM/nQ = nA. This gives bounds on nU and nV as well: + * nM > nSeed => nM/nQ * >= nSeed/nQ => nA >= nU ( > nV ). + * + * Using ~ to mean "congruent mod nM" this gives: + * + * nA * nSeed ~ nA * (nU*nQ + nV) + * + * ~ nA*nU*nQ + nA*nV + * + * ~ nU * (-nR) + nA*nV (as nA*nQ ~ -nR) + * + * Both products in the last sum can be computed without overflow + * (i.e., both have absolute value < nM) since nU*nR < nA*nQ < nM, + * and nA*nV < nA*nQ < nM. Since the two products have opposite + * sign, their sum lies between -(nM-1) and +(nM-1). If + * non-negative, it is the answer (i.e., it's congruent to + * nA*nSeed and lies between 0 and nM-1). Otherwise adding nM + * yields a number still congruent to nA*nSeed, but now between + * 0 and nM-1, so that's the answer. 
+ */ + + long nU, nV; + + nU = nSeed / nQ; + nV = nSeed - nQ * nU; /* i.e., nV = nSeed % nQ */ + nSeed = nA * nV - nU * nR; + if (nSeed < 0) + nSeed += nM; + return (nSeed); +} + +/****************************************************************** + + UnifInt: Yields an long uniformly distributed between given bounds + +*******************************************************************/ + +/* + * long UnifInt( long nLow, long nHigh, long nStream ) + */ +long +UnifInt(long nLow, long nHigh, long nStream) + +/* + * Returns an integer uniformly distributed between nLow and nHigh, + * including * the endpoints. nStream is the random number stream. + * Stream 0 is used if nStream is not in the range 0..MAX_STREAM. + */ + +{ + double dRange; + long nTemp; + + if (nStream < 0 || nStream > MAX_STREAM) + nStream = 0; + + if (nLow > nHigh) + { + nTemp = nLow; + nLow = nHigh; + nHigh = nTemp; + } + + dRange = DOUBLE_CAST (nHigh - nLow + 1); + Seed[nStream].value = NextRand(Seed[nStream].value); + nTemp = (long) (((double) Seed[nStream].value / dM) * (dRange)); + return (nLow + nTemp); +} + + + +/****************************************************************** + + UnifReal: Yields a real uniformly distributed between given bounds + +*******************************************************************/ + +/* + * double UnifReal( double dLow, double dHigh, long nStream ) + */ +double +UnifReal(double dLow, double dHigh, long nStream) + +/* + * Returns a double uniformly distributed between dLow and dHigh, + * excluding the endpoints. nStream is the random number stream. + * Stream 0 is used if nStream is not in the range 0..MAX_STREAM. 
+ */ + +{ + double dTemp; + + if (nStream < 0 || nStream > MAX_STREAM) + nStream = 0; + if (dLow == dHigh) + return (dLow); + if (dLow > dHigh) + { + dTemp = dLow; + dLow = dHigh; + dHigh = dTemp; + } + Seed[nStream].value = NextRand(Seed[nStream].value); + dTemp = ((double) Seed[nStream].value / dM) * (dHigh - dLow); + return (dLow + dTemp); +} + + + +/******************************************************************% + + Exponential: Yields a real exponentially distributed with given mean + +*******************************************************************/ + +/* + * double Exponential( double dMean, long nStream ) + */ +double +Exponential(double dMean, long nStream) + +/* + * Returns a double uniformly distributed with mean dMean. + * 0.0 is returned iff dMean <= 0.0. nStream is the random number + * stream. Stream 0 is used if nStream is not in the range + * 0..MAX_STREAM. + */ + +{ + double dTemp; + + if (nStream < 0 || nStream > MAX_STREAM) + nStream = 0; + if (dMean <= 0.0) + return (0.0); + + Seed[nStream].value = NextRand(Seed[nStream].value); + dTemp = (double) Seed[nStream].value / dM; /* unif between 0..1 */ + return (-dMean * log(1.0 - dTemp)); +} diff --git a/data/ssb/dbgen/rnd.h b/data/ssb/dbgen/rnd.h new file mode 100644 index 0000000..a8e8d36 --- /dev/null +++ b/data/ssb/dbgen/rnd.h @@ -0,0 +1,80 @@ +/* + * Sccsid: @(#)rnd.h 2.1.8.1 + * + * rnd.h -- header file for use withthe portable random number generator + * provided by Frank Stephens of Unisys + */ + +/* function protypes */ +long NextRand PROTO((long)); +long UnifInt PROTO((long, long, long)); +double UnifReal PROTO((double, double, long)); +double Exponential PROTO((double, long)); + +static long nA = 16807; /* the multiplier */ +static long nM = 2147483647;/* the modulus == 2^31 - 1 */ +static long nQ = 127773; /* the quotient nM / nA */ +static long nR = 2836; /* the remainder nM % nA */ + +static double dM = 2147483647.0; + +/* + * macros to control RNG and assure reproducible 
multi-stream + * runs without the need for seed files. Keep track of invocations of RNG + * and always round-up to a known per-row boundary. + */ +/* + * preferred solution, but not initializing correctly + */ +#define VSTR_MAX(len) (long)(len / 5 + (len % 5 == 0)?0:1 + 1) +seed_t Seed[MAX_STREAM + 1] = +{ + {PART, 1, 0, 1}, /* P_MFG_SD 0 */ + {PART, 46831694, 0, 1}, /* P_BRND_SD 1 */ + {PART, 1841581359, 0, 1}, /* P_TYPE_SD 2 */ + {PART, 1193163244, 0, 1}, /* P_SIZE_SD 3 */ + {PART, 727633698, 0, 1}, /* P_CNTR_SD 4 */ + {NONE, 933588178, 0, 1}, /* P_RCST_SD 5 UNUSED 2-4-98 */ + {PART, 804159733, 0, RNG_PER_SENT * 3}, /* P_CMNT_SD 6 */ + {PSUPP, 1671059989, 0, SUPP_PER_PART}, /* PS_QTY_SD 7 */ + {PSUPP, 1051288424, 0, SUPP_PER_PART}, /* PS_SCST_SD 8 */ + {PSUPP, 1961692154, 0, SUPP_PER_PART * RNG_PER_SENT * 20}, /* PS_CMNT_SD 9 */ + {ORDER, 1227283347, 0, 1}, /* O_SUPP_SD 10 */ + {ORDER, 1171034773, 0, 1}, /* O_CLRK_SD 11 */ + {ORDER, 276090261, 0, RNG_PER_SENT * 8}, /* O_CMNT_SD 12 */ + {ORDER, 1066728069, 0, 1}, /* O_ODATE_SD 13 */ + {LINE, 209208115, 0, O_LCNT_MAX}, /* L_QTY_SD 14 */ + {LINE, 554590007, 0, O_LCNT_MAX}, /* L_DCNT_SD 15 */ + {LINE, 721958466, 0, O_LCNT_MAX}, /* L_TAX_SD 16 */ + {LINE, 1371272478, 0, O_LCNT_MAX}, /* L_SHIP_SD 17 */ + {LINE, 675466456, 0, O_LCNT_MAX}, /* L_SMODE_SD 18 */ + {LINE, 1808217256, 0, O_LCNT_MAX}, /* L_PKEY_SD 19 */ + {LINE, 2095021727, 0, O_LCNT_MAX}, /* L_SKEY_SD 20 */ + {LINE, 1769349045, 0, O_LCNT_MAX}, /* L_SDTE_SD 21 */ + {LINE, 904914315, 0, O_LCNT_MAX}, /* L_CDTE_SD 22 */ + {LINE, 373135028, 0, O_LCNT_MAX}, /* L_RDTE_SD 23 */ + {LINE, 717419739, 0, O_LCNT_MAX}, /* L_RFLG_SD 24 */ + {LINE, 1095462486, 0, O_LCNT_MAX * RNG_PER_SENT * 5}, /* L_CMNT_SD 25 */ + {CUST, 881155353, 0, 9}, /* C_ADDR_SD 26 */ + {CUST, 1489529863, 0, 1}, /* C_NTRG_SD 27 */ + {CUST, 1521138112, 0, 3}, /* C_PHNE_SD 28 */ + {CUST, 298370230, 0, 1}, /* C_ABAL_SD 29 */ + {CUST, 1140279430, 0, 1}, /* C_MSEG_SD 30 */ + {CUST, 1335826707, 0, 
RNG_PER_SENT * 12}, /* C_CMNT_SD 31 */ + {SUPP, 706178559, 0, 9}, /* S_ADDR_SD 32 */ + {SUPP, 110356601, 0, 1}, /* S_NTRG_SD 33 */ + {SUPP, 884434366, 0, 3}, /* S_PHNE_SD 34 */ + {SUPP, 962338209, 0, 1}, /* S_ABAL_SD 35 */ + {SUPP, 1341315363, 0, RNG_PER_SENT * 11}, /* S_CMNT_SD 36 */ + {PART, 709314158, 0, 92}, /* P_NAME_SD 37 */ + {ORDER, 591449447, 0, 1}, /* O_PRIO_SD 38 */ + {LINE, 431918286, 0, 1}, /* HVAR_SD 39 */ + {ORDER, 851767375, 0, 1}, /* O_CKEY_SD 40 */ + {NATION, 606179079, 0, RNG_PER_SENT * 16}, /* N_CMNT_SD 41 */ + {REGION, 1500869201, 0, RNG_PER_SENT * 16}, /* R_CMNT_SD 42 */ + {ORDER, 1434868289, 0, 1}, /* O_LCNT_SD 43 */ + {SUPP, 263032577, 0, 1}, /* BBB offset 44 */ + {SUPP, 753643799, 0, 1}, /* BBB type 45 */ + {SUPP, 202794285, 0, 1}, /* BBB comment 46 */ + {SUPP, 715851524, 0, 1} /* BBB junk 47 */ +}; diff --git a/data/ssb/dbgen/rnd.o b/data/ssb/dbgen/rnd.o new file mode 100644 index 0000000000000000000000000000000000000000..51f039b99d6f87cae74f2d5fdc726f5d2e2c9fba GIT binary patch literal 10608 zcmbtZeRNdSwZ9VvGDbRg@H2klP=cLE7!rs`5G0cia_2D#U;z0D#F$JVC10J~2}o<8 z9RuEmXnp!zU4313b-m}SEwxYA;#I3Yoe2q{R{Fqd8}J*gCLJR*pk#rP?tSjrGdtHI z{o|drX3jak{dx8|d!LiZuF}A=G@DJKW>fA|k~2aTC3jYGTquTx$~48LoKdHr*SaSo zqILf$It99ZLDnKsw^nd8eEJ}=_}2Q?_*So}jEqguZ;6bhKaV_W$*9(S6OMc@;dh|+ z)QWEXX~iG=*dAZ_iXEW%yOAYGQuK^KSE(a#N2zPq60Pfgmp1!;hh}^>h%{fM*QP}d zx_yx^7wCS?ct`6h9t{)>>Ib#%tC8VfYFE!IL}%WNKT~3SAkmj`om!XBFZ>b8G%%wX zrLOodH*WG(`8N4Bn+p)`YmrYKKI2QRdk(675x)ZFI6gIU5S-|9M=mVTXZmYCnpEG#eVv2Z}_{um*o&E_ZuZy4lO$lp|sVjr=la8dh0tg`qL^azW`Y()1ljk(!&zkWE&3M%erBMop!aN(X4<~Dpl1!Iv<0g`T zSk`N5$H5VG+SDv9GL~?5v|jbbcBbi7KyHqYsIIK6dUfun?HS1aA+YCUX|Ycx%U#fH zU%`PemjD~*AlIYdv9Jvare>k4Gb3u%ABF$wxW=f%v&VQN4P%B(0Bu2d<46;As3T=7{7HHeh_T zUS0hskeY>2hlrePpD*GL0NPvPB^@eo^&h^-4^o7Um<8yEe?nD2ivAL7+ucMPa-1i_|=f3Jt~&R31W`2xs# zmLvWijH4rd_0~hMuduhS+swGkJ-z+7-xy9R3K*k->_IIuYU|j)yt`zHq8WW!Y(h>XCGZ0;qID2?3zS#kuE^64Y6Dp=zGrf`Hpd_i;Gb9toD#_Of_Luk=ROfNQz$np 
z+60PAvNB;I=J^o&>bLch-opXobRheEl7>SFCk&PjhZh~8&6zu>?(MrcHdbEnaX>8} z4yb{zhAzTf6x>9;f?^=j40c;AW~Vzm9Ta|qXXul9zuNH;d^LmZL_HE|2khbu?3|)L zupgrZ2?YD=6-8j@Wq~enF7<iUE@aEf&!ckhr3=+gL*9mXP7{IF(< z1A107{sbO$^!be==n{JKCe(r+b?-58bmoeSMcsQ{nn+G__CvhJ)Xv|+H`Tq!;-iWl zqFGnYQNQsQD9pF1d*g5hB@Sy?eAG;X1g9M;yU?oLVLzchfSM%`=vsUNZdV)84Le*g zqp*LOEOTA}e&uDnLSMm=3?ydLt*3EepC?RD%O8lRYh7Zq{l0XU=T1Noq%<1#qh<;KT;qgOKqwAlM_7l|v_ zZ|nDW?V_uw%n>k72C_%ANRLgN3D{34XkV+ea2W;EoY8XIIq6EkSUeCg-Y+-KCc>3} z!XG(ggM`AUm3Km-R1DHNJA^xXByr9N?t@Gd2@UxsF1ya?3*fw>sTDo6P>4G%zu!I} zsODuCF!Be68Xys6REM6#u>stoW?7yBLyrL!z32-Vr-UQPt+5XbJqXlt;}clY&}=%d zc*Vn&1$Q}A;KS%M9L?Bv4}6RCRQEmtz9y3Os_+#Fm9D9yaXZG5UI1P8x3%uAi5MSB zaBjBa9zzgBOcLpY7AXQ<5P{C{nvL(p2u3_o03%JYj4<33#OM%ME~ahZJwQq4j~uq^~NV}Pemu}^WC4#r7`xa zC!`+a{tv(ed8*CZhk(&Fu{n(YyY7N+dV&Sp7Tj_>yaghP{>k*4dt0C!()y zUhd=cXD696*(uj;>dfc#%jAUU>5YfiaXRfE(WAfn)do%{ociDP9-hYOR{1^Jx#EfF zS6=tT3QjMP3Mi?^nywwUe)6B__YFP@e_JLS*(dtT!RN-UpZq5J&u^+KNJ=O3j_5Ow zkBsv#n+!;y=4EB${Hu~U<;RvIg)>v>r}O{EeJ@{^icaz?PWR4BMVD!=q(3|(i_@Qx z{!yG(g~IDo(Mi8=!=5#qPRpb9+a7*eorr$zv+w=QHQ0 zWsS`(o>P2d{N#e`mmaCT*|UGu`K69u-?*Xfjx$SlzWbBV!c~7?>RMND;Bfm%s>y|> zyhHXb*uVx3Y>g3dARh812JVFve!vd6#0_;j-N@HQn(e*esN}QBV|x5@jLGb0rqTewowt zP+E~QvwKpJ)4k8`b9(kn_BnGSQ_`MxdVEedkcynHA}1sTCVQily$5@c?MB&slYns# z%0?!aICImM3rQbIq96Z>{LuVnr{KjQekg%Hj2P(Jmj)txP*=pB*5J(aNxNGU?k+mk(W2vf$&82Xr4YCV5Jn?zCM(8?d^8lq>o6O1?-Mvb1LiUB1gj z+*V7T*N_LA2A{1G`#$#I@58wEhcIs4_9CZeAI5FZ6rXcpB)!CWPjiNIApj5*U`Rv} z)GN3L2c)hXsVh|+w`GivBgr={#Om?;0ukCkaJUdj>N&&4v?dSGnb1Ki5}8BRAu z+T03sV1kBj*_FSKrDBzL-?>edw_!SX&nWsqn`yf&cUp_1d{Wu{J>bYs z?*X}aYS}w__YAP7UEZ);4TnDLC-UVgnXWm%@e|TLMHEaCDgLLx=pVM z=}IL$KIF8eRcQ-E*OTem57 z;igt8P+iUNir^0YuA1gLWleKKy&rVcwY61;0BC6v&|SfrMx}H|YfCd2Xn=8J%NFn= z3=)+sjqSl@Eur#oqu$Wk7z`<`E!&l{pe}(1Kq|Kdx1-jIa8pS`o8Hh|tFNtT3-D8=DW2yBT-F&NT84aZj3gti3raS)ji>|%$g660~lnr+s3 zMJbZ|N4Px3vJNTu-VrtSf_zBrmi)+v5Bx7B{&R^V|Md)ij^TL3i2oqN@vb2Jh{RDp zzMlvml{ooHIOZQV#If`dK40RfXF9{huUas->gi|vH#7cWrXOQS`ZG}hG}P~g58*Q< 
zZq?s$3H*ml54ZaS|VHp9+%8_By&BP4965FJu@VZ>&jyIe8!J7;;&>l zmIT6U8IF&C!XIQfxBC>sxt`}4&h;E;IM?%CbPgJ>7hg+KJKkqP{+Z|yz<|y%BU^xEfNqC05@6oQ4;a4&o|Ij7= zZ!#S3C&Fhl9N&6`FJd_Nvs&WTcy7A{e<#EFzWj{o$zpc@i}7E<@aGuL<9VLpS2BJF z9(rgPACBL^aC~FZdb1fmo8dl&=PK4rQv2n!?z=5;XFP+ zV>tJ}pXvV=)AI%6=YHDcyT!UL7sI)JH^X^6=Q13B10etJlsK-J-#aTA?qU3QGd(>1 zk1(9)=@Sg+=lNF*=jY)ihVym3!EkPOSmNkEkLM`k=kas{A2y69eyEv$hb8fulF%x9 z;{Qm_pR(Ze?#I6mun|AK`v*-pfz!KxD!Z>yhTi@7uQ+U^CyE1T>nu3E?;9;Rz3-p5 z;Pj3^Yr*LqUnuR6U3xbw-nRRi^qPC2*F$E%l}4dpV~tmbcc|j6tI=x|@4anpiZ|4P z6J*cx!rPfo;hij}wQYKm1`~WX*S0i?$FR4#MGt!6N31#Uey-i7nDZ^oVQ*`wr8OAR z?*lbxI=HpEK2*~bG$Zu?`*wpkhDMg>gt9_8nwOD)#6X5I0pi;b&thQ$F$bgg$b_~| z=D8EbC@ZIl-vw@wIOZ;IyQN*)C!)D&AbZRW)M=G3G=C6LS}p#&r5ts6FhIZP?@_`@ zp7Mfl@}J^R2!vGfwbDM;THZ*G`V~OLG2WrP?UwTQa1h31pZY4mQpvwB<=qx}`kR;f zZ%g@H3m-Xz@#T&DrXGLapfTsmu`Wmwt-KMy@qUqMm{5@`<=qLy9MHN6{&yIlPOJP% zDc@=#SmhrF*;MCurQG<1(qTNmQ{hAMnJ~td$~NLrymX-b7?nfAH{R9ovC3oFww_&p gly&mIGe5Ni%raGQn^Va@n1xyt(kd|T!J0WSrE`Tzg` literal 0 HcmV?d00001 diff --git a/data/ssb/dbgen/shared.h b/data/ssb/dbgen/shared.h new file mode 100644 index 0000000..c1c18ce --- /dev/null +++ b/data/ssb/dbgen/shared.h @@ -0,0 +1,140 @@ +/* + * Sccsid: @(#)shared.h 2.1.8.1 + * Modified for SSBM + */ +#define N_CMNT_LEN 72 +#define N_CMNT_MAX 152 +#define R_CMNT_LEN 72 +#define R_CMNT_MAX 152 +#define MONEY_SCL 0.01 +#define V_STR_HGH 1.6 + +#ifdef SSBM +#define P_NAME_LEN 22 +#define P_MFG_LEN 6 +#define P_COLOR_LEN 3 +#define P_COLOR_MAX 11 +#define P_TYPE_MAX 25 +#define P_CAT_LEN 7 +#define P_CAT_MIN 1 +#define P_CAT_MAX 5 +#define P_CAT_SD 97 +#define S_NATION_NAME_LEN 15 +#define S_REGION_NAME_LEN 12 +#define C_NATION_NAME_LEN 15 +#define C_REGION_NAME_LEN 12 +#define C_NAT_SD 16 +#define C_REG_SD 3 +#define O_SHIP_STRU_LEN 25 +#define O_SHIP_MODE_LEN 10 +#define O_SHIP_PRIO_LEN 1 +#define D_DATE_LEN 18 +#define D_DAYWEEK_LEN 9 +#define D_YEARMONTH_LEN 7 +#define D_SEASON_LEN 12 +#define D_MONTH_LEN 9 +#define D_STARTDATE 694245661 /*corresponding to 
1/1/1992 1:1:1*/ +#define NAMTION_BRIEF_LEN 9 +#define CITY_CODE_SEED 15 +#define NUM_DAYS 2556 +#define NUM_SEASONS 5 +#define NUM_HOLIDAYS 10 +#define CITY_FIX 10 +#else + +#define P_NAME_LEN 55 +#define P_MFG_LEN 25 + +#endif + +#define P_BRND_LEN 10 + +#ifdef SSBM +#define P_TYPE_LEN 12 + +#else + +#define P_TYPE_LEN 25 + +#endif + +#define P_CNTR_LEN 10 +#define P_CMNT_LEN 14 +#define P_CMNT_MAX 23 +#define P_CAT_SEED 25 + +#define S_NAME_LEN 25 + +#ifdef SSBM +#define S_ADDR_LEN 15 +#define S_ADDR_MAX 25 +#else + +#define S_ADDR_LEN 25 +#define S_ADDR_MAX 40 +#endif + +#define S_CMNT_LEN 63 +#define S_CMNT_MAX 101 +#define PS_CMNT_LEN 124 +#define PS_CMNT_MAX 199 + +#ifdef SSBM +#define C_NAME_LEN 25 +#define C_MSEG_MIN 1 +#define C_MSEG_MAX 5 +#define C_ADDR_LEN 15 +#define C_ADDR_MAX 25 +#else +#define C_NAME_LEN 18 +#define C_ADDR_LEN 25 +#define C_ADDR_MAX 40 +#endif + +#define C_MSEG_LEN 10 +#define C_CMNT_LEN 73 +#define C_CMNT_MAX 117 + +#ifdef SSBM +#define O_OPRIO_LEN 8 + +#else +#define O_OPRIO_LEN 15 + +#endif + +#define O_CLRK_LEN 15 +#define O_CMNT_LEN 49 +#define O_CMNT_MAX 79 +#define L_CMNT_LEN 27 +#define L_CMNT_MAX 44 +#define L_INST_LEN 25 +#define L_SMODE_LEN 10 +#define T_ALPHA_LEN 10 +#define DATE_LEN 13 /* long enough to hold either date format */ +#define NATION_LEN 25 +#define REGION_LEN 25 +#define PHONE_LEN 15 + +#ifdef SSBM +#define MAXAGG_LEN 10 /* max component length for a agg str */ + +#else +#define MAXAGG_LEN 20 /* max component length for a agg str */ + +#endif + +#define P_CMNT_SD 6 +#define PS_CMNT_SD 9 +#define O_CMNT_SD 12 +#define C_ADDR_SD 26 +#define C_CMNT_SD 31 +#define S_ADDR_SD 32 +#define S_CMNT_SD 36 +#define L_CMNT_SD 25 + + + + + + diff --git a/data/ssb/dbgen/speed_seed.c b/data/ssb/dbgen/speed_seed.c new file mode 100644 index 0000000..402b7de --- /dev/null +++ b/data/ssb/dbgen/speed_seed.c @@ -0,0 +1,325 @@ +/* @(#)speed_seed.c 2.1.8.2 */ +#include +#include +#include "dss.h" + +/* _tal long RandSeed = 
"Random^SeedFromTimestamp" (void); */ + +#define FAKE_V_STR(avg, sd, cnt) \ + ADVANCE_STREAM(sd, \ + (long)(Seed[sd].boundary*cnt)) +#define ADVANCE_STREAM(stream_id, num_calls) \ + NthElement(num_calls, &Seed[stream_id].value) + +#define MAX_COLOR 92 +long name_bits[MAX_COLOR / BITS_PER_LONG]; +extern seed_t Seed[]; + +/* WARNING! This routine assumes the existence of 64-bit */ +/* integers. The notation used here- "HUGE" is *not* ANSI standard. */ +/* Hopefully, you have this extension as well. If not, use whatever */ +/* nonstandard trick you need to in order to get 64 bit integers. */ +/* The book says that this will work if MAXINT for the type you choose */ +/* is at least 2**46 - 1, so 64 bits is more than you *really* need */ + +static DSS_HUGE Multiplier = 16807; /* or whatever nonstandard */ +static DSS_HUGE Modulus = 2147483647; /* trick you use to get 64 bit int */ + +/* Advances value of Seed after N applications of the random number generator + with multiplier Mult and given Modulus. + NthElement(Seed[],count); + + Theory: We are using a generator of the form + X_n = [Mult * X_(n-1)] mod Modulus. It turns out that + X_n = [(Mult ** n) X_0] mod Modulus. + This can be computed using a divide-and-conquer technique, see + the code below. + + In words, this means that if you want the value of the Seed after n + applications of the generator, you multiply the initial value of the + Seed by the "super multiplier" which is the basic multiplier raised + to the nth power, and then take mod Modulus. 
+*/ + +/* Nth Element of sequence starting with StartSeed */ +/* Warning, needs 64-bit integers */ +#ifdef SUPPORT_64BITS +void NthElement (long N, long *StartSeed) + { + DSS_HUGE Z; + DSS_HUGE Mult; + static int ln=-1; + int i; + + if ((verbose > 0) && ++ln % 1000 == 0) + { + i = ln % LN_CNT; + fprintf(stderr, "%c\b", lnoise[i]); + } + Mult = Multiplier; + Z = (DSS_HUGE) *StartSeed; + while (N > 0 ) + { + if (N % 2 != 0) /* testing for oddness, this seems portable */ + Z = (Mult * Z) % Modulus; + N = N / 2; /* integer division, truncates */ + Mult = (Mult * Mult) % Modulus; + } + *StartSeed = (long)Z; + + return; + } +#else +/* add 32 bit version of NthElement HERE */ +/* + * MODMULT.C + * R. M. Shelton -- Unisys + * July 26, 1995 + * + * RND_seed: Computes the nth seed in the total sequence + * RND_shift: Shifts a random number by a given number of seeds + * RND_ModMult: Multiplies two numbers mod (2^31 - 1) + * + */ + + + +#include +#include /* required only for F_FatalError */ + +typedef signed long RND; +typedef unsigned long URND; + +#define FatalError(e) F_FatalError( (e), __FILE__, __LINE__ ) +void F_FatalError( int x, char *y, int z ) {fprintf(stderr, "Bang!\n");} + + +/* Prototypes */ +RND RND_seed( RND ); +RND RND_shift( RND, RND ); +static RND RND_ModMult( RND, RND ); + + + +RND +RND_seed ( RND Order ) +{ +static const RND TopMask = 0x40000000; +RND Mask; +RND Result; + + +if (Order <= -Modulus || Order >= Modulus) + FatalError(1023); + +if (Order < 0) Order = Modulus - 1L + Order; + +Mask = TopMask; +Result = 1L; + +while (Mask > Order) Mask >>= 1; + +while (Mask > 0) + { + if (Mask & Order) + { + Result = RND_ModMult( Result, Result); + Result = RND_ModMult( Result, Multiplier ); + } + else + { + Result = RND_ModMult( Result, Result ); + } + Mask >>= 1; + } + +return (Result); + +} /* RND_seed */ + + + +/*********************************************************************** + + RND_shift: Shifts a random number by a given number of seeds + 
+***********************************************************************/
+/* 32-bit build of NthElement: multiplies *Seed by RND_seed(Shift) using RND_ModMult. The parameter named Seed hides the file-level Seed[] array inside this function. */
+void
+NthElement ( long Shift, long *Seed)
+
+{
+   RND Power;
+   static int ln=-1;
+   int i;
+/* when verbose > 0, every 100th counted call writes one lnoise[] character followed by a backspace to stderr */
+   if ((verbose > 0) && ++ln % 100 == 0)
+      {
+      i = (ln/100) % LN_CNT;
+      fprintf(stderr, "%c\b", lnoise[i]);
+      }
+
+/* range checks: *Seed must be in 1 .. Modulus - 1 and Shift strictly between -Modulus and Modulus, otherwise FatalError(1023) is invoked */
+if (*Seed <= 0 || *Seed >= Modulus)
+   FatalError(1023);
+if (Shift <= -Modulus || Shift >= Modulus)
+   FatalError(1023);
+
+Power = RND_seed( Shift );
+
+*Seed = RND_ModMult( *Seed, Power );
+
+return;
+}  /*  RND_shift  */
+
+
+
+/*********************************************************************
+
+    RND_ModMult:  Multiplies two numbers mod (2^31 - 1)
+
+*********************************************************************/
+/* The product is assembled from double-precision partial products instead of a wider integer type. */
+static RND
+RND_ModMult ( RND nA, RND nB)
+
+{
+/* powers of two (and their reciprocals) used to split the partial products below */
+static const double dTwoPowPlus31 = 2147483648.;
+static const double dTwoPowMinus31 = 1./2147483648.;
+static const double dTwoPowPlus15 = 32768.;
+static const double dTwoPowMinus15 = 1./32768.;
+static const RND nLowMask = 0xFFFFL;
+static const URND ulBit31 = 1uL << 31;
+
+double dAH, dAL, dX, dY, dZ, dW;
+RND nH, nL;
+URND ulP, ulQ, ulResult;
+/* split nB into its low 16 bits (nL) and the remaining high part (nH), so that nA * nB == dAH * 2^16 + dAL */
+nL = nB & nLowMask;
+nH = (nB - nL) >> 16;
+dAH = (double)nA * (double)nH;
+dAL = (double)nA * (double)nL;
+dX = floor( dAH * dTwoPowMinus15 );
+dY = dAH - dX*dTwoPowPlus15;          /* dAH == dX * 2^15 + dY */
+dZ = floor( dAL * dTwoPowMinus31 );
+dW = dAL - dZ*dTwoPowPlus31;          /* dAL == dZ * 2^31 + dW */
+/* ulP collects the multiples of 2^31, ulQ the remainder; a set bit 31 in ulQ is moved into ulP as one more multiple of 2^31 */
+ulQ = (URND)dW + ((URND)dY << 16);
+ulP = (URND)dX + (URND)dZ;
+if (ulQ & ulBit31) { ulQ -= ulBit31; ulP++; }
+/* 2^31 is congruent to 1 modulo 2^31 - 1, so the two words are simply added and bit 31 is folded back in as +1 */
+ulResult = ulP + ulQ;
+if (ulResult & ulBit31) { ulResult -= ulBit31; ulResult++; }
+
+return (RND)ulResult;
+}
+#endif /* SUPPORT_64BITS */
+
+/* updates Seed[column] using the a_rnd algorithm: obtains len from RANDOM(len, min, max, column), then calls NthElement with ceil(len / 5) steps */
+void
+fake_a_rnd(int min, int max, int column)
+{
+   long len, itcount;
+   RANDOM(len, (long)min, (long)max, (long)column);
+   if (len % 5L == 0)
+      itcount = len/5;
+   else itcount = len/5 + 1L;
+   NthElement(itcount, &Seed[column].usage);   /* NOTE(review): this advances the .usage field, whereas ADVANCE_STREAM advances .value - confirm intended */
+   return;
+}
+
+/* sd_part: advances streams P_MFG_SD through P_CNTR_SD by skip_count each, P_CMNT_SD through FAKE_V_STR, and P_NAME_SD by skip_count * 92. child is not used; always returns 0. */
+long
+sd_part(int child, long skip_count)
+{
+   int i;
+
+   for (i=P_MFG_SD; i<= P_CNTR_SD; i++)
+      ADVANCE_STREAM(i, skip_count);
+
+   FAKE_V_STR(P_CMNT_LEN, P_CMNT_SD, skip_count);
+   ADVANCE_STREAM(P_NAME_SD, skip_count * 92);
+
+   return(0L);
+}
+/* sd_line: applies ADVANCE_STREAM(i, skip_count) to each stream L_QTY_SD through L_RFLG_SD, O_LCNT_MAX times over, then L_CMNT_SD through FAKE_V_STR; when child == 1 it also advances O_ODATE_SD and O_LCNT_SD. Always returns 0. */
+long
+sd_line(int child, long skip_count)
+   {
+   int i,j;
+
+   for (j=0; j < O_LCNT_MAX; j++)
+      {
+      for (i=L_QTY_SD; i<= L_RFLG_SD; i++)
+         ADVANCE_STREAM(i, skip_count);
+      }
+
+   FAKE_V_STR(L_CMNT_LEN, L_CMNT_SD, skip_count);
+   /* need to special case this as the link between master and detail */
+   if (child == 1)
+      {
+      ADVANCE_STREAM(O_ODATE_SD, skip_count);
+      ADVANCE_STREAM(O_LCNT_SD, skip_count);
+      }
+
+   return(0L);
+   }
+/* sd_order: advances the seven O_* streams listed below by skip_count (O_CMNT_SD through FAKE_V_STR). child is not used; always returns 0. */
+long
+sd_order(int child, long skip_count)
+{
+   ADVANCE_STREAM(O_LCNT_SD, skip_count);
+   ADVANCE_STREAM(O_CKEY_SD, skip_count);
+   FAKE_V_STR(O_CMNT_LEN, O_CMNT_SD, skip_count);
+   ADVANCE_STREAM(O_SUPP_SD, skip_count);
+   ADVANCE_STREAM(O_CLRK_SD, skip_count);
+   ADVANCE_STREAM(O_PRIO_SD, skip_count);
+   ADVANCE_STREAM(O_ODATE_SD, skip_count);
+
+   return (0L);
+}
+/* sd_psupp: advances PS_QTY_SD and PS_SCST_SD by skip_count, SUPP_PER_PART times over, then PS_CMNT_SD through FAKE_V_STR. child is not used; always returns 0. */
+long
+sd_psupp(int child, long skip_count)
+   {
+   int j;
+
+   for (j=0; j < SUPP_PER_PART; j++)
+      {
+      ADVANCE_STREAM(PS_QTY_SD, skip_count);
+      ADVANCE_STREAM(PS_SCST_SD, skip_count);
+      }
+   FAKE_V_STR(PS_CMNT_LEN, PS_CMNT_SD, skip_count);
+
+   return(0L);
+   }
+/* sd_cust: advances the C_* streams listed below; C_PHNE_SD is advanced by 3 * skip_count, the others by skip_count (C_ADDR_SD and C_CMNT_SD through FAKE_V_STR). child is not used; always returns 0. */
+long
+sd_cust(int child, long skip_count)
+{
+
+   FAKE_V_STR(C_ADDR_LEN, C_ADDR_SD, skip_count);
+   FAKE_V_STR(C_CMNT_LEN, C_CMNT_SD, skip_count);
+   ADVANCE_STREAM(C_NTRG_SD, skip_count);
+   ADVANCE_STREAM(C_PHNE_SD, 3L * skip_count);
+   ADVANCE_STREAM(C_ABAL_SD, skip_count);
+   ADVANCE_STREAM(C_MSEG_SD, skip_count);
+   return(0L);
+}
+/* sd_supp: advances the S_* and BBB_* streams listed below; S_PHNE_SD is advanced by 3 * skip_count. */
+long
+sd_supp(int child, long skip_count)
+{
+   ADVANCE_STREAM(S_NTRG_SD, skip_count);
+   ADVANCE_STREAM(S_PHNE_SD, 3L * skip_count);
+   ADVANCE_STREAM(S_ABAL_SD, skip_count);
+   FAKE_V_STR(S_ADDR_LEN, S_ADDR_SD, skip_count);
+   FAKE_V_STR(S_CMNT_LEN, S_CMNT_SD, skip_count);
+   ADVANCE_STREAM(BBB_CMNT_SD, skip_count);
+   ADVANCE_STREAM(BBB_JNK_SD, skip_count);
+   ADVANCE_STREAM(BBB_OFFSET_SD, skip_count);
+
ADVANCE_STREAM(BBB_TYPE_SD, skip_count); /* avoid one trudge */ + + return(0L); +} diff --git a/data/ssb/dbgen/speed_seed.o b/data/ssb/dbgen/speed_seed.o new file mode 100644 index 0000000000000000000000000000000000000000..41df59e924c934a4e0a10e572b2d3a5103f1d3d8 GIT binary patch literal 7776 zcmbtYe{j^r72mrAk^;RuR20<0bxN3ka3mOtq1xm^z-1<6C=hFh;1Mo&A(L>)+A45^{~U04zBlq3Y1#V&?#j2r?uv6b7bI(A4vtWB41-N1d z_!9&;jT1ZxwnreqQ4Bz&Wyc`EhY;X%aDEmEaGjDXfU&p2?wQ|2hp`yWwwHH&2+Pks z1QY=Sr2{A<1f>Qk?c4!rnE5S%)FRRa>NufBP(8#Q2QPMl-fWHLkBSbDw>!9_hbJG0 z$I|>e9OLf!on7GFjv_+4+b$P3Sx^OoaQAL3hVeE$o6h!M4w{&$2uPYmKrm#@&~nz+E@Vliy< zU9BQh$;FO@NCgje^4j)EE&V^>9fLWE-i}L?rXH+ouP*GgYI$o0kM}wFeGp^|tni8S zE8L|e;^%2;iBIf=Bo6S_@Ab7o-gpAsi`ia3iKz#F(ed#_M^F4@#?&7EG*60&y7!N9 zQrCVwtzt#5LNGhkA1#91;xk&Bc$)_g^WYaen4*{+ysai|3%~d=@4d_u2VuQMd}=38 zW77^2h^2g6Rnobx3`r|_m%QYOvzrz1e7foPz z&i4GI+J9K2oW=ltn6YM&cKu?Q73N5vt6V9O256U;6YyCiHm6$@io_W?{YQBSLaJvoQSL^=Bs@f zCS-th-=Y9zp1#&JA=9u4QzccAns1L{vBprSIuL`KFUWF&*=*Cy8I|}nNQ&$>Z=T(L zN;>=)3prU#EsdXq7oIH`17wTT+5Pk8&GqJ1tZIr_P2T)`Ux6=gMt+k}^Ec0$<;yFW zO0Y1&!tlU_oB96o3g%?29Wi)o3%P|_$A%@$vCiYTV@y^y{t}_hoiz3qzOX@%nT^_$HI~Rr;i8pv7@3~Qu=pQX& z?ciOahq)9=dNHp1`9i;r~vaFi<->|w0LMh*CSz|qeaR4-!x0rnW-zlIHuGH5p} z32i$6yA1gAfTRCkQmKf&2-r&o{>ug&zxL42bSexw^?}0$J^a_E5O@qynPpMbMST#-U{)SpT`yi7F1bP_4A|AhA4|! 
z)uCvVg&&UAS|Ns^@b`n2TdU{Sht`B57F!#Nu4;gAfk3!1S{t##f#B*o7Otub1*!tk zNHwd7)P@%VaVZ+cssoKxQ48Dp+DHi7hA4y(1F@#YM(hNeVuB&`NY$E9U{$RZV{8^B z0?P&W8h-4f&<_;;Zm@4CxV;bXUBTEW1qW9`5FZVf^n+i@D2R`NOXANU2%`JnYr{AA z$LEit`~S>_1&*g1=^si(2=ejWCh^-8+^yi>QE-(%+kne=zwZAL1AnIh|E&Rk-GCoc zaD2DOb@eGY*0jXW6OI)>PQmZQiGyG>7B0!3LOAkoR`5S6eEgmI(k_$Keg(%@h4k~L z0UuIuHEuS&XY{x^grk|tpK0LFQSe)p`7|ni@LetExlzGyQt%%uxSHoD6#o+x{_hq1 z4h6rW_`&Zg8FwVTg0Q(w!RHc=>qS1Ptjxe)sqy7LsnPIm@P$%u@bj?7-$VK)1OGP~ ze=@D}MGfz!`g_ITr&r?_ke`2Oxb%PA;Ac?d@1eNj0~pqg!)l(h==VbAxter0wL0Hx z;O83n_TNMH{u0lTcykQ=63xGSUo6z{t+cLkgP#hGFW*xu41Brn;XIK4dILYE`R}H@ zZ#3{9)A;haY&Y;<()g#bXhHTF_y-O6QO!>Qt?Pt_PbU1d!A}-_Kgj$nqxy7dcq!pG z6Ry|KM2-I$@pCl%G~v?>e)2VbHSr4#{8Ej-mG}z{{KXo75Ah!~@BEU$*ryDElh!3fZd| z_I2(iz>uovPcGC8W2(q?OKc}}(5G(iCE_2nfV%z9z&2g|uA>K^P4-y7W8jkZvM$oa z1{&+Z!#2oDDoi}1;yFyWN19%{0I@tg1cO|(o8h~*9C4OPXWxv68jxSv&G22f{|}G7 B4GsVR literal 0 HcmV?d00001 diff --git a/data/ssb/dbgen/tags b/data/ssb/dbgen/tags new file mode 100644 index 0000000..8a9376c --- /dev/null +++ b/data/ssb/dbgen/tags @@ -0,0 +1,1078 @@ +!_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/ +!_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted, 2=foldcase/ +!_TAG_PROGRAM_AUTHOR Darren Hiebert /dhiebert@users.sourceforge.net/ +!_TAG_PROGRAM_NAME Exuberant Ctags // +!_TAG_PROGRAM_URL http://ctags.sourceforge.net /official site/ +!_TAG_PROGRAM_VERSION 5.8 // +101 history.html /^

  • Changes as of 06\/04\/99<\/A>