From 81defaad834f5a8726998f3df625d472e79950f0 Mon Sep 17 00:00:00 2001 From: tomasznazarewicz Date: Sat, 14 Nov 2020 18:11:17 +0100 Subject: [PATCH] working chromo-sweep --- generated/README.md | 20 +++ generated/denser-labels/results/part-00000 | 126 ++++++++++++++++ generated/denser-labels/results/part-00001 | 126 ++++++++++++++++ generated/denser-labels/results/part-00002 | 126 ++++++++++++++++ generated/denser-labels/results/part-00003 | 127 ++++++++++++++++ generated/denser-labels/results/part-00004 | 126 ++++++++++++++++ generated/denser-labels/results/part-00005 | 126 ++++++++++++++++ generated/denser-labels/results/part-00006 | 126 ++++++++++++++++ generated/denser-labels/results/part-00007 | 127 ++++++++++++++++ generated/denser-queries/results/part-00000 | 135 ++++++++++++++++++ generated/denser-queries/results/part-00001 | 135 ++++++++++++++++++ generated/denser-queries/results/part-00002 | 135 ++++++++++++++++++ generated/denser-queries/results/part-00003 | 135 ++++++++++++++++++ generated/denser-queries/results/part-00004 | 135 ++++++++++++++++++ generated/denser-queries/results/part-00005 | 135 ++++++++++++++++++ generated/denser-queries/results/part-00006 | 135 ++++++++++++++++++ generated/denser-queries/results/part-00007 | 135 ++++++++++++++++++ .../generation/TestDataGenerator.scala | 44 ++++++ .../methods/chromosweep/ChromoSweep.scala | 58 ++++++++ .../methods/chromosweep/ChromoSweepJoin.scala | 51 +++++++ .../chromosweep/ChromoSweepJoinImpl.scala | 95 ++++++++++++ .../chromosweep/ChromoSweepJoinStrategy.scala | 15 ++ .../methods/chromosweep/RangeTreeNode.scala | 32 +++++ .../methods/chromosweep/SortedInterval.scala | 11 ++ .../tests/rangejoins/ChromoSweepTest.scala | 81 +++++++++++ 25 files changed, 2497 insertions(+) create mode 100644 generated/README.md create mode 100644 generated/denser-labels/results/part-00000 create mode 100644 generated/denser-labels/results/part-00001 create mode 100644 generated/denser-labels/results/part-00002 create 
mode 100644 generated/denser-labels/results/part-00003 create mode 100644 generated/denser-labels/results/part-00004 create mode 100644 generated/denser-labels/results/part-00005 create mode 100644 generated/denser-labels/results/part-00006 create mode 100644 generated/denser-labels/results/part-00007 create mode 100644 generated/denser-queries/results/part-00000 create mode 100644 generated/denser-queries/results/part-00001 create mode 100644 generated/denser-queries/results/part-00002 create mode 100644 generated/denser-queries/results/part-00003 create mode 100644 generated/denser-queries/results/part-00004 create mode 100644 generated/denser-queries/results/part-00005 create mode 100644 generated/denser-queries/results/part-00006 create mode 100644 generated/denser-queries/results/part-00007 create mode 100644 src/main/scala/org/biodatageeks/rangejoins/generation/TestDataGenerator.scala create mode 100644 src/main/scala/org/biodatageeks/sequila/rangejoins/methods/chromosweep/ChromoSweep.scala create mode 100644 src/main/scala/org/biodatageeks/sequila/rangejoins/methods/chromosweep/ChromoSweepJoin.scala create mode 100644 src/main/scala/org/biodatageeks/sequila/rangejoins/methods/chromosweep/ChromoSweepJoinImpl.scala create mode 100644 src/main/scala/org/biodatageeks/sequila/rangejoins/methods/chromosweep/ChromoSweepJoinStrategy.scala create mode 100644 src/main/scala/org/biodatageeks/sequila/rangejoins/methods/chromosweep/RangeTreeNode.scala create mode 100644 src/main/scala/org/biodatageeks/sequila/rangejoins/methods/chromosweep/SortedInterval.scala create mode 100644 src/test/scala/org/biodatageeks/sequila/tests/rangejoins/ChromoSweepTest.scala diff --git a/generated/README.md b/generated/README.md new file mode 100644 index 00000000..79b85307 --- /dev/null +++ b/generated/README.md @@ -0,0 +1,20 @@ +# Directory for generated test files. 
+Files are generated with org.biodatageeks.rangejoins.generation.TestDataGenerator + +## Generation parameters +Data is generated semi-randomly with the following parameters: + +- amount - number of intervals to generate +- maxOffset - max interval to start generation +- maxRange - max size of interval +- maxStep - max space between intervals + +## Datasets +Parameters for generated datasets +1. Denser Labels (expecting fewer nulls after join) + - queries [1000, 50, 5, 30] + - labels [1000, 50, 15, 20] + +2. Denser Queries (expecting more nulls after join) + - queries [1000, 50, 15, 20] + - labels [1000, 50, 5, 30] \ No newline at end of file diff --git a/generated/denser-labels/results/part-00000 b/generated/denser-labels/results/part-00000 new file mode 100644 index 00000000..3546f2d4 --- /dev/null +++ b/generated/denser-labels/results/part-00000 @@ -0,0 +1,126 @@ +11078,11080,q625,11071,11085,l633 +11087,11091,q626,0,0,null +11098,11101,q627,0,0,null +11112,11113,q628,0,0,null +11138,11142,q629,11134,11141,l636 +11166,11169,q630,0,0,null +11175,11176,q631,0,0,null +11197,11201,q632,11192,11198,l640 +11197,11201,q632,11200,11213,l641 +11230,11232,q633,0,0,null +11253,11257,q634,11248,11254,l644 +11275,11276,q635,11273,11285,l645 +11290,11294,q636,11292,11306,l646 +11301,11305,q637,11292,11306,l646 +11319,11321,q638,0,0,null +11328,11331,q639,11325,11334,l647 +11345,11346,q640,0,0,null +11360,11361,q641,11353,11363,l648 +11383,11385,q642,11380,11386,l650 +11402,11405,q643,11398,11408,l651 +11429,11432,q644,11427,11437,l652 +11435,11437,q645,11427,11437,l652 +11441,11445,q646,0,0,null +11474,11475,q647,11470,11476,l654 +11504,11506,q648,0,0,null +11515,11517,q649,0,0,null +11538,11541,q650,11538,11542,l659 +11553,11557,q651,11554,11556,l660 +11575,11578,q652,11575,11581,l661 +11598,11600,q653,11598,11610,l662 +11611,11614,q654,0,0,null +11615,11619,q655,0,0,null +11625,11629,q656,11621,11628,l663 +11642,11643,q657,0,0,null +11656,11659,q658,11655,11660,l666
+11677,11680,q659,11677,11690,l668 +11709,11710,q660,11707,11711,l669 +11722,11726,q661,11716,11724,l670 +11729,11733,q662,0,0,null +11735,11738,q663,11736,11740,l671 +11766,11768,q664,11760,11768,l673 +11770,11774,q665,0,0,null +11781,11782,q666,11779,11784,l674 +11796,11800,q667,11798,11812,l675 +11811,11812,q668,11798,11812,l675 +11819,11822,q669,0,0,null +11825,11828,q670,0,0,null +11835,11839,q671,11835,11839,l677 +11866,11867,q672,0,0,null +11879,11882,q673,11879,11882,l680 +11900,11904,q674,0,0,null +11921,11924,q675,0,0,null +11925,11928,q676,0,0,null +11941,11943,q677,11935,11941,l684 +11947,11949,q678,0,0,null +11967,11969,q679,11959,11967,l686 +11975,11979,q680,0,0,null +11986,11988,q681,11984,11987,l687 +12000,12004,q682,12001,12010,l688 +12023,12025,q683,12016,12028,l689 +12047,12051,q684,12043,12049,l691 +12056,12060,q685,0,0,null +12069,12071,q686,12066,12069,l692 +12076,12080,q687,0,0,null +12082,12086,q688,0,0,null +12092,12094,q689,12087,12098,l693 +12100,12104,q690,12100,12108,l694 +12107,12108,q691,12100,12108,l694 +12109,12110,q692,0,0,null +12139,12141,q693,0,0,null +12150,12153,q694,12152,12157,l697 +12154,12155,q695,12152,12157,l697 +12184,12187,q696,12180,12191,l699 +12206,12209,q697,12206,12217,l701 +12220,12221,q698,0,0,null +12224,12228,q699,12226,12238,l702 +12246,12248,q700,12240,12248,l703 +12271,12273,q701,0,0,null +12292,12293,q702,0,0,null +12300,12302,q703,12302,12313,l706 +12329,12331,q704,0,0,null +12334,12336,q705,12335,12346,l708 +12337,12339,q706,12335,12346,l708 +12348,12351,q707,0,0,null +12370,12373,q708,12364,12377,l710 +12375,12376,q709,12364,12377,l710 +12391,12394,q710,0,0,null +12407,12411,q711,12403,12415,l712 +12432,12433,q712,0,0,null +12462,12464,q713,12455,12467,l715 +12491,12494,q714,12494,12502,l717 +12500,12503,q715,12494,12502,l717 +12527,12530,q716,12521,12530,l718 +12550,12553,q717,12538,12550,l719 +12579,12583,q718,12566,12580,l720 +12612,12614,q719,0,0,null +12616,12619,q720,12615,12627,l722 
+12647,12648,q721,12643,12654,l723 +12656,12659,q722,0,0,null +12674,12675,q723,12666,12677,l724 +12684,12686,q724,12680,12685,l725 +12691,12694,q725,0,0,null +12718,12721,q726,12716,12728,l727 +12735,12738,q727,0,0,null +12745,12746,q728,12745,12748,l728 +12762,12763,q729,0,0,null +12765,12767,q730,12765,12779,l729 +12786,12787,q731,12787,12788,l730 +12800,12803,q732,0,0,null +12812,12814,q733,12804,12814,l732 +12827,12828,q734,12823,12829,l733 +12856,12857,q735,12855,12866,l735 +12883,12885,q736,12877,12884,l736 +12905,12908,q737,0,0,null +12921,12925,q738,12923,12934,l738 +12939,12940,q739,12937,12940,l739 +12944,12946,q740,0,0,null +12959,12963,q741,12962,12968,l741 +12976,12977,q742,0,0,null +12981,12984,q743,0,0,null +12996,13000,q744,12987,12999,l742 +13020,13024,q745,13009,13020,l743 +13042,13043,q746,13035,13044,l744 +13072,13075,q747,13059,13073,l745 +13103,13106,q748,13103,13115,l747 +13135,13136,q749,0,0,null diff --git a/generated/denser-labels/results/part-00001 b/generated/denser-labels/results/part-00001 new file mode 100644 index 00000000..b442e47c --- /dev/null +++ b/generated/denser-labels/results/part-00001 @@ -0,0 +1,126 @@ +13137,13139,q750,0,0,null +13164,13168,q751,13162,13170,l750 +13179,13182,q752,13179,13184,l751 +13195,13198,q753,13198,13203,l752 +13214,13217,q754,0,0,null +13241,13242,q755,0,0,null +13253,13255,q756,13251,13256,l754 +13275,13277,q757,0,0,null +13286,13287,q758,13279,13288,l756 +13291,13293,q759,0,0,null +13318,13319,q760,13314,13319,l759 +13328,13331,q761,13328,13336,l760 +13334,13337,q762,13328,13336,l760 +13363,13367,q763,13356,13363,l762 +13393,13397,q764,0,0,null +13413,13416,q765,0,0,null +13436,13438,q766,13428,13439,l765 +13446,13449,q767,13443,13452,l766 +13468,13471,q768,0,0,null +13498,13499,q769,13496,13502,l770 +13514,13516,q770,0,0,null +13540,13543,q771,13534,13546,l772 +13566,13569,q772,13565,13570,l773 +13587,13589,q773,13589,13601,l774 +13618,13622,q774,13606,13619,l775 +13623,13624,q775,0,0,null 
+13651,13652,q776,13650,13656,l777 +13678,13680,q777,13670,13680,l779 +13704,13705,q778,0,0,null +13716,13718,q779,13708,13717,l781 +13723,13725,q780,13722,13724,l782 +13745,13746,q781,13745,13746,l784 +13763,13766,q782,13754,13767,l785 +13787,13788,q783,13785,13795,l787 +13793,13795,q784,13785,13795,l787 +13811,13815,q785,0,0,null +13832,13834,q786,13827,13839,l789 +13855,13858,q787,0,0,null +13880,13884,q788,13878,13881,l792 +13912,13916,q789,0,0,null +13943,13947,q790,13937,13951,l795 +13956,13959,q791,0,0,null +13969,13971,q792,13965,13979,l796 +13973,13974,q793,13965,13979,l796 +13984,13988,q794,13982,13995,l797 +14009,14011,q795,0,0,null +14034,14036,q796,0,0,null +14064,14065,q797,0,0,null +14069,14070,q798,0,0,null +14097,14101,q799,0,0,null +14109,14111,q800,14108,14109,l802 +14118,14121,q801,0,0,null +14133,14135,q802,14126,14137,l803 +14150,14151,q803,14142,14156,l804 +14180,14184,q804,0,0,null +14185,14186,q805,0,0,null +14187,14190,q806,14188,14202,l806 +14197,14199,q807,14188,14202,l806 +14211,14212,q808,14203,14216,l807 +14231,14235,q809,14233,14237,l809 +14240,14242,q810,0,0,null +14256,14257,q811,0,0,null +14271,14275,q812,14267,14277,l811 +14299,14302,q813,14302,14307,l813 +14313,14314,q814,0,0,null +14336,14337,q815,14334,14342,l815 +14364,14366,q816,14356,14369,l816 +14385,14386,q817,0,0,null +14391,14393,q818,14393,14404,l818 +14395,14399,q819,14393,14404,l818 +14409,14412,q820,0,0,null +14430,14432,q821,0,0,null +14441,14445,q822,14436,14442,l820 +14465,14469,q823,14467,14477,l822 +14472,14476,q824,14467,14477,l822 +14486,14489,q825,0,0,null +14518,14522,q826,14511,14522,l824 +14524,14526,q827,0,0,null +14529,14533,q828,0,0,null +14559,14562,q829,14562,14568,l827 +14570,14571,q830,0,0,null +14573,14575,q831,14574,14576,l828 +14585,14588,q832,0,0,null +14617,14618,q833,14616,14619,l830 +14647,14651,q834,14643,14652,l832 +14670,14673,q835,14669,14677,l833 +14689,14691,q836,14690,14692,l834 +14698,14702,q837,0,0,null 
+14726,14729,q838,14729,14733,l836 +14744,14745,q839,14738,14747,l837 +14767,14770,q840,14765,14767,l839 +14767,14770,q840,14768,14778,l840 +14797,14800,q841,14796,14809,l841 +14811,14812,q842,0,0,null +14825,14828,q843,14825,14839,l843 +14839,14840,q844,14825,14839,l843 +14853,14855,q845,14850,14857,l844 +14884,14885,q846,14879,14891,l846 +14912,14914,q847,14912,14917,l848 +14925,14928,q848,14928,14933,l849 +14954,14957,q849,14950,14956,l850 +14964,14968,q850,14965,14968,l851 +14994,14996,q851,14982,14994,l852 +15013,15017,q852,0,0,null +15043,15046,q853,15041,15055,l855 +15067,15068,q854,0,0,null +15086,15089,q855,15087,15097,l857 +15104,15108,q856,0,0,null +15129,15130,q857,0,0,null +15145,15148,q858,15136,15150,l859 +15170,15173,q859,0,0,null +15193,15194,q860,0,0,null +15221,15224,q861,0,0,null +15225,15229,q862,15226,15238,l863 +15246,15247,q863,0,0,null +15261,15265,q864,0,0,null +15269,15272,q865,0,0,null +15300,15302,q866,15294,15306,l866 +15322,15323,q867,15322,15323,l867 +15326,15329,q868,0,0,null +15354,15358,q869,15351,15354,l870 +15373,15374,q870,0,0,null +15397,15398,q871,15390,15401,l872 +15403,15406,q872,0,0,null +15422,15423,q873,15416,15426,l873 +15440,15442,q874,15442,15451,l874 diff --git a/generated/denser-labels/results/part-00002 b/generated/denser-labels/results/part-00002 new file mode 100644 index 00000000..8d0e31cc --- /dev/null +++ b/generated/denser-labels/results/part-00002 @@ -0,0 +1,126 @@ +2243,2245,q125,2244,2253,l128 +2269,2273,q126,0,0,null +2284,2287,q127,2281,2288,l130 +2294,2296,q128,2290,2300,l131 +2320,2324,q129,2323,2334,l133 +2329,2331,q130,2323,2334,l133 +2336,2338,q131,0,0,null +2361,2363,q132,2352,2361,l134 +2388,2389,q133,0,0,null +2396,2400,q134,2395,2401,l136 +2410,2412,q135,2410,2418,l137 +2422,2423,q136,0,0,null +2439,2443,q137,2440,2448,l139 +2449,2451,q138,0,0,null +2478,2479,q139,2474,2481,l141 +2481,2485,q140,2474,2481,l141 +2504,2506,q141,2505,2516,l143 +2534,2538,q142,0,0,null +2558,2561,q143,2555,2565,l147 
+2567,2569,q144,0,0,null +2573,2575,q145,0,0,null +2590,2592,q146,2584,2592,l148 +2601,2602,q147,0,0,null +2605,2606,q148,2605,2615,l149 +2618,2621,q149,0,0,null +2629,2630,q150,0,0,null +2648,2651,q151,2646,2653,l151 +2680,2681,q152,0,0,null +2707,2709,q153,0,0,null +2736,2739,q154,2731,2736,l155 +2752,2754,q155,2746,2755,l156 +2773,2777,q156,2769,2781,l157 +2788,2789,q157,0,0,null +2806,2809,q158,2796,2808,l158 +2829,2831,q159,0,0,null +2844,2848,q160,2841,2854,l161 +2870,2874,q161,2866,2875,l162 +2879,2882,q162,0,0,null +2891,2895,q163,2894,2907,l163 +2904,2905,q164,2894,2907,l163 +2918,2920,q165,2920,2926,l164 +2936,2939,q166,2938,2947,l165 +2965,2967,q167,2961,2973,l166 +2984,2988,q168,2976,2987,l167 +3013,3016,q169,3006,3020,l169 +3026,3029,q170,3022,3035,l170 +3058,3059,q171,0,0,null +3084,3088,q172,0,0,null +3105,3108,q173,0,0,null +3110,3112,q174,0,0,null +3136,3139,q175,3137,3139,l176 +3156,3160,q176,0,0,null +3185,3186,q177,3181,3193,l179 +3209,3213,q178,3206,3216,l180 +3237,3238,q179,3224,3237,l181 +3265,3266,q180,3258,3266,l183 +3273,3277,q181,3270,3278,l184 +3288,3291,q182,3281,3293,l185 +3294,3298,q183,0,0,null +3312,3313,q184,0,0,null +3336,3340,q185,3335,3339,l188 +3345,3348,q186,3347,3358,l189 +3365,3366,q187,3365,3374,l190 +3377,3379,q188,3375,3389,l191 +3404,3407,q189,3399,3413,l193 +3424,3427,q190,3414,3427,l194 +3451,3452,q191,0,0,null +3456,3457,q192,3457,3469,l196 +3467,3468,q193,3457,3469,l196 +3496,3498,q194,3485,3499,l197 +3500,3502,q195,0,0,null +3513,3517,q196,3504,3518,l198 +3539,3541,q197,0,0,null +3546,3549,q198,0,0,null +3552,3555,q199,0,0,null +3570,3573,q200,3556,3570,l200 +3570,3573,q200,3573,3587,l201 +3598,3599,q201,0,0,null +3619,3620,q202,0,0,null +3627,3629,q203,3627,3637,l203 +3631,3634,q204,3627,3637,l203 +3643,3645,q205,3643,3656,l205 +3666,3670,q206,0,0,null +3690,3693,q207,3692,3705,l207 +3722,3723,q208,3723,3728,l208 +3743,3744,q209,0,0,null +3772,3776,q210,3760,3773,l211 +3802,3803,q211,0,0,null 
+3807,3809,q212,0,0,null +3814,3817,q213,3811,3814,l213 +3839,3843,q214,0,0,null +3854,3855,q215,3846,3859,l215 +3856,3859,q216,3846,3859,l215 +3879,3880,q217,3879,3888,l217 +3903,3906,q218,3895,3904,l218 +3928,3930,q219,3921,3933,l220 +3957,3958,q220,3954,3958,l222 +3974,3976,q221,3976,3978,l223 +3997,4000,q222,3994,4002,l224 +4009,4011,q223,0,0,null +4040,4044,q224,4038,4046,l227 +4052,4055,q225,4049,4053,l228 +4057,4058,q226,0,0,null +4085,4088,q227,4080,4086,l230 +4110,4112,q228,0,0,null +4131,4133,q229,0,0,null +4134,4138,q230,0,0,null +4144,4146,q231,4139,4152,l233 +4163,4165,q232,0,0,null +4185,4189,q233,0,0,null +4214,4218,q234,0,0,null +4247,4248,q235,4243,4257,l238 +4252,4253,q236,4243,4257,l238 +4282,4286,q237,4275,4286,l239 +4307,4310,q238,4304,4316,l240 +4335,4339,q239,4328,4338,l241 +4335,4339,q239,4339,4347,l242 +4356,4359,q240,0,0,null +4375,4376,q241,4372,4378,l244 +4399,4400,q242,0,0,null +4403,4405,q243,4401,4412,l246 +4430,4431,q244,4419,4431,l247 +4434,4438,q245,0,0,null +4440,4443,q246,0,0,null +4467,4469,q247,4467,4470,l249 +4479,4480,q248,4479,4485,l250 diff --git a/generated/denser-labels/results/part-00003 b/generated/denser-labels/results/part-00003 new file mode 100644 index 00000000..598116de --- /dev/null +++ b/generated/denser-labels/results/part-00003 @@ -0,0 +1,127 @@ +4484,4488,q249,4479,4485,l250 +4510,4513,q250,4507,4513,l252 +4536,4539,q251,0,0,null +4564,4568,q252,4558,4569,l255 +4574,4578,q253,4578,4588,l256 +4591,4595,q254,4595,4596,l257 +4613,4617,q255,4603,4615,l258 +4628,4632,q256,4627,4635,l259 +4652,4655,q257,0,0,null +4666,4667,q258,4667,4678,l262 +4691,4694,q259,0,0,null +4696,4700,q260,4695,4706,l263 +4718,4719,q261,4712,4721,l264 +4729,4731,q262,4731,4739,l265 +4744,4746,q263,4741,4750,l266 +4756,4760,q264,0,0,null +4764,4767,q265,4763,4766,l267 +4787,4789,q266,4779,4790,l268 +4810,4811,q267,0,0,null +4823,4824,q268,0,0,null +4850,4853,q269,0,0,null +4856,4857,q270,0,0,null +4872,4876,q271,0,0,null 
+4889,4890,q272,4889,4903,l274 +4910,4913,q273,4907,4921,l275 +4941,4945,q274,4928,4942,l276 +4946,4950,q275,0,0,null +4977,4979,q276,0,0,null +4995,4998,q277,4994,4996,l279 +5004,5005,q278,4999,5005,l280 +5025,5029,q279,0,0,null +5047,5049,q280,5039,5048,l283 +5057,5060,q281,5057,5071,l284 +5088,5092,q282,0,0,null +5114,5117,q283,5117,5130,l287 +5124,5128,q284,5117,5130,l287 +5152,5153,q285,0,0,null +5176,5179,q286,5179,5185,l291 +5187,5188,q287,0,0,null +5201,5205,q288,5201,5206,l292 +5211,5215,q289,0,0,null +5227,5231,q290,0,0,null +5259,5263,q291,0,0,null +5286,5287,q292,0,0,null +5309,5313,q293,0,0,null +5332,5333,q294,0,0,null +5338,5342,q295,5336,5344,l299 +5351,5353,q296,0,0,null +5364,5366,q297,5362,5364,l300 +5364,5366,q297,5366,5376,l301 +5374,5376,q298,5366,5376,l301 +5405,5408,q299,0,0,null +5436,5439,q300,0,0,null +5461,5464,q301,5460,5464,l308 +5490,5494,q302,0,0,null +5516,5518,q303,0,0,null +5535,5538,q304,5533,5546,l311 +5539,5542,q305,5533,5546,l311 +5568,5569,q306,5567,5569,l313 +5572,5575,q307,0,0,null +5586,5587,q308,5583,5590,l314 +5593,5595,q309,0,0,null +5613,5615,q310,5615,5617,l316 +5628,5631,q311,5623,5630,l317 +5632,5635,q312,0,0,null +5654,5656,q313,5648,5655,l318 +5669,5671,q314,5664,5672,l319 +5672,5674,q315,5664,5672,l319 +5697,5699,q316,0,0,null +5713,5717,q317,5705,5718,l321 +5723,5726,q318,0,0,null +5752,5754,q319,5752,5763,l323 +5778,5779,q320,0,0,null +5795,5797,q321,5789,5795,l325 +5804,5807,q322,5798,5809,l326 +5822,5826,q323,5817,5825,l327 +5848,5851,q324,0,0,null +5867,5869,q325,0,0,null +5888,5891,q326,5887,5901,l331 +5915,5919,q327,5910,5924,l333 +5934,5936,q328,0,0,null +5938,5940,q329,5939,5943,l334 +5965,5966,q330,5958,5969,l335 +5993,5994,q331,5987,5999,l337 +6017,6019,q332,6016,6027,l339 +6038,6039,q333,0,0,null +6061,6062,q334,0,0,null +6063,6064,q335,0,0,null +6091,6094,q336,0,0,null +6099,6101,q337,6100,6104,l343 +6111,6113,q338,6108,6111,l344 +6119,6123,q339,0,0,null +6130,6134,q340,6126,6138,l345 
+6140,6144,q341,0,0,null +6159,6162,q342,6154,6163,l346 +6185,6189,q343,6182,6185,l348 +6211,6213,q344,6206,6216,l350 +6227,6228,q345,6227,6231,l351 +6251,6255,q346,6243,6255,l352 +6263,6265,q347,0,0,null +6274,6276,q348,6271,6277,l354 +6282,6285,q349,0,0,null +6288,6290,q350,6288,6294,l355 +6309,6312,q351,0,0,null +6330,6333,q352,0,0,null +6350,6354,q353,0,0,null +6359,6361,q354,6360,6368,l358 +6390,6394,q355,6393,6407,l361 +6419,6421,q356,0,0,null +6426,6430,q357,6427,6433,l364 +6433,6436,q358,6427,6433,l364 +6437,6439,q359,0,0,null +6454,6456,q360,0,0,null +6460,6462,q361,0,0,null +6468,6469,q362,6467,6470,l366 +6484,6487,q363,0,0,null +6516,6518,q364,0,0,null +6526,6527,q365,6526,6530,l369 +6532,6536,q366,0,0,null +6563,6567,q367,6561,6568,l372 +6592,6596,q368,6587,6593,l374 +6601,6604,q369,0,0,null +6622,6626,q370,6624,6626,l376 +6631,6634,q371,0,0,null +6644,6646,q372,6641,6649,l377 +6671,6673,q373,6671,6683,l379 +6678,6682,q374,6671,6683,l379 diff --git a/generated/denser-labels/results/part-00004 b/generated/denser-labels/results/part-00004 new file mode 100644 index 00000000..52c6e946 --- /dev/null +++ b/generated/denser-labels/results/part-00004 @@ -0,0 +1,126 @@ +15457,15458,q875,0,0,null +15462,15463,q876,0,0,null +15484,15486,q877,15485,15486,l876 +15492,15495,q878,0,0,null +15507,15510,q879,0,0,null +15524,15525,q880,15525,15526,l878 +15540,15543,q881,15542,15554,l880 +15546,15548,q882,15542,15554,l880 +15570,15571,q883,0,0,null +15597,15599,q884,15593,15606,l884 +15618,15619,q885,15618,15626,l885 +15634,15636,q886,0,0,null +15655,15657,q887,15652,15666,l887 +15664,15667,q888,15652,15666,l887 +15696,15700,q889,0,0,null +15703,15707,q890,15706,15717,l889 +15726,15727,q891,0,0,null +15740,15743,q892,0,0,null +15758,15760,q893,15756,15759,l891 +15758,15760,q893,15760,15771,l892 +15780,15784,q894,15784,15796,l893 +15799,15802,q895,0,0,null +15807,15811,q896,15803,15811,l894 +15835,15836,q897,15835,15845,l896 +15842,15844,q898,15835,15845,l896 
+15870,15874,q899,15857,15871,l897 +15885,15886,q900,15876,15887,l898 +15902,15904,q901,15901,15910,l899 +15907,15910,q902,15901,15910,l899 +15919,15923,q903,0,0,null +15950,15951,q904,15948,15957,l901 +15957,15959,q905,15948,15957,l901 +15969,15971,q906,15968,15976,l902 +15996,15998,q907,15994,16005,l904 +16022,16023,q908,16022,16028,l905 +16046,16047,q909,16047,16055,l906 +16050,16051,q910,16047,16055,l906 +16056,16059,q911,0,0,null +16080,16081,q912,0,0,null +16087,16088,q913,16088,16090,l908 +16101,16103,q914,0,0,null +16119,16120,q915,16116,16128,l910 +16146,16147,q916,16138,16152,l911 +16174,16176,q917,16176,16187,l914 +16187,16191,q918,16176,16187,l914 +16208,16212,q919,16204,16216,l916 +16222,16226,q920,0,0,null +16227,16229,q921,0,0,null +16240,16244,q922,16230,16241,l917 +16252,16255,q923,16255,16266,l918 +16274,16278,q924,0,0,null +16299,16301,q925,0,0,null +16324,16328,q926,0,0,null +16354,16358,q927,16352,16360,l923 +16359,16363,q928,16352,16360,l923 +16372,16373,q929,16367,16373,l924 +16388,16389,q930,16386,16391,l925 +16412,16415,q931,16409,16412,l927 +16423,16425,q932,16423,16427,l928 +16434,16438,q933,0,0,null +16450,16453,q934,16449,16460,l930 +16468,16470,q935,16463,16473,l931 +16474,16477,q936,0,0,null +16493,16494,q937,16491,16500,l932 +16502,16506,q938,16504,16506,l933 +16510,16513,q939,0,0,null +16538,16539,q940,16533,16546,l935 +16543,16545,q941,16533,16546,l935 +16549,16551,q942,0,0,null +16578,16582,q943,0,0,null +16589,16590,q944,16583,16597,l937 +16616,16618,q945,16618,16631,l939 +16625,16629,q946,16618,16631,l939 +16658,16659,q947,0,0,null +16679,16681,q948,0,0,null +16702,16705,q949,0,0,null +16734,16735,q950,0,0,null +16754,16755,q951,16746,16756,l947 +16780,16783,q952,16774,16780,l949 +16807,16810,q953,0,0,null +16820,16824,q954,16819,16833,l951 +16826,16828,q955,16819,16833,l951 +16852,16855,q956,0,0,null +16856,16859,q957,0,0,null +16875,16878,q958,16870,16884,l954 +16906,16909,q959,16901,16911,l955 +16935,16936,q960,0,0,null 
+16961,16962,q961,16954,16967,l958 +16974,16977,q962,16968,16976,l959 +16984,16988,q963,16980,16985,l960 +16995,16999,q964,16992,17006,l961 +17013,17017,q965,17012,17020,l962 +17033,17036,q966,0,0,null +17061,17062,q967,17060,17074,l965 +17080,17081,q968,0,0,null +17100,17103,q969,17087,17100,l966 +17130,17131,q970,0,0,null +17137,17141,q971,17133,17144,l968 +17151,17154,q972,0,0,null +17173,17174,q973,17167,17177,l970 +17192,17193,q974,17191,17201,l972 +17207,17208,q975,0,0,null +17213,17217,q976,0,0,null +17225,17227,q977,17220,17229,l973 +17242,17243,q978,17238,17245,l975 +17254,17255,q979,0,0,null +17268,17271,q980,17266,17268,l977 +17276,17277,q981,17277,17280,l978 +17294,17298,q982,17294,17306,l979 +17306,17308,q983,17294,17306,l979 +17336,17339,q984,17329,17340,l981 +17361,17362,q985,0,0,null +17375,17376,q986,17368,17381,l983 +17379,17383,q987,17368,17381,l983 +17412,17414,q988,0,0,null +17423,17425,q989,17417,17430,l985 +17431,17433,q990,0,0,null +17440,17441,q991,17434,17443,l986 +17445,17448,q992,0,0,null +17470,17474,q993,17473,17480,l988 +17476,17477,q994,17473,17480,l988 +17478,17481,q995,17473,17480,l988 +17508,17511,q996,17507,17520,l990 +17527,17530,q997,0,0,null +17542,17545,q998,17539,17553,l991 +17563,17566,q999,17557,17563,l992 diff --git a/generated/denser-labels/results/part-00005 b/generated/denser-labels/results/part-00005 new file mode 100644 index 00000000..5ed074fb --- /dev/null +++ b/generated/denser-labels/results/part-00005 @@ -0,0 +1,126 @@ +6709,6710,q375,6709,6712,l381 +6728,6732,q376,6728,6742,l383 +6754,6755,q377,6750,6755,l385 +6759,6763,q378,6761,6767,l386 +6769,6773,q379,0,0,null +6780,6784,q380,0,0,null +6802,6806,q381,6796,6807,l388 +6809,6812,q382,0,0,null +6817,6821,q383,6814,6817,l389 +6829,6831,q384,0,0,null +6843,6844,q385,0,0,null +6852,6855,q386,0,0,null +6871,6873,q387,0,0,null +6878,6882,q388,6881,6893,l393 +6893,6895,q389,6881,6893,l393 +6918,6920,q390,0,0,null +6921,6925,q391,6922,6930,l395 
+6932,6934,q392,0,0,null +6945,6948,q393,6944,6949,l396 +6949,6951,q394,6944,6949,l396 +6957,6959,q395,6953,6957,l397 +6965,6969,q396,6963,6973,l398 +6979,6980,q397,0,0,null +6985,6986,q398,6981,6992,l399 +6987,6991,q399,6981,6992,l399 +6994,6997,q400,0,0,null +7006,7009,q401,0,0,null +7024,7027,q402,7020,7027,l401 +7036,7039,q403,0,0,null +7046,7050,q404,7046,7056,l402 +7069,7072,q405,7070,7072,l404 +7073,7074,q406,0,0,null +7100,7102,q407,0,0,null +7131,7132,q408,0,0,null +7136,7140,q409,7136,7140,l408 +7157,7159,q410,7152,7157,l409 +7187,7191,q411,7176,7190,l410 +7187,7191,q411,7191,7205,l411 +7207,7211,q412,0,0,null +7215,7219,q413,7212,7226,l412 +7233,7234,q414,0,0,null +7260,7262,q415,7255,7261,l414 +7264,7266,q416,0,0,null +7275,7276,q417,0,0,null +7304,7306,q418,7299,7310,l417 +7309,7312,q419,7299,7310,l417 +7324,7328,q420,7318,7329,l418 +7351,7353,q421,7350,7358,l420 +7382,7385,q422,0,0,null +7403,7405,q423,7398,7411,l423 +7424,7425,q424,7416,7429,l424 +7439,7442,q425,7437,7450,l425 +7468,7469,q426,0,0,null +7475,7479,q427,7472,7482,l427 +7491,7495,q428,7494,7506,l428 +7519,7522,q429,0,0,null +7542,7544,q430,0,0,null +7559,7561,q431,7552,7559,l430 +7583,7587,q432,0,0,null +7602,7606,q433,7599,7607,l433 +7612,7616,q434,0,0,null +7617,7619,q435,0,0,null +7647,7651,q436,0,0,null +7668,7669,q437,7656,7669,l436 +7693,7695,q438,7694,7704,l439 +7707,7709,q439,0,0,null +7727,7731,q440,7725,7730,l441 +7734,7735,q441,0,0,null +7751,7752,q442,0,0,null +7767,7768,q443,7767,7769,l443 +7784,7786,q444,7786,7793,l445 +7811,7814,q445,7811,7817,l446 +7824,7828,q446,7820,7828,l447 +7831,7833,q447,7833,7841,l448 +7843,7847,q448,0,0,null +7876,7879,q449,7866,7878,l450 +7883,7884,q450,0,0,null +7901,7905,q451,0,0,null +7920,7924,q452,7916,7926,l452 +7951,7954,q453,7941,7951,l454 +7971,7972,q454,7971,7978,l456 +7978,7982,q455,7971,7978,l456 +7978,7982,q455,7979,7983,l457 +8009,8012,q456,8003,8011,l459 +8025,8029,q457,0,0,null +8058,8061,q458,0,0,null 
+8086,8090,q459,8078,8092,l464 +8103,8104,q460,8100,8110,l465 +8111,8114,q461,8114,8126,l466 +8124,8126,q462,8114,8126,l466 +8142,8144,q463,8128,8142,l467 +8165,8167,q464,8161,8173,l470 +8169,8173,q465,8161,8173,l470 +8186,8187,q466,0,0,null +8199,8201,q467,0,0,null +8223,8224,q468,0,0,null +8251,8253,q469,0,0,null +8258,8260,q470,8257,8271,l474 +8284,8285,q471,8281,8293,l475 +8304,8307,q472,8297,8306,l476 +8321,8323,q473,8320,8331,l477 +8331,8332,q474,8320,8331,l477 +8356,8358,q475,8356,8366,l479 +8370,8374,q476,8368,8375,l480 +8395,8399,q477,8390,8398,l481 +8412,8413,q478,0,0,null +8434,8436,q479,8435,8447,l483 +8456,8460,q480,0,0,null +8478,8479,q481,0,0,null +8507,8509,q482,8500,8510,l486 +8511,8513,q483,8511,8516,l487 +8533,8537,q484,8528,8538,l489 +8565,8566,q485,0,0,null +8568,8569,q486,0,0,null +8592,8596,q487,8592,8598,l492 +8608,8610,q488,0,0,null +8627,8631,q489,8616,8628,l493 +8651,8653,q490,8644,8657,l494 +8675,8677,q491,8676,8686,l495 +8702,8705,q492,0,0,null +8725,8729,q493,8728,8739,l498 +8731,8733,q494,8728,8739,l498 +8744,8747,q495,0,0,null +8772,8775,q496,0,0,null +8799,8800,q497,0,0,null +8806,8810,q498,0,0,null diff --git a/generated/denser-labels/results/part-00006 b/generated/denser-labels/results/part-00006 new file mode 100644 index 00000000..47bf4de6 --- /dev/null +++ b/generated/denser-labels/results/part-00006 @@ -0,0 +1,126 @@ +8838,8841,q499,8840,8843,l503 +29,32,q0,21,32,l1 +57,59,q1,47,60,l2 +65,68,q2,0,0,null +97,100,q3,99,108,l4 +117,119,q4,112,126,l5 +126,127,q5,112,126,l5 +139,142,q6,131,139,l6 +161,164,q7,0,0,null +166,169,q8,0,0,null +198,199,q9,190,200,l9 +201,205,q10,205,219,l10 +230,233,q11,0,0,null +244,248,q12,242,245,l12 +272,273,q13,0,0,null +298,299,q14,0,0,null +312,315,q15,311,321,l15 +322,324,q16,0,0,null +326,327,q17,0,0,null +329,330,q18,330,341,l16 +359,363,q19,0,0,null +374,376,q20,368,378,l18 +404,407,q21,407,414,l20 +424,425,q22,0,0,null +440,441,q23,427,440,l21 +450,454,q24,452,466,l22 +465,466,q25,452,466,l22 
+482,483,q26,483,486,l23 +502,505,q27,500,508,l25 +531,533,q28,525,534,l26 +548,550,q29,549,556,l27 +561,565,q30,562,575,l28 +590,592,q31,592,604,l29 +611,614,q32,610,623,l30 +625,629,q33,0,0,null +630,632,q34,0,0,null +649,650,q35,643,657,l32 +663,666,q36,0,0,null +689,692,q37,691,704,l35 +694,695,q38,691,704,l35 +696,697,q39,691,704,l35 +707,708,q40,705,714,l36 +732,736,q41,0,0,null +750,752,q42,747,753,l38 +769,771,q43,768,773,l39 +800,803,q44,0,0,null +822,824,q45,0,0,null +850,854,q46,854,866,l44 +856,857,q47,854,866,l44 +866,869,q48,854,866,l44 +881,882,q49,881,891,l45 +888,892,q50,881,891,l45 +917,920,q51,907,917,l46 +927,929,q52,0,0,null +930,934,q53,0,0,null +944,948,q54,947,951,l48 +969,973,q55,958,969,l49 +1000,1004,q56,991,1003,l51 +1016,1018,q57,0,0,null +1041,1042,q58,0,0,null +1061,1062,q59,0,0,null +1067,1070,q60,1063,1077,l55 +1073,1075,q61,1063,1077,l55 +1079,1082,q62,0,0,null +1086,1090,q63,0,0,null +1102,1104,q64,0,0,null +1128,1129,q65,0,0,null +1147,1149,q66,1145,1153,l59 +1176,1180,q67,1174,1183,l61 +1209,1211,q68,0,0,null +1232,1235,q69,0,0,null +1263,1267,q70,1261,1268,l66 +1274,1275,q71,1275,1281,l67 +1294,1297,q72,1284,1295,l68 +1302,1305,q73,1299,1304,l69 +1323,1325,q74,0,0,null +1332,1336,q75,0,0,null +1357,1359,q76,0,0,null +1373,1374,q77,1364,1378,l74 +1397,1401,q78,1396,1404,l75 +1406,1408,q79,0,0,null +1415,1418,q80,1413,1422,l76 +1436,1439,q81,1431,1443,l77 +1447,1450,q82,1449,1457,l78 +1473,1474,q83,1469,1473,l80 +1497,1499,q84,1486,1497,l82 +1497,1499,q84,1498,1502,l83 +1515,1519,q85,1519,1526,l85 +1531,1532,q86,0,0,null +1558,1559,q87,1551,1560,l87 +1571,1574,q88,1566,1571,l88 +1571,1574,q88,1573,1580,l89 +1591,1595,q89,1588,1596,l90 +1623,1627,q90,1623,1629,l92 +1654,1656,q91,1656,1664,l94 +1675,1676,q92,0,0,null +1691,1694,q93,1684,1698,l96 +1720,1723,q94,0,0,null +1737,1740,q95,1731,1743,l98 +1768,1771,q96,1759,1768,l99 +1785,1789,q97,1779,1789,l100 +1806,1810,q98,1804,1809,l102 +1821,1822,q99,1816,1830,l103 
+1825,1829,q100,1816,1830,l103 +1845,1847,q101,0,0,null +1872,1873,q102,0,0,null +1888,1890,q103,1878,1892,l106 +1919,1921,q104,0,0,null +1928,1932,q105,1922,1930,l108 +1935,1936,q106,0,0,null +1948,1949,q107,1945,1949,l109 +1953,1956,q108,1955,1956,l110 +1985,1989,q109,1977,1988,l112 +1994,1998,q110,1992,2002,l113 +2007,2009,q111,0,0,null +2031,2033,q112,0,0,null +2046,2048,q113,2044,2050,l116 +2059,2060,q114,2059,2064,l117 +2071,2072,q115,2065,2073,l118 +2086,2090,q116,2079,2087,l119 +2117,2121,q117,0,0,null +2134,2135,q118,2124,2135,l121 +2136,2137,q119,0,0,null +2141,2145,q120,0,0,null +2166,2170,q121,0,0,null +2197,2200,q122,0,0,null diff --git a/generated/denser-labels/results/part-00007 b/generated/denser-labels/results/part-00007 new file mode 100644 index 00000000..b99127e1 --- /dev/null +++ b/generated/denser-labels/results/part-00007 @@ -0,0 +1,127 @@ +2206,2210,q123,2209,2214,l125 +2224,2226,q124,0,0,null +8844,8847,q500,0,0,null +8865,8869,q501,0,0,null +8897,8898,q502,8898,8909,l506 +8922,8924,q503,8921,8933,l507 +8932,8933,q504,8921,8933,l507 +8940,8944,q505,0,0,null +8956,8960,q506,8955,8959,l509 +8988,8991,q507,8982,8995,l511 +9017,9020,q508,9011,9023,l513 +9039,9042,q509,9037,9048,l514 +9051,9054,q510,0,0,null +9082,9085,q511,9085,9090,l516 +9088,9091,q512,9085,9090,l516 +9110,9114,q513,0,0,null +9123,9125,q514,0,0,null +9131,9132,q515,0,0,null +9153,9157,q516,0,0,null +9183,9187,q517,0,0,null +9196,9197,q518,9195,9204,l522 +9206,9210,q519,0,0,null +9234,9235,q520,9223,9236,l523 +9247,9250,q521,0,0,null +9266,9269,q522,9254,9266,l524 +9293,9294,q523,0,0,null +9310,9314,q524,9307,9316,l527 +9326,9328,q525,0,0,null +9356,9359,q526,9355,9362,l529 +9361,9363,q527,9355,9362,l529 +9391,9392,q528,9386,9393,l531 +9414,9418,q529,9418,9427,l533 +9431,9433,q530,9433,9436,l534 +9449,9451,q531,0,0,null +9453,9454,q532,9454,9461,l536 +9461,9463,q533,9454,9461,l536 +9473,9477,q534,9473,9484,l537 +9493,9496,q535,0,0,null +9523,9524,q536,9515,9525,l539 
+9548,9551,q537,9549,9554,l542 +9580,9582,q538,0,0,null +9607,9610,q539,9606,9613,l546 +9626,9629,q540,9629,9639,l547 +9643,9647,q541,0,0,null +9661,9665,q542,9656,9669,l548 +9678,9680,q543,9680,9692,l549 +9695,9696,q544,0,0,null +9698,9701,q545,9699,9702,l550 +9704,9708,q546,0,0,null +9734,9738,q547,9735,9742,l553 +9754,9758,q548,9756,9770,l554 +9775,9778,q549,9774,9777,l555 +9780,9781,q550,9781,9782,l556 +9782,9786,q551,9781,9782,l556 +9793,9795,q552,0,0,null +9799,9803,q553,9802,9805,l558 +9811,9815,q554,9808,9819,l559 +9833,9837,q555,9835,9849,l560 +9857,9861,q556,0,0,null +9885,9888,q557,9882,9889,l562 +9912,9913,q558,9910,9919,l564 +9927,9930,q559,9927,9940,l565 +9937,9941,q560,9927,9940,l565 +9956,9957,q561,9949,9957,l566 +9963,9964,q562,9961,9973,l567 +9966,9969,q563,9961,9973,l567 +9990,9991,q564,9988,9999,l569 +10014,10016,q565,10011,10022,l570 +10021,10024,q566,10011,10022,l570 +10042,10045,q567,0,0,null +10047,10048,q568,0,0,null +10073,10074,q569,10064,10073,l573 +10079,10081,q570,0,0,null +10099,10101,q571,0,0,null +10108,10111,q572,10102,10112,l575 +10140,10141,q573,10135,10144,l577 +10162,10166,q574,10161,10166,l578 +10175,10177,q575,0,0,null +10198,10199,q576,0,0,null +10227,10228,q577,0,0,null +10247,10250,q578,0,0,null +10259,10263,q579,10261,10263,l586 +10265,10266,q580,0,0,null +10273,10276,q581,10276,10282,l587 +10300,10303,q582,10292,10302,l588 +10311,10313,q583,10304,10315,l589 +10340,10342,q584,10328,10340,l590 +10366,10369,q585,10368,10369,l592 +10379,10383,q586,0,0,null +10397,10398,q587,10389,10401,l594 +10426,10430,q588,0,0,null +10449,10453,q589,0,0,null +10456,10458,q590,10455,10456,l597 +10484,10485,q591,0,0,null +10500,10502,q592,0,0,null +10512,10513,q593,0,0,null +10527,10529,q594,0,0,null +10547,10550,q595,0,0,null +10559,10563,q596,10556,10568,l603 +10573,10574,q597,0,0,null +10592,10594,q598,10590,10602,l605 +10604,10608,q599,0,0,null +10631,10635,q600,10626,10637,l607 +10663,10665,q601,0,0,null 
+10669,10672,q602,10671,10673,l610 +10697,10699,q603,0,0,null +10704,10706,q604,0,0,null +10720,10724,q605,10710,10721,l612 +10730,10734,q606,0,0,null +10753,10754,q607,0,0,null +10775,10779,q608,10774,10782,l615 +10781,10784,q609,10774,10782,l615 +10804,10806,q610,0,0,null +10833,10836,q611,10834,10841,l619 +10847,10850,q612,0,0,null +10865,10866,q613,10860,10871,l620 +10887,10888,q614,10877,10890,l621 +10909,10911,q615,10906,10912,l622 +10926,10929,q616,0,0,null +10946,10948,q617,0,0,null +10952,10956,q618,0,0,null +10970,10973,q619,10968,10977,l626 +11001,11002,q620,11001,11003,l628 +11009,11010,q621,11005,11014,l629 +11016,11020,q622,11017,11019,l630 +11037,11038,q623,11038,11049,l631 +11067,11068,q624,0,0,null diff --git a/generated/denser-queries/results/part-00000 b/generated/denser-queries/results/part-00000 new file mode 100644 index 00000000..eb90f8fb --- /dev/null +++ b/generated/denser-queries/results/part-00000 @@ -0,0 +1,135 @@ +4629,4636,q250,0,0,null +4641,4642,q251,4641,4644,l264 +4653,4659,q252,0,0,null +4678,4691,q253,4678,4680,l266 +4710,4720,q254,0,0,null +4732,4735,q255,4729,4732,l270 +4753,4755,q256,0,0,null +4759,4771,q257,4759,4762,l271 +4759,4771,q257,4764,4766,l272 +4779,4785,q258,0,0,null +4792,4795,q259,4793,4796,l274 +4809,4817,q260,4816,4817,l275 +4824,4830,q261,0,0,null +4837,4851,q262,4849,4852,l277 +4870,4879,q263,0,0,null +4882,4887,q264,4880,4884,l278 +4899,4900,q265,0,0,null +4905,4912,q266,4908,4909,l279 +4917,4926,q267,0,0,null +4930,4935,q268,0,0,null +4949,4951,q269,0,0,null +4955,4962,q270,4962,4965,l281 +4970,4975,q271,4974,4977,l282 +4994,5002,q272,0,0,null +5014,5025,q273,0,0,null +5042,5054,q274,0,0,null +5066,5071,q275,5069,5071,l285 +5081,5089,q276,5088,5092,l286 +5096,5104,q277,0,0,null +5123,5127,q278,0,0,null +5146,5157,q279,5144,5146,l289 +5173,5186,q280,0,0,null +5194,5198,q281,5192,5194,l291 +5204,5211,q282,0,0,null +5215,5222,q283,5219,5220,l292 +5236,5244,q284,0,0,null +5251,5256,q285,5248,5251,l293 
+5264,5274,q286,5261,5265,l294 +5284,5289,q287,5288,5291,l295 +5301,5307,q288,5300,5304,l296 +5311,5312,q289,0,0,null +5331,5341,q290,5329,5332,l298 +5347,5360,q291,5350,5354,l299 +5362,5365,q292,0,0,null +5368,5371,q293,0,0,null +5387,5391,q294,0,0,null +5403,5414,q295,0,0,null +5427,5435,q296,5425,5427,l303 +5452,5464,q297,5449,5452,l305 +5452,5464,q297,5460,5462,l306 +5477,5487,q298,5479,5481,l308 +5489,5501,q299,5490,5493,l309 +5514,5527,q300,5516,5520,l310 +5539,5544,q301,0,0,null +5553,5565,q302,5558,5559,l312 +5568,5579,q303,5570,5574,l313 +5595,5603,q304,5599,5601,l314 +5609,5619,q305,5617,5619,l315 +5626,5638,q306,5628,5631,l316 +5654,5658,q307,0,0,null +5672,5674,q308,0,0,null +5692,5694,q309,0,0,null +5701,5709,q310,5704,5707,l320 +5718,5729,q311,5719,5723,l321 +5739,5752,q312,5741,5742,l322 +5755,5765,q313,5764,5768,l323 +5769,5780,q314,0,0,null +5791,5801,q315,5794,5796,l324 +5812,5819,q316,5810,5813,l325 +5826,5840,q317,5836,5839,l326 +5843,5857,q318,0,0,null +5867,5868,q319,0,0,null +5870,5873,q320,0,0,null +5892,5904,q321,5900,5904,l330 +5914,5919,q322,0,0,null +5936,5940,q323,0,0,null +5944,5957,q324,5943,5944,l333 +5960,5964,q325,0,0,null +5967,5981,q326,5965,5969,l335 +5997,6010,q327,0,0,null +6013,6017,q328,0,0,null +6032,6033,q329,0,0,null +6041,6053,q330,6045,6047,l338 +6054,6066,q331,6061,6062,l339 +6082,6096,q332,6082,6084,l340 +6107,6120,q333,6119,6123,l342 +6132,6143,q334,0,0,null +6157,6168,q335,6163,6166,l344 +6157,6168,q335,6167,6171,l345 +6183,6195,q336,6182,6186,l347 +6213,6215,q337,0,0,null +6223,6231,q338,0,0,null +6249,6253,q339,0,0,null +6263,6265,q340,0,0,null +6282,6284,q341,0,0,null +6301,6307,q342,6300,6302,l353 +6326,6328,q343,0,0,null +6339,6348,q344,0,0,null +6357,6367,q345,6360,6363,l355 +6381,6395,q346,6385,6386,l357 +6381,6395,q346,6389,6392,l358 +6408,6409,q347,6405,6409,l360 +6427,6441,q348,6431,6434,l362 +6427,6441,q348,6440,6442,l363 +6446,6449,q349,0,0,null +6451,6456,q350,6455,6458,l364 
+6473,6479,q351,6479,6480,l365 +6489,6497,q352,0,0,null +6511,6521,q353,6517,6518,l367 +6511,6521,q353,6519,6523,l368 +6527,6536,q354,0,0,null +6552,6560,q355,6557,6560,l371 +6571,6574,q356,0,0,null +6586,6596,q357,6594,6595,l373 +6605,6609,q358,6607,6608,l374 +6616,6626,q359,6621,6622,l375 +6641,6642,q360,6640,6642,l376 +6656,6670,q361,6660,6663,l377 +6688,6696,q362,0,0,null +6703,6706,q363,0,0,null +6709,6713,q364,6710,6714,l379 +6724,6732,q365,6726,6728,l380 +6724,6732,q365,6731,6732,l381 +6738,6743,q366,0,0,null +6761,6769,q367,0,0,null +6771,6772,q368,0,0,null +6791,6802,q369,6801,6805,l384 +6807,6816,q370,0,0,null +6825,6839,q371,6825,6827,l385 +6847,6851,q372,0,0,null +6852,6853,q373,0,0,null +6861,6866,q374,0,0,null +6,18,q0,0,0,null +31,36,q1,36,37,l0 +50,64,q2,55,58,l1 diff --git a/generated/denser-queries/results/part-00001 b/generated/denser-queries/results/part-00001 new file mode 100644 index 00000000..57cbce29 --- /dev/null +++ b/generated/denser-queries/results/part-00001 @@ -0,0 +1,135 @@ +73,82,q3,72,75,l2 +101,106,q4,102,106,l3 +119,129,q5,128,131,l4 +139,145,q6,145,146,l5 +147,153,q7,147,149,l6 +163,166,q8,0,0,null +176,189,q9,184,187,l8 +198,207,q10,0,0,null +213,216,q11,216,219,l9 +228,234,q12,225,229,l11 +247,248,q13,0,0,null +254,267,q14,250,254,l12 +268,277,q15,0,0,null +296,307,q16,304,306,l14 +317,328,q17,0,0,null +344,352,q18,0,0,null +353,366,q19,359,363,l17 +367,377,q20,0,0,null +390,398,q21,388,390,l18 +407,416,q22,0,0,null +419,432,q23,421,423,l20 +419,432,q23,428,429,l21 +438,440,q24,0,0,null +441,454,q25,0,0,null +463,467,q26,0,0,null +486,499,q27,497,500,l24 +504,515,q28,506,510,l25 +517,522,q29,0,0,null +535,541,q30,532,536,l26 +552,558,q31,0,0,null +566,572,q32,564,568,l27 +589,590,q33,0,0,null +593,597,q34,0,0,null +616,626,q35,626,629,l30 +632,645,q36,0,0,null +658,668,q37,0,0,null +677,685,q38,0,0,null +692,699,q39,695,698,l33 +709,722,q40,717,720,l34 +740,747,q41,0,0,null +760,768,q42,0,0,null +783,793,q43,0,0,null 
+796,798,q44,795,799,l38 +807,811,q45,0,0,null +814,815,q46,0,0,null +817,818,q47,0,0,null +819,829,q48,824,828,l39 +845,857,q49,848,849,l41 +871,874,q50,0,0,null +891,903,q51,893,896,l43 +919,926,q52,0,0,null +928,936,q53,0,0,null +955,969,q54,967,970,l46 +988,996,q55,989,991,l48 +1012,1021,q56,0,0,null +1036,1049,q57,0,0,null +1061,1064,q58,0,0,null +1069,1072,q59,1069,1073,l53 +1085,1097,q60,1093,1094,l55 +1112,1119,q61,0,0,null +1137,1140,q62,0,0,null +1153,1155,q63,0,0,null +1160,1174,q64,1158,1161,l58 +1176,1189,q65,1178,1180,l59 +1201,1206,q66,0,0,null +1224,1226,q67,0,0,null +1231,1234,q68,1234,1237,l63 +1235,1247,q69,1234,1237,l63 +1255,1268,q70,1261,1262,l64 +1282,1294,q71,1280,1283,l65 +1299,1307,q72,0,0,null +1318,1325,q73,0,0,null +1344,1351,q74,0,0,null +1352,1356,q75,0,0,null +1369,1376,q76,1369,1371,l68 +1369,1376,q76,1373,1376,l69 +1380,1391,q77,0,0,null +1392,1404,q78,1399,1402,l70 +1392,1404,q78,1404,1405,l71 +1422,1426,q79,0,0,null +1435,1440,q80,1434,1436,l72 +1459,1466,q81,1462,1466,l75 +1472,1484,q82,1480,1481,l76 +1499,1510,q83,0,0,null +1529,1535,q84,0,0,null +1552,1559,q85,0,0,null +1569,1570,q86,0,0,null +1582,1596,q87,0,0,null +1611,1621,q88,1613,1617,l84 +1630,1644,q89,1644,1646,l85 +1654,1661,q90,1660,1661,l87 +1676,1677,q91,0,0,null +1691,1702,q92,1692,1695,l89 +1708,1709,q93,1705,1708,l90 +1714,1727,q94,0,0,null +1734,1739,q95,1730,1734,l91 +1734,1739,q95,1739,1743,l92 +1754,1758,q96,1758,1761,l93 +1772,1780,q97,0,0,null +1799,1807,q98,0,0,null +1825,1828,q99,1825,1827,l96 +1844,1846,q100,0,0,null +1861,1868,q101,1862,1866,l98 +1886,1889,q102,0,0,null +1890,1904,q103,1902,1904,l100 +1915,1923,q104,1917,1921,l101 +1928,1929,q105,0,0,null +1939,1945,q106,1942,1943,l102 +1949,1959,q107,0,0,null +1969,1981,q108,1974,1976,l104 +1998,2010,q109,2005,2007,l105 +2026,2027,q110,0,0,null +2029,2031,q111,0,0,null +2045,2051,q112,2043,2047,l107 +2063,2076,q113,2071,2072,l108 +2084,2090,q114,2087,2089,l109 +2104,2106,q115,2105,2107,l110 
+2112,2121,q116,0,0,null +2128,2141,q117,2131,2133,l111 +2128,2141,q117,2139,2142,l112 +2157,2165,q118,2157,2161,l113 +2181,2191,q119,2186,2189,l114 +2207,2221,q120,2205,2207,l115 +2207,2221,q120,2221,2224,l116 +2227,2241,q121,0,0,null +2254,2261,q122,2254,2257,l118 +2254,2261,q122,2259,2260,l119 +2272,2273,q123,0,0,null +2292,2297,q124,2296,2299,l121 +15720,15731,q875,15717,15721,l900 +15737,15748,q876,15742,15746,l901 +15759,15768,q877,15768,15769,l902 +15778,15789,q878,15783,15784,l903 +15796,15808,q879,0,0,null +15816,15823,q880,0,0,null diff --git a/generated/denser-queries/results/part-00002 b/generated/denser-queries/results/part-00002 new file mode 100644 index 00000000..dc3e34e9 --- /dev/null +++ b/generated/denser-queries/results/part-00002 @@ -0,0 +1,135 @@ +15838,15844,q881,0,0,null +15863,15864,q882,15864,15866,l906 +15882,15883,q883,0,0,null +15891,15901,q884,15895,15896,l908 +15891,15901,q884,15900,15901,l909 +15909,15915,q885,0,0,null +15928,15933,q886,15929,15930,l910 +15941,15945,q887,0,0,null +15956,15965,q888,15953,15956,l912 +15983,15997,q889,15983,15984,l914 +15983,15997,q889,15986,15989,l915 +16010,16023,q890,16007,16010,l916 +16010,16023,q890,16021,16022,l917 +16024,16035,q891,0,0,null +16036,16046,q892,16043,16046,l918 +16050,16062,q893,16056,16057,l919 +16066,16075,q894,0,0,null +16080,16086,q895,16084,16087,l920 +16087,16090,q896,16084,16087,l920 +16087,16090,q896,16088,16090,l921 +16101,16104,q897,16101,16104,l922 +16119,16125,q898,0,0,null +16138,16142,q899,16137,16140,l924 +16155,16164,q900,16159,16161,l925 +16155,16164,q900,16162,16164,l926 +16172,16175,q901,0,0,null +16185,16191,q902,16183,16187,l927 +16193,16199,q903,0,0,null +16206,16214,q904,16206,16209,l928 +16229,16235,q905,16229,16232,l929 +16241,16246,q906,0,0,null +16261,16267,q907,16259,16261,l930 +16261,16267,q907,16267,16269,l931 +16269,16278,q908,16267,16269,l931 +16296,16299,q909,16294,16297,l932 +16310,16321,q910,16312,16316,l933 +16328,16342,q911,16342,16343,l935 
+16360,16367,q912,16366,16368,l936 +16377,16391,q913,16379,16382,l938 +16377,16391,q913,16389,16392,l939 +16400,16413,q914,16409,16412,l940 +16428,16438,q915,16428,16429,l942 +16439,16446,q916,16446,16448,l943 +16460,16461,q917,16459,16463,l945 +16477,16481,q918,0,0,null +16497,16503,q919,16502,16506,l947 +16516,16517,q920,0,0,null +16524,16534,q921,16523,16524,l948 +16524,16534,q921,16525,16529,l949 +16541,16552,q922,16550,16554,l950 +16557,16567,q923,0,0,null +16579,16585,q924,0,0,null +16604,16608,q925,0,0,null +16622,16629,q926,0,0,null +16644,16652,q927,0,0,null +16665,16677,q928,0,0,null +16695,16706,q929,16698,16701,l958 +16695,16706,q929,16704,16705,l959 +16718,16721,q930,16721,16725,l960 +16722,16727,q931,16721,16725,l960 +16737,16747,q932,16736,16738,l962 +16737,16747,q932,16746,16750,l963 +16762,16776,q933,0,0,null +16784,16798,q934,16791,16793,l966 +16815,16818,q935,0,0,null +16823,16833,q936,0,0,null +16843,16857,q937,16841,16843,l968 +16874,16886,q938,16872,16875,l969 +16874,16886,q938,16879,16881,l970 +16874,16886,q938,16884,16886,l971 +16900,16904,q939,0,0,null +16910,16922,q940,16911,16915,l973 +16931,16945,q941,16937,16940,l974 +16946,16955,q942,16951,16952,l975 +16965,16968,q943,0,0,null +16972,16975,q944,16974,16976,l977 +16993,17002,q945,0,0,null +17012,17017,q946,0,0,null +17027,17032,q947,17030,17031,l979 +17036,17045,q948,0,0,null +17061,17063,q949,17060,17061,l981 +17072,17077,q950,0,0,null +17078,17085,q951,0,0,null +17098,17107,q952,17096,17098,l983 +17115,17122,q953,17115,17118,l984 +17126,17127,q954,0,0,null +17145,17159,q955,0,0,null +17166,17175,q956,17165,17166,l987 +17179,17181,q957,0,0,null +17184,17188,q958,0,0,null +17190,17196,q959,17192,17195,l988 +17202,17211,q960,0,0,null +17212,17220,q961,17214,17216,l989 +17230,17238,q962,17230,17233,l990 +17245,17247,q963,17244,17248,l991 +17251,17257,q964,17256,17259,l992 +17267,17270,q965,0,0,null +17278,17282,q966,17279,17282,l993 +17290,17291,q967,0,0,null 
+17310,17321,q968,17309,17310,l996 +17310,17321,q968,17317,17321,l997 +17328,17330,q969,0,0,null +17338,17348,q970,17348,17349,l998 +17352,17360,q971,0,0,null +17371,17378,q972,17373,17377,l999 +17380,17389,q973,0,0,null +17408,17413,q974,0,0,null +17414,17427,q975,0,0,null +17431,17442,q976,0,0,null +17445,17454,q977,0,0,null +17471,17483,q978,0,0,null +17494,17504,q979,0,0,null +17507,17519,q980,0,0,null +17520,17530,q981,0,0,null +17544,17547,q982,0,0,null +17564,17574,q983,0,0,null +17581,17595,q984,0,0,null +17599,17604,q985,0,0,null +17615,17618,q986,0,0,null +17637,17646,q987,0,0,null +17647,17654,q988,0,0,null +17671,17674,q989,0,0,null +17679,17682,q990,0,0,null +17691,17699,q991,0,0,null +17701,17708,q992,0,0,null +17722,17729,q993,0,0,null +17739,17741,q994,0,0,null +17748,17754,q995,0,0,null +17764,17777,q996,0,0,null +17778,17790,q997,0,0,null +17806,17815,q998,0,0,null +17834,17843,q999,0,0,null +9207,9210,q500,0,0,null +9211,9217,q501,0,0,null +9231,9238,q502,0,0,null diff --git a/generated/denser-queries/results/part-00003 b/generated/denser-queries/results/part-00003 new file mode 100644 index 00000000..b18a751d --- /dev/null +++ b/generated/denser-queries/results/part-00003 @@ -0,0 +1,135 @@ +9243,9255,q503,9243,9246,l523 +9263,9265,q504,0,0,null +9278,9282,q505,0,0,null +9287,9289,q506,9286,9288,l525 +9303,9312,q507,9311,9312,l526 +9318,9329,q508,9325,9326,l527 +9347,9361,q509,9347,9350,l528 +9367,9380,q510,9374,9377,l529 +9387,9396,q511,9386,9390,l530 +9401,9404,q512,0,0,null +9408,9416,q513,9406,9409,l531 +9408,9416,q513,9411,9414,l532 +9430,9444,q514,9431,9434,l533 +9456,9469,q515,0,0,null +9474,9477,q516,0,0,null +9496,9502,q517,0,0,null +9510,9519,q518,9509,9511,l537 +9535,9540,q519,0,0,null +9558,9571,q520,9561,9563,l539 +9575,9582,q521,0,0,null +9598,9602,q522,0,0,null +9616,9630,q523,9615,9619,l541 +9647,9661,q524,0,0,null +9669,9673,q525,9672,9676,l544 +9681,9695,q526,9681,9685,l545 +9681,9695,q526,9692,9696,l546 +9700,9706,q527,0,0,null 
+9718,9730,q528,9725,9726,l547 +9736,9744,q529,9734,9738,l548 +9760,9768,q530,0,0,null +9784,9788,q531,9788,9789,l551 +9802,9814,q532,9801,9805,l552 +9802,9814,q532,9814,9818,l553 +9830,9835,q533,0,0,null +9846,9860,q534,9854,9858,l556 +9865,9870,q535,0,0,null +9884,9894,q536,0,0,null +9898,9912,q537,9904,9906,l558 +9922,9927,q538,0,0,null +9933,9943,q539,0,0,null +9948,9959,q540,9953,9954,l561 +9964,9965,q541,9965,9967,l562 +9973,9979,q542,0,0,null +9988,10002,q543,9989,9993,l563 +9988,10002,q543,9997,9998,l564 +10009,10016,q544,10016,10017,l566 +10021,10033,q545,10032,10034,l567 +10034,10043,q546,10032,10034,l567 +10034,10043,q546,10042,10043,l568 +10056,10068,q547,0,0,null +10085,10090,q548,10088,10092,l572 +10096,10100,q549,0,0,null +10105,10106,q550,0,0,null +10110,10124,q551,10114,10116,l573 +10139,10150,q552,10141,10143,l575 +10160,10173,q553,0,0,null +10179,10193,q554,10186,10188,l577 +10200,10209,q555,10198,10200,l578 +10200,10209,q555,10201,10204,l579 +10221,10232,q556,10224,10227,l580 +10239,10242,q557,10239,10243,l581 +10247,10255,q558,10248,10250,l582 +10247,10255,q558,10253,10255,l583 +10263,10272,q559,0,0,null +10281,10291,q560,10280,10282,l584 +10304,10311,q561,0,0,null +10315,10320,q562,0,0,null +10328,10341,q563,10325,10329,l586 +10328,10341,q563,10338,10340,l587 +10347,10350,q564,0,0,null +10360,10366,q565,10364,10366,l588 +10374,10376,q566,0,0,null +10383,10387,q567,10384,10385,l589 +10395,10403,q568,0,0,null +10420,10421,q569,0,0,null +10438,10452,q570,10442,10443,l592 +10454,10465,q571,10456,10458,l593 +10483,10496,q572,10487,10491,l596 +10511,10520,q573,10519,10521,l597 +10536,10537,q574,10536,10538,l598 +10539,10550,q575,0,0,null +10553,10563,q576,10563,10567,l599 +10578,10592,q577,10579,10583,l600 +10605,10610,q578,10602,10606,l601 +10605,10610,q578,10608,10611,l602 +10618,10632,q579,10629,10633,l603 +10645,10653,q580,10646,10647,l604 +10656,10668,q581,10656,10659,l605 +10670,10674,q582,10671,10672,l606 +10679,10688,q583,10686,10689,l607 
+10699,10703,q584,0,0,null +10721,10724,q585,0,0,null +10740,10753,q586,0,0,null +10755,10758,q587,10755,10756,l611 +10772,10773,q588,0,0,null +10785,10787,q589,0,0,null +10788,10789,q590,0,0,null +10791,10801,q591,0,0,null +10805,10812,q592,10804,10808,l614 +10813,10814,q593,0,0,null +10815,10823,q594,0,0,null +10832,10842,q595,0,0,null +10858,10866,q596,10863,10864,l618 +10885,10896,q597,10888,10891,l619 +10899,10909,q598,10908,10912,l620 +10921,10931,q599,10922,10926,l621 +10942,10948,q600,0,0,null +10951,10965,q601,10953,10954,l622 +10972,10976,q602,10976,10980,l623 +10988,10999,q603,10988,10992,l624 +11016,11025,q604,11016,11017,l626 +11038,11039,q605,0,0,null +11048,11055,q606,11055,11059,l628 +11073,11074,q607,0,0,null +11081,11083,q608,0,0,null +11089,11095,q609,11088,11092,l629 +11104,11115,q610,0,0,null +11118,11119,q611,11116,11120,l630 +11133,11144,q612,11133,11136,l631 +11151,11156,q613,0,0,null +11157,11160,q614,11160,11162,l632 +11179,11188,q615,0,0,null +11207,11215,q616,0,0,null +11230,11235,q617,0,0,null +11240,11252,q618,11246,11247,l636 +11265,11270,q619,0,0,null +11288,11301,q620,11292,11295,l640 +11319,11324,q621,0,0,null +11327,11329,q622,0,0,null +11334,11348,q623,11345,11346,l642 +11359,11364,q624,11359,11361,l643 +11359,11364,q624,11363,11364,l644 +2306,2307,q125,0,0,null +2310,2324,q126,2308,2312,l122 +2342,2343,q127,0,0,null diff --git a/generated/denser-queries/results/part-00004 b/generated/denser-queries/results/part-00004 new file mode 100644 index 00000000..f17189ed --- /dev/null +++ b/generated/denser-queries/results/part-00004 @@ -0,0 +1,135 @@ +2353,2357,q128,2355,2358,l124 +2375,2386,q129,2376,2378,l125 +2404,2409,q130,2403,2405,l126 +2415,2421,q131,0,0,null +2426,2434,q132,2428,2430,l127 +2439,2441,q133,0,0,null +2446,2459,q134,2458,2460,l128 +2474,2486,q135,2485,2486,l130 +2504,2506,q136,0,0,null +2523,2530,q137,0,0,null +2536,2540,q138,2536,2538,l134 +2536,2540,q138,2539,2540,l135 +2554,2564,q139,2555,2556,l137 
+2575,2582,q140,2575,2578,l138 +2601,2613,q141,2606,2607,l139 +2615,2626,q142,2620,2621,l140 +2642,2656,q143,0,0,null +2668,2671,q144,0,0,null +2680,2683,q145,2682,2684,l144 +2695,2701,q146,0,0,null +2717,2730,q147,0,0,null +2732,2733,q148,0,0,null +2739,2750,q149,2737,2739,l146 +2739,2750,q149,2743,2747,l147 +2767,2770,q150,0,0,null +2776,2780,q151,2777,2781,l149 +2783,2794,q152,2784,2787,l150 +2808,2821,q153,2809,2813,l151 +2830,2844,q154,2833,2837,l152 +2830,2844,q154,2841,2842,l153 +2856,2861,q155,2855,2857,l154 +2880,2889,q156,2889,2890,l158 +2904,2908,q157,0,0,null +2912,2913,q158,0,0,null +2914,2922,q159,0,0,null +2934,2945,q160,2942,2943,l161 +2947,2959,q161,0,0,null +2970,2983,q162,0,0,null +2997,2999,q163,2998,3002,l165 +3016,3025,q164,3019,3020,l166 +3033,3043,q165,3034,3038,l167 +3055,3063,q166,3057,3061,l168 +3068,3069,q167,0,0,null +3088,3099,q168,3088,3091,l170 +3112,3124,q169,3118,3121,l171 +3137,3151,q170,3141,3145,l173 +3152,3161,q171,0,0,null +3172,3182,q172,3173,3174,l176 +3201,3205,q173,0,0,null +3206,3217,q174,3213,3214,l178 +3231,3238,q175,3234,3236,l180 +3256,3262,q176,3260,3263,l181 +3267,3273,q177,0,0,null +3292,3295,q178,0,0,null +3303,3312,q179,3310,3314,l183 +3315,3319,q180,0,0,null +3336,3342,q181,0,0,null +3344,3353,q182,3347,3349,l185 +3360,3369,q183,3369,3370,l186 +3387,3397,q184,3391,3395,l187 +3399,3411,q185,3411,3415,l188 +3412,3420,q186,3411,3415,l188 +3431,3439,q187,0,0,null +3453,3454,q188,0,0,null +3472,3477,q189,0,0,null +3491,3494,q190,0,0,null +3498,3503,q191,3499,3502,l193 +3513,3515,q192,0,0,null +3518,3524,q193,3516,3518,l194 +3529,3538,q194,3536,3538,l195 +3546,3560,q195,3550,3554,l196 +3566,3570,q196,3568,3569,l197 +3571,3574,q197,0,0,null +3577,3590,q198,0,0,null +3607,3615,q199,0,0,null +3627,3633,q200,3631,3632,l200 +3640,3642,q201,0,0,null +3647,3660,q202,3658,3660,l201 +3664,3678,q203,3676,3677,l203 +3691,3704,q204,3702,3703,l205 +3721,3723,q205,3717,3721,l207 +3738,3748,q206,3745,3747,l209 
+3763,3777,q207,3775,3778,l211 +3793,3804,q208,3798,3801,l212 +3816,3825,q209,3821,3825,l213 +3840,3851,q210,3844,3847,l214 +3854,3867,q211,3852,3855,l215 +3854,3867,q211,3860,3861,l216 +3873,3875,q212,0,0,null +3877,3880,q213,0,0,null +3889,3903,q214,0,0,null +3914,3924,q215,3916,3920,l219 +3942,3944,q216,3943,3945,l220 +3953,3965,q217,0,0,null +3977,3983,q218,0,0,null +3987,3999,q219,0,0,null +4010,4021,q220,0,0,null +4032,4037,q221,4033,4037,l224 +4045,4050,q222,0,0,null +4069,4079,q223,4079,4081,l226 +4090,4102,q224,0,0,null +4110,4120,q225,4115,4118,l228 +4124,4138,q226,4127,4129,l229 +4152,4163,q227,0,0,null +4167,4181,q228,4179,4181,l231 +4196,4210,q229,4197,4199,l233 +4196,4210,q229,4201,4204,l234 +4225,4232,q230,0,0,null +4247,4260,q231,0,0,null +4272,4278,q232,4276,4279,l238 +4283,4296,q233,0,0,null +4312,4313,q234,0,0,null +4329,4332,q235,0,0,null +4345,4359,q236,4341,4345,l242 +4345,4359,q236,4346,4348,l243 +4345,4359,q236,4353,4354,l244 +4373,4380,q237,4378,4380,l245 +4389,4403,q238,0,0,null +4413,4418,q239,4410,4413,l247 +4413,4418,q239,4416,4418,l248 +4435,4445,q240,4436,4437,l250 +4435,4445,q240,4445,4447,l251 +4454,4460,q241,4460,4462,l252 +4478,4487,q242,0,0,null +4504,4512,q243,4504,4505,l255 +4519,4521,q244,4520,4523,l256 +4531,4545,q245,4531,4535,l257 +4531,4545,q245,4539,4540,l258 +4549,4561,q246,4553,4556,l259 +4565,4574,q247,4562,4566,l260 +4584,4592,q248,4590,4591,l261 +4611,4622,q249,4619,4620,l263 +11377,11388,q625,11377,11378,l645 +11377,11388,q625,11387,11389,l646 +11405,11415,q626,11409,11411,l647 diff --git a/generated/denser-queries/results/part-00005 b/generated/denser-queries/results/part-00005 new file mode 100644 index 00000000..d1f7efbb --- /dev/null +++ b/generated/denser-queries/results/part-00005 @@ -0,0 +1,135 @@ +11431,11444,q627,11436,11440,l650 +11446,11455,q628,0,0,null +11459,11468,q629,11462,11465,l651 +11475,11476,q630,0,0,null +11491,11503,q631,11501,11503,l654 +11505,11512,q632,11505,11508,l655 
+11523,11530,q633,0,0,null +11538,11542,q634,0,0,null +11550,11563,q635,11550,11553,l659 +11565,11579,q636,0,0,null +11590,11593,q637,0,0,null +11594,11598,q638,0,0,null +11615,11618,q639,0,0,null +11637,11651,q640,0,0,null +11668,11671,q641,11666,11668,l665 +11690,11701,q642,11690,11692,l666 +11702,11706,q643,0,0,null +11722,11723,q644,0,0,null +11724,11734,q645,11733,11734,l668 +11753,11765,q646,0,0,null +11784,11793,q647,0,0,null +11810,11818,q648,11818,11820,l672 +11823,11836,q649,11826,11827,l673 +11823,11836,q649,11833,11834,l674 +11839,11853,q650,0,0,null +11858,11872,q651,11859,11860,l675 +11880,11886,q652,0,0,null +11904,11909,q653,0,0,null +11912,11920,q654,11910,11912,l677 +11912,11920,q654,11917,11918,l678 +11925,11933,q655,0,0,null +11942,11945,q656,11941,11945,l680 +11946,11957,q657,0,0,null +11969,11970,q658,0,0,null +11977,11987,q659,11985,11986,l682 +11990,11993,q660,0,0,null +12002,12014,q661,0,0,null +12025,12030,q662,0,0,null +12048,12050,q663,0,0,null +12060,12068,q664,12067,12070,l685 +12073,12076,q665,0,0,null +12080,12091,q666,12078,12082,l686 +12080,12091,q666,12085,12086,l687 +12080,12091,q666,12090,12092,l688 +12100,12112,q667,12111,12114,l690 +12120,12133,q668,12125,12126,l691 +12142,12156,q669,12148,12150,l692 +12165,12168,q670,0,0,null +12182,12191,q671,12178,12182,l694 +12198,12200,q672,0,0,null +12202,12215,q673,12211,12215,l695 +12229,12237,q674,12236,12237,l696 +12244,12258,q675,12243,12247,l698 +12244,12258,q675,12252,12254,l699 +12265,12279,q676,0,0,null +12292,12297,q677,12296,12300,l701 +12315,12320,q678,0,0,null +12325,12329,q679,0,0,null +12347,12358,q680,12352,12353,l704 +12364,12374,q681,12367,12369,l705 +12391,12401,q682,12393,12397,l707 +12404,12412,q683,0,0,null +12426,12430,q684,12425,12427,l708 +12446,12449,q685,12447,12449,l709 +12455,12457,q686,0,0,null +12476,12477,q687,12475,12478,l710 +12479,12481,q688,0,0,null +12499,12504,q689,12499,12502,l712 +12513,12519,q690,0,0,null +12537,12539,q691,12537,12540,l714 
+12552,12556,q692,12552,12554,l715 +12560,12572,q693,12571,12574,l716 +12574,12577,q694,12571,12574,l716 +12585,12592,q695,0,0,null +12607,12612,q696,12606,12607,l718 +12627,12634,q697,12628,12631,l719 +12644,12651,q698,12643,12644,l720 +12667,12680,q699,12668,12669,l721 +12692,12697,q700,12694,12696,l722 +12711,12724,q701,12724,12726,l723 +12729,12730,q702,0,0,null +12749,12751,q703,0,0,null +12762,12764,q704,0,0,null +12781,12789,q705,12778,12781,l727 +12794,12799,q706,0,0,null +12801,12805,q707,12801,12802,l728 +12820,12831,q708,12829,12832,l729 +12832,12845,q709,12829,12832,l729 +12858,12864,q710,12861,12862,l730 +12858,12864,q710,12864,12868,l731 +12877,12888,q711,12875,12877,l732 +12900,12902,q712,0,0,null +12912,12924,q713,12916,12919,l735 +12926,12937,q714,0,0,null +12942,12946,q715,12941,12945,l736 +12952,12964,q716,12956,12958,l738 +12970,12977,q717,12976,12980,l739 +12980,12992,q718,12976,12980,l739 +12999,13013,q719,12997,12999,l740 +13017,13025,q720,13023,13026,l741 +13037,13043,q721,0,0,null +13046,13047,q722,0,0,null +13062,13070,q723,13067,13069,l743 +13080,13082,q724,0,0,null +13086,13095,q725,13088,13089,l745 +13099,13107,q726,13104,13106,l746 +13114,13122,q727,0,0,null +13139,13142,q728,13142,13144,l749 +13159,13172,q729,13164,13165,l750 +13175,13184,q730,0,0,null +13198,13200,q731,0,0,null +13219,13229,q732,13220,13221,l753 +13219,13229,q732,13229,13230,l754 +13232,13241,q733,13235,13239,l755 +13232,13241,q733,13241,13245,l756 +13248,13257,q734,13255,13257,l757 +13270,13273,q735,0,0,null +13276,13282,q736,0,0,null +13290,13299,q737,0,0,null +13314,13324,q738,0,0,null +13340,13345,q739,0,0,null +13352,13357,q740,0,0,null +13362,13363,q741,0,0,null +13364,13366,q742,13365,13367,l762 +13383,13393,q743,13390,13394,l763 +13396,13405,q744,0,0,null +13406,13409,q745,13408,13409,l764 +13414,13423,q746,0,0,null +13440,13449,q747,0,0,null +13457,13470,q748,13460,13462,l766 +13486,13488,q749,13488,13492,l767 +13496,13499,q750,0,0,null 
+13502,13503,q751,0,0,null +13512,13520,q752,13519,13521,l769 +13538,13548,q753,13540,13543,l771 diff --git a/generated/denser-queries/results/part-00006 b/generated/denser-queries/results/part-00006 new file mode 100644 index 00000000..4e0432ae --- /dev/null +++ b/generated/denser-queries/results/part-00006 @@ -0,0 +1,135 @@ +13538,13548,q753,13545,13547,l772 +13560,13572,q754,0,0,null +13581,13592,q755,13588,13589,l775 +13581,13592,q755,13592,13593,l776 +13610,13620,q756,13614,13617,l779 +13635,13647,q757,13640,13643,l781 +13660,13661,q758,0,0,null +13673,13674,q759,0,0,null +13687,13700,q760,13696,13697,l784 +13705,13717,q761,13708,13712,l785 +13705,13717,q761,13713,13714,l786 +13730,13738,q762,0,0,null +13750,13752,q763,0,0,null +13763,13776,q764,13769,13772,l788 +13781,13794,q765,13786,13788,l789 +13781,13794,q765,13791,13795,l790 +13795,13797,q766,13791,13795,l790 +13799,13806,q767,0,0,null +13825,13837,q768,13832,13836,l792 +13854,13867,q769,13857,13861,l793 +13882,13885,q770,0,0,null +13887,13898,q771,13894,13897,l795 +13905,13908,q772,0,0,null +13924,13932,q773,13926,13928,l796 +13939,13952,q774,13948,13949,l798 +13965,13978,q775,13977,13979,l799 +13990,13993,q776,13990,13994,l800 +14006,14018,q777,14010,14011,l801 +14006,14018,q777,14018,14021,l802 +14022,14031,q778,0,0,null +14044,14051,q779,14043,14045,l804 +14069,14078,q780,14078,14082,l806 +14084,14095,q781,14085,14088,l807 +14097,14109,q782,14096,14100,l808 +14119,14130,q783,0,0,null +14132,14137,q784,14133,14137,l810 +14147,14160,q785,14160,14161,l811 +14165,14176,q786,0,0,null +14177,14179,q787,0,0,null +14186,14200,q788,14186,14188,l812 +14186,14200,q788,14192,14194,l813 +14206,14216,q789,14211,14212,l814 +14224,14227,q790,0,0,null +14239,14240,q791,0,0,null +14252,14254,q792,0,0,null +14263,14268,q793,0,0,null +14282,14294,q794,0,0,null +14299,14313,q795,14306,14310,l820 +14322,14325,q796,0,0,null +14338,14351,q797,0,0,null +14365,14379,q798,0,0,null +14386,14394,q799,14385,14387,l824 
+14395,14406,q800,0,0,null +14414,14420,q801,14416,14420,l826 +14438,14442,q802,0,0,null +14448,14455,q803,14447,14451,l827 +14448,14455,q803,14453,14455,l828 +14468,14482,q804,0,0,null +14496,14510,q805,0,0,null +14528,14534,q806,14530,14533,l832 +14538,14541,q807,14536,14538,l833 +14538,14541,q807,14540,14543,l834 +14545,14550,q808,14545,14548,l835 +14569,14571,q809,0,0,null +14579,14593,q810,0,0,null +14595,14604,q811,14600,14603,l838 +14605,14606,q812,0,0,null +14611,14619,q813,0,0,null +14634,14645,q814,14631,14634,l840 +14652,14661,q815,14652,14653,l841 +14671,14678,q816,14675,14676,l842 +14680,14693,q817,14689,14691,l843 +14694,14706,q818,0,0,null +14707,14708,q819,14708,14709,l844 +14713,14719,q820,0,0,null +14724,14738,q821,14736,14737,l845 +14746,14760,q822,0,0,null +14770,14771,q823,14769,14772,l847 +14785,14791,q824,0,0,null +14807,14810,q825,0,0,null +14815,14819,q826,14818,14819,l850 +14825,14828,q827,0,0,null +14846,14847,q828,0,0,null +14857,14865,q829,14855,14859,l852 +14857,14865,q829,14864,14866,l853 +14867,14877,q830,0,0,null +14890,14893,q831,0,0,null +14904,14917,q832,14907,14909,l855 +14929,14938,q833,14936,14939,l856 +14956,14963,q834,14962,14963,l857 +14979,14981,q835,0,0,null +14999,15005,q836,0,0,null +15013,15019,q837,0,0,null +15037,15051,q838,0,0,null +15062,15070,q839,15063,15064,l862 +15088,15095,q840,0,0,null +15106,15109,q841,0,0,null +15128,15142,q842,15139,15140,l867 +15146,15160,q843,15152,15155,l868 +15168,15174,q844,0,0,null +15183,15195,q845,15184,15186,l871 +15203,15217,q846,15217,15219,l873 +15226,15235,q847,15232,15235,l874 +15250,15253,q848,0,0,null +15258,15272,q849,15262,15266,l876 +15287,15301,q850,0,0,null +15316,15321,q851,0,0,null +15337,15349,q852,0,0,null +15358,15363,q853,15354,15358,l880 +15358,15363,q853,15362,15365,l881 +15366,15379,q854,0,0,null +15391,15397,q855,15393,15395,l882 +15404,15418,q856,15406,15408,l883 +15404,15418,q856,15409,15410,l884 +15421,15424,q857,0,0,null +15443,15450,q858,0,0,null 
+15467,15480,q859,0,0,null +15490,15494,q860,15489,15493,l887 +15495,15502,q861,0,0,null +15506,15519,q862,15508,15511,l888 +15524,15533,q863,0,0,null +15551,15555,q864,0,0,null +15571,15578,q865,0,0,null +15597,15603,q866,15598,15602,l893 +15604,15616,q867,0,0,null +15621,15635,q868,15629,15633,l894 +15643,15644,q869,0,0,null +15645,15648,q870,0,0,null +15659,15663,q871,0,0,null +15668,15673,q872,0,0,null +15680,15690,q873,15680,15684,l896 +15697,15701,q874,15696,15698,l897 +15697,15701,q874,15699,15702,l898 +6878,6885,q375,0,0,null +6897,6905,q376,0,0,null diff --git a/generated/denser-queries/results/part-00007 b/generated/denser-queries/results/part-00007 new file mode 100644 index 00000000..aa50d6d5 --- /dev/null +++ b/generated/denser-queries/results/part-00007 @@ -0,0 +1,135 @@ +6924,6934,q377,6932,6936,l390 +6939,6943,q378,0,0,null +6955,6957,q379,0,0,null +6965,6970,q380,6966,6968,l392 +6978,6980,q381,6979,6982,l393 +6999,7010,q382,6999,7003,l394 +7013,7024,q383,7014,7015,l395 +7035,7048,q384,0,0,null +7055,7067,q385,0,0,null +7079,7080,q386,0,0,null +7094,7100,q387,7093,7094,l400 +7103,7109,q388,7107,7109,l401 +7121,7125,q389,0,0,null +7138,7143,q390,0,0,null +7161,7164,q391,0,0,null +7180,7194,q392,7178,7181,l404 +7205,7216,q393,7210,7212,l405 +7224,7237,q394,7230,7231,l406 +7249,7252,q395,0,0,null +7266,7268,q396,0,0,null +7286,7300,q397,7295,7299,l409 +7313,7317,q398,7312,7314,l410 +7325,7332,q399,7324,7325,l411 +7340,7350,q400,7350,7354,l412 +7356,7364,q401,0,0,null +7368,7374,q402,0,0,null +7391,7398,q403,0,0,null +7408,7421,q404,7408,7410,l414 +7408,7421,q404,7414,7418,l415 +7435,7447,q405,7438,7441,l417 +7449,7463,q406,7453,7457,l418 +7449,7463,q406,7460,7463,l419 +7465,7477,q407,7469,7471,l420 +7494,7501,q408,7500,7504,l421 +7517,7522,q409,0,0,null +7525,7529,q410,7525,7526,l422 +7546,7554,q411,7552,7556,l423 +7564,7578,q412,7574,7577,l424 +7581,7589,q413,7587,7590,l425 +7591,7596,q414,0,0,null +7606,7618,q415,7606,7610,l426 
+7606,7618,q415,7613,7615,l427 +7630,7644,q416,7631,7634,l428 +7657,7671,q417,7659,7663,l429 +7688,7702,q418,7697,7701,l432 +7715,7722,q419,0,0,null +7739,7752,q420,7736,7739,l434 +7767,7777,q421,7766,7770,l435 +7767,7777,q421,7774,7778,l436 +7786,7789,q422,0,0,null +7795,7805,q423,0,0,null +7806,7810,q424,7806,7807,l437 +7806,7810,q424,7808,7809,l438 +7824,7835,q425,7830,7833,l441 +7847,7851,q426,0,0,null +7870,7881,q427,0,0,null +7899,7913,q428,7911,7912,l446 +7915,7922,q429,0,0,null +7929,7935,q430,0,0,null +7952,7963,q431,7958,7959,l449 +7964,7970,q432,0,0,null +7974,7983,q433,7972,7975,l450 +7999,8011,q434,8008,8010,l453 +8022,8024,q435,0,0,null +8037,8044,q436,8043,8046,l456 +8048,8062,q437,8048,8049,l457 +8048,8062,q437,8051,8054,l458 +8048,8062,q437,8057,8059,l459 +8078,8081,q438,8074,8078,l460 +8087,8091,q439,0,0,null +8105,8115,q440,8102,8105,l462 +8121,8129,q441,8129,8130,l463 +8140,8153,q442,8146,8148,l464 +8140,8153,q442,8152,8155,l465 +8163,8171,q443,8160,8164,l466 +8181,8182,q444,0,0,null +8201,8211,q445,8205,8208,l468 +8218,8226,q446,0,0,null +8233,8245,q447,8241,8245,l470 +8260,8261,q448,0,0,null +8265,8270,q449,0,0,null +8285,8288,q450,8287,8288,l472 +8300,8306,q451,0,0,null +8310,8323,q452,8308,8311,l474 +8336,8343,q453,8338,8339,l475 +8352,8362,q454,8360,8361,l476 +8379,8393,q455,8383,8386,l477 +8396,8410,q456,8395,8396,l478 +8396,8410,q456,8400,8404,l479 +8424,8432,q457,8423,8426,l480 +8424,8432,q457,8428,8432,l481 +8451,8459,q458,0,0,null +8476,8483,q459,0,0,null +8498,8506,q460,8504,8505,l485 +8514,8519,q461,0,0,null +8534,8548,q462,8534,8538,l486 +8566,8573,q463,0,0,null +8584,8587,q464,8587,8588,l489 +8591,8600,q465,8589,8591,l490 +8591,8600,q465,8593,8597,l491 +8603,8606,q466,8606,8610,l492 +8607,8616,q467,8606,8610,l492 +8627,8639,q468,8636,8637,l494 +8655,8659,q469,0,0,null +8677,8690,q470,8677,8680,l496 +8701,8709,q471,8701,8702,l499 +8711,8725,q472,0,0,null +8740,8754,q473,0,0,null +8768,8770,q474,0,0,null +8773,8777,q475,0,0,null 
/**
 * Generates random query and label intervals, writes both sets as CSV and stores the
 * brute-force overlap join of the two as the expected result of a range join.
 */
object TestDataGenerator {

  /**
   * Writes 1000 queries to `generated/query`, 1000 labels to `generated/label` and the
   * brute-force join to `generated/result`.
   *
   * Each result line is `qStart,qEnd,qValue,lStart,lEnd,lValue`; a query without any
   * overlapping label is emitted once with the placeholder label `0,0,null`.
   */
  def main(args: Array[String]): Unit = {
    val queryPath = "generated/query"
    val labelPath = "generated/label"
    val spark = SparkSession.builder().master("local[*]").appName("generator").getOrCreate()
    try {
      val query = generateRangesList(1000, 50, 5, 30)
        .map(rr => QueryRecord(rr.start, rr.end, s"q${rr.index}"))
      val label = generateRangesList(1000, 50, 15, 20)
        .map(rr => LabelRecord(rr.start, rr.end, s"l${rr.index}"))
      import spark.sqlContext.implicits._
      spark.sparkContext.parallelize(query).toDF.write.csv(queryPath)
      spark.sparkContext.parallelize(label).toDF.write.csv(labelPath)
      // brute force join: every query against every label, closed-interval overlap
      val joined = for {
        q <- query
        overlapping = label.filter(l => q.end >= l.start && q.start <= l.end)
        l <- if (overlapping.isEmpty) Seq(LabelRecord(0, 0, null)) else overlapping
      } yield s"${q.start},${q.end},${q.value},${l.start},${l.end},${l.value}"
      spark.sparkContext.parallelize(joined).saveAsTextFile("generated/result")
    } finally {
      // release the local Spark context even when one of the writes fails
      spark.stop()
    }
  }

  /**
   * Builds `amount` consecutive, non-overlapping intervals with ascending start positions.
   *
   * The first interval starts at a random position in `[0, maxOffset)`. Every interval has a
   * random length in `[1, maxRange - 1]` and is followed by a random gap in `[1, maxStep - 1]`.
   *
   * @param amount    number of intervals to generate
   * @param maxOffset exclusive upper bound of the first start position; must be positive
   * @param maxRange  exclusive upper bound of an interval length; must be greater than 1
   * @param maxStep   exclusive upper bound of the gap between intervals; must be greater than 1
   * @return the generated intervals in ascending order, indexed from 0
   * @throws IllegalArgumentException when a bound leaves no value to draw from
   */
  def generateRangesList(amount: Int, maxOffset: Int, maxRange: Int, maxStep: Int): Seq[RangeRecord] = {
    require(maxOffset > 0, s"maxOffset must be positive, got $maxOffset")
    if (amount > 0) {
      // Random.nextInt(bound) rejects bound <= 0, so fail early with a message naming the argument
      require(maxRange > 1, s"maxRange must be greater than 1, got $maxRange")
      require(maxStep > 1, s"maxStep must be greater than 1, got $maxStep")
    }
    val r = new Random()
    val firstStart = r.nextInt(maxOffset)
    val (_, ranges) = (0 until amount).foldLeft((firstStart, Vector.empty[RangeRecord])) {
      case ((offset, acc), i) =>
        val size = r.nextInt(maxRange - 1) + 1
        val step = r.nextInt(maxStep - 1) + 1
        (offset + size + step, acc :+ RangeRecord(offset, offset + size, i))
    }
    ranges
  }
}

/** A generated interval together with its position in the generated sequence. */
case class RangeRecord(start: Int, end: Int, index: Int)

/** A query interval; `value` is its identifier (`q<index>` when produced by `main`). */
case class QueryRecord(start: Int, end: Int, value: String)

/** A label interval; `value` is its identifier (`l<index>` when produced by `main`). */
case class LabelRecord(start: Int, end: Int, value: String)
/**
 * Mutable state of one chromo-sweep pass over a start-sorted stream of query and label
 * intervals.
 *
 * The sweep remembers the most recent query and the most recent label. Whenever an incoming
 * record shows that the remembered query cannot gain further matches with the remembered
 * label, the current (query, label) pair is appended to `mutList`.
 */
class ChromoSweep extends Serializable {

  var lastQuery: Option[(SortedInterval[Int], InternalRow)] = None
  var lastLabel: Option[(SortedInterval[Int], InternalRow)] = None
  val mutList: mutable.MutableList[JoinedRow] = mutable.MutableList[JoinedRow]()

  /** Appends the currently remembered (query, label) pair to the result list. */
  def push = {
    mutList += JoinedRow(q = lastQuery, l = lastLabel)
  }

  /** Remembers the interval and row of `i` as the most recent label. */
  def updateLabel(i: (Boolean, SortedInterval[Int], InternalRow)):Unit = {
    val (_, interval, row) = i
    lastLabel = Some((interval, row))
  }

  /** Remembers the interval and row of `i` as the most recent query. */
  def updateQuery(i: (Boolean, SortedInterval[Int], InternalRow)):Unit = {
    val (_, interval, row) = i
    lastQuery = Some((interval, row))
  }

  /** Forgets the remembered label. */
  def flushLabel:Unit = {
    lastLabel = None
  }

  /** Forgets the remembered query. */
  def flushQuery:Unit = {
    lastQuery = None
  }

  /**
   * Consumes the next record of the sorted stream and updates the sweep state.
   *
   * @param i record as (is-query flag, interval, row); the flag is `true` for a query and
   *          `false` for a label
   * @throws IllegalStateException when a query starts at or before the end of the remembered
   *                               query, or when a label arrives while a query is remembered
   *                               and starts at or before the end of the remembered label
   */
  def next(i: (Boolean, SortedInterval[Int], InternalRow)):Unit = {
    val isQuery = i._1
    val start = i._2.start
    // both flags are false when the corresponding slot is empty
    val startsAfterQuery = lastQuery.exists(q => start > q._1.end)
    val startsAfterLabel = lastLabel.exists(l => start > l._1.end)

    if (isQuery) {
      if (lastQuery.isDefined && !startsAfterQuery) {
        throw new IllegalStateException("Overlapping of two queries")
      }
      // the previous query is finished: emit it with whatever label it last met
      if (lastQuery.isDefined) push
      // a label that ends before this query starts can no longer match anything
      if (startsAfterLabel) flushLabel
      updateQuery(i)
    } else if (lastQuery.isEmpty) {
      // no query pending: just remember the newest label
      updateLabel(i)
    } else {
      if (lastLabel.isDefined && !startsAfterLabel) {
        throw new IllegalStateException("Overlapping of two labels")
      }
      // emit the pending pair before the label slot is overwritten, or emit the query
      // unmatched when the new label already lies behind it
      if (lastLabel.isDefined || startsAfterQuery) push
      if (startsAfterQuery) flushQuery
      updateLabel(i)
    }
  }
}


/** One emitted result: the query and, when one overlapped it, the label. */
case class JoinedRow(q: Option[(SortedInterval[Int], InternalRow)], l: Option[(SortedInterval[Int], InternalRow)])
// logic copied from IntervalTreeJoinOptim
/**
 * Physical plan node joining `left` (queries) with `right` (labels) on interval overlap by
 * delegating to `ChromoSweepJoinImpl.overlapJoin`.
 *
 * `condition` is read positionally: elements 0 and 1 are evaluated against the rows of `left`
 * and elements 2 and 3 against the rows of `right`; each pair yields the `Int` start and end
 * of the row's interval.
 */
case class ChromoSweepJoin(left: SparkPlan,
                           right: SparkPlan,
                           condition: Seq[Expression],
                           context: SparkSession, leftLogicalPlan: LogicalPlan, righLogicalPlan: LogicalPlan) extends BinaryExecNode {

  def output = left.output ++ right.output

  lazy val (buildPlan, streamedPlan) = (left, right)

  lazy val (buildKeys, streamedKeys) = (List(condition(0), condition(1)),
    List(condition(2), condition(3)))

  @transient lazy val buildKeyGenerator = new InterpretedProjection(buildKeys, buildPlan.output)
  @transient lazy val streamKeyGenerator = new InterpretedProjection(streamedKeys,
    streamedPlan.output)

  /**
   * Keys every row of both children by its interval, runs the chromo-sweep join and
   * concatenates each resulting (left row, right row) pair into a single output row.
   */
  protected override def doExecute(): RDD[InternalRow] = {
    val queries = left.execute().map { row =>
      val key = buildKeyGenerator(row)
      // copy: the child may hand out the same mutable row object for every record
      (new SortedInterval[Int](key.getInt(0), key.getInt(1)), row.copy())
    }
    val labels = right.execute().map { row =>
      val key = streamKeyGenerator(row)
      (new SortedInterval[Int](key.getInt(0), key.getInt(1)), row.copy())
    }

    val pairs = ChromoSweepJoinImpl.overlapJoin(context.sparkContext, queries, labels)

    // resolve the schemas once on the driver so the closure below does not need the plan
    val leftSchema = left.schema
    val rightSchema = right.schema
    pairs.mapPartitions { rows =>
      // one joiner per partition instead of regenerating it for every output row
      val joiner = GenerateUnsafeRowJoiner.create(leftSchema, rightSchema)
      rows.map { case (l, r) =>
        // the joiner writes into a buffer it reuses between calls; copy so that every
        // emitted row owns its data, as it did when a fresh joiner was created per row
        val joined: InternalRow =
          joiner.join(l.asInstanceOf[UnsafeRow], r.asInstanceOf[UnsafeRow]).copy()
        joined
      }
    }
  }
}
/** Distributed chromo-sweep overlap join of two interval-keyed RDDs. */
object ChromoSweepJoinImpl extends Serializable {


  type K = SortedInterval[Int]
  type V = (Boolean, SortedInterval[Int], InternalRow)

  /**
   * Multi-joins together two RDDs that contain objects that map to reference regions.
   * The objects from both sets are joined using a distributed version of the chromo-sweep
   * algorithm. The basic assumption is that all the intervals from a specific range end up
   * on one partition.
   * The process has the following steps:
   * - create a RangePartitioner from RDD1
   * - partition RDD1 using the partitioner
   * - create a range tree whose leaves are the pairs of lowest start and highest end of the
   * intervals in each partition
   * - based on that, replicate every interval of RDD2 to each partition range it intersects
   * and drop the ones that intersect none
   * - partition the mapped RDD2 using the previously created partitioner
   * - union both RDDs
   * - sort each partition by the start value of the interval
   * - execute chromo-sweep on each partition.
   *
   * A query that met no label is emitted once, paired with an all-null row.
   *
   * @param sc   context used to create the empty result when rdd1 holds no rows
   * @param rdd1 - first rdd to join (queries)
   * @param rdd2 - second rdd to join (labels)
   * @return pairs of (query row, label row)
   */
  def overlapJoin(sc: SparkContext, rdd1: RDD[(SortedInterval[Int], InternalRow)], rdd2: RDD[(SortedInterval[Int], InternalRow)])
  : RDD[(InternalRow, InternalRow)] = {
    // partition the first RDD and derive the covered range of each partition
    val tRDD1 = rdd1.map(e => (e._1, (true, e._1, e._2)))
    val part = new RangePartitioner(tRDD1.getNumPartitions, tRDD1)
    val ranges = getPartitionRanges(new ShuffledRDD[K, V, V](tRDD1, part))
    if (ranges.isEmpty) {
      // no query at all: nothing can be emitted and no range tree can be built
      sc.emptyRDD[(InternalRow, InternalRow)]
    } else {
      val rangeTree = new RangeTreeNode(ranges)
      // key every label by the range of each partition it intersects, so the partitioner
      // routes it next to the queries of that partition; labels intersecting none vanish
      val tRDD2 = rdd2.flatMap(e => rangeTree.getPartitions(e._1).map((_, (false, e._1, e._2))))
      val uRDD = new ShuffledRDD[K, V, V](tRDD1.union(tRDD2), part)
      // in each partition do chromosweep
      uRDD.mapPartitions(p => sweepPartition(p.map(_._2)))
    }
  }

  /**
   * Runs the chromo-sweep over the records of one partition.
   *
   * @param records queries and labels of the partition in arbitrary order
   * @return (query row, label row) pairs; empty when the partition produced no result
   */
  private def sweepPartition(records: Iterator[V]): Iterator[(InternalRow, InternalRow)] = {
    val sweep = new ChromoSweep()
    records.toList.sortBy(_._2.start).foreach(sweep.next)
    // the query still pending after the last record has not been emitted yet
    if (sweep.lastQuery.isDefined) {
      sweep.push
    }
    sweep.mutList.headOption.flatMap(_.q) match {
      case None => Iterator.empty
      case Some((_, firstQueryRow)) =>
        // in case there is no label for a query we replace None with a dummy internal row
        val nullRow = createDummyInternalRow(firstQueryRow)
        sweep.mutList.iterator.collect {
          case JoinedRow(Some((_, queryRow)), label) => (queryRow, label.fold(nullRow)(_._2))
        }
    }
  }

  /**
   * Returns a copy of `template` with every field set to null.
   *
   * NOTE(review): the template is a query row, yet the result stands in for a label row;
   * this presumably only lines up when both sides have the same layout - confirm.
   */
  private def createDummyInternalRow(template: InternalRow): InternalRow = {
    val ir = template.copy()
    Range(0, ir.numFields).foreach(i => ir.setNullAt(i))
    ir
  }

  /**
   * Calculates the span of all intervals for each partition
   * e.g. for a partition with intervals [1,3], [5,8], [11, 15] the result is [1,15].
   * Partitions without any record contribute nothing.
   *
   * @param rdd partitioned, interval-keyed records
   * @return array with one interval per non-empty partition, sorted by start
   */
  private def getPartitionRanges(rdd: RDD[(K, V)]): Array[SortedInterval[Int]] = {
    rdd.mapPartitions(
      { p =>
        if (p.isEmpty) {
          Iterator.empty
        } else {
          val (min, max) = p.foldLeft((Int.MaxValue, Int.MinValue)) {
            case ((lo, hi), (key, _)) => (math.min(lo, key.start), math.max(hi, key.end))
          }
          Iterator.single(SortedInterval[Int](min, max))
        }
      }, preservesPartitioning = true)
      .collect().sortBy(_.start)
  }
}

/**
 * Planner strategy that turns every logical plan matched by `ExtractRangeJoinKeys` into a
 * [[ChromoSweepJoin]].
 *
 * NOTE(review): the extracted join type is ignored, while the join itself emits a null-padded
 * row for every query without a label - confirm this is intended for all join types.
 */
class ChromoSweepJoinStrategy(spark: SparkSession) extends Strategy with Serializable with PredicateHelper {
  def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
    case ExtractRangeJoinKeys(_, rangeJoinKeys, left, right) =>
      ChromoSweepJoin(planLater(left), planLater(right), rangeJoinKeys, spark, left, right) :: Nil
    case _ => Nil
  }
}
/**
 * Node of a binary tree over partition ranges. A leaf holds the range of one partition; an
 * inner node holds a range covering the ranges of both of its children.
 */
case class RangeTreeNode(range: SortedInterval[Int], left: Option[RangeTreeNode], right: Option[RangeTreeNode]) {

  /**
   * Creates the tree structure for the given list of intervals. Each parent node spans the
   * intervals of its children.
   *
   * @param si - list of intervals, one per leaf; must not be empty
   * @throws IllegalArgumentException when `si` is empty
   */
  def this(si: Array[SortedInterval[Int]]) = this(
    RangeTreeNode.span(si),
    RangeTreeNode.subtree(si, 0, si.length / 2),
    RangeTreeNode.subtree(si, si.length / 2, si.length))

  /**
   * Returns the list of partition ranges to which the given interval should belong.
   * One interval can belong to zero or more ranges.
   *
   * @param si - interval
   * @return ranges of all leaves intersecting `si`, in left-to-right order
   */
  def getPartitions(si: SortedInterval[Int]): Seq[SortedInterval[Int]] = {
    if (!si.intersects(range)) {
      Nil
    } else {
      left match {
        case None => range :: Nil
        case Some(l) => l.getPartitions(si) ++ right.toList.flatMap(_.getPartitions(si))
      }
    }
  }
}

object RangeTreeNode {

  /**
   * Smallest interval covering every element of `si`. The maximum end is searched for
   * explicitly: an earlier interval may reach further than the last one, and taking only
   * the last end would hide such a leaf from every lookup.
   */
  private def span(si: Array[SortedInterval[Int]]): SortedInterval[Int] = {
    require(si.nonEmpty, "RangeTreeNode requires at least one interval")
    SortedInterval[Int](si.iterator.map(_.start).min, si.iterator.map(_.end).max)
  }

  /** Subtree over `si[from, until)`, or None when `si` is a single leaf (or empty). */
  private def subtree(si: Array[SortedInterval[Int]], from: Int, until: Int): Option[RangeTreeNode] =
    if (si.length > 1) Some(new RangeTreeNode(si.slice(from, until))) else None
}

/**
 * Closed interval `[start, end]` that is ordered by its start only; two intervals with the
 * same start compare as equal regardless of their ends.
 */
case class SortedInterval[T](start: T, end: T)(implicit ev$1: T => Int) extends Ordered[SortedInterval[T]]{

  /** True when the interval starts strictly before `point`. */
  def haveStarted(point: T): Boolean = {
    start < point
  }

  /** True when both intervals share at least one position; touching ends count. */
  def intersects(other: SortedInterval[T]) = {
    start <= other.end && end >= other.start
  }

  /** Compares by start: -1 when this starts earlier, 1 when later, 0 otherwise. */
  override def compare(that: SortedInterval[T]): Int = if(start < that.start) -1 else if(start > that.start) 1 else 0
}
/**
 * Tests of the range join planned by [[ChromoSweepJoinStrategy]]: one hand-written data set
 * and two generated data sets with precomputed brute-force results.
 */
class ChromoSweepTest
  extends FunSuite
    with DataFrameSuiteBase
    with BeforeAndAfter {
  val oneFieldSchema = StructType(
    Seq(StructField("start", IntegerType)))
  val twoFieldSchema = StructType(
    Seq(StructField("start", IntegerType),
      StructField("end", IntegerType)))

  // layout of the precomputed result files: query columns followed by label columns
  val resultSchema = StructType(Seq(
    StructField("start1", IntegerType),
    StructField("end1", IntegerType),
    StructField("rval1", StringType),
    StructField("start2", IntegerType),
    StructField("end2", IntegerType),
    StructField("rval2", StringType)))

  before {
    spark.experimental.extraStrategies = new ChromoSweepJoinStrategy(spark) :: Nil

    val rdd1 = sc
      .parallelize(Seq((100, 190), (200, 290), (400, 600), (10000, 20000), (22100, 22100)))
      .map(i => Row(i._1.toInt, i._2.toInt))
    val rdd2 = sc
      .parallelize(Seq((150, 180), (190, 300), (310, 500), (510, 700), (22000, 22300), (15000, 15000)))
      .map(i => Row(i._1.toInt, i._2.toInt))

    val ds1 = sqlContext.createDataFrame(rdd1, twoFieldSchema)
    ds1.createOrReplaceTempView("s1")
    val ds2 = sqlContext.createDataFrame(rdd2, twoFieldSchema)
    ds2.createOrReplaceTempView("s2")
  }

  test("simple functionality test") {
    val query = "select s1.start from s1 join s2 on (s1.end >= s2.start and s1.start <= s2.end)"
    assertDataFrameEquals(
      sqlContext
        .createDataFrame(
          sc.parallelize(Row(100) :: Row(100) :: Row(200) :: Row(400) :: Row(400) :: Row(10000) :: Row(22100) :: Nil),
          oneFieldSchema)
        .orderBy("start"),
      sqlContext.sql(query).orderBy("s1.start")
    )
  }

  // tests on bigger datasets

  test("test denser queries") {
    testGeneratedData("generated/denser-queries/queries/", "generated/denser-queries/labels/", "generated/denser-queries/results")
  }

  test("test denser labels") {
    testGeneratedData("generated/denser-labels/queries/", "generated/denser-labels/labels/", "generated/denser-labels/results")
  }


  /**
   * Joins the generated queries and labels with the range join and compares the label value
   * of every joined pair with the precomputed result for the same pair of intervals.
   *
   * @param queryPath  directory with query CSV lines `start,end,value`
   * @param labelPath  directory with label CSV lines `start,end,value`
   * @param resultPath directory with the expected result, laid out as `resultSchema`
   */
  private def testGeneratedData(queryPath: String, labelPath: String, resultPath: String) = {
    import spark.implicits._
    // workaround to avoid serialization error for CSVParser
    val ds3 = spark.sparkContext.textFile(queryPath).map(_.split(",")).map(e => (e(0).toInt, e(1).toInt, e(2))).toDF("start1", "end1", "val1")
    ds3.createOrReplaceTempView("dq1")
    val ds4 = spark.sparkContext.textFile(labelPath).map(_.split(",")).map(e => (e(0).toInt, e(1).toInt, e(2))).toDF("start2", "end2", "val2")
    ds4.createOrReplaceTempView("dq2")
    val expected = spark.read.schema(resultSchema).csv(resultPath)
    val sqlQuery1 = "select * from dq1 join dq2 on (end1 >= start2 and start1 <= end2)"
    val result = sqlContext.sql(sqlQuery1)
    val joined = result.join(expected, Seq("start1", "end1", "start2", "end2"))
    // without any joined pair the two checks below would hold trivially
    assertTrue(joined.count() > 0)
    // SQL comparisons against NULL with = / != never evaluate to true, so the null checks
    // have to be spelled with IS [NOT] NULL to be able to select anything
    assertTrue(joined.where("(dq2.val2 is null and rval2 is not null) or (dq2.val2 is not null and rval2 is null)").count() == 0)
    assertTrue(joined.where("dq2.val2 is not null and rval2 is not null and dq2.val2 != rval2").count() == 0)
  }

}