Skip to content

Commit

Permalink
Integration test cleanups; added a test for column overlap (#364)
Browse files Browse the repository at this point in the history
* renamed the flag `--intersected-anno` to `--compute-overlap`

* added integration tests for `--compute-overlap`

* cleaned up the other integration tests
  • Loading branch information
karasikov authored Dec 15, 2021
1 parent 9c99ea3 commit 43ed600
Show file tree
Hide file tree
Showing 8 changed files with 471 additions and 593 deletions.
6 changes: 3 additions & 3 deletions metagraph/integration_tests/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

script_path = os.path.dirname(os.path.realpath(__file__))

METAGRAPH = './metagraph'
METAGRAPH = f'{os.getcwd()}/metagraph'

TEST_DATA_DIR = os.path.join(script_path, '..', 'tests', 'data')

Expand Down Expand Up @@ -102,7 +102,7 @@ def _clean(graph, output, extra_params=''):
@staticmethod
def _annotate_graph(input, graph_path, output, anno_repr,
separate=False, no_fork_opt=False, no_anchor_opt=False,
anno_type='header'):
anno_type='header', extra_params=''):
target_anno = anno_repr

noswap = anno_repr.endswith('_noswap')
Expand All @@ -120,7 +120,7 @@ def _annotate_graph(input, graph_path, output, anno_repr,
anno_repr = 'row'

command = f'{METAGRAPH} annotate -p {NUM_THREADS} --anno-{anno_type}\
-i {graph_path} --anno-type {anno_repr} \
-i {graph_path} --anno-type {anno_repr} {extra_params} \
-o {output} {input}'

if target_anno.endswith('_coord'):
Expand Down
215 changes: 80 additions & 135 deletions metagraph/integration_tests/test_annotate.py

Large diffs are not rendered by default.

150 changes: 73 additions & 77 deletions metagraph/integration_tests/test_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from tempfile import TemporaryDirectory
import glob
import os
from base import TestingBase


"""Test graph construction"""
Expand Down Expand Up @@ -32,15 +33,10 @@
BUILDS = [name for name, _ in build_params.items()]


class TestBuild(unittest.TestCase):
class TestBuild(TestingBase):
def setUp(self):
self.tempdir = TemporaryDirectory()

def __get_stats(self, graph_filename):
stats_command = METAGRAPH + ' stats ' + graph_filename
res = subprocess.run(stats_command.split(), stdout=PIPE)
return res

@parameterized.expand([repr for repr in BUILDS if not (repr == 'bitmap' and PROTEIN_MODE)])
def test_simple_all_graphs(self, build):
representation, tmp_dir = build_params[build]
Expand All @@ -56,12 +52,12 @@ def test_simple_all_graphs(self, build):
res = subprocess.run([construct_command], shell=True)
self.assertEqual(res.returncode, 0)

res = self.__get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
self.assertEqual(res.returncode, 0)
params_str = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 20', params_str[0])
self.assertEqual('nodes (k): 591997', params_str[1])
self.assertEqual('mode: basic', params_str[2])
out = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 20', out[0])
self.assertEqual('nodes (k): 591997', out[1])
self.assertEqual('mode: basic', out[2])

@parameterized.expand(succinct_states)
def test_build_succinct_inplace(self, state):
Expand All @@ -73,13 +69,13 @@ def test_build_succinct_inplace(self, state):
res = subprocess.run([construct_command], shell=True)
self.assertEqual(res.returncode, 0)

res = self.__get_stats(self.tempdir.name + '/graph' + graph_file_extension['succinct'])
res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension['succinct'])
self.assertEqual(res.returncode, 0)
params_str = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 20', params_str[0])
self.assertEqual('nodes (k): 597931', params_str[1])
self.assertEqual('mode: basic', params_str[2])
self.assertEqual('state: ' + state, params_str[8])
out = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 20', out[0])
self.assertEqual('nodes (k): 597931', out[1])
self.assertEqual('mode: basic', out[2])
self.assertEqual('state: ' + state, out[8])

@parameterized.expand(['succinct'])
def test_simple_bloom_graph(self, build):
Expand All @@ -96,12 +92,12 @@ def test_simple_bloom_graph(self, build):
res = subprocess.run([construct_command], shell=True)
self.assertEqual(res.returncode, 0)

res = self.__get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
self.assertEqual(res.returncode, 0)
params_str = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 20', params_str[0])
self.assertEqual('nodes (k): 591997', params_str[1])
self.assertEqual('mode: basic', params_str[2])
out = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 20', out[0])
self.assertEqual('nodes (k): 591997', out[1])
self.assertEqual('mode: basic', out[2])

convert_command = '{exe} transform -o {outfile} --initialize-bloom {bloom_param} {input}'.format(
exe=METAGRAPH,
Expand Down Expand Up @@ -142,12 +138,12 @@ def test_simple_all_graphs_canonical(self, build):
res = subprocess.run([construct_command], shell=True)
self.assertEqual(res.returncode, 0)

res = self.__get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
self.assertEqual(res.returncode, 0)
params_str = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 20', params_str[0])
self.assertEqual('nodes (k): 1159851', params_str[1])
self.assertEqual('mode: canonical', params_str[2])
out = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 20', out[0])
self.assertEqual('nodes (k): 1159851', out[1])
self.assertEqual('mode: canonical', out[2])

@parameterized.expand(BUILDS)
def test_build_tiny_k(self, build):
Expand All @@ -163,12 +159,12 @@ def test_build_tiny_k(self, build):
res = subprocess.run([construct_command], shell=True)
self.assertEqual(res.returncode, 0)

res = self.__get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
self.assertEqual(res.returncode, 0)
params_str = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 2', params_str[0])
self.assertEqual('nodes (k): 16', params_str[1])
self.assertEqual('mode: basic', params_str[2])
out = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 2', out[0])
self.assertEqual('nodes (k): 16', out[1])
self.assertEqual('mode: basic', out[2])

# TODO: add 'hashstr' once the canonical mode is implemented for it
@parameterized.expand([repr for repr in BUILDS if repr != 'hashstr'])
Expand All @@ -186,12 +182,12 @@ def test_build_tiny_k_canonical(self, build):
res = subprocess.run([construct_command], shell=True)
self.assertEqual(res.returncode, 0)

res = self.__get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
self.assertEqual(res.returncode, 0)
params_str = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 2', params_str[0])
self.assertEqual('nodes (k): 16', params_str[1])
self.assertEqual('mode: canonical', params_str[2])
out = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 2', out[0])
self.assertEqual('nodes (k): 16', out[1])
self.assertEqual('mode: canonical', out[2])

@parameterized.expand(BUILDS)
def test_build_tiny_k_parallel(self, build):
Expand All @@ -205,12 +201,12 @@ def test_build_tiny_k_parallel(self, build):
res = subprocess.run([construct_command], shell=True)
self.assertEqual(res.returncode, 0)

res = self.__get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
self.assertEqual(res.returncode, 0)
params_str = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 2', params_str[0])
self.assertEqual('nodes (k): 16', params_str[1])
self.assertEqual('mode: basic', params_str[2])
out = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 2', out[0])
self.assertEqual('nodes (k): 16', out[1])
self.assertEqual('mode: basic', out[2])

# TODO: add 'hashstr' once the canonical mode is implemented for it
@parameterized.expand([repr for repr in BUILDS if repr != 'hashstr'])
Expand All @@ -227,12 +223,12 @@ def test_build_tiny_k_parallel_canonical(self, build):
res = subprocess.run([construct_command], shell=True)
self.assertEqual(res.returncode, 0)

res = self.__get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
self.assertEqual(res.returncode, 0)
params_str = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 2', params_str[0])
self.assertEqual('nodes (k): 16', params_str[1])
self.assertEqual('mode: canonical', params_str[2])
out = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 2', out[0])
self.assertEqual('nodes (k): 16', out[1])
self.assertEqual('mode: canonical', out[2])

@parameterized.expand(BUILDS)
def test_build_from_kmc(self, build):
Expand All @@ -249,12 +245,12 @@ def test_build_from_kmc(self, build):
res = subprocess.run([construct_command], shell=True)
self.assertEqual(res.returncode, 0)

res = self.__get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
self.assertEqual(res.returncode, 0)
params_str = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 11', params_str[0])
self.assertEqual('nodes (k): 469983', params_str[1])
self.assertEqual('mode: basic', params_str[2])
out = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 11', out[0])
self.assertEqual('nodes (k): 469983', out[1])
self.assertEqual('mode: basic', out[2])

@parameterized.expand(BUILDS)
def test_build_from_kmc_both(self, build):
Expand All @@ -271,12 +267,12 @@ def test_build_from_kmc_both(self, build):
res = subprocess.run([construct_command], shell=True)
self.assertEqual(res.returncode, 0)

res = self.__get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
self.assertEqual(res.returncode, 0)
params_str = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 11', params_str[0])
self.assertEqual('nodes (k): 802920', params_str[1])
self.assertEqual('mode: basic', params_str[2])
out = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 11', out[0])
self.assertEqual('nodes (k): 802920', out[1])
self.assertEqual('mode: basic', out[2])

@parameterized.expand([repr for repr in BUILDS if repr != 'hashstr'])
@unittest.skipIf(PROTEIN_MODE, "No canonical mode for Protein alphabets")
Expand All @@ -295,12 +291,12 @@ def test_build_from_kmc_canonical(self, build):
res = subprocess.run([construct_command], shell=True)
self.assertEqual(res.returncode, 0)

res = self.__get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
self.assertEqual(res.returncode, 0)
params_str = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 11', params_str[0])
self.assertEqual('nodes (k): 802920', params_str[1])
self.assertEqual('mode: canonical', params_str[2])
out = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 11', out[0])
self.assertEqual('nodes (k): 802920', out[1])
self.assertEqual('mode: canonical', out[2])

@parameterized.expand([repr for repr in BUILDS if repr != 'hashstr'])
@unittest.skipIf(PROTEIN_MODE, "No canonical mode for Protein alphabets")
Expand All @@ -319,12 +315,12 @@ def test_build_from_kmc_both_canonical(self, build):
res = subprocess.run([construct_command], shell=True)
self.assertEqual(res.returncode, 0)

res = self.__get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
self.assertEqual(res.returncode, 0)
params_str = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 11', params_str[0])
self.assertEqual('nodes (k): 802920', params_str[1])
self.assertEqual('mode: canonical', params_str[2])
out = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 11', out[0])
self.assertEqual('nodes (k): 802920', out[1])
self.assertEqual('mode: canonical', out[2])

@parameterized.expand(['succinct', 'succinct_disk'])
@unittest.skipUnless(DNA_MODE, "Need to adapt suffixes for other alphabets")
Expand Down Expand Up @@ -358,13 +354,13 @@ def test_build_chunks_from_kmc(self, build):
self.assertEqual(res.returncode, 0)

# Check graph
res = self.__get_stats(self.tempdir.name + '/graph_from_chunks'
res = self._get_stats(self.tempdir.name + '/graph_from_chunks'
+ graph_file_extension[representation])
self.assertEqual(res.returncode, 0)
params_str = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 11', params_str[0])
self.assertEqual('nodes (k): 469983', params_str[1])
self.assertEqual('mode: basic', params_str[2])
out = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 11', out[0])
self.assertEqual('nodes (k): 469983', out[1])
self.assertEqual('mode: basic', out[2])

@parameterized.expand(['succinct', 'succinct_disk'])
@unittest.skipUnless(DNA_MODE, "Need to adapt suffixes for other alphabets")
Expand Down Expand Up @@ -398,13 +394,13 @@ def test_build_chunks_from_kmc_canonical(self, build):
self.assertEqual(res.returncode, 0)

# Check graph
res = self.__get_stats(self.tempdir.name + '/graph_from_chunks'
res = self._get_stats(self.tempdir.name + '/graph_from_chunks'
+ graph_file_extension[representation])
self.assertEqual(res.returncode, 0)
params_str = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 11', params_str[0])
self.assertEqual('nodes (k): 802920', params_str[1])
self.assertEqual('mode: canonical', params_str[2])
out = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 11', out[0])
self.assertEqual('nodes (k): 802920', out[1])
self.assertEqual('mode: canonical', out[2])


if __name__ == '__main__':
Expand Down
Loading

0 comments on commit 43ed600

Please sign in to comment.