Add a test dataset with only image CSVs, which uses the default config

cytomining · Jan 22, 2018 · 8a36b56
1 parent 3b9b7f0
commit 8a36b56
Show file tree

Hide file tree

Showing 4 changed files with 102 additions and 81 deletions.
diff --git a/cytominer_database/utils.py b/cytominer_database/utils.py
@@ -1,5 +1,6 @@
 import csv
 import glob
+import logging
 import os
 import pkg_resources
 import tempfile
@@ -12,6 +13,8 @@
 # reset warnings to default after importing csvkit.
 warnings.resetwarnings()
 
+logger = logging.getLogger(__name__)
+
 
 def find_directories(directory):
     """
@@ -142,7 +145,10 @@ def read_config(filename):
         pkg_resources.resource_filename("cytominer_database", "config/config_default.ini"),  # default config file
         filename
     ]:
-        with open(config_filename, "r") as fd:
-            config.read_file(fd)
+        try:
+            with open(config_filename, "r") as fd:
+                config.read_file(fd)
+        except IOError as e:
+            logger.warn("Unable to read configuration file: {}.".format(config_filename))
 
     return config
diff --git a/tests/commands/test_command_ingest.py b/tests/commands/test_command_ingest.py
@@ -1,11 +1,9 @@
 import os
 
 import click.testing
-import configparser
 import backports.tempfile
 import odo
-import pandas
-import pkg_resources
+import pandas as pd
 import pytest
 
 import cytominer_database.command
@@ -23,12 +21,10 @@ def test_help(runner):
 
 
 def test_run(dataset, runner):
-    config_file = os.path.join(dataset["data_dir"], "config.ini")
+    opts = ["ingest"]
 
-    opts = [
-        "ingest",
-        "--config-file", config_file
-    ]
+    if dataset["config"]:
+        opts += ["--config-file", os.path.join(dataset["data_dir"], dataset["config"])]
 
     if dataset["munge"]:
         opts += ["--munge"]
@@ -44,32 +40,23 @@ def test_run(dataset, runner):
 
         result = runner.invoke(cytominer_database.command.command, opts)
 
-        assert result.exit_code == 0
-
-        config = configparser.ConfigParser()
-
-        with open(config_file, "r") as config_fd:
-            config.read_file(config_fd)
-
-        for (k, v) in dict({"cells": "Cells.csv", "cytoplasm": "Cytoplasm.csv", "nuclei": "Nuclei.csv"}).items():
-            config["filenames"][k] = v
+        assert result.exit_code == 0, result.output
 
-        for table_key in ["image", "cells", "cytoplasm", "nuclei"]:
-            csv_filename = os.path.join(temp_dir, config["filenames"][table_key])
+        for blob in dataset["ingest"]:
+            table_name = blob["table"]
 
-            table_name = config["filenames"][table_key].split(".")[0]
+            csv_pathname = os.path.join(temp_dir, "{}.csv".format(table_name))
 
-            odo.odo("sqlite:///{}::{}".format(str(sqlite_file), table_name), csv_filename)
+            odo.odo("sqlite:///{}::{}".format(str(sqlite_file), table_name), csv_pathname)
 
-            df = pandas.read_csv(csv_filename)
+            df = pd.read_csv(csv_pathname)
 
-            assert df.shape[0] == dataset["ingest"]["{}_nrows".format(table_name)]
+            assert df.shape[0] == blob["nrows"]
 
-            assert df.shape[1] == dataset["ingest"]["{}_ncols".format(table_name)] + 1
+            assert df.shape[1] == blob["ncols"] + 1
 
-            if table_key != "image":
-                assert df.groupby(["TableNumber", "ImageNumber"]).size().sum() == \
-                       dataset["ingest"]["{}_nrows".format(table_name)]
+            if table_name.lower() != "image":
+                assert df.groupby(["TableNumber", "ImageNumber"]).size().sum() == blob["nrows"]
 
 
 def test_run_defaults(cellpainting, runner):
@@ -91,32 +78,18 @@ def test_run_defaults(cellpainting, runner):
 
         assert result.exit_code == 0
 
-        config = configparser.ConfigParser()
-
-        config_file = pkg_resources.resource_filename(
-            "cytominer_database",
-            os.path.join("config", "config_cellpainting.ini")
-        )
-
-        with open(config_file, "r") as config_fd:
-            config.read_file(config_fd)
-
-        for (k, v) in dict({"cells": "Cells.csv", "cytoplasm": "Cytoplasm.csv", "nuclei": "Nuclei.csv"}).items():
-            config["filenames"][k] = v
-
-        for table_key in ["image", "cells", "cytoplasm", "nuclei"]:
-            csv_filename = os.path.join(temp_dir, config["filenames"][table_key])
+        for blob in cellpainting["ingest"]:
+            table_name = blob["table"]
 
-            table_name = config["filenames"][table_key].split(".")[0]
+            csv_pathname = os.path.join(temp_dir, "{}.csv".format(table_name))
 
-            odo.odo("sqlite:///{}::{}".format(str(sqlite_file), table_name), csv_filename)
+            odo.odo("sqlite:///{}::{}".format(str(sqlite_file), table_name), csv_pathname)
 
-            df = pandas.read_csv(csv_filename)
+            df = pd.read_csv(csv_pathname)
 
-            assert df.shape[0] == cellpainting["ingest"]["{}_nrows".format(table_name)]
+            assert df.shape[0] == blob["nrows"]
 
-            assert df.shape[1] == cellpainting["ingest"]["{}_ncols".format(table_name)] + 1
+            assert df.shape[1] == blob["ncols"] + 1
 
-            if table_key != "image":
-                assert df.groupby(["TableNumber", "ImageNumber"]).size().sum() == \
-                       cellpainting["ingest"]["{}_nrows".format(table_name)]
+            if table_name.lower() != "image":
+                assert df.groupby(["TableNumber", "ImageNumber"]).size().sum() == blob["nrows"]
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -8,50 +8,90 @@ def pytest_addoption(parser):
 def pytest_generate_tests(metafunc):
     if "dataset" in metafunc.fixturenames:
         if metafunc.config.option.dataset is None:
-            metafunc.parametrize("dataset", ["htqc", "cellpainting"], indirect=True)
+            metafunc.parametrize("dataset", ["htqc", "cellpainting", "qc"], indirect=True)
         else:
-            assert metafunc.config.option.dataset in ["htqc", "cellpainting"]
+            assert metafunc.config.option.dataset in ["htqc", "cellpainting", "qc"]
 
             metafunc.parametrize("dataset", [metafunc.config.option.dataset], indirect=True)
 
 
 @pytest.fixture
 def cellpainting():
     return {
+        "config": "config.ini",
         "data_dir": "tests/data_b",
         "image_csv": "Image.csv",
-        "ingest":
+        "ingest": [
             {
-                "Image_nrows": 4,
-                "Image_ncols": 6,
-                "Cells_nrows": 40,
-                "Cells_ncols": 586,
-                "Cytoplasm_nrows": 40,
-                "Cytoplasm_ncols": 572,
-                "Nuclei_nrows": 40,
-                "Nuclei_ncols": 595
+                "ncols": 586,
+                "nrows": 40,
+                "table": "Cells"
             },
+            {
+                "ncols": 572,
+                "nrows": 40,
+                "table": "Cytoplasm"
+            },
+            {
+                "ncols": 6,
+                "nrows": 4,
+                "table": "image"
+            },
+            {
+                "ncols": 595,
+                "nrows": 40,
+                "table": "Nuclei"
+            }
+        ],
         "munge": False
     }
 
-
 @pytest.fixture
 def htqc():
     return {
+        "config": "config.ini",
         "data_dir": "tests/data_a",
         "munged_dir": "tests/data_a_munged",
         "image_csv": "image.csv",
-        "ingest":
+        "ingest": [
+            {
+                "ncols": 294,
+                "nrows": 40,
+                "table": "Cells"
+            },
+            {
+                "ncols": 279,
+                "nrows": 40,
+                "table": "Cytoplasm"
+            },
             {
-                "image_nrows": 8,
-                "image_ncols": 229,
-                "Cells_nrows": 40,
-                "Cells_ncols": 294,
-                "Cytoplasm_nrows": 40,
-                "Cytoplasm_ncols": 279,
-                "Nuclei_nrows": 40,
-                "Nuclei_ncols": 287
+                "ncols": 229,
+                "nrows": 8,
+                "table": "image"
             },
+            {
+                "ncols": 287,
+                "nrows": 40,
+                "table": "Nuclei"
+            }
+        ],
+        "munge": True
+    }
+
+
+@pytest.fixture
+def qc():
+    return {
+        "config": None,
+        "data_dir": "tests/data_c",
+        "image_csv": "Image.csv",
+        "ingest": [
+            {
+                "nrows": 8,
+                "ncols": 229,
+                "table": "Image"
+            }
+        ],
         "munge": True
     }
 
@@ -64,4 +104,7 @@ def dataset(request):
     if request.param == "cellpainting":
         return cellpainting()
 
+    if request.param == "qc":
+        return qc()
+
     raise ValueError("No such dataset: {}".format(request.param))
diff --git a/tests/test_ingest.py b/tests/test_ingest.py
@@ -22,19 +22,18 @@ def test_seed(dataset):
             target="sqlite:///{}".format(str(sqlite_file))
         )
 
-        for csv_filename in [dataset["image_csv"], "Cells.csv", "Cytoplasm.csv", "Nuclei.csv"]:
-            csv_pathname = os.path.join(temp_dir, csv_filename)
+        for blob in dataset["ingest"]:
+            table_name = blob["table"]
 
-            table_name = os.path.splitext(csv_filename)[0]
+            csv_pathname = os.path.join(temp_dir, "{}.csv".format(table_name))
 
             odo.odo("sqlite:///{}::{}".format(str(sqlite_file), table_name), csv_pathname)
 
             df = pd.read_csv(csv_pathname)
 
-            assert df.shape[0] == dataset["ingest"]["{}_nrows".format(table_name)]
+            assert df.shape[0] == blob["nrows"]
 
-            assert df.shape[1] == dataset["ingest"]["{}_ncols".format(table_name)] + 1
+            assert df.shape[1] == blob["ncols"] + 1
 
-            if csv_filename != dataset["image_csv"]:
-                assert df.groupby(["TableNumber", "ImageNumber"]).size().sum() == \
-                       dataset["ingest"]["{}_nrows".format(table_name)]
+            if table_name.lower() != "image":
+                assert df.groupby(["TableNumber", "ImageNumber"]).size().sum() == blob["nrows"]