Skip to content

Commit

Permalink
test dataset with only image csvs, uses default config
Browse files Browse the repository at this point in the history
  • Loading branch information
mcquin committed Jan 22, 2018
1 parent 3b9b7f0 commit 8a36b56
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 81 deletions.
10 changes: 8 additions & 2 deletions cytominer_database/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import csv
import glob
import logging
import os
import pkg_resources
import tempfile
Expand All @@ -12,6 +13,8 @@
# reset warnings to default after importing csvkit.
warnings.resetwarnings()

logger = logging.getLogger(__name__)


def find_directories(directory):
"""
Expand Down Expand Up @@ -142,7 +145,10 @@ def read_config(filename):
pkg_resources.resource_filename("cytominer_database", "config/config_default.ini"), # default config file
filename
]:
with open(config_filename, "r") as fd:
config.read_file(fd)
try:
with open(config_filename, "r") as fd:
config.read_file(fd)
except IOError as e:
logger.warn("Unable to read configuration file: {}.".format(config_filename))

return config
73 changes: 23 additions & 50 deletions tests/commands/test_command_ingest.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import os

import click.testing
import configparser
import backports.tempfile
import odo
import pandas
import pkg_resources
import pandas as pd
import pytest

import cytominer_database.command
Expand All @@ -23,12 +21,10 @@ def test_help(runner):


def test_run(dataset, runner):
config_file = os.path.join(dataset["data_dir"], "config.ini")
opts = ["ingest"]

opts = [
"ingest",
"--config-file", config_file
]
if dataset["config"]:
opts += ["--config-file", os.path.join(dataset["data_dir"], dataset["config"])]

if dataset["munge"]:
opts += ["--munge"]
Expand All @@ -44,32 +40,23 @@ def test_run(dataset, runner):

result = runner.invoke(cytominer_database.command.command, opts)

assert result.exit_code == 0

config = configparser.ConfigParser()

with open(config_file, "r") as config_fd:
config.read_file(config_fd)

for (k, v) in dict({"cells": "Cells.csv", "cytoplasm": "Cytoplasm.csv", "nuclei": "Nuclei.csv"}).items():
config["filenames"][k] = v
assert result.exit_code == 0, result.output

for table_key in ["image", "cells", "cytoplasm", "nuclei"]:
csv_filename = os.path.join(temp_dir, config["filenames"][table_key])
for blob in dataset["ingest"]:
table_name = blob["table"]

table_name = config["filenames"][table_key].split(".")[0]
csv_pathname = os.path.join(temp_dir, "{}.csv".format(table_name))

odo.odo("sqlite:///{}::{}".format(str(sqlite_file), table_name), csv_filename)
odo.odo("sqlite:///{}::{}".format(str(sqlite_file), table_name), csv_pathname)

df = pandas.read_csv(csv_filename)
df = pd.read_csv(csv_pathname)

assert df.shape[0] == dataset["ingest"]["{}_nrows".format(table_name)]
assert df.shape[0] == blob["nrows"]

assert df.shape[1] == dataset["ingest"]["{}_ncols".format(table_name)] + 1
assert df.shape[1] == blob["ncols"] + 1

if table_key != "image":
assert df.groupby(["TableNumber", "ImageNumber"]).size().sum() == \
dataset["ingest"]["{}_nrows".format(table_name)]
if table_name.lower() != "image":
assert df.groupby(["TableNumber", "ImageNumber"]).size().sum() == blob["nrows"]


def test_run_defaults(cellpainting, runner):
Expand All @@ -91,32 +78,18 @@ def test_run_defaults(cellpainting, runner):

assert result.exit_code == 0

config = configparser.ConfigParser()

config_file = pkg_resources.resource_filename(
"cytominer_database",
os.path.join("config", "config_cellpainting.ini")
)

with open(config_file, "r") as config_fd:
config.read_file(config_fd)

for (k, v) in dict({"cells": "Cells.csv", "cytoplasm": "Cytoplasm.csv", "nuclei": "Nuclei.csv"}).items():
config["filenames"][k] = v

for table_key in ["image", "cells", "cytoplasm", "nuclei"]:
csv_filename = os.path.join(temp_dir, config["filenames"][table_key])
for blob in cellpainting["ingest"]:
table_name = blob["table"]

table_name = config["filenames"][table_key].split(".")[0]
csv_pathname = os.path.join(temp_dir, "{}.csv".format(table_name))

odo.odo("sqlite:///{}::{}".format(str(sqlite_file), table_name), csv_filename)
odo.odo("sqlite:///{}::{}".format(str(sqlite_file), table_name), csv_pathname)

df = pandas.read_csv(csv_filename)
df = pd.read_csv(csv_pathname)

assert df.shape[0] == cellpainting["ingest"]["{}_nrows".format(table_name)]
assert df.shape[0] == blob["nrows"]

assert df.shape[1] == cellpainting["ingest"]["{}_ncols".format(table_name)] + 1
assert df.shape[1] == blob["ncols"] + 1

if table_key != "image":
assert df.groupby(["TableNumber", "ImageNumber"]).size().sum() == \
cellpainting["ingest"]["{}_nrows".format(table_name)]
if table_name.lower() != "image":
assert df.groupby(["TableNumber", "ImageNumber"]).size().sum() == blob["nrows"]
85 changes: 64 additions & 21 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,50 +8,90 @@ def pytest_addoption(parser):
def pytest_generate_tests(metafunc):
if "dataset" in metafunc.fixturenames:
if metafunc.config.option.dataset is None:
metafunc.parametrize("dataset", ["htqc", "cellpainting"], indirect=True)
metafunc.parametrize("dataset", ["htqc", "cellpainting", "qc"], indirect=True)
else:
assert metafunc.config.option.dataset in ["htqc", "cellpainting"]
assert metafunc.config.option.dataset in ["htqc", "cellpainting", "qc"]

metafunc.parametrize("dataset", [metafunc.config.option.dataset], indirect=True)


@pytest.fixture
def cellpainting():
return {
"config": "config.ini",
"data_dir": "tests/data_b",
"image_csv": "Image.csv",
"ingest":
"ingest": [
{
"Image_nrows": 4,
"Image_ncols": 6,
"Cells_nrows": 40,
"Cells_ncols": 586,
"Cytoplasm_nrows": 40,
"Cytoplasm_ncols": 572,
"Nuclei_nrows": 40,
"Nuclei_ncols": 595
"ncols": 586,
"nrows": 40,
"table": "Cells"
},
{
"ncols": 572,
"nrows": 40,
"table": "Cytoplasm"
},
{
"ncols": 6,
"nrows": 4,
"table": "image"
},
{
"ncols": 595,
"nrows": 40,
"table": "Nuclei"
}
],
"munge": False
}


@pytest.fixture
def htqc():
return {
"config": "config.ini",
"data_dir": "tests/data_a",
"munged_dir": "tests/data_a_munged",
"image_csv": "image.csv",
"ingest":
"ingest": [
{
"ncols": 294,
"nrows": 40,
"table": "Cells"
},
{
"ncols": 279,
"nrows": 40,
"table": "Cytoplasm"
},
{
"image_nrows": 8,
"image_ncols": 229,
"Cells_nrows": 40,
"Cells_ncols": 294,
"Cytoplasm_nrows": 40,
"Cytoplasm_ncols": 279,
"Nuclei_nrows": 40,
"Nuclei_ncols": 287
"ncols": 229,
"nrows": 8,
"table": "image"
},
{
"ncols": 287,
"nrows": 40,
"table": "Nuclei"
}
],
"munge": True
}


# Fixture returning the description of the "qc" test dataset (tests/data_c).
# "config" is None: no dataset-specific configuration file is named here.
# "ingest" lists the expected tables; this dataset has only one entry,
# the "Image" table, with its expected row and column counts.
@pytest.fixture
def qc():
return {
"config": None,
"data_dir": "tests/data_c",
"image_csv": "Image.csv",
"ingest": [
{
"nrows": 8,
"ncols": 229,
"table": "Image"
}
],
"munge": True
}

Expand All @@ -64,4 +104,7 @@ def dataset(request):
if request.param == "cellpainting":
return cellpainting()

if request.param == "qc":
return qc()

raise ValueError("No such dataset: {}".format(request.param))
15 changes: 7 additions & 8 deletions tests/test_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,18 @@ def test_seed(dataset):
target="sqlite:///{}".format(str(sqlite_file))
)

for csv_filename in [dataset["image_csv"], "Cells.csv", "Cytoplasm.csv", "Nuclei.csv"]:
csv_pathname = os.path.join(temp_dir, csv_filename)
for blob in dataset["ingest"]:
table_name = blob["table"]

table_name = os.path.splitext(csv_filename)[0]
csv_pathname = os.path.join(temp_dir, "{}.csv".format(table_name))

odo.odo("sqlite:///{}::{}".format(str(sqlite_file), table_name), csv_pathname)

df = pd.read_csv(csv_pathname)

assert df.shape[0] == dataset["ingest"]["{}_nrows".format(table_name)]
assert df.shape[0] == blob["nrows"]

assert df.shape[1] == dataset["ingest"]["{}_ncols".format(table_name)] + 1
assert df.shape[1] == blob["ncols"] + 1

if csv_filename != dataset["image_csv"]:
assert df.groupby(["TableNumber", "ImageNumber"]).size().sum() == \
dataset["ingest"]["{}_nrows".format(table_name)]
if table_name.lower() != "image":
assert df.groupby(["TableNumber", "ImageNumber"]).size().sum() == blob["nrows"]

0 comments on commit 8a36b56

Please sign in to comment.