Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci(mypy): add mypy check and adjust code for types #439

Merged
merged 11 commits into the base branch from the source branch
Sep 28, 2024
22 changes: 22 additions & 0 deletions .github/workflows/integration-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,28 @@ jobs:
uses: pre-commit/[email protected]
with:
extra_args: --all-files
python-type-checks:
# This job is used to check Python types
name: Python type checks
# Avoid fail-fast to retain output
strategy:
fail-fast: false
runs-on: ubuntu-22.04
if: github.event_name != 'schedule'
steps:
- name: Checkout repo
uses: actions/checkout@v4
- name: Setup python, and check pre-commit cache
uses: ./.github/actions/setup-env
with:
python-version: ${{ env.TARGET_PYTHON_VERSION }}
cache-pre-commit: false
cache-venv: true
setup-poetry: true
install-deps: true
- name: Run mypy
run: |
poetry run mypy .
integration-test:
name: Pytest (Python ${{ matrix.python-version }} on ${{ matrix.os }})
# Runs pytest on all tested versions of python and OSes
Expand Down
151 changes: 143 additions & 8 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 9 additions & 5 deletions pycytominer/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def aggregate(
# Only extract single object column in preparation for count
if compute_object_count:
count_object_df = (
population_df.loc[:, np.union1d(strata, [object_feature])]
population_df.loc[:, list(np.union1d(strata, [object_feature]))]
.groupby(strata)[object_feature]
.count()
.reset_index()
Expand All @@ -92,7 +92,9 @@ def aggregate(

if features == "infer":
features = infer_cp_features(population_df)
population_df = population_df[features]

# recast as dataframe to protect against scenarios where a series may be returned
population_df = pd.DataFrame(population_df[features])

# Fix dtype of input features (they should all be floats!)
population_df = population_df.astype(float)
Expand All @@ -101,7 +103,9 @@ def aggregate(
population_df = pd.concat([strata_df, population_df], axis="columns")

# Perform aggregating function
population_df = population_df.groupby(strata, dropna=False)
# Note: type ignore added below to address the change in variable types for
# label `population_df`.
population_df = population_df.groupby(strata, dropna=False) # type: ignore[assignment]

if operation == "median":
population_df = population_df.median().reset_index()
Expand All @@ -118,10 +122,10 @@ def aggregate(
for column in population_df.columns
if column in ["ImageNumber", "ObjectNumber"]
]:
population_df = population_df.drop([columns_to_drop], axis="columns")
population_df = population_df.drop(columns=columns_to_drop, axis="columns")

if output_file is not None:
output(
return output(
df=population_df,
output_filename=output_file,
output_type=output_type,
Expand Down
5 changes: 3 additions & 2 deletions pycytominer/cyto_utils/DeepProfiler_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
import pandas as pd
import warnings

from pycytominer import aggregate, normalize
from pycytominer.cyto_utils import (
# use mypy ignores below to avoid duplicate import warnings
from pycytominer import aggregate, normalize # type: ignore[no-redef]
from pycytominer.cyto_utils import ( # type: ignore[no-redef]
load_npz_features,
load_npz_locations,
infer_cp_features,
Expand Down
4 changes: 3 additions & 1 deletion pycytominer/cyto_utils/cell_locations.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,9 @@ def __init__(
"s3", config=botocore.config.Config(signature_version=botocore.UNSIGNED)
)

def _expanduser(self, obj: Union[str, None]):
def _expanduser(
self, obj: Union[str, pd.DataFrame, sqlalchemy.engine.Engine, None]
):
"""Expand the user home directory in a path"""
if obj is not None and isinstance(obj, str) and not obj.startswith("s3://"):
return pathlib.Path(obj).expanduser().as_posix()
Expand Down
12 changes: 8 additions & 4 deletions pycytominer/cyto_utils/cells.py
Original file line number Diff line number Diff line change
Expand Up @@ -714,7 +714,7 @@ def merge_single_cells(
"""

# Load the single cell dataframe by merging on the specific linking columns
sc_df = ""
left_compartment_loaded = False
linking_check_cols = []
merge_suffix_rename = []
for left_compartment in self.compartment_linking_cols:
Expand All @@ -737,7 +737,7 @@ def merge_single_cells(
left_compartment
]

if isinstance(sc_df, str):
if not left_compartment_loaded:
sc_df = self.load_compartment(compartment=left_compartment)

if compute_subsample:
Expand All @@ -752,6 +752,8 @@ def merge_single_cells(
sc_df, how="left", on=subset_logic_df.columns.tolist()
).reindex(sc_df.columns, axis="columns")

left_compartment_loaded = True

sc_df = sc_df.merge(
self.load_compartment(compartment=right_compartment),
left_on=[*self.merge_cols, left_link_col],
Expand Down Expand Up @@ -804,11 +806,13 @@ def merge_single_cells(

normalize_args["features"] = features

sc_df = normalize(profiles=sc_df, **normalize_args)
# ignore mypy warnings below as these reference root package imports
sc_df = normalize(profiles=sc_df, **normalize_args) # type: ignore[operator]

# In case platemap metadata is provided, use pycytominer.annotate for metadata
if platemap is not None:
sc_df = annotate(
# ignore mypy warnings below as these reference root package imports
sc_df = annotate( # type: ignore[operator]
profiles=sc_df, platemap=platemap, output_file=None, **kwargs
)

Expand Down
2 changes: 1 addition & 1 deletion pycytominer/cyto_utils/collate.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def collate(
with sqlite3.connect(cache_backend_file, isolation_level=None) as connection:
cursor = connection.cursor()
if column:
if print:
if printtoscreen:
print(f"Adding a Metadata_Plate column based on column {column}")
cursor.execute("ALTER TABLE Image ADD COLUMN Metadata_Plate TEXT;")
cursor.execute(f"UPDATE image SET Metadata_Plate ={column};")
Expand Down
Loading
Loading