From c9b9529e419e98cf3ceaf445c8303ccc2c94e743 Mon Sep 17 00:00:00 2001 From: Rosie Wood Date: Thu, 5 Dec 2024 11:38:25 +0000 Subject: [PATCH] update maptext and dptxext detr tests --- .../test_text_spotting/test_dptext_runner.py | 226 ++++++++++++++++++ .../test_text_spotting/test_maptext_runner.py | 76 ++++++ 2 files changed, 302 insertions(+) diff --git a/tests/test_text_spotting/test_dptext_runner.py b/tests/test_text_spotting/test_dptext_runner.py index 0c42fea0..71327ab5 100644 --- a/tests/test_text_spotting/test_dptext_runner.py +++ b/tests/test_text_spotting/test_dptext_runner.py @@ -5,11 +5,13 @@ import pickle import geopandas as gpd +import numpy as np import pandas as pd import pytest from detectron2.engine import DefaultPredictor from detectron2.structures.instances import Instances from dptext_detr.config import get_cfg +from shapely import Polygon from mapreader import DPTextDETRRunner from mapreader.load import MapImages @@ -44,6 +46,7 @@ def init_dataframes(sample_dir, tmp_path): maps.add_metadata(f"{sample_dir}/mapreader_text_metadata.csv") maps.patchify_all(patch_size=800, path_save=tmp_path) maps.check_georeferencing() + assert maps.georeferenced parent_df, patch_df = maps.convert_images() return parent_df, patch_df @@ -141,6 +144,80 @@ def test_dptext_init_tsv(init_dataframes, tmp_path): assert isinstance(runner.patch_df.iloc[0]["coordinates"], tuple) +def test_dptext_init_geojson(init_dataframes, tmp_path, mock_response): + parent_df, patch_df = init_dataframes + parent_df.to_file(f"{tmp_path}/parent_df.geojson", driver="GeoJSON") + patch_df.to_file(f"{tmp_path}/patch_df.geojson", driver="GeoJSON") + runner = DPTextDETRRunner( + f"{tmp_path}/patch_df.geojson", + parent_df=f"{tmp_path}/parent_df.geojson", + cfg_file=f"{DPTEXT_DETR_PATH}/configs/DPText_DETR/ArT/R_50_poly.yaml", + ) + assert isinstance(runner, DPTextDETRRunner) + assert isinstance(runner.predictor, DefaultPredictor) + assert isinstance(runner.parent_df.iloc[0]["geometry"], Polygon) + out = runner.run_all() + assert isinstance(out, dict) + assert "patch-0-0-800-40-#mapreader_text.png#.png" in out.keys() + assert isinstance(out["patch-0-0-800-40-#mapreader_text.png#.png"], list) + assert isinstance( + out["patch-0-0-800-40-#mapreader_text.png#.png"][0], PatchPrediction + ) + + +def test_dptext_init_errors(init_dataframes): + parent_df, patch_df = init_dataframes + with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"): + DPTextDETRRunner( + patch_df="fake_file.txt", + parent_df=parent_df, + cfg_file=f"{DPTEXT_DETR_PATH}/configs/DPText_DETR/ArT/R_50_poly.yaml", + ) + with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"): + DPTextDETRRunner( + patch_df=patch_df, + parent_df="fake_file.txt", + cfg_file=f"{DPTEXT_DETR_PATH}/configs/DPText_DETR/ArT/R_50_poly.yaml", + ) + with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"): + DPTextDETRRunner( + patch_df=np.array([1, 2, 3]), + parent_df=parent_df, + ) + with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"): + DPTextDETRRunner( + patch_df=patch_df, + parent_df=np.array([1, 2, 3]), + ) + + +def test_dptext_check_georeferencing(init_dataframes): + parent_df, patch_df = init_dataframes + runner = DPTextDETRRunner( + patch_df, + parent_df=parent_df, + cfg_file=f"{DPTEXT_DETR_PATH}/configs/DPText_DETR/ArT/R_50_poly.yaml", + ) + runner.check_georeferencing() + assert runner.georeferenced + + runner = DPTextDETRRunner( + patch_df, + parent_df=parent_df.drop(columns=["dlat", "dlon"]), + cfg_file=f"{DPTEXT_DETR_PATH}/configs/DPText_DETR/ArT/R_50_poly.yaml", + ) + runner.check_georeferencing() + assert runner.georeferenced + + runner = DPTextDETRRunner( + patch_df, + parent_df=parent_df.drop(columns=["coordinates"]), + cfg_file=f"{DPTEXT_DETR_PATH}/configs/DPText_DETR/ArT/R_50_poly.yaml", + ) + runner.check_georeferencing() + assert not runner.georeferenced + + def test_dptext_run_all(init_runner, mock_response): runner = init_runner # dict @@ -238,3 +315,152 @@ def test_dptext_save_to_geojson(runner_run_all, tmp_path, mock_response): assert set(gdf.columns) == set( ["image_id", "patch_id", "pixel_geometry", "geometry", "crs", "score"] ) + + +def test_dptext_save_to_geojson_centroid(runner_run_all, tmp_path, mock_response): + runner = runner_run_all + _ = runner.convert_to_coords() + runner.save_to_geojson(f"{tmp_path}/text_centroid.geojson", centroid=True) + assert os.path.exists(f"{tmp_path}/text_centroid.geojson") + gdf_centroid = gpd.read_file(f"{tmp_path}/text_centroid.geojson") + assert isinstance(gdf_centroid, gpd.GeoDataFrame) + assert set(gdf_centroid.columns) == set( + [ + "image_id", + "patch_id", + "pixel_geometry", + "geometry", + "crs", + "score", + "polygon", + ] + ) + + +def test_dptext_load_geo_predictions(runner_run_all, tmp_path): + runner = runner_run_all + _ = runner.convert_to_coords() + runner.save_to_geojson(f"{tmp_path}/text.geojson") + runner.geo_predictions = {} + runner.load_geo_predictions(f"{tmp_path}/text.geojson") + assert len(runner.geo_predictions) + assert "mapreader_text.png" in runner.geo_predictions.keys() + assert isinstance(runner.geo_predictions["mapreader_text.png"], list) + assert isinstance(runner.geo_predictions["mapreader_text.png"][0], GeoPrediction) + + +def test_dptext_load_geo_predictions_errors(runner_run_all, tmp_path): + runner = runner_run_all + with pytest.raises(ValueError, match="must be a path to a geojson file"): + runner.load_geo_predictions("fakefile.csv") + + +def test_dptext_save_to_csv_polygon(runner_run_all, tmp_path, mock_response): + runner = runner_run_all + # patch + runner.save_to_csv(tmp_path) + assert os.path.exists(f"{tmp_path}/patch_predictions.csv") + # parent + _ = runner.convert_to_parent_pixel_bounds() + runner.save_to_csv(tmp_path) + assert os.path.exists(f"{tmp_path}/patch_predictions.csv") + assert os.path.exists(f"{tmp_path}/parent_predictions.csv") + # geo + _ = runner.convert_to_coords() + runner.save_to_csv(tmp_path) + assert os.path.exists(f"{tmp_path}/patch_predictions.csv") + assert os.path.exists(f"{tmp_path}/parent_predictions.csv") + assert os.path.exists(f"{tmp_path}/geo_predictions.csv") + + +def test_dptext_save_to_csv_centroid(runner_run_all, tmp_path, mock_response): + runner = runner_run_all + # patch + runner.save_to_csv(tmp_path, centroid=True) + assert os.path.exists(f"{tmp_path}/patch_predictions.csv") + # parent + _ = runner.convert_to_parent_pixel_bounds() + runner.save_to_csv(tmp_path, centroid=True) + assert os.path.exists(f"{tmp_path}/patch_predictions.csv") + assert os.path.exists(f"{tmp_path}/parent_predictions.csv") + # geo + _ = runner.convert_to_coords() + runner.save_to_csv(tmp_path, centroid=True) + assert os.path.exists(f"{tmp_path}/patch_predictions.csv") + assert os.path.exists(f"{tmp_path}/parent_predictions.csv") + assert os.path.exists(f"{tmp_path}/geo_predictions.csv") + + +def test_dptext_save_to_csv_errors(runner_run_all, tmp_path, mock_response): + runner = runner_run_all + runner.patch_predictions = {} + with pytest.raises(ValueError, match="No patch predictions found"): + runner.save_to_csv(tmp_path) + + +def test_dptext_load_patch_predictions(runner_run_all, tmp_path): + runner = runner_run_all + _ = runner.convert_to_coords() + assert len(runner.geo_predictions) # this will be empty after reloading + runner.save_to_csv(tmp_path) + runner.load_patch_predictions(f"{tmp_path}/patch_predictions.csv") + assert len(runner.patch_predictions) + assert len(runner.geo_predictions) == 0 + assert ( + "patch-0-0-800-40-#mapreader_text.png#.png" in runner.patch_predictions.keys() + ) + assert isinstance( + runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"], list + ) + assert isinstance( + runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"][0], + PatchPrediction, + ) + + +def test_dptext_load_patch_predictions_dataframe(runner_run_all): + runner = runner_run_all + patch_preds = runner._dict_to_dataframe(runner.patch_predictions) + _ = runner.convert_to_coords() + assert len(runner.geo_predictions) # this will be empty after reloading + runner.load_patch_predictions(patch_preds) + assert len(runner.patch_predictions) + assert len(runner.geo_predictions) == 0 + assert ( + "patch-0-0-800-40-#mapreader_text.png#.png" in runner.patch_predictions.keys() + ) + assert isinstance( + runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"], list + ) + assert isinstance( + runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"][0], + PatchPrediction, + ) + + +def test_dptext_load_patch_predictions_centroid(runner_run_all, tmp_path): + runner = runner_run_all + _ = runner.convert_to_coords() + assert len(runner.geo_predictions) + runner.save_to_csv(tmp_path, centroid=True) + runner.load_patch_predictions(f"{tmp_path}/patch_predictions.csv") + assert len(runner.patch_predictions) + assert len(runner.geo_predictions) == 0 + assert ( + "patch-0-0-800-40-#mapreader_text.png#.png" in runner.patch_predictions.keys() + ) + assert isinstance( + runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"], list + ) + assert isinstance( + runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"][0], + PatchPrediction, + ) + + +def test_dptext_load_patch_predictions_errors(runner_run_all, tmp_path): + runner = runner_run_all + with pytest.raises( + ValueError, match="must be a pandas DataFrame or path to a CSV file" + ): + runner.load_patch_predictions("fake_file.geojson") diff --git a/tests/test_text_spotting/test_maptext_runner.py b/tests/test_text_spotting/test_maptext_runner.py index 6bd20b08..0a4ff5bc 100644 --- a/tests/test_text_spotting/test_maptext_runner.py +++ b/tests/test_text_spotting/test_maptext_runner.py @@ -5,11 +5,13 @@ import pickle import geopandas as gpd +import numpy as np import pandas as pd import pytest from detectron2.engine import DefaultPredictor from detectron2.structures.instances import Instances from maptextpipeline.config import get_cfg +from shapely import Polygon from mapreader import MapTextRunner from mapreader.load import MapImages @@ -141,6 +143,80 @@ def test_maptext_init_tsv(init_dataframes, tmp_path): assert isinstance(runner.patch_df.iloc[0]["coordinates"], tuple) +def test_maptext_init_geojson(init_dataframes, tmp_path, mock_response): + parent_df, patch_df = init_dataframes + parent_df.to_file(f"{tmp_path}/parent_df.geojson", driver="GeoJSON") + patch_df.to_file(f"{tmp_path}/patch_df.geojson", driver="GeoJSON") + runner = MapTextRunner( + f"{tmp_path}/patch_df.geojson", + parent_df=f"{tmp_path}/parent_df.geojson", + cfg_file=f"{MAPTEXTPIPELINE_PATH}/configs/ViTAEv2_S/rumsey/test.yaml", + ) + assert isinstance(runner, MapTextRunner) + assert isinstance(runner.predictor, DefaultPredictor) + assert isinstance(runner.parent_df.iloc[0]["geometry"], Polygon) + out = runner.run_all() + assert isinstance(out, dict) + assert "patch-0-0-800-40-#mapreader_text.png#.png" in out.keys() + assert isinstance(out["patch-0-0-800-40-#mapreader_text.png#.png"], list) + assert isinstance( + out["patch-0-0-800-40-#mapreader_text.png#.png"][0], PatchPrediction + ) + + +def test_maptext_init_errors(init_dataframes): + parent_df, patch_df = init_dataframes + with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"): + MapTextRunner( + patch_df="fake_file.txt", + parent_df=parent_df, + cfg_file=f"{MAPTEXTPIPELINE_PATH}/configs/ViTAEv2_S/rumsey/test.yaml", + ) + with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"): + MapTextRunner( + patch_df=patch_df, + parent_df="fake_file.txt", + cfg_file=f"{MAPTEXTPIPELINE_PATH}/configs/ViTAEv2_S/rumsey/test.yaml", + ) + with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"): + MapTextRunner( + patch_df=np.array([1, 2, 3]), + parent_df=parent_df, + ) + with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"): + MapTextRunner( + patch_df=patch_df, + parent_df=np.array([1, 2, 3]), + ) + + +def test_maptext_check_georeferencing(init_dataframes): + parent_df, patch_df = init_dataframes + runner = MapTextRunner( + patch_df, + parent_df=parent_df, + cfg_file=f"{MAPTEXTPIPELINE_PATH}/configs/ViTAEv2_S/rumsey/test.yaml", + ) + runner.check_georeferencing() + assert runner.georeferenced + + runner = MapTextRunner( + patch_df, + parent_df=parent_df.drop(columns=["dlat", "dlon"]), + cfg_file=f"{MAPTEXTPIPELINE_PATH}/configs/ViTAEv2_S/rumsey/test.yaml", + ) + runner.check_georeferencing() + assert runner.georeferenced + + runner = MapTextRunner( + patch_df, + parent_df=parent_df.drop(columns=["coordinates"]), + cfg_file=f"{MAPTEXTPIPELINE_PATH}/configs/ViTAEv2_S/rumsey/test.yaml", + ) + runner.check_georeferencing() + assert not runner.georeferenced + + def test_maptext_run_all(init_runner, mock_response): runner = init_runner # dict