From c9b9529e419e98cf3ceaf445c8303ccc2c94e743 Mon Sep 17 00:00:00 2001
From: Rosie Wood <rwood@turing.ac.uk>
Date: Thu, 5 Dec 2024 11:38:25 +0000
Subject: [PATCH] update maptext and dptext detr tests

---
 .../test_text_spotting/test_dptext_runner.py  | 226 ++++++++++++++++++
 .../test_text_spotting/test_maptext_runner.py |  76 ++++++
 2 files changed, 302 insertions(+)

diff --git a/tests/test_text_spotting/test_dptext_runner.py b/tests/test_text_spotting/test_dptext_runner.py
index 0c42fea0..71327ab5 100644
--- a/tests/test_text_spotting/test_dptext_runner.py
+++ b/tests/test_text_spotting/test_dptext_runner.py
@@ -5,11 +5,13 @@
 import pickle
 
 import geopandas as gpd
+import numpy as np
 import pandas as pd
 import pytest
 from detectron2.engine import DefaultPredictor
 from detectron2.structures.instances import Instances
 from dptext_detr.config import get_cfg
+from shapely import Polygon
 
 from mapreader import DPTextDETRRunner
 from mapreader.load import MapImages
@@ -44,6 +46,7 @@ def init_dataframes(sample_dir, tmp_path):
     maps.add_metadata(f"{sample_dir}/mapreader_text_metadata.csv")
     maps.patchify_all(patch_size=800, path_save=tmp_path)
     maps.check_georeferencing()
+    assert maps.georeferenced
     parent_df, patch_df = maps.convert_images()
     return parent_df, patch_df
 
@@ -141,6 +144,80 @@ def test_dptext_init_tsv(init_dataframes, tmp_path):
     assert isinstance(runner.patch_df.iloc[0]["coordinates"], tuple)
 
 
+def test_dptext_init_geojson(init_dataframes, tmp_path, mock_response):
+    parent_df, patch_df = init_dataframes  # fixture yields (parent_df, patch_df)
+    parent_df.to_file(f"{tmp_path}/parent_df.geojson", driver="GeoJSON")
+    patch_df.to_file(f"{tmp_path}/patch_df.geojson", driver="GeoJSON")
+    runner = DPTextDETRRunner(  # both dataframes are given as GeoJSON file paths
+        f"{tmp_path}/patch_df.geojson",
+        parent_df=f"{tmp_path}/parent_df.geojson",
+        cfg_file=f"{DPTEXT_DETR_PATH}/configs/DPText_DETR/ArT/R_50_poly.yaml",
+    )
+    assert isinstance(runner, DPTextDETRRunner)
+    assert isinstance(runner.predictor, DefaultPredictor)
+    assert isinstance(runner.parent_df.iloc[0]["geometry"], Polygon)
+    out = runner.run_all()  # a dict; one known key is checked below
+    assert isinstance(out, dict)
+    assert "patch-0-0-800-40-#mapreader_text.png#.png" in out.keys()
+    assert isinstance(out["patch-0-0-800-40-#mapreader_text.png#.png"], list)
+    assert isinstance(
+        out["patch-0-0-800-40-#mapreader_text.png#.png"][0], PatchPrediction
+    )
+
+
+def test_dptext_init_errors(init_dataframes):
+    parent_df, patch_df = init_dataframes
+    with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"):
+        DPTextDETRRunner(
+            patch_df="fake_file.txt",  # path that is neither CSV/TSV nor geojson
+            parent_df=parent_df,
+            cfg_file=f"{DPTEXT_DETR_PATH}/configs/DPText_DETR/ArT/R_50_poly.yaml",
+        )
+    with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"):
+        DPTextDETRRunner(
+            patch_df=patch_df,
+            parent_df="fake_file.txt",  # same invalid path, now for parent_df
+            cfg_file=f"{DPTEXT_DETR_PATH}/configs/DPText_DETR/ArT/R_50_poly.yaml",
+        )
+    with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"):
+        DPTextDETRRunner(  # NOTE(review): no cfg_file passed in this and the next case
+            patch_df=np.array([1, 2, 3]),  # neither a dataframe nor a path
+            parent_df=parent_df,
+        )
+    with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"):
+        DPTextDETRRunner(
+            patch_df=patch_df,
+            parent_df=np.array([1, 2, 3]),  # neither a dataframe nor a path
+        )
+
+
+def test_dptext_check_georeferencing(init_dataframes):
+    parent_df, patch_df = init_dataframes
+    runner = DPTextDETRRunner(  # case 1: parent_df as returned by the fixture
+        patch_df,
+        parent_df=parent_df,
+        cfg_file=f"{DPTEXT_DETR_PATH}/configs/DPText_DETR/ArT/R_50_poly.yaml",
+    )
+    runner.check_georeferencing()
+    assert runner.georeferenced
+
+    runner = DPTextDETRRunner(  # case 2: "dlat"/"dlon" dropped, still georeferenced
+        patch_df,
+        parent_df=parent_df.drop(columns=["dlat", "dlon"]),
+        cfg_file=f"{DPTEXT_DETR_PATH}/configs/DPText_DETR/ArT/R_50_poly.yaml",
+    )
+    runner.check_georeferencing()
+    assert runner.georeferenced
+
+    runner = DPTextDETRRunner(  # case 3: "coordinates" dropped, not georeferenced
+        patch_df,
+        parent_df=parent_df.drop(columns=["coordinates"]),
+        cfg_file=f"{DPTEXT_DETR_PATH}/configs/DPText_DETR/ArT/R_50_poly.yaml",
+    )
+    runner.check_georeferencing()
+    assert not runner.georeferenced
+
+
 def test_dptext_run_all(init_runner, mock_response):
     runner = init_runner
     # dict
@@ -238,3 +315,152 @@ def test_dptext_save_to_geojson(runner_run_all, tmp_path, mock_response):
     assert set(gdf.columns) == set(
         ["image_id", "patch_id", "pixel_geometry", "geometry", "crs", "score"]
     )
+
+
+def test_dptext_save_to_geojson_centroid(runner_run_all, tmp_path, mock_response):
+    runner = runner_run_all
+    _ = runner.convert_to_coords()  # return value is discarded
+    runner.save_to_geojson(f"{tmp_path}/text_centroid.geojson", centroid=True)
+    assert os.path.exists(f"{tmp_path}/text_centroid.geojson")
+    gdf_centroid = gpd.read_file(f"{tmp_path}/text_centroid.geojson")
+    assert isinstance(gdf_centroid, gpd.GeoDataFrame)
+    assert set(gdf_centroid.columns) == set(  # order-insensitive column check
+        [
+            "image_id",
+            "patch_id",
+            "pixel_geometry",
+            "geometry",
+            "crs",
+            "score",
+            "polygon",  # not in test_dptext_save_to_geojson's expected columns
+        ]
+    )
+
+
+def test_dptext_load_geo_predictions(runner_run_all, tmp_path):
+    runner = runner_run_all
+    _ = runner.convert_to_coords()
+    runner.save_to_geojson(f"{tmp_path}/text.geojson")
+    runner.geo_predictions = {}  # cleared so the load below has to repopulate it
+    runner.load_geo_predictions(f"{tmp_path}/text.geojson")
+    assert len(runner.geo_predictions)  # non-empty again after loading
+    assert "mapreader_text.png" in runner.geo_predictions.keys()
+    assert isinstance(runner.geo_predictions["mapreader_text.png"], list)
+    assert isinstance(runner.geo_predictions["mapreader_text.png"][0], GeoPrediction)
+
+
+def test_dptext_load_geo_predictions_errors(runner_run_all, tmp_path):
+    runner = runner_run_all  # NOTE(review): the tmp_path fixture is unused here
+    with pytest.raises(ValueError, match="must be a path to a geojson file"):
+        runner.load_geo_predictions("fakefile.csv")  # .csv path, not geojson
+
+
+def test_dptext_save_to_csv_polygon(runner_run_all, tmp_path, mock_response):
+    runner = runner_run_all
+    # patch: the first save is expected to write patch_predictions.csv
+    runner.save_to_csv(tmp_path)
+    assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
+    # parent: after converting to parent pixel bounds, parent_predictions.csv too
+    _ = runner.convert_to_parent_pixel_bounds()
+    runner.save_to_csv(tmp_path)
+    assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
+    assert os.path.exists(f"{tmp_path}/parent_predictions.csv")
+    # geo: after converting to coords, geo_predictions.csv is expected as well
+    _ = runner.convert_to_coords()
+    runner.save_to_csv(tmp_path)
+    assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
+    assert os.path.exists(f"{tmp_path}/parent_predictions.csv")
+    assert os.path.exists(f"{tmp_path}/geo_predictions.csv")
+
+
+def test_dptext_save_to_csv_centroid(runner_run_all, tmp_path, mock_response):
+    runner = runner_run_all
+    # patch: the first centroid=True save should write patch_predictions.csv
+    runner.save_to_csv(tmp_path, centroid=True)
+    assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
+    # parent: after converting to parent pixel bounds, parent_predictions.csv too
+    _ = runner.convert_to_parent_pixel_bounds()
+    runner.save_to_csv(tmp_path, centroid=True)
+    assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
+    assert os.path.exists(f"{tmp_path}/parent_predictions.csv")
+    # geo: after converting to coords, geo_predictions.csv is expected as well
+    _ = runner.convert_to_coords()
+    runner.save_to_csv(tmp_path, centroid=True)
+    assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
+    assert os.path.exists(f"{tmp_path}/parent_predictions.csv")
+    assert os.path.exists(f"{tmp_path}/geo_predictions.csv")
+
+
+def test_dptext_save_to_csv_errors(runner_run_all, tmp_path, mock_response):
+    runner = runner_run_all
+    runner.patch_predictions = {}  # with no predictions, save_to_csv should raise
+    with pytest.raises(ValueError, match="No patch predictions found"):
+        runner.save_to_csv(tmp_path)
+
+
+def test_dptext_load_patch_predictions(runner_run_all, tmp_path):
+    runner = runner_run_all
+    _ = runner.convert_to_coords()
+    assert len(runner.geo_predictions)  # non-empty now; asserted empty after reload
+    runner.save_to_csv(tmp_path)
+    runner.load_patch_predictions(f"{tmp_path}/patch_predictions.csv")
+    assert len(runner.patch_predictions)
+    assert len(runner.geo_predictions) == 0  # reloading left geo predictions empty
+    assert (
+        "patch-0-0-800-40-#mapreader_text.png#.png" in runner.patch_predictions.keys()
+    )
+    assert isinstance(
+        runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"], list
+    )
+    assert isinstance(
+        runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"][0],
+        PatchPrediction,
+    )
+
+
+def test_dptext_load_patch_predictions_dataframe(runner_run_all):
+    runner = runner_run_all
+    patch_preds = runner._dict_to_dataframe(runner.patch_predictions)
+    _ = runner.convert_to_coords()
+    assert len(runner.geo_predictions)  # non-empty now; asserted empty after reload
+    runner.load_patch_predictions(patch_preds)  # in-memory object, not a CSV path
+    assert len(runner.patch_predictions)
+    assert len(runner.geo_predictions) == 0  # reloading left geo predictions empty
+    assert (
+        "patch-0-0-800-40-#mapreader_text.png#.png" in runner.patch_predictions.keys()
+    )
+    assert isinstance(
+        runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"], list
+    )
+    assert isinstance(
+        runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"][0],
+        PatchPrediction,
+    )
+
+
+def test_dptext_load_patch_predictions_centroid(runner_run_all, tmp_path):
+    runner = runner_run_all
+    _ = runner.convert_to_coords()
+    assert len(runner.geo_predictions)  # non-empty now; asserted empty after reload
+    runner.save_to_csv(tmp_path, centroid=True)  # CSV saved with centroid=True
+    runner.load_patch_predictions(f"{tmp_path}/patch_predictions.csv")
+    assert len(runner.patch_predictions)
+    assert len(runner.geo_predictions) == 0  # reloading left geo predictions empty
+    assert (
+        "patch-0-0-800-40-#mapreader_text.png#.png" in runner.patch_predictions.keys()
+    )
+    assert isinstance(
+        runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"], list
+    )
+    assert isinstance(
+        runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"][0],
+        PatchPrediction,
+    )
+
+
+def test_dptext_load_patch_predictions_errors(runner_run_all, tmp_path):
+    runner = runner_run_all  # NOTE(review): the tmp_path fixture is unused here
+    with pytest.raises(
+        ValueError, match="must be a pandas DataFrame or path to a CSV file"
+    ):
+        runner.load_patch_predictions("fake_file.geojson")  # geojson path, not CSV
diff --git a/tests/test_text_spotting/test_maptext_runner.py b/tests/test_text_spotting/test_maptext_runner.py
index 6bd20b08..0a4ff5bc 100644
--- a/tests/test_text_spotting/test_maptext_runner.py
+++ b/tests/test_text_spotting/test_maptext_runner.py
@@ -5,11 +5,13 @@
 import pickle
 
 import geopandas as gpd
+import numpy as np
 import pandas as pd
 import pytest
 from detectron2.engine import DefaultPredictor
 from detectron2.structures.instances import Instances
 from maptextpipeline.config import get_cfg
+from shapely import Polygon
 
 from mapreader import MapTextRunner
 from mapreader.load import MapImages
@@ -141,6 +143,80 @@ def test_maptext_init_tsv(init_dataframes, tmp_path):
     assert isinstance(runner.patch_df.iloc[0]["coordinates"], tuple)
 
 
+def test_maptext_init_geojson(init_dataframes, tmp_path, mock_response):
+    parent_df, patch_df = init_dataframes  # fixture yields (parent_df, patch_df)
+    parent_df.to_file(f"{tmp_path}/parent_df.geojson", driver="GeoJSON")
+    patch_df.to_file(f"{tmp_path}/patch_df.geojson", driver="GeoJSON")
+    runner = MapTextRunner(  # both dataframes are given as GeoJSON file paths
+        f"{tmp_path}/patch_df.geojson",
+        parent_df=f"{tmp_path}/parent_df.geojson",
+        cfg_file=f"{MAPTEXTPIPELINE_PATH}/configs/ViTAEv2_S/rumsey/test.yaml",
+    )
+    assert isinstance(runner, MapTextRunner)
+    assert isinstance(runner.predictor, DefaultPredictor)
+    assert isinstance(runner.parent_df.iloc[0]["geometry"], Polygon)
+    out = runner.run_all()  # a dict; one known key is checked below
+    assert isinstance(out, dict)
+    assert "patch-0-0-800-40-#mapreader_text.png#.png" in out.keys()
+    assert isinstance(out["patch-0-0-800-40-#mapreader_text.png#.png"], list)
+    assert isinstance(
+        out["patch-0-0-800-40-#mapreader_text.png#.png"][0], PatchPrediction
+    )
+
+
+def test_maptext_init_errors(init_dataframes):
+    parent_df, patch_df = init_dataframes
+    with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"):
+        MapTextRunner(
+            patch_df="fake_file.txt",  # path that is neither CSV/TSV nor geojson
+            parent_df=parent_df,
+            cfg_file=f"{MAPTEXTPIPELINE_PATH}/configs/ViTAEv2_S/rumsey/test.yaml",
+        )
+    with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"):
+        MapTextRunner(
+            patch_df=patch_df,
+            parent_df="fake_file.txt",  # same invalid path, now for parent_df
+            cfg_file=f"{MAPTEXTPIPELINE_PATH}/configs/ViTAEv2_S/rumsey/test.yaml",
+        )
+    with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"):
+        MapTextRunner(  # NOTE(review): no cfg_file passed in this and the next case
+            patch_df=np.array([1, 2, 3]),  # neither a dataframe nor a path
+            parent_df=parent_df,
+        )
+    with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"):
+        MapTextRunner(
+            patch_df=patch_df,
+            parent_df=np.array([1, 2, 3]),  # neither a dataframe nor a path
+        )
+
+
+def test_maptext_check_georeferencing(init_dataframes):
+    parent_df, patch_df = init_dataframes
+    runner = MapTextRunner(  # case 1: parent_df as returned by the fixture
+        patch_df,
+        parent_df=parent_df,
+        cfg_file=f"{MAPTEXTPIPELINE_PATH}/configs/ViTAEv2_S/rumsey/test.yaml",
+    )
+    runner.check_georeferencing()
+    assert runner.georeferenced
+
+    runner = MapTextRunner(  # case 2: "dlat"/"dlon" dropped, still georeferenced
+        patch_df,
+        parent_df=parent_df.drop(columns=["dlat", "dlon"]),
+        cfg_file=f"{MAPTEXTPIPELINE_PATH}/configs/ViTAEv2_S/rumsey/test.yaml",
+    )
+    runner.check_georeferencing()
+    assert runner.georeferenced
+
+    runner = MapTextRunner(  # case 3: "coordinates" dropped, not georeferenced
+        patch_df,
+        parent_df=parent_df.drop(columns=["coordinates"]),
+        cfg_file=f"{MAPTEXTPIPELINE_PATH}/configs/ViTAEv2_S/rumsey/test.yaml",
+    )
+    runner.check_georeferencing()
+    assert not runner.georeferenced
+
+
 def test_maptext_run_all(init_runner, mock_response):
     runner = init_runner
     # dict