diff --git a/CHANGELOG.md b/CHANGELOG.md
index 41e4ae3c..f7e4df1b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,8 @@
 - Added an adaptive box selection mode to the minimum absolute algorithm
 - Update a MSs `MODEL_DATA` column using `addmodel` and a source list (see
   `wsclean -save-source-list`)
+- Added a `taql` based function intended to be used to subtract model data from
+  nominated data, `flint.ms.subtract_model_from_data_column`
 
 # 0.2.7
 
diff --git a/flint/ms.py b/flint/ms.py
index 1c81c2c5..2fdf08ba 100644
--- a/flint/ms.py
+++ b/flint/ms.py
@@ -94,7 +94,7 @@ def critical_ms_interaction(
     assert (
         not output_ms.exists()
     ), f"The output measurement set {output_ms} already exists. "
-
+    logger.info(f"Critical section for {input_ms=}")
     if copy:
         rsync_copy_directory(target_path=input_ms, out_path=output_ms)
     else:
@@ -573,6 +573,43 @@ def remove_columns_from_ms(
     return columns_to_remove
 
 
+def subtract_model_from_data_column(
+    ms: MS, model_column: str = "MODEL_DATA", data_column: Optional[str] = None
+) -> MS:
+    """Execute a ``taql`` query to subtract a model column from a nominated data column.
+    This requires the ``model_column`` to already be inserted into the MS. Internally
+    the ``critical_ms_interaction`` context manager is used to highlight that the MS
+    is being modified should things fail when subtracting.
+
+    Args:
+        ms (MS): The measurement set instance being considered
+        model_column (str, optional): The column representing the model. Defaults to "MODEL_DATA".
+        data_column (Optional[str], optional): The column from which the model will be subtracted. If ``None`` it is taken from the ``column`` nominated by the input ``MS`` instance. Defaults to None.
+
+    Returns:
+        MS: The updated MS
+
+    Raises:
+        AssertionError: If no data column can be resolved, or if either column is missing from the MS
+    """
+    ms = MS.cast(ms)
+    data_column = data_column if data_column else ms.column
+    assert data_column is not None, f"{data_column=}, which is not allowed"
+    with critical_ms_interaction(input_ms=ms.path) as critical_ms:
+        with table(str(critical_ms), readonly=False) as tab:
+            logger.info("Extracting columns")
+            colnames = tab.colnames()
+            assert all(
+                d in colnames for d in (model_column, data_column)
+            ), f"{model_column=} or {data_column=} missing from {colnames=}"
+
+            logger.info(f"Subtracting {model_column=} from {data_column=}")
+            # ``taql`` substitutes ``$tab`` with the open table bound to the local ``tab``
+            taql(f"UPDATE $tab SET {data_column}={data_column}-{model_column}")
+
+    return ms
+
+
 def preprocess_askap_ms(
     ms: Union[MS, Path],
     data_column: str = "DATA",
diff --git a/tests/test_ms.py b/tests/test_ms.py
index 9fba67f4..7a980a93 100644
--- a/tests/test_ms.py
+++ b/tests/test_ms.py
@@ -19,6 +19,7 @@
     get_phase_dir_from_ms,
     remove_columns_from_ms,
     rename_ms_and_columns_for_selfcal,
+    subtract_model_from_data_column,
 )
 from flint.utils import get_packaged_resource_path
 
@@ -269,3 +270,73 @@ def test_remove_columns_from_ms(ms_remove_example):
         ms=ms_remove_example, columns_to_remove="DATA"
     )
     assert len(removed_columns) == 0
+
+
+@pytest.fixture
+def casda_taql_example(tmpdir):
+    """Unpack the packaged example MS into a temporary directory and return its path"""
+    ms_zip = Path(
+        get_packaged_resource_path(
+            package="flint.data.tests",
+            filename="scienceData.EMU_0529-60.SB50538.EMU_0529-60.beam08_averaged_cal.leakage.ms.zip",
+        )
+    )
+    outpath = Path(tmpdir) / "taqlsubtract"
+
+    shutil.unpack_archive(ms_zip, outpath)
+
+    ms_path = (
+        Path(outpath)
+        / "scienceData.EMU_0529-60.SB50538.EMU_0529-60.beam08_averaged_cal.leakage.ms"
+    )
+
+    return ms_path
+
+
+def _set_data_and_model_to_ones(ms_path):
+    """Fill ``DATA`` with ones and ensure a ``MODEL_DATA`` column of ones exists"""
+    from casacore.tables import maketabdesc, makearrcoldesc
+
+    with table(str(ms_path), readonly=False) as tab:
+        data = tab.getcol("DATA")
+        ones = np.ones_like(data)
+
+        tab.putcol(columnname="DATA", value=ones)
+
+        if "MODEL_DATA" not in tab.colnames():
+            coldesc = tab.getdminfo("DATA")
+            coldesc["NAME"] = "MODEL_DATA"
+            tab.addcols(
+                maketabdesc(makearrcoldesc("MODEL_DATA", 0.0 + 0j, ndim=2)), coldesc
+            )
+            tab.flush()
+        tab.putcol(columnname="MODEL_DATA", value=ones)
+        tab.flush()
+
+
+def test_subtract_model_from_data_column(casda_taql_example):
+    """Ensure we can subtract the model from the data via taql"""
+    ms_path = Path(casda_taql_example)
+    assert ms_path.exists()
+    ms = MS(path=ms_path)
+    _set_data_and_model_to_ones(ms_path=ms.path)
+
+    ms = subtract_model_from_data_column(
+        ms=ms, model_column="MODEL_DATA", data_column="DATA"
+    )
+    with table(str(ms.path)) as tab:
+        data = tab.getcol("DATA")
+        assert np.all(data == 0 + 0j)
+
+
+def test_subtract_model_from_data_column_ms_column(casda_taql_example):
+    """Ensure the data column falls back to the one nominated by the MS instance"""
+    ms_path = Path(casda_taql_example)
+    assert ms_path.exists()
+    ms = MS(path=ms_path, column="DATA")
+    _set_data_and_model_to_ones(ms_path=ms.path)
+
+    ms = subtract_model_from_data_column(ms=ms, model_column="MODEL_DATA")
+    with table(str(ms.path)) as tab:
+        data = tab.getcol("DATA")
+        assert np.all(data == 0 + 0j)