Skip to content

Commit

Permalink
added tests / subtract_model_from_data_column
Browse files Browse the repository at this point in the history
  • Loading branch information
tgalvin committed Nov 22, 2024
1 parent cb48bae commit 7ff16d7
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 1 deletion.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
- Added an adaptive box selection mode to the minimum absolute algorithm
- Update an MS's `MODEL_DATA` column using `addmodel` and a source list (see
`wsclean -save-source-list`)
- Added a `taql`-based function to subtract model data from a nominated data
column, `flint.ms.subtract_model_from_data_column`

# 0.2.7

Expand Down
35 changes: 34 additions & 1 deletion flint/ms.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def critical_ms_interaction(
assert (
not output_ms.exists()
), f"The output measurement set {output_ms} already exists. "

logger.info(f"Critical section for {input_ms=}")
if copy:
rsync_copy_directory(target_path=input_ms, out_path=output_ms)
else:
Expand Down Expand Up @@ -573,6 +573,39 @@ def remove_columns_from_ms(
return columns_to_remove


def subtract_model_from_data_column(
    ms: MS, model_column: str = "MODEL_DATA", data_column: Optional[str] = None
) -> MS:
    """Subtract a model column from a data column of an MS using ``taql``.

    The data column is overwritten with ``data_column - model_column``. Both
    columns must already be present in the measurement set. The update is
    performed inside the ``critical_ms_interaction`` context manager to
    highlight that the MS is being modified should the subtraction fail.

    Args:
        ms (MS): The measurement set instance to operate on.
        model_column (str, optional): The column holding the model to subtract. Defaults to "MODEL_DATA".
        data_column (Optional[str], optional): The column the model is subtracted from, and which receives the result. If not provided, the ``column`` nominated by the input ``MS`` instance is used. Defaults to None.

    Returns:
        MS: The MS instance whose data column has been updated.

    Raises:
        AssertionError: If no data column could be resolved, or if either column is absent from the MS.
    """
    ms = MS.cast(ms)

    # Fall back to the column tracked by the MS when none is nominated.
    if not data_column:
        data_column = ms.column
    assert data_column is not None, f"{data_column=}, which is not allowed"

    with critical_ms_interaction(input_ms=ms.path) as critical_ms:
        with table(str(critical_ms), readonly=False) as tab:
            logger.info("Extracting columns")
            colnames = tab.colnames()
            assert (
                model_column in colnames and data_column in colnames
            ), f"{model_column=} or {data_column=} missing from {colnames=}"

            logger.info(f"Subtracting {model_column=} from {data_column=}")
            # NOTE(review): ``$tab`` in the query presumably relies on taql's
            # substitution of the python variable of that name, so the open
            # table handle must stay bound to ``tab`` - confirm before renaming.
            taql(f"UPDATE $tab SET {data_column}={data_column}-{model_column}")

    return ms


def preprocess_askap_ms(
ms: Union[MS, Path],
data_column: str = "DATA",
Expand Down
98 changes: 98 additions & 0 deletions tests/test_ms.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
get_phase_dir_from_ms,
remove_columns_from_ms,
rename_ms_and_columns_for_selfcal,
subtract_model_from_data_column,
)
from flint.utils import get_packaged_resource_path

Expand Down Expand Up @@ -269,3 +270,100 @@ def test_remove_columns_from_ms(ms_remove_example):
ms=ms_remove_example, columns_to_remove="DATA"
)
assert len(removed_columns) == 0


@pytest.fixture
def casda_taql_example(tmpdir):
    """Unpack the packaged example MS into a temporary directory.

    Returns:
        Path: Path to the extracted measurement set under ``tmpdir``.
    """
    archive = get_packaged_resource_path(
        package="flint.data.tests",
        filename="scienceData.EMU_0529-60.SB50538.EMU_0529-60.beam08_averaged_cal.leakage.ms.zip",
    )
    archive = Path(archive)

    # Each use of the fixture extracts a fresh copy, so tests may modify it.
    extract_dir = Path(tmpdir) / "taqlsubtract"
    shutil.unpack_archive(archive, extract_dir)

    return (
        extract_dir
        / "scienceData.EMU_0529-60.SB50538.EMU_0529-60.beam08_averaged_cal.leakage.ms"
    )


def test_subtract_model_from_data_column(casda_taql_example):
    """Ensure we can subtract the model from the data via taql"""
    ms_path = Path(casda_taql_example)
    assert ms_path.exists()
    ms = MS(path=ms_path)

    from casacore.tables import maketabdesc, makearrcoldesc

    # Set both DATA and MODEL_DATA to ones so that the subtraction has a
    # known result of exactly zero.
    with table(str(ms.path), readonly=False) as tab:
        original = tab.getcol("DATA")
        ones = np.ones_like(original, dtype=original.dtype)

        tab.putcol(columnname="DATA", value=ones)

        model_exists = "MODEL_DATA" in tab.colnames()
        if not model_exists:
            # Create MODEL_DATA reusing the data manager info of DATA.
            dminfo = tab.getdminfo("DATA")
            dminfo["NAME"] = "MODEL_DATA"
            model_desc = maketabdesc(makearrcoldesc("MODEL_DATA", 0.0 + 0j, ndim=2))
            tab.addcols(model_desc, dminfo)
            tab.flush()
        tab.putcol(columnname="MODEL_DATA", value=ones)
        tab.flush()

    ms = subtract_model_from_data_column(
        ms=ms, model_column="MODEL_DATA", data_column="DATA"
    )

    with table(str(ms.path)) as tab:
        residual = tab.getcol("DATA")
    assert np.all(residual == 0 + 0j)


def test_subtract_model_from_data_column_ms_column(casda_taql_example):
    """Ensure the data column is taken from ``MS.column`` when ``data_column`` is not given

    The example MS comes from the ``casda_taql_example`` fixture rather than
    being unpacked again here; the fixture extracts a fresh copy for each
    test, so modifying it in place is safe.
    """
    ms_path = Path(casda_taql_example)
    assert ms_path.exists()
    # The column is nominated on the MS itself, not passed to the function.
    ms = MS(path=ms_path, column="DATA")

    from casacore.tables import maketabdesc, makearrcoldesc

    # Set both DATA and MODEL_DATA to ones so that the subtraction has a
    # known result of exactly zero.
    with table(str(ms.path), readonly=False) as tab:
        data = tab.getcol("DATA")
        ones = np.ones_like(data, dtype=data.dtype)

        tab.putcol(columnname="DATA", value=ones)

        if "MODEL_DATA" not in tab.colnames():
            # Create MODEL_DATA reusing the data manager info of DATA.
            dminfo = tab.getdminfo("DATA")
            dminfo["NAME"] = "MODEL_DATA"
            tab.addcols(
                maketabdesc(makearrcoldesc("MODEL_DATA", 0.0 + 0j, ndim=2)), dminfo
            )
            tab.flush()
        tab.putcol(columnname="MODEL_DATA", value=ones)
        tab.flush()

    # No ``data_column`` argument: the function must fall back to ``ms.column``.
    ms = subtract_model_from_data_column(ms=ms, model_column="MODEL_DATA")
    with table(str(ms.path)) as tab:
        data = tab.getcol("DATA")
        assert np.all(data == 0 + 0j)

0 comments on commit 7ff16d7

Please sign in to comment.