From 493f6f013f68d86bffa9ecaec0dc187417adf245 Mon Sep 17 00:00:00 2001 From: tgalvin Date: Wed, 31 Jul 2024 18:33:34 +0800 Subject: [PATCH 1/7] initial commit of catalogue --- flint/catalogue.py | 308 ++++++++++++++++++++++++++++++++++++++++ flint/sky_model.py | 64 +-------- flint/validation.py | 30 +--- pyproject.toml | 2 + tests/test_catalogue.py | 93 ++++++++++++ 5 files changed, 406 insertions(+), 91 deletions(-) create mode 100644 flint/catalogue.py create mode 100644 tests/test_catalogue.py diff --git a/flint/catalogue.py b/flint/catalogue.py new file mode 100644 index 00000000..62d0efae --- /dev/null +++ b/flint/catalogue.py @@ -0,0 +1,308 @@ +"""Utilities around catalogues""" + +from argparse import ArgumentParser +from pathlib import Path +from typing import Tuple, Optional, NamedTuple + +import astropy.units as u +from astropy.table import Table +from astroquery.vizier import Vizier + +from flint.logging import logger + + +class Catalogue(NamedTuple): + """A basic structure used to describe a known catalogue.""" + + survey: str + """Shorthand name of the sourcey catalogue""" + file_name: str + """The file name of the known catalogue""" + freq: float # Hertz + """Reference frequency of the catalogue, in Hertz""" + ra_col: str + """Column name containing the right-ascension""" + dec_col: str + """Column name containing the declination""" + name_col: str + """Column name containing the source/component name""" + flux_col: str + """Column name containing the flux density""" + maj_col: str + """Column name containing the major-axis of the source gaussian component""" + min_col: str + """Column name containing the min-axis of the source gaussian component""" + pa_col: str + """Column name containing the pa of the source gaussian component""" + alpha_col: Optional[str] = None # Used to scale the SED + """Column name containing the spectral index, used to calculate the source SED. If None a default is used. """ + q_col: Optional[str] = None # Used to scale the SED + """Column name containing the curvature of the spectral index, used to calculate the source SED. If None a default is used. 
""" + vizier_id: Optional[str] = ( + None # Required for known reference catalogues, not for other specified catalogues + ) + """The ID of the catalogue on Vizier that is used to download the catalogue""" + + +KNOWN_REFERENCE_CATALOGUES = dict( + NVSS=Catalogue( + survey="NVSS", + file_name="NVSS.fits", + name_col="NVSS", + freq=1.4e9, + ra_col="RAJ2000", + dec_col="DEJ2000", + flux_col="S1.4", + maj_col="MajAxis", + min_col="MinAxis", + pa_col="PA", + vizier_id="VIII/65/nvss", + ), + SUMSS=Catalogue( + survey="SUMSS", + file_name="SUMSS.fits", + freq=8.43e8, + ra_col="RAJ2000", + dec_col="DEJ2000", + name_col="Mosaic", + flux_col="St", + maj_col="dMajAxis", + min_col="dMinAxis", + pa_col="dPA", + vizier_id="VIII/81B/sumss212", + ), + ICRF=Catalogue( + survey="ICRF", + file_name="ICRF.fits", + freq=1e9, + ra_col="RAJ2000", + dec_col="DEJ2000", + name_col="ICRF", + flux_col="None", + maj_col="None", + min_col="None", + pa_col="None", + vizier_id="I/323/icrf2", + ), + RACSLOW=Catalogue( + file_name="racs-low.fits", + survey="RACS-LOW", + freq=887.56e6, + ra_col="RAJ2000", + dec_col="DEJ2000", + name_col="GID", + flux_col="Ftot", + maj_col="amaj", + min_col="bmin", + pa_col="PA", + vizier_id="J/other/PASA/38.58/gausscut", + ), +) + + +def get_reference_catalogue( + reference_directory: Path, survey: str, verify: bool = True +) -> Tuple[Table, Catalogue]: + """Load in a known reference catalogue + + Args: + reference_directory (Path): The path to the directory where reference catalogues were downlaoded to + survey (str): The name of the survey to load. + verify (bool, optional): If `True`, the table column names are inspected to ensure they are correct. Defaults to True. + + Raises: + ValueError: Raised when the requested survey is not known + + Returns: + Tuple[Table, Catalogue]: The loaded table and corresponding set of expected columns + """ + + catalogue = KNOWN_REFERENCE_CATALOGUES.get(survey, None) + + if catalogue is None: + raise ValueError( + f"{survey=} is not known, recognized reference catalogues are {KNOWN_REFERENCE_CATALOGUES.keys()}" + ) + + table_path = reference_directory / catalogue.file_name + logger.info(f"Loading {table_path=}") + + table = Table.read(table_path) + + if verify: + valid_cols = [ + col in table.columns + for col in ( + catalogue.ra_col, + catalogue.dec_col, + catalogue.name_col, + catalogue.flux_col, + catalogue.maj_col, + catalogue.min_col, + ) + if col.lower() != "none" + ] + assert all(valid_cols), f"Column is not valid, {valid_cols=}" + if catalogue.flux_col.lower() != "none": + assert isinstance(table[catalogue.flux_col].unit, u.Unit) + + return table, catalogue + + +def download_vizier_catalogue( + output_path: Path, vizier_id: str, dry_run: bool = False +) -> Path: + """Download a catalogue from the vizier catalogue service. The table + will be obtained using astroquery and written out to the supplied + `output_path`, from which the format is inferred. + + Args: + output_path (Path): Where the table will be written to + vizier_id (str): The catalogue ID that will be downloaded + dry_run (bool, optional): If `True`, no downloading will take place. Defaults to False. 
+
+    Returns:
+        Path: Path the file was written to
+    """
+    logger.info(f"Downloading {vizier_id=}")
+
+    if dry_run:
+        logger.info(f"{dry_run=}, not downloading")
+        return output_path
+
+    tablelist = Vizier(columns=["all"], row_limit=-1).get_catalogs(
+        vizier_id, verbose=True
+    )
+    logger.info(f"Catalogue downloaded, contains {len(tablelist[0])} rows")
+    logger.info(f"Writing {vizier_id=} to {output_path=}")
+
+    assert (
+        len(tablelist) == 1
+    ), f"Table list for {vizier_id=} has unexpected length of {len(tablelist)}"
+
+    # Not all pirates respect the FITS standard; trim a long description so the
+    # table can be written out as FITS without complaint
+    if description := tablelist[0].meta.get("description", None):
+        tablelist[0].meta["description"] = description[:30]
+
+    tablelist[0].write(output_path, overwrite=True)
+
+    return output_path
+
+
+def download_referencce_catalogues(
+    reference_directory: Path, dry_run: bool = False
+) -> Tuple[Path, ...]:
+    """Download all of the expected reference catalogue data that flint relies on
+
+    Args:
+        reference_directory (Path): The parent directory catalogues will be written to
+        dry_run (bool, optional): If `True`, no downloading will take place. Defaults to False.
+
+    Returns:
+        Tuple[Path, ...]: Collection of paths of all the downloaded reference catalogues
+    """
+
+    logger.info(f"Downloading catalogues to {reference_directory=}")
+    reference_directory.mkdir(parents=True, exist_ok=True)
+
+    logger.info(f"Downloading {len(KNOWN_REFERENCE_CATALOGUES)} catalogues")
+    catalogue_paths = [
+        download_vizier_catalogue(
+            output_path=(reference_directory / f"{catalogue.file_name}").absolute(),
+            vizier_id=catalogue.vizier_id,
+            dry_run=dry_run,
+        )
+        for _, catalogue in KNOWN_REFERENCE_CATALOGUES.items()
+        if catalogue.vizier_id
+    ]
+
+    return tuple(catalogue_paths)
+
+
+def list_known_reference_catalogues() -> None:
+    """List the known reference catalogues that are expected/downloaded by flint"""
+
+    logger.info(f"{len(KNOWN_REFERENCE_CATALOGUES)} reference catalogues are known")
+    for survey, cata in KNOWN_REFERENCE_CATALOGUES.items():
+        logger.info(f"{survey=}")
+        logger.info(f"{cata}")
+
+
+def verify_reference_catalogues(reference_directory: Path) -> bool:
+    """Attempt to load the set of reference catalogues to ensure they are correctly
+    formed
+
+    Args:
+        reference_directory (Path): The directory containing the reference catalogues
+
+    Returns:
+        bool: Indicates whether all catalogue files exist and are correctly formed
+    """
+
+    logger.info(f"Verifying catalogues in {reference_directory=}")
+    logger.info(f"Searching for {len(KNOWN_REFERENCE_CATALOGUES)} catalogues")
+    survey_valid = {}
+    for survey, cata in KNOWN_REFERENCE_CATALOGUES.items():
+        try:
+            _ = get_reference_catalogue(
+                reference_directory=reference_directory, survey=survey, verify=True
+            )
+            valid = True
+        except (AssertionError, FileNotFoundError, ValueError):
+            valid = False
+        logger.info(f"{survey=} is {'valid' if valid else 'not valid'}")
+        survey_valid[survey] = valid
+
+    return all(survey_valid.values())
+
+
+def get_parser() -> ArgumentParser:
+    parser = ArgumentParser(description="Utilities around catalogues")
+
+    subparser = parser.add_subparsers(
+        dest="mode", help="Operation mode of flint_catalogue"
+    )
+
+    download_parser = subparser.add_parser(
+        "download", help="Download reference catalogues"
+    )
+    download_parser.add_argument(
+        "reference_directory",
+        type=Path,
+        help="The directory to save the reference catalogues to",
+    )
+
+    _ = subparser.add_parser("list", help="List the known reference catalogues")
+
+    verify_parser = subparser.add_parser(
+        "verify",
+        help="Ensure the expected
catalogues existing in the reference directory and are correctly formed", + ) + verify_parser.add_argument( + "reference_directory", + type=Path, + help="Directory containing the known referene catalogues", + ) + + return parser + + +def cli() -> None: + + parser = get_parser() + + args = parser.parse_args() + + if args.mode == "download": + download_referencce_catalogues(reference_directory=args.reference_directory) + elif args.mode == "list": + list_known_reference_catalogues() + elif args.mode == "verify": + verify_reference_catalogues(reference_directory=args.reference_directory) + else: + logger.info(f"Mode {args.mode} is not recognised") + parser.print_help() + + +if __name__ == "__main__": + cli() diff --git a/flint/sky_model.py b/flint/sky_model.py index 08aa4fa0..128970f3 100644 --- a/flint/sky_model.py +++ b/flint/sky_model.py @@ -14,39 +14,13 @@ from casacore.tables import table from scipy.optimize import curve_fit +from flint.catalogue import Catalogue, KNOWN_REFERENCE_CATALOGUES from flint.logging import logger from flint.utils import get_packaged_resource_path KNOWN_PB_TYPES = ("gaussian", "sincsquared", "airy") -class Catalogue(NamedTuple): - """A basic structure used to describe a known catalogue.""" - - file_name: str - """The file name of the known catalogue""" - freq: float # Hertz - """Reference frequency of the catalogue, in Hertz""" - ra_col: str - """Column name containing the right-ascension""" - dec_col: str - """Column name containing the declination""" - name_col: str - """Column name containing the source/component name""" - flux_col: str - """Column name containing the flux density""" - maj_col: str - """Column name containing the major-axis of the source gaussian component""" - min_col: str - """Column name containing the min-axis of the source gaussian component""" - pa_col: str - """Column name containing the pa of the source gaussian component""" - alpha_col: Optional[str] = None # Used to scale the SED - """Column name containing the spectral index, used to calculate the source SED. If None a default is used. """ - q_col: Optional[str] = None # Used to scale the SED - """Column name containing the curvature of the spectral index, used to calculate the source SED. If None a default is used. """ - - class CurvedPL(NamedTuple): """Container for results of a Curved Power Law, @@ -127,41 +101,7 @@ class SkyModel(NamedTuple): NORM_COLS = {"flux": "Jy", "maj": "arcsecond", "min": "arcsecond", "pa": "deg"} """Normalised column names and their corresponding astropy units. """ -KNOWN_CATAS: Dict[str, Catalogue] = { - "SUMSS": Catalogue( - file_name="sumsscat.Mar-11-2008_CLH.fits", - freq=843e6, - ra_col="RA", - dec_col="Dec", - name_col="Mosaic", - flux_col="Sp", - maj_col="dMajAxis", - min_col="dMinAxis", - pa_col="dPA", - ), - "RACS": Catalogue( - file_name="racs-low.fits", - freq=887.56e6, - ra_col="RA", - dec_col="Dec", - name_col="Gaussian_ID", - flux_col="Total_flux_Gaussian", - maj_col="DC_Maj", - min_col="DC_Min", - pa_col="DC_PA", - ), - "NVSS": Catalogue( - file_name="NVSS_vizier.fits", - freq=1400e6, - ra_col="RAJ2000", - dec_col="DEJ2000", - name_col="NVSS", - flux_col="S1_4", - maj_col="MajAxis", - min_col="MinAxis", - pa_col="PA", - ), -} +KNOWN_CATAS: Dict[str, Catalogue] = KNOWN_REFERENCE_CATALOGUES """Known sky-model catalogues that have had some pre-processing operations applied. 
Discuss with maintainers for access, """ # TODO: Make this a yaml file packaged in data/models diff --git a/flint/validation.py b/flint/validation.py index af5592cb..ecdf5400 100644 --- a/flint/validation.py +++ b/flint/validation.py @@ -18,6 +18,7 @@ from matplotlib.figure import Figure from scipy import stats +from flint.catalogue import Catalogue from flint.logging import logger from flint.naming import processed_ms_format from flint.summary import BeamSummary, FieldSummary @@ -29,35 +30,6 @@ F_HUGE = 20 -class Catalogue(NamedTuple): - """A basic structure used to describe a known catalogue.""" - - survey: str - """Shorthand name of the sourcey catalogue""" - file_name: str - """The file name of the known catalogue""" - freq: float # Hertz - """Reference frequency of the catalogue, in Hertz""" - ra_col: str - """Column name containing the right-ascension""" - dec_col: str - """Column name containing the declination""" - name_col: str - """Column name containing the source/component name""" - flux_col: str - """Column name containing the flux density""" - maj_col: str - """Column name containing the major-axis of the source gaussian component""" - min_col: str - """Column name containing the min-axis of the source gaussian component""" - pa_col: str - """Column name containing the pa of the source gaussian component""" - alpha_col: Optional[str] = None # Used to scale the SED - """Column name containing the spectral index, used to calculate the source SED. If None a default is used. """ - q_col: Optional[str] = None # Used to scale the SED - """Column name containing the curvature of the spectral index, used to calculate the source SED. If None a default is used. """ - - class Catalogues(NamedTuple): """Container for all the catalogues that are loaded in""" diff --git a/pyproject.toml b/pyproject.toml index e5ba16fe..c6dd7b1b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ scikit-image = "*" pandas = "*" ConfigArgParse = "^1.7" fitscube = "^0.4.3" +astroquery = "^0.4.7" [tool.poetry.group.dev.dependencies] black = "^23.3.0" @@ -72,6 +73,7 @@ flint_aocalibrate = "flint.calibrate.aocalibrate:cli" flint_archive = "flint.archive:cli" flint_flagger = "flint.flagging:cli" flint_bandpass = "flint.bandpass:cli" +flint_catalogues = "flint.catalogue:cli" flint_ms = "flint.ms:cli" flint_wsclean = "flint.imager.wsclean:cli" flint_gaincal = "flint.selfcal.casa:cli" diff --git a/tests/test_catalogue.py b/tests/test_catalogue.py new file mode 100644 index 00000000..d1e573ed --- /dev/null +++ b/tests/test_catalogue.py @@ -0,0 +1,93 @@ +"""Tests that work around the catalogue functionality""" + +import pytest +from pathlib import Path + +from astropy.table import Table + +from flint.catalogue import ( + download_vizier_catalogue, + download_referencce_catalogues, + get_reference_catalogue, + KNOWN_REFERENCE_CATALOGUES, +) + + +def test_known_reference_catalogues(): + """Make sure all of the known reference catalogues have a vizier id attached""" + assert all([cata.vizier_id for cata in KNOWN_REFERENCE_CATALOGUES.values()]) + + +def test_download_vizier_catalogue(tmpdir): + """Download a example vizier table""" + + output_path = Path(tmpdir) / "catalogue1/ICRF.fits" + output_path.parent.mkdir(parents=True, exist_ok=True) + + icrf_id = KNOWN_REFERENCE_CATALOGUES["ICRF"] + assert icrf_id.vizier_id + cata_path = download_vizier_catalogue( + output_path=output_path, vizier_id=icrf_id.vizier_id + ) + + assert cata_path == output_path + assert cata_path.exists() + + table = 
Table.read(cata_path) + assert len(table) == 3414 + + +def test_get_vizier_catalogue(tmpdir): + """Download a example vizier table""" + output_path = Path(tmpdir) / "catalogue1/ICRF.fits" + assert not output_path.exists() + + output_path = Path(tmpdir) / "catalogue1/ICRF.fits" + output_path.parent.mkdir(parents=True, exist_ok=True) + + icrf = KNOWN_REFERENCE_CATALOGUES["ICRF"] + assert icrf.vizier_id + _ = download_vizier_catalogue(output_path=output_path, vizier_id=icrf.vizier_id) + assert output_path.exists() + + table, catalogue = get_reference_catalogue( + reference_directory=output_path.parent, survey="ICRF" + ) + assert catalogue.file_name == "ICRF.fits" + assert catalogue.survey == "ICRF" + assert len(table) == 3414 + + with pytest.raises(ValueError): + _, _ = get_reference_catalogue( + reference_directory=output_path.parent, survey="Jack" + ) + + +def test_download_vizier_catalogue_dryrun(tmpdir): + """See if the dry run option in download a example vizier table""" + + output_path = Path(tmpdir) / "cataloguedry/ICRF.fits" + output_path.parent.mkdir(parents=True, exist_ok=True) + + icrf_id = KNOWN_REFERENCE_CATALOGUES["ICRF"] + + cata_path = download_vizier_catalogue( + output_path=output_path, vizier_id=icrf_id, dry_run=True + ) + + assert cata_path == output_path + assert not cata_path.exists() + + +def test_download_reference_catalogues(tmpdir): + """Ensure all catalogues can be downloaded. Not the dry_run=True, + meaning the catalogues are not all actually dowenloaded + """ + output_dir = Path(tmpdir) / "catalogue2" + output_dir.mkdir(parents=True, exist_ok=True) + + outputs = download_referencce_catalogues( + reference_directory=output_dir, dry_run=True + ) + + assert len(outputs) == len(KNOWN_REFERENCE_CATALOGUES) From 2332a8364bc0bce732eb842fcd9cafe2d08b2825 Mon Sep 17 00:00:00 2001 From: tgalvin Date: Wed, 31 Jul 2024 18:44:43 +0800 Subject: [PATCH 2/7] added to the README.md --- CHANGELOG.md | 5 +++++ README.md | 50 ++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 43 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e74cf13..d38e8318 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,11 @@ - naming format of output linmos files could contain the pol field - `stokesv` imaging will not linmos the cleaning residuals together, even if the `--linmos-residuals` CLI is provided - Capture `CleanDivergenceError` from `wsclean` and rerun with larger image size and lower gain +- Added `flint.catalogue`, which aims to collect all the catalogue related operations + - a `flint_catalogue` CLI program to: + - download reference catalogues that are known and expected from vizier + - verify reference catalogues conform to expectations + - list the reference catalogues that are expected # 0.2.5 - added in skip rounds for masking and selfcal diff --git a/README.md b/README.md index 7f805bcb..fcf69d33 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,7 @@ Most of the `python` routines have a CLI that can be used to test them in a piec - `flint_potato`: Attempt to peel out known sources from a measurement set using [potatopeel]( https://gitlab.com/Sunmish/potato/-/tree/main). Criteria used to assess which sources to peel is fairly minimumal, and at the time of writing only the reference set of sources paackaged within `flint` are considered. -`flint_archive`: Operations around archiving and copying final data products into place. 
+- `flint_catalogue`: Download reference catalogues that are expected by `flint`

 The following commands use the `prefect` framework to link together individual tasks together (outlined above) into a single data-processing pipeline.
 - `flint_flow_bandpass_calibrate`: Executes a prefect flow run that will calibrate a set of ASKAP measurement sets taken during a normal bandpass observation sequence.
@@ -83,55 +84,80 @@ To help manage (and avoid) long CLI calls to conffigure `flint`, most command li

 The validation plots that are created are simple and aim to provide a quality assessment at a quick glance. An RMS image and corresponding source component catalogue are the base data products derived from the ASKAP data that are supplied to the routine.

-External catalogues are supplied to a source to compare against. In the current `flint` package these catalogues (and their expected columns) are:
+`flint` requires a set of reference catalogues to be present for some stages of operation, the most obvious being the validation plots described above. In some computing environments (e.g. HPC) network access to external services is blocked. To avoid these issues `flint` has a built-in utility to download the reference catalogues it expects from Vizier and write them to a specified user directory. See:
+
+> `flint_catalogue download --help`
+
+The parent directory that contains these catalogues should be provided to the appropriate tasks when required.
+
+In the current `flint` package these catalogues (and their expected columns) are:
 - ICRF
 ```
 Catalogue(
     survey="ICRF",
-    file_name="icrf.csv",
+    file_name="ICRF.fits",
     freq=1e9,
-    ra_col="RA",
-    dec_col="Dec",
-    name_col="IERS Des.",
+    ra_col="RAJ2000",
+    dec_col="DEJ2000",
+    name_col="ICRF",
     flux_col="None",
     maj_col="None",
     min_col="None",
     pa_col="None",
+    vizier_id="I/323/icrf2",
 )
 ```
 - NVSS
 ```
 Catalogue(
     survey="NVSS",
-    file_name="VIII_65_nvss.dat_CH_2.fits",
+    file_name="NVSS.fits",
     name_col="NVSS",
     freq=1.4e9,
-    ra_col="RA",
-    dec_col="Dec",
+    ra_col="RAJ2000",
+    dec_col="DEJ2000",
    flux_col="S1.4",
     maj_col="MajAxis",
     min_col="MinAxis",
     pa_col="PA",
+    vizier_id="VIII/65/nvss",
 )
 ```
 - SUMSS
 ```
 Catalogue(
     survey="SUMSS",
-    file_name="sumsscat.Mar-11-2008_CLH.fits",
+    file_name="SUMSS.fits",
     freq=8.43e8,
-    ra_col="RA",
-    dec_col="DEC",
+    ra_col="RAJ2000",
+    dec_col="DEJ2000",
     name_col="Mosaic",
     flux_col="St",
     maj_col="dMajAxis",
     min_col="dMinAxis",
     pa_col="dPA",
+    vizier_id="VIII/81B/sumss212",
+)
+```
+- RACS-LOW
+```
+Catalogue(
+    file_name="racs-low.fits",
+    survey="RACS-LOW",
+    freq=887.56e6,
+    ra_col="RAJ2000",
+    dec_col="DEJ2000",
+    name_col="GID",
+    flux_col="Ftot",
+    maj_col="amaj",
+    min_col="bmin",
+    pa_col="PA",
+    vizier_id="J/other/PASA/38.58/gausscut",
 )
 ```

-These catalogues are currently not distributed with the source code / python installable. Instead when required a parameter specifying their host directory on disk needs to be supplied. The known filename is used to find the appropriate catalogue and its full path.
+The known filename is used to find the appropriate catalogue and its full path; the files are appropriately named when downloaded with the `flint_catalogue download` tool.
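Putting these pieces together, the snippet below is a minimal sketch of the download-then-load workflow added in this patch. The `reference_catalogues` directory name is only a placeholder, and the download step requires network access to Vizier.

```python
from pathlib import Path

from flint.catalogue import download_referencce_catalogues, get_reference_catalogue

# Placeholder location; any writable directory will do
reference_directory = Path("reference_catalogues")

# Fetch every known reference catalogue from Vizier
# (the same operation as `flint_catalogue download reference_catalogues`)
download_referencce_catalogues(reference_directory=reference_directory)

# Load one catalogue back, verifying that the expected columns are present
table, catalogue = get_reference_catalogue(
    reference_directory=reference_directory, survey="NVSS"
)
print(f"{catalogue.survey}: {len(table)} rows at {catalogue.freq} Hz")
```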
## Contributions From 3566f28cf4e2bee409c204bf707ffad03f377255 Mon Sep 17 00:00:00 2001 From: tgalvin Date: Wed, 31 Jul 2024 19:14:07 +0800 Subject: [PATCH 3/7] added another test and some checks / moved to get_reference_catalogue --- flint/catalogue.py | 5 +++++ flint/validation.py | 16 ++++------------ tests/test_catalogue.py | 6 ++++++ 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/flint/catalogue.py b/flint/catalogue.py index 62d0efae..44ec2fbb 100644 --- a/flint/catalogue.py +++ b/flint/catalogue.py @@ -127,6 +127,11 @@ def get_reference_catalogue( table_path = reference_directory / catalogue.file_name logger.info(f"Loading {table_path=}") + if not table_path.exists(): + raise FileNotFoundError( + f"{table_path=} not found. Check {reference_directory=} for known catalogues" + ) + table = Table.read(table_path) if verify: diff --git a/flint/validation.py b/flint/validation.py index ecdf5400..bc6009b5 100644 --- a/flint/validation.py +++ b/flint/validation.py @@ -18,7 +18,7 @@ from matplotlib.figure import Figure from scipy import stats -from flint.catalogue import Catalogue +from flint.catalogue import Catalogue, get_reference_catalogue from flint.logging import logger from flint.naming import processed_ms_format from flint.summary import BeamSummary, FieldSummary @@ -284,17 +284,9 @@ def load_known_catalogue( Returns: Tuple[Table,Catalogue]: The loaded table and Catalogue structure describing the columns """ - catalogue = get_known_catalogue_info(name=name) - catalogue_path = reference_catalogue_directory / catalogue.file_name - table = Table.read(catalogue_path) - - if name == "SUMSS": - table[catalogue.flux_col] = table[catalogue.flux_col] * u.mJy - if name == "ICRF": - return table, catalogue - - table[catalogue.flux_col] = table[catalogue.flux_col].to(u.Jy).value - + table, catalogue = get_reference_catalogue( + reference_directory=reference_catalogue_directory, survey=name + ) return table, catalogue diff --git a/tests/test_catalogue.py b/tests/test_catalogue.py index d1e573ed..b3dc765a 100644 --- a/tests/test_catalogue.py +++ b/tests/test_catalogue.py @@ -18,6 +18,12 @@ def test_known_reference_catalogues(): assert all([cata.vizier_id for cata in KNOWN_REFERENCE_CATALOGUES.values()]) +def test_no_reference_catalogue(): + """Ensure file not found error raised if catalogue not found""" + with pytest.raises(FileNotFoundError): + _ = get_reference_catalogue(reference_directory=Path("./"), survey="NVSS") + + def test_download_vizier_catalogue(tmpdir): """Download a example vizier table""" From a93c8e7a3b12f08f3f2dd1cce982058653eb7288 Mon Sep 17 00:00:00 2001 From: tgalvin Date: Wed, 31 Jul 2024 20:58:37 +0800 Subject: [PATCH 4/7] resolved potential None --- tests/test_catalogue.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_catalogue.py b/tests/test_catalogue.py index b3dc765a..51e0bb27 100644 --- a/tests/test_catalogue.py +++ b/tests/test_catalogue.py @@ -75,10 +75,11 @@ def test_download_vizier_catalogue_dryrun(tmpdir): output_path = Path(tmpdir) / "cataloguedry/ICRF.fits" output_path.parent.mkdir(parents=True, exist_ok=True) - icrf_id = KNOWN_REFERENCE_CATALOGUES["ICRF"] + icrf_cata = KNOWN_REFERENCE_CATALOGUES["ICRF"] + assert icrf_cata.vizier_id is not None cata_path = download_vizier_catalogue( - output_path=output_path, vizier_id=icrf_id, dry_run=True + output_path=output_path, vizier_id=icrf_cata.vizier_id, dry_run=True ) assert cata_path == output_path From abc998d143fe40ffc3cbaf1c0eddff03d2d85d4e Mon Sep 17 00:00:00 
2001 From: tgalvin Date: Wed, 31 Jul 2024 21:28:48 +0800 Subject: [PATCH 5/7] updated readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fcf69d33..b5fa4642 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ The following commands use the `prefect` framework to link together individual t ## Sky-model catalogues -The `flint_skymodel` command will attempt to create an in-field sky-model for a particular measurement set using existing source catalogues and an idealised primary beam response. At the moment these catalogue names are hard-coded. Reach out if you need these catalogues. In hopefully the near future this can be relaxed to allow a user-specific catalogue. +The `flint_skymodel` command will attempt to create an in-field sky-model for a particular measurement set using existing source catalogues and an idealised primary beam response. 'Supported' catalogues are those available through `flint_catalogue download`. Note this mode has not be thoroughly tested and may not be out-of-date relative to how the `flint_flow_continuum_pipeline` operates. In the near future this may be expanded. If calibrating a bandpass (i.e. `1934-638`) `flint` will use the packaged source model. At the moment this is only provided for `calibrate`. From 2ee4310105e0e530137539314924e0055fb91802 Mon Sep 17 00:00:00 2001 From: tgalvin Date: Wed, 31 Jul 2024 22:27:59 +0800 Subject: [PATCH 6/7] Catalogues class renamed / some notes --- flint/catalogue.py | 9 ++++++++- flint/validation.py | 11 ++++++----- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/flint/catalogue.py b/flint/catalogue.py index 44ec2fbb..f6030956 100644 --- a/flint/catalogue.py +++ b/flint/catalogue.py @@ -1,4 +1,11 @@ -"""Utilities around catalogues""" +"""Utilities around catalogues. + +Known reference catalogues are described by their ViZeR catalogue id, +which are used to download and store the appropriately formed catalogues on disk. +If the ViZeR service is down then attempts to download and form FITS catalgoues +will fail. These only need to be downloaded once, provided they can be stored +and retained on disk. +""" from argparse import ArgumentParser from pathlib import Path diff --git a/flint/validation.py b/flint/validation.py index bc6009b5..fd0ab353 100644 --- a/flint/validation.py +++ b/flint/validation.py @@ -30,8 +30,9 @@ F_HUGE = 20 -class Catalogues(NamedTuple): - """Container for all the catalogues that are loaded in""" +class ValidationCatalogues(NamedTuple): + """Container for all the catalogues that are loaded in and + used throughout validation processing""" nvss: Catalogue """NVSS catalogue""" @@ -1065,7 +1066,7 @@ def load_catalogues( reference_catalogue_directory: Path, askap_survey_name: str, rms_info: RMSImageInfo, -) -> Tuple[Catalogues, Tables]: +) -> Tuple[ValidationCatalogues, Tables]: """Load in all the catalogues that are required for the validation. 
Args: @@ -1075,7 +1076,7 @@ def load_catalogues( rms_info (RMSImageInfo): The extracted information from the RMS image Returns: - Tuple[Catalogues, Tables]: The loaded catalogues and tables + Tuple[ValidationCatalogues, Tables]: The loaded catalogues and tables """ logger.info(f"Loading {source_catalogue_path=}") askap_table = Table.read(source_catalogue_path) @@ -1102,7 +1103,7 @@ def load_catalogues( ) return ( - Catalogues( + ValidationCatalogues( askap=askap_cata, icrf=icrf_catalogue, sumss=sumss_catalogue, From 90cea2764718e41595801831a8858beaec77cac3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 1 Aug 2024 01:35:26 +0000 Subject: [PATCH 7/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- README.md | 4 ++-- flint/catalogue.py | 7 +++---- flint/sky_model.py | 2 +- tests/test_catalogue.py | 6 +++--- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index b5fa4642..678276ca 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ The following commands use the `prefect` framework to link together individual t ## Sky-model catalogues -The `flint_skymodel` command will attempt to create an in-field sky-model for a particular measurement set using existing source catalogues and an idealised primary beam response. 'Supported' catalogues are those available through `flint_catalogue download`. Note this mode has not be thoroughly tested and may not be out-of-date relative to how the `flint_flow_continuum_pipeline` operates. In the near future this may be expanded. +The `flint_skymodel` command will attempt to create an in-field sky-model for a particular measurement set using existing source catalogues and an idealised primary beam response. 'Supported' catalogues are those available through `flint_catalogue download`. Note this mode has not be thoroughly tested and may not be out-of-date relative to how the `flint_flow_continuum_pipeline` operates. In the near future this may be expanded. If calibrating a bandpass (i.e. `1934-638`) `flint` will use the packaged source model. At the moment this is only provided for `calibrate`. @@ -88,7 +88,7 @@ The validation plots that are created are simple and aim to provide a quality as > `flint_catalogue download --help` -The parent directory that contains these cataloguues should be provided to the appropriate tasks when appropriate. +The parent directory that contains these cataloguues should be provided to the appropriate tasks when appropriate. In the current `flint` package these catalogues (and their expected columns) are: - ICRF diff --git a/flint/catalogue.py b/flint/catalogue.py index f6030956..4cb20114 100644 --- a/flint/catalogue.py +++ b/flint/catalogue.py @@ -1,15 +1,15 @@ -"""Utilities around catalogues. +"""Utilities around catalogues. Known reference catalogues are described by their ViZeR catalogue id, which are used to download and store the appropriately formed catalogues on disk. If the ViZeR service is down then attempts to download and form FITS catalgoues will fail. These only need to be downloaded once, provided they can be stored -and retained on disk. +and retained on disk. 
""" from argparse import ArgumentParser from pathlib import Path -from typing import Tuple, Optional, NamedTuple +from typing import NamedTuple, Optional, Tuple import astropy.units as u from astropy.table import Table @@ -300,7 +300,6 @@ def get_parser() -> ArgumentParser: def cli() -> None: - parser = get_parser() args = parser.parse_args() diff --git a/flint/sky_model.py b/flint/sky_model.py index 128970f3..81996c0f 100644 --- a/flint/sky_model.py +++ b/flint/sky_model.py @@ -14,7 +14,7 @@ from casacore.tables import table from scipy.optimize import curve_fit -from flint.catalogue import Catalogue, KNOWN_REFERENCE_CATALOGUES +from flint.catalogue import KNOWN_REFERENCE_CATALOGUES, Catalogue from flint.logging import logger from flint.utils import get_packaged_resource_path diff --git a/tests/test_catalogue.py b/tests/test_catalogue.py index 51e0bb27..178f15f9 100644 --- a/tests/test_catalogue.py +++ b/tests/test_catalogue.py @@ -1,15 +1,15 @@ """Tests that work around the catalogue functionality""" -import pytest from pathlib import Path +import pytest from astropy.table import Table from flint.catalogue import ( - download_vizier_catalogue, + KNOWN_REFERENCE_CATALOGUES, download_referencce_catalogues, + download_vizier_catalogue, get_reference_catalogue, - KNOWN_REFERENCE_CATALOGUES, )