From a845a3a38042c6fb00b575554c05377a186fc154 Mon Sep 17 00:00:00 2001
From: "Thomson, Alec (CASS, Kensington)"
Date: Mon, 8 Apr 2024 16:34:07 +1000
Subject: [PATCH 01/37] Add sbid queries

---
 arrakis/cutout.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/arrakis/cutout.py b/arrakis/cutout.py
index 8de423c1..ed75821f 100644
--- a/arrakis/cutout.py
+++ b/arrakis/cutout.py
@@ -390,6 +390,7 @@ def cutout_islands(
     directory: str,
     host: str,
     epoch: int,
+    sbid: Optional[int] = None,
     username: Optional[str] = None,
     password: Optional[str] = None,
     pad: float = 3,
@@ -428,7 +429,11 @@ def cutout_islands(
     )

     # Query the DB
+    query = {"$and": [{f"beams.{field}": {"$exists": True}}]}
+    if sbid is not None:
+        query["$and"].append({f"beams.{field}.SBIDs": sbid})
+
     unique_beams_nums: Set[int] = set(
         beams_col.distinct(f"beams.{field}.beam_list", query)
     )
@@ -442,6 +447,9 @@ def cutout_islands(
             {f"beams.{field}.beam_list": {"$in": list(unique_beams_nums)}},
         ]
     }
+    if sbid is not None:
+        query["$and"].append({f"beams.{field}.SBIDs": sbid})
+
     all_beams = list(beams_col.find(query).sort("Source_ID"))
     for beams in tqdm(all_beams, desc="Getting beams", file=TQDM_OUT):
         for beam_num in beams[f"beams"][field]["beam_list"]:
@@ -503,6 +511,7 @@ def main(args: argparse.Namespace) -> None:
         directory=args.datadir,
         host=args.host,
         epoch=args.epoch,
+        sbid=args.sbid,
         username=args.username,
         password=args.password,
         pad=args.pad,
@@ -561,6 +570,13 @@ def cutout_parser(parent_parser: bool = False) -> argparse.ArgumentParser:
         help="Epoch of observation.",
     )

+    parser.add_argument(
+        "--sbid",
+        type=int,
+        default=None,
+        help="SBID of observation.",
+    )
+
     parser.add_argument(
         "--username", type=str, default=None, help="Username of mongodb."
     )
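For reference, the filter that patch 01 builds is an ordinary pymongo query document: the field-existence clause is always present, and the SBID clause is appended only when an SBID is given. A minimal standalone sketch of the same shape (the host, database/collection names, field, and SBID below are illustrative assumptions, not values taken from the repository):

    from pymongo import MongoClient

    client = MongoClient("localhost")        # hypothetical MongoDB host
    beams_col = client["arrakis"]["beams"]   # hypothetical database/collection names

    field = "2132-50A"                       # hypothetical field name
    sbid = 38420                             # hypothetical SBID; None disables the clause

    # Same construction as in cutout_islands()
    query = {"$and": [{f"beams.{field}": {"$exists": True}}]}
    if sbid is not None:
        query["$and"].append({f"beams.{field}.SBIDs": sbid})

    unique_beam_nums = set(beams_col.distinct(f"beams.{field}.beam_list", query))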
From b01c4db7166bc7db7e8fec072332793d9ac4d8b5 Mon Sep 17 00:00:00 2001
From: "Thomson, Alec (CASS, Kensington)"
Date: Mon, 8 Apr 2024 16:34:36 +1000
Subject: [PATCH 02/37] Add sbid queries and paths

---
 arrakis/rmsynth_oncuts.py  | 200 +++++++++++++++++++++----------------
 arrakis/utils/fitsutils.py |  28 +++---
 2 files changed, 127 insertions(+), 101 deletions(-)

diff --git a/arrakis/rmsynth_oncuts.py b/arrakis/rmsynth_oncuts.py
index 3a37a2be..08e5a333 100644
--- a/arrakis/rmsynth_oncuts.py
+++ b/arrakis/rmsynth_oncuts.py
@@ -8,7 +8,7 @@
 from pathlib import Path
 from pprint import pformat
 from shutil import copyfile
-from typing import List
+from typing import Any, List
 from typing import NamedTuple as Struct
 from typing import Optional, Tuple, Union
@@ -49,7 +49,7 @@ class Spectrum(Struct):
     """The RMS of the spectrum"""
     bkg: np.ndarray
     """The background of the spectrum"""
-    filename: str
+    filename: Path
     """The filename associated with the spectrum"""
     header: fits.Header
     """The header associated with the spectrum"""
@@ -87,9 +87,10 @@ class StokesIFitResult(Struct):
 def rmsynthoncut3d(
     island_id: str,
     beam_tuple: Tuple[str, pd.Series],
-    outdir: str,
+    outdir: Path,
     freq: np.ndarray,
     field: str,
+    sbid: Optional[int] = None,
     phiMax_radm2: Optional[float] = None,
     dPhi_radm2: Optional[float] = None,
     nSamples: int = 5,
@@ -127,6 +128,9 @@ def rmsynthoncut3d(
     ufile = os.path.join(outdir, beam["beams"][field]["u_file"])
     # vfile = beam['beams'][field]['v_file']

+    header: fits.Header
+    dataQ: np.ndarray
+    dataI: np.ndarray
     header, dataQ = do_RMsynth_3D.readFitsCube(qfile, rm_verbose)
     header, dataU = do_RMsynth_3D.readFitsCube(ufile, rm_verbose)
     header, dataI = do_RMsynth_3D.readFitsCube(ifile, rm_verbose)
@@ -194,20 +198,27 @@ def rmsynthoncut3d(
     outer_dir = os.path.basename(os.path.dirname(ifile))

+    save_name = field if sbid is None else f"{field}_{sbid}"
     newvalues = {
         "$set": {
-            "rm3dfiles": {
-                "FDF_real_dirty": os.path.join(
-                    outer_dir, f"{prefix}FDF_real_dirty.fits"
-                ),
-                "FDF_im_dirty": os.path.join(outer_dir, f"{prefix}FDF_im_dirty.fits"),
-                "FDF_tot_dirty": os.path.join(outer_dir, f"{prefix}FDF_tot_dirty.fits"),
-                "RMSF_real": os.path.join(outer_dir, f"{prefix}RMSF_real.fits"),
-                "RMSF_tot": os.path.join(outer_dir, f"{prefix}RMSF_tot.fits"),
-                "RMSF_FWHM": os.path.join(outer_dir, f"{prefix}RMSF_FWHM.fits"),
-            },
-            "rmsynth3d": True,
-            "header": dict(header),
+            save_name: {
+                "rm3dfiles": {
+                    "FDF_real_dirty": os.path.join(
+                        outer_dir, f"{prefix}FDF_real_dirty.fits"
+                    ),
+                    "FDF_im_dirty": os.path.join(
+                        outer_dir, f"{prefix}FDF_im_dirty.fits"
+                    ),
+                    "FDF_tot_dirty": os.path.join(
+                        outer_dir, f"{prefix}FDF_tot_dirty.fits"
+                    ),
+                    "RMSF_real": os.path.join(outer_dir, f"{prefix}RMSF_real.fits"),
+                    "RMSF_tot": os.path.join(outer_dir, f"{prefix}RMSF_tot.fits"),
+                    "RMSF_FWHM": os.path.join(outer_dir, f"{prefix}RMSF_FWHM.fits"),
+                },
+                "rmsynth3d": True,
+                "header": dict(header),
+            }
         }
     }
     return pymongo.UpdateOne(myquery, newvalues)
@@ -262,14 +273,14 @@ def extract_single_spectrum(
     stokes: str,
     ion: bool,
     field_dict: dict,
-    outdir: str,
+    outdir: Path,
 ) -> Spectrum:
     """Extract a single spectrum from a cubelet"""
     if ion and (stokes == "q" or stokes == "u"):
         key = f"{stokes}_file_ion"
     else:
         key = f"{stokes}_file"
-    filename = os.path.join(outdir, field_dict[key])
+    filename = outdir / field_dict[key]
     with fits.open(filename, mode="denywrite", memmap=True) as hdulist:
         hdu = hdulist[0]
         data = np.squeeze(hdu.data)
@@ -299,7 +310,7 @@ def extract_all_spectra(
     coord: SkyCoord,
     ion: bool,
     field_dict: dict,
-    outdir: str,
+    outdir: Path,
 ) -> StokesSpectra:
     """Extract spectra from cubelets"""
     return StokesSpectra(
@@ -464,9 +475,10 @@ def update_rmtools_dict(
 def rmsynthoncut1d(
     comp_tuple: Tuple[str, pd.Series],
     beam_tuple: Tuple[str, pd.Series],
-    outdir: str,
+    outdir: Path,
     freq: np.ndarray,
     field: str,
+    sbid: Optional[int] = None,
     polyOrd: int = 3,
     phiMax_radm2: Optional[float] = None,
     dPhi_radm2: Optional[float] = None,
@@ -492,6 +504,7 @@ def rmsynthoncut1d(
         freq (list): Frequencies in Hz
         host (str): MongoDB host
         field (str): RACS field
+        sbid (int, optional): SBID. Defaults to None.
         database (bool, optional): Update MongoDB. Defaults to False.
         polyOrd (int, optional): Order of fit to I. Defaults to 3.
         phiMax_radm2 (float, optional): Max FD. Defaults to None.
@@ -598,16 +611,13 @@ def rmsynthoncut1d(
     except Exception as err:
         traceback.print_tb(err.__traceback__)
         raise err
+
     if savePlots:
         plt.close("all")
-        plotdir = os.path.join(outdir, "plots")
-        plot_files = glob(
-            os.path.join(os.path.dirname(filtered_stokes_spectra.i.filename), "*.pdf")
-        )
-        for src in plot_files:
-            base = os.path.basename(src)
-            dst = os.path.join(plotdir, base)
-            copyfile(src, dst)
+        plotdir = outdir / "plots"
+        plot_files = list(filtered_stokes_spectra.i.filename.parent.glob("*.pdf"))
+        for plot_file in plot_files:
+            copyfile(plot_file, plotdir / plot_file.name)

     # Update I, Q, U noise from data
     for stokes in "qu" if noStokesI else "iqu":
@@ -666,55 +676,56 @@ def rmsynthoncut1d(
     outer_dir = os.path.basename(os.path.dirname(filtered_stokes_spectra.i.filename))

-    # Fix for json encoding
-
+    save_name = field if sbid is None else f"{field}_{sbid}"
     newvalues = {
         "$set": {
-            "rm1dfiles": {
-                "FDF_dirty": os.path.join(outer_dir, f"{cname}_FDFdirty.dat"),
-                "RMSF": os.path.join(outer_dir, f"{cname}_RMSF.dat"),
-                "weights": os.path.join(outer_dir, f"{cname}_weight.dat"),
-                "summary_dat": os.path.join(outer_dir, f"{cname}_RMsynth.dat"),
-                "summary_json": os.path.join(outer_dir, f"{cname}_RMsynth.json"),
-            },
-            "rmsynth1d": True,
-            "header": head_dict,
-            "rmsynth_summary": mDict,
-            "spectra": {
-                "freq": np.array(freq).tolist(),
-                "I_model": (
-                    stokes_i_fit_result.modStokesI.tolist()
-                    if stokes_i_fit_result.modStokesI is not None
-                    else None
-                ),
-                "I_model_params": {
-                    "alpha": (
-                        float(stokes_i_fit_result.alpha)
-                        if stokes_i_fit_result.alpha is not None
-                        else None
-                    ),
-                    "amplitude": (
-                        float(stokes_i_fit_result.amplitude)
-                        if stokes_i_fit_result.amplitude is not None
-                        else None
-                    ),
-                    "x_0": (
-                        float(stokes_i_fit_result.x_0)
-                        if stokes_i_fit_result.x_0 is not None
-                        else None
-                    ),
-                    "model_repr": stokes_i_fit_result.model_repr,
-                },
-                "I": filtered_stokes_spectra.i.data.tolist(),
-                "Q": filtered_stokes_spectra.q.data.tolist(),
-                "U": filtered_stokes_spectra.u.data.tolist(),
-                "I_err": filtered_stokes_spectra.i.rms.tolist(),
-                "Q_err": filtered_stokes_spectra.q.rms.tolist(),
-                "U_err": filtered_stokes_spectra.u.rms.tolist(),
-                "I_bkg": filtered_stokes_spectra.i.bkg.tolist(),
-                "Q_bkg": filtered_stokes_spectra.q.bkg.tolist(),
-                "U_bkg": filtered_stokes_spectra.u.bkg.tolist(),
-            },
+            save_name: {
+                "rm1dfiles": {
+                    "FDF_dirty": os.path.join(outer_dir, f"{cname}_FDFdirty.dat"),
+                    "RMSF": os.path.join(outer_dir, f"{cname}_RMSF.dat"),
+                    "weights": os.path.join(outer_dir, f"{cname}_weight.dat"),
+                    "summary_dat": os.path.join(outer_dir, f"{cname}_RMsynth.dat"),
+                    "summary_json": os.path.join(outer_dir, f"{cname}_RMsynth.json"),
+                },
+                "rmsynth1d": True,
+                "header": head_dict,
+                "rmsynth_summary": mDict,
+                "spectra": {
+                    "freq": np.array(freq).tolist(),
+                    "I_model": (
+                        stokes_i_fit_result.modStokesI.tolist()
+                        if stokes_i_fit_result.modStokesI is not None
+                        else None
+                    ),
+                    "I_model_params": {
+                        "alpha": (
+                            float(stokes_i_fit_result.alpha)
+                            if stokes_i_fit_result.alpha is not None
+                            else None
+                        ),
+                        "amplitude": (
+                            float(stokes_i_fit_result.amplitude)
+                            if stokes_i_fit_result.amplitude is not None
+                            else None
+                        ),
+                        "x_0": (
+                            float(stokes_i_fit_result.x_0)
+                            if stokes_i_fit_result.x_0 is not None
+                            else None
+                        ),
+                        "model_repr": stokes_i_fit_result.model_repr,
+                    },
+                    "I": filtered_stokes_spectra.i.data.tolist(),
+                    "Q": filtered_stokes_spectra.q.data.tolist(),
+                    "U": filtered_stokes_spectra.u.data.tolist(),
+                    "I_err": filtered_stokes_spectra.i.rms.tolist(),
+                    "Q_err": filtered_stokes_spectra.q.rms.tolist(),
+                    "U_err": filtered_stokes_spectra.u.rms.tolist(),
+                    "I_bkg": filtered_stokes_spectra.i.bkg.tolist(),
+                    "Q_bkg": filtered_stokes_spectra.q.bkg.tolist(),
+                    "U_bkg": filtered_stokes_spectra.u.bkg.tolist(),
+                },
+            }
         }
     }
     return pymongo.UpdateOne(myquery, newvalues)
@@ -722,7 +733,7 @@
 def rmsynthoncut_i(
     comp_id: str,
-    outdir: str,
+    outdir: Path,
     freq: np.ndarray,
     host: str,
     field: str,
@@ -766,8 +777,8 @@ def rmsynthoncut_i(
     if beams is None:
         raise ValueError(f"Beams for {iname} not found")

-    ifile = os.path.join(outdir, beams["beams"][field]["i_file"])
-    outdir = os.path.dirname(ifile)
+    ifile = outdir / beams["beams"][field]["i_file"]
+    outdir = ifile.parent

     header, dataI = do_RMsynth_3D.readFitsCube(ifile, rm_verbose)
@@ -862,8 +873,9 @@ def main(
     outdir: Path,
     host: str,
     epoch: int,
-    username: Union[str, None] = None,
-    password: Union[str, None] = None,
+    sbid: Optional[int] = None,
+    username: Optional[str] = None,
+    password: Optional[str] = None,
     dimension: str = "1d",
     verbose: bool = True,
     database: bool = False,
@@ -882,8 +894,8 @@ def main(
     rm_verbose: bool = False,
     debug: bool = False,
     fit_function: str = "log",
-    tt0: Union[str, None] = None,
-    tt1: Union[str, None] = None,
+    tt0: Optional[str] = None,
+    tt1: Optional[str] = None,
     ion: bool = False,
     do_own_fit: bool = False,
 ) -> None:
@@ -894,6 +906,7 @@ def main(
         outdir (Path): Output directory
         host (str): MongoDB host
         epoch (int): Epoch
+        sbid (Union[int, None], optional): SBID. Defaults to None.
         username (Union[str, None], optional): MongoDB username. Defaults to None.
         password (Union[str, None], optional): MongoDB password. Defaults to None.
         dimension (str, optional): RMsynth dimension. Defaults to "1d".
@@ -920,12 +933,11 @@ def main(
         do_own_fit (bool, optional): Do own fit. Defaults to False.
     """
-    outdir = os.path.abspath(outdir)
-    outdir = os.path.join(outdir, "cutouts")
+    outdir = outdir.absolute() / "cutouts"

     if savePlots:
-        plotdir = os.path.join(outdir, "plots")
-        try_mkdir(plotdir)
+        plotdir = outdir / "plots"
+        plotdir.mkdir(parents=True, exist_ok=True)

     beams_col, island_col, comp_col = get_db(
         host=host, epoch=epoch, username=username, password=password
     )

     beam_query = {"$and": [{f"beams.{field}": {"$exists": True}}]}

+    if sbid is not None:
+        beam_query["$and"].append({f"beams.{field}.SBIDs": sbid})
+
     beams = pd.DataFrame(list(beams_col.find(beam_query).sort("Source_ID")))
     beams.set_index("Source_ID", drop=False, inplace=True)
     island_ids = sorted(beams_col.distinct("Source_ID", beam_query))
@@ -978,7 +993,7 @@ def main(
     # Make frequency file
     freq, freqfile = getfreq(
-        os.path.join(outdir, f"{beams.iloc[0]['beams'][f'{field}']['q_file']}"),
+        outdir / f"{beams.iloc[0]['beams'][f'{field}']['q_file']}",
         outdir=outdir,
         filename="frequencies.txt",
     )
@@ -1013,6 +1028,7 @@ def main(
             outdir=unmapped(outdir),
             freq=unmapped(freq),
             field=unmapped(field),
+            sbid=unmapped(sbid),
             polyOrd=unmapped(polyOrd),
             phiMax_radm2=unmapped(phiMax_radm2),
             dPhi_radm2=unmapped(dPhi_radm2),
@@ -1039,6 +1055,7 @@ def main(
             outdir=unmapped(outdir),
             freq=unmapped(freq),
             field=unmapped(field),
+            sbid=unmapped(sbid),
             phiMax_radm2=unmapped(phiMax_radm2),
             dPhi_radm2=unmapped(dPhi_radm2),
             nSamples=unmapped(nSamples),
@@ -1114,6 +1131,13 @@ def cli():
         help="Epoch of observation.",
     )

+    parser.add_argument(
+        "--sbid",
+        type=int,
+        default=None,
+        help="SBID of observation.",
+    )
+
     parser.add_argument(
         "--username", type=str, default=None, help="Username of mongodb."
     )
@@ -1262,18 +1286,22 @@ def cli():
     verbose = args.verbose
     rmv = args.rm_verbose
     if rmv:
-        logger.setLevel(logger.DEBUG)
+        logger.setLevel(logging.DEBUG)
     elif verbose:
-        logger.setLevel(logger.INFO)
+        logger.setLevel(logging.INFO)

     test_db(
-        host=args.host, username=args.username, password=args.password, verbose=verbose
+        host=args.host,
+        username=args.username,
+        password=args.password,
     )

     main(
         field=args.field,
         outdir=Path(args.outdir),
         host=args.host,
+        epoch=args.epoch,
+        sbid=args.sbid,
         username=args.username,
         password=args.password,
         dimension=args.dimension,
diff --git a/arrakis/utils/fitsutils.py b/arrakis/utils/fitsutils.py
index fbc551ac..82531778 100644
--- a/arrakis/utils/fitsutils.py
+++ b/arrakis/utils/fitsutils.py
@@ -3,7 +3,8 @@
 import warnings
 from glob import glob
-from typing import Any, Dict, Union
+from pathlib import Path
+from typing import Any, Dict, Optional, Tuple, Union

 import astropy.units as u
 import numpy as np
@@ -66,8 +67,10 @@ def fix_header(cutout_header: fits.Header, original_header: fits.Header) -> fits

 def getfreq(
-    cube: str, outdir: Union[str, None] = None, filename: Union[str, None] = None
-):
+    cube: Union[str, Path],
+    outdir: Optional[Path] = None,
+    filename: Union[str, Path, None] = None,
+) -> Union[u.Quantity, Tuple[u.Quantity, Path]]:
     """Get list of frequencies from FITS data.

     Gets the frequency list from a given cube. Can optionally save
@@ -104,21 +107,16 @@ def getfreq(
             del hdr[k]

     wcs = WCS(hdr)
-    freq = wcs.spectral.pixel_to_world(np.arange(data.shape[0]))  # Type: u.Quantity
+    freq: u.Quantity = wcs.spectral.pixel_to_world(np.arange(data.shape[0]))

     # Write to file if outdir is specified
     if outdir is None:
-        return freq  # Type: u.Quantity
-    else:
-        if outdir[-1] == "/":
-            outdir = outdir[:-1]
-        if filename is None:
-            outfile = f"{outdir}/frequencies.txt"
-        else:
-            outfile = f"{outdir}/{filename}"
-        logger.info(f"Saving to {outfile}")
-        np.savetxt(outfile, np.array(freq))
-        return freq, outfile  # Type: Tuple[u.Quantity, str]
+        return freq
+
+    outfile = outdir / filename if filename is not None else outdir / "frequencies.txt"
+    logger.info(f"Saving to {outfile}")
+    np.savetxt(outfile, np.array(freq))
+    return freq, outfile


 def getdata(cubedir="./", tabledir="./", mapdata=None, verbose=True):
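With the refactor above, `getfreq` accepts pathlib paths end to end, and its return type depends on whether an output directory is supplied. A brief usage sketch under the new signature (the cube path is hypothetical):

    from pathlib import Path

    from arrakis.utils.fitsutils import getfreq

    cube = Path("cutouts/island_1/island_1.q.fits")   # hypothetical cutout cube

    freq = getfreq(cube)                              # u.Quantity only
    freq, freqfile = getfreq(                         # (u.Quantity, Path) when saving
        cube, outdir=cube.parent, filename="frequencies.txt"
    )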
From 8c41f66588f34d9ac20c21fc246f943125d014d6 Mon Sep 17 00:00:00 2001
From: "Thomson, Alec (CASS, Kensington)"
Date: Mon, 8 Apr 2024 17:32:08 +1000
Subject: [PATCH 03/37] Add common args

---
 arrakis/cleanup.py        |  38 +--
 arrakis/cutout.py         |  69 +----
 arrakis/linmos.py         |  75 ++---
 arrakis/makecat.py        |  79 ++---
 arrakis/process_spice.py  | 602 +++++++++++++++++++-------------------
 arrakis/rmclean_oncuts.py | 136 +++------
 arrakis/rmsynth_oncuts.py | 167 ++++-------
 arrakis/utils/pipeline.py |  84 +++++-
 8 files changed, 569 insertions(+), 681 deletions(-)

diff --git a/arrakis/cleanup.py b/arrakis/cleanup.py
index 801851b6..f4d87486 100644
--- a/arrakis/cleanup.py
+++ b/arrakis/cleanup.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 """DANGER ZONE: Purge directories of un-needed FITS files."""
+import argparse
 import logging
 import tarfile
 from pathlib import Path
@@ -11,7 +12,7 @@
 from tqdm.auto import tqdm

 from arrakis.logger import TqdmToLogger, UltimateHelpFormatter, logger
-from arrakis.utils.pipeline import logo_str
+from arrakis.utils.pipeline import generic_parser, logo_str

 logger.setLevel(logging.INFO)
@@ -103,10 +104,7 @@ def main(
     logger.info("Cleanup done!")


-def cli():
-    """Command-line interface"""
-    import argparse
-
+def cleanup_parser(parent_parser: bool = False) -> argparse.ArgumentParser:
     # Help string to be shown using the -h option
     descStr = f"""
     {logo_str}
     Arrakis:
     Clean up after LINMOS.

     """

     # Parse the command line options
-    parser = argparse.ArgumentParser(
-        description=descStr, formatter_class=UltimateHelpFormatter
+    cleanup_parser = argparse.ArgumentParser(
+        add_help=not parent_parser,
+        description=descStr,
+        formatter_class=UltimateHelpFormatter,
     )
+    parser = cleanup_parser.add_argument_group("cleanup arguments")
     parser.add_argument(
-        "outdir",
-        metavar="outdir",
-        type=Path,
-        help="Directory containing cutouts (in subdir outdir/cutouts).",
-    )
-    parser.add_argument(
-        "-o",
         "--overwrite",
         dest="overwrite",
         action="store_true",
         help="Overwrite existing tarball",
     )
-    parser.add_argument(
-        "-v", dest="verbose", action="store_true", help="Verbose output"
-    )
+    return cleanup_parser
+
+
+def cli():
+    """Command-line interface"""
+    gen_parser = generic_parser(parent_parser=True)
+    clean_parser = cleanup_parser(parent_parser=True)
+    parser = argparse.ArgumentParser(
+        parents=[gen_parser, clean_parser],
+        formatter_class=UltimateHelpFormatter,
+        description=clean_parser.description,
+    )
     args = parser.parse_args()
+
     verbose = args.verbose
     if verbose:
diff --git a/arrakis/cutout.py b/arrakis/cutout.py
index ed75821f..948991e6 100644
--- a/arrakis/cutout.py
+++ b/arrakis/cutout.py
@@ -30,7 +30,7 @@
 from arrakis.utils.database import get_db, test_db
 from arrakis.utils.fitsutils import fix_header
 from arrakis.utils.io import try_mkdir
-from arrakis.utils.pipeline import logo_str
+from arrakis.utils.pipeline import generic_parser, logo_str

 iers.conf.auto_download = False
 warnings.filterwarnings(
@@ -544,50 +544,6 @@ def cutout_parser(parent_parser: bool = False) -> argparse.ArgumentParser:
     )
     parser = cut_parser.add_argument_group("cutout arguments")

-    parser.add_argument(
-        "field", metavar="field", type=str, help="Name of field (e.g. 2132-50A)."
-    )
-
-    parser.add_argument(
-        "datadir",
-        metavar="datadir",
-        type=str,
-        help="Directory containing data cubes in FITS format.",
-    )
-
-    parser.add_argument(
-        "host",
-        metavar="host",
-        type=str,
-        help="Host of mongodb (probably $hostname -i).",
-    )
-
-    parser.add_argument(
-        "-e",
-        "--epoch",
-        type=int,
-        default=0,
-        help="Epoch of observation.",
-    )
-
-    parser.add_argument(
-        "--sbid",
-        type=int,
-        default=None,
-        help="SBID of observation.",
-    )
-
-    parser.add_argument(
-        "--username", type=str, default=None, help="Username of mongodb."
-    )
-
-    parser.add_argument(
-        "--password", type=str, default=None, help="Password of mongodb."
-    )
-
-    parser.add_argument(
-        "-v", "--verbose", action="store_true", help="Verbose output [False]."
-    )
     parser.add_argument(
         "-p",
         "--pad",
@@ -599,28 +555,19 @@ def cutout_parser(parent_parser: bool = False) -> argparse.ArgumentParser:
     parser.add_argument(
         "-d", "--dryrun", action="store_true", help="Do a dry-run [False]."
     )
-    parser.add_argument(
-        "-s",
-        "--stokes",
-        dest="stokeslist",
-        nargs="+",
-        type=str,
-        help="List of Stokes parameters to image [ALL]",
-    )
-    parser.add_argument(
-        "--limit",
-        type=int,
-        default=None,
-        help="Limit number of islands to process [None]",
-    )

     return cut_parser


 def cli() -> None:
     """Command-line interface"""
-    parser = cutout_parser()
-
+    gen_parser = generic_parser(parent_parser=True)
+    cut_parser = cutout_parser(parent_parser=True)
+    parser = argparse.ArgumentParser(
+        formatter_class=UltimateHelpFormatter,
+        parents=[gen_parser, cut_parser],
+        description=cut_parser.description,
+    )
     args = parser.parse_args()

     verbose = args.verbose
diff --git a/arrakis/linmos.py b/arrakis/linmos.py
index afff931c..e72640fc 100644
--- a/arrakis/linmos.py
+++ b/arrakis/linmos.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 """Run LINMOS on cutouts in parallel"""
+import argparse
 import logging
 import os
 import shlex
@@ -20,6 +21,7 @@

 from arrakis.logger import UltimateHelpFormatter, logger
 from arrakis.utils.database import get_db, test_db
+from arrakis.utils.pipeline import generic_parser, logo_str

 warnings.filterwarnings(action="ignore", category=SpectralCubeWarning, append=True)
 warnings.simplefilter("ignore", category=AstropyWarning)
@@ -371,35 +373,26 @@ def main(
     logger.info("LINMOS Done!")


-def cli():
-    """Command-line interface"""
-    import argparse
-
+def linmos_parser(parent_parser: bool = False) -> argparse.ArgumentParser:
     # Help string to be shown using the -h option
-    descStr = """
+    descStr = f"""
+    {logo_str}
     Mosaic RACS beam cubes with linmos.

     """

     # Parse the command line options
-    parser = argparse.ArgumentParser(
-        description=descStr, formatter_class=UltimateHelpFormatter
+    linmos_parser = argparse.ArgumentParser(
+        add_help=not parent_parser,
+        description=descStr,
+        formatter_class=UltimateHelpFormatter,
     )
-    parser.add_argument(
-        "field", metavar="field", type=str, help="RACS field to mosaic - e.g. 2132-50A."
-    )
+    parser = linmos_parser.add_argument_group("linmos arguments")

-    parser.add_argument(
-        "datadir",
-        metavar="datadir",
-        type=str,
-        help="Directory containing cutouts (in subdir outdir/cutouts)..",
-    )
     parser.add_argument(
         "--holofile", type=str, default=None, help="Path to holography image"
     )
-
     parser.add_argument(
         "--yanda",
         type=str,
@@ -412,49 +405,19 @@ def cli():
         type=Path,
         help="Path to an existing yandasoft singularity container image. ",
     )
+    return linmos_parser

-    parser.add_argument(
-        "-s",
-        "--stokes",
-        dest="stokeslist",
-        nargs="+",
-        type=str,
-        help="List of Stokes parameters to image [ALL]",
-    )
-
-    parser.add_argument(
-        "host",
-        metavar="host",
-        type=str,
-        help="Host of mongodb (probably $hostname -i).",
-    )
-    parser.add_argument(
-        "-e",
-        "--epoch",
-        type=int,
-        default=0,
-        help="Epoch of observation.",
-    )
-
-    parser.add_argument(
-        "-v", dest="verbose", action="store_true", help="Verbose output [False]."
-    )
-
-    parser.add_argument(
-        "--username", type=str, default=None, help="Username of mongodb."
-    )
+def cli():
+    """Command-line interface"""

-    parser.add_argument(
-        "--password", type=str, default=None, help="Password of mongodb."
-    )
-    parser.add_argument(
-        "--limit",
-        type=Optional[int],
-        default=None,
-        help="Limit the number of islands to process.",
+    gen_parser = generic_parser(parent_parser=True)
+    lin_parser = linmos_parser(parent_parser=True)
+    parser = argparse.ArgumentParser(
+        parents=[gen_parser, lin_parser],
+        formatter_class=UltimateHelpFormatter,
+        description=lin_parser.description,
     )
-
     args = parser.parse_args()

     verbose = args.verbose
diff --git a/arrakis/makecat.py b/arrakis/makecat.py
index b8732ead..abb8e1d0 100644
--- a/arrakis/makecat.py
+++ b/arrakis/makecat.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 """Make an Arrakis catalogue"""
+import argparse
 import logging
 import os
 import time
@@ -27,7 +28,7 @@
 from arrakis import columns_possum
 from arrakis.logger import TqdmToLogger, UltimateHelpFormatter, logger
 from arrakis.utils.database import get_db, get_field_db, test_db
-from arrakis.utils.pipeline import logo_str
+from arrakis.utils.pipeline import generic_parser, logo_str
 from arrakis.utils.plotting import latexify
 from arrakis.utils.typing import ArrayLike, TableLike
@@ -1012,18 +1013,7 @@ def main(
     logger.info("Done!")


-def cli():
-    """Command-line interface"""
-    import argparse
-
-    from astropy.utils.exceptions import AstropyWarning
-
-    warnings.simplefilter("ignore", category=AstropyWarning)
-    from astropy.io.fits.verify import VerifyWarning
-
-    warnings.simplefilter("ignore", category=VerifyWarning)
-    # Help string to be shown using the -h option
-
+def cat_parser(parent_parser: bool = False) -> argparse.ArgumentParser:
     # Help string to be shown using the -h option
     descStr = f"""
     {logo_str}
     Arrakis:
     Make the catalogue.

     """

     # Parse the command line options
-    parser = argparse.ArgumentParser(
-        description=descStr, formatter_class=UltimateHelpFormatter
+    cat_parser = argparse.ArgumentParser(
+        add_help=not parent_parser,
+        description=descStr,
+        formatter_class=UltimateHelpFormatter,
     )
-    parser.add_argument(
-        "field", metavar="field", type=str, help="RACS field to mosaic - e.g. 2132-50A."
-    )
-
-    parser.add_argument(
-        "host",
-        metavar="host",
-        type=str,
-        help="Host of mongodb (probably $hostname -i).",
-    )
-
-    parser.add_argument(
-        "-e",
-        "--epoch",
-        type=int,
-        default=0,
-        help="Epoch of observation.",
-    )
-
+    parser = cat_parser.add_argument_group("catalogue arguments")
     parser.add_argument(
         "--leakage_degree",
         type=int,
         default=4,
         help="Degree of leakage polynomial fit.",
     )

     parser.add_argument(
         "--leakage_bins",
         type=int,
         default=16,
         help="Number of bins for leakage fit.",
     )

     parser.add_argument(
         "--leakage_snr",
         type=float,
         default=30.0,
         help="SNR cut for leakage fit.",
     )

     parser.add_argument(
-        "--username", type=str, default=None, help="Username of mongodb."
-    )
-
-    parser.add_argument(
-        "--password", type=str, default=None, help="Password of mongodb."
-    )
-
-    parser.add_argument(
-        "-v", "--verbose", action="store_true", help="verbose output [False]."
-    )
-
-    parser.add_argument(
-        "-w",
         "--write",
         dest="outfile",
         default=None,
         type=str,
-        help="File to save table to [None].",
+        help="File to save table to.",
     )
+    return cat_parser
+
+
+def cli():
+    """Command-line interface"""
+    import argparse
+
+    from astropy.utils.exceptions import AstropyWarning
+
+    warnings.simplefilter("ignore", category=AstropyWarning)
+    from astropy.io.fits.verify import VerifyWarning
+
+    warnings.simplefilter("ignore", category=VerifyWarning)
+    # Help string to be shown using the -h option
+
+    gen_parser = generic_parser(parent_parser=True)
+    catalogue_parser = cat_parser(parent_parser=True)
+    parser = argparse.ArgumentParser(
+        parents=[gen_parser, catalogue_parser],
+        formatter_class=UltimateHelpFormatter,
+        description=catalogue_parser.description,
+    )
     args = parser.parse_args()

     verbose = args.verbose
diff --git a/arrakis/process_spice.py b/arrakis/process_spice.py
index be811658..e0c37bf4 100644
--- a/arrakis/process_spice.py
+++ b/arrakis/process_spice.py
@@ -24,7 +24,7 @@
 )
 from arrakis.logger import UltimateHelpFormatter, logger
 from arrakis.utils.database import test_db
-from arrakis.utils.pipeline import logo_str
+from arrakis.utils.pipeline import generic_parser, logo_str


 @flow(name="Combining+Synthesis on Arrakis")
@@ -338,311 +338,325 @@ def cli():
     $ mongod --dbpath=/path/to/database --bind_ip $(hostname -i)

     """
-
+    gen_parser = generic_parser(parent_parser=True)
     imager_parser = imager.imager_parser(parent_parser=True)
-
+    cutout_parser = cutout.cutout_parser(parent_parser=True)
+    linmos_parser = linmos.linmos_parser(parent_parser=True)
+    synth_parser = rmsynth_oncuts.rmsynth_parser(parent_parser=True)
+    rmclean_parser = rmclean_oncuts.clean_parser(parent_parser=True)
+    catalogue_parser = makecat.cat_parser(parent_parser=True)
+    clean_parser = cleanup.cleanup_parser(parent_parser=True)
     # Parse the command line options
     parser = configargparse.ArgParser(
         default_config_files=[".default_config.cfg"],
         description=descStr,
         formatter_class=UltimateHelpFormatter,
-        parents=[imager_parser],
-    )
-    parser.add("--config", required=False, is_config_file=True, help="Config file path")
-
-    parser.add_argument(
-        "field", metavar="field", type=str, help="Name of field (e.g. 2132-50A)."
-    )
-    parser.add_argument(
-        "--epoch",
-        type=int,
-        default=0,
-        help="Epoch to read field data from",
-    )
-
-    parser.add_argument(
-        "--host",
-        default=None,
-        type=str,
-        help="Host of mongodb (probably $hostname -i).",
-    )
-
-    parser.add_argument(
-        "--username", type=str, default=None, help="Username of mongodb."
-    )
-
-    parser.add_argument(
-        "--password", type=str, default=None, help="Password of mongodb."
-    )
-
-    parser.add_argument(
-        "--dask_config",
-        type=str,
-        default=None,
-        help="Config file for Dask SlurmCLUSTER.",
-    )
-    parser.add_argument(
-        "--imager_dask_config",
-        type=str,
-        default=None,
-        help="Config file for Dask SlurmCLUSTER.",
-    )
-    parser.add_argument(
-        "--holofile", type=str, default=None, help="Path to holography image"
-    )
-
-    parser.add_argument(
-        "--yanda",
-        type=str,
-        default="1.3.0",
-        help="Yandasoft version to pull from DockerHub [1.3.0].",
-    )
-
-    parser.add_argument(
-        "--yanda_image",
-        default=None,
-        type=Path,
-        help="Path to an existing yandasoft singularity container image. ",
-    )
-
-    flowargs = parser.add_argument_group("pipeline flow options")
-    flowargs.add_argument(
-        "--imager_only",
-        action="store_true",
-        help="Only run the imager component of the pipeline. ",
-    )
-    flowargs.add_argument(
-        "--skip_imager", action="store_true", help="Skip imaging stage [False]."
-    )
-    flowargs.add_argument(
-        "--skip_cutout", action="store_true", help="Skip cutout stage [False]."
-    )
-    flowargs.add_argument(
-        "--skip_linmos", action="store_true", help="Skip LINMOS stage [False]."
-    )
-    flowargs.add_argument(
-        "--skip_cleanup", action="store_true", help="Skip cleanup stage [False]."
-    )
-    flowargs.add_argument(
-        "--skip_frion", action="store_true", help="Skip cleanup stage [False]."
-    )
-    flowargs.add_argument(
-        "--skip_rmsynth", action="store_true", help="Skip RM Synthesis stage [False]."
-    )
-    flowargs.add_argument(
-        "--skip_rmclean", action="store_true", help="Skip RM-CLEAN stage [False]."
-    )
-    flowargs.add_argument(
-        "--skip_cat", action="store_true", help="Skip catalogue stage [False]."
-    )
-
-    options = parser.add_argument_group("output options")
-    options.add_argument(
-        "-v", "--verbose", action="store_true", help="Verbose output [False]."
-    )
-
-    cutargs = parser.add_argument_group("cutout arguments")
-    cutargs.add_argument(
-        "-p",
-        "--pad",
-        type=float,
-        default=5,
-        help="Number of beamwidths to pad around source [5].",
-    )
-
-    cutargs.add_argument("--dryrun", action="store_true", help="Do a dry-run [False].")
-
-    synth = parser.add_argument_group("RM-synth/CLEAN arguments")
-
-    synth.add_argument(
-        "--dimension",
-        default="1d",
-        help="How many dimensions for RMsynth [1d] or '3d'.",
-    )
-
-    synth.add_argument(
-        "-m",
-        "--database",
-        action="store_true",
-        help="Add RMsynth data to MongoDB [False].",
-    )
-
-    synth.add_argument(
-        "--tt0",
-        default=None,
-        type=str,
-        help="TT0 MFS image -- will be used for model of Stokes I -- also needs --tt1.",
-    )
-
-    synth.add_argument(
-        "--tt1",
-        default=None,
-        type=str,
-        help="TT1 MFS image -- will be used for model of Stokes I -- also needs --tt0.",
-    )
-
-    synth.add_argument(
-        "--validate", action="store_true", help="Run on RMsynth Stokes I [False]."
-    )
-
-    synth.add_argument(
-        "--limit", default=None, type=int, help="Limit number of sources [All]."
-    )
-    synth.add_argument(
-        "--own_fit",
-        dest="do_own_fit",
-        action="store_true",
-        help="Use own Stokes I fit function [False].",
-    )
-    tools = parser.add_argument_group("RM-tools arguments")
-    # RM-tools args
-    tools.add_argument(
-        "-sp", "--savePlots", action="store_true", help="save the plots [False]."
-    )
-    tools.add_argument(
-        "-w",
-        "--weightType",
-        default="variance",
-        help="weighting [variance] (all 1s) or 'uniform'.",
-    )
-    tools.add_argument(
-        "--fit_function",
-        type=str,
-        default="log",
-        help="Stokes I fitting function: 'linear' or ['log'] polynomials.",
-    )
-    tools.add_argument(
-        "-t",
-        "--fitRMSF",
-        action="store_true",
-        help="Fit a Gaussian to the RMSF [False]",
-    )
-    tools.add_argument(
-        "-l",
-        "--phiMax_radm2",
-        type=float,
-        default=None,
-        help="Absolute max Faraday depth sampled (overrides NSAMPLES) [Auto].",
-    )
-    tools.add_argument(
-        "-d",
-        "--dPhi_radm2",
-        type=float,
-        default=None,
-        help="Width of Faraday depth channel [Auto].",
-    )
-    tools.add_argument(
-        "-s",
-        "--nSamples",
-        type=float,
-        default=5,
-        help="Number of samples across the FWHM RMSF.",
-    )
-    tools.add_argument(
-        "-o",
-        "--polyOrd",
-        type=int,
-        default=3,
-        help="polynomial order to fit to I spectrum [3].",
-    )
-    tools.add_argument(
-        "-i",
-        "--noStokesI",
-        action="store_true",
-        help="ignore the Stokes I spectrum [False].",
-    )
-    tools.add_argument(
-        "--showPlots", action="store_true", help="show the plots [False]."
-    )
-    tools.add_argument(
-        "-R",
-        "--not_RMSF",
-        action="store_true",
-        help="Skip calculation of RMSF? [False]",
-    )
-    tools.add_argument(
-        "-rmv",
-        "--rm_verbose",
-        action="store_true",
-        help="Verbose RMsynth/CLEAN [False].",
-    )
-    tools.add_argument(
-        "-D",
-        "--debug",
-        action="store_true",
-        help="turn on debugging messages & plots [False].",
-    )
-    # RM-tools args
-    tools.add_argument(
-        "-c",
-        "--cutoff",
-        type=float,
-        default=-3,
-        help="CLEAN cutoff (+ve = absolute, -ve = sigma) [-3].",
-    )
-    tools.add_argument(
-        "-n",
-        "--maxIter",
-        type=int,
-        default=10000,
-        help="maximum number of CLEAN iterations [10000].",
-    )
-    tools.add_argument(
-        "-g", "--gain", type=float, default=0.1, help="CLEAN loop gain [0.1]."
-    )
-    tools.add_argument(
-        "--window",
-        type=float,
-        default=None,
-        help="Further CLEAN in mask to this threshold [False].",
-    )
-    tools.add_argument(
-        "--ionex_server",
-        type=str,
-        default="ftp://ftp.aiub.unibe.ch/CODE/",
-        help="IONEX server [ftp://ftp.aiub.unibe.ch/CODE/].",
-    )
-    tools.add_argument(
-        "--ionex_prefix",
-        type=str,
-        default="codg",
-        help="IONEX prefix.",
-    )
-    tools.add_argument(
-        "--ionex_proxy_server",
-        type=str,
-        default=None,
-        help="Proxy server [None].",
-    )
-    tools.add_argument(
-        "--ionex_formatter",
-        type=str,
-        default=None,
-        help="IONEX formatter [None].",
-    )
-    tools.add_argument(
-        "--ionex_predownload",
-        action="store_true",
-        help="Pre-download IONEX files [False].",
-    )
-    cat = parser.add_argument_group("catalogue arguments")
-    # Cat args
-    cat.add_argument(
-        "--outfile", default=None, type=str, help="File to save table to [None]."
-    )
+        parents=[
+            gen_parser,
+            imager_parser,
+            cutout_parser,
+            linmos_parser,
+            synth_parser,
+            rmclean_parser,
+            catalogue_parser,
+            clean_parser,
+        ],
+    )
+    # parser.add("--config", required=False, is_config_file=True, help="Config file path")
+
+    # parser.add_argument(
+    #     "field", metavar="field", type=str, help="Name of field (e.g. 2132-50A)."
+    # )
+    # parser.add_argument(
+    #     "--epoch",
+    #     type=int,
+    #     default=0,
+    #     help="Epoch to read field data from",
+    # )
+
+    # parser.add_argument(
+    #     "--host",
+    #     default=None,
+    #     type=str,
+    #     help="Host of mongodb (probably $hostname -i).",
+    # )
+
+    # parser.add_argument(
+    #     "--username", type=str, default=None, help="Username of mongodb."
+    # )
+
+    # parser.add_argument(
+    #     "--password", type=str, default=None, help="Password of mongodb."
+    # )
+
+    # parser.add_argument(
+    #     "--dask_config",
+    #     type=str,
+    #     default=None,
+    #     help="Config file for Dask SlurmCLUSTER.",
+    # )
+    # parser.add_argument(
+    #     "--imager_dask_config",
+    #     type=str,
+    #     default=None,
+    #     help="Config file for Dask SlurmCLUSTER.",
+    # )
+    # parser.add_argument(
+    #     "--holofile", type=str, default=None, help="Path to holography image"
+    # )
+
+    # parser.add_argument(
+    #     "--yanda",
+    #     type=str,
+    #     default="1.3.0",
+    #     help="Yandasoft version to pull from DockerHub [1.3.0].",
+    # )
+
+    # parser.add_argument(
+    #     "--yanda_image",
+    #     default=None,
+    #     type=Path,
+    #     help="Path to an existing yandasoft singularity container image. ",
+    # )
+
+    # flowargs = parser.add_argument_group("pipeline flow options")
+    # flowargs.add_argument(
+    #     "--imager_only",
+    #     action="store_true",
+    #     help="Only run the imager component of the pipeline. ",
+    # )
+    # flowargs.add_argument(
+    #     "--skip_imager", action="store_true", help="Skip imaging stage [False]."
+    # )
+    # flowargs.add_argument(
+    #     "--skip_cutout", action="store_true", help="Skip cutout stage [False]."
+    # )
+    # flowargs.add_argument(
+    #     "--skip_linmos", action="store_true", help="Skip LINMOS stage [False]."
+    # )
+    # flowargs.add_argument(
+    #     "--skip_cleanup", action="store_true", help="Skip cleanup stage [False]."
+    # )
+    # flowargs.add_argument(
+    #     "--skip_frion", action="store_true", help="Skip cleanup stage [False]."
+    # )
+    # flowargs.add_argument(
+    #     "--skip_rmsynth", action="store_true", help="Skip RM Synthesis stage [False]."
+    # )
+    # flowargs.add_argument(
+    #     "--skip_rmclean", action="store_true", help="Skip RM-CLEAN stage [False]."
+    # )
+    # flowargs.add_argument(
+    #     "--skip_cat", action="store_true", help="Skip catalogue stage [False]."
+    # )
+
+    # options = parser.add_argument_group("output options")
+    # options.add_argument(
+    #     "-v", "--verbose", action="store_true", help="Verbose output [False]."
+    # )
+
+    # cutargs = parser.add_argument_group("cutout arguments")
+    # cutargs.add_argument(
+    #     "-p",
+    #     "--pad",
+    #     type=float,
+    #     default=5,
+    #     help="Number of beamwidths to pad around source [5].",
+    # )
+
+    # cutargs.add_argument("--dryrun", action="store_true", help="Do a dry-run [False].")
+
+    # synth = parser.add_argument_group("RM-synth/CLEAN arguments")
+
+    # synth.add_argument(
+    #     "--dimension",
+    #     default="1d",
+    #     help="How many dimensions for RMsynth [1d] or '3d'.",
+    # )
+
+    # synth.add_argument(
+    #     "-m",
+    #     "--database",
+    #     action="store_true",
+    #     help="Add RMsynth data to MongoDB [False].",
+    # )
+
+    # synth.add_argument(
+    #     "--tt0",
+    #     default=None,
+    #     type=str,
+    #     help="TT0 MFS image -- will be used for model of Stokes I -- also needs --tt1.",
+    # )
+
+    # synth.add_argument(
+    #     "--tt1",
+    #     default=None,
+    #     type=str,
+    #     help="TT1 MFS image -- will be used for model of Stokes I -- also needs --tt0.",
+    # )
+
+    # synth.add_argument(
+    #     "--validate", action="store_true", help="Run on RMsynth Stokes I [False]."
+    # )
+
+    # synth.add_argument(
+    #     "--limit", default=None, type=int, help="Limit number of sources [All]."
+    # )
+    # synth.add_argument(
+    #     "--own_fit",
+    #     dest="do_own_fit",
+    #     action="store_true",
+    #     help="Use own Stokes I fit function [False].",
+    # )
+    # tools = parser.add_argument_group("RM-tools arguments")
+    # # RM-tools args
+    # tools.add_argument(
+    #     "-sp", "--savePlots", action="store_true", help="save the plots [False]."
+    # )
+    # tools.add_argument(
+    #     "-w",
+    #     "--weightType",
+    #     default="variance",
+    #     help="weighting [variance] (all 1s) or 'uniform'.",
+    # )
+    # tools.add_argument(
+    #     "--fit_function",
+    #     type=str,
+    #     default="log",
+    #     help="Stokes I fitting function: 'linear' or ['log'] polynomials.",
+    # )
+    # tools.add_argument(
+    #     "-t",
+    #     "--fitRMSF",
+    #     action="store_true",
+    #     help="Fit a Gaussian to the RMSF [False]",
+    # )
+    # tools.add_argument(
+    #     "-l",
+    #     "--phiMax_radm2",
+    #     type=float,
+    #     default=None,
+    #     help="Absolute max Faraday depth sampled (overrides NSAMPLES) [Auto].",
+    # )
+    # tools.add_argument(
+    #     "-d",
+    #     "--dPhi_radm2",
+    #     type=float,
+    #     default=None,
+    #     help="Width of Faraday depth channel [Auto].",
+    # )
+    # tools.add_argument(
+    #     "-s",
+    #     "--nSamples",
+    #     type=float,
+    #     default=5,
+    #     help="Number of samples across the FWHM RMSF.",
+    # )
+    # tools.add_argument(
+    #     "-o",
+    #     "--polyOrd",
+    #     type=int,
+    #     default=3,
+    #     help="polynomial order to fit to I spectrum [3].",
+    # )
+    # tools.add_argument(
+    #     "-i",
+    #     "--noStokesI",
+    #     action="store_true",
+    #     help="ignore the Stokes I spectrum [False].",
+    # )
+    # tools.add_argument(
+    #     "--showPlots", action="store_true", help="show the plots [False]."
+    # )
+    # tools.add_argument(
+    #     "-R",
+    #     "--not_RMSF",
+    #     action="store_true",
+    #     help="Skip calculation of RMSF? [False]",
+    # )
+    # tools.add_argument(
+    #     "-rmv",
+    #     "--rm_verbose",
+    #     action="store_true",
+    #     help="Verbose RMsynth/CLEAN [False].",
+    # )
+    # tools.add_argument(
+    #     "-D",
+    #     "--debug",
+    #     action="store_true",
+    #     help="turn on debugging messages & plots [False].",
+    # )
+    # # RM-tools args
+    # tools.add_argument(
+    #     "-c",
+    #     "--cutoff",
+    #     type=float,
+    #     default=-3,
+    #     help="CLEAN cutoff (+ve = absolute, -ve = sigma) [-3].",
+    # )
+    # tools.add_argument(
+    #     "-n",
+    #     "--maxIter",
+    #     type=int,
+    #     default=10000,
+    #     help="maximum number of CLEAN iterations [10000].",
+    # )
+    # tools.add_argument(
+    #     "-g", "--gain", type=float, default=0.1, help="CLEAN loop gain [0.1]."
+    # )
+    # tools.add_argument(
+    #     "--window",
+    #     type=float,
+    #     default=None,
+    #     help="Further CLEAN in mask to this threshold [False].",
+    # )
+    # tools.add_argument(
+    #     "--ionex_server",
+    #     type=str,
+    #     default="ftp://ftp.aiub.unibe.ch/CODE/",
+    #     help="IONEX server [ftp://ftp.aiub.unibe.ch/CODE/].",
+    # )
+    # tools.add_argument(
+    #     "--ionex_prefix",
+    #     type=str,
+    #     default="codg",
+    #     help="IONEX prefix.",
+    # )
+    # tools.add_argument(
+    #     "--ionex_proxy_server",
+    #     type=str,
+    #     default=None,
+    #     help="Proxy server [None].",
+    # )
+    # tools.add_argument(
+    #     "--ionex_formatter",
+    #     type=str,
+    #     default=None,
+    #     help="IONEX formatter [None].",
+    # )
+    # tools.add_argument(
+    #     "--ionex_predownload",
+    #     action="store_true",
+    #     help="Pre-download IONEX files [False].",
+    # )
+    # cat = parser.add_argument_group("catalogue arguments")
+    # # Cat args
+    # cat.add_argument(
+    #     "--outfile", default=None, type=str, help="File to save table to [None]."
+    # )

     args = parser.parse_args()
     parser.print_values()

-    verbose = args.verbose
-    if verbose:
-        logger.setLevel(logging.INFO)
+    # verbose = args.verbose
+    # if verbose:
+    #     logger.setLevel(logging.INFO)

-    logger.info(logo_str)
-    logger.info("\n\nArguments: ")
-    logger.info(args)
+    # logger.info(logo_str)
+    # logger.info("\n\nArguments: ")
+    # logger.info(args)

-    main(args)
+    # main(args)


 if __name__ == "__main__":
diff --git a/arrakis/rmclean_oncuts.py b/arrakis/rmclean_oncuts.py
index 9e045f26..d2ecf820 100644
--- a/arrakis/rmclean_oncuts.py
+++ b/arrakis/rmclean_oncuts.py
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
 """Run RM-synthesis on cutouts in parallel"""
+import argparse
+import logging
 import os
 import sys
 import warnings
@@ -16,9 +18,10 @@
 from RMtools_1D import do_RMclean_1D
 from RMtools_3D import do_RMclean_3D

+from arrakis import rmsynth_oncuts
 from arrakis.logger import UltimateHelpFormatter, logger
 from arrakis.utils.database import get_db, test_db
-from arrakis.utils.pipeline import logo_str
+from arrakis.utils.pipeline import generic_parser, logo_str


 @task(name="1D RM-CLEAN")
@@ -333,16 +336,7 @@ def main(
     logger.info("RM-CLEAN done!")


-def cli():
-    """Command-line interface"""
-    import argparse
-
-    from astropy.utils.exceptions import AstropyWarning
-
-    warnings.simplefilter("ignore", category=AstropyWarning)
-    from astropy.io.fits.verify import VerifyWarning
-
-    warnings.simplefilter("ignore", category=VerifyWarning)
+def clean_parser(parent_parser: bool = False) -> argparse.ArgumentParser:
     # Help string to be shown using the -h option
     descStr = f"""
     {logo_str}
     Arrakis Stage 6: RM-CLEAN

     Note: Runs on brightest sources first.

     """

     # Parse the command line options
-    parser = argparse.ArgumentParser(
-        description=descStr, formatter_class=UltimateHelpFormatter
-    )
-    parser.add_argument(
-        "field", metavar="field", type=str, help="RACS field to mosaic - e.g. 2132-50A."
-    )
-    parser.add_argument(
-        "outdir",
-        metavar="outdir",
-        type=Path,
-        help="Directory containing cutouts (in subdir outdir/cutouts).",
-    )
-
-    parser.add_argument(
-        "host",
-        metavar="host",
-        type=str,
-        help="Host of mongodb (probably $hostname -i).",
-    )
-
-    parser.add_argument(
-        "-e",
-        "--epoch",
-        type=int,
-        default=0,
-        help="Epoch of observation.",
-    )
-
-    parser.add_argument(
-        "--username", type=str, default=None, help="Username of mongodb."
-    )
-
-    parser.add_argument(
-        "--password", type=str, default=None, help="Password of mongodb."
-    )
-
-    parser.add_argument(
-        "--dimension",
-        dest="dimension",
-        default="1d",
-        help="How many dimensions for RMsynth [1d] or '3d'.",
-    )
-
-    parser.add_argument(
-        "-v", dest="verbose", action="store_true", help="verbose output [False]."
-    )
-
-    parser.add_argument(
-        "-m", dest="database", action="store_true", help="Add data to MongoDB [False]."
-    )
-    parser.add_argument(
-        "-sp", "--savePlots", action="store_true", help="save the plots [False]."
-    )
-
-    parser.add_argument(
-        "--limit",
-        dest="limit",
-        default=None,
-        type=int,
-        help="Limit number of sources [All].",
+    clean_parser = argparse.ArgumentParser(
+        add_help=not parent_parser,
+        description=descStr,
+        formatter_class=UltimateHelpFormatter,
     )
+    parser = clean_parser.add_argument_group("rm-clean arguments")

     # RM-tools args
     parser.add_argument(
-        "-c",
-        dest="cutoff",
+        "--cutoff",
         type=float,
         default=-3,
-        help="CLEAN cutoff (+ve = absolute, -ve = sigma) [-3].",
+        help="CLEAN cutoff (+ve = absolute, -ve = sigma).",
     )
     parser.add_argument(
-        "-n",
-        dest="maxIter",
+        "--maxIter",
         type=int,
         default=10000,
-        help="maximum number of CLEAN iterations [10000].",
+        help="maximum number of CLEAN iterations.",
     )
-    parser.add_argument(
-        "-g", dest="gain", type=float, default=0.1, help="CLEAN loop gain [0.1]."
-    )
+    parser.add_argument("--gain", type=float, default=0.1, help="CLEAN loop gain.")
     parser.add_argument(
-        "-w",
-        dest="window",
+        "--window",
         type=float,
         default=None,
-        help="Further CLEAN in mask to this threshold [False].",
-    )
-    parser.add_argument(
-        "-p", dest="showPlots", action="store_true", help="show the plots [False]."
+        help="Further CLEAN in mask to this threshold.",
     )
-    parser.add_argument(
-        "-rmv", dest="rm_verbose", action="store_true", help="Verbose RM-CLEAN [False]."
-    )
+
+    return clean_parser
+
+
+def cli():
+    """Command-line interface"""
+
+    from astropy.utils.exceptions import AstropyWarning
+
+    warnings.simplefilter("ignore", category=AstropyWarning)
+    from astropy.io.fits.verify import VerifyWarning
+
+    warnings.simplefilter("ignore", category=VerifyWarning)
+
+    gen_parser = generic_parser(parent_parser=True)
+    synth_parser = rmsynth_oncuts.rmsynth_parser(parent_parser=True)
+    rmclean_parser = clean_parser(parent_parser=True)
+
+    parser = argparse.ArgumentParser(
+        parents=[gen_parser, synth_parser, rmclean_parser],
+        formatter_class=UltimateHelpFormatter,
+        description=rmclean_parser.description,
+    )
     args = parser.parse_args()

     verbose = args.verbose
     rmv = args.rm_verbose
     host = args.host
-    test_db(
-        host=args.host, username=args.username, password=args.password, verbose=verbose
-    )
+    test_db(host=args.host, username=args.username, password=args.password)

     if rmv:
         logger.setLevel(
-            level=logger.DEBUG,
+            level=logging.DEBUG,
         )
     elif verbose:
         logger.setLevel(
-            level=logger.INFO,
+            level=logging.INFO,
         )

     main(
         field=args.field,
         outdir=Path(args.outdir),
         host=host,
+        epoch=args.epoch,
         username=args.username,
         password=args.password,
         dimension=args.dimension,
diff --git a/arrakis/rmsynth_oncuts.py b/arrakis/rmsynth_oncuts.py
index 08e5a333..6494105c 100644
--- a/arrakis/rmsynth_oncuts.py
+++ b/arrakis/rmsynth_oncuts.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 """Run RM-CLEAN on cutouts in parallel"""
+import argparse
 import logging
 import os
 import traceback
@@ -35,7 +36,7 @@
 from arrakis.utils.fitsutils import getfreq
 from arrakis.utils.fitting import fit_pl, fitted_mean, fitted_std
 from arrakis.utils.io import try_mkdir
-from arrakis.utils.pipeline import logo_str
+from arrakis.utils.pipeline import generic_parser, logo_str

 logger.setLevel(logging.INFO)
@@ -1081,17 +1082,7 @@ def main(
     logger.info("RMsynth done!")


-def cli():
-    """Command-line interface"""
-    import argparse
-
-    from astropy.utils.exceptions import AstropyWarning
-
-    warnings.simplefilter("ignore", category=AstropyWarning)
-    from astropy.io.fits.verify import VerifyWarning
-
-    warnings.simplefilter("ignore", category=VerifyWarning)
-    warnings.simplefilter("ignore", category=RuntimeWarning)
+def rmsynth_parser(parent_parser: bool = False) -> argparse.ArgumentParser:
     # Help string to be shown using the -h option
     descStr = f"""
     {logo_str}
     Arrakis Stage 5: RM-synthesis

     Note: Runs on brightest sources first.

     """

     # Parse the command line options
-    parser = argparse.ArgumentParser(
-        description=descStr, formatter_class=UltimateHelpFormatter
-    )
-    parser.add_argument(
-        "field", metavar="field", type=str, help="RACS field to mosaic - e.g. 2132-50A."
-    )
-    parser.add_argument(
-        "outdir",
-        metavar="outdir",
-        type=Path,
-        help="Directory containing cutouts (in subdir outdir/cutouts).",
-    )
-
-    parser.add_argument(
-        "host",
-        metavar="host",
-        type=str,
-        help="Host of mongodb (probably $hostname -i).",
-    )
-
-    parser.add_argument(
-        "-e",
-        "--epoch",
-        type=int,
-        default=0,
-        help="Epoch of observation.",
-    )
-
-    parser.add_argument(
-        "--sbid",
-        type=int,
-        default=None,
-        help="SBID of observation.",
-    )
-
-    parser.add_argument(
-        "--username", type=str, default=None, help="Username of mongodb."
-    )
-
-    parser.add_argument(
-        "--password", type=str, default=None, help="Password of mongodb."
+    rmsynth_parser = argparse.ArgumentParser(
+        description=descStr,
+        formatter_class=UltimateHelpFormatter,
+        add_help=not parent_parser,
     )
+    parser = rmsynth_parser.add_argument_group("rm-synth arguments")

     parser.add_argument(
         "--dimension",
         dest="dimension",
         default="1d",
-        help="How many dimensions for RMsynth [1d] or '3d'.",
-    )
-
-    parser.add_argument(
-        "-v", dest="verbose", action="store_true", help="verbose output [False]."
+        help="How many dimensions for RMsynth '1d' or '3d'.",
     )

     parser.add_argument(
-        "--ion", action="store_true", help="Use ionospheric-corrected data [False]."
+        "--ion", action="store_true", help="Use ionospheric-corrected data."
     )

     parser.add_argument(
-        "-m", dest="database", action="store_true", help="Add data to MongoDB [False]."
+        "-m", dest="database", action="store_true", help="Add data to MongoDB."
     )

     parser.add_argument(
         "--validate",
         dest="validate",
         action="store_true",
-        help="Run on Stokes I [False].",
-    )
-
-    parser.add_argument(
-        "--limit",
-        dest="limit",
-        default=None,
-        type=int,
-        help="Limit number of sources [All].",
+        help="Run on Stokes I.",
     )

     parser.add_argument(
         "--own_fit",
         dest="do_own_fit",
         action="store_true",
-        help="Use own Stokes I fit function [False].",
+        help="Use own Stokes I fit function.",
     )

-    # RM-tools args
+    parser.add_argument("--savePlots", action="store_true", help="save the plots.")
     parser.add_argument(
-        "-sp", "--savePlots", action="store_true", help="save the plots [False]."
-    )
-    parser.add_argument(
-        "-w",
-        dest="weightType",
+        "--weightType",
         default="variance",
-        help="weighting [variance] (all 1s) or 'uniform'.",
+        help="weighting (inverse) 'variance' or 'uniform' (all 1s).",
     )
     parser.add_argument(
-        "-f",
-        dest="fit_function",
+        "--fit_function",
         type=str,
         default="log",
-        help="Stokes I fitting function: 'linear' or ['log'] polynomials.",
+        help="Stokes I fitting function: 'linear' or 'log' polynomials.",
     )
     parser.add_argument(
-        "-t",
-        dest="fitRMSF",
+        "--fitRMSF",
         action="store_true",
-        help="Fit a Gaussian to the RMSF [False]",
+        help="Fit a Gaussian to the RMSF",
     )
     parser.add_argument(
-        "-l",
-        dest="phiMax_radm2",
+        "--phiMax_radm2",
         type=float,
         default=None,
-        help="Absolute max Faraday depth sampled (overrides NSAMPLES) [Auto].",
+        help="Absolute max Faraday depth sampled (overrides NSAMPLES).",
     )
     parser.add_argument(
-        "-d",
-        dest="dPhi_radm2",
+        "--dPhi_radm2",
         type=float,
         default=None,
-        help="Width of Faraday depth channel [Auto].",
+        help="Width of Faraday depth channel.",
     )
     parser.add_argument(
-        "-s",
-        dest="nSamples",
+        "--nSamples",
         type=float,
         default=5,
         help="Number of samples across the FWHM RMSF.",
     )
     parser.add_argument(
-        "-o",
-        dest="polyOrd",
+        "--polyOrd",
         type=int,
         default=3,
-        help="polynomial order to fit to I spectrum [3].",
+        help="polynomial order to fit to I spectrum.",
     )
     parser.add_argument(
-        "-i",
-        dest="noStokesI",
+        "--noStokesI",
         action="store_true",
-        help="ignore the Stokes I spectrum [False].",
-    )
-    parser.add_argument(
-        "-p", dest="showPlots", action="store_true", help="show the plots [False]."
+        help="ignore the Stokes I spectrum.",
     )
+    parser.add_argument("--showPlots", action="store_true", help="show the plots.")
     parser.add_argument(
-        "-R",
-        dest="not_RMSF",
+        "--not_RMSF",
         action="store_true",
-        help="Skip calculation of RMSF? [False]",
-    )
-    parser.add_argument(
-        "-rmv", dest="rm_verbose", action="store_true", help="Verbose RMsynth [False]."
+        help="Skip calculation of RMSF?",
     )
+    parser.add_argument("--rm_verbose", action="store_true", help="Verbose RMsynth.")
     parser.add_argument(
-        "-D",
-        dest="debug",
+        "--debug",
         action="store_true",
-        help="turn on debugging messages & plots [False].",
+        help="turn on debugging messages & plots.",
     )
+    return rmsynth_parser
+
+
+def cli():
+    """Command-line interface"""
+
+    from astropy.utils.exceptions import AstropyWarning
+
+    warnings.simplefilter("ignore", category=AstropyWarning)
+    from astropy.io.fits.verify import VerifyWarning
+
+    warnings.simplefilter("ignore", category=VerifyWarning)
+    warnings.simplefilter("ignore", category=RuntimeWarning)
+
+    gen_parser = generic_parser(parent_parser=True)
+    synth_parser = rmsynth_parser(parent_parser=True)
+    parser = argparse.ArgumentParser(
+        parents=[gen_parser, synth_parser],
+        formatter_class=UltimateHelpFormatter,
+        description=synth_parser.description,
+    )
     args = parser.parse_args()

     if args.tt0 and not args.tt1:
@@ -1298,7 +1245,7 @@ def cli():

     main(
         field=args.field,
-        outdir=Path(args.outdir),
+        outdir=Path(args.datadir),
         host=args.host,
         epoch=args.epoch,
         sbid=args.sbid,
diff --git a/arrakis/utils/pipeline.py b/arrakis/utils/pipeline.py
index fcab462f..a715fe04 100644
--- a/arrakis/utils/pipeline.py
+++ b/arrakis/utils/pipeline.py
@@ -1,12 +1,14 @@
 #!/usr/bin/env python
 """Pipeline and flow utility functions"""

+import argparse
 import logging
 import shlex
 import subprocess
 import time
 import warnings
-from typing import List, Tuple, Union
+from pathlib import Path
+from typing import List, Optional, Tuple, Union

 import astropy.units as u
 import dask.array as da
@@ -23,7 +25,7 @@
 from tornado.ioloop import IOLoop
 from tqdm.auto import tqdm, trange

-from arrakis.logger import TqdmToLogger, logger
+from arrakis.logger import TqdmToLogger, UltimateHelpFormatter, logger

 warnings.filterwarnings(action="ignore", category=SpectralCubeWarning, append=True)
 warnings.simplefilter("ignore", category=AstropyWarning)
@@ -46,6 +48,84 @@
 """


+def generic_parser(parent_parser: bool = False) -> argparse.ArgumentParser:
+    descStr = f"""
+    {logo_str}
+    Generic pipeline options
+
+    """
+
+    # Parse the command line options
+    gen_parser = argparse.ArgumentParser(
+        add_help=not parent_parser,
+        description=descStr,
+        formatter_class=UltimateHelpFormatter,
+    )
+    parser = gen_parser.add_argument_group("generic arguments")
+
+    parser.add_argument(
+        "field", metavar="field", type=str, help="Name of field (e.g. RACS_2132-50)."
+    )
+
+    parser.add_argument(
+        "datadir",
+        metavar="datadir",
+        type=Path,
+        help="Directory containing full-size data cubes in FITS format, and cutout directory.",
+    )
+
+    parser.add_argument(
+        "--sbid",
+        type=int,
+        default=None,
+        help="SBID of observation.",
+    )
+
+    parser.add_argument(
+        "-s",
+        "--stokes",
+        dest="stokeslist",
+        nargs="+",
+        type=str,
+        default=["I", "Q", "U"],
+        help="List of Stokes parameters to image [ALL]",
+    )
+
+    parser.add_argument(
+        "-e",
+        "--epoch",
+        type=int,
+        default=0,
+        help="Epoch of observation.",
+    )
+
+    parser.add_argument(
+        "-v", dest="verbose", action="store_true", help="Verbose output [False]."
+    )
+    parser.add_argument(
+        "--host",
+        metavar="host",
+        type=str,
+        default=None,
+        help="Host of mongodb (probably $hostname -i).",
+    )
+    parser.add_argument(
+        "--username", type=str, default=None, help="Username of mongodb."
+    )
+
+    parser.add_argument(
+        "--password", type=str, default=None, help="Password of mongodb."
+    )
+    parser.add_argument(
+        "--limit",
+        type=int,
+        default=None,
+        help="Limit the number of islands to process.",
+    )
+
+    return gen_parser
+
+
 class performance_report_prefect:
     """Gather performance report from prefect_dask
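The refactor in patch 03 hinges on argparse's parent-parser mechanism: each stage exposes its options with `add_help=not parent_parser`, so only the top-level parser owns `-h/--help`, and the stage parsers are merged via `parents=[...]`. A self-contained sketch of the same pattern (the parser and option names here are illustrative, not from the repository):

    import argparse

    def stage_parser(parent_parser: bool = False) -> argparse.ArgumentParser:
        # When used as a parent, suppress -h/--help so it does not clash
        # with the top-level parser's help option.
        parser = argparse.ArgumentParser(add_help=not parent_parser)
        group = parser.add_argument_group("stage arguments")
        group.add_argument("--cutoff", type=float, default=-3)
        return parser

    stage = stage_parser(parent_parser=True)
    cli = argparse.ArgumentParser(parents=[stage], description=stage.description)
    args = cli.parse_args(["--cutoff", "-5"])
    print(args.cutoff)  # -5.0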
+    frion_parser = argparse.ArgumentParser(
+        add_help=not parent_parser,
+        description=descStr,
+        formatter_class=UltimateHelpFormatter,
     )
+    parser = frion_parser.add_argument_group("frion arguments")
 
     parser.add_argument(
-        "-s",
         "--ionex_server",
         type=str,
         default="ftp://ftp.aiub.unibe.ch/CODE/",
-        help="IONEX server [ftp://ftp.aiub.unibe.ch/CODE/].",
+        help="IONEX server",
     )
 
     parser.add_argument(
-        "-x",
         "--ionex_prefix",
         type=str,
         default="codg",
     )
 
     parser.add_argument(
-        "-f",
         "--ionex_formatter",
         type=str,
         default="ftp.aiub.unibe.ch",
@@ -417,24 +371,38 @@ def cli():
     )
 
     parser.add_argument(
-        "-p",
         "--ionex_proxy_server",
         type=str,
         default=None,
-        help="Proxy server [None].",
+        help="Proxy server.",
    )
 
     parser.add_argument(
-        "-d",
         "--ionex_predownload",
         action="store_true",
-        help="Pre-download IONEX files [False].",
+        help="Pre-download IONEX files.",
     )
 
-    parser.add_argument(
-        "-v", dest="verbose", action="store_true", help="verbose output [False]."
-    )
+    return frion_parser
+
 
+def cli():
+    """Command-line interface"""
+    import warnings
+
+    from astropy.utils.exceptions import AstropyWarning
+
+    warnings.simplefilter("ignore", category=AstropyWarning)
+    from astropy.io.fits.verify import VerifyWarning
+
+    warnings.simplefilter("ignore", category=VerifyWarning)
+    warnings.simplefilter("ignore", category=RuntimeWarning)
+
+    gen_parser = generic_parser(parent_parser=True)
+    f_parser = frion_parser(parent_parser=True)
+    parser = argparse.ArgumentParser(
+        parents=[gen_parser, f_parser],
+        formatter_class=UltimateHelpFormatter,
+        description=f_parser.description,
+    )
     args = parser.parse_args()
 
     verbose = args.verbose
@@ -451,7 +419,6 @@ def cli():
         username=args.username,
         password=args.password,
         database=args.database,
-        verbose=verbose,
         ionex_server=args.ionex_server,
         ionex_proxy_server=args.ionex_proxy_server,
         ionex_formatter=args.ionex_formatter,
diff --git a/arrakis/imager.py b/arrakis/imager.py
index f3830fb9..0044b594 100644
--- a/arrakis/imager.py
+++ b/arrakis/imager.py
@@ -994,14 +994,15 @@ def imager_parser(parent_parser: bool = False) -> argparse.ArgumentParser:
         help="Do not apply the ASKAP MS corrections from the package fixms. 
", ) - group = parser.add_mutually_exclusive_group() - group.add_argument( + group = parser.add_argument_group("wsclean container options") + mxg = group.add_mutually_exclusive_group() + mxg.add_argument( "--hosted-wsclean", type=str, default="docker://alecthomson/wsclean:latest", - help="Docker or Singularity image for wsclean [docker://alecthomson/wsclean:latest]", + help="Docker or Singularity image for wsclean", ) - group.add_argument( + mxg.add_argument( "--local_wsclean", type=Path, default=None, diff --git a/arrakis/process_spice.py b/arrakis/process_spice.py index e0c37bf4..520e8ca5 100644 --- a/arrakis/process_spice.py +++ b/arrakis/process_spice.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 """Arrakis single-field pipeline""" +import argparse import logging import os from pathlib import Path @@ -325,10 +326,7 @@ def main(args: configargparse.Namespace) -> None: )(args, host, dask_runner_2) -def cli(): - """Command-line interface""" - # Help string to be shown using the -h option - +def pipeline_parser(parent_parser: bool = False) -> argparse.ArgumentParser: descStr = f""" {logo_str} @@ -338,10 +336,68 @@ def cli(): $ mongod --dbpath=/path/to/database --bind_ip $(hostname -i) """ + # Parse the command line options + pipeline_parser = argparse.ArgumentParser( + add_help=not parent_parser, + description=descStr, + formatter_class=UltimateHelpFormatter, + ) + parser = pipeline_parser.add_argument_group("pipeline arguments") + parser.add_argument( + "--dask_config", + type=str, + default=None, + help="Config file for Dask SlurmCLUSTER.", + ) + parser.add_argument( + "--imager_dask_config", + type=str, + default=None, + help="Config file for Dask SlurmCLUSTER.", + ) + parser.add_argument( + "--imager_only", + action="store_true", + help="Only run the imager component of the pipeline. ", + ) + parser.add_argument( + "--skip_imager", action="store_true", help="Skip imaging stage [False]." + ) + parser.add_argument( + "--skip_cutout", action="store_true", help="Skip cutout stage [False]." + ) + parser.add_argument( + "--skip_linmos", action="store_true", help="Skip LINMOS stage [False]." + ) + parser.add_argument( + "--skip_cleanup", action="store_true", help="Skip cleanup stage [False]." + ) + parser.add_argument( + "--skip_frion", action="store_true", help="Skip cleanup stage [False]." + ) + parser.add_argument( + "--skip_rmsynth", action="store_true", help="Skip RM Synthesis stage [False]." + ) + parser.add_argument( + "--skip_rmclean", action="store_true", help="Skip RM-CLEAN stage [False]." + ) + parser.add_argument( + "--skip_cat", action="store_true", help="Skip catalogue stage [False]." 
+ ) + + return pipeline_parser + + +def cli(): + """Command-line interface""" + # Help string to be shown using the -h option + + pipe_parser = pipeline_parser(parent_parser=True) gen_parser = generic_parser(parent_parser=True) imager_parser = imager.imager_parser(parent_parser=True) cutout_parser = cutout.cutout_parser(parent_parser=True) linmos_parser = linmos.linmos_parser(parent_parser=True) + common_parser = rmsynth_oncuts.rm_common_parser(parent_parser=True) synth_parser = rmsynth_oncuts.rmsynth_parser(parent_parser=True) rmclean_parser = rmclean_oncuts.clean_parser(parent_parser=True) catalogue_parser = makecat.cat_parser(parent_parser=True) @@ -349,314 +405,37 @@ def cli(): # Parse the command line options parser = configargparse.ArgParser( default_config_files=[".default_config.cfg"], - description=descStr, + description=pipe_parser.description, formatter_class=UltimateHelpFormatter, parents=[ + pipe_parser, gen_parser, imager_parser, cutout_parser, linmos_parser, + common_parser, synth_parser, rmclean_parser, catalogue_parser, clean_parser, ], ) - # parser.add("--config", required=False, is_config_file=True, help="Config file path") - - # parser.add_argument( - # "field", metavar="field", type=str, help="Name of field (e.g. 2132-50A)." - # ) - # parser.add_argument( - # "--epoch", - # type=int, - # default=0, - # help="Epoch to read field data from", - # ) - - # parser.add_argument( - # "--host", - # default=None, - # type=str, - # help="Host of mongodb (probably $hostname -i).", - # ) - - # parser.add_argument( - # "--username", type=str, default=None, help="Username of mongodb." - # ) - - # parser.add_argument( - # "--password", type=str, default=None, help="Password of mongodb." - # ) - - # parser.add_argument( - # "--dask_config", - # type=str, - # default=None, - # help="Config file for Dask SlurmCLUSTER.", - # ) - # parser.add_argument( - # "--imager_dask_config", - # type=str, - # default=None, - # help="Config file for Dask SlurmCLUSTER.", - # ) - # parser.add_argument( - # "--holofile", type=str, default=None, help="Path to holography image" - # ) - - # parser.add_argument( - # "--yanda", - # type=str, - # default="1.3.0", - # help="Yandasoft version to pull from DockerHub [1.3.0].", - # ) - - # parser.add_argument( - # "--yanda_image", - # default=None, - # type=Path, - # help="Path to an existing yandasoft singularity container image. ", - # ) - - # flowargs = parser.add_argument_group("pipeline flow options") - # flowargs.add_argument( - # "--imager_only", - # action="store_true", - # help="Only run the imager component of the pipeline. ", - # ) - # flowargs.add_argument( - # "--skip_imager", action="store_true", help="Skip imaging stage [False]." - # ) - # flowargs.add_argument( - # "--skip_cutout", action="store_true", help="Skip cutout stage [False]." - # ) - # flowargs.add_argument( - # "--skip_linmos", action="store_true", help="Skip LINMOS stage [False]." - # ) - # flowargs.add_argument( - # "--skip_cleanup", action="store_true", help="Skip cleanup stage [False]." - # ) - # flowargs.add_argument( - # "--skip_frion", action="store_true", help="Skip cleanup stage [False]." - # ) - # flowargs.add_argument( - # "--skip_rmsynth", action="store_true", help="Skip RM Synthesis stage [False]." - # ) - # flowargs.add_argument( - # "--skip_rmclean", action="store_true", help="Skip RM-CLEAN stage [False]." - # ) - # flowargs.add_argument( - # "--skip_cat", action="store_true", help="Skip catalogue stage [False]." 
- # ) - - # options = parser.add_argument_group("output options") - # options.add_argument( - # "-v", "--verbose", action="store_true", help="Verbose output [False]." - # ) - - # cutargs = parser.add_argument_group("cutout arguments") - # cutargs.add_argument( - # "-p", - # "--pad", - # type=float, - # default=5, - # help="Number of beamwidths to pad around source [5].", - # ) - - # cutargs.add_argument("--dryrun", action="store_true", help="Do a dry-run [False].") - - # synth = parser.add_argument_group("RM-synth/CLEAN arguments") - - # synth.add_argument( - # "--dimension", - # default="1d", - # help="How many dimensions for RMsynth [1d] or '3d'.", - # ) - - # synth.add_argument( - # "-m", - # "--database", - # action="store_true", - # help="Add RMsynth data to MongoDB [False].", - # ) - - # synth.add_argument( - # "--tt0", - # default=None, - # type=str, - # help="TT0 MFS image -- will be used for model of Stokes I -- also needs --tt1.", - # ) - - # synth.add_argument( - # "--tt1", - # default=None, - # type=str, - # help="TT1 MFS image -- will be used for model of Stokes I -- also needs --tt0.", - # ) - - # synth.add_argument( - # "--validate", action="store_true", help="Run on RMsynth Stokes I [False]." - # ) - - # synth.add_argument( - # "--limit", default=None, type=int, help="Limit number of sources [All]." - # ) - # synth.add_argument( - # "--own_fit", - # dest="do_own_fit", - # action="store_true", - # help="Use own Stokes I fit function [False].", - # ) - # tools = parser.add_argument_group("RM-tools arguments") - # # RM-tools args - # tools.add_argument( - # "-sp", "--savePlots", action="store_true", help="save the plots [False]." - # ) - # tools.add_argument( - # "-w", - # "--weightType", - # default="variance", - # help="weighting [variance] (all 1s) or 'uniform'.", - # ) - # tools.add_argument( - # "--fit_function", - # type=str, - # default="log", - # help="Stokes I fitting function: 'linear' or ['log'] polynomials.", - # ) - # tools.add_argument( - # "-t", - # "--fitRMSF", - # action="store_true", - # help="Fit a Gaussian to the RMSF [False]", - # ) - # tools.add_argument( - # "-l", - # "--phiMax_radm2", - # type=float, - # default=None, - # help="Absolute max Faraday depth sampled (overrides NSAMPLES) [Auto].", - # ) - # tools.add_argument( - # "-d", - # "--dPhi_radm2", - # type=float, - # default=None, - # help="Width of Faraday depth channel [Auto].", - # ) - # tools.add_argument( - # "-s", - # "--nSamples", - # type=float, - # default=5, - # help="Number of samples across the FWHM RMSF.", - # ) - # tools.add_argument( - # "-o", - # "--polyOrd", - # type=int, - # default=3, - # help="polynomial order to fit to I spectrum [3].", - # ) - # tools.add_argument( - # "-i", - # "--noStokesI", - # action="store_true", - # help="ignore the Stokes I spectrum [False].", - # ) - # tools.add_argument( - # "--showPlots", action="store_true", help="show the plots [False]." - # ) - # tools.add_argument( - # "-R", - # "--not_RMSF", - # action="store_true", - # help="Skip calculation of RMSF? 
[False]", - # ) - # tools.add_argument( - # "-rmv", - # "--rm_verbose", - # action="store_true", - # help="Verbose RMsynth/CLEAN [False].", - # ) - # tools.add_argument( - # "-D", - # "--debug", - # action="store_true", - # help="turn on debugging messages & plots [False].", - # ) - # # RM-tools args - # tools.add_argument( - # "-c", - # "--cutoff", - # type=float, - # default=-3, - # help="CLEAN cutoff (+ve = absolute, -ve = sigma) [-3].", - # ) - # tools.add_argument( - # "-n", - # "--maxIter", - # type=int, - # default=10000, - # help="maximum number of CLEAN iterations [10000].", - # ) - # tools.add_argument( - # "-g", "--gain", type=float, default=0.1, help="CLEAN loop gain [0.1]." - # ) - # tools.add_argument( - # "--window", - # type=float, - # default=None, - # help="Further CLEAN in mask to this threshold [False].", - # ) - # tools.add_argument( - # "--ionex_server", - # type=str, - # default="ftp://ftp.aiub.unibe.ch/CODE/", - # help="IONEX server [ftp://ftp.aiub.unibe.ch/CODE/].", - # ) - # tools.add_argument( - # "--ionex_prefix", - # type=str, - # default="codg", - # help="IONEX prefix.", - # ) - # tools.add_argument( - # "--ionex_proxy_server", - # type=str, - # default=None, - # help="Proxy server [None].", - # ) - # tools.add_argument( - # "--ionex_formatter", - # type=str, - # default=None, - # help="IONEX formatter [None].", - # ) - # tools.add_argument( - # "--ionex_predownload", - # action="store_true", - # help="Pre-download IONEX files [False].", - # ) - # cat = parser.add_argument_group("catalogue arguments") - # # Cat args - # cat.add_argument( - # "--outfile", default=None, type=str, help="File to save table to [None]." - # ) + + parser.add("--config", required=False, is_config_file=True, help="Config file path") + args = parser.parse_args() parser.print_values() - # verbose = args.verbose - # if verbose: - # logger.setLevel(logging.INFO) + verbose = args.verbose + if verbose: + logger.setLevel(logging.INFO) - # logger.info(logo_str) - # logger.info("\n\nArguments: ") - # logger.info(args) + logger.info(logo_str) + logger.info("\n\nArguments: ") + logger.info(args) - # main(args) + main(args) if __name__ == "__main__": diff --git a/arrakis/rmclean_oncuts.py b/arrakis/rmclean_oncuts.py index d2ecf820..073d774e 100644 --- a/arrakis/rmclean_oncuts.py +++ b/arrakis/rmclean_oncuts.py @@ -390,11 +390,11 @@ def cli(): warnings.simplefilter("ignore", category=VerifyWarning) gen_parser = generic_parser(parent_parser=True) - synth_parser = rmsynth_oncuts.rmsynth_parser(parent_parser=True) + common_parser = rmsynth_oncuts.rm_common_parser(parent_parser=True) rmclean_parser = clean_parser(parent_parser=True) parser = argparse.ArgumentParser( - parents=[gen_parser, synth_parser, rmclean_parser], + parents=[gen_parser, common_parser, rmclean_parser], formatter_class=UltimateHelpFormatter, description=rmclean_parser.description, ) diff --git a/arrakis/rmsynth_oncuts.py b/arrakis/rmsynth_oncuts.py index 6494105c..91f15e81 100644 --- a/arrakis/rmsynth_oncuts.py +++ b/arrakis/rmsynth_oncuts.py @@ -1082,6 +1082,27 @@ def main( logger.info("RMsynth done!") +def rm_common_parser(parent_parser: bool = False) -> argparse.ArgumentParser: + common_parser = argparse.ArgumentParser( + formatter_class=UltimateHelpFormatter, + add_help=not parent_parser, + ) + parser = common_parser.add_argument_group("common rm arguments") + + parser.add_argument( + "--dimension", + dest="dimension", + default="1d", + help="How many dimensions for RMsynth '1d' or '3d'.", + ) + 
parser.add_argument("--savePlots", action="store_true", help="save the plots.") + parser.add_argument( + "--rm_verbose", action="store_true", help="Verbose RMsynth/RMClean." + ) + + return common_parser + + def rmsynth_parser(parent_parser: bool = False) -> argparse.ArgumentParser: # Help string to be shown using the -h option descStr = f""" @@ -1101,21 +1122,10 @@ def rmsynth_parser(parent_parser: bool = False) -> argparse.ArgumentParser: ) parser = rmsynth_parser.add_argument_group("rm-synth arguments") - parser.add_argument( - "--dimension", - dest="dimension", - default="1d", - help="How many dimensions for RMsynth '1d' or '3d'.", - ) - parser.add_argument( "--ion", action="store_true", help="Use ionospheric-corrected data." ) - parser.add_argument( - "-m", dest="database", action="store_true", help="Add data to MongoDB." - ) - parser.add_argument( "--tt0", default=None, @@ -1143,7 +1153,6 @@ def rmsynth_parser(parent_parser: bool = False) -> argparse.ArgumentParser: help="Use own Stokes I fit function.", ) # RM-tools args - parser.add_argument("--savePlots", action="store_true", help="save the plots.") parser.add_argument( "--weightType", default="variance", @@ -1195,7 +1204,6 @@ def rmsynth_parser(parent_parser: bool = False) -> argparse.ArgumentParser: action="store_true", help="Skip calculation of RMSF?", ) - parser.add_argument("--rm_verbose", action="store_true", help="Verbose RMsynth.") parser.add_argument( "--debug", action="store_true", diff --git a/arrakis/utils/pipeline.py b/arrakis/utils/pipeline.py index a715fe04..8cd3c84a 100644 --- a/arrakis/utils/pipeline.py +++ b/arrakis/utils/pipeline.py @@ -122,6 +122,9 @@ def generic_parser(parent_parser: bool = False) -> argparse.ArgumentParser: default=None, help="Limit the number of islands to process.", ) + parser.add_argument( + "--database", dest="database", action="store_true", help="Add data to MongoDB." + ) return gen_parser From dd430ceed6ca6b8fdef04a1131c23693fd9b2555 Mon Sep 17 00:00:00 2001 From: "Thomson, Alec (CASS, Kensington)" Date: Mon, 8 Apr 2024 18:43:18 +1000 Subject: [PATCH 05/37] Add merge args --- arrakis/merge_fields.py | 58 ++++---- arrakis/process_region.py | 288 +++++++------------------------------- arrakis/process_spice.py | 6 +- arrakis/utils/pipeline.py | 4 +- 4 files changed, 91 insertions(+), 265 deletions(-) diff --git a/arrakis/merge_fields.py b/arrakis/merge_fields.py index b57d724c..33ec2e92 100644 --- a/arrakis/merge_fields.py +++ b/arrakis/merge_fields.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 """Merge multiple RACS fields""" +import argparse import os from pprint import pformat from shutil import copyfile @@ -8,10 +9,11 @@ import pymongo from prefect import flow, task, unmapped -from arrakis.linmos import get_yanda, linmos +from arrakis.linmos import get_yanda, linmos, linmos_parser from arrakis.logger import UltimateHelpFormatter, logger from arrakis.utils.database import get_db, test_db from arrakis.utils.io import try_mkdir +from arrakis.utils.pipeline import generic_parser def make_short_name(name: str) -> str: @@ -333,21 +335,19 @@ def main( return inter_dir -def cli(): - """Command-line interface""" - import argparse - +def merge_parser(parent_parser: bool = False) -> argparse.ArgumentParser: # Help string to be shown using the -h option descStr = """ Mosaic RACS beam fields with linmos. 
""" - # Parse the command line options - parser = argparse.ArgumentParser( - description=descStr, formatter_class=UltimateHelpFormatter + merge_parser = argparse.ArgumentParser( + add_help=not parent_parser, + description=descStr, + formatter_class=UltimateHelpFormatter, ) - + parser = merge_parser.add_argument_group("merge arguments") parser.add_argument( "--merge_name", type=str, @@ -355,7 +355,10 @@ def cli(): ) parser.add_argument( - "--fields", type=str, nargs="+", help="RACS fields to mosaic - e.g. 2132-50A." + "--fields", + type=str, + nargs="+", + help="RACS fields to mosaic - e.g. RACS_2132-50A.", ) parser.add_argument( @@ -370,20 +373,6 @@ def cli(): type=str, help="Path to save merged data (in output_dir/merge_name/cutouts)", ) - - parser.add_argument( - "--yanda", - type=str, - default="1.3.0", - help="Yandasoft version to pull from DockerHub [1.3.0].", - ) - - parser.add_argument( - "--host", - type=str, - help="Host of mongodb (probably $hostname -i).", - ) - parser.add_argument( "-e", "--epoch", @@ -392,6 +381,13 @@ def cli(): help="Epoch of observation.", ) + parser.add_argument( + "--host", + metavar="host", + type=str, + default=None, + help="Host of mongodb (probably $hostname -i).", + ) parser.add_argument( "--username", type=str, default=None, help="Username of mongodb." ) @@ -399,8 +395,22 @@ def cli(): parser.add_argument( "--password", type=str, default=None, help="Password of mongodb." ) + return merge_parser + +def cli(): + """Command-line interface""" + + m_parser = merge_parser(parent_parser=True) + lin_parser = linmos_parser(parent_parser=True) + + parser = argparse.ArgumentParser( + parents=[m_parser, lin_parser], + formatter_class=UltimateHelpFormatter, + description=m_parser.description, + ) args = parser.parse_args() + verbose = args.verbose test_db( host=args.host, username=args.username, password=args.password, verbose=verbose diff --git a/arrakis/process_region.py b/arrakis/process_region.py index 21e66eed..638582f2 100644 --- a/arrakis/process_region.py +++ b/arrakis/process_region.py @@ -1,5 +1,7 @@ #!/usr/bin/env python3 """Arrakis multi-field pipeline""" +import argparse +import logging import os import configargparse @@ -8,7 +10,15 @@ from astropy.time import Time from prefect import flow -from arrakis import makecat, merge_fields, process_spice, rmclean_oncuts, rmsynth_oncuts +from arrakis import ( + cleanup, + linmos, + makecat, + merge_fields, + process_spice, + rmclean_oncuts, + rmsynth_oncuts, +) from arrakis.logger import UltimateHelpFormatter, logger from arrakis.utils.database import test_db from arrakis.utils.pipeline import logo_str @@ -148,9 +158,7 @@ def main(args: configargparse.Namespace) -> None: )(args, args.host, inter_dir, dask_runner) -def cli(): - """Command-line interface""" - # Help string to be shown using the -h option +def pipeline_parser(parent_parser: bool = False) -> argparse.ArgumentParser: descStr = f""" {logo_str} Arrakis regional pipeline. 
@@ -159,72 +167,13 @@
 
     $ mongod --dbpath=/path/to/database --bind_ip $(hostname -i)
     """
 
-    # Parse the command line options
-    parser = configargparse.ArgParser(
-        default_config_files=[".default_field_config.txt"],
+    pipeline_parser = argparse.ArgumentParser(
+        add_help=not parent_parser,
         description=descStr,
         formatter_class=UltimateHelpFormatter,
     )
 
-    parser.add("--config", required=False, is_config_file=True, help="Config file path")
-
-    parser.add_argument(
-        "--merge_name",
-        type=str,
-        help="Name of the merged region",
-    )
-
-    parser.add_argument(
-        "--fields", type=str, nargs="+", help="RACS fields to mosaic - e.g. 2132-50A."
-    )
-
-    parser.add_argument(
-        "--datadirs",
-        type=str,
-        nargs="+",
-        help="Directories containing cutouts (in subdir outdir/cutouts)..",
-    )
-
-    parser.add_argument(
-        "--output_dir",
-        type=str,
-        help="Path to save merged data (in output_dir/merge_name/cutouts)",
-    )
-
-    parser.add_argument(
-        "--epoch",
-        type=int,
-        default=0,
-        help="Epoch to read field data from",
-    )
-
-    parser.add_argument(
-        "--host",
-        default=None,
-        type=str,
-        help="Host of mongodb (probably $hostname -i).",
-    )
-
-    parser.add_argument(
-        "--username", type=str, default=None, help="Username of mongodb."
-    )
-
-    parser.add_argument(
-        "--password", type=str, default=None, help="Password of mongodb."
-    )
-
-    parser.add_argument(
-        "--use_mpi",
-        action="store_true",
-        help="Use Dask-mpi to parallelise -- must use srun/mpirun to assign resources.",
-    )
-    parser.add_argument(
-        "--port_forward",
-        default=None,
-        help="Platform to fowards dask port [None].",
-        nargs="+",
-    )
-
+    parser = pipeline_parser.add_argument_group("pipeline arguments")
     parser.add_argument(
         "--dask_config",
         type=str,
@@ -233,195 +182,62 @@ def cli():
     )
 
     parser.add_argument(
-        "--yanda",
-        type=str,
-        default="1.3.0",
-        help="Yandasoft version to pull from DockerHub [1.3.0].",
+        "--skip_frion", action="store_true", help="Skip FRion stage [False]."
     )
-
-    flowargs = parser.add_argument_group("pipeline flow options")
-    flowargs.add_argument(
-        "--skip_merge", action="store_true", help="Skip merge stage [False]."
-    )
-    flowargs.add_argument(
+    parser.add_argument(
         "--skip_rmsynth", action="store_true", help="Skip RM Synthesis stage [False]."
     )
-    flowargs.add_argument(
+    parser.add_argument(
         "--skip_rmclean", action="store_true", help="Skip RM-CLEAN stage [False]."
     )
-    flowargs.add_argument(
+    parser.add_argument(
         "--skip_cat", action="store_true", help="Skip catalogue stage [False]."
     )
-
-    options = parser.add_argument_group("output options")
-    options.add_argument(
-        "-v", "--verbose", action="store_true", help="Verbose output [False]."
-    )
-    options.add_argument(
-        "--debugger", action="store_true", help="Debug output [False]."
-    )
-
-    synth = parser.add_argument_group("RM-synth/CLEAN arguments")
-
-    synth.add_argument(
-        "--dimension",
-        default="1d",
-        help="How many dimensions for RMsynth [1d] or '3d'.",
-    )
-
-    synth.add_argument(
-        "-m",
-        "--database",
-        action="store_true",
-        help="Add RMsynth data to MongoDB [False].",
-    )
-
-    synth.add_argument(
-        "--tt0",
-        default=None,
-        type=str,
-        help="TT0 MFS image -- will be used for model of Stokes I -- also needs --tt1.",
-    )
-
-    synth.add_argument(
-        "--tt1",
-        default=None,
-        type=str,
-        help="TT1 MFS image -- will be used for model of Stokes I -- also needs --tt0.",
-    )
-
-    synth.add_argument(
-        "--validate", action="store_true", help="Run on RMsynth Stokes I [False]."
+ parser.add_argument( + "--skip_cleanup", action="store_true", help="Skip cleanup stage [False]." ) + return pipeline_parser - synth.add_argument( - "--limit", default=None, type=int, help="Limit number of sources [All]." - ) - synth.add_argument( - "--own_fit", - dest="do_own_fit", - action="store_true", - help="Use own Stokes I fit function [False].", - ) - tools = parser.add_argument_group("RM-tools arguments") - # RM-tools args - tools.add_argument( - "-sp", "--savePlots", action="store_true", help="save the plots [False]." - ) - tools.add_argument( - "-w", - "--weightType", - default="variance", - help="weighting [variance] (all 1s) or 'uniform'.", - ) - tools.add_argument( - "--fit_function", - type=str, - default="log", - help="Stokes I fitting function: 'linear' or ['log'] polynomials.", - ) - tools.add_argument( - "-t", - "--fitRMSF", - action="store_true", - help="Fit a Gaussian to the RMSF [False]", - ) - tools.add_argument( - "-l", - "--phiMax_radm2", - type=float, - default=None, - help="Absolute max Faraday depth sampled (overrides NSAMPLES) [Auto].", - ) - tools.add_argument( - "-d", - "--dPhi_radm2", - type=float, - default=None, - help="Width of Faraday depth channel [Auto].", - ) - tools.add_argument( - "-s", - "--nSamples", - type=float, - default=5, - help="Number of samples across the FWHM RMSF.", - ) - tools.add_argument( - "-o", - "--polyOrd", - type=int, - default=3, - help="polynomial order to fit to I spectrum [3].", - ) - tools.add_argument( - "-i", - "--noStokesI", - action="store_true", - help="ignore the Stokes I spectrum [False].", - ) - tools.add_argument( - "--showPlots", action="store_true", help="show the plots [False]." - ) - tools.add_argument( - "-R", - "--not_RMSF", - action="store_true", - help="Skip calculation of RMSF? [False]", - ) - tools.add_argument( - "-rmv", - "--rm_verbose", - action="store_true", - help="Verbose RMsynth/CLEAN [False].", - ) - tools.add_argument( - "-D", - "--debug", - action="store_true", - help="turn on debugging messages & plots [False].", - ) - # RM-tools args - tools.add_argument( - "-c", - "--cutoff", - type=float, - default=-3, - help="CLEAN cutoff (+ve = absolute, -ve = sigma) [-3].", - ) - tools.add_argument( - "-n", - "--maxIter", - type=int, - default=10000, - help="maximum number of CLEAN iterations [10000].", - ) - tools.add_argument( - "-g", "--gain", type=float, default=0.1, help="CLEAN loop gain [0.1]." - ) - tools.add_argument( - "--window", - type=float, - default=None, - help="Further CLEAN in mask to this threshold [False].", - ) +def cli(): + """Command-line interface""" + # Help string to be shown using the -h option - cat = parser.add_argument_group("catalogue arguments") - # Cat args - cat.add_argument( - "--outfile", default=None, type=str, help="File to save table to [None]." 
+ # Parse the command line options + pipe_parser = pipeline_parser(parent_parser=True) + merge_parser = merge_fields.merge_parser(parent_parser=True) + linmos_parser = linmos.linmos_parser(parent_parser=True) + common_parser = rmsynth_oncuts.rm_common_parser(parent_parser=True) + synth_parser = rmsynth_oncuts.rmsynth_parser(parent_parser=True) + rmclean_parser = rmclean_oncuts.clean_parser(parent_parser=True) + catalogue_parser = makecat.cat_parser(parent_parser=True) + clean_parser = cleanup.cleanup_parser(parent_parser=True) + # Parse the command line options + parser = configargparse.ArgParser( + default_config_files=[".default_config.cfg"], + description=pipe_parser.description, + formatter_class=UltimateHelpFormatter, + parents=[ + pipe_parser, + merge_parser, + linmos_parser, + common_parser, + synth_parser, + rmclean_parser, + catalogue_parser, + clean_parser, + ], ) - + parser.add("--config", required=False, is_config_file=True, help="Config file path") args = parser.parse_args() if not args.use_mpi: parser.print_values() verbose = args.verbose if verbose: - logger.setLevel(logger.INFO) + logger.setLevel(logging.INFO) if args.debugger: - logger.setLevel(logger.DEBUG) + logger.setLevel(logging.DEBUG) main(args) diff --git a/arrakis/process_spice.py b/arrakis/process_spice.py index 520e8ca5..0a0202f5 100644 --- a/arrakis/process_spice.py +++ b/arrakis/process_spice.py @@ -369,9 +369,6 @@ def pipeline_parser(parent_parser: bool = False) -> argparse.ArgumentParser: parser.add_argument( "--skip_linmos", action="store_true", help="Skip LINMOS stage [False]." ) - parser.add_argument( - "--skip_cleanup", action="store_true", help="Skip cleanup stage [False]." - ) parser.add_argument( "--skip_frion", action="store_true", help="Skip cleanup stage [False]." ) @@ -384,6 +381,9 @@ def pipeline_parser(parent_parser: bool = False) -> argparse.ArgumentParser: parser.add_argument( "--skip_cat", action="store_true", help="Skip catalogue stage [False]." ) + parser.add_argument( + "--skip_cleanup", action="store_true", help="Skip cleanup stage [False]." + ) return pipeline_parser diff --git a/arrakis/utils/pipeline.py b/arrakis/utils/pipeline.py index 8cd3c84a..ef2316c2 100644 --- a/arrakis/utils/pipeline.py +++ b/arrakis/utils/pipeline.py @@ -88,7 +88,7 @@ def generic_parser(parent_parser: bool = False) -> argparse.ArgumentParser: nargs="+", type=str, default=["I", "Q", "U"], - help="List of Stokes parameters to image [ALL]", + help="List of Stokes parameters to image", ) parser.add_argument( @@ -100,7 +100,7 @@ def generic_parser(parent_parser: bool = False) -> argparse.ArgumentParser: ) parser.add_argument( - "-v", dest="verbose", action="store_true", help="Verbose output [False]." + "-v", dest="verbose", action="store_true", help="Verbose output." 
     )
     parser.add_argument(
         "--host",
From ae65c7f7f2c31b47d7a8d173297ad2e83a54114b Mon Sep 17 00:00:00 2001
From: "Thomson, Alec (CASS, Kensington)"
Date: Mon, 8 Apr 2024 18:55:05 +1000
Subject: [PATCH 06/37] Good args

---
 arrakis/process_region.py | 30 +++++++++++++++-------------
 arrakis/process_spice.py  | 29 ++++++++++++++------------
 arrakis/rmclean_oncuts.py |  8 ++++----
 arrakis/rmsynth_oncuts.py | 42 +++++++++++++++++++--------------------
 4 files changed, 56 insertions(+), 53 deletions(-)

diff --git a/arrakis/process_region.py b/arrakis/process_region.py
index 638582f2..fb334d2f 100644
--- a/arrakis/process_region.py
+++ b/arrakis/process_region.py
@@ -3,6 +3,7 @@
 import argparse
 import logging
 import os
+from pathlib import Path
 
 import configargparse
 import pkg_resources
@@ -52,10 +53,11 @@ def process_merge(args, host: str, inter_dir: str, task_runner) -> None:
 
     previous_future = (
         rmsynth_oncuts.main.with_options(task_runner=task_runner)(
-            field=args.merge_name,
-            outdir=inter_dir,
-            host=host,
+            field=args.field,
+            outdir=Path(args.datadir),
+            host=args.host,
             epoch=args.epoch,
+            sbid=args.sbid,
             username=args.username,
             password=args.password,
             dimension=args.dimension,
@@ -63,16 +65,16 @@ def process_merge(args, host: str, inter_dir: str, task_runner) -> None:
             database=args.database,
             do_validate=args.validate,
             limit=args.limit,
-            savePlots=args.savePlots,
-            weightType=args.weightType,
-            fitRMSF=args.fitRMSF,
-            phiMax_radm2=args.phiMax_radm2,
-            dPhi_radm2=args.dPhi_radm2,
-            nSamples=args.nSamples,
-            polyOrd=args.polyOrd,
-            noStokesI=args.noStokesI,
-            showPlots=args.showPlots,
-            not_RMSF=args.not_RMSF,
+            savePlots=args.save_plots,
+            weightType=args.weight_type,
+            fitRMSF=args.fit_rmsf,
+            phiMax_radm2=args.phi_max,
+            dPhi_radm2=args.dphi,
+            nSamples=args.n_samples,
+            polyOrd=args.poly_ord,
+            noStokesI=args.no_stokes_i,
+            showPlots=args.show_plots,
+            not_RMSF=args.not_rmsf,
             rm_verbose=args.rm_verbose,
             debug=args.debug,
             fit_function=args.fit_function,
@@ -100,7 +102,7 @@ def process_merge(args, host: str, inter_dir: str, task_runner) -> None:
-            maxIter=args.maxIter,
+            maxIter=args.max_iter,
             gain=args.gain,
             window=args.window,
-            showPlots=args.showPlots,
+            showPlots=args.show_plots,
             rm_verbose=args.rm_verbose,
         )
         if not args.skip_rmclean
diff --git a/arrakis/process_spice.py b/arrakis/process_spice.py
index 0a0202f5..c101bcf9 100644
--- a/arrakis/process_spice.py
+++ b/arrakis/process_spice.py
@@ -101,9 +101,10 @@ def process_spice(args, host: str, task_runner: BaseTaskRunner) -> None:
     previous_future = (
         rmsynth_oncuts.main.with_options(task_runner=task_runner)(
             field=args.field,
-            outdir=args.outdir,
-            host=host,
+            outdir=Path(args.datadir),
+            host=args.host,
             epoch=args.epoch,
+            sbid=args.sbid,
             username=args.username,
             password=args.password,
             dimension=args.dimension,
@@ -111,16 +112,16 @@ def process_spice(args, host: str, task_runner: BaseTaskRunner) -> None:
             database=args.database,
             do_validate=args.validate,
             limit=args.limit,
-            savePlots=args.savePlots,
-            weightType=args.weightType,
-            fitRMSF=args.fitRMSF,
-            phiMax_radm2=args.phiMax_radm2,
-            dPhi_radm2=args.dPhi_radm2,
-            nSamples=args.nSamples,
-            polyOrd=args.polyOrd,
-            noStokesI=args.noStokesI,
-            showPlots=args.showPlots,
-            not_RMSF=args.not_RMSF,
+            savePlots=args.save_plots,
+            weightType=args.weight_type,
+            fitRMSF=args.fit_rmsf,
+            phiMax_radm2=args.phi_max,
+            dPhi_radm2=args.dphi,
+            nSamples=args.n_samples,
+            polyOrd=args.poly_ord,
+            noStokesI=args.no_stokes_i,
+            showPlots=args.show_plots,
+            not_RMSF=args.not_rmsf,
             rm_verbose=args.rm_verbose,
             debug=args.debug,
fit_function=args.fit_function, @@ -145,10 +146,10 @@ def process_spice(args, host: str, task_runner: BaseTaskRunner) -> None: database=args.database, limit=args.limit, cutoff=args.cutoff, - maxIter=args.maxIter, + maxIter=args.max_iter, gain=args.gain, window=args.window, - showPlots=args.showPlots, + showPlots=args.show_plots, rm_verbose=args.rm_verbose, ) if not args.skip_rmclean diff --git a/arrakis/rmclean_oncuts.py b/arrakis/rmclean_oncuts.py index 073d774e..5f49fbea 100644 --- a/arrakis/rmclean_oncuts.py +++ b/arrakis/rmclean_oncuts.py @@ -363,7 +363,7 @@ def clean_parser(parent_parser: bool = False) -> argparse.ArgumentParser: help="CLEAN cutoff (+ve = absolute, -ve = sigma).", ) parser.add_argument( - "--maxIter", + "--max_iter", type=int, default=10000, help="maximum number of CLEAN iterations.", @@ -423,13 +423,13 @@ def cli(): password=args.password, dimension=args.dimension, database=args.database, - savePlots=args.savePlots, + savePlots=args.save_plots, limit=args.limit, cutoff=args.cutoff, - maxIter=args.maxIter, + maxIter=args.max_iter, gain=args.gain, window=args.window, - showPlots=args.showPlots, + showPlots=args.show_plots, rm_verbose=args.rm_verbose, ) diff --git a/arrakis/rmsynth_oncuts.py b/arrakis/rmsynth_oncuts.py index 91f15e81..09dd37d1 100644 --- a/arrakis/rmsynth_oncuts.py +++ b/arrakis/rmsynth_oncuts.py @@ -1095,7 +1095,7 @@ def rm_common_parser(parent_parser: bool = False) -> argparse.ArgumentParser: default="1d", help="How many dimensions for RMsynth '1d' or '3d'.", ) - parser.add_argument("--savePlots", action="store_true", help="save the plots.") + parser.add_argument("--save_plots", action="store_true", help="save the plots.") parser.add_argument( "--rm_verbose", action="store_true", help="Verbose RMsynth/RMClean." 
     )
@@ -1154,7 +1154,7 @@ def rmsynth_parser(parent_parser: bool = False) -> argparse.ArgumentParser:
     )
     # RM-tools args
     parser.add_argument(
-        "--weightType",
+        "--weight_type",
         default="variance",
         help="weighting (inverse) 'variance' or 'uniform' (all 1s).",
     )
@@ -1165,42 +1165,42 @@ def rmsynth_parser(parent_parser: bool = False) -> argparse.ArgumentParser:
         help="Stokes I fitting function: 'linear' or 'log' polynomials.",
     )
     parser.add_argument(
-        "--fitRMSF",
+        "--fit_rmsf",
         action="store_true",
         help="Fit a Gaussian to the RMSF",
     )
     parser.add_argument(
-        "--phiMax_radm2",
+        "--phi_max",
        type=float,
         default=None,
-        help="Absolute max Faraday depth sampled (overrides NSAMPLES).",
+        help="Absolute max Faraday depth sampled (in rad/m^2) (overrides NSAMPLES).",
     )
     parser.add_argument(
-        dest="--dPhi_radm2",
+        "--dphi",
         type=float,
         default=None,
         help="Width of Faraday depth channel.",
     )
     parser.add_argument(
-        dest="--nSamples",
+        "--n_samples",
         type=float,
         default=5,
         help="Number of samples across the FWHM RMSF.",
     )
     parser.add_argument(
-        dest="--polyOrd",
+        "--poly_ord",
         type=int,
         default=3,
         help="polynomial order to fit to I spectrum.",
     )
     parser.add_argument(
-        dest="--noStokesI",
+        "--no_stokes_i",
         action="store_true",
         help="ignore the Stokes I spectrum.",
     )
-    parser.add_argument("--showPlots", action="store_true", help="show the plots.")
+    parser.add_argument("--show_plots", action="store_true", help="show the plots.")
     parser.add_argument(
-        "--not_RMSF",
+        "--not_rmsf",
         action="store_true",
         help="Skip calculation of RMSF?",
     )
@@ -1264,16 +1264,16 @@ def cli():
         database=args.database,
         do_validate=args.validate,
         limit=args.limit,
-        savePlots=args.savePlots,
-        weightType=args.weightType,
-        fitRMSF=args.fitRMSF,
-        phiMax_radm2=args.phiMax_radm2,
-        dPhi_radm2=args.dPhi_radm2,
-        nSamples=args.nSamples,
-        polyOrd=args.polyOrd,
-        noStokesI=args.noStokesI,
-        showPlots=args.showPlots,
-        not_RMSF=args.not_RMSF,
+        savePlots=args.save_plots,
+        weightType=args.weight_type,
+        fitRMSF=args.fit_rmsf,
+        phiMax_radm2=args.phi_max,
+        dPhi_radm2=args.dphi,
+        nSamples=args.n_samples,
+        polyOrd=args.poly_ord,
+        noStokesI=args.no_stokes_i,
+        showPlots=args.show_plots,
+        not_RMSF=args.not_rmsf,
         rm_verbose=args.rm_verbose,
         debug=args.debug,
         fit_function=args.fit_function,
From 14969c8ae095a37aec03b80f12b1f92f94e79faa Mon Sep 17 00:00:00 2001
From: "Thomson, Alec (CASS, Kensington)"
Date: Mon, 8 Apr 2024 18:59:46 +1000
Subject: [PATCH 07/37] Update default

---
 arrakis/.default_config.cfg | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/arrakis/.default_config.cfg b/arrakis/.default_config.cfg
index 72750eb1..01ed7427 100644
--- a/arrakis/.default_config.cfg
+++ b/arrakis/.default_config.cfg
@@ -71,23 +71,23 @@ database: False
 validate: False
 # limit: None
 own_fit: False
-savePlots: False
-weightType: 'variance'
+save_plots: False
+weight_type: 'variance'
 fit_function: 'log'
-fitRMSF: False
+fit_rmsf: False
 # phiMax_radm2: None
 # dPhi_radm2: None
-nSamples: 5
-polyOrd: 3
-noStokesI: False
-showPlots: False
-not_RMSF: False
+n_samples: 5
+poly_ord: 3
+no_stokes_i: False
+show_plots: False
+not_rmsf: False
 rm_verbose: False
 debug: False
 
 [RMclean options]
 cutoff: -3
-maxIter: 10000
+max_iter: 10000
 gain: 0.1
 # window: None

From b8c03eaab4ca25d1760186ac53d307f8a7f9aa1f Mon Sep 17 00:00:00 2001
From: "Thomson, Alec (CASS, Kensington)"
Date: Mon, 8 Apr 2024 19:01:00 +1000
Subject: [PATCH 08/37] Ruff

---
 arrakis/cleanup.py        | 4 ++--
 arrakis/cutout.py         | 4 ++--
arrakis/imager.py | 2 -- arrakis/logger.py | 2 -- arrakis/makecat.py | 6 +++--- arrakis/merge_fields.py | 1 - arrakis/rmsynth_oncuts.py | 6 +----- 7 files changed, 8 insertions(+), 17 deletions(-) diff --git a/arrakis/cleanup.py b/arrakis/cleanup.py index f4d87486..d6df3999 100644 --- a/arrakis/cleanup.py +++ b/arrakis/cleanup.py @@ -4,11 +4,11 @@ import logging import tarfile from pathlib import Path -from typing import List, Union +from typing import List import astropy.units as u import numpy as np -from prefect import flow, get_run_logger, task, unmapped +from prefect import flow, get_run_logger, task from tqdm.auto import tqdm from arrakis.logger import TqdmToLogger, UltimateHelpFormatter, logger diff --git a/arrakis/cutout.py b/arrakis/cutout.py index 948991e6..92077c7d 100644 --- a/arrakis/cutout.py +++ b/arrakis/cutout.py @@ -21,7 +21,7 @@ from astropy.utils import iers from astropy.utils.exceptions import AstropyWarning from astropy.wcs.utils import skycoord_to_pixel -from prefect import flow, task, unmapped +from prefect import flow, task from spectral_cube import SpectralCube from spectral_cube.utils import SpectralCubeWarning from tqdm.auto import tqdm @@ -452,7 +452,7 @@ def cutout_islands( all_beams = list(beams_col.find(query).sort("Source_ID")) for beams in tqdm(all_beams, desc="Getting beams", file=TQDM_OUT): - for beam_num in beams[f"beams"][field]["beam_list"]: + for beam_num in beams["beams"][field]["beam_list"]: beams_dict[beam_num].append(beams) comps_dict: Dict[str, List[Dict]] = {s: [] for s in source_ids} diff --git a/arrakis/imager.py b/arrakis/imager.py index 0044b594..8a7240c0 100644 --- a/arrakis/imager.py +++ b/arrakis/imager.py @@ -13,9 +13,7 @@ from typing import NamedTuple as Struct from typing import Optional, Tuple, Union -import astropy.units as u import numpy as np -from astropy import units as u from astropy.io import fits from astropy.stats import mad_std from astropy.table import Table diff --git a/arrakis/logger.py b/arrakis/logger.py index 0e141095..4c22b943 100644 --- a/arrakis/logger.py +++ b/arrakis/logger.py @@ -6,8 +6,6 @@ import io import logging -from tqdm import tqdm - # https://stackoverflow.com/questions/61324536/python-argparse-with-argumentdefaultshelpformatter-and-rawtexthelpformatter class UltimateHelpFormatter( diff --git a/arrakis/makecat.py b/arrakis/makecat.py index abb8e1d0..8150e4e5 100644 --- a/arrakis/makecat.py +++ b/arrakis/makecat.py @@ -628,7 +628,7 @@ def get_integration_time(cat: RMTable, field_col: Collection, epoch: int): reutrn_vals = {"_id": 0, "SCAN_TINT": 1, "FIELD_NAME": 1, "SBID": 1} # Get most recent SBID if more than one is 'SELECT'ed if field_col.count_documents(query) > 1: - logger.info(f"More than one SELECT=1 for field_names, getting most recent.") + logger.info("More than one SELECT=1 for field_names, getting most recent.") field_datas = list( field_col.find({"FIELD_NAME": {"$in": unique_field_names}}, reutrn_vals) ) @@ -637,7 +637,7 @@ def get_integration_time(cat: RMTable, field_col: Collection, epoch: int): logger.info(f"Using CAL_SBID {sbids[max_idx]}") field_data = field_datas[max_idx] elif field_col.count_documents(query) == 0: - logger.error(f"No data for field_names, trying without SELECT=1.") + logger.error("No data for field_names, trying without SELECT=1.") field_data = list( field_col.find({"FIELD_NAME": {"$in": unique_field_names}}, reutrn_vals) ) @@ -871,7 +871,7 @@ def main( # First try the component try: data += [comp[col]] - except KeyError as e: + except KeyError: logger.warning( 
f"Component {src_id} does not have {col}, trying island DB..." ) diff --git a/arrakis/merge_fields.py b/arrakis/merge_fields.py index 33ec2e92..2b8c9b2f 100644 --- a/arrakis/merge_fields.py +++ b/arrakis/merge_fields.py @@ -13,7 +13,6 @@ from arrakis.logger import UltimateHelpFormatter, logger from arrakis.utils.database import get_db, test_db from arrakis.utils.io import try_mkdir -from arrakis.utils.pipeline import generic_parser def make_short_name(name: str) -> str: diff --git a/arrakis/rmsynth_oncuts.py b/arrakis/rmsynth_oncuts.py index 09dd37d1..14176320 100644 --- a/arrakis/rmsynth_oncuts.py +++ b/arrakis/rmsynth_oncuts.py @@ -5,11 +5,10 @@ import os import traceback import warnings -from glob import glob from pathlib import Path from pprint import pformat from shutil import copyfile -from typing import Any, List +from typing import List from typing import NamedTuple as Struct from typing import Optional, Tuple, Union @@ -35,7 +34,6 @@ from arrakis.utils.database import get_db, test_db from arrakis.utils.fitsutils import getfreq from arrakis.utils.fitting import fit_pl, fitted_mean, fitted_std -from arrakis.utils.io import try_mkdir from arrakis.utils.pipeline import generic_parser, logo_str logger.setLevel(logging.INFO) @@ -1000,8 +998,6 @@ def main( ) freq = np.array(freq) - _batch_size = 100 - if do_validate: logger.info(f"Running RMsynth on {n_comp} components") # We don't run this in parallel! From 6da29e22d47174fcb7a9abda8a3870390447385f Mon Sep 17 00:00:00 2001 From: "Thomson, Alec (CASS, Kensington)" Date: Mon, 8 Apr 2024 19:04:45 +1000 Subject: [PATCH 09/37] Unused vars --- arrakis/cutout.py | 1 - arrakis/init_database.py | 2 -- arrakis/makecat.py | 1 - arrakis/rmclean_oncuts.py | 2 -- arrakis/utils/fitting.py | 1 - arrakis/utils/pipeline.py | 2 +- scripts/casda_prepare.py | 6 ++---- scripts/compare_leakage.py | 8 ++++---- scripts/compute_leakage.py | 4 ++-- scripts/copy_data.py | 2 +- scripts/hello_mpi_world.py | 2 +- submit/test_image.py | 4 ++-- 12 files changed, 13 insertions(+), 22 deletions(-) diff --git a/arrakis/cutout.py b/arrakis/cutout.py index 92077c7d..1cddffe5 100644 --- a/arrakis/cutout.py +++ b/arrakis/cutout.py @@ -122,7 +122,6 @@ def cutout_image( outdir = os.path.abspath(cutout_args.outdir) - ret = [] basename = os.path.basename(image_name) outname = f"{source_id}.cutout.{basename}" outfile = os.path.join(outdir, outname) diff --git a/arrakis/init_database.py b/arrakis/init_database.py index 419ee92e..73d107f8 100644 --- a/arrakis/init_database.py +++ b/arrakis/init_database.py @@ -300,8 +300,6 @@ def get_beams(mastercat: Table, database: Table, epoch: int = 0) -> List[Dict]: tqdm(zip(vals, ixs), total=len(vals), desc="Getting beams", file=TQDM_OUT) ): beam_dict = {} - ra = mastercat[val]["RA"] - dec = mastercat[val]["Dec"] name = mastercat[val]["Source_Name"] isl_id = mastercat[val]["Source_ID"] beams = database[seps[0][idx.astype(int)]] diff --git a/arrakis/makecat.py b/arrakis/makecat.py index 8150e4e5..313e5927 100644 --- a/arrakis/makecat.py +++ b/arrakis/makecat.py @@ -345,7 +345,6 @@ def get_fit_func( idx = (hi_i_tab["beamdist"].to(u.deg).value < bins[i + 1]) & ( hi_i_tab["beamdist"].to(u.deg).value >= bins[i] ) - res = np.nanpercentile(frac_P[idx], [2.3, 16, 50, 84, 97.6]) s2_los[i], s1_los[i], meds[i], s1_ups[i], s2_ups[i] = np.nanpercentile( frac_P[idx], [2.3, 16, 50, 84, 97.6] ) diff --git a/arrakis/rmclean_oncuts.py b/arrakis/rmclean_oncuts.py index 5f49fbea..a2f23115 100644 --- a/arrakis/rmclean_oncuts.py +++ b/arrakis/rmclean_oncuts.py @@ 
-253,7 +253,6 @@ def main( query = {"$and": [{f"beams.{field}": {"$exists": True}}]} - beams = list(beams_col.find(query).sort("Source_ID")) all_island_ids = sorted(beams_col.distinct("Source_ID", query)) if dimension == "3d": @@ -269,7 +268,6 @@ def main( }, ).sort("Source_ID") ) - island_ids = [doc["Source_ID"] for doc in islands] n_island = island_col.count_documents(query) island_col.update(query, {"$set": {"rmclean3d": False}}) diff --git a/arrakis/utils/fitting.py b/arrakis/utils/fitting.py index a46ab7fa..75dc1527 100644 --- a/arrakis/utils/fitting.py +++ b/arrakis/utils/fitting.py @@ -206,7 +206,6 @@ def fit_pl( model_arr = model_func(freq, *best) model_high = model_func(freq, *(best + np.sqrt(np.diag(covar)))) model_low = model_func(freq, *(best - np.sqrt(np.diag(covar)))) - model_err = model_high - model_low ssr = np.sum((flux[goodchan] - model_arr[goodchan]) ** 2) aic = akaike_info_criterion_lsq(ssr, len(p0), goodchan.sum()) diff --git a/arrakis/utils/pipeline.py b/arrakis/utils/pipeline.py index ef2316c2..8314fdc1 100644 --- a/arrakis/utils/pipeline.py +++ b/arrakis/utils/pipeline.py @@ -331,7 +331,7 @@ def port_forward(port: int, target: str) -> None: logger.info(f"Forwarding {port} from localhost to {target}") cmd = f"ssh -N -f -R {port}:localhost:{port} {target}" command = shlex.split(cmd) - output = subprocess.Popen(command) + _ = subprocess.Popen(command) def cpu_to_use(max_cpu: int, count: int) -> int: diff --git a/scripts/casda_prepare.py b/scripts/casda_prepare.py index 5083e558..9e21444d 100755 --- a/scripts/casda_prepare.py +++ b/scripts/casda_prepare.py @@ -37,7 +37,7 @@ def make_thumbnail(cube_f: str, cube_dir: str): cube = fits.getdata(cube_f) - icube = cube[:, 0, :, :] + _ = cube[:, 0, :, :] qcube = cube[:, 1, :, :] ucube = cube[:, 2, :, :] pcube = np.hypot(qcube, ucube) @@ -398,7 +398,7 @@ def write_polspec(table: Table, filename: str, overwrite: bool = False): np.array(tabcol[0]) ) # get the type of each element in 2D array col_format = "Q" + fits.column._convert_record2fits(subtype) + "()" - if tabcol.unit != None: + if tabcol.unit is not None: unit = tabcol.unit.to_string() else: unit = "" @@ -495,8 +495,6 @@ def main( polcat = polcat[df.index.values] polcat.add_index("cat_id") - test = prep_type == "test" - logger.info(f"Preparing data for {prep_type} CASDA upload") if prep_type == "full": diff --git a/scripts/compare_leakage.py b/scripts/compare_leakage.py index 47231dfb..1a24b5de 100644 --- a/scripts/compare_leakage.py +++ b/scripts/compare_leakage.py @@ -122,7 +122,7 @@ def interpolate(field, comp, beams, cutdir, septab, holofile, verbose=True): data[bm].update({"freq": freq}) try: - outname = make_plot(data, comp, imfile) + _ = make_plot(data, comp, imfile) # plotdir = os.path.join(os.path.join(cutdir, 'plots'), os.path.basename(outname)) # copyfile(outname, plotdir) except Exception as e: @@ -141,7 +141,7 @@ def main( verbose=True, snr_cut=None, ): - scriptdir = os.path.dirname(os.path.realpath(__file__)) + _ = os.path.dirname(os.path.realpath(__file__)) beamseps = gen_seps(field) if datadir is not None: @@ -177,7 +177,7 @@ def main( ) ) components.set_index("Source_ID", drop=False, inplace=True) - component_ids = list(components["Gaussian_ID"]) + _ = list(components["Gaussian_ID"]) assert len(set(beams.index)) == len(set(components.index)) outputs = [] @@ -197,7 +197,7 @@ def main( holofile=holofile, ) outputs.append(out) - futures = chunk_dask( + _ = chunk_dask( outputs=outputs, task_name="leakage plots", progress_text="Making leakage plots", diff 
--git a/scripts/compute_leakage.py b/scripts/compute_leakage.py index 10b6d27b..23581c9d 100644 --- a/scripts/compute_leakage.py +++ b/scripts/compute_leakage.py @@ -19,7 +19,7 @@ def makesurf(start, stop, field, datadir, save_plots=True, data=None): # myquery = {'rmsynth1d': True} query = {"$and": [{f"beams.{field}": {"$exists": True}}]} - beams = list(beams_col.find(query).sort("Source_ID")) + _ = list(beams_col.find(query).sort("Source_ID")) island_ids = sorted(beams_col.distinct("Source_ID", query)) query = {"Source_ID": {"$in": island_ids}} @@ -154,7 +154,7 @@ def trim_mean(x): q_estimates_arr = np.array(q_estimates) u_estimates_arr = np.array(u_estimates) - p_estimates_arr = np.array(p_estimates) + _ = np.array(p_estimates) logger.info( "\nThe mean number of points in each aperture of %.2f degs was %d\n" diff --git a/scripts/copy_data.py b/scripts/copy_data.py index f3c3669b..ca8426b8 100755 --- a/scripts/copy_data.py +++ b/scripts/copy_data.py @@ -27,7 +27,7 @@ def main( tab = Table.read(field_path) tab.add_index("FIELD_NAME") tab.add_index("CAL_SBID") - row = Table(tab.loc["FIELD_NAME", f"RACS_{name}"]).loc["CAL_SBID", sbid]["INDEX"] + _ = Table(tab.loc["FIELD_NAME", f"RACS_{name}"]).loc["CAL_SBID", sbid]["INDEX"] sb_dir = os.path.abspath(f"{spice_area}/{sbid}") field_dir = os.path.abspath(f"{sb_dir}/RACS_test4_1.05_{name}") bpcal = os.path.abspath(f"{sb_dir}/BPCAL") diff --git a/scripts/hello_mpi_world.py b/scripts/hello_mpi_world.py index 85505d99..f16539f3 100755 --- a/scripts/hello_mpi_world.py +++ b/scripts/hello_mpi_world.py @@ -22,7 +22,7 @@ def cli(): import argparse parser = argparse.ArgumentParser(description="Run a parallel hello world") - args = parser.parse_args() + _ = parser.parse_args() main() diff --git a/submit/test_image.py b/submit/test_image.py index 87a7502a..16ff1610 100755 --- a/submit/test_image.py +++ b/submit/test_image.py @@ -43,7 +43,7 @@ def main(): port_forward(port, "petrichor-i1") logger.info(client.scheduler_info()["services"]) - results = imager.main( + _ = imager.main( msdir=Path("/scratch2/tho822/spiceracs/pipe_test"), out_dir=Path("/scratch2/tho822/spiceracs/pipe_test"), mgain=0.8, @@ -66,7 +66,7 @@ def main(): # parallel_deconvolution=6144, absmem=float(config["memory"].replace("GB", "").replace("GiB", "")), ) - logs = client.get_worker_logs() + _ = client.get_worker_logs() if __name__ == "__main__": From c0cc2e0b65b4d7f234911bba79be4cd8561734ee Mon Sep 17 00:00:00 2001 From: "Thomson, Alec (CASS, Kensington)" Date: Tue, 9 Apr 2024 12:46:35 +1000 Subject: [PATCH 10/37] Fix queries and typing --- arrakis/frion.py | 30 +++++--- arrakis/makecat.py | 122 ++++++++++++++++++++++----------- arrakis/rmclean_oncuts.py | 141 +++++++++++++++++++++++++------------- arrakis/rmsynth_oncuts.py | 58 ++++++++++++++-- 4 files changed, 246 insertions(+), 105 deletions(-) diff --git a/arrakis/frion.py b/arrakis/frion.py index 03b8f730..ca67b784 100644 --- a/arrakis/frion.py +++ b/arrakis/frion.py @@ -193,6 +193,7 @@ def main( outdir: Path, host: str, epoch: int, + sbid: Optional[int] = None, username: Optional[str] = None, password: Optional[str] = None, database=False, @@ -210,6 +211,7 @@ def main( outdir (Path): Output directory host (str): MongoDB host IP address epoch (int): Epoch of observation + sbid (int, optional): SBID of observation. Defaults to None. username (str, optional): Mongo username. Defaults to None. password (str, optional): Mongo passwrod. Defaults to None. database (bool, optional): Update database. Defaults to False. 
@@ -220,11 +222,11 @@ def main(
         limit (int, optional): Limit to number of islands. Defaults to None.
     """
     # Query database for data
-    outdir = os.path.abspath(outdir)
-    cutdir = os.path.join(outdir, "cutouts")
+    outdir = outdir.absolute()
+    cutdir = outdir / "cutouts"
 
-    plotdir = os.path.join(cutdir, "plots")
-    try_mkdir(plotdir)
+    plotdir = cutdir / "plots"
+    plotdir.mkdir(parents=True, exist_ok=True)
 
     beams_col, island_col, comp_col = get_db(
         host=host, epoch=epoch, username=username, password=password
@@ -232,6 +234,9 @@ def main(
 
     query_1 = {"$and": [{f"beams.{field}": {"$exists": True}}]}
 
+    if sbid is not None:
+        query_1["$and"].append({f"beams.{field}.SBIDs": sbid})
+
     beams = list(beams_col.find(query_1).sort("Source_ID"))
     island_ids = sorted(beams_col.distinct("Source_ID", query_1))
@@ -244,18 +249,20 @@ def main(
     )
 
     # SELECT '1' is best field according to the database
     query_3 = {"$and": [{"FIELD_NAME": f"{field}"}, {"SELECT": 1}]}
+    if sbid is not None:
+        query_3["$and"].append({"SBID": sbid})
 
     logger.info(f"{query_3}")
 
-    # Get most recent SBID if more than one is 'SELECT'ed
+    # Raise error if too much or too little data
     if field_col.count_documents(query_3) > 1:
-        logger.info(f"More than one SELECT=1 for {field}, getting most recent.")
-        field_datas = list(field_col.find({"FIELD_NAME": f"{field}"}))
-        sbids = [f["CAL_SBID"] for f in field_datas]
-        max_idx = np.argmax(sbids)
-        logger.info(f"Using CAL_SBID {sbids[max_idx]}")
-        field_data = field_datas[max_idx]
+        logger.error(f"More than one SELECT=1 for {field} - try supplying SBID.")
+        raise ValueError(f"More than one SELECT=1 for {field} - try supplying SBID.")
+
     elif field_col.count_documents(query_3) == 0:
         logger.error(f"No data for {field} with {query_3}, trying without SELECT=1.")
+        query_3 = {"$and": [{"FIELD_NAME": f"{field}"}]}
+        if sbid is not None:
+            query_3["$and"].append({"SBID": sbid})
-        field_data = field_col.find_one({"FIELD_NAME": f"{field}"})
+        field_data = field_col.find_one(query_3)
     else:
         logger.info(f"Using {query_3}")
@@ -413,6 +420,7 @@ def cli():
 
     main(
         field=args.field,
+        sbid=args.sbid,
-        outdir=Path(args.outdir),
+        outdir=Path(args.datadir),
         host=args.host,
         epoch=args.epoch,
diff --git a/arrakis/makecat.py b/arrakis/makecat.py
index 313e5927..baa1b9d7 100644
--- a/arrakis/makecat.py
+++ b/arrakis/makecat.py
@@ -6,7 +6,7 @@
 import time
 import warnings
 from pprint import pformat
-from typing import Tuple, Union
+from typing import Callable, NamedTuple, Optional, Tuple, Union
 
 import astropy.units as u
 import dask.dataframe as dd
@@ -37,6 +37,13 @@
 TQDM_OUT = TqdmToLogger(logger, level=logging.INFO)
 
 
+class SpectralIndices(NamedTuple):
+    alphas: np.ndarray
+    alphas_err: np.ndarray
+    betas: np.ndarray
+    betas_err: np.ndarray
+
+
 def combinate(data: ArrayLike) -> Tuple[ArrayLike, ArrayLike]:
     """Return all combinations of data with itself
 
@@ -52,14 +59,14 @@ def combinate(data: ArrayLike) -> Tuple[ArrayLike, ArrayLike]:
     return dx, dy
 
 
-def flag_blended_components(cat: RMTable) -> RMTable:
+def flag_blended_components(cat: TableLike) -> TableLike:
     """Identify blended components in a catalogue and flag them. 
Args: - cat (RMTable): Input catalogue + cat (TableLike): Input catalogue Returns: - RMTable: Output catalogue with minor components flagged + TableLike: Output catalogue with minor components flagged """ def is_blended_component(sub_df: pd.DataFrame) -> pd.DataFrame: @@ -224,7 +231,7 @@ def lognorm_from_percentiles(x1, p1, x2, p2): @task(name="Fix sigma_add") -def sigma_add_fix(tab): +def sigma_add_fix(tab: TableLike) -> TableLike: sigma_Q_low = np.array(tab["sigma_add_Q"] - tab["sigma_add_Q_err_minus"]) sigma_Q_high = np.array(tab["sigma_add_Q"] + tab["sigma_add_Q_err_plus"]) @@ -277,7 +284,7 @@ def sigma_add_fix(tab): return tab -def is_leakage(frac, sep, fit): +def is_leakage(frac: float, sep: float, fit: Callable) -> bool: """Determine if a source is leakage Args: @@ -293,21 +300,21 @@ def is_leakage(frac, sep, fit): def get_fit_func( - tab: Table, + tab: TableLike, nbins: int = 21, offset: float = 0.002, degree: int = 2, do_plot: bool = False, high_snr_cut: float = 30.0, -) -> Tuple[np.polynomial.Polynomial.fit, plt.Figure]: +) -> Tuple[Callable, plt.Figure]: """Fit an envelope to define leakage sources Args: - tab (Table): Catalogue to fit + tab (TableLike): Catalogue to fit nbins (int, optional): Number of bins along seperation axis. Defaults to 21. Returns: - np.polynomial.Polynomial.fit: 3rd order polynomial fit. + Callable: 3rd order polynomial fit. """ logger.info(f"Using {high_snr_cut=}.") @@ -517,11 +524,11 @@ def masker(x): @task(name="Add cuts and flags") def cuts_and_flags( - cat: RMTable, + cat: TableLike, leakage_degree: int = 4, leakage_bins: int = 16, leakage_snr: float = 30.0, -) -> RMTable: +) -> TableLike: """Cut out bad sources, and add flag columns A flag of 'True' means the source is bad. @@ -585,7 +592,7 @@ def cuts_and_flags( @task(name="Get spectral indices") -def get_alpha(cat): +def get_alpha(cat: TableLike) -> SpectralIndices: coefs_str = cat["stokesI_model_coef"] coefs_err_str = cat["stokesI_model_coef_err"] alphas = [] @@ -605,7 +612,7 @@ def get_alpha(cat): beta_err = float(coefs_err[-3]) betas.append(beta) betas_err.append(beta_err) - return dict( + return SpectralIndices( alphas=np.array(alphas), alphas_err=np.array(alphas_err), betas=np.array(betas), @@ -614,8 +621,10 @@ def get_alpha(cat): @task(name="Get integration times") -def get_integration_time(cat: RMTable, field_col: Collection, epoch: int): - logger.warn("Will be stripping the trailing field character prefix. ") +def get_integration_time( + cat: RMTable, field_col: Collection, sbid: Optional[int] = None +): + logger.warning("Will be stripping the trailing field character prefix. 
") field_names = [ name[:-1] if name[-1] in ("A", "B") else name for name in list(cat["tile_id"]) ] @@ -623,28 +632,34 @@ def get_integration_time(cat: RMTable, field_col: Collection, epoch: int): logger.debug(f"Searching integration times for {unique_field_names=}") - query = {"$and": [{"FIELD_NAME": {"$in": unique_field_names}, "SELECT": 1}]} + query = {"$and": [{"FIELD_NAME": {"$in": unique_field_names}}, {"SELECT": 1}]} + + # If an SBID is given, we're looking for a specific field + if sbid is not None: + query["$and"].append({"SBID": sbid}) + query["$and"].remove({"FIELD_NAME": {"$in": unique_field_names}}) + # Get the singlular field name + field_names = [ + field_col.find_one({"SBID": sbid}, {"FIELD_NAME": 1})["FIELD_NAME"] + ] * len(field_names) + unique_field_names = list(set(field_names)) + reutrn_vals = {"_id": 0, "SCAN_TINT": 1, "FIELD_NAME": 1, "SBID": 1} - # Get most recent SBID if more than one is 'SELECT'ed - if field_col.count_documents(query) > 1: - logger.info("More than one SELECT=1 for field_names, getting most recent.") - field_datas = list( - field_col.find({"FIELD_NAME": {"$in": unique_field_names}}, reutrn_vals) - ) - sbids = [f["CAL_SBID"] for f in field_datas] - max_idx = np.argmax(sbids) - logger.info(f"Using CAL_SBID {sbids[max_idx]}") - field_data = field_datas[max_idx] - elif field_col.count_documents(query) == 0: + + doc_count = field_col.count_documents(query) + + if doc_count == 0: logger.error("No data for field_names, trying without SELECT=1.") - field_data = list( - field_col.find({"FIELD_NAME": {"$in": unique_field_names}}, reutrn_vals) - ) - else: - field_data = list( - field_col.find({"FIELD_NAME": {"$in": unique_field_names}}, reutrn_vals) - ) + query["$and"].remove({"SELECT": 1}) + query["$and"].append({"SELECT": 0}) + doc_count = field_col.count_documents(query) + if doc_count == 0: + raise ValueError(f"No data for query {query}") + else: + logger.warning("Using SELECT=0 instead.") + + field_data = list(field_col.find(query, reutrn_vals)) tint_df = pd.DataFrame(field_data) tint_df.set_index("FIELD_NAME", inplace=True, drop=False) @@ -778,6 +793,7 @@ def main( field: str, host: str, epoch: str, + sbid: Optional[int] = None, leakage_degree: int = 4, leakage_bins: int = 16, leakage_snr: float = 30.0, @@ -813,8 +829,20 @@ def main( query = { "$and": [ {"Source_ID": {"$in": all_island_ids}}, - {"rmsynth1d": True}, - {"rmclean1d": True}, + { + ( + f"{field}.rmsynth1d" + if sbid is None + else f"{field}_{sbid}.rmsynth1d" + ): True + }, + { + ( + f"{field}.rmclean1d" + if sbid is None + else f"{field}_{sbid}.rmclean1d" + ): True + }, ] } @@ -838,6 +866,10 @@ def main( tock = time.time() logger.info(f"Finished island collection query - {tock-tick:.2f}s") + if len(comps) == 0: + logger.error("No components found for this field.") + raise ValueError("No components found for this field.") + comps_df = pd.DataFrame(comps) comps_df.set_index("Source_ID", inplace=True) islands_df = pd.DataFrame(islands) @@ -907,6 +939,12 @@ def main( new_col = Column(data=data, name=selcol) rmtab.add_column(new_col) + # If we have specified an SBID, we're doing a single field only + # Therefore we overwrite SBID and field_name with the specified value + if sbid is not None: + rmtab["sbid"] = sbid + rmtab["field_name"] = field + # Fix sigma_add rmtab = sigma_add_fix(rmtab) @@ -919,15 +957,17 @@ def main( ) # Add spectral index from fitted model - alpha_dict = get_alpha(rmtab) - rmtab.add_column(Column(data=alpha_dict["alphas"], name="spectral_index")) - 
rmtab.add_column(Column(data=alpha_dict["alphas_err"], name="spectral_index_err"))
+    spectral_indices = get_alpha(rmtab)
+    rmtab.add_column(Column(data=spectral_indices.alphas, name="spectral_index"))
+    rmtab.add_column(
+        Column(data=spectral_indices.alphas_err, name="spectral_index_err")
+    )

     # Add integration time
     field_col = get_field_db(
         host=host, epoch=epoch, username=username, password=password
     )
-    tints = get_integration_time(rmtab, field_col, epoch=epoch)
+    tints = get_integration_time(rmtab, field_col, sbid=sbid)
     rmtab.add_column(Column(data=tints, name="int_time"))
     # Add epoch
     rmtab.add_column(Column(data=rmtab["start_time"] + (tints / 2), name="epoch"))
diff --git a/arrakis/rmclean_oncuts.py b/arrakis/rmclean_oncuts.py
index a2f23115..d4f3209a 100644
--- a/arrakis/rmclean_oncuts.py
+++ b/arrakis/rmclean_oncuts.py
@@ -26,11 +26,13 @@

 @task(name="1D RM-CLEAN")
 def rmclean1d(
+    field: str,
     comp: dict,
-    outdir: str,
+    outdir: Path,
     cutoff: float = -3,
     maxIter=10000,
     gain=0.1,
+    sbid: Optional[int] = None,
     showPlots=False,
     savePlots=False,
     rm_verbose=True,
@@ -39,6 +41,7 @@
     """1D RM-CLEAN

     Args:
+        field (str): RACS field name.
         comp (dict): Mongo entry for component.
-        outdir (str): Output directory.
+        outdir (Path): Output directory.
+        sbid (int, optional): SBID of observation. Defaults to None.
         cutoff (float, optional): CLEAN cutoff (in sigma). Defaults to -3.
@@ -55,21 +58,23 @@
     cname = comp["Gaussian_ID"]
     logger.debug(f"Working on {comp}")

+    save_name = field if sbid is None else f"{field}_{sbid}"
     try:
         rm1dfiles = comp["rm1dfiles"]
-        fdfFile = os.path.join(outdir, f"{rm1dfiles['FDF_dirty']}")
-        rmsfFile = os.path.join(outdir, f"{rm1dfiles['RMSF']}")
-        weightFile = os.path.join(outdir, f"{rm1dfiles['weights']}")
-        rmSynthFile = os.path.join(outdir, f"{rm1dfiles['summary_json']}")
+        fdfFile = outdir / f"{rm1dfiles['FDF_dirty']}"
+        rmsfFile = outdir / f"{rm1dfiles['RMSF']}"
+        weightFile = outdir / f"{rm1dfiles['weights']}"
+        rmSynthFile = outdir / f"{rm1dfiles['summary_json']}"

         prefix = os.path.join(os.path.abspath(os.path.dirname(fdfFile)), cname)

         # Sanity checks
         for f in [weightFile, fdfFile, rmsfFile, rmSynthFile]:
-            logger.debug(f"Checking {os.path.abspath(f)}")
-            if not os.path.exists(f):
-                logger.fatal("File does not exist: '{:}'.".format(f))
-                sys.exit()
+            logger.debug(f"Checking {f.absolute()}")
+            if not f.exists():
+                logger.fatal(f"File does not exist: '{f}'.")
+                raise FileNotFoundError(f"File does not exist: '{f}'")
+
         nBits = 32
         mDict, aDict = do_RMclean_1D.readFiles(
             fdfFile, rmsfFile, weightFile, rmSynthFile, nBits
@@ -106,22 +111,21 @@
         do_RMclean_1D.saveOutput(outdict, arrdict, prefixOut=prefix, verbose=rm_verbose)
         if savePlots:
             plt.close("all")
-            plotdir = os.path.join(outdir, "plots")
-            plot_files = glob(
-                os.path.join(os.path.abspath(os.path.dirname(fdfFile)), "*.pdf")
-            )
-            for src in plot_files:
-                base = os.path.basename(src)
-                dst = os.path.join(plotdir, base)
-                copyfile(src, dst)
+            plotdir = outdir / "plots"
+            plot_files = list(fdfFile.parent.glob("*.pdf"))
+            for plot_file in plot_files:
+                copyfile(plot_file, plotdir / plot_file.name)
+
         # Load into Mongo
         myquery = {"Gaussian_ID": cname}

         newvalues = {
             "$set": {
-                "rmclean1d": True,
-                "rmclean_summary": outdict,
-            },
+                save_name: {
+                    "rmclean1d": True,
+                    "rmclean_summary": outdict,
+                },
+            }
         }
     except KeyError:
         logger.critical("Failed to load data! 
RM-CLEAN not applied to component!") @@ -130,16 +134,20 @@ def rmclean1d( newvalues = { "$set": { - "rmclean1d": False, - }, + save_name: { + "rmclean1d": False, + }, + } } return pymongo.UpdateOne(myquery, newvalues) @task(name="3D RM-CLEAN") def rmclean3d( + field: str, island: dict, - outdir: str, + outdir: Path, + sbid: Optional[int] = None, cutoff: float = -3, maxIter=10000, gain=0.1, @@ -149,7 +157,7 @@ def rmclean3d( Args: island (dict): MongoDB island entry. - outdir (str): Output directory. + outdir (Path): Output directory. cutoff (float, optional): CLEAN cutoff (in sigma). Defaults to -3. maxIter (int, optional): Max CLEAN iterations. Defaults to 10000. gain (float, optional): CLEAN gain. Defaults to 0.1. @@ -158,24 +166,14 @@ def rmclean3d( Returns: pymongo.UpdateOne: MongoDB update query. """ - """3D RM-CLEAN - Args: - island_id (str): RACS Island ID - host (str): MongoDB host - field (str): RACS field - cutoff (int, optional): CLEAN cutoff. Defaults to -3. - maxIter (int, optional): CLEAN max iterations. Defaults to 10000. - gain (float, optional): CLEAN gain. Defaults to 0.1. - rm_verbose (bool, optional): Verbose RM-CLEAN. Defaults to False. - """ iname = island["Source_ID"] prefix = f"{iname}_" rm3dfiles = island["rm3dfiles"] cleanFDF, ccArr, iterCountArr, residFDF, headtemp = do_RMclean_3D.run_rmclean( - fitsFDF=os.path.join(outdir, rm3dfiles["FDF_real_dirty"]), - fitsRMSF=os.path.join(outdir, rm3dfiles["RMSF_tot"]), + fitsFDF=(outdir / rm3dfiles["FDF_real_dirty"]).as_posix(), + fitsRMSF=(outdir / rm3dfiles["RMSF_tot"]).as_posix(), cutoff=cutoff, maxIter=maxIter, gain=gain, @@ -192,15 +190,14 @@ def rmclean3d( residFDF, headtemp, prefixOut=prefix, - outDir=os.path.abspath( - os.path.dirname(os.path.join(outdir, rm3dfiles["FDF_real_dirty"])) - ), + outDir=(outdir / rm3dfiles["FDF_real_dirty"]).parent.absolute().as_posix(), write_separate_FDF=True, verbose=rm_verbose, ) # Load into Mongo + save_name = field if sbid is None else f"{field}_{sbid}" myquery = {"Source_ID": iname} - newvalues = {"$set": {"rmclean3d": True}} + newvalues = {"$set": {save_name: {"rmclean3d": True}}} return pymongo.UpdateOne(myquery, newvalues) @@ -210,6 +207,7 @@ def main( outdir: Path, host: str, epoch: int, + sbid: Optional[int] = None, username: Optional[str] = None, password: Optional[str] = None, dimension="1d", @@ -243,8 +241,8 @@ def main( showPlots (bool, optional): Show interactive plots. Defaults to False. rm_verbose (bool, optional): Verbose output from RM-CLEAN. Defaults to False. 
""" - outdir = os.path.abspath(outdir) - outdir = os.path.join(outdir, "cutouts") + outdir = outdir.absolute() + outdir = outdir / "cutouts" # default connection (ie, local) beams_col, island_col, comp_col = get_db( @@ -252,11 +250,24 @@ def main( ) query = {"$and": [{f"beams.{field}": {"$exists": True}}]} + if sbid is not None: + query["$and"].append({f"beams.{field}.SBIDs": sbid}) all_island_ids = sorted(beams_col.distinct("Source_ID", query)) if dimension == "3d": - query = {"$and": [{"Source_ID": {"$in": all_island_ids}}, {"rmsynth3d": True}]} + query = { + "$and": [ + {"Source_ID": {"$in": all_island_ids}}, + { + ( + f"{field}.rmsynth3d" + if sbid is None + else f"{field}_{sbid}.rmsynth3d" + ): True + }, + ] + } islands = list( island_col.find( @@ -269,10 +280,32 @@ def main( ).sort("Source_ID") ) n_island = island_col.count_documents(query) - island_col.update(query, {"$set": {"rmclean3d": False}}) + island_col.update( + query, + { + "$set": { + ( + f"{field}.rmclean3d" + if sbid is None + else f"{field}_{sbid}.rmclean3d" + ): False + } + }, + ) elif dimension == "1d": - query = {"$and": [{"Source_ID": {"$in": all_island_ids}}, {"rmsynth1d": True}]} + query = { + "$and": [ + {"Source_ID": {"$in": all_island_ids}}, + { + ( + f"{field}.rmsynth1d" + if sbid is None + else f"{field}_{sbid}.rmsynth1d" + ): True + }, + ] + } components = list( comp_col.find( @@ -286,18 +319,30 @@ def main( ).sort("Source_ID") ) n_comp = comp_col.count_documents(query) - comp_col.update_many(query, {"$set": {"rmclean1d": False}}) + comp_col.update_many( + query, + { + "$set": { + ( + f"{field}.rmclean1d" + if sbid is None + else f"{field}_{sbid}.rmclean1d" + ): True + } + }, + ) if limit is not None: count = limit n_comp = count n_island = count - # component_ids = component_ids[:count] if dimension == "1d": logger.info(f"Running RM-CLEAN on {n_comp} components") outputs = rmclean1d.map( comp=components, + field=unmapped(field), + sbid=unmapped(sbid), outdir=unmapped(outdir), cutoff=unmapped(cutoff), maxIter=unmapped(maxIter), @@ -311,7 +356,9 @@ def main( logger.info(f"Running RM-CLEAN on {n_island} islands") outputs = rmclean3d.map( + field=unmapped(field), island=islands, + sbid=unmapped(sbid), outdir=unmapped(outdir), cutoff=unmapped(cutoff), maxIter=unmapped(maxIter), diff --git a/arrakis/rmsynth_oncuts.py b/arrakis/rmsynth_oncuts.py index 14176320..1e9aac9a 100644 --- a/arrakis/rmsynth_oncuts.py +++ b/arrakis/rmsynth_oncuts.py @@ -138,12 +138,15 @@ def rmsynthoncut3d( dataU = np.squeeze(dataU) dataI = np.squeeze(dataI) + save_name = field if sbid is None else f"{field}_{sbid}" if np.isnan(dataI).all() or np.isnan(dataQ).all() or np.isnan(dataU).all(): logger.critical(f"Cubelet {iname} is entirely NaN") myquery = {"Source_ID": iname} badvalues = { "$set": { - "rmsynth3d": False, + save_name: { + "rmsynth3d": False, + } } } return pymongo.UpdateOne(myquery, badvalues) @@ -197,7 +200,6 @@ def rmsynthoncut3d( outer_dir = os.path.basename(os.path.dirname(ifile)) - save_name = field if sbid is None else f"{field}_{sbid}" newvalues = { "$set": { save_name: { @@ -974,14 +976,58 @@ def main( # Unset rmsynth in db if dimension == "1d": - query_1d = {"$and": [{"Source_ID": {"$in": island_ids}}, {"rmsynth1d": True}]} + query_1d = { + "$and": [ + {"Source_ID": {"$in": island_ids}}, + { + ( + f"{field}.rmsynth1d" + if sbid is None + else f"{field}_{sbid}.rmsynth1d" + ): True + }, + ] + } - comp_col.update_many(query_1d, {"$set": {"rmsynth1d": False}}) + comp_col.update_many( + query_1d, + { + "$set": { + ( + 
f"{field}.rmsynth1d" + if sbid is None + else f"{field}_{sbid}.rmsynth1d" + ): False + } + }, + ) elif dimension == "3d": - query_3d = {"$and": [{"Source_ID": {"$in": island_ids}}, {"rmsynth3d": True}]} + query_3d = { + "$and": [ + {"Source_ID": {"$in": island_ids}}, + { + ( + f"{field}.rmsynth3d" + if sbid is None + else f"{field}_{sbid}.rmsynth3d" + ): True + }, + ] + } - island_col.update(query_3d, {"$set": {"rmsynth3d": False}}) + island_col.update( + query_3d, + { + "$set": { + ( + f"{field}.rmsynth3d" + if sbid is None + else f"{field}_{sbid}.rmsynth3d" + ): False + } + }, + ) if limit is not None: n_comp = limit From 3586de8820320b7bd853b6fee7c2af68c325dedb Mon Sep 17 00:00:00 2001 From: "Thomson, Alec (CASS, Kensington)" Date: Tue, 9 Apr 2024 12:47:31 +1000 Subject: [PATCH 11/37] Inline --- arrakis/rmclean_oncuts.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arrakis/rmclean_oncuts.py b/arrakis/rmclean_oncuts.py index d4f3209a..e376c39f 100644 --- a/arrakis/rmclean_oncuts.py +++ b/arrakis/rmclean_oncuts.py @@ -241,8 +241,7 @@ def main( showPlots (bool, optional): Show interactive plots. Defaults to False. rm_verbose (bool, optional): Verbose output from RM-CLEAN. Defaults to False. """ - outdir = outdir.absolute() - outdir = outdir / "cutouts" + outdir = outdir.absolute() / "cutouts" # default connection (ie, local) beams_col, island_col, comp_col = get_db( From 38fbc7c465eb24624dbcbfd44ef4863ade1b0c75 Mon Sep 17 00:00:00 2001 From: "Thomson, Alec (CASS, Kensington)" Date: Tue, 9 Apr 2024 13:40:22 +1000 Subject: [PATCH 12/37] Fix args --- arrakis/cleanup.py | 2 +- arrakis/cutout.py | 5 +++-- arrakis/frion.py | 4 +++- arrakis/imager.py | 19 +++++++++++-------- arrakis/linmos.py | 9 ++++++--- arrakis/merge_fields.py | 4 +++- arrakis/process_spice.py | 16 +++++++++------- arrakis/rmclean_oncuts.py | 2 +- arrakis/rmsynth_oncuts.py | 13 +++++++------ arrakis/utils/pipeline.py | 23 ++++++++++++++++------- 10 files changed, 60 insertions(+), 37 deletions(-) diff --git a/arrakis/cleanup.py b/arrakis/cleanup.py index d6df3999..d3d92bb8 100644 --- a/arrakis/cleanup.py +++ b/arrakis/cleanup.py @@ -147,7 +147,7 @@ def cli(): if verbose: logger.setLevel(logging.DEBUG) - main(datadir=Path(args.outdir), overwrite=args.overwrite) + main(datadir=Path(args.datadir), overwrite=args.overwrite) if __name__ == "__main__": diff --git a/arrakis/cutout.py b/arrakis/cutout.py index 1cddffe5..c13d1c63 100644 --- a/arrakis/cutout.py +++ b/arrakis/cutout.py @@ -30,7 +30,7 @@ from arrakis.utils.database import get_db, test_db from arrakis.utils.fitsutils import fix_header from arrakis.utils.io import try_mkdir -from arrakis.utils.pipeline import generic_parser, logo_str +from arrakis.utils.pipeline import generic_parser, logo_str, workdir_arg_parser iers.conf.auto_download = False warnings.filterwarnings( @@ -561,10 +561,11 @@ def cutout_parser(parent_parser: bool = False) -> argparse.ArgumentParser: def cli() -> None: """Command-line interface""" gen_parser = generic_parser(parent_parser=True) + work_parser = workdir_arg_parser(parent_parser=True) cut_parser = cutout_parser(parent_parser=True) parser = argparse.ArgumentParser( formatter_class=UltimateHelpFormatter, - parents=[gen_parser, cut_parser], + parents=[gen_parser, work_parser, cut_parser], description=cut_parser.description, ) args = parser.parse_args() diff --git a/arrakis/frion.py b/arrakis/frion.py index ca67b784..c3e7cb06 100644 --- a/arrakis/frion.py +++ b/arrakis/frion.py @@ -421,7 +421,9 @@ def cli(): main( 
field=args.field, sbid=args.sbid, - outdir=Path(args.outdir), + outdir=Path( + args.datadir, + ), host=args.host, epoch=args.epoch, username=args.username, diff --git a/arrakis/imager.py b/arrakis/imager.py index 8a7240c0..7d94b1ad 100644 --- a/arrakis/imager.py +++ b/arrakis/imager.py @@ -33,7 +33,7 @@ field_name_from_ms, wsclean, ) -from arrakis.utils.pipeline import logo_str +from arrakis.utils.pipeline import logo_str, workdir_arg_parser TQDM_OUT = TqdmToLogger(logger, level=logging.INFO) @@ -830,11 +830,6 @@ def imager_parser(parent_parser: bool = False) -> argparse.ArgumentParser: type=Path, help="Directory containing MS files", ) - parser.add_argument( - "outdir", - type=Path, - help="Directory to output images", - ) parser.add_argument( "--temp_dir", type=Path, @@ -1012,11 +1007,19 @@ def imager_parser(parent_parser: bool = False) -> argparse.ArgumentParser: def cli(): """Command-line interface""" - parser = imager_parser() + im_parser = imager_parser(parent_parser=True) + work_parser = workdir_arg_parser(parent_parser=True) + + parser = argparse.ArgumentParser( + parents=[im_parser, work_parser], + formatter_class=UltimateHelpFormatter, + description=im_parser.description, + ) + args = parser.parse_args() main( msdir=args.msdir, - out_dir=args.outdir, + out_dir=args.datadir, temp_dir=args.temp_dir, cutoff=args.psf_cutoff, robust=args.robust, diff --git a/arrakis/linmos.py b/arrakis/linmos.py index e72640fc..37f4a644 100644 --- a/arrakis/linmos.py +++ b/arrakis/linmos.py @@ -21,7 +21,7 @@ from arrakis.logger import UltimateHelpFormatter, logger from arrakis.utils.database import get_db, test_db -from arrakis.utils.pipeline import generic_parser, logo_str +from arrakis.utils.pipeline import generic_parser, logo_str, workdir_arg_parser warnings.filterwarnings(action="ignore", category=SpectralCubeWarning, append=True) warnings.simplefilter("ignore", category=AstropyWarning) @@ -412,9 +412,10 @@ def cli(): """Command-line interface""" gen_parser = generic_parser(parent_parser=True) + work_parser = workdir_arg_parser(parent_parser=True) lin_parser = linmos_parser(parent_parser=True) parser = argparse.ArgumentParser( - parents=[gen_parser, lin_parser], + parents=[gen_parser, work_parser, lin_parser], formatter_class=UltimateHelpFormatter, description=lin_parser.description, ) @@ -427,7 +428,9 @@ def cli(): main( field=args.field, - datadir=Path(args.datadir), + datadir=Path( + args.datadir, + ), host=args.host, epoch=args.epoch, holofile=Path(args.holofile), diff --git a/arrakis/merge_fields.py b/arrakis/merge_fields.py index 2b8c9b2f..46635a8c 100644 --- a/arrakis/merge_fields.py +++ b/arrakis/merge_fields.py @@ -412,7 +412,9 @@ def cli(): verbose = args.verbose test_db( - host=args.host, username=args.username, password=args.password, verbose=verbose + host=args.host, + username=args.username, + password=args.password, ) main( diff --git a/arrakis/process_spice.py b/arrakis/process_spice.py index c101bcf9..00ef4d76 100644 --- a/arrakis/process_spice.py +++ b/arrakis/process_spice.py @@ -25,7 +25,7 @@ ) from arrakis.logger import UltimateHelpFormatter, logger from arrakis.utils.database import test_db -from arrakis.utils.pipeline import generic_parser, logo_str +from arrakis.utils.pipeline import generic_parser, logo_str, workdir_arg_parser @flow(name="Combining+Synthesis on Arrakis") @@ -44,7 +44,7 @@ def process_spice(args, host: str, task_runner: BaseTaskRunner) -> None: task_runner=task_runner, )( field=args.field, - directory=str(args.outdir), + directory=str(args.datadir), 
host=host, epoch=args.epoch, username=args.username, @@ -63,7 +63,7 @@ def process_spice(args, host: str, task_runner: BaseTaskRunner) -> None: task_runner=task_runner, )( field=args.field, - datadir=Path(args.outdir), + datadir=Path(args.datadir), host=host, epoch=args.epoch, holofile=Path(args.holofile), @@ -81,7 +81,7 @@ def process_spice(args, host: str, task_runner: BaseTaskRunner) -> None: previous_future = ( frion.main.with_options(task_runner=task_runner)( field=args.field, - outdir=args.outdir, + outdir=args.datadir, host=host, epoch=args.epoch, username=args.username, @@ -137,7 +137,7 @@ def process_spice(args, host: str, task_runner: BaseTaskRunner) -> None: previous_future = ( rmclean_oncuts.main.with_options(task_runner=task_runner)( field=args.field, - outdir=args.outdir, + outdir=args.datadir, host=host, epoch=args.epoch, username=args.username, @@ -172,7 +172,7 @@ def process_spice(args, host: str, task_runner: BaseTaskRunner) -> None: previous_future = ( cleanup.main.with_options(task_runner=task_runner)( - datadir=args.outdir, + datadir=args.datadir, ) if not args.skip_cleanup else previous_future @@ -273,7 +273,7 @@ def main(args: configargparse.Namespace) -> None: name=f"Arrakis Imaging -- {args.field}", task_runner=dask_runner )( msdir=args.msdir, - out_dir=args.outdir, + out_dir=args.datadir, temp_dir=args.temp_dir, cutoff=args.psf_cutoff, robust=args.robust, @@ -394,6 +394,7 @@ def cli(): # Help string to be shown using the -h option pipe_parser = pipeline_parser(parent_parser=True) + work_parser = workdir_arg_parser(parent_parser=True) gen_parser = generic_parser(parent_parser=True) imager_parser = imager.imager_parser(parent_parser=True) cutout_parser = cutout.cutout_parser(parent_parser=True) @@ -410,6 +411,7 @@ def cli(): formatter_class=UltimateHelpFormatter, parents=[ pipe_parser, + work_parser, gen_parser, imager_parser, cutout_parser, diff --git a/arrakis/rmclean_oncuts.py b/arrakis/rmclean_oncuts.py index e376c39f..49187f4b 100644 --- a/arrakis/rmclean_oncuts.py +++ b/arrakis/rmclean_oncuts.py @@ -460,7 +460,7 @@ def cli(): ) main( field=args.field, - outdir=Path(args.outdir), + outdir=Path(args.datadir), host=host, epoch=args.epoch, username=args.username, diff --git a/arrakis/rmsynth_oncuts.py b/arrakis/rmsynth_oncuts.py index 1e9aac9a..e6c0bac9 100644 --- a/arrakis/rmsynth_oncuts.py +++ b/arrakis/rmsynth_oncuts.py @@ -34,7 +34,7 @@ from arrakis.utils.database import get_db, test_db from arrakis.utils.fitsutils import getfreq from arrakis.utils.fitting import fit_pl, fitted_mean, fitted_std -from arrakis.utils.pipeline import generic_parser, logo_str +from arrakis.utils.pipeline import generic_parser, logo_str, workdir_arg_parser logger.setLevel(logging.INFO) @@ -1218,25 +1218,25 @@ def rmsynth_parser(parent_parser: bool = False) -> argparse.ArgumentParser: help="Absolute max Faraday depth sampled (in rad/m^2) (overrides NSAMPLES).", ) parser.add_argument( - dest="--dphi", + "--dphi", type=float, default=None, help="Width of Faraday depth channel.", ) parser.add_argument( - dest="--n_samples", + "--n_samples", type=float, default=5, help="Number of samples across the FWHM RMSF.", ) parser.add_argument( - dest="--poly_ord", + "--poly_ord", type=int, default=3, help="polynomial order to fit to I spectrum.", ) parser.add_argument( - dest="--no_stokes_i", + "--no_stokes_i", action="store_true", help="ignore the Stokes I spectrum.", ) @@ -1267,9 +1267,10 @@ def cli(): warnings.simplefilter("ignore", category=RuntimeWarning) gen_parser = 
generic_parser(parent_parser=True)
+    work_parser = workdir_arg_parser(parent_parser=True)
     synth_parser = rmsynth_parser(parent_parser=True)
     parser = argparse.ArgumentParser(
-        parents=[gen_parser, synth_parser],
+        parents=[gen_parser, work_parser, synth_parser],
         formatter_class=UltimateHelpFormatter,
         description=synth_parser.description,
     )
diff --git a/arrakis/utils/pipeline.py b/arrakis/utils/pipeline.py
index 8314fdc1..056af408 100644
--- a/arrakis/utils/pipeline.py
+++ b/arrakis/utils/pipeline.py
@@ -48,6 +48,22 @@
 """


+def workdir_arg_parser(parent_parser: bool = False) -> argparse.ArgumentParser:
+    # Parse the command line options
+    work_parser = argparse.ArgumentParser(
+        add_help=not parent_parser,
+        formatter_class=UltimateHelpFormatter,
+    )
+    parser = work_parser.add_argument_group("workdir arguments")
+    parser.add_argument(
+        "datadir",
+        type=Path,
+        help="Directory to create/find full-size images and 'cutout' directory",
+    )
+
+    return work_parser
+
+
 def generic_parser(parent_parser: bool = False) -> argparse.ArgumentParser:
     descStr = f"""
     {logo_str}
@@ -67,13 +83,6 @@ def generic_parser(parent_parser: bool = False) -> argparse.ArgumentParser:
         "field", metavar="field", type=str, help="Name of field (e.g. RACS_2132-50)."
     )

-    parser.add_argument(
-        "datadir",
-        metavar="datadir",
-        type=Path,
-        help="Directory containing full-size data cubes in FITS format, and cutout directory.",
-    )
-
     parser.add_argument(
         "--sbid",
         type=int,
From d4c853aa42691e97ad1814e5e59dbcd2b331737c Mon Sep 17 00:00:00 2001
From: "Thomson, Alec (CASS, Kensington)"
Date: Tue, 9 Apr 2024 14:10:08 +1000
Subject: [PATCH 13/37] Use paths

---
 arrakis/cutout.py | 121 +++++++++++++++++++++++++---------------------
 1 file changed, 65 insertions(+), 56 deletions(-)

diff --git a/arrakis/cutout.py b/arrakis/cutout.py
index c13d1c63..6408599b 100644
--- a/arrakis/cutout.py
+++ b/arrakis/cutout.py
@@ -7,6 +7,7 @@
 import warnings
 from concurrent.futures import ThreadPoolExecutor
 from glob import glob
+from pathlib import Path
 from pprint import pformat
 from shutil import copyfile
 from typing import Dict, List
@@ -59,49 +60,57 @@ class CutoutArgs(Struct):
     """Upper DEC bound in degrees"""
     dec_low: float
     """Lower DEC bound in degrees"""
-    outdir: str
+    outdir: Path
     """Output directory"""


 def cutout_weight(
-    image_name: str,
+    image_name: Path,
     source_id: str,
-    cutout_args: CutoutArgs,
+    cutout_args: Optional[CutoutArgs],
     field: str,
     stoke: str,
     beam_num: int,
     dryrun=False,
 ) -> pymongo.UpdateOne:
+
+    # Update database
+    myquery = {"Source_ID": source_id}
+
+    if cutout_args is None:
+        logger.error(f"Skipping {source_id} -- no components found")
+        newvalues = {
+            "$set": {f"beams.{field}.{stoke.lower()}_beam{beam_num}_weight_file": ""}
+        }
+        return pymongo.UpdateOne(myquery, newvalues, upsert=True)
+
-    outdir = os.path.abspath(cutout_args.outdir)
-    basename = os.path.basename(image_name)
+    outdir = cutout_args.outdir
+    basename = image_name.name
     outname = f"{source_id}.cutout.{basename}"
-    outfile = os.path.join(outdir, outname)
+    outfile = outdir / outname

-    image = image_name.replace("image.restored", "weights.restored").replace(
-        ".fits", ".txt"
-    )
-    outfile = outfile.replace("image.restored", "weights.restored").replace(
-        ".fits", ".txt"
-    )
+    image = image_name.with_name(
+        image_name.name.replace("image.restored", "weights.restored")
+    ).with_suffix(".txt")
+    outfile = outfile.with_name(
+        outfile.name.replace("image.restored", "weights.restored")
+    ).with_suffix(".txt")

     if not dryrun:
         copyfile(image, outfile)
         logger.info(f"Written to 
{outfile}") - # Update database - myquery = {"Source_ID": source_id} - - filename = os.path.join( - os.path.basename(os.path.dirname(outfile)), os.path.basename(outfile) - ) + filename = outfile.parent / outfile.name newvalues = { - "$set": {f"beams.{field}.{stoke.lower()}_beam{beam_num}_weight_file": filename} + "$set": { + f"beams.{field}.{stoke.lower()}_beam{beam_num}_weight_file": filename.as_posix() + } } return pymongo.UpdateOne(myquery, newvalues, upsert=True) def cutout_image( - image_name: str, + image_name: Path, data_in_mem: np.ndarray, old_header: fits.Header, cube: SpectralCube, @@ -119,14 +128,22 @@ def cutout_image( pymongo.UpdateOne: Update query for MongoDB """ logger.setLevel(logging.INFO) + # Update database + myquery = {"Source_ID": source_id} + if cutout_args is None: + logger.error(f"Skipping {source_id} -- no components found") + newvalues = { + "$set": {f"beams.{field}.{stoke.lower()}_beam{beam_num}_weight_file": ""} + } + return pymongo.UpdateOne(myquery, newvalues, upsert=True) - outdir = os.path.abspath(cutout_args.outdir) + outdir = cutout_args.outdir.absolute() - basename = os.path.basename(image_name) + basename = image_name.name outname = f"{source_id}.cutout.{basename}" - outfile = os.path.join(outdir, outname) + outfile = outdir / outname - padder = cube.header["BMAJ"] * u.deg * pad + padder: float = cube.header["BMAJ"] * u.deg * pad xlo = Longitude(cutout_args.ra_low * u.deg) - Longitude(padder) xhi = Longitude(cutout_args.ra_high * u.deg) + Longitude(padder) @@ -164,14 +181,11 @@ def cutout_image( ) logger.info(f"Written to {outfile}") - # Update database - myquery = {"Source_ID": source_id} - - filename = os.path.join( - os.path.basename(os.path.dirname(outfile)), os.path.basename(outfile) - ) + filename = outfile.parent / outfile.name newvalues = { - "$set": {f"beams.{field}.{stoke.lower()}_beam{beam_num}_image_file": filename} + "$set": { + f"beams.{field}.{stoke.lower()}_beam{beam_num}_image_file": filename.as_posix() + } } return pymongo.UpdateOne(myquery, newvalues, upsert=True) @@ -181,19 +195,15 @@ def get_args( comps: List[Dict], beam: Dict, island_id: str, - outdir: str, -) -> Union[List[CutoutArgs], None]: + outdir: Path, +) -> Optional[CutoutArgs]: """Get arguments for cutout function Args: comps (List[Dict]): List of mongo entries for RACS components in island beam (Dict): Mongo entry for the RACS beam island_id (str): RACS island ID - outdir (str): Output directory - field (str): RACS field name - datadir (str): Input directory - stokeslist (List[str]): List of Stokes parameters to process - verbose (bool, optional): Verbose output. Defaults to True. 
+        comps (List[Dict]): List of mongo entries for RACS components in island
+        beam (Dict): Mongo entry for the RACS beam
+        island_id (str): RACS island ID
+        outdir (Path): Output directory

     Raises:
         e: Exception
@@ -211,8 +221,8 @@ def get_args(
     logger.setLevel(logging.INFO)

     assert beam["Source_ID"] == island_id

     if len(comps) == 0:
         logger.warning(f"Skipping island {island_id} -- no components found")
         return None

-    outdir = f"{outdir}/{island_id}"
-    try_mkdir(outdir)
+    outdir = outdir / island_id
+    outdir.mkdir(parents=True, exist_ok=True)

     # Find image size
     ras: List[float] = []
@@ -222,9 +232,10 @@ def get_args(
         ras = ras + [comp["RA"]]
         decs = decs + [comp["Dec"]]
         majs = majs + [comp["Maj"]]
-    ras = ras * u.deg
-    decs = decs * u.deg
-    majs = majs * u.arcsec
+
+    ras: u.Quantity = ras * u.deg
+    decs: u.Quantity = decs * u.deg
+    majs: u.Quantity = majs * u.arcsec

     coords = SkyCoord(ras, decs)

     try:
@@ -266,8 +277,8 @@ def worker(
     epoch: int,
     beam: Dict,
     comps: List[Dict],
-    outdir: str,
-    image_name: str,
+    outdir: Path,
+    image_name: Path,
     data_in_mem: np.ndarray,
     old_header: fits.Header,
     cube: SpectralCube,
@@ -317,8 +328,8 @@ def big_cutout(
     beams: List[Dict],
     beam_num: int,
     stoke: str,
-    datadir: str,
-    outdir: str,
+    datadir: Path,
+    outdir: Path,
     host: str,
     epoch: int,
     field: str,
@@ -329,10 +340,8 @@ def big_cutout(
 ) -> List[pymongo.UpdateOne]:
     with open("comps.pkl", "rb") as f:
         comps_dict = pickle.load(f)
-    wild = (
-        f"{datadir}/image.restored.{stoke.lower()}*contcube*beam{beam_num:02}.conv.fits"
-    )
-    images = glob(wild)
+    wild = f"image.restored.{stoke.lower()}*contcube*beam{beam_num:02}.conv.fits"
+    images = list(datadir.glob(wild))
     if len(images) == 0:
         raise Exception(f"No images found matching '{wild}'")
     elif len(images) > 1:
@@ -386,7 +395,7 @@ def big_cutout(
 @flow(name="Cutout islands")
 def cutout_islands(
     field: str,
-    directory: str,
+    directory: Path,
     host: str,
     epoch: int,
     sbid: Optional[int] = None,
@@ -401,7 +410,7 @@ def cutout_islands(

     Args:
         field (str): RACS field name.
-        directory (str): Directory to store cutouts.
+        directory (Path): Directory to store cutouts.
         host (str): MongoDB host.
         username (str, optional): Mongo username. Defaults to None.
         password (str, optional): Mongo password. Defaults to None.
@@ -413,8 +422,8 @@ def cutout_islands(
     if stokeslist is None:
         stokeslist = ["I", "Q", "U", "V"]

-    directory = os.path.abspath(directory)
-    outdir = os.path.join(directory, "cutouts")
+    directory = directory.absolute()
+    outdir = directory / "cutouts"

     logger.info("Testing database. 
") test_db( @@ -466,7 +475,7 @@ def cutout_islands( pickle.dump(comps_dict, f) # Create output dir if it doesn't exist - try_mkdir(outdir) + outdir.mkdir(parents=True, exist_ok=True) cuts: List[pymongo.UpdateOne] = [] for stoke in stokeslist: for beam_num in unique_beams_nums: From f993f5e0fc4a113c79bf5f35b6518d81f9ec59d5 Mon Sep 17 00:00:00 2001 From: "Thomson, Alec (CASS, Kensington)" Date: Tue, 9 Apr 2024 16:00:12 +1000 Subject: [PATCH 14/37] Fix cutout logic --- arrakis/cutout.py | 114 +++++++++++++++++++++----------------- arrakis/utils/database.py | 20 ++++++- 2 files changed, 83 insertions(+), 51 deletions(-) diff --git a/arrakis/cutout.py b/arrakis/cutout.py index 6408599b..73be57c7 100644 --- a/arrakis/cutout.py +++ b/arrakis/cutout.py @@ -16,6 +16,7 @@ import astropy.units as u import numpy as np +import pandas as pd import pymongo from astropy.coordinates import Latitude, Longitude, SkyCoord from astropy.io import fits @@ -28,7 +29,12 @@ from tqdm.auto import tqdm from arrakis.logger import TqdmToLogger, UltimateHelpFormatter, logger -from arrakis.utils.database import get_db, test_db +from arrakis.utils.database import ( + get_db, + get_field_db, + test_db, + validate_sbid_field_pair, +) from arrakis.utils.fitsutils import fix_header from arrakis.utils.io import try_mkdir from arrakis.utils.pipeline import generic_parser, logo_str, workdir_arg_parser @@ -115,12 +121,12 @@ def cutout_image( old_header: fits.Header, cube: SpectralCube, source_id: str, - cutout_args: CutoutArgs, + cutout_args: Optional[CutoutArgs], field: str, beam_num: int, stoke: str, - pad=3, - dryrun=False, + pad: float = 3, + dryrun: bool = False, ) -> pymongo.UpdateOne: """Perform a cutout. @@ -192,15 +198,14 @@ def cutout_image( def get_args( - comps: List[Dict], - beam: Dict, - island_id: str, + comps: pd.DataFrame, + source: pd.Series, outdir: Path, ) -> Optional[CutoutArgs]: """Get arguments for cutout function Args: - comps (List[Dict]): List of mongo entries for RACS components in island + comps (pd.DataFrame): List of mongo entries for RACS components in island beam (Dict): Mongo entry for the RACS beam island_id (str): RACS island ID outdir (Path): Input directory @@ -215,7 +220,7 @@ def get_args( logger.setLevel(logging.INFO) - assert beam["Source_ID"] == island_id + island_id = source.Source_ID if len(comps) == 0: logger.warning(f"Skipping island {island_id} -- no components found") @@ -225,17 +230,10 @@ def get_args( outdir.mkdir(parents=True, exist_ok=True) # Find image size - ras: List[float] = [] - decs: List[float] = [] - majs: List[float] = [] - for comp in comps: - ras = ras + [comp["RA"]] - decs = decs + [comp["Dec"]] - majs = majs + [comp["Maj"]] - - ras: u.Quantity = ras * u.deg - decs: u.Quantity = decs * u.deg - majs: u.Quantity = majs * u.arcsec + ras: u.Quantity = comps.RA.values * u.deg + decs: u.Quantity = comps.Dec.values * u.deg + majs: List[float] = comps.Maj.values * u.arcsec + coords = SkyCoord(ras, decs) try: @@ -275,8 +273,8 @@ def get_args( def worker( host: str, epoch: int, - beam: Dict, - comps: List[Dict], + source: pd.Series, + comps: pd.DataFrame, outdir: Path, image_name: Path, data_in_mem: np.ndarray, @@ -294,8 +292,7 @@ def worker( ) cut_args = get_args( comps=comps, - beam=beam, - island_id=beam["Source_ID"], + source=source, outdir=outdir, ) image_update = cutout_image( @@ -303,7 +300,7 @@ def worker( data_in_mem=data_in_mem, old_header=old_header, cube=cube, - source_id=beam["Source_ID"], + source_id=source.Source_ID, cutout_args=cut_args, field=field, 
beam_num=beam_num, @@ -313,7 +310,7 @@ def worker( ) weight_update = cutout_weight( image_name=image_name, - source_id=beam["Source_ID"], + source_id=source.Source_ID, cutout_args=cut_args, field=field, beam_num=beam_num, @@ -325,7 +322,8 @@ def worker( @task(name="Cutout from big cube") def big_cutout( - beams: List[Dict], + sources: pd.DataFrame, + comps: pd.DataFrame, beam_num: int, stoke: str, datadir: Path, @@ -338,8 +336,7 @@ def big_cutout( password: Optional[str] = None, limit: Optional[int] = None, ) -> List[pymongo.UpdateOne]: - with open("comps.pkl", "rb") as f: - comps_dict = pickle.load(f) + wild = f"image.restored.{stoke.lower()}*contcube*beam{beam_num:02}.conv.fits" images = list(datadir.glob(wild)) if len(images) == 0: @@ -360,19 +357,19 @@ def big_cutout( if limit is not None: logger.critical(f"Limiting to {limit} islands") - beams = beams[:limit] + sources = sources[:limit] updates: List[pymongo.UpdateOne] = [] with ThreadPoolExecutor() as executor: futures = [] - for beam in beams: + for _, source in sources.iterrows(): futures.append( executor.submit( worker, host=host, epoch=epoch, - beam=beam, - comps=comps_dict[beam["Source_ID"]], + source=source, + comps=comps.loc[source], outdir=outdir, image_name=image_name, data_in_mem=data_in_mem, @@ -436,7 +433,22 @@ def cutout_islands( host=host, epoch=epoch, username=username, password=password ) - # Query the DB + field_col = get_field_db( + host=host, + epoch=epoch, + username=username, + password=password, + ) + + # Check for SBID match + if sbid is not None: + sbid_check = validate_sbid_field_pair( + field_name=field, + sbid=sbid, + field_col=field_col, + ) + if not sbid_check: + raise ValueError(f"SBID {sbid} does not match field {field}") query = {"$and": [{f"beams.{field}": {"$exists": True}}]} if sbid is not None: @@ -447,7 +459,7 @@ def cutout_islands( ) source_ids = sorted(beams_col.distinct("Source_ID", query)) - beams_dict: Dict[int, List[Dict]] = {b: [] for b in unique_beams_nums} + # beams_dict: Dict[int, List[Dict]] = {b: [] for b in unique_beams_nums} query = { "$and": [ @@ -458,21 +470,24 @@ def cutout_islands( if sbid is not None: query["$and"].append({f"beams.{field}.SBIDs": sbid}) - all_beams = list(beams_col.find(query).sort("Source_ID")) - for beams in tqdm(all_beams, desc="Getting beams", file=TQDM_OUT): - for beam_num in beams["beams"][field]["beam_list"]: - beams_dict[beam_num].append(beams) + beams_df = pd.DataFrame( + beams_col.find(query, {"Source_ID": 1, f"beams.{field}.beam_list": 1}).sort( + "Source_ID" + ) + ) + + beam_source_list = [] + for i, row in tqdm(beams_df.iterrows()): + beam_list = row.beams[field]["beam_list"] + for b in beam_list: + beam_source_list.append({"Source_ID": row.Source_ID, "beam": b}) + beam_source_df = pd.DataFrame(beam_source_list) + beam_source_df.set_index("beam", inplace=True) - comps_dict: Dict[str, List[Dict]] = {s: [] for s in source_ids} - all_comps = list( + comps_df = pd.DataFrame( comp_col.find({"Source_ID": {"$in": source_ids}}).sort("Source_ID") ) - for comp in tqdm(all_comps, desc="Getting components", file=TQDM_OUT): - comps_dict[comp["Source_ID"]].append(comp) - - # Dump comps to file - with open("comps.pkl", "wb") as f: - pickle.dump(comps_dict, f) + comps_df.set_index("Source_ID", inplace=True) # Create output dir if it doesn't exist outdir.mkdir(parents=True, exist_ok=True) @@ -480,7 +495,8 @@ def cutout_islands( for stoke in stokeslist: for beam_num in unique_beams_nums: results = big_cutout.submit( - beams=beams_dict[beam_num], + 
sources=beam_source_df.loc[beam_num],
+                comps=comps_df.loc[beam_source_df.loc[beam_num].Source_ID],
                 beam_num=beam_num,
                 stoke=stoke,
                 datadir=directory,
@@ -502,8 +518,6 @@ def cutout_islands(
         db_res = beams_col.bulk_write(updates, ordered=False)
         logger.info(pformat(db_res.bulk_api_result))

-    os.remove("comps.pkl")
-
     logger.info("Cutouts Done!")


diff --git a/arrakis/utils/database.py b/arrakis/utils/database.py
index 550b1d88..8d0336d1 100644
--- a/arrakis/utils/database.py
+++ b/arrakis/utils/database.py
@@ -2,7 +2,7 @@
 """Database utilities"""

 import warnings
-from typing import Tuple, Union
+from typing import Optional, Tuple, Union

 import pymongo
 from astropy.utils.exceptions import AstropyWarning
@@ -15,6 +15,24 @@

 warnings.simplefilter("ignore", category=AstropyWarning)


+def validate_sbid_field_pair(field_name: str, sbid: int, field_col: Collection) -> bool:
+    """Validate field and sbid pair
+
+    Args:
+        field_name (str): Field name.
+        sbid (int): SBID.
+        field_col (Collection): Field collection.
+
+    Returns:
+        bool: True if the field name and SBID pair match.
+
+    Raises:
+        ValueError: If the SBID is not found in the database.
+    """
+    field_data: Optional[dict] = field_col.find_one({"SBID": sbid})
+    if field_data is None:
+        raise ValueError(f"SBID {sbid} not found in database")
+
+    return field_data["FIELD_NAME"] == field_name
+
+
 def test_db(
     host: str, username: Union[str, None] = None, password: Union[str, None] = None
 ) -> bool:
From c1f46ebf53e1e791580dc4adc73dd84ed73f7c72 Mon Sep 17 00:00:00 2001
From: "Thomson, Alec (CASS, Kensington)"
Date: Tue, 9 Apr 2024 16:10:39 +1000
Subject: [PATCH 15/37] Add sbid validation

---
 arrakis/cutout.py         | 13 ++++++-------
 arrakis/frion.py          | 24 +++++++++++++++++++++++-
 arrakis/makecat.py        | 24 ++++++++++++++++++++++--
 arrakis/process_spice.py  |  2 ++
 arrakis/rmclean_oncuts.py | 23 ++++++++++++++++++++++-
 arrakis/rmsynth_oncuts.py | 23 ++++++++++++++++++++++-
 arrakis/utils/database.py |  1 +
 7 files changed, 98 insertions(+), 12 deletions(-)

diff --git a/arrakis/cutout.py b/arrakis/cutout.py
index 73be57c7..7136c469 100644
--- a/arrakis/cutout.py
+++ b/arrakis/cutout.py
@@ -433,15 +433,14 @@ def cutout_islands(
         host=host, epoch=epoch, username=username, password=password
     )

-    field_col = get_field_db(
-        host=host,
-        epoch=epoch,
-        username=username,
-        password=password,
-    )
-
     # Check for SBID match
     if sbid is not None:
+        field_col = get_field_db(
+            host=host,
+            epoch=epoch,
+            username=username,
+            password=password,
+        )
         sbid_check = validate_sbid_field_pair(
             field_name=field,
             sbid=sbid,
             field_col=field_col,
         )
         if not sbid_check:
             raise ValueError(f"SBID {sbid} does not match field {field}")
diff --git a/arrakis/frion.py b/arrakis/frion.py
index c3e7cb06..cac0d7e4 100644
--- a/arrakis/frion.py
+++ b/arrakis/frion.py
@@ -19,7 +19,12 @@
 from prefect import flow, task, unmapped

 from arrakis.logger import UltimateHelpFormatter, logger
-from arrakis.utils.database import get_db, get_field_db, test_db
+from arrakis.utils.database import (
+    get_db,
+    get_field_db,
+    test_db,
+    validate_sbid_field_pair,
+)
 from arrakis.utils.fitsutils import getfreq
 from arrakis.utils.io import try_mkdir
 from arrakis.utils.pipeline import generic_parser, logo_str
@@ -231,6 +236,21 @@ def main(
     beams_col, island_col, comp_col = get_db(
         host=host, epoch=epoch, username=username, password=password
     )
+    # Check for SBID match
+    if sbid is not None:
+        field_col = get_field_db(
+            host=host,
+            epoch=epoch,
+            username=username,
+            password=password,
+        )
+        sbid_check = validate_sbid_field_pair(
+            field_name=field,
+            sbid=sbid,
+            field_col=field_col,
+        )
+        if not sbid_check:
+            raise ValueError(f"SBID {sbid} does not match field {field}")

     query_1 = {"$and": 
[{f"beams.{field}": {"$exists": True}}]} @@ -390,6 +410,8 @@ def frion_parser(parent_parser: bool = False) -> argparse.ArgumentParser: help="Pre-download IONEX files.", ) + return frion_parser + def cli(): """Command-line interface""" diff --git a/arrakis/makecat.py b/arrakis/makecat.py index baa1b9d7..a8bf2548 100644 --- a/arrakis/makecat.py +++ b/arrakis/makecat.py @@ -27,7 +27,12 @@ from arrakis import columns_possum from arrakis.logger import TqdmToLogger, UltimateHelpFormatter, logger -from arrakis.utils.database import get_db, get_field_db, test_db +from arrakis.utils.database import ( + get_db, + get_field_db, + test_db, + validate_sbid_field_pair, +) from arrakis.utils.pipeline import generic_parser, logo_str from arrakis.utils.plotting import latexify from arrakis.utils.typing import ArrayLike, TableLike @@ -792,7 +797,7 @@ def write_votable(rmtab: TableLike, outfile: str) -> None: def main( field: str, host: str, - epoch: str, + epoch: int, sbid: Optional[int] = None, leakage_degree: int = 4, leakage_bins: int = 16, @@ -817,6 +822,21 @@ def main( beams_col, island_col, comp_col = get_db( host=host, epoch=epoch, username=username, password=password ) + # Check for SBID match + if sbid is not None: + field_col = get_field_db( + host=host, + epoch=epoch, + username=username, + password=password, + ) + sbid_check = validate_sbid_field_pair( + field_name=field, + sbid=sbid, + field_col=field_col, + ) + if not sbid_check: + raise ValueError(f"SBID {sbid} does not match field {field}") logger.info("Starting beams collection query") tick = time.time() query = {"$and": [{f"beams.{field}": {"$exists": True}}]} diff --git a/arrakis/process_spice.py b/arrakis/process_spice.py index 00ef4d76..c129f440 100644 --- a/arrakis/process_spice.py +++ b/arrakis/process_spice.py @@ -399,6 +399,7 @@ def cli(): imager_parser = imager.imager_parser(parent_parser=True) cutout_parser = cutout.cutout_parser(parent_parser=True) linmos_parser = linmos.linmos_parser(parent_parser=True) + fr_parser = frion.frion_parser(parent_parser=True) common_parser = rmsynth_oncuts.rm_common_parser(parent_parser=True) synth_parser = rmsynth_oncuts.rmsynth_parser(parent_parser=True) rmclean_parser = rmclean_oncuts.clean_parser(parent_parser=True) @@ -416,6 +417,7 @@ def cli(): imager_parser, cutout_parser, linmos_parser, + fr_parser, common_parser, synth_parser, rmclean_parser, diff --git a/arrakis/rmclean_oncuts.py b/arrakis/rmclean_oncuts.py index 49187f4b..349e2151 100644 --- a/arrakis/rmclean_oncuts.py +++ b/arrakis/rmclean_oncuts.py @@ -20,7 +20,12 @@ from arrakis import rmsynth_oncuts from arrakis.logger import UltimateHelpFormatter, logger -from arrakis.utils.database import get_db, test_db +from arrakis.utils.database import ( + get_db, + get_field_db, + test_db, + validate_sbid_field_pair, +) from arrakis.utils.pipeline import generic_parser, logo_str @@ -248,6 +253,22 @@ def main( host=host, epoch=epoch, username=username, password=password ) + # Check for SBID match + if sbid is not None: + field_col = get_field_db( + host=host, + epoch=epoch, + username=username, + password=password, + ) + sbid_check = validate_sbid_field_pair( + field_name=field, + sbid=sbid, + field_col=field_col, + ) + if not sbid_check: + raise ValueError(f"SBID {sbid} does not match field {field}") + query = {"$and": [{f"beams.{field}": {"$exists": True}}]} if sbid is not None: query["$and"].append({f"beams.{field}.SBIDs": sbid}) diff --git a/arrakis/rmsynth_oncuts.py b/arrakis/rmsynth_oncuts.py index e6c0bac9..40a39feb 100644 --- 
a/arrakis/rmsynth_oncuts.py +++ b/arrakis/rmsynth_oncuts.py @@ -31,7 +31,12 @@ from scipy.stats import norm from arrakis.logger import UltimateHelpFormatter, logger -from arrakis.utils.database import get_db, test_db +from arrakis.utils.database import ( + get_db, + get_field_db, + test_db, + validate_sbid_field_pair, +) from arrakis.utils.fitsutils import getfreq from arrakis.utils.fitting import fit_pl, fitted_mean, fitted_std from arrakis.utils.pipeline import generic_parser, logo_str, workdir_arg_parser @@ -944,6 +949,22 @@ def main( host=host, epoch=epoch, username=username, password=password ) + # Check for SBID match + if sbid is not None: + field_col = get_field_db( + host=host, + epoch=epoch, + username=username, + password=password, + ) + sbid_check = validate_sbid_field_pair( + field_name=field, + sbid=sbid, + field_col=field_col, + ) + if not sbid_check: + raise ValueError(f"SBID {sbid} does not match field {field}") + beam_query = {"$and": [{f"beams.{field}": {"$exists": True}}]} if sbid is not None: diff --git a/arrakis/utils/database.py b/arrakis/utils/database.py index 8d0336d1..2c211dbf 100644 --- a/arrakis/utils/database.py +++ b/arrakis/utils/database.py @@ -26,6 +26,7 @@ def validate_sbid_field_pair(field_name: str, sbid: int, field_col: Collection) Raises: bool: If field name and sbid pair is valid. """ + logger.info(f"Validating field name and SBID pair: {field_name}, {sbid}") field_data: Optional[dict] = field_col.find_one({"SBID": sbid}) if field_data is None: raise ValueError(f"SBID {sbid} not found in database") From 7380b508df15b6d0b990eba917ffe65d2639f630 Mon Sep 17 00:00:00 2001 From: "Thomson, Alec (CASS, Kensington)" Date: Tue, 9 Apr 2024 17:43:41 +1000 Subject: [PATCH 16/37] Add temp locations --- arrakis/imager.py | 60 ++++++++++++++++++++++++++++------------ arrakis/process_spice.py | 3 +- 2 files changed, 44 insertions(+), 19 deletions(-) diff --git a/arrakis/imager.py b/arrakis/imager.py index 7d94b1ad..5d81402d 100644 --- a/arrakis/imager.py +++ b/arrakis/imager.py @@ -128,9 +128,10 @@ def image_beam( ms: Path, field_idx: int, out_dir: Path, - temp_dir: Path, prefix: Path, simage: Path, + temp_dir_wsclean: Path, + temp_dir_images: Path, pols: str = "IQU", nchan: int = 36, scale: float = 2.5, @@ -164,16 +165,18 @@ def image_beam( """Image a single beam""" logger = get_run_logger() # Evaluate the temp directory if a ENV variable is used - temp_dir = parse_env_path(temp_dir) - if out_dir != temp_dir: + temp_dir_images = parse_env_path(temp_dir_images) + if temp_dir_images != out_dir: # Copy the MS to the temp directory - ms_temp = temp_dir / ms.name + ms_temp = temp_dir_images / ms.name logger.info(f"Copying {ms} to {ms_temp}") ms_temp = ms_temp.resolve(strict=False) shutil.copytree(ms, ms_temp) ms = ms_temp # Update the prefix - prefix = temp_dir / prefix.name + prefix = temp_dir_images / prefix.name + + temp_dir_wsclean = parse_env_path(temp_dir_wsclean) commands = [] # Do any I cleaning separately @@ -181,7 +184,11 @@ def image_beam( if do_stokes_I: command = wsclean( mslist=[ms.resolve(strict=True).as_posix()], - temp_dir=temp_dir.resolve(strict=True).as_posix(), + temp_dir=( + temp_dir_wsclean.resolve(strict=True).as_posix() + if temp_dir_wsclean is not None + else None + ), use_mpi=False, name=prefix.resolve().as_posix(), pol="I", @@ -237,7 +244,11 @@ def image_beam( command = wsclean( mslist=[ms.resolve(strict=True).as_posix()], - temp_dir=temp_dir.resolve(strict=True).as_posix(), + temp_dir=( + temp_dir_wsclean.resolve(strict=True).as_posix() + if 
temp_dir_wsclean is not None
+                else None
+            ),
             use_mpi=False,
             name=prefix.resolve().as_posix(),
             pol=pols,
@@ -302,10 +313,10 @@ def image_beam(
             logger.error(f"{e=}")
             raise e

-    if out_dir != temp_dir:
+    if temp_dir_images != out_dir:
         # Copy the images to the output directory
         logger.info(f"Copying images to {out_dir}")
-        all_fits_files = list(temp_dir.glob(f"{prefix.name}*.fits"))
+        all_fits_files = list(temp_dir_images.glob(f"{prefix.name}*.fits"))
         for fits_file in tqdm(all_fits_files, desc="Copying images", file=TQDM_OUT):
             shutil.copy(fits_file, out_dir)

@@ -604,7 +615,8 @@ def fix_ms_askap_corrs(ms: Path, *args, **kwargs) -> Path:
 def main(
     msdir: Path,
     out_dir: Path,
-    temp_dir: Optional[Path] = None,
+    temp_dir_images: Optional[Path] = None,
+    temp_dir_wsclean: Optional[Path] = None,
     cutoff: Optional[float] = None,
     robust: float = -0.5,
     pols: str = "IQU",
@@ -640,7 +652,8 @@ def main(
     Args:
         msdir (Path): Path to the directory containing the MS files.
         out_dir (Path): Path to the directory where the images will be written.
-        temp_dir (Optional[Path], optional): Path for temporary files to be written. Defaults to None.
+        temp_dir_images (Optional[Path], optional): Path for temporary files to be written. Defaults to None.
+        temp_dir_wsclean (Optional[Path], optional): Path for temporary files to be written by WSClean. Defaults to None.
         cutoff (Optional[float], optional): WSClean cutoff. Defaults to None.
         robust (float, optional): WSClean Briggs robust parameter. Defaults to -0.5.
         pols (str, optional): WSClean polarisations. Defaults to "IQU".
@@ -683,9 +696,13 @@ def main(
     logger.info(f"Will image {len(mslist)} MS files in {msdir} to {out_dir}")
     cleans = []

-    if temp_dir is None:
-        temp_dir = out_dir
-    logger.info(f"Using {temp_dir} as temp directory")
+    if temp_dir_wsclean is None:
+        temp_dir_wsclean = out_dir
+    logger.info(f"Using {temp_dir_wsclean} as temp directory for WSClean")
+
+    if temp_dir_images is None:
+        temp_dir_images = out_dir
+    logger.info(f"Using {temp_dir_images} as temp directory for images")

     # Do this in serial since CASA gets upset
     prefixs = {}
@@ -714,7 +731,8 @@ def main(
             ms=ms_fix,
             field_idx=field_idxs[ms],
             out_dir=out_dir,
-            temp_dir=temp_dir,
+            temp_dir_wsclean=temp_dir_wsclean,
+            temp_dir_images=temp_dir_images,
             prefix=prefixs[ms],
             simage=simage.resolve(strict=True),
             robust=robust,
@@ -831,9 +849,14 @@ def imager_parser(parent_parser: bool = False) -> argparse.ArgumentParser:
         help="Directory containing MS files",
     )
     parser.add_argument(
-        "--temp_dir",
+        "--temp_dir_wsclean",
+        type=Path,
+        help="Temporary directory for WSClean to store intermediate files",
+    )
+    parser.add_argument(
+        "--temp_dir_images",
         type=Path,
-        help="Temporary directory to store intermediate files",
+        help="Temporary directory to store intermediate image files",
     )
     parser.add_argument(
         "--psf_cutoff",
@@ -1020,7 +1043,8 @@ def cli():
     main(
         msdir=args.msdir,
         out_dir=args.datadir,
-        temp_dir=args.temp_dir,
+        temp_dir_wsclean=args.temp_dir_wsclean,
+        temp_dir_images=args.temp_dir_images,
         cutoff=args.psf_cutoff,
         robust=args.robust,
         pols=args.pols,
diff --git a/arrakis/process_spice.py b/arrakis/process_spice.py
index c129f440..981c9af8 100644
--- a/arrakis/process_spice.py
+++ b/arrakis/process_spice.py
@@ -274,7 +274,8 @@ def main(args: configargparse.Namespace) -> None:
     )(
         msdir=args.msdir,
         out_dir=args.datadir,
-        temp_dir=args.temp_dir,
+        temp_dir_wsclean=args.temp_dir_wsclean,
+        temp_dir_images=args.temp_dir_images,
         cutoff=args.psf_cutoff,
         robust=args.robust,
         pols=args.pols,
From 
1feb78bc2c5aa67cacf1f1f7d800c5c6a199b9f7 Mon Sep 17 00:00:00 2001 From: "Thomson, Alec (CASS, Kensington)" Date: Tue, 9 Apr 2024 18:38:52 +1000 Subject: [PATCH 17/37] New defs --- arrakis/.default_config.cfg | 102 ------------------------------ arrakis/.default_config.yaml | 116 +++++++++++++++++++++++++++++++++++ arrakis/process_spice.py | 10 +-- 3 files changed, 122 insertions(+), 106 deletions(-) delete mode 100644 arrakis/.default_config.cfg create mode 100644 arrakis/.default_config.yaml diff --git a/arrakis/.default_config.cfg b/arrakis/.default_config.cfg deleted file mode 100644 index 01ed7427..00000000 --- a/arrakis/.default_config.cfg +++ /dev/null @@ -1,102 +0,0 @@ -# Arrakis default config - -## Note 'None' will be interpreated as a string, not a NoneType -## If you want to use the default value, just leave the line blank - -[Imaging options] -# temp_dir: None -# psf_cutoff: None -robust: -0.5 -nchan: 36 -pols: 'IQU' -size: 4096 -scale: 2.5 -mgain: 0.8 -niter: 100000 -# nmiter: None -auto_mask: 3.0 -auto_threshold: 1.0 -local_rms: False -# local_rms_window: None -# force_mask_rounds: None -# gridder: None -# taper: None -minuv: 0.0 -# parallel: None -purge: False -mpi: False -multiscale: False -# multiscale_scale_bias: None -multiscale_scales: '0,2,4,8,16,32,64,128' -# absmem: None -make_residual_cubes: False -ms_glob_pattern: 'scienceData*_averaged_cal.leakage.ms' -data_column: 'CORRECTED_DATA' -no_mf_weighting: False -skip_fix_ms: False -hosted-wsclean: 'docker://alecthomson/wsclean:latest' -# local_wsclean: None - -[Pipeline options] -# config: None -epoch: 0 -# host: None -# username: None -# password: None -# dask_config: None -# imager_dask_config: None -# holofile: None -yanda: '1.3.0' -# yanda_image: None -imager_only: False -skip_imager: False -skip_cutout: False -skip_linmos: False -skip_cleanup: False -skip_frion: False -skip_rmsynth: False -skip_rmclean: False -skip_cat: False -verbose: False - -[Cutout options] -pad: 5 -dryrun: False - -[RMsynth options] -dimension: '1d' -database: False -# tt0: None -# tt1: None -validate: False -# limit: None -own_fit: False -save_plots: False -weight_type: 'variance' -fit_function: 'log' -fit_rmsf: False -# phiMax_radm2: None -# dPhi_radm2: None -n_samples: 5 -poly_ord: 3 -no_stokes_i: False -show_plots: False -not_rmsf: False -rm_verbose: False -debug: False - -[RMclean options] -cutoff: -3 -max_iter: 10000 -gain: 0.1 -# window: None - -[FRion options] -ionex_server: 'ftp://ftp.aiub.unibe.ch/CODE/' -ionex_prefix: 'codg' -# ionex_proxy_server: None -# ionex_formatter: None -ionex_predownload: False - -[Catalog options] -# outfile: None diff --git a/arrakis/.default_config.yaml b/arrakis/.default_config.yaml new file mode 100644 index 00000000..47537ae4 --- /dev/null +++ b/arrakis/.default_config.yaml @@ -0,0 +1,116 @@ +# options: +hosted-wsclean: docker://alecthomson/wsclean:latest # Docker or Singularity image for wsclean (default: docker://alecthomson/wsclean:latest) +local_wsclean: null # Path to local wsclean Singularity image (default: None) + +# pipeline arguments: +dask_config: null # Config file for Dask SlurmCLUSTER. (default: None) +imager_dask_config: null #Config file for Dask SlurmCLUSTER. (default: None) +imager_only: false #Only run the imager component of the pipeline. (default: False) +skip_imager: false #Skip imaging stage [False]. (default: False) +skip_cutout: false #Skip cutout stage [False]. (default: False) +skip_linmos: false #Skip LINMOS stage [False]. 
(default: False)
skip_frion: false #Skip FRion stage [False]. (default: False)
skip_rmsynth: false #Skip RM Synthesis stage [False]. (default: False)
skip_rmclean: false #Skip RM-CLEAN stage [False]. (default: False)
skip_cat: false #Skip catalogue stage [False]. (default: False)
skip_cleanup: false #Skip cleanup stage [False]. (default: False)

# generic arguments:
sbid: null #SBID of observation. (default: None)
stokes: # List of Stokes parameters to image (default: ['I', 'Q', 'U'])
  - I
  - Q
  - U
epoch: 0 # Epoch of observation. (default: 0)
host: null # Host of mongodb (probably $hostname -i). (default: None)
username: null # Username of mongodb. (default: None)
password: null # Password of mongodb. (default: None)
limit: null # Limit the number of islands to process. (default: None)
database: false # Add data to MongoDB. (default: False)

# imaging arguments:
temp_dir_wsclean: null # Temporary directory for WSClean to store intermediate files (default: None)
temp_dir_images: null # Temporary directory to store intermediate image files (default: None)
psf_cutoff: null # Cutoff for smoothing in units of arcseconds. (default: None)
robust: -0.5 # ROBUST
nchan: 36 # NCHAN
pols: IQU # POLS
size: 6144 # SIZE
scale: 2.5 # SCALE
mgain: 0.7 # MGAIN
niter: 500_000 # NITER
nmiter: 15 # NMITER
auto_mask: 4 # AUTO_MASK
auto_threshold: 1 # AUTO_THRESHOLD
local_rms: true # LOCAL_RMS
local_rms_window: 60 # LOCAL_RMS_WINDOW
force_mask_rounds: 8 # FORCE_MASK_ROUNDS
gridder: wgridder # {direct-ft,idg,wgridder,tuned-wgridder,wstacking}
taper: null # TAPER
minuv: 200 # MINUV
parallel: null # PARALLEL
mpi: false # Use MPI (default: False)
purge: false # Purge intermediate files (default: False)
multiscale: false # Use multiscale clean (default: False)
multiscale_scale_bias: null # The multiscale scale bias term provided to wsclean. (default: None)
multiscale_scales: 0,2,4,8,16,32,64,128 # The scales used in the multiscale clean. (default: 0,2,4,8,16,32,64,128)
absmem: null # ABSMEM Absolute memory limit in GB (default: None)
make_residual_cubes: false # Create residual cubes as well as cubes from restored images. (default: False)
ms_glob_pattern: scienceData*_averaged_cal.leakage.ms # The pattern used to search for measurement sets. (default: scienceData*_averaged_cal.leakage.ms)
data_column: CORRECTED_DATA # Which column in the measurement set to image. (default: CORRECTED_DATA)
no_mf_weighting: false # Do not use multi-frequency weighting. (default: False)
skip_fix_ms: false # Do not apply the ASKAP MS corrections from the package fixms. (default: False)

# cutout arguments:
pad: 3 # Number of beamwidths to pad around source [3]. (default: 3)
dryrun: false # Do a dry-run [False]. (default: False)

# linmos arguments:
holofile: null #Path to holography image (default: None)
yanda: 1.3.0 # Yandasoft version to pull from DockerHub [1.3.0]. (default: 1.3.0)
yanda_image: null #Path to an existing yandasoft singularity container image. (default: None)

# frion arguments:
ionex_server: ftp://ftp.aiub.unibe.ch/CODE/ # IONEX server (default: ftp://ftp.aiub.unibe.ch/CODE/)
ionex_prefix: codg # IONEX_PREFIX
ionex_formatter: null # IONEX formatter. (default: ftp.aiub.unibe.ch)
ionex_proxy_server: null # Proxy server. (default: None)
ionex_predownload: false # Pre-download IONEX files. (default: False)

# common rm arguments:
dimension: 1d # How many dimensions for RMsynth '1d' or '3d'. (default: 1d)
save_plots: false # save the plots. 
(default: False) +rm_verbose: false # Verbose RMsynth/RMClean. (default: False) + +# rm-synth arguments: +ion: false # Use ionospheric-corrected data. (default: False) +tt0: null # TT0 MFS image -- will be used for model of Stokes I -- also needs --tt1. (default: None) +tt1: null # TT1 MFS image -- will be used for model of Stokes I -- also needs --tt0. (default: None) +validate: false # Run on Stokes I. (default: False) +own_fit: false # Use own Stokes I fit function. (default: False) +weight_type: # weighting (inverse) 'variance' or 'uniform' (all 1s). (default: variance) +fit_function: # Stokes I fitting function: 'linear' or 'log' polynomials. (default: log) +fit_rmsf: false # Fit a Gaussian to the RMSF (default: False) +phi_max: null # Absolute max Faraday depth sampled (in rad/m^2) (overrides NSAMPLES). (default: None) +dphi: null # Width of Faraday depth channel. (default: None) +n_samples: # Number of samples across the FWHM RMSF. (default: 5) +poly_ord: # polynomial order to fit to I spectrum. (default: 3) +no_stokes_i: false # ignore the Stokes I spectrum. (default: False) +show_plots: false # show the plots. (default: False) +not_rmsf: false # Skip calculation of RMSF? (default: False) +debug: false # turn on debugging messages & plots. (default: False) + +# rm-clean arguments: +cutoff: -8 # CLEAN cutoff (+ve = absolute, -ve = sigma). (default: -3) +max_iter: 10000 # maximum number of CLEAN iterations. (default: 10000) +gain: 0.1 # CLEAN loop gain. (default: 0.1) +window: null # Further CLEAN in mask to this threshold. (default: None) + +# catalogue arguments: +leakage_degree: 4 # Degree of leakage polynomial fit. (default: 4) +leakage_bins: 16 # Number of bins for leakage fit. (default: 16) +leakage_snr: 30 # SNR cut for leakage fit. (default: 30.0) +write: null # File to save table to. 
(default: None)
+
+# cleanup arguments:
+overwrite: false # Overwrite existing tarball (default: False)
diff --git a/arrakis/process_spice.py b/arrakis/process_spice.py
index 981c9af8..8367decb 100644
--- a/arrakis/process_spice.py
+++ b/arrakis/process_spice.py
@@ -3,10 +3,10 @@
 import argparse
 import logging
 import os
+from importlib import resources
 from pathlib import Path

 import configargparse
-import pkg_resources
 import yaml
 from astropy.time import Time
 from prefect import flow
@@ -214,8 +214,8 @@ def create_dask_runner(
     logger.setLevel(logging.INFO)
     logger.info("Creating a Dask Task Runner.")
     if dask_config is None:
-        config_dir = pkg_resources.resource_filename("arrakis", "configs")
-        dask_config = f"{config_dir}/default.yaml"
+        config_dir = resources.files("arrakis.configs")
+        dask_config = config_dir / "default.yaml"

     with open(dask_config) as f:
         logger.info(f"Loading {dask_config}")
@@ -408,7 +408,9 @@ def cli():
     clean_parser = cleanup.cleanup_parser(parent_parser=True)
     # Parse the command line options
     parser = configargparse.ArgParser(
-        default_config_files=[".default_config.cfg"],
+        default_config_files=[
+            (resources.files("arrakis") / ".default_config.yaml").as_posix()
+        ],
         description=pipe_parser.description,
         formatter_class=UltimateHelpFormatter,
         parents=[

From 4e322bd6aa63564bad6844f2226f568f091fb357 Mon Sep 17 00:00:00 2001
From: "Thomson, Alec (CASS, Kensington)"
Date: Tue, 9 Apr 2024 18:46:35 +1000
Subject: [PATCH 18/37] Update docs

---
 arrakis/process_region.py |   2 -
 docs/source/pipeline.rst  | 649 ++++++++++++++++++++------------------
 2 files changed, 343 insertions(+), 308 deletions(-)

diff --git a/arrakis/process_region.py b/arrakis/process_region.py
index fb334d2f..134f7b65 100644
--- a/arrakis/process_region.py
+++ b/arrakis/process_region.py
@@ -232,8 +232,6 @@ def cli():
     )
     parser.add("--config", required=False, is_config_file=True, help="Config file path")
     args = parser.parse_args()
-    if not args.use_mpi:
-        parser.print_values()

     verbose = args.verbose
     if verbose:
diff --git a/docs/source/pipeline.rst b/docs/source/pipeline.rst
index 9d2e4d54..9edaee9e 100644
--- a/docs/source/pipeline.rst
+++ b/docs/source/pipeline.rst
@@ -13,8 +13,6 @@ Details of each module can be found in the API documentation. But broadly the st

 * LINMOS - Applies the primary beam and leakage correction to the cutout beam cubes, and then mosaics each into a single cube for each source per field.

-* Clean up - Remove the cutout beam cubes from the cutouts directory.
-
 * FRion - Applies time-independent ionospheric Faraday rotation to the mosaicked cubes using `FRion <https://github.com/CIRADA-Tools/FRion>`_.

 * RM synthesis - Extracts 1D spectra for each component of each source and runs RM synthesis using `RM-tools <https://github.com/CIRADA-Tools/RM-Tools>`_.

@@ -23,30 +21,31 @@ Details of each module can be found in the API documentation. But broadly the st

 * Catalogue - Queries the database for a given field and constructs a polarisation catalogue for each component.

+* Clean up - Create a tarball of the cutouts, and remove beam cubes.
+
 ..
rst-class:: clear-both

----

With an initialised database you can call the pipeline on a single field: ::

    (spice) $ spice_process -h
    usage: spice_process [-h] [--temp_dir TEMP_DIR] [--psf_cutoff PSF_CUTOFF] [--robust ROBUST] [--nchan NCHAN] [--pols POLS] [--size SIZE]
                         [--scale SCALE] [--mgain MGAIN] [--niter NITER] [--nmiter NMITER] [--auto_mask AUTO_MASK]
                         [--auto_threshold AUTO_THRESHOLD] [--local_rms] [--local_rms_window LOCAL_RMS_WINDOW]
                         [--force_mask_rounds FORCE_MASK_ROUNDS] [--gridder {direct-ft,idg,wgridder,tuned-wgridder,wstacking}] [--taper TAPER]
                         [--minuv MINUV] [--parallel PARALLEL] [--purge] [--mpi] [--multiscale] [--multiscale_scale_bias MULTISCALE_SCALE_BIAS]
                         [--multiscale_scales MULTISCALE_SCALES] [--absmem ABSMEM] [--make_residual_cubes] [--ms_glob_pattern MS_GLOB_PATTERN]
                         [--data_column DATA_COLUMN] [--no_mf_weighting] [--skip_fix_ms]
                         [--hosted-wsclean HOSTED_WSCLEAN | --local_wsclean LOCAL_WSCLEAN] [--config CONFIG] [--epoch EPOCH] [--host HOST]
                         [--username USERNAME] [--password PASSWORD] [--dask_config DASK_CONFIG] [--imager_dask_config IMAGER_DASK_CONFIG]
                         [--holofile HOLOFILE] [--yanda YANDA] [--yanda_image YANDA_IMAGE] [--imager_only] [--skip_imager] [--skip_cutout]
                         [--skip_linmos] [--skip_cleanup] [--skip_frion] [--skip_rmsynth] [--skip_rmclean] [--skip_cat] [-v] [-p PAD]
                         [--dryrun] [--dimension DIMENSION] [-m] [--tt0 TT0] [--tt1 TT1] [--validate] [--limit LIMIT] [--own_fit] [-sp]
                         [-w WEIGHTTYPE] [--fit_function FIT_FUNCTION] [-t] [-l PHIMAX_RADM2] [-d DPHI_RADM2] [-s NSAMPLES] [-o POLYORD] [-i]
                         [--showPlots] [-R] [-rmv] [-D] [-c CUTOFF] [-n MAXITER] [-g GAIN] [--window WINDOW] [--ionex_server IONEX_SERVER]
                         [--ionex_prefix IONEX_PREFIX] [--ionex_proxy_server IONEX_PROXY_SERVER] [--ionex_formatter IONEX_FORMATTER]
                         [--ionex_predownload] [--outfile OUTFILE]
                         msdir outdir field
    (arrakis310) $ spice_process -h
    usage: spice_process [-h] [--dask_config DASK_CONFIG] [--imager_dask_config IMAGER_DASK_CONFIG] [--imager_only] [--skip_imager] [--skip_cutout] [--skip_linmos]
                         [--skip_frion] [--skip_rmsynth] [--skip_rmclean] [--skip_cat] [--skip_cleanup] [--sbid SBID] [-s STOKESLIST [STOKESLIST ...]] [-e EPOCH] [-v]
                         [--host host] [--username USERNAME] [--password PASSWORD] [--limit LIMIT] [--database] [--temp_dir_wsclean TEMP_DIR_WSCLEAN]
                         [--temp_dir_images TEMP_DIR_IMAGES] [--psf_cutoff PSF_CUTOFF] [--robust ROBUST] [--nchan NCHAN] [--pols POLS] [--size SIZE] [--scale SCALE]
                         [--mgain MGAIN] [--niter NITER] [--nmiter NMITER] [--auto_mask AUTO_MASK] [--auto_threshold AUTO_THRESHOLD] [--local_rms]
                         [--local_rms_window LOCAL_RMS_WINDOW] [--force_mask_rounds FORCE_MASK_ROUNDS] [--gridder {direct-ft,idg,wgridder,tuned-wgridder,wstacking}]
                         [--taper TAPER] [--minuv MINUV] [--parallel PARALLEL] [--purge] [--mpi] [--multiscale] [--multiscale_scale_bias MULTISCALE_SCALE_BIAS]
                         [--multiscale_scales MULTISCALE_SCALES] [--absmem ABSMEM] [--make_residual_cubes] [--ms_glob_pattern MS_GLOB_PATTERN] [--data_column DATA_COLUMN]
                         [--no_mf_weighting] [--skip_fix_ms] [--hosted-wsclean HOSTED_WSCLEAN | --local_wsclean LOCAL_WSCLEAN] [-p PAD] [-d] [--holofile HOLOFILE]
                         [--yanda YANDA] [--yanda_image YANDA_IMAGE] [--ionex_server IONEX_SERVER] [--ionex_prefix IONEX_PREFIX] [--ionex_formatter IONEX_FORMATTER]
                         [--ionex_proxy_server IONEX_PROXY_SERVER] [--ionex_predownload] [--dimension DIMENSION] [--save_plots] [--rm_verbose] [--ion] [--tt0 TT0]
                         [--tt1 TT1] [--validate] [--own_fit] [--weight_type WEIGHT_TYPE] [--fit_function FIT_FUNCTION] [--fit_rmsf]
[--phi_max PHI_MAX] [--dphi DPHI] + [--n_samples N_SAMPLES] [--poly_ord POLY_ORD] [--no_stokes_i] [--show_plots] [--not_rmsf] [--debug] [--cutoff CUTOFF] [--max_iter MAX_ITER] + [--gain GAIN] [--window WINDOW] [--leakage_degree LEAKAGE_DEGREE] [--leakage_bins LEAKAGE_BINS] [--leakage_snr LEAKAGE_SNR] [--write OUTFILE] + [--overwrite] [--config CONFIG] + datadir field msdir mmm mmm mmm mmm mmm @@ -67,247 +66,279 @@ With an initalised database you can call the pipeline on a single field: :: - positional arguments: - field Name of field (e.g. 2132-50A). - options: - -h, --help show this help message and exit - --hosted-wsclean HOSTED_WSCLEAN - Docker or Singularity image for wsclean [docker://alecthomson/wsclean:latest] (default: docker://alecthomson/wsclean:latest) - --local_wsclean LOCAL_WSCLEAN + -h, --help show this help message and exit + --hosted-wsclean HOSTED_WSCLEAN + Docker or Singularity image for wsclean (default: docker://alecthomson/wsclean:latest) + --local_wsclean LOCAL_WSCLEAN Path to local wsclean Singularity image (default: None) - --config CONFIG Config file path (default: None) - --epoch EPOCH Epoch to read field data from (default: 0) - --host HOST Host of mongodb (probably $hostname -i). (default: None) - --username USERNAME Username of mongodb. (default: None) - --password PASSWORD Password of mongodb. (default: None) - --dask_config DASK_CONFIG + --config CONFIG Config file path (default: None) + + pipeline arguments: + --dask_config DASK_CONFIG Config file for Dask SlurmCLUSTER. (default: None) - --imager_dask_config IMAGER_DASK_CONFIG + --imager_dask_config IMAGER_DASK_CONFIG Config file for Dask SlurmCLUSTER. (default: None) - --holofile HOLOFILE Path to holography image (default: None) - --yanda YANDA Yandasoft version to pull from DockerHub [1.3.0]. (default: 1.3.0) - --yanda_image YANDA_IMAGE - Path to an existing yandasoft singularity container image. (default: None) + --imager_only Only run the imager component of the pipeline. (default: False) + --skip_imager Skip imaging stage [False]. (default: False) + --skip_cutout Skip cutout stage [False]. (default: False) + --skip_linmos Skip LINMOS stage [False]. (default: False) + --skip_frion Skip cleanup stage [False]. (default: False) + --skip_rmsynth Skip RM Synthesis stage [False]. (default: False) + --skip_rmclean Skip RM-CLEAN stage [False]. (default: False) + --skip_cat Skip catalogue stage [False]. (default: False) + --skip_cleanup Skip cleanup stage [False]. (default: False) + + workdir arguments: + datadir Directory to create/find full-size images and 'cutout' directory + + generic arguments: + field Name of field (e.g. RACS_2132-50). + --sbid SBID SBID of observation. (default: None) + -s STOKESLIST [STOKESLIST ...], --stokes STOKESLIST [STOKESLIST ...] + List of Stokes parameters to image (default: ['I', 'Q', 'U']) + -e EPOCH, --epoch EPOCH + Epoch of observation. (default: 0) + -v Verbose output. (default: False) + --host host Host of mongodb (probably $hostname -i). (default: None) + --username USERNAME Username of mongodb. (default: None) + --password PASSWORD Password of mongodb. (default: None) + --limit LIMIT Limit the number of islands to process. (default: None) + --database Add data to MongoDB. 
(default: False) imaging arguments: - msdir Directory containing MS files - outdir Directory to output images - --temp_dir TEMP_DIR Temporary directory to store intermediate files (default: None) - --psf_cutoff PSF_CUTOFF + msdir Directory containing MS files + --temp_dir_wsclean TEMP_DIR_WSCLEAN + Temporary directory for WSClean to store intermediate files (default: None) + --temp_dir_images TEMP_DIR_IMAGES + Temporary directory for to store intermediate image files (default: None) + --psf_cutoff PSF_CUTOFF Cutoff for smoothing in units of arcseconds. (default: None) - --robust ROBUST - --nchan NCHAN - --pols POLS - --size SIZE - --scale SCALE - --mgain MGAIN - --niter NITER - --nmiter NMITER - --auto_mask AUTO_MASK - --auto_threshold AUTO_THRESHOLD - --local_rms - --local_rms_window LOCAL_RMS_WINDOW - --force_mask_rounds FORCE_MASK_ROUNDS - --gridder {direct-ft,idg,wgridder,tuned-wgridder,wstacking} - --taper TAPER - --minuv MINUV - --parallel PARALLEL - --purge Purge intermediate files (default: False) - --mpi Use MPI (default: False) - --multiscale Use multiscale clean (default: False) - --multiscale_scale_bias MULTISCALE_SCALE_BIAS + --robust ROBUST + --nchan NCHAN + --pols POLS + --size SIZE + --scale SCALE + --mgain MGAIN + --niter NITER + --nmiter NMITER + --auto_mask AUTO_MASK + --auto_threshold AUTO_THRESHOLD + --local_rms + --local_rms_window LOCAL_RMS_WINDOW + --force_mask_rounds FORCE_MASK_ROUNDS + --gridder {direct-ft,idg,wgridder,tuned-wgridder,wstacking} + --taper TAPER + --minuv MINUV + --parallel PARALLEL + --purge Purge intermediate files (default: False) + --mpi Use MPI (default: False) + --multiscale Use multiscale clean (default: False) + --multiscale_scale_bias MULTISCALE_SCALE_BIAS The multiscale scale bias term provided to wsclean. (default: None) - --multiscale_scales MULTISCALE_SCALES + --multiscale_scales MULTISCALE_SCALES The scales used in the multiscale clean. (default: 0,2,4,8,16,32,64,128) - --absmem ABSMEM Absolute memory limit in GB (default: None) - --make_residual_cubes + --absmem ABSMEM Absolute memory limit in GB (default: None) + --make_residual_cubes Create residual cubes as well as cubes from restored images. (default: False) - --ms_glob_pattern MS_GLOB_PATTERN + --ms_glob_pattern MS_GLOB_PATTERN The pattern used to search for measurement sets. (default: scienceData*_averaged_cal.leakage.ms) - --data_column DATA_COLUMN + --data_column DATA_COLUMN Which column in the measurement set to image. (default: CORRECTED_DATA) - --no_mf_weighting Do not use multi-frequency weighting. (default: False) - --skip_fix_ms Do not apply the ASKAP MS corrections from the package fixms. (default: False) - - pipeline flow options: - --imager_only Only run the imager component of the pipeline. (default: False) - --skip_imager Skip imaging stage [False]. (default: False) - --skip_cutout Skip cutout stage [False]. (default: False) - --skip_linmos Skip LINMOS stage [False]. (default: False) - --skip_cleanup Skip cleanup stage [False]. (default: False) - --skip_frion Skip cleanup stage [False]. (default: False) - --skip_rmsynth Skip RM Synthesis stage [False]. (default: False) - --skip_rmclean Skip RM-CLEAN stage [False]. (default: False) - --skip_cat Skip catalogue stage [False]. (default: False) - - output options: - -v, --verbose Verbose output [False]. (default: False) + --no_mf_weighting Do not use multi-frequency weighting. (default: False) + --skip_fix_ms Do not apply the ASKAP MS corrections from the package fixms. 
(default: False) cutout arguments: - -p PAD, --pad PAD Number of beamwidths to pad around source [5]. (default: 5) - --dryrun Do a dry-run [False]. (default: False) - - RM-synth/CLEAN arguments: - --dimension DIMENSION - How many dimensions for RMsynth [1d] or '3d'. (default: 1d) - -m, --database Add RMsynth data to MongoDB [False]. (default: False) - --tt0 TT0 TT0 MFS image -- will be used for model of Stokes I -- also needs --tt1. (default: None) - --tt1 TT1 TT1 MFS image -- will be used for model of Stokes I -- also needs --tt0. (default: None) - --validate Run on RMsynth Stokes I [False]. (default: False) - --limit LIMIT Limit number of sources [All]. (default: None) - --own_fit Use own Stokes I fit function [False]. (default: False) - - RM-tools arguments: - -sp, --savePlots save the plots [False]. (default: False) - -w WEIGHTTYPE, --weightType WEIGHTTYPE - weighting [variance] (all 1s) or 'uniform'. (default: variance) - --fit_function FIT_FUNCTION - Stokes I fitting function: 'linear' or ['log'] polynomials. (default: log) - -t, --fitRMSF Fit a Gaussian to the RMSF [False] (default: False) - -l PHIMAX_RADM2, --phiMax_radm2 PHIMAX_RADM2 - Absolute max Faraday depth sampled (overrides NSAMPLES) [Auto]. (default: None) - -d DPHI_RADM2, --dPhi_radm2 DPHI_RADM2 - Width of Faraday depth channel [Auto]. (default: None) - -s NSAMPLES, --nSamples NSAMPLES + -p PAD, --pad PAD Number of beamwidths to pad around source [3]. (default: 3) + -d, --dryrun Do a dry-run [False]. (default: False) + + linmos arguments: + --holofile HOLOFILE Path to holography image (default: None) + --yanda YANDA Yandasoft version to pull from DockerHub [1.3.0]. (default: 1.3.0) + --yanda_image YANDA_IMAGE + Path to an existing yandasoft singularity container image. (default: None) + + frion arguments: + --ionex_server IONEX_SERVER + IONEX server (default: ftp://ftp.aiub.unibe.ch/CODE/) + --ionex_prefix IONEX_PREFIX + --ionex_formatter IONEX_FORMATTER + IONEX formatter. (default: ftp.aiub.unibe.ch) + --ionex_proxy_server IONEX_PROXY_SERVER + Proxy server. (default: None) + --ionex_predownload Pre-download IONEX files. (default: False) + + common rm arguments: + --dimension DIMENSION + How many dimensions for RMsynth '1d' or '3d'. (default: 1d) + --save_plots save the plots. (default: False) + --rm_verbose Verbose RMsynth/RMClean. (default: False) + + rm-synth arguments: + --ion Use ionospheric-corrected data. (default: False) + --tt0 TT0 TT0 MFS image -- will be used for model of Stokes I -- also needs --tt1. (default: None) + --tt1 TT1 TT1 MFS image -- will be used for model of Stokes I -- also needs --tt0. (default: None) + --validate Run on Stokes I. (default: False) + --own_fit Use own Stokes I fit function. (default: False) + --weight_type WEIGHT_TYPE + weighting (inverse) 'variance' or 'uniform' (all 1s). (default: variance) + --fit_function FIT_FUNCTION + Stokes I fitting function: 'linear' or 'log' polynomials. (default: log) + --fit_rmsf Fit a Gaussian to the RMSF (default: False) + --phi_max PHI_MAX Absolute max Faraday depth sampled (in rad/m^2) (overrides NSAMPLES). (default: None) + --dphi DPHI Width of Faraday depth channel. (default: None) + --n_samples N_SAMPLES Number of samples across the FWHM RMSF. (default: 5) - -o POLYORD, --polyOrd POLYORD - polynomial order to fit to I spectrum [3]. (default: 3) - -i, --noStokesI ignore the Stokes I spectrum [False]. (default: False) - --showPlots show the plots [False]. (default: False) - -R, --not_RMSF Skip calculation of RMSF? 
[False] (default: False) - -rmv, --rm_verbose Verbose RMsynth/CLEAN [False]. (default: False) - -D, --debug turn on debugging messages & plots [False]. (default: False) - -c CUTOFF, --cutoff CUTOFF - CLEAN cutoff (+ve = absolute, -ve = sigma) [-3]. (default: -3) - -n MAXITER, --maxIter MAXITER - maximum number of CLEAN iterations [10000]. (default: 10000) - -g GAIN, --gain GAIN CLEAN loop gain [0.1]. (default: 0.1) - --window WINDOW Further CLEAN in mask to this threshold [False]. (default: None) - --ionex_server IONEX_SERVER - IONEX server [ftp://ftp.aiub.unibe.ch/CODE/]. (default: ftp://ftp.aiub.unibe.ch/CODE/) - --ionex_prefix IONEX_PREFIX - IONEX prefix. (default: codg) - --ionex_proxy_server IONEX_PROXY_SERVER - Proxy server [None]. (default: None) - --ionex_formatter IONEX_FORMATTER - IONEX formatter [None]. (default: None) - --ionex_predownload Pre-download IONEX files [False]. (default: False) + --poly_ord POLY_ORD polynomial order to fit to I spectrum. (default: 3) + --no_stokes_i ignore the Stokes I spectrum. (default: False) + --show_plots show the plots. (default: False) + --not_rmsf Skip calculation of RMSF? (default: False) + --debug turn on debugging messages & plots. (default: False) + + rm-clean arguments: + --cutoff CUTOFF CLEAN cutoff (+ve = absolute, -ve = sigma). (default: -3) + --max_iter MAX_ITER maximum number of CLEAN iterations. (default: 10000) + --gain GAIN CLEAN loop gain. (default: 0.1) + --window WINDOW Further CLEAN in mask to this threshold. (default: None) catalogue arguments: - --outfile OUTFILE File to save table to [None]. (default: None) - - Args that start with '--' can also be set in a config file (.default_config.cfg or specified via --config). Config file syntax allows: - key=value, flag=true, stuff=[a,b,c] (for details, see syntax at https://goo.gl/R74nmi). In general, command-line values override config - file values which override defaults. - - -You can optionally pass a configuration file (with the :code:`--config` argument) to set the options you prefer. An example file in contained in :file:`arrakis/.default_config.cfg`: - -.. 
code-block:: cfg - - # Arrakis default config - - ## Note 'None' will be interpreated as a string, not a NoneType - ## If you want to use the default value, just leave the line blank - - [Imaging options] - # temp_dir: None - # psf_cutoff: None - robust: -0.5 - nchan: 36 - pols: 'IQU' - size: 4096 - scale: 2.5 - mgain: 0.8 - niter: 100000 - # nmiter: None - auto_mask: 3.0 - auto_threshold: 1.0 - local_rms: False - # local_rms_window: None - # force_mask_rounds: None - # gridder: None - # taper: None - minuv: 0.0 - # parallel: None - purge: False - mpi: False - multiscale: False - # multiscale_scale_bias: None - multiscale_scales: '0,2,4,8,16,32,64,128' - # absmem: None - make_residual_cubes: False - ms_glob_pattern: 'scienceData*_averaged_cal.leakage.ms' - data_column: 'CORRECTED_DATA' - no_mf_weighting: False - skip_fix_ms: False - hosted-wsclean: 'docker://alecthomson/wsclean:latest' - # local_wsclean: None - - [Pipeline options] - # config: None - epoch: 0 - # host: None - # username: None - # password: None - # dask_config: None - # imager_dask_config: None - # holofile: None - yanda: '1.3.0' - # yanda_image: None - imager_only: False - skip_imager: False - skip_cutout: False - skip_linmos: False - skip_cleanup: False - skip_frion: False - skip_rmsynth: False - skip_rmclean: False - skip_cat: False - verbose: False - - [Cutout options] - pad: 5 - dryrun: False - - [RMsynth options] - dimension: '1d' - database: False - # tt0: None - # tt1: None - validate: False - # limit: None - own_fit: False - savePlots: False - weightType: 'variance' - fit_function: 'log' - fitRMSF: False - # phiMax_radm2: None - # dPhi_radm2: None - nSamples: 5 - polyOrd: 3 - noStokesI: False - showPlots: False - not_RMSF: False - rm_verbose: False - debug: False - - [RMclean options] - cutoff: -3 - maxIter: 10000 - gain: 0.1 - # window: None - - [FRion options] - ionex_server: 'ftp://ftp.aiub.unibe.ch/CODE/' - ionex_prefix: 'codg' - # ionex_proxy_server: None - # ionex_formatter: None - ionex_predownload: False - - [Catalog options] - # outfile: None + --leakage_degree LEAKAGE_DEGREE + Degree of leakage polynomial fit. (default: 4) + --leakage_bins LEAKAGE_BINS + Number of bins for leakage fit. (default: 16) + --leakage_snr LEAKAGE_SNR + SNR cut for leakage fit. (default: 30.0) + --write OUTFILE File to save table to. (default: None) + + cleanup arguments: + --overwrite Overwrite existing tarball (default: False) + + Args that start with '--' can also be set in a config file (/scratch3/projects/spiceracs/arrakis/arrakis/.default_config.yaml or specified via --config). Config file + syntax allows: key=value, flag=true, stuff=[a,b,c] (for details, see syntax at https://goo.gl/R74nmi). In general, command-line values override config file values which + override defaults. + + +You can optionally pass a configuration file (with the :code:`--config` argument) to set the options you prefer. An example file in contained in :file:`arrakis/.default_config.yaml`: + +.. code-block:: yaml + + # options: + hosted-wsclean: docker://alecthomson/wsclean:latest # Docker or Singularity image for wsclean (default: docker://alecthomson/wsclean:latest) + local_wsclean: null # Path to local wsclean Singularity image (default: None) + + # pipeline arguments: + dask_config: null # Config file for Dask SlurmCLUSTER. (default: None) + imager_dask_config: null #Config file for Dask SlurmCLUSTER. (default: None) + imager_only: false # Only run the imager component of the pipeline. 
(default: False)
+    skip_imager: false #Skip imaging stage [False]. (default: False)
+    skip_cutout: false #Skip cutout stage [False]. (default: False)
+    skip_linmos: false #Skip LINMOS stage [False]. (default: False)
+    skip_frion: false #Skip FRion stage [False]. (default: False)
+    skip_rmsynth: false #Skip RM Synthesis stage [False]. (default: False)
+    skip_rmclean: false #Skip RM-CLEAN stage [False]. (default: False)
+    skip_cat: false #Skip catalogue stage [False]. (default: False)
+    skip_cleanup: false #Skip cleanup stage [False]. (default: False)
+
+    # generic null arguments:
+    sbid: null #SBID of observation. (default: None)
+    stokes: # List of Stokes parameters to image (default: ['I', 'Q', 'U'])
+      - I
+      - Q
+      - U
+    epoch: 0 # Epoch of observation. (default: 0)
+    host: null # Host of mongodb (probably $hostname -i). (default: None)
+    username: null # Username of mongodb. (default: None)
+    password: # Password of mongodb. (default: None)
+    limit: null # Limit the number of islands to process. (default: None)
+    database: false # Add data to MongoDB. (default: False)
+
+    # imaging arguments:
+    temp_dir_wsclean: null # Temporary directory for WSClean to store intermediate files (default: None)
+    temp_dir_images: null # Temporary directory to store intermediate image files (default: None)
+    psf_cutoff: null # Cutoff for smoothing in units of arcseconds. (default: None)
+    robust: -0.5 # ROBUST
+    nchan: 36 # NCHAN
+    pols: IQU # POLS
+    size: 6144 # SIZE
+    scale: 2.5 # SCALE
+    mgain: 0.7 # MGAIN
+    niter: 500_000 # NITER
+    nmiter: 15 # NMITER
+    auto_mask: 4 # AUTO_MASK
+    auto_threshold: 1 # AUTO_THRESHOLD
+    local_rms: true #
+    local_rms_window: 60 # LOCAL_RMS_WINDOW
+    force_mask_rounds: 8 # FORCE_MASK_ROUNDS
+    gridder: wgridder # {direct-ft,idg,wgridder,tuned-wgridder,wstacking}
+    taper: null # TAPER
+    minuv: 200 # MINUV
+    parallel: null # PARALLEL
+    mpi: false # Use MPI (default: False)
+    purge: false # Purge intermediate files (default: False)
+    multiscale: false # Use multiscale clean (default: False)
+    multiscale_scale_bias: null # The multiscale scale bias term provided to wsclean. (default: None)
+    multiscale_scales: 0,2,4,8,16,32,64,128 # The scales used in the multiscale clean. (default: 0,2,4,8,16,32,64,128)
+    absmem: null # ABSMEM Absolute memory limit in GB (default: None)
+    make_residual_cubes: false # Create residual cubes as well as cubes from restored images. (default: False)
+    ms_glob_pattern: scienceData*_averaged_cal.leakage.ms # The pattern used to search for measurement sets. (default: scienceData*_averaged_cal.leakage.ms)
+    data_column: CORRECTED_DATA # Which column in the measurement set to image. (default: CORRECTED_DATA)
+    no_mf_weighting: false # Do not use multi-frequency weighting. (default: False)
+    skip_fix_ms: false # Do not apply the ASKAP MS corrections from the package fixms. (default: False)
+
+    # cutout arguments:
+    pad: 3 # Number of beamwidths to pad around source [3]. (default: 3)
+    dryrun: false # Do a dry-run [False]. (default: False)
+
+    # linmos null arguments:
+    holofile: null #Path to holography image (default: None)
+    yanda: 1.3.0 # Yandasoft version to pull from DockerHub [1.3.0]. (default: 1.3.0)
+    yanda_image: null #Path to an existing yandasoft singularity container image. (default: None)
+
+    # frion arguments:
+    ionex_server: ftp://ftp.aiub.unibe.ch/CODE/ # IONEX server (default: ftp://ftp.aiub.unibe.ch/CODE/)
+    ionex_prefix: codg # IONEX_PREFIX
+    ionex_formatter: null # IONEX formatter.
(default: ftp.aiub.unibe.ch) + ionex_proxy_server: null # Proxy server. (default: None) + ionex_predownload: false # Pre-download IONEX files. (default: False) + + # common rm arguments: + dimension: 1d # How many dimensions for RMsynth '1d' or '3d'. (default: 1d) + save_plots: false # save the plots. (default: False) + rm_verbose: false # Verbose RMsynth/RMClean. (default: False) + + # rm-synth arguments: + ion: false # Use ionospheric-corrected data. (default: False) + tt0: null # TT0 MFS image -- will be used for model of Stokes I -- also needs --tt1. (default: None) + tt1: null # TT1 MFS image -- will be used for model of Stokes I -- also needs --tt0. (default: None) + validate: false # Run on Stokes I. (default: False) + own_fit: false # Use own Stokes I fit function. (default: False) + weight_type: # weighting (inverse) 'variance' or 'uniform' (all 1s). (default: variance) + fit_function: # Stokes I fitting function: 'linear' or 'log' polynomials. (default: log) + fit_rmsf: false # Fit a Gaussian to the RMSF (default: False) + phi_max: null # Absolute max Faraday depth sampled (in rad/m^2) (overrides NSAMPLES). (default: None) + dphi: null # Width of Faraday depth channel. (default: None) + n_samples: # Number of samples across the FWHM RMSF. (default: 5) + poly_ord: # polynomial order to fit to I spectrum. (default: 3) + no_stokes_i: false # ignore the Stokes I spectrum. (default: False) + show_plots: false # show the plots. (default: False) + not_rmsf: false # Skip calculation of RMSF? (default: False) + debug: false # turn on debugging messages & plots. (default: False) + + # rm-clean arguments: + cutoff: -8 # CLEAN cutoff (+ve = absolute, -ve = sigma). (default: -3) + max_iter: 10000 # maximum number of CLEAN iterations. (default: 10000) + gain: 0.1 # CLEAN loop gain. (default: 0.1) + window: null # Further CLEAN in mask to this threshold. (default: None) + + # catalogue arguments: + leakage_degree: 4 # Degree of leakage polynomial fit. (default: 4) + leakage_bins: 16 # Number of bins for leakage fit. (default: 16) + leakage_snr: 30 # SNR cut for leakage fit. (default: 30.0) + write: null # File to save table to. 
(default: None) + + # cleanup arguments: + overwrite: false # Overwrite existing tarball (default: False) + For extra information you can refer to the API: @@ -316,14 +347,14 @@ For extra information you can refer to the API: Similarly, you can merge multiple fields togther using: :: - (spice) $ spice_region -h - usage: spice_region [-h] [--config CONFIG] [--merge_name MERGE_NAME] [--fields FIELDS [FIELDS ...]] [--datadirs DATADIRS [DATADIRS ...]] - [--output_dir OUTPUT_DIR] [--epoch EPOCH] [--host HOST] [--username USERNAME] [--password PASSWORD] [--use_mpi] - [--port_forward PORT_FORWARD [PORT_FORWARD ...]] [--dask_config DASK_CONFIG] [--yanda YANDA] [--skip_merge] - [--skip_rmsynth] [--skip_rmclean] [--skip_cat] [-v] [--debugger] [--dimension DIMENSION] [-m] [--tt0 TT0] [--tt1 TT1] - [--validate] [--limit LIMIT] [--own_fit] [-sp] [-w WEIGHTTYPE] [--fit_function FIT_FUNCTION] [-t] [-l PHIMAX_RADM2] - [-d DPHI_RADM2] [-s NSAMPLES] [-o POLYORD] [-i] [--showPlots] [-R] [-rmv] [-D] [-c CUTOFF] [-n MAXITER] [-g GAIN] - [--window WINDOW] [--outfile OUTFILE] + (arrakis310) $ spice_region -h + usage: spice_region [-h] [--dask_config DASK_CONFIG] [--skip_frion] [--skip_rmsynth] [--skip_rmclean] [--skip_cat] [--skip_cleanup] [--merge_name MERGE_NAME] + [--fields FIELDS [FIELDS ...]] [--datadirs DATADIRS [DATADIRS ...]] [--output_dir OUTPUT_DIR] [-e EPOCH] [--host host] [--username USERNAME] + [--password PASSWORD] [--holofile HOLOFILE] [--yanda YANDA] [--yanda_image YANDA_IMAGE] [--dimension DIMENSION] [--save_plots] [--rm_verbose] + [--ion] [--tt0 TT0] [--tt1 TT1] [--validate] [--own_fit] [--weight_type WEIGHT_TYPE] [--fit_function FIT_FUNCTION] [--fit_rmsf] [--phi_max PHI_MAX] + [--dphi DPHI] [--n_samples N_SAMPLES] [--poly_ord POLY_ORD] [--no_stokes_i] [--show_plots] [--not_rmsf] [--debug] [--cutoff CUTOFF] + [--max_iter MAX_ITER] [--gain GAIN] [--window WINDOW] [--leakage_degree LEAKAGE_DEGREE] [--leakage_bins LEAKAGE_BINS] [--leakage_snr LEAKAGE_SNR] + [--write OUTFILE] [--overwrite] [--config CONFIG] mmm mmm mmm mmm mmm @@ -347,78 +378,84 @@ Similarly, you can merge multiple fields togther using: :: options: -h, --help show this help message and exit --config CONFIG Config file path (default: None) + + pipeline arguments: + --dask_config DASK_CONFIG + Config file for Dask SlurmCLUSTER. (default: None) + --skip_frion Skip cleanup stage [False]. (default: False) + --skip_rmsynth Skip RM Synthesis stage [False]. (default: False) + --skip_rmclean Skip RM-CLEAN stage [False]. (default: False) + --skip_cat Skip catalogue stage [False]. (default: False) + --skip_cleanup Skip cleanup stage [False]. (default: False) + + merge arguments: --merge_name MERGE_NAME Name of the merged region (default: None) --fields FIELDS [FIELDS ...] - RACS fields to mosaic - e.g. 2132-50A. (default: None) + RACS fields to mosaic - e.g. RACS_2132-50A. (default: None) --datadirs DATADIRS [DATADIRS ...] Directories containing cutouts (in subdir outdir/cutouts).. (default: None) --output_dir OUTPUT_DIR Path to save merged data (in output_dir/merge_name/cutouts) (default: None) - --epoch EPOCH Epoch to read field data from (default: 0) - --host HOST Host of mongodb (probably $hostname -i). (default: None) + -e EPOCH, --epoch EPOCH + Epoch of observation. (default: 0) + --host host Host of mongodb (probably $hostname -i). (default: None) --username USERNAME Username of mongodb. (default: None) --password PASSWORD Password of mongodb. 
(default: None) - --use_mpi Use Dask-mpi to parallelise -- must use srun/mpirun to assign resources. (default: False) - --port_forward PORT_FORWARD [PORT_FORWARD ...] - Platform to fowards dask port [None]. (default: None) - --dask_config DASK_CONFIG - Config file for Dask SlurmCLUSTER. (default: None) - --yanda YANDA Yandasoft version to pull from DockerHub [1.3.0]. (default: 1.3.0) - - pipeline flow options: - --skip_merge Skip merge stage [False]. (default: False) - --skip_rmsynth Skip RM Synthesis stage [False]. (default: False) - --skip_rmclean Skip RM-CLEAN stage [False]. (default: False) - --skip_cat Skip catalogue stage [False]. (default: False) - output options: - -v, --verbose Verbose output [False]. (default: False) - --debugger Debug output [False]. (default: False) + linmos arguments: + --holofile HOLOFILE Path to holography image (default: None) + --yanda YANDA Yandasoft version to pull from DockerHub [1.3.0]. (default: 1.3.0) + --yanda_image YANDA_IMAGE + Path to an existing yandasoft singularity container image. (default: None) - RM-synth/CLEAN arguments: + common rm arguments: --dimension DIMENSION - How many dimensions for RMsynth [1d] or '3d'. (default: 1d) - -m, --database Add RMsynth data to MongoDB [False]. (default: False) + How many dimensions for RMsynth '1d' or '3d'. (default: 1d) + --save_plots save the plots. (default: False) + --rm_verbose Verbose RMsynth/RMClean. (default: False) + + rm-synth arguments: + --ion Use ionospheric-corrected data. (default: False) --tt0 TT0 TT0 MFS image -- will be used for model of Stokes I -- also needs --tt1. (default: None) --tt1 TT1 TT1 MFS image -- will be used for model of Stokes I -- also needs --tt0. (default: None) - --validate Run on RMsynth Stokes I [False]. (default: False) - --limit LIMIT Limit number of sources [All]. (default: None) - --own_fit Use own Stokes I fit function [False]. (default: False) - - RM-tools arguments: - -sp, --savePlots save the plots [False]. (default: False) - -w WEIGHTTYPE, --weightType WEIGHTTYPE - weighting [variance] (all 1s) or 'uniform'. (default: variance) + --validate Run on Stokes I. (default: False) + --own_fit Use own Stokes I fit function. (default: False) + --weight_type WEIGHT_TYPE + weighting (inverse) 'variance' or 'uniform' (all 1s). (default: variance) --fit_function FIT_FUNCTION - Stokes I fitting function: 'linear' or ['log'] polynomials. (default: log) - -t, --fitRMSF Fit a Gaussian to the RMSF [False] (default: False) - -l PHIMAX_RADM2, --phiMax_radm2 PHIMAX_RADM2 - Absolute max Faraday depth sampled (overrides NSAMPLES) [Auto]. (default: None) - -d DPHI_RADM2, --dPhi_radm2 DPHI_RADM2 - Width of Faraday depth channel [Auto]. (default: None) - -s NSAMPLES, --nSamples NSAMPLES + Stokes I fitting function: 'linear' or 'log' polynomials. (default: log) + --fit_rmsf Fit a Gaussian to the RMSF (default: False) + --phi_max PHI_MAX Absolute max Faraday depth sampled (in rad/m^2) (overrides NSAMPLES). (default: None) + --dphi DPHI Width of Faraday depth channel. (default: None) + --n_samples N_SAMPLES Number of samples across the FWHM RMSF. (default: 5) - -o POLYORD, --polyOrd POLYORD - polynomial order to fit to I spectrum [3]. (default: 3) - -i, --noStokesI ignore the Stokes I spectrum [False]. (default: False) - --showPlots show the plots [False]. (default: False) - -R, --not_RMSF Skip calculation of RMSF? [False] (default: False) - -rmv, --rm_verbose Verbose RMsynth/CLEAN [False]. (default: False) - -D, --debug turn on debugging messages & plots [False]. 
(default: False) - -c CUTOFF, --cutoff CUTOFF - CLEAN cutoff (+ve = absolute, -ve = sigma) [-3]. (default: -3) - -n MAXITER, --maxIter MAXITER - maximum number of CLEAN iterations [10000]. (default: 10000) - -g GAIN, --gain GAIN CLEAN loop gain [0.1]. (default: 0.1) - --window WINDOW Further CLEAN in mask to this threshold [False]. (default: None) + --poly_ord POLY_ORD polynomial order to fit to I spectrum. (default: 3) + --no_stokes_i ignore the Stokes I spectrum. (default: False) + --show_plots show the plots. (default: False) + --not_rmsf Skip calculation of RMSF? (default: False) + --debug turn on debugging messages & plots. (default: False) + + rm-clean arguments: + --cutoff CUTOFF CLEAN cutoff (+ve = absolute, -ve = sigma). (default: -3) + --max_iter MAX_ITER maximum number of CLEAN iterations. (default: 10000) + --gain GAIN CLEAN loop gain. (default: 0.1) + --window WINDOW Further CLEAN in mask to this threshold. (default: None) catalogue arguments: - --outfile OUTFILE File to save table to [None]. (default: None) - - Args that start with '--' can also be set in a config file (.default_field_config.txt or specified via --config). Config file syntax - allows: key=value, flag=true, stuff=[a,b,c] (for details, see syntax at https://goo.gl/R74nmi). In general, command-line values override - config file values which override defaults. + --leakage_degree LEAKAGE_DEGREE + Degree of leakage polynomial fit. (default: 4) + --leakage_bins LEAKAGE_BINS + Number of bins for leakage fit. (default: 16) + --leakage_snr LEAKAGE_SNR + SNR cut for leakage fit. (default: 30.0) + --write OUTFILE File to save table to. (default: None) + + cleanup arguments: + --overwrite Overwrite existing tarball (default: False) + + Args that start with '--' can also be set in a config file (.default_config.cfg or specified via --config). Config file syntax allows: key=value, flag=true, + stuff=[a,b,c] (for details, see syntax at https://goo.gl/R74nmi). In general, command-line values override config file values which override defaults. 
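The same layering applies to :code:`spice_region`: command-line flags override values from a :code:`--config` file, which in turn override the packaged defaults. A minimal sketch of this precedence with :code:`configargparse` is shown below. This is illustrative only -- the real parsers are assembled in :py:mod:`arrakis.process_spice` and :py:mod:`arrakis.process_region`, and the config file name used here is hypothetical:

.. code-block:: python

    import configargparse

    # Packaged defaults are read first; missing default files are skipped.
    parser = configargparse.ArgParser(
        default_config_files=[".default_config.yaml"],
    )
    # A user-supplied config file (via --config) overrides those defaults...
    parser.add("--config", is_config_file=True, help="Config file path")
    parser.add("--pad", type=float, default=3)

    # ...and explicit command-line values override both.
    # Assumes a file "my_field.yaml" exists, containing e.g. "pad: 4".
    args = parser.parse_args(["--config", "my_field.yaml", "--pad", "5"])
    print(args.pad)  # 5.0 -- the command-line value wins
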
* :py:mod:`arrakis.process_region`

From 4466f44d99aab4b55d9aa8ded4e3b5c420500e92 Mon Sep 17 00:00:00 2001
From: "Thomson, Alec (CASS, Kensington)"
Date: Tue, 9 Apr 2024 18:50:16 +1000
Subject: [PATCH 19/37] Docs

---
 CHANGELOG.md            | 11 +++++++++++
 docs/source/install.rst |  4 ++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ab835f28..ab82fb9b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ## [Unreleased]

+### What's Changed
+
+* Allow SBID to be passed as an argument
+  * This will enable a 'single field mode'
+  * Database queries / updates changed to support this
+* Unified ArgParse mode
+  * Much easier argument parsing
+  * Now reused amongst modules
+* Fixes to typing
+  * Much better use of `pathlib.Path` and `pandas`
+
 ## [2.1.7] - 2024-04-03

 ### What's Changed
diff --git a/docs/source/install.rst b/docs/source/install.rst
index 6b6ded2b..158057f5 100644
--- a/docs/source/install.rst
+++ b/docs/source/install.rst
@@ -18,9 +18,9 @@ After cloning this repo, please run: ::
     # or - if you have mamba:
     mamba env create

-This will install the python dependencies and the command-line scrips into a conda environment called `spice`, which can be activated by: ::
+This will install the python dependencies and the command-line scripts into a conda environment called `arrakis310`, which can be activated by: ::

-    conda activate spice
+    conda activate arrakis310

 An installation of Singularity is also required.

From 7cb26e6d713b32816cb965fb478de73587d445ae Mon Sep 17 00:00:00 2001
From: "Thomson, Alec (CASS, Kensington)"
Date: Tue, 9 Apr 2024 18:57:56 +1000
Subject: [PATCH 20/37] Update deps

---
 pyproject.toml | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 1725948f..b56920f8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,8 +26,8 @@ include = [
 ]

 [tool.poetry.dependencies]
-python = "^3.8"
-astropy = "^5"
+python = ">=3.8"
+astropy = ">=5"
 bilby = "*"
 ConfigArgParse = "*"
 dask = "*"
 distributed = "*"
 dask_jobqueue = "*"
 dask_mpi = "*"
 FRion = {git = "https://github.com/CIRADA-Tools/FRion.git" }
 h5py = "*"
 ipython = "*"
-matplotlib = "^3.8"
+matplotlib = ">=3.8"
 numba = "*"
 numba_progress = "*"
 #mpi4py = "*"
 pandas = "*"
 Polspectra = "*"
 psutil = "*"
 pymongo = "*"
 pymultinest = "*"
 pytest = "*"
 python_casacore = "*"
-RACS-tools = "^3"
+RACS-tools = ">=3"
 radio_beam = "*"
 RMextract = {git = "https://github.com/AlecThomson/RMextract@race"}
 schwimmbad = "*"
 scipy = "*"
-spectral_cube = "^0.6.3"
+spectral_cube = ">=0.6.3"
 spython = "*"
 tqdm = "*"
 vorbin = "*"
 graphviz = "*"
 bokeh = "<3"
-prefect = "^2"
+prefect = ">=2"
 prefect-dask = "*"
 RMTable = { git = "https://github.com/CIRADA-Tools/RMTable" }
 RM-Tools = { git = "https://github.com/CIRADA-Tools/RM-Tools"}
 PolSpectra = { git = "https://github.com/AlecThomson/PolSpectra.git", branch="spiceracs"}
 setuptools = "*"
-fixms = "^0.2"
-fitscube = "^0.3"
+fixms = ">=0.2"
+fitscube = ">=0.3"

 [tool.poetry.dev-dependencies]
-black = "^23"
-flake8 = "^5"
-isort = "^5"
-mypy = "^1"
-pre-commit = "^3.2"
+black = ">=23"
+flake8 = ">=5"
+isort = ">=5"
+mypy = ">=1"
+pre-commit = ">=3.2"

 [tool.poetry.extras]
 docs = [
@@ -95,8 +95,9 @@ spice_process = "arrakis.process_spice:cli"
 spice_region = "arrakis.process_region:cli"
 spice_cat = "arrakis.makecat:cli"
 spice_image = "arrakis.imager:cli"
-fix_ms_corrs = "arrakis.fix_ms_corrs:cli"
-# Unified script script
+
+# Misc scripts
+make_dr2_config = { reference="scripts/make_dr2_config.py", type="file"} casda_prepare = { reference="scripts/casda_prepare.py", type="file"} check_cutout = { reference="scripts/check_cutout.py", type="file"} compare_leakage = { reference="scripts/compare_leakage.py", type="file"} From 05e82ac6dba42b6545752380327761ee8e752819 Mon Sep 17 00:00:00 2001 From: "Thomson, Alec (CASS, Kensington)" Date: Tue, 9 Apr 2024 19:48:56 +1000 Subject: [PATCH 21/37] Write yaml --- scripts/make_dr2_config.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/scripts/make_dr2_config.py b/scripts/make_dr2_config.py index 9a2b53c4..c28b5232 100755 --- a/scripts/make_dr2_config.py +++ b/scripts/make_dr2_config.py @@ -7,6 +7,7 @@ from pathlib import Path import pandas as pd +import yaml from arrakis.logger import logger @@ -76,6 +77,10 @@ def main( processing_dir: Path, ): """Main script""" + + if not processing_dir.exists(): + processing_dir.mkdir(parents=True, exist_ok=True) + field_data = get_field_data(sbid) holo_file = get_holography_path(sbid) @@ -152,11 +157,10 @@ def main( ionex_prefix="codg", ) - config_file = processing_dir / f"{sbid}_rm.cfg" + config_file = processing_dir / f"{sbid}_rm.yaml" with open(config_file, "w") as f: - for key, value in config_base.items(): - f.write(f"{key} = {value}\n") + yaml.safe_dump(config_base, f) # Now make a run script script_file = processing_dir / f"{sbid}_rm_run.sh" @@ -191,10 +195,11 @@ def main( echo "About to run spice_process" spice_process \ - --config {config_file.absolute().as_posix()} \ - {sbid_dir.absolute().as_posix()} \ {processing_dir.absolute().as_posix()} \ {field_data.FIELD_NAME} \ + {sbid_dir.absolute().as_posix()} \ + --sbid {sbid} \ + --config {config_file.absolute().as_posix()} \ """ with open(script_file, "w") as f: f.write(script_string) From 6cdd8e4b0b04b2545648f310749bb9fc8fff38d4 Mon Sep 17 00:00:00 2001 From: "Thomson, Alec (CASS, Kensington)" Date: Wed, 10 Apr 2024 12:21:06 +1000 Subject: [PATCH 22/37] Need absolute path --- arrakis/cutout.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrakis/cutout.py b/arrakis/cutout.py index 7136c469..10c398fa 100644 --- a/arrakis/cutout.py +++ b/arrakis/cutout.py @@ -90,7 +90,7 @@ def cutout_weight( } return pymongo.UpdateOne(myquery, newvalues, upsert=True) - outdir = cutout_args.outdir + outdir = cutout_args.outdir.absolute() basename = image_name.name outname = f"{source_id}.cutout.{basename}" outfile = outdir / outname From e4806b401a39d4b23b7ae390d10f9ae6f482243e Mon Sep 17 00:00:00 2001 From: "Thomson, Alec (CASS, Kensington)" Date: Wed, 10 Apr 2024 12:21:49 +1000 Subject: [PATCH 23/37] Use dataframe --- arrakis/linmos.py | 46 +++++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/arrakis/linmos.py b/arrakis/linmos.py index 37f4a644..f9890ccc 100644 --- a/arrakis/linmos.py +++ b/arrakis/linmos.py @@ -10,8 +10,9 @@ from pprint import pformat from typing import Dict, List from typing import NamedTuple as Struct -from typing import Optional +from typing import Optional, Tuple +import pandas as pd import pymongo from astropy.utils.exceptions import AstropyWarning from prefect import flow, task, unmapped @@ -43,7 +44,7 @@ class ImagePaths(Struct): @task(name="Find images") def find_images( field: str, - beams: dict, + beams_row: Tuple[int, pd.Series], stoke: str, datadir: Path, ) -> ImagePaths: @@ -62,15 +63,17 @@ def find_images( ImagePaths: List of images and weights. 
""" logger.setLevel(logging.INFO) - - src_name = beams["Source_ID"] - field_beams = beams["beams"][field] + beams = beams_row[1] + src_name = beams.Source_ID + field_beams = beams.beams[field] # First check that the images exist image_list: List[Path] = [] for bm in list(set(field_beams["beam_list"])): # Ensure list of beams is unique! imfile = Path(field_beams[f"{stoke.lower()}_beam{bm}_image_file"]) - assert imfile.parent.name == src_name, "Looking in wrong directory!" + assert ( + imfile.parent.name == src_name + ), f"Looking in wrong directory! '{imfile.parent.name}'" new_imfile = datadir.resolve() / imfile image_list.append(new_imfile) image_list = sorted(image_list) @@ -81,7 +84,9 @@ def find_images( weight_list: List[Path] = [] for bm in list(set(field_beams["beam_list"])): # Ensure list of beams is unique! wgtsfile = Path(field_beams[f"{stoke.lower()}_beam{bm}_weight_file"]) - assert wgtsfile.parent.name == src_name, "Looking in wrong directory!" + assert ( + wgtsfile.parent.name == src_name + ), f"Looking in wrong directory! '{wgtsfile.parent.name}'" new_wgtsfile = datadir.resolve() / wgtsfile weight_list.append(new_wgtsfile) weight_list = sorted(weight_list) @@ -278,6 +283,7 @@ def main( datadir: Path, host: str, epoch: int, + sbid: Optional[int], holofile: Optional[Path] = None, username: Optional[str] = None, password: Optional[str] = None, @@ -316,36 +322,25 @@ def main( logger.debug(f"{beams_col = }") # Query the DB query = {"$and": [{f"beams.{field}": {"$exists": True}}]} + if sbid is not None: + query["$and"].append({f"beams.{field}.SBIDs": sbid}) logger.info(f"The query is {query=}") island_ids: List[str] = sorted(beams_col.distinct("Source_ID", query)) - big_beams: List[dict] = list( + big_beams = pd.DataFrame( beams_col.find({"Source_ID": {"$in": island_ids}}).sort("Source_ID") ) - big_comps: List[dict] = list( - comp_col.find({"Source_ID": {"$in": island_ids}}).sort("Source_ID") - ) - comps: List[List[dict]] = [] - for island_id in island_ids: - _comps = [] - for c in big_comps: - if c["Source_ID"] == island_id: - _comps.append(c) - comps.append(_comps) - - assert len(big_beams) == len(comps) if limit is not None: logger.critical(f"Limiting to {limit} islands") big_beams = big_beams[:limit] - comps = comps[:limit] all_parfiles = [] for stoke in stokeslist: image_paths = find_images.map( field=unmapped(field), - beams=big_beams, + beams_row=big_beams.iterrows(), stoke=unmapped(stoke.capitalize()), datadir=unmapped(cutdir), ) @@ -420,10 +415,10 @@ def cli(): description=lin_parser.description, ) args = parser.parse_args() - - verbose = args.verbose test_db( - host=args.host, username=args.username, password=args.password, verbose=verbose + host=args.host, + username=args.username, + password=args.password, ) main( @@ -433,6 +428,7 @@ def cli(): ), host=args.host, epoch=args.epoch, + sbid=args.sbid, holofile=Path(args.holofile), username=args.username, password=args.password, From ad97a2e7cb166af69cb2667db08a5ecc5446a936 Mon Sep 17 00:00:00 2001 From: "Thomson, Alec (CASS, Kensington)" Date: Wed, 10 Apr 2024 12:37:13 +1000 Subject: [PATCH 24/37] Update defaults --- arrakis/.default_config.yaml | 59 +++++++++++++++++------------------- 1 file changed, 28 insertions(+), 31 deletions(-) diff --git a/arrakis/.default_config.yaml b/arrakis/.default_config.yaml index 47537ae4..b0281d19 100644 --- a/arrakis/.default_config.yaml +++ b/arrakis/.default_config.yaml @@ -1,10 +1,10 @@ # options: -hosted-wsclean: docker://alecthomson/wsclean:latest # Docker or Singularity image for 
wsclean (default: docker://alecthomson/wsclean:latest)
-local_wsclean: null # Path to local wsclean Singularity image (default: None)
+# hosted-wsclean: docker://alecthomson/wsclean:latest # Docker or Singularity image for wsclean (default: docker://alecthomson/wsclean:latest)
+# local_wsclean: null # Path to local wsclean Singularity image (default: None)

 # pipeline arguments:
-dask_config: null # Config file for Dask SlurmCLUSTER. (default: None)
-imager_dask_config: null #Config file for Dask SlurmCLUSTER. (default: None)
+# dask_config: null # Config file for Dask SlurmCLUSTER. (default: None)
+# imager_dask_config: null #Config file for Dask SlurmCLUSTER. (default: None)
 imager_only: false #Only run the imager component of the pipeline. (default: False)
 skip_imager: false #Skip imaging stage [False]. (default: False)
 skip_cutout: false #Skip cutout stage [False]. (default: False)
@@ -15,23 +15,20 @@ skip_rmclean: false #Skip RM-CLEAN stage [False]. (default: False)
 skip_cat: false #Skip catalogue stage [False]. (default: False)
 skip_cleanup: false #Skip cleanup stage [False]. (default: False)

-# generic null arguments:
-sbid: null #SBID of observation. (default: None)
-stokes: # List of Stokes parameters to image (default: ['I', 'Q', 'U'])
-  - I
-  - Q
-  - U
+# # generic null arguments:
+# sbid: null #SBID of observation. (default: None)
+stokes: [I,Q,U] # List of Stokes parameters to image (default: ['I', 'Q', 'U'])
 epoch: 0 # Epoch of observation. (default: 0)
-host: null # Host of mongodb (probably $hostname -i). (default: None)
-username: null # Username of mongodb. (default: None)
+# host: null # Host of mongodb (probably $hostname -i). (default: None)
+# username: null # Username of mongodb. (default: None)
 password: # Password of mongodb. (default: None)
-limit: null # Limit the number of islands to process. (default: None)
+# limit: null # Limit the number of islands to process. (default: None)
 database: false # Add data to MongoDB. (default: False)

 # imaging arguments:
-temp_dir_wsclean: null # Temporary directory for WSClean to store intermediate files (default: None)
-temp_dir_images: null # Temporary directory to store intermediate image files (default: None)
-psf_cutoff: null # Cutoff for smoothing in units of arcseconds. (default: None)
+# temp_dir_wsclean: null # Temporary directory for WSClean to store intermediate files (default: None)
+# temp_dir_images: null # Temporary directory to store intermediate image files (default: None)
+# psf_cutoff: null # Cutoff for smoothing in units of arcseconds. (default: None)
 robust: -0.5 # ROBUST
 nchan: 36 # NCHAN
 pols: IQU # POLS
@@ -46,15 +43,15 @@ local_rms: true #
 local_rms_window: 60 # LOCAL_RMS_WINDOW
 force_mask_rounds: 8 # FORCE_MASK_ROUNDS
 gridder: wgridder # {direct-ft,idg,wgridder,tuned-wgridder,wstacking}
-taper: null # TAPER
+# taper: null # TAPER
 minuv: 200 # MINUV
-parallel: null # PARALLEL
+# parallel: null # PARALLEL
 mpi: false # Use MPI (default: False)
 purge: false # Purge intermediate files (default: False)
 multiscale: false # Use multiscale clean (default: False)
-multiscale_scale_bias: null # The multiscale scale bias term provided to wsclean. (default: None)
+# multiscale_scale_bias: null # The multiscale scale bias term provided to wsclean. (default: None)
 multiscale_scales: 0,2,4,8,16,32,64,128 # The scales used in the multiscale clean.
(default: 0,2,4,8,16,32,64,128) -absmem: null # ABSMEM Absolute memory limit in GB (default: None) +# absmem: null # ABSMEM Absolute memory limit in GB (default: None) make_residual_cubes: false # Create residual cubes as well as cubes from restored images. (default: False) ms_glob_pattern: scienceData*_averaged_cal.leakage.ms # The pattern used to search for measurement sets. (default: scienceData*_averaged_cal.leakage.ms) data_column: CORRECTED_DATA # Which column in the measurement set to image. (default: CORRECTED_DATA) @@ -65,16 +62,16 @@ skip_fix_ms: false # Do not apply the ASKAP MS corrections from the package fixm pad: 3 # Number of beamwidths to pad around source [3]. (default: 3) dryrun: false # Do a dry-run [False]. (default: False) -# linmos null arguments: -holofile: null #Path to holography image (default: None) +# # linmos null arguments: +# holofile: null #Path to holography image (default: None) yanda: 1.3.0 # Yandasoft version to pull from DockerHub [1.3.0]. (default: 1.3.0) -yanda_image: null #Path to an existing yandasoft singularity container image. (default: None) +# yanda_image: null #Path to an existing yandasoft singularity container image. (default: None) # frion arguments: ionex_server: ftp://ftp.aiub.unibe.ch/CODE/ # IONEX server (default: ftp://ftp.aiub.unibe.ch/CODE/) ionex_prefix: codg # IONEX_PREFIX -ionex_formatter: null # IONEX formatter. (default: ftp.aiub.unibe.ch) -ionex_proxy_server: null # Proxy server. (default: None) +# ionex_formatter: null # IONEX formatter. (default: ftp.aiub.unibe.ch) +# ionex_proxy_server: null # Proxy server. (default: None) ionex_predownload: false # Pre-download IONEX files. (default: False) # common rm arguments: @@ -84,15 +81,15 @@ rm_verbose: false # Verbose RMsynth/RMClean. (default: False) # rm-synth arguments: ion: false # Use ionospheric-corrected data. (default: False) -tt0: null # TT0 MFS image -- will be used for model of Stokes I -- also needs --tt1. (default: None) -tt1: null # TT1 MFS image -- will be used for model of Stokes I -- also needs --tt0. (default: None) +# tt0: null # TT0 MFS image -- will be used for model of Stokes I -- also needs --tt1. (default: None) +# tt1: null # TT1 MFS image -- will be used for model of Stokes I -- also needs --tt0. (default: None) validate: false # Run on Stokes I. (default: False) own_fit: false # Use own Stokes I fit function. (default: False) weight_type: # weighting (inverse) 'variance' or 'uniform' (all 1s). (default: variance) fit_function: # Stokes I fitting function: 'linear' or 'log' polynomials. (default: log) fit_rmsf: false # Fit a Gaussian to the RMSF (default: False) -phi_max: null # Absolute max Faraday depth sampled (in rad/m^2) (overrides NSAMPLES). (default: None) -dphi: null # Width of Faraday depth channel. (default: None) +# phi_max: null # Absolute max Faraday depth sampled (in rad/m^2) (overrides NSAMPLES). (default: None) +# dphi: null # Width of Faraday depth channel. (default: None) n_samples: # Number of samples across the FWHM RMSF. (default: 5) poly_ord: # polynomial order to fit to I spectrum. (default: 3) no_stokes_i: false # ignore the Stokes I spectrum. (default: False) @@ -104,13 +101,13 @@ debug: false # turn on debugging messages & plots. (default: False) cutoff: -8 # CLEAN cutoff (+ve = absolute, -ve = sigma). (default: -3) max_iter: 10000 # maximum number of CLEAN iterations. (default: 10000) gain: 0.1 # CLEAN loop gain. (default: 0.1) -window: null # Further CLEAN in mask to this threshold. 
(default: None) +# window: null # Further CLEAN in mask to this threshold. (default: None) # catalogue arguments: leakage_degree: 4 # Degree of leakage polynomial fit. (default: 4) leakage_bins: 16 # Number of bins for leakage fit. (default: 16) leakage_snr: 30 # SNR cut for leakage fit. (default: 30.0) -write: null # File to save table to. (default: None) +# write: null # File to save table to. (default: None) # cleanup arguments: overwrite: false # Overwrite existing tarball (default: False) From 59fab58173c2052386f0e29cb736ac940efce642 Mon Sep 17 00:00:00 2001 From: "Thomson, Alec (CASS, Kensington)" Date: Wed, 10 Apr 2024 13:24:24 +1000 Subject: [PATCH 25/37] Use SBID, you goose --- arrakis/linmos.py | 2 +- arrakis/process_spice.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arrakis/linmos.py b/arrakis/linmos.py index f9890ccc..a51f32ff 100644 --- a/arrakis/linmos.py +++ b/arrakis/linmos.py @@ -283,7 +283,7 @@ def main( datadir: Path, host: str, epoch: int, - sbid: Optional[int], + sbid: Optional[int] = None, holofile: Optional[Path] = None, username: Optional[str] = None, password: Optional[str] = None, diff --git a/arrakis/process_spice.py b/arrakis/process_spice.py index 8367decb..785ba679 100644 --- a/arrakis/process_spice.py +++ b/arrakis/process_spice.py @@ -47,6 +47,7 @@ def process_spice(args, host: str, task_runner: BaseTaskRunner) -> None: directory=str(args.datadir), host=host, epoch=args.epoch, + sbid=args.sbid, username=args.username, password=args.password, pad=args.pad, @@ -66,6 +67,7 @@ def process_spice(args, host: str, task_runner: BaseTaskRunner) -> None: datadir=Path(args.datadir), host=host, epoch=args.epoch, + sbid=args.sbid, holofile=Path(args.holofile), username=args.username, password=args.password, @@ -84,6 +86,7 @@ def process_spice(args, host: str, task_runner: BaseTaskRunner) -> None: outdir=args.datadir, host=host, epoch=args.epoch, + sbid=args.sbid, username=args.username, password=args.password, database=args.database, @@ -140,6 +143,7 @@ def process_spice(args, host: str, task_runner: BaseTaskRunner) -> None: outdir=args.datadir, host=host, epoch=args.epoch, + sbid=args.sbid, username=args.username, password=args.password, dimension=args.dimension, @@ -161,6 +165,7 @@ def process_spice(args, host: str, task_runner: BaseTaskRunner) -> None: field=args.field, host=host, epoch=args.epoch, + sbid=args.sbid, username=args.username, password=args.password, verbose=args.verbose, From 93bf871629d3223f2769659df4015635cb0b68f7 Mon Sep 17 00:00:00 2001 From: "Thomson, Alec (CASS, Kensington)" Date: Wed, 10 Apr 2024 13:24:39 +1000 Subject: [PATCH 26/37] Path fixing --- arrakis/cutout.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arrakis/cutout.py b/arrakis/cutout.py index 10c398fa..35f15dcf 100644 --- a/arrakis/cutout.py +++ b/arrakis/cutout.py @@ -94,11 +94,12 @@ def cutout_weight( basename = image_name.name outname = f"{source_id}.cutout.{basename}" outfile = outdir / outname - image = Path( - image_name.name.replace("image.restored", "weights.restored") + image = ( + image_name.parent + / image_name.name.replace("image.restored", "weights.restored") ).with_suffix(".txt") - outfile = Path( - outfile.name.replace("image.restored", "weights.restored") + outfile = ( + outfile.parent / outfile.name.replace("image.restored", "weights.restored") ).with_suffix(".txt") if not dryrun: @@ -108,7 +109,7 @@ def cutout_weight( filename = outfile.parent / outfile.name newvalues = { "$set": { - 
f"beams.{field}.{stoke.lower()}_beam{beam_num}_weight_file": filename.as_posix() + f"beams.{field}.{stoke.lower()}_beam{beam_num}_weight_file": filename.absolute().as_posix() } } @@ -190,7 +191,7 @@ def cutout_image( filename = outfile.parent / outfile.name newvalues = { "$set": { - f"beams.{field}.{stoke.lower()}_beam{beam_num}_image_file": filename.as_posix() + f"beams.{field}.{stoke.lower()}_beam{beam_num}_image_file": filename.absolute().as_posix() } } From b84ce20479e259ba52489d34977816d7eaea01a9 Mon Sep 17 00:00:00 2001 From: "Thomson, Alec (CASS, Kensington)" Date: Wed, 10 Apr 2024 14:14:04 +1000 Subject: [PATCH 27/37] Fix database upsert --- arrakis/rmsynth_oncuts.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arrakis/rmsynth_oncuts.py b/arrakis/rmsynth_oncuts.py index 40a39feb..2e4cee7c 100644 --- a/arrakis/rmsynth_oncuts.py +++ b/arrakis/rmsynth_oncuts.py @@ -938,7 +938,7 @@ def main( ion (bool, optional): Ion. Defaults to False. do_own_fit (bool, optional): Do own fit. Defaults to False. """ - + logger.info(f"Running RMsynth on {field} field") outdir = outdir.absolute() / "cutouts" if savePlots: @@ -970,6 +970,8 @@ def main( if sbid is not None: beam_query["$and"].append({f"beams.{field}.SBIDs": sbid}) + logger.info(f"Querying beams with {beam_query}") + beams = pd.DataFrame(list(beams_col.find(beam_query).sort("Source_ID"))) beams.set_index("Source_ID", drop=False, inplace=True) island_ids = sorted(beams_col.distinct("Source_ID", beam_query)) @@ -1010,7 +1012,7 @@ def main( ] } - comp_col.update_many( + result = comp_col.update_many( query_1d, { "$set": { @@ -1021,7 +1023,9 @@ def main( ): False } }, + upsert=True, ) + logger.info(f"{result}") elif dimension == "3d": query_3d = { @@ -1037,7 +1041,7 @@ def main( ] } - island_col.update( + result = island_col.update( query_3d, { "$set": { @@ -1048,8 +1052,11 @@ def main( ): False } }, + upsert=True, ) + logger.info(f"{result}") + if limit is not None: n_comp = limit n_island = limit From 9e4e96842dedc0a0c1460c96386ef3fc9cde125b Mon Sep 17 00:00:00 2001 From: "Thomson, Alec (CASS, Kensington)" Date: Wed, 10 Apr 2024 15:58:16 +1000 Subject: [PATCH 28/37] Dep updates --- pyproject.toml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b56920f8..beaf7056 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,15 +34,13 @@ dask = "*" distributed = "*" dask_jobqueue = "*" dask_mpi = "*" -FRion = {git = "https://github.com/CIRADA-Tools/FRion.git" } +FRion = ">=1.1.3" h5py = "*" ipython = "*" matplotlib = ">=3.8" numba = "*" numba_progress = "*" -#mpi4py = "*" pandas = "*" -Polspectra = "*" psutil = "*" pymongo = "*" pymultinest = "*" @@ -61,9 +59,9 @@ graphviz = "*" bokeh = "<3" prefect = ">=2" prefect-dask = "*" -RMTable = { git = "https://github.com/CIRADA-Tools/RMTable" } -RM-Tools = { git = "https://github.com/CIRADA-Tools/RM-Tools"} -PolSpectra = { git = "https://github.com/AlecThomson/PolSpectra.git", branch="spiceracs"} +RMTable = ">=1.2.1" +RM-Tools = ">=1.4.1" +PolSpectra = ">=1.1.0" setuptools = "*" fixms = ">=0.2" fitscube = ">=0.3" From 8cc19496dcb9ec73c6880c43fabd359e1955c2f2 Mon Sep 17 00:00:00 2001 From: "Thomson, Alec (CASS, Kensington)" Date: Wed, 10 Apr 2024 15:58:36 +1000 Subject: [PATCH 29/37] Use upserts --- arrakis/rmclean_oncuts.py | 8 ++++++-- arrakis/rmsynth_oncuts.py | 30 ++++-------------------------- 2 files changed, 10 insertions(+), 28 deletions(-) diff --git a/arrakis/rmclean_oncuts.py 
From 9e4e96842dedc0a0c1460c96386ef3fc9cde125b Mon Sep 17 00:00:00 2001
From: "Thomson, Alec (CASS, Kensington)"
Date: Wed, 10 Apr 2024 15:58:16 +1000
Subject: [PATCH 28/37] Dep updates

---
 pyproject.toml | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index b56920f8..beaf7056 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,15 +34,13 @@ dask = "*"
 distributed = "*"
 dask_jobqueue = "*"
 dask_mpi = "*"
-FRion = {git = "https://github.com/CIRADA-Tools/FRion.git" }
+FRion = ">=1.1.3"
 h5py = "*"
 ipython = "*"
 matplotlib = ">=3.8"
 numba = "*"
 numba_progress = "*"
-#mpi4py = "*"
 pandas = "*"
-Polspectra = "*"
 psutil = "*"
 pymongo = "*"
 pymultinest = "*"
@@ -61,9 +59,9 @@ graphviz = "*"
 bokeh = "<3"
 prefect = ">=2"
 prefect-dask = "*"
-RMTable = { git = "https://github.com/CIRADA-Tools/RMTable" }
-RM-Tools = { git = "https://github.com/CIRADA-Tools/RM-Tools"}
-PolSpectra = { git = "https://github.com/AlecThomson/PolSpectra.git", branch="spiceracs"}
+RMTable = ">=1.2.1"
+RM-Tools = ">=1.4.1"
+PolSpectra = ">=1.1.0"
 setuptools = "*"
 fixms = ">=0.2"
 fitscube = ">=0.3"

From 8cc19496dcb9ec73c6880c43fabd359e1955c2f2 Mon Sep 17 00:00:00 2001
From: "Thomson, Alec (CASS, Kensington)"
Date: Wed, 10 Apr 2024 15:58:36 +1000
Subject: [PATCH 29/37] Use upserts

---
 arrakis/rmclean_oncuts.py |  8 ++++++--
 arrakis/rmsynth_oncuts.py | 30 ++++--------------------------
 2 files changed, 10 insertions(+), 28 deletions(-)

diff --git a/arrakis/rmclean_oncuts.py b/arrakis/rmclean_oncuts.py
index 349e2151..97987b15 100644
--- a/arrakis/rmclean_oncuts.py
+++ b/arrakis/rmclean_oncuts.py
@@ -300,7 +300,7 @@ def main(
             ).sort("Source_ID")
         )
         n_island = island_col.count_documents(query)
-        island_col.update(
+        result = island_col.update(
             query,
             {
                 "$set": {
@@ -311,7 +311,9 @@ def main(
                     ): False
                 }
             },
+            upsert=True,
         )
+        logger.info(pformat(result.raw_result))

     elif dimension == "1d":
         query = {
@@ -339,7 +341,7 @@ def main(
             ).sort("Source_ID")
         )
         n_comp = comp_col.count_documents(query)
-        comp_col.update_many(
+        result = comp_col.update_many(
             query,
             {
                 "$set": {
@@ -350,7 +352,9 @@ def main(
                     ): True
                 }
             },
+            upsert=True,
         )
+        logger.info(pformat(result.raw_result))

     if limit is not None:
         count = limit
diff --git a/arrakis/rmsynth_oncuts.py b/arrakis/rmsynth_oncuts.py
index 2e4cee7c..230e9f2a 100644
--- a/arrakis/rmsynth_oncuts.py
+++ b/arrakis/rmsynth_oncuts.py
@@ -999,18 +999,7 @@ def main(

     # Unset rmsynth in db
     if dimension == "1d":
-        query_1d = {
-            "$and": [
-                {"Source_ID": {"$in": island_ids}},
-                {
-                    (
-                        f"{field}.rmsynth1d"
-                        if sbid is None
-                        else f"{field}_{sbid}.rmsynth1d"
-                    ): True
-                },
-            ]
-        }
+        query_1d = {"Source_ID": {"$in": island_ids}}

         result = comp_col.update_many(
             query_1d,
@@ -1025,21 +1014,10 @@ def main(
             },
             upsert=True,
         )
-        logger.info(f"{result}")
+        logger.info(pformat(result.raw_result))

     elif dimension == "3d":
-        query_3d = {
-            "$and": [
-                {"Source_ID": {"$in": island_ids}},
-                {
-                    (
-                        f"{field}.rmsynth3d"
-                        if sbid is None
-                        else f"{field}_{sbid}.rmsynth3d"
-                    ): True
-                },
-            ]
-        }
+        query_3d = {"Source_ID": {"$in": island_ids}}

         result = island_col.update(
             query_3d,
@@ -1055,7 +1033,7 @@ def main(
             upsert=True,
         )

-        logger.info(f"{result}")
+        logger.info(pformat(result.raw_result))

     if limit is not None:
         n_comp = limit

From 73ceb8696b297f9fdd58ec23b6a0942c2021bd01 Mon Sep 17 00:00:00 2001
From: "Thomson, Alec (CASS, Kensington)"
Date: Wed, 10 Apr 2024 20:44:56 +1000
Subject: [PATCH 30/37] Use upserts

---
 arrakis/rmclean_oncuts.py | 154 +++++++++++++++++---------------
 arrakis/rmsynth_oncuts.py |  69 +++++++++--------
 arrakis/utils/pipeline.py |   2 +-
 3 files changed, 108 insertions(+), 117 deletions(-)

diff --git a/arrakis/rmclean_oncuts.py b/arrakis/rmclean_oncuts.py
index 97987b15..aaa5048d 100644
--- a/arrakis/rmclean_oncuts.py
+++ b/arrakis/rmclean_oncuts.py
@@ -61,89 +61,73 @@ def rmclean1d(
     """
     iname = comp["Source_ID"]
     cname = comp["Gaussian_ID"]
-
     logger.debug(f"Working on {comp}")

     save_name = field if sbid is None else f"{field}_{sbid}"
-    try:
-        rm1dfiles = comp["rm1dfiles"]
-        fdfFile = outdir / f"{rm1dfiles['FDF_dirty']}"
-        rmsfFile = outdir / f"{rm1dfiles['RMSF']}"
-        weightFile = outdir / f"{rm1dfiles['weights']}"
-        rmSynthFile = outdir / f"{rm1dfiles['summary_json']}"
-
-        prefix = os.path.join(os.path.abspath(os.path.dirname(fdfFile)), cname)
-
-        # Sanity checks
-        for f in [weightFile, fdfFile, rmsfFile, rmSynthFile]:
-            logger.debug(f"Checking {f.absolute()}")
-            if not f.exists():
-                logger.fatal(f"File does not exist: '{f}'.")
-                raise FileNotFoundError(f"File does not exist: '{f}'")
-
-        nBits = 32
-        mDict, aDict = do_RMclean_1D.readFiles(
-            fdfFile, rmsfFile, weightFile, rmSynthFile, nBits
-        )
-        # Run RM-CLEAN on the spectrum
-        outdict, arrdict = do_RMclean_1D.run_rmclean(
-            mDict=mDict,
-            aDict=aDict,
-            cutoff=cutoff,
-            maxIter=maxIter,
-            gain=gain,
-            nBits=nBits,
-            showPlots=showPlots,
-            verbose=rm_verbose,
-            prefixOut=prefix,
-            saveFigures=savePlots,
-            window=window,
-        )
-        # Ensure JSON serializable
-        for k, v in outdict.items():
-            if isinstance(v, np.float_):
-                outdict[k] = float(v)
-            elif isinstance(v, np.float32):
-                outdict[k] = float(v)
-            elif isinstance(v, np.int_):
-                outdict[k] = int(v)
-            elif isinstance(v, np.int32):
-                outdict[k] = int(v)
-            elif isinstance(v, np.ndarray):
-                outdict[k] = v.tolist()
-
-        # Save output
-        do_RMclean_1D.saveOutput(outdict, arrdict, prefixOut=prefix, verbose=rm_verbose)
-        if savePlots:
-            plt.close("all")
-            plotdir = outdir / "plots"
-            plot_files = list(fdfFile.parent.glob("*.pdf"))
-            for plot_file in plot_files:
-                copyfile(plot_file, plotdir / plot_file.name)
-
-        # Load into Mongo
-        myquery = {"Gaussian_ID": cname}
-
-        newvalues = {
-            "$set": {
-                save_name: {
-                    "rmclean1d": True,
-                    "rmclean_summary": outdict,
-                },
-            }
-        }
-    except KeyError:
-        logger.critical("Failed to load data! RM-CLEAN not applied to component!")
-        logger.critical(f"Island is {iname}, component is {cname}")
-        myquery = {"Gaussian_ID": cname}
-
-        newvalues = {
-            "$set": {
-                save_name: {
-                    "rmclean1d": False,
-                },
-            }
-        }
+    rm1dfiles = comp[save_name]["rm1dfiles"]
+    fdfFile = outdir / f"{rm1dfiles['FDF_dirty']}"
+    rmsfFile = outdir / f"{rm1dfiles['RMSF']}"
+    weightFile = outdir / f"{rm1dfiles['weights']}"
+    rmSynthFile = outdir / f"{rm1dfiles['summary_json']}"
+
+    prefix = os.path.join(os.path.abspath(os.path.dirname(fdfFile)), cname)
+
+    # Sanity checks
+    for f in [weightFile, fdfFile, rmsfFile, rmSynthFile]:
+        logger.debug(f"Checking {f.absolute()}")
+        if not f.exists():
+            logger.fatal(f"File does not exist: '{f}'.")
+            raise FileNotFoundError(f"File does not exist: '{f}'")
+
+    nBits = 32
+    mDict, aDict = do_RMclean_1D.readFiles(
+        fdfFile, rmsfFile, weightFile, rmSynthFile, nBits
+    )
+
+    # Run RM-CLEAN on the spectrum
+    outdict, arrdict = do_RMclean_1D.run_rmclean(
+        mDict=mDict,
+        aDict=aDict,
+        cutoff=cutoff,
+        maxIter=maxIter,
+        gain=gain,
+        nBits=nBits,
+        showPlots=showPlots,
+        verbose=rm_verbose,
+        prefixOut=prefix,
+        saveFigures=savePlots,
+        window=window,
+    )
+    # Ensure JSON serializable
+    for k, v in outdict.items():
+        if isinstance(v, np.float_):
+            outdict[k] = float(v)
+        elif isinstance(v, np.float32):
+            outdict[k] = float(v)
+        elif isinstance(v, np.int_):
+            outdict[k] = int(v)
+        elif isinstance(v, np.int32):
+            outdict[k] = int(v)
+        elif isinstance(v, np.ndarray):
+            outdict[k] = v.tolist()
+
+    # Save output
+    do_RMclean_1D.saveOutput(outdict, arrdict, prefixOut=prefix, verbose=rm_verbose)
+    if savePlots:
+        plt.close("all")
+        plotdir = outdir / "plots"
+        plot_files = list(fdfFile.parent.glob("*.pdf"))
+        for plot_file in plot_files:
+            copyfile(plot_file, plotdir / plot_file.name)
+
+    # Load into Mongo
+    myquery = {"Gaussian_ID": cname}
+
+    to_update = comp[save_name]
+    to_update["rmclean1d"] = True
+    to_update["rmclean_summary"] = outdict
+
+    newvalues = {"$set": {save_name: to_update}}

     return pymongo.UpdateOne(myquery, newvalues)

@@ -201,8 +185,10 @@ def rmclean3d(
     )
     # Load into Mongo
     save_name = field if sbid is None else f"{field}_{sbid}"
+    to_update = island[save_name]
+    to_update["rmclean3d"] = True
     myquery = {"Source_ID": iname}
-    newvalues = {"$set": {save_name: {"rmclean3d": True}}}
+    newvalues = {"$set": {save_name: to_update}}

     return pymongo.UpdateOne(myquery, newvalues)

@@ -295,7 +281,7 @@ def main(
             # Only get required values
             {
                 "Source_ID": 1,
-                "rm3dfiles": 1,
+                f"{field}" if sbid is None else f"{field}_{sbid}": 1,
             },
         ).sort("Source_ID")
     )
@@ -311,7 +297,6 @@ def main(
                 ): False
             }
         },
-        upsert=True,
     )
     logger.info(pformat(result.raw_result))

@@ -336,7 +321,7 @@ def main(
             {
                 "Source_ID": 1,
                 "Gaussian_ID": 1,
-                "rm1dfiles": 1,
+                f"{field}" if sbid is None else f"{field}_{sbid}": 1,
             },
         ).sort("Source_ID")
     )
@@ -352,7 +337,6 @@ def main(
                 ): True
             }
         },
-        upsert=True,
    )
    logger.info(pformat(result.raw_result))

diff --git a/arrakis/rmsynth_oncuts.py b/arrakis/rmsynth_oncuts.py
index 230e9f2a..357964a3 100644
--- a/arrakis/rmsynth_oncuts.py
+++ b/arrakis/rmsynth_oncuts.py
@@ -154,7 +154,7 @@ def rmsynthoncut3d(
                 }
             }
         }
-        return pymongo.UpdateOne(myquery, badvalues)
+        return pymongo.UpdateOne(myquery, badvalues, upsert=True)

     bkgq, rmsq = cubelet_bane(dataQ, header)
     rmsq[rmsq == 0] = np.nan
@@ -227,7 +227,7 @@ def rmsynthoncut3d(
             }
         }
     }
-    return pymongo.UpdateOne(myquery, newvalues)
+    return pymongo.UpdateOne(myquery, newvalues, upsert=True)


 def cubelet_bane(cubelet: np.ndarray, header: fits.Header) -> Tuple[np.ndarray]:
@@ -476,6 +476,8 @@ def update_rmtools_dict(
     for key, val in fit_dict["fit_flag"].items():
         mDict[f"fit_flag_{key}"] = val

+    return mDict
+

 @task(name="1D RM-synthesis")
 def rmsynthoncut1d(
@@ -525,6 +527,7 @@ def rmsynthoncut1d(
         rm_verbose (bool, optional): Verbose RMsynth. Defaults to False.
     """
     logger.setLevel(logging.INFO)
+    save_name = field if sbid is None else f"{field}_{sbid}"

     comp = comp_tuple[1]
     beam = dict(beam_tuple[1])
@@ -547,8 +550,8 @@ def rmsynthoncut1d(
     if np.isnan(spectrum.data).all():
         logger.critical(f"Entire data is NaN for {iname} in {spectrum.filename}")
         myquery = {"Gaussian_ID": cname}
-        badvalues = {"$set": {"rmsynth1d": False}}
-        return pymongo.UpdateOne(myquery, badvalues)
+        badvalues = {"$set": {save_name: {"rmsynth1d": False}}}
+        return pymongo.UpdateOne(myquery, badvalues, upsert=True)

     prefix = f"{os.path.dirname(stokes_spectra.i.filename)}/{cname}"

@@ -573,14 +576,14 @@ def rmsynthoncut1d(
     ):
         logger.critical(f"{cname} QU data is all NaNs.")
         myquery = {"Gaussian_ID": cname}
-        badvalues = {"$set": {"rmsynth1d": False}}
-        return pymongo.UpdateOne(myquery, badvalues)
+        badvalues = {"$set": {save_name: {"rmsynth1d": False}}}
+        return pymongo.UpdateOne(myquery, badvalues, upsert=True)
     # And I
     if np.isnan(filtered_stokes_spectra.i.data).all():
         logger.critical(f"{cname} I data is all NaNs.")
         myquery = {"Gaussian_ID": cname}
-        badvalues = {"$set": {"rmsynth1d": False}}
-        return pymongo.UpdateOne(myquery, badvalues)
+        badvalues = {"$set": {save_name: {"rmsynth1d": False}}}
+        return pymongo.UpdateOne(myquery, badvalues, upsert=True)

     data = [np.array(freq)]
     bkg_data = [np.array(freq)]
@@ -681,8 +684,6 @@ def rmsynthoncut1d(
     logger.debug(f"Heading for {cname} is {pformat(head_dict)}")

     outer_dir = os.path.basename(os.path.dirname(filtered_stokes_spectra.i.filename))
-
-    save_name = field if sbid is None else f"{field}_{sbid}"
     newvalues = {
         "$set": {
             save_name: {
@@ -734,7 +735,7 @@ def rmsynthoncut1d(
             }
         }
     }
-    return pymongo.UpdateOne(myquery, newvalues)
+    return pymongo.UpdateOne(myquery, newvalues, upsert=True)


 def rmsynthoncut_i(
@@ -999,37 +1000,42 @@ def main(

     # Unset rmsynth in db
     if dimension == "1d":
+        logger.info(f"Unsetting rmsynth1d for {n_comp} components")
         query_1d = {"Source_ID": {"$in": island_ids}}
+        update_1d = {
+            "$set": {
+                (
+                    f"{field}.rmsynth1d"
+                    if sbid is None
+                    else f"{field}_{sbid}.rmsynth1d"
+                ): False
+            }
+        }
+        logger.info(pformat(update_1d))

         result = comp_col.update_many(
             query_1d,
-            {
-                "$set": {
-                    (
-                        f"{field}.rmsynth1d"
-                        if sbid is None
-                        else f"{field}_{sbid}.rmsynth1d"
-                    ): False
-                }
-            },
+            update_1d,
             upsert=True,
         )
         logger.info(pformat(result.raw_result))

     elif dimension == "3d":
+        logger.info(f"Unsetting rmsynth3d for {n_island} islands")
         query_3d = {"Source_ID": {"$in": island_ids}}
-
+        update_3d = {
+            "$set": {
+                (
+                    f"{field}.rmsynth3d"
+                    if sbid is None
+                    else f"{field}_{sbid}.rmsynth3d"
+                ): False
+            }
+        }
+        logger.info(pformat(update_3d))
         result = island_col.update(
             query_3d,
-            {
-                "$set": {
-                    (
-                        f"{field}.rmsynth3d"
-                        if sbid is None
-                        else f"{field}_{sbid}.rmsynth3d"
-                    ): False
-                }
-            },
+            update_3d,
             upsert=True,
         )

@@ -1275,8 +1281,9 @@ def cli():
     gen_parser = generic_parser(parent_parser=True)
     work_parser = workdir_arg_parser(parent_parser=True)
     synth_parser = rmsynth_parser(parent_parser=True)
+    common_parser = rm_common_parser(parent_parser=True)
     parser = argparse.ArgumentParser(
-        parents=[gen_parser, work_parser, synth_parser],
+        parents=[gen_parser, work_parser, common_parser, synth_parser],
         formatter_class=UltimateHelpFormatter,
         description=synth_parser.description,
     )
diff --git a/arrakis/utils/pipeline.py b/arrakis/utils/pipeline.py
index 056af408..b4ffce88 100644
--- a/arrakis/utils/pipeline.py
+++ b/arrakis/utils/pipeline.py
@@ -127,7 +127,7 @@ def generic_parser(parent_parser: bool = False) -> argparse.ArgumentParser:
     )
     parser.add_argument(
         "--limit",
-        type=Optional[int],
+        type=int,
         default=None,
         help="Limit the number of islands to process.",
     )
From 9f022dda4827fc83054e2acbdb2c7ef924196ed0 Mon Sep 17 00:00:00 2001
From: "Thomson, Alec (CASS, Kensington)"
Date: Wed, 10 Apr 2024 20:57:50 +1000
Subject: [PATCH 31/37] Correct databases and indices

---
 arrakis/makecat.py | 38 ++++++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)

diff --git a/arrakis/makecat.py b/arrakis/makecat.py
index a8bf2548..8d6085da 100644
--- a/arrakis/makecat.py
+++ b/arrakis/makecat.py
@@ -871,9 +871,25 @@ def main(
         fields.update({n: 1})
     for n in columns_possum.sourcefinder_columns:
         fields.update({n: 1})
-    fields.update({"rmsynth_summary": 1})
-    fields.update({"rmclean_summary": 1})
-    fields.update({"header": 1})
+    fields.update(
+        {
+            (
+                f"{field}.rmsynth_summary"
+                if sbid is None
+                else f"{field}_{sbid}.rmsynth_summary"
+            ): 1
+        }
+    )
+    fields.update(
+        {
+            (
+                f"{field}.rmclean_summary"
+                if sbid is None
+                else f"{field}_{sbid}.rmclean_summary"
+            ): 1
+        }
+    )
+    fields.update({f"{field}.header" if sbid is None else f"{field}_{sbid}.header": 1})

     comps = list(comp_col.find(query, fields))
     tock = time.time()
@@ -938,15 +954,25 @@ def main(
         if src == "synth":
             for src_id, comp in comps_df.iterrows():
                 try:
-                    data += [comp["rmclean_summary"][col]]
+                    data += [
+                        comp[field if sbid is None else f"{field}_{sbid}"][
+                            "rmclean_summary"
+                        ][col]
+                    ]
                 except KeyError:
-                    data += [comp["rmsynth_summary"][col]]
+                    data += [
+                        comp[field if sbid is None else f"{field}_{sbid}"][
+                            "rmsynth_summary"
+                        ][col]
+                    ]
             new_col = Column(data=data, name=name, dtype=typ, unit=unit)
             rmtab.add_column(new_col)
         if src == "header":
             for src_id, comp in comps_df.iterrows():
-                data += [comp["header"][col]]
+                data += [
+                    comp[field if sbid is None else f"{field}_{sbid}"]["header"][col]
+                ]
             new_col = Column(data=data, name=name, dtype=typ, unit=unit)
             rmtab.add_column(new_col)
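The projection rewrite above leans on MongoDB's dotted-key syntax: projecting `"<field>_<sbid>.rmclean_summary"` returns only that nested sub-document. A sketch of the lookup-with-fallback pattern the catalogue code now uses (hypothetical names and connection details throughout):

    from pymongo import MongoClient

    client = MongoClient("localhost", 27017)  # hypothetical
    comp_col = client["spiceracs"]["components"]

    field, sbid = "RACS_1237+00A", 38909  # hypothetical values
    key = field if sbid is None else f"{field}_{sbid}"

    projection = {
        "Source_ID": 1,
        f"{key}.rmsynth_summary": 1,
        f"{key}.rmclean_summary": 1,  # dotted keys pull nested sub-documents
        f"{key}.header": 1,
    }

    for comp in comp_col.find({}, projection):
        summaries = comp.get(key, {})
        # Prefer the CLEANed summary; fall back to the dirty RM-synth one.
        summary = summaries.get("rmclean_summary", summaries.get("rmsynth_summary"))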
From bec3806e7b6b73d77ee63a94e1f33d29606563dc Mon Sep 17 00:00:00 2001
From: "Thomson, Alec (CASS, Kensington)"
Date: Wed, 10 Apr 2024 21:08:07 +1000
Subject: [PATCH 32/37] Can't check deleted files

---
 arrakis/cleanup.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arrakis/cleanup.py b/arrakis/cleanup.py
index d3d92bb8..74aab342 100644
--- a/arrakis/cleanup.py
+++ b/arrakis/cleanup.py
@@ -98,8 +98,6 @@ def main(
     for to_purge in tqdm(to_purge_all, file=TQDM_OUT, desc="Purging big beams"):
         purged.append(purge_cubelet_beams(to_purge))
     logger.info(f"Files purged: {len(purged)}")
-    total_purge_size = np.sum([p.stat().st_size for p in purged]) * u.byte
-    logger.info(f"Total space freed: {total_purge_size.to(u.GB)}")

     logger.info("Cleanup done!")

From 9e5821db8261c74c40c57c13ac16ef4ec4c6b2b2 Mon Sep 17 00:00:00 2001
From: "Thomson, Alec (CASS, Kensington)"
Date: Wed, 10 Apr 2024 21:46:35 +1000
Subject: [PATCH 33/37] Remove cutdir

---
 arrakis/cleanup.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arrakis/cleanup.py b/arrakis/cleanup.py
index 74aab342..e1b559e1 100644
--- a/arrakis/cleanup.py
+++ b/arrakis/cleanup.py
@@ -54,6 +54,10 @@ def make_cutout_tarball(cutdir: Path, overwrite: bool = False) -> Path:
     with tarfile.open(tarball, "w") as tar:
         for cutout in tqdm(all_things, file=TQDM_OUT, desc="Tarballing cutouts"):
             tar.add(cutout, arcname=cutout.name)
+
+    logger.info(f"Tarball created: {tarball}")
+    logger.critical(f"Removing {cutdir}")
+    cutdir.rmdir()
     return tarball
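A stdlib caveat on the `cutdir.rmdir()` added above: `Path.rmdir()` removes only empty directories and raises `OSError` otherwise, so it assumes the cutouts have already been purged by the earlier cleanup steps. A minimal sketch of the distinction (the directory name is hypothetical):

    import shutil
    from pathlib import Path

    cutdir = Path("cutouts")  # hypothetical directory

    try:
        cutdir.rmdir()  # succeeds only once the directory is empty
    except OSError:
        shutil.rmtree(cutdir)  # removes the whole tree unconditionally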
f"akpb.iquv.closepack36.54.943MHz.SB{holo_sbid}.cube.fits" + ) + assert holo_file.exists(), f"{holo_file} does not exist" return holo_file From 959ae8db7f50e01c2542f57b96e3a1abf196a47b Mon Sep 17 00:00:00 2001 From: "Thomson, Alec (CASS, Kensington)" Date: Thu, 11 Apr 2024 16:17:43 +1000 Subject: [PATCH 35/37] Updates --- arrakis/.default_config.yaml | 4 ++-- arrakis/process_spice.py | 2 +- pyproject.toml | 2 +- scripts/make_dr2_config.py | 37 ++++++++++++++++++------------------ 4 files changed, 22 insertions(+), 23 deletions(-) diff --git a/arrakis/.default_config.yaml b/arrakis/.default_config.yaml index b0281d19..5722b5ea 100644 --- a/arrakis/.default_config.yaml +++ b/arrakis/.default_config.yaml @@ -90,8 +90,8 @@ fit_function: # Stokes I fitting function: 'linear' or 'log' polynomials. (defau fit_rmsf: false # Fit a Gaussian to the RMSF (default: False) # phi_max: null # Absolute max Faraday depth sampled (in rad/m^2) (overrides NSAMPLES). (default: None) # dphi: null # Width of Faraday depth channel. (default: None) -n_samples: # Number of samples across the FWHM RMSF. (default: 5) -poly_ord: # polynomial order to fit to I spectrum. (default: 3) +n_samples: 5 # Number of samples across the FWHM RMSF. (default: 5) +poly_ord: 3 # polynomial order to fit to I spectrum. (default: 3) no_stokes_i: false # ignore the Stokes I spectrum. (default: False) show_plots: false # show the plots. (default: False) not_rmsf: false # Skip calculation of RMSF? (default: False) diff --git a/arrakis/process_spice.py b/arrakis/process_spice.py index 785ba679..1f9a940d 100644 --- a/arrakis/process_spice.py +++ b/arrakis/process_spice.py @@ -30,7 +30,7 @@ @flow(name="Combining+Synthesis on Arrakis") def process_spice(args, host: str, task_runner: BaseTaskRunner) -> None: - """Workflow to process the SPIRCE-RACS data + """Workflow to process the SPICE-RACS data Args: args (configargparse.Namespace): Configuration parameters for this run diff --git a/pyproject.toml b/pyproject.toml index beaf7056..bf8dce1b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,7 +60,7 @@ bokeh = "<3" prefect = ">=2" prefect-dask = "*" RMTable = ">=1.2.1" -RM-Tools = ">=1.4.1" +RM-Tools = ">=1.4.2" PolSpectra = ">=1.1.0" setuptools = "*" fixms = ">=0.2" diff --git a/scripts/make_dr2_config.py b/scripts/make_dr2_config.py index 47eac7b1..ac08e3fd 100755 --- a/scripts/make_dr2_config.py +++ b/scripts/make_dr2_config.py @@ -14,7 +14,6 @@ logger.setLevel(logging.INFO) # Public RACS database credentials RACSUSER = "anonymous" -RACSPASS = "racs-db-letmein" RACSHOST = "146.118.68.63" RACSPORT = "5433" @@ -37,7 +36,7 @@ def get_field_data(sbid: int) -> pd.Series: df = pd.read_sql( f"SELECT * from {table}", - f"postgresql://{RACSUSER}:{RACSPASS}@{RACSHOST}:{RACSPORT}/{racsdb}", + f"postgresql://{RACSUSER}:{os.environ['PGPASSWORD']}@{RACSHOST}:{RACSPORT}/{racsdb}", ) df.set_index("SBID", inplace=True) return df.loc[sbid] @@ -122,7 +121,8 @@ def main( epoch=7, ms_glob_pattern=f"'SB{sbid}.{field_data.FIELD_NAME}.beam*.round4.ms'", imager_dask_config="/scratch3/projects/spiceracs/arrakis/arrakis/configs/petrichor.yaml", - temp_dir="'$LOCALDIR'", + temp_dir_images="$LOCALDIR", + temp_dir_wsclean="$MEMDIR", mgain=0.7, force_mask_rounds=8, nmiter=15, @@ -161,27 +161,26 @@ def main( debug=False, dimension="1d", dryrun=False, - fitRMSF=True, + fit_rmsf=True, fit_function="log", gain=0.1, holofile=holo_file.as_posix(), - maxIter=10000, - nSamples=100.0, - noStokesI=False, - not_RMSF=False, - 
outfile=f"{field_data.FIELD_NAME}_SB{sbid}_polcat.fits", + max_iter=10000, + n_samples=100.0, + no_stokes_i=False, + not_rmsf=False, + write=f"{field_data.FIELD_NAME}_SB{sbid}_polcat.fits", pad=7.0, - polyOrd=-2, + poly_ord=-2, rm_verbose=False, - savePlots=True, - showPlots=False, + save_plots=True, + show_plots=False, validate=False, - verbose=True, - weightType="variance", + weight_type="variance", yanda_image="/datasets/work/sa-mhongoose/work/containers/askapsoft_1.15.0-openmpi4.sif", - ionex_server="file:///datasets/work/sa-mhongoose/work/data/IONEX/ftp.aiub.unibe.ch", - ionex_formatter="ftp.aiub.unibe.ch", - ionex_prefix="codg", + ionex_server="file:///datasets/work/sa-mhongoose/work/data/IONEX/gdc.cddis.eosdis.nasa.gov", + ionex_formatter="cddis.gsfc.nasa.gov", + ionex_prefix="casg", ) config_file = processing_dir / f"{sbid}_rm.yaml" @@ -199,8 +198,8 @@ def main( #SBATCH --mem=36GB #SBATCH --time=1-00:00:00 #SBATCH -A OD-217087 -#SBATCH -o {sbid}_rm_%j.log -#SBATCH -e {sbid}_rm_%j.log +#SBATCH -o {(processing_dir/str(sbid)).absolute().as_posix()}_rm_%j.log +#SBATCH -e {(processing_dir/str(sbid)).absolute().as_posix()}_rm_%j.log #SBATCH --qos=express # I trust nothing From 65125da0d246816f17786ed5ebbcd0a9ac4ea7fe Mon Sep 17 00:00:00 2001 From: "Thomson, Alec (CASS, Kensington)" Date: Thu, 11 Apr 2024 17:59:49 +1000 Subject: [PATCH 36/37] No script --- pyproject.toml | 1 - scripts/make_dr2_config.py | 257 ------------------------------------- 2 files changed, 258 deletions(-) delete mode 100755 scripts/make_dr2_config.py diff --git a/pyproject.toml b/pyproject.toml index bf8dce1b..d53c383a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -95,7 +95,6 @@ spice_cat = "arrakis.makecat:cli" spice_image = "arrakis.imager:cli" # Misc scripts -make_dr2_config = { reference="scripts/make_dr2_config.py", type="file"} casda_prepare = { reference="scripts/casda_prepare.py", type="file"} check_cutout = { reference="scripts/check_cutout.py", type="file"} compare_leakage = { reference="scripts/compare_leakage.py", type="file"} diff --git a/scripts/make_dr2_config.py b/scripts/make_dr2_config.py deleted file mode 100755 index ac08e3fd..00000000 --- a/scripts/make_dr2_config.py +++ /dev/null @@ -1,257 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -"""Make a DR2 config file""" - -import logging -import os -from pathlib import Path - -import pandas as pd -import yaml - -from arrakis.logger import logger - -logger.setLevel(logging.INFO) -# Public RACS database credentials -RACSUSER = "anonymous" -RACSHOST = "146.118.68.63" -RACSPORT = "5433" - - -def get_field_data(sbid: int) -> pd.Series: - """Get field data from the RACS database - - Args: - sbid (int): SBID - - Returns: - pd.Series: Field data row - """ - table = "field_data" - - if 55538 <= sbid <= 59072: - racsdb = "epoch_9" - elif 38307 <= sbid <= 41829: - racsdb = "epoch_7" - - df = pd.read_sql( - f"SELECT * from {table}", - f"postgresql://{RACSUSER}:{os.environ['PGPASSWORD']}@{RACSHOST}:{RACSPORT}/{racsdb}", - ) - df.set_index("SBID", inplace=True) - return df.loc[sbid] - - -def get_holography_path(sbid: int) -> Path: - """Get the path to the holography file for a given SBID - - Args: - sbid (int): SBID - - Raises: - ValueError: If SBID is not in the expected range - - Returns: - Path: Path to the holography file - """ - - # RACS-low3 min SBID 55538 - # SBID 55538-59072 : associated holography is SBID 55219 - - # RACS-low2 38307 - 41829 - # From Uncle Timmy: - # SBID 38307-38528 : associated holography is SBID 38585 - # SBID 
38545-39385 : associated holography is SBID 38709 - # SBID 39400-40878 : associated holography is SBID 39549 - # SBID 40989-41829 : associated holography is SBID 41055 - - if 55538 <= sbid <= 59072: - holo_dir = Path("/scratch3/projects/spiceracs/RACS_Low3_Holography") - elif 38307 <= sbid <= 41829: - holo_dir = Path("/scratch3/projects/spiceracs/RACS_Low2_Holography") - else: - raise ValueError(f"SBID {sbid} not in range") - - if 38307 <= sbid <= 38528: - holo_sbid = 38585 - elif 38545 <= sbid <= 39385: - holo_sbid = 38709 - elif 39400 <= sbid <= 40878: - holo_sbid = 39549 - elif 40989 <= sbid <= 41829: - holo_sbid = 41055 - elif 55538 <= sbid <= 59072: - holo_sbid = 55219 - else: - raise ValueError(f"SBID {sbid} not in range") - - if 38307 <= sbid <= 41829: - holo_file = holo_dir / f"akpb.iquv.square_6x6.63.887MHz.SB{holo_sbid}.cube.fits" - - elif 55538 <= sbid <= 59072: - holo_file = ( - holo_dir / f"akpb.iquv.closepack36.54.943MHz.SB{holo_sbid}.cube.fits" - ) - - assert holo_file.exists(), f"{holo_file} does not exist" - return holo_file - - -def main( - sbid: int, - sbid_dir: Path, - processing_dir: Path, -): - """Main script""" - - if not processing_dir.exists(): - processing_dir.mkdir(parents=True, exist_ok=True) - - field_data = get_field_data(sbid) - holo_file = get_holography_path(sbid) - - # Set nchan depending on the Galactic latitude - nchan = 36 if abs(field_data.GAL_LAT) > 10 else 72 - - config_base = dict( - host="stokes.it.csiro.au", - username="admin", - password=os.environ["SPICE_PASSWORD"], - imager_only=False, - epoch=7, - ms_glob_pattern=f"'SB{sbid}.{field_data.FIELD_NAME}.beam*.round4.ms'", - imager_dask_config="/scratch3/projects/spiceracs/arrakis/arrakis/configs/petrichor.yaml", - temp_dir_images="$LOCALDIR", - temp_dir_wsclean="$MEMDIR", - mgain=0.7, - force_mask_rounds=8, - nmiter=15, - niter=500_000, - local_rms=True, - auto_mask=4, - local_rms_window=60, - auto_threshold=1, - size=6144, - scale=2.5, - robust=-0.5, - pols="IQU", - gridder="wgridder", - minuv=200, - local_wsclean="/datasets/work/sa-mhongoose/work/containers/wsclean_force_mask.sif", - multiscale=True, - multiscale_scale_bias=0.6, - multiscale_scales="0,2,4,8,16,32,64,128", - purge=True, - absmem=100, - nchan=nchan, - skip_fix_ms=True, - data_column="CORRECTED_DATA", - skip_imager=False, - skip_cutout=False, - skip_linmos=False, - skip_cleanup=False, - skip_frion=False, - skip_rmsynth=False, - skip_rmclean=False, - skip_cat=False, - cutoff=-8.0, - window=-5.0, - dask_config="/scratch3/projects/spiceracs/arrakis/arrakis/configs/rm_petrichor.yaml", - database=True, - debug=False, - dimension="1d", - dryrun=False, - fit_rmsf=True, - fit_function="log", - gain=0.1, - holofile=holo_file.as_posix(), - max_iter=10000, - n_samples=100.0, - no_stokes_i=False, - not_rmsf=False, - write=f"{field_data.FIELD_NAME}_SB{sbid}_polcat.fits", - pad=7.0, - poly_ord=-2, - rm_verbose=False, - save_plots=True, - show_plots=False, - validate=False, - weight_type="variance", - yanda_image="/datasets/work/sa-mhongoose/work/containers/askapsoft_1.15.0-openmpi4.sif", - ionex_server="file:///datasets/work/sa-mhongoose/work/data/IONEX/gdc.cddis.eosdis.nasa.gov", - ionex_formatter="cddis.gsfc.nasa.gov", - ionex_prefix="casg", - ) - - config_file = processing_dir / f"{sbid}_rm.yaml" - - with open(config_file, "w") as f: - yaml.safe_dump(config_base, f) - - # Now make a run script - script_file = processing_dir / f"{sbid}_rm_run.sh" - script_string = rf"""#!/bin/bash -#SBATCH --job-name=spice_master -#SBATCH --export=NONE -#SBATCH 
--ntasks-per-node=1 -#SBATCH --ntasks=1 -#SBATCH --mem=36GB -#SBATCH --time=1-00:00:00 -#SBATCH -A OD-217087 -#SBATCH -o {(processing_dir/str(sbid)).absolute().as_posix()}_rm_%j.log -#SBATCH -e {(processing_dir/str(sbid)).absolute().as_posix()}_rm_%j.log -#SBATCH --qos=express - -# I trust nothing -export OMP_NUM_THREADS=1 - -export APIURL=http://jones.it.csiro.au:4200/api -export PREFECT_API_URL="${{APIURL}}" -export WORKDIR=$(pwd) -export PREFECT_HOME="${{WORKDIR}}/prefect" -export PREFECT_LOGGING_EXTRA_LOGGERS="arrakis" - -echo "Sourcing home" -source /home/$(whoami)/.bashrc - -module load singularity - -echo "Activating conda arrakis environment" -conda activate arrakis310 - -echo "About to run spice_process" -spice_process \ - {processing_dir.absolute().as_posix()} \ - {field_data.FIELD_NAME} \ - {sbid_dir.absolute().as_posix()} \ - --sbid {sbid} \ - --config {config_file.absolute().as_posix()} \ -""" - with open(script_file, "w") as f: - f.write(script_string) - - logger.info(f"Wrote {config_file} and {script_file}") - - return config_file, script_file - - -def cli(): - import argparse - - parser = argparse.ArgumentParser(description=__doc__) - - parser.add_argument("sbid", type=int, help="SBID") - parser.add_argument( - "-s", "--sbiddir", type=Path, help="Processing directory", default=Path(".") - ) - parser.add_argument( - "-p", "--procdir", type=Path, help="Processing directory", default=Path(".") - ) - - args = parser.parse_args() - - _ = main(sbid=args.sbid, sbid_dir=args.sbiddir, processing_dir=args.procdir) - - -if __name__ == "__main__": - cli() From 4d13ba553faeca9847401e55d0a90b673d799abe Mon Sep 17 00:00:00 2001 From: "Thomson, Alec (CASS, Kensington)" Date: Thu, 11 Apr 2024 18:40:23 +1000 Subject: [PATCH 37/37] Update config --- arrakis/configs/petrichor.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrakis/configs/petrichor.yaml b/arrakis/configs/petrichor.yaml index 67a7384a..3dca0ca8 100644 --- a/arrakis/configs/petrichor.yaml +++ b/arrakis/configs/petrichor.yaml @@ -7,7 +7,7 @@ cluster_kwargs: memory: "128GiB" account: 'OD-217087' #queue: 'workq' - walltime: '1-00:00:00' + walltime: '0-06:00:00' job_extra_directives: ['--qos express'] # interface for the workers interface: "ib0"
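For context on the `cluster_kwargs` being tuned in this final patch: the keys match what dask-jobqueue's `SLURMCluster` accepts, so the shorter walltime plausibly means each Dask worker job now requests a six-hour SLURM allocation instead of a full day. A sketch under that assumption (the loader shown is illustrative, not the pipeline's actual code):

    import yaml
    from dask_jobqueue import SLURMCluster

    with open("arrakis/configs/petrichor.yaml") as f:
        config = yaml.safe_load(f)

    # Each scaled job submits one SLURM job with the configured
    # cores/memory/account/walltime and the '--qos express' directive.
    cluster = SLURMCluster(**config["cluster_kwargs"])
    cluster.scale(jobs=4)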