diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fae595d..322f32e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,19 +1,19 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v4.6.0 hooks: - id: check-yaml - id: end-of-file-fixer - id: trailing-whitespace - repo: https://github.com/psf/black - rev: 24.3.0 + rev: 24.4.2 hooks: - id: black # It is recommended to specify the latest version of Python # supported by your project here, or alternatively use # pre-commit's default_language_version, see # https://pre-commit.com/#top_level-default_language_version - language_version: python3.10 + language_version: python3.11 - repo: https://github.com/PyCQA/isort rev: 5.13.2 hooks: diff --git a/doc/lsst.source.injection/_assets/t9813bi_completeness.png b/doc/lsst.source.injection/_assets/t9813bi_completeness.png new file mode 100644 index 0000000..c2dc396 Binary files /dev/null and b/doc/lsst.source.injection/_assets/t9813bi_completeness.png differ diff --git a/doc/lsst.source.injection/reference/21_inject_synthetic_sources.rst b/doc/lsst.source.injection/reference/21_inject_synthetic_sources.rst index c685078..44def25 100644 --- a/doc/lsst.source.injection/reference/21_inject_synthetic_sources.rst +++ b/doc/lsst.source.injection/reference/21_inject_synthetic_sources.rst @@ -72,6 +72,12 @@ The step 1 subset will have had the ``inject_exposure`` task (:lsst-task:`~lsst. If injection outputs have already been generated within your butler repository, you should omit this flag from your run command to prevent any accidental registration of unwanted dataset types. +.. note:: + + Similar to ``stepN`` subsets are ``injected_stepN`` subsets. + These only run tasks including and after the injection task. + The ``injected_stepN`` subsets can save memory and runtime if the tasks prior to injection have already been run. 
+ Assuming processing completes successfully, the ``injected_postISRCCD`` and associated ``injected_postISRCCD_catalog`` will be written to the butler repository. Various downstream ``step1`` data products should also exist, including the ``injected_calexp`` dataset type (see example images below). diff --git a/doc/lsst.source.injection/reference/41_match_injected_outputs.rst b/doc/lsst.source.injection/reference/41_match_injected_outputs.rst new file mode 100644 index 0000000..1658420 --- /dev/null +++ b/doc/lsst.source.injection/reference/41_match_injected_outputs.rst @@ -0,0 +1,198 @@ +.. _lsst.source.injection-ref-match: + +======================== + Match Injected Outputs +======================== + +------------------------------------------------- + Consolidate and match source injection catalogs +------------------------------------------------- + +This page covers how to match injected input catalogs to output data catalogs. +This process can generally be split into two parts: consolidating per-patch injected catalogs into tract-level input catalogs, and matching the input and output catalogs. + +Consolidate injected catalogs +============================= + +The butler may split up catalogs which cover multiple photometry bands or which cover large areas of sky for memory efficiency, even when a single catalog is injected. +For example, if a coadd-level injected catalog covers a whole tract across multiple photometry bands, the injected catalogs will be split and stored with the dimensions ``{patch, band}``. +Before matching the injected input catalogs to the processed output catalog, the per-patch and per-band inputs must be consolidated into a single tract-level catalog. +This can be done by using ``pipetask run`` to run ``ConsolidateInjectedCatalogsTask`` from ``pipelines/consolidate_injected_deepCoadd_catalogs.yaml``. + +.. 
code:: + + pipetask --long-log --log-file $LOGFILE run \ + -b $REPO \ + -i $PROCESSED_DATA_COLL \ + -o $CONSOLIDATED_CATALOG_COLL \ + -p $SOURCE_INJECTION_DIR/pipelines/consolidate_injected_deepCoadd_catalogs.yaml \ + -d "instrument='HSC' AND skymap='hsc_rings_v1' AND tract=9813 AND patch=42 AND band='i'" + +*where* + + `$LOGFILE` + The full path to a user-defined output log file. + + `$REPO` + The path to the butler repository. + + `$PROCESSED_DATA_COLL` + The name of the input injected catalog collection. + + `$CONSOLIDATED_CATALOG_COLL` + The name of the consolidated injected output collection. + + +Matching +======== + +Now that we have our consolidated tract-level injected catalog and a reference tract-level standard catalog, we can move on to matching these two sets of catalogs together. + +The matching tasks are ``MatchTractCatalogTask`` and ``DiffMatchedTractCatalogTask``. +The first task performs a spatial probabilistic match with minimal flag cuts, and the second computes any relevant statistics. +These tasks are located in the ``pipelines/match_injected_tract_catalog.yaml`` pipeline definition file, with the labels ``match_object_to_truth`` and ``compare_object_to_truth``. +The pipeline graph for the consolidation and matching process is shown below: + +.. code:: + + ○ injected_deepCoadd_catalog + │ + ○ │ skyMap + ├─┤ + │ ■ consolidate_injected_catalogs + │ │ + │ ○ injected_deepCoadd_catalog_tract + │ │ + ○ │ │ injected_objectTable_tract + ╭─┼─┼─┤ + ■ │ │ │ match_object_to_truth + │ │ │ │ + ◍ │ │ │ match_target_injected_deepCoadd_catalog_tract_injected_objectTable_tract, match_ref_injected_deepCoadd_catalog_tract_injected_objectTable_tract + ╰─┴─┴─┤ + ■ compare_object_to_truth + │ + ○ matched_injected_deepCoadd_catalog_tract_injected_objectTable_tract + +Matching two tract-level catalogs can be done trivially with a ``pipetask run`` command as below: + +.. 
code:: + + pipetask --long-log --log-file $LOGFILE run \ + -b $REPO \ + -i $CONSOLIDATED_CATALOG_COLL \ + -o $MATCHED_CATALOG_COLL \ + -p $SOURCE_INJECTION_DIR/pipelines/match_injected_tract_catalog.yaml \ + -d "instrument='HSC' AND skymap='hsc_rings_v1' AND tract=9813 AND patch=42 AND band='i'" + +*where* + + `$LOGFILE` + The full path to a user-defined output log file. + + `$REPO` + The path to the butler repository. + + `$CONSOLIDATED_CATALOG_COLL` + The name of the consolidated injected input collection. + + `$MATCHED_CATALOG_COLL` + The name of the matched injected output collection. + +.. note:: + + Within ``pipelines/match_injected_tract_catalog.yaml`` there are various config options for pre-matching flag selections, columns to copy from the reference and target catalogs, etc. + +Visualize the matched catalog and compute metrics +================================================= + +One metric to determine the quality of an injection run is completeness, or the ratio of matched sources to injected sources. +The following is an example of a completeness plot using ``matplotlib.pyplot``. + +.. code-block:: python + + from lsst.daf.butler import Butler + import astropy.units as u + import matplotlib.pyplot as plt + import numpy as np + + # Load the matched catalog with the butler. + butler = Butler("/sdf/group/rubin/repo/main") + collections = "u/mccann/DM-41210/RC2" + dtype = "matched_injected_deepCoadd_catalog_tract_injected_objectTable_tract" + tract = 9813 + dataId = {"skymap":"hsc_rings_v1", "tract":tract} + data = butler.get(dtype, collections=collections, dataId=dataId) + + # Define a matched source flag. + matched = np.isfinite(data["match_distance"]) + + # Make a completeness plot. 
+ band="i" + flux = f"ref_{band}_flux" + mags = ((data[flux] * u.nJy).to(u.ABmag)).value + fig, axLeft = plt.subplots() + axRight = axLeft.twinx() + axLeft.tick_params(axis="y", labelcolor="C0") + axLeft.set_ylabel("Fraction Recovered", color="C0") + axLeft.set_xlabel("PSF Magnitude (mag)") + axRight.set_ylabel("Number of Sources") + nInput, bins, _ = axRight.hist( + mags, + range=(np.nanmin(mags), np.nanmax(mags)), + bins=121, + log=True, + histtype="step", + label="Synthetic Inputs", + color="black", + ) + nOutput, _, _ = axRight.hist( + mags[matched], + range=(np.nanmin(mags[matched]), np.nanmax(mags[matched])), + bins=bins, + log=True, + histtype="step", + label="Synthetic Recovered", + color="grey", + ) + xlims = plt.gca().get_xlim() + # Find bin where the fraction recovered first falls below 0.5 + lessThanHalf = np.where((nOutput / nInput < 0.5))[0] + if len(lessThanHalf) == 0: + mag50 = np.nan + else: + mag50 = np.min(bins[lessThanHalf]) + axLeft.plot([xlims[0], mag50], [0.5, 0.5], ls=":", color="grey") + axLeft.plot([mag50, mag50], [0, 0.5], ls=":", color="grey") + plt.xlim(xlims) + fig.legend(loc="outside upper left", ncol=2) + axLeft.axhline(1, color="grey", ls="--") + axLeft.bar( + bins[:-1], + nOutput / nInput, + width=np.diff(bins), + align="edge", + color="C0", + alpha=0.5, + zorder=10, + ) + bboxDict = dict(boxstyle="round", facecolor="white", alpha=0.75) + info50 = "Magnitude at 50% recovered: {:0.2f}".format(mag50) + axLeft.text(0.3, 0.15, info50, transform=fig.transFigure, bbox=bboxDict, zorder=11) + plt.title(f"{tract} {band} completeness") + fig = plt.gcf() + +.. figure:: ../_assets/t9813bi_completeness.png + :name: t9813bi_completeness + :alt: Completeness for coadd-level matched injected catalog (``matched_injected_deepCoadd_catalog_tract_injected_objectTable_tract``) for HSC tract 9813, ``i`` band, visualized using `matplotlib.pyplot`. + :align: center + :width: 100% + + .. 
+ +Wrap Up +======= + +This page has presented methods for consolidating injected catalogs, matching injected inputs with processed outputs, and visualizing a matched catalog. +Currently ``source_injection`` only supports consolidation and matching for coadd-level injection, but in the future these methods may be generalized for use at the visit and exposure level. + +Move on to :ref:`another quick reference guide `, consult the :ref:`FAQs `, or head back to the `main page <..>`_. diff --git a/pipelines/match_injected_tract_catalog.yaml b/pipelines/match_injected_tract_catalog.yaml new file mode 100644 index 0000000..2545bf2 --- /dev/null +++ b/pipelines/match_injected_tract_catalog.yaml @@ -0,0 +1,50 @@ +description: Match tract-level injected_deepCoadd_catalog datasets to injected_objectTable_tract datasets. +tasks: + consolidate_injected_catalogs: + class: lsst.source.injection.utils.consolidate_injected_deepCoadd_catalogs.ConsolidateInjectedCatalogsTask + match_object_to_truth: + class: lsst.pipe.tasks.match_tract_catalog.MatchTractCatalogTask + config: + connections.name_input_cat_ref: injected_deepCoadd_catalog_tract + connections.name_input_cat_target: injected_objectTable_tract + match_tract_catalog.column_ref_order: i_mag + match_tract_catalog.columns_ref_meas: ["ra", "dec"] + match_tract_catalog.columns_target_meas: ["coord_ra", "coord_dec"] + match_tract_catalog.columns_target_err: ["coord_raErr", "coord_decErr"] + match_tract_catalog.columns_ref_copy: ["injected_id", "source_type"] + match_tract_catalog.columns_target_copy: ["objectId"] + match_tract_catalog.columns_ref_select_true: ["injected_isPatchInner"] + match_tract_catalog.columns_ref_select_false: ["injection_flag"] + match_tract_catalog.columns_target_select_true: ["detect_isDeblendedSource", "detect_isPatchInner"] + match_tract_catalog.columns_target_select_false: ["merge_peak_sky"] + match_tract_catalog.match_n_finite_min: 2 + match_tract_catalog.order_ascending: true + python: | + from 
lsst.pipe.tasks.match_tract_catalog_probabilistic import MatchTractCatalogProbabilisticTask + + config.match_tract_catalog.retarget(MatchTractCatalogProbabilisticTask) + compare_object_to_truth: + class: lsst.pipe.tasks.diff_matched_tract_catalog.DiffMatchedTractCatalogTask + config: + connections.name_input_cat_ref: injected_deepCoadd_catalog_tract + connections.name_input_cat_target: injected_objectTable_tract + column_matched_prefix_ref: "ref_" + column_ref_extended: source_type + # TODO: Remove as part of DM-44139 + columns_ref_mag_to_nJy: { + g_mag: "g_flux", r_mag: "r_flux", i_mag: "i_flux", z_mag: "z_flux", y_mag: "y_flux", + } + columns_ref_copy: [ + "injected_id", + "g_mag", "r_mag", "i_mag", "z_mag", "y_mag", + "injection_flag", "injected_isPatchInner", + ] + columns_target_coord_err: ["coord_raErr", "coord_decErr"] + columns_target_copy: [ + "g_psfFlux", "r_psfFlux", "i_psfFlux", "z_psfFlux", "y_psfFlux", + "g_psfFluxErr", "r_psfFluxErr", "i_psfFluxErr", "z_psfFluxErr", "y_psfFluxErr", + "g_psfFlux_flag", "r_psfFlux_flag", "i_psfFlux_flag", "z_psfFlux_flag", "y_psfFlux_flag", + "patch", "detect_isDeblendedSource", "detect_isPatchInner", "detect_isPrimary", "merge_peak_sky", + "refSizeExtendedness", + ] + include_unmatched: true