Skip to content

Commit

Permalink
fix!: Expect user to supply valid, case-sensitive HGNC symbol (#375)
Browse files Browse the repository at this point in the history
closes #374
  • Loading branch information
jarbesfeld authored Oct 29, 2024
1 parent e9a917e commit f1448f4
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 22 deletions.
24 changes: 11 additions & 13 deletions src/cool_seq_tool/mappers/exon_genomic_coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,9 @@ class GenomicTxSeg(BaseModelForbidExtra):
"""Model for representing a boundary for a transcript segment."""

seg: TxSegment | None = Field(None, description="Transcript segment.")
gene: StrictStr | None = Field(None, description="HGNC gene symbol.")
gene: StrictStr | None = Field(
None, description="Valid, case-sensitive HGNC gene symbol."
)
genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
errors: list[StrictStr] = Field([], description="Error messages.")
Expand Down Expand Up @@ -139,7 +141,9 @@ def check_errors(cls, values: dict) -> dict: # noqa: N805
class GenomicTxSegService(BaseModelForbidExtra):
"""Service model for genomic and transcript data."""

gene: StrictStr | None = Field(None, description="HGNC gene symbol.")
gene: StrictStr | None = Field(
None, description="Valid, case-sensitive HGNC gene symbol."
)
genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
seg_start: TxSegment | None = Field(None, description="Start transcript segment.")
Expand Down Expand Up @@ -292,7 +296,7 @@ async def tx_segment_to_genomic(
('NC_000001.11', 154192135, 154170399)
:param transcript: RefSeq transcript accession
:param gene: HGNC gene symbol
:param gene: Valid, case-sensitive HGNC gene symbol
:param exon_start: Starting transcript exon number (1-based). If not provided,
must provide ``exon_end``
:param exon_start_offset: Starting exon offset
Expand Down Expand Up @@ -335,9 +339,6 @@ async def tx_segment_to_genomic(
if errors:
return _return_service_errors(errors)

if gene:
gene = gene.upper()

# Get aligned genomic data (hgnc gene, alt_ac, alt_start_i, alt_end_i, strand)
# for exon(s)
(
Expand Down Expand Up @@ -455,7 +456,7 @@ async def genomic_to_tx_segment(
following the breakpoint for the 3' end. For the negative strand, adjacent
is defined as the exon following the breakpoint for the 5' end and the exon
preceding the breakpoint for the 3' end.
:param gene: gene name. Ideally, HGNC symbol. Must be given if no ``transcript``
:param gene: A valid, case-sensitive HGNC symbol. Must be given if no ``transcript``
value is provided.
:param coordinate_type: Coordinate type for ``seg_start_genomic`` and
``seg_end_genomic``
Expand All @@ -473,9 +474,6 @@ async def genomic_to_tx_segment(
if errors:
return _return_service_errors(errors)

if gene is not None:
gene = gene.upper()

params = {}

if seg_start_genomic:
Expand Down Expand Up @@ -630,7 +628,7 @@ async def _get_genomic_aln_coords(
must provide ``tx_exon_end``
:param tx_exon_end: Transcript's exon end coordinates. If not provided, must
provide ``tx_exon_start``
:param gene: HGNC gene symbol
:param gene: A valid, case-sensitive HGNC gene symbol
:return: Tuple containing aligned genomic data for start and end exon and
warnings if found
"""
Expand Down Expand Up @@ -755,7 +753,7 @@ async def _genomic_to_tx_segment(
:param transcript: The transcript to use. If this is not given, we will try the
following transcripts: MANE Select, MANE Clinical Plus, Longest Remaining
Compatible Transcript
:param gene: HGNC gene symbol
:param gene: Valid, case-sensitive HGNC gene symbol
:param get_nearest_transcript_junction: If ``True``, this will return the
adjacent exon if the position specified by ``seg_start_genomic`` or
``seg_end_genomic`` does not occur on an exon. For the positive strand, adjacent
Expand Down Expand Up @@ -1062,7 +1060,7 @@ async def _get_tx_seg_genomic_metadata(
:param genomic_ac: Genomic RefSeq accession
:param genomic_pos: Genomic position where the transcript segment occurs
:param is_seg_start: Whether or not ``genomic_pos`` represents the start position.
:param gene: HGNC gene symbol
:param gene: Valid, case-sensitive HGNC gene symbol
:param tx_ac: Transcript RefSeq accession. If not provided, will use MANE
transcript
:return: Transcript segment data and associated genomic metadata
Expand Down
12 changes: 3 additions & 9 deletions tests/mappers/test_exon_genomic_coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -1162,7 +1162,7 @@ async def test_wee1(test_egc_mapper, wee1_exon2_exon11, mane_wee1_exon2_exon11):
"seg_start_genomic": 9597639,
"seg_end_genomic": 9609996,
"transcript": "NM_003390.3",
"gene": "wee1",
"gene": "WEE1",
}
g_to_t_resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
genomic_tx_seg_service_checks(g_to_t_resp, wee1_exon2_exon11)
Expand All @@ -1177,7 +1177,7 @@ async def test_wee1(test_egc_mapper, wee1_exon2_exon11, mane_wee1_exon2_exon11):
"genomic_ac": "NC_000011.9",
"seg_start_genomic": 9597639, # GRCh38 coords: 9576092
"seg_end_genomic": 9609996, # GRCh38 coords: 9588449
"gene": "wee1",
"gene": "WEE1",
}
g_to_t_resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
genomic_tx_seg_service_checks(g_to_t_resp, mane_wee1_exon2_exon11)
Expand Down Expand Up @@ -1216,12 +1216,6 @@ async def test_transcript_to_genomic(
expected.seg_end.genomic_location.start = 154170399
genomic_tx_seg_service_checks(resp, expected)

resp = await test_egc_mapper.tx_segment_to_genomic(
exon_start=None, exon_end=8, gene="tpm3", transcript="NM_152263.3"
)
expected.seg_end.genomic_location.start = 154170399
genomic_tx_seg_service_checks(resp, expected)

expected = tpm3_exon1_exon8.model_copy(deep=True)
resp = await test_egc_mapper.tx_segment_to_genomic(
exon_start=1, exon_end=8, exon_end_offset=-5, transcript="NM_152263.3"
Expand Down Expand Up @@ -1371,7 +1365,7 @@ async def test_invalid(test_egc_mapper):
gene="dummy gene",
)
genomic_tx_seg_service_checks(resp, is_valid=False)
assert resp.errors == ["Expected gene, DUMMY GENE, but found TPM3"]
assert resp.errors == ["Expected gene, dummy gene, but found TPM3"]

# Invalid accession
resp = await test_egc_mapper.genomic_to_tx_segment(
Expand Down

0 comments on commit f1448f4

Please sign in to comment.