Skip to content

Commit

Permalink
transvar works with genes that have lowercase c in them + update gtf …
Browse files Browse the repository at this point in the history
…file with single gene rows
  • Loading branch information
manulera committed Aug 7, 2023
1 parent d2c9f18 commit 14ce007
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 18 deletions.
10 changes: 5 additions & 5 deletions api.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,24 +353,24 @@ async def get_residue_at_position(systematic_id: str = Query(example='SPAPB1A10.
return PlainTextResponse(gene['peptide'][position - 1])


@ app.get("/ganno")
async def ganno(variant_description: str = Query(example="I:g.2832796A>T", description='Variant described at the genome level (gDNA)')) -> list[TransvarAnnotation]:
@ app.get("/ganno", summary='Variant described at the genome level (gDNA)', response_model=list[TransvarAnnotation])
async def ganno(variant_description: str = Query(example="II:g.178497T>A", description='Variant described at the genome level (gDNA)')) -> list[TransvarAnnotation]:
try:
return parse_transvar_string(get_transvar_str_annotation('ganno', variant_description))
except ValueError as e:
raise HTTPException(400, str(e))


@ app.get("/canno")
@ app.get("/canno", summary='Variant described at the coding DNA level (cDNA)', response_model=list[TransvarAnnotation])
async def canno(variant_description: str = Query(example="SPAC3F10.09:c.5A>T", description='Variant described at the coding DNA level (cDNA)')) -> list[TransvarAnnotation]:
try:
return parse_transvar_string(get_transvar_str_annotation('canno', variant_description))
except ValueError as e:
raise HTTPException(400, str(e))


@ app.get("/panno")
async def panno(variant_description: str = Query(example="SPAC3F10.09:p.E2L", description='Variant described at the protein level')) -> list[TransvarAnnotation]:
@ app.get("/panno", summary='Variant described at the protein level', response_model=list[TransvarAnnotation])
async def panno(variant_description: str = Query(example="SPBC1198.04c:p.N3A", description='Variant described at the protein level')) -> list[TransvarAnnotation]:
try:
return parse_transvar_string(get_transvar_str_annotation('panno', variant_description))
except ValueError as e:
Expand Down
16 changes: 3 additions & 13 deletions data/pombe_genome.gtf
Original file line number Diff line number Diff line change
Expand Up @@ -4936,7 +4936,6 @@ I PomBase CDS 1170504 1171270 . - 1 transcript_id "SPAC22A12.08c.1"; gene_id "SP
I PomBase CDS 1171362 1171856 . - 1 transcript_id "SPAC22A12.08c.1"; gene_id "SPAC22A12.08c";
I PomBase CDS 1171900 1171956 . - 1 transcript_id "SPAC22A12.08c.1"; gene_id "SPAC22A12.08c";
I PomBase CDS 1171999 1172126 . - 0 transcript_id "SPAC22A12.08c.1"; gene_id "SPAC22A12.08c";
I PomBase gene 1170705 1172209 . - . gene_id "SPAC22A12.08c"; gene_biotype "protein_coding";
I PomBase transcript 1170705 1172209 . - . transcript_id "SPAC22A12.08c.2"; gene_id "SPAC22A12.08c"; transcript_biotype "protein_coding";
I PomBase exon 1170705 1170724 . - . transcript_id "SPAC22A12.08c.2"; gene_id "SPAC22A12.08c";
I PomBase exon 1170801 1171270 . - . transcript_id "SPAC22A12.08c.2"; gene_id "SPAC22A12.08c";
Expand Down Expand Up @@ -24150,7 +24149,6 @@ II PomBase CDS 179322 179387 . - 0 transcript_id "SPBC1198.04c.1"; gene_id "SPBC
II PomBase CDS 179440 180143 . - 2 transcript_id "SPBC1198.04c.1"; gene_id "SPBC1198.04c";
II PomBase CDS 180300 180416 . - 2 transcript_id "SPBC1198.04c.1"; gene_id "SPBC1198.04c";
II PomBase CDS 180461 180608 . - 0 transcript_id "SPBC1198.04c.1"; gene_id "SPBC1198.04c";
II PomBase gene 177469 180628 . - . gene_id "SPBC1198.04c"; gene_biotype "protein_coding";
II PomBase transcript 177469 180628 . - . transcript_id "SPBC1198.04c.2"; gene_id "SPBC1198.04c"; transcript_biotype "protein_coding";
II PomBase exon 177469 177608 . - . transcript_id "SPBC1198.04c.2"; gene_id "SPBC1198.04c";
II PomBase exon 177656 178291 . - . transcript_id "SPBC1198.04c.2"; gene_id "SPBC1198.04c";
Expand Down Expand Up @@ -24843,7 +24841,6 @@ II PomBase exon 339378 340150 . + 0 transcript_id "SPNCRNA.4674.1"; gene_id "SPN
II PomBase gene 340209 340750 . + . gene_id "SPNCRNA.1342"; gene_biotype "ncRNA";
II PomBase transcript 340209 340750 . + . transcript_id "SPNCRNA.1342.1"; gene_id "SPNCRNA.1342"; transcript_biotype "ncRNA";
II PomBase exon 340209 340750 . + 0 transcript_id "SPNCRNA.1342.1"; gene_id "SPNCRNA.1342";
II PomBase gene 340399 340907 . - . gene_id "SPNCRNA.103"; gene_biotype "ncRNA";
II PomBase transcript 340399 340907 . - . transcript_id "SPNCRNA.103.2"; gene_id "SPNCRNA.103"; transcript_biotype "ncRNA";
II PomBase exon 340399 340907 . - 0 transcript_id "SPNCRNA.103.2"; gene_id "SPNCRNA.103";
II PomBase gene 340696 341725 . - . gene_id "SPBC1271.13"; gene_biotype "protein_coding";
Expand Down Expand Up @@ -26443,7 +26440,6 @@ II PomBase CDS 719806 720252 . + 0 transcript_id "SPBC119.04.1"; gene_id "SPBC11
II PomBase gene 719699 720288 . - . gene_id "SPNCRNA.4843"; gene_biotype "ncRNA";
II PomBase transcript 719699 720288 . - . transcript_id "SPNCRNA.4843.1"; gene_id "SPNCRNA.4843"; transcript_biotype "ncRNA";
II PomBase exon 719699 720288 . - 0 transcript_id "SPNCRNA.4843.1"; gene_id "SPNCRNA.4843";
II PomBase gene 719719 720990 . + . gene_id "SPBC119.04"; gene_biotype "protein_coding";
II PomBase transcript 719719 720990 . + . transcript_id "SPBC119.04.2"; gene_id "SPBC119.04"; transcript_biotype "protein_coding";
II PomBase exon 719719 720990 . + . transcript_id "SPBC119.04.2"; gene_id "SPBC119.04";
II PomBase CDS 719806 720252 . + 0 transcript_id "SPBC119.04.2"; gene_id "SPBC119.04";
Expand Down Expand Up @@ -29009,10 +29005,9 @@ II PomBase exon 1306760 1307341 . + 0 transcript_id "SPNCRNA.5123.1"; gene_id "S
II PomBase gene 1307004 1307502 . - . gene_id "SPNCRNA.5124"; gene_biotype "ncRNA";
II PomBase transcript 1307004 1307502 . - . transcript_id "SPNCRNA.5124.1"; gene_id "SPNCRNA.5124"; transcript_biotype "ncRNA";
II PomBase exon 1307004 1307502 . - 0 transcript_id "SPNCRNA.5124.1"; gene_id "SPNCRNA.5124";
II PomBase gene 1307681 1308192 . + . gene_id "SPNCRNA.1715"; gene_biotype "ncRNA";
II PomBase gene 1307681 1308359 . + . gene_id "SPNCRNA.1715"; gene_biotype "ncRNA";
II PomBase transcript 1307681 1308192 . + . transcript_id "SPNCRNA.1715.2"; gene_id "SPNCRNA.1715"; transcript_biotype "ncRNA";
II PomBase exon 1307681 1308192 . + 0 transcript_id "SPNCRNA.1715.2"; gene_id "SPNCRNA.1715";
II PomBase gene 1307681 1308359 . + . gene_id "SPNCRNA.1715"; gene_biotype "ncRNA";
II PomBase transcript 1307681 1308359 . + . transcript_id "SPNCRNA.1715.1"; gene_id "SPNCRNA.1715"; transcript_biotype "ncRNA";
II PomBase exon 1307681 1308359 . + 0 transcript_id "SPNCRNA.1715.1"; gene_id "SPNCRNA.1715";
II PomBase gene 1308634 1308745 . + . gene_id "SPSNORNA.21"; gene_biotype "snoRNA";
Expand Down Expand Up @@ -29503,7 +29498,6 @@ II PomBase exon 1414254 1414547 . + . transcript_id "SPBC17A3.07.1"; gene_id "SP
II PomBase exon 1414603 1416156 . + . transcript_id "SPBC17A3.07.1"; gene_id "SPBC17A3.07";
II PomBase CDS 1414496 1414547 . + 0 transcript_id "SPBC17A3.07.1"; gene_id "SPBC17A3.07";
II PomBase CDS 1414603 1415945 . + 2 transcript_id "SPBC17A3.07.1"; gene_id "SPBC17A3.07";
II PomBase gene 1414440 1416156 . + . gene_id "SPBC17A3.07"; gene_biotype "protein_coding";
II PomBase transcript 1414440 1416156 . + . transcript_id "SPBC17A3.07.2"; gene_id "SPBC17A3.07"; transcript_biotype "protein_coding";
II PomBase exon 1414440 1414547 . + . transcript_id "SPBC17A3.07.2"; gene_id "SPBC17A3.07";
II PomBase exon 1414603 1416156 . + . transcript_id "SPBC17A3.07.2"; gene_id "SPBC17A3.07";
Expand Down Expand Up @@ -44018,7 +44012,7 @@ III PomBase exon 220148 220286 . - 0 transcript_id "SPCC548.02c.1"; gene_id "SPC
III PomBase gene 219734 220102 . + . gene_id "SPNCRNA.6748"; gene_biotype "ncRNA";
III PomBase transcript 219734 220102 . + . transcript_id "SPNCRNA.6748.1"; gene_id "SPNCRNA.6748"; transcript_biotype "ncRNA";
III PomBase exon 219734 220102 . + 0 transcript_id "SPNCRNA.6748.1"; gene_id "SPNCRNA.6748";
III PomBase gene 221149 222403 . - . gene_id "SPCC548.03c"; gene_biotype "protein_coding";
III PomBase gene 221149 222935 . - . gene_id "SPCC548.03c"; gene_biotype "protein_coding";
III PomBase transcript 221149 222403 . - . transcript_id "SPCC548.03c.2"; gene_id "SPCC548.03c"; transcript_biotype "protein_coding";
III PomBase exon 221149 221344 . - . transcript_id "SPCC548.03c.2"; gene_id "SPCC548.03c";
III PomBase exon 221395 221457 . - . transcript_id "SPCC548.03c.2"; gene_id "SPCC548.03c";
Expand All @@ -44030,7 +44024,6 @@ III PomBase CDS 221395 221457 . - 2 transcript_id "SPCC548.03c.2"; gene_id "SPCC
III PomBase CDS 221498 221689 . - 2 transcript_id "SPCC548.03c.2"; gene_id "SPCC548.03c";
III PomBase CDS 221726 222016 . - 2 transcript_id "SPCC548.03c.2"; gene_id "SPCC548.03c";
III PomBase CDS 222065 222347 . - 0 transcript_id "SPCC548.03c.2"; gene_id "SPCC548.03c";
III PomBase gene 221149 222935 . - . gene_id "SPCC548.03c"; gene_biotype "protein_coding";
III PomBase transcript 221149 222935 . - . transcript_id "SPCC548.03c.1"; gene_id "SPCC548.03c"; transcript_biotype "protein_coding";
III PomBase exon 221149 221344 . - . transcript_id "SPCC548.03c.1"; gene_id "SPCC548.03c";
III PomBase exon 221395 221457 . - . transcript_id "SPCC548.03c.1"; gene_id "SPCC548.03c";
Expand Down Expand Up @@ -49593,7 +49586,7 @@ III PomBase gene 1578091 1579074 . - . gene_id "SPCC162.05"; gene_biotype "prote
III PomBase transcript 1578091 1579074 . - . transcript_id "SPCC162.05.1"; gene_id "SPCC162.05"; transcript_biotype "protein_coding";
III PomBase exon 1578091 1579074 . - . transcript_id "SPCC162.05.1"; gene_id "SPCC162.05";
III PomBase CDS 1578220 1579044 . - 0 transcript_id "SPCC162.05.1"; gene_id "SPCC162.05";
III PomBase gene 1579591 1581396 . - . gene_id "SPCC162.04c"; gene_biotype "protein_coding";
III PomBase gene 1579591 1581928 . - . gene_id "SPCC162.04c"; gene_biotype "protein_coding";
III PomBase transcript 1579591 1581396 . - . transcript_id "SPCC162.04c.2"; gene_id "SPCC162.04c"; transcript_biotype "protein_coding";
III PomBase exon 1579591 1580268 . - . transcript_id "SPCC162.04c.2"; gene_id "SPCC162.04c";
III PomBase exon 1580319 1580381 . - . transcript_id "SPCC162.04c.2"; gene_id "SPCC162.04c";
Expand All @@ -49605,7 +49598,6 @@ III PomBase CDS 1580319 1580381 . - 2 transcript_id "SPCC162.04c.2"; gene_id "SP
III PomBase CDS 1580422 1580613 . - 2 transcript_id "SPCC162.04c.2"; gene_id "SPCC162.04c";
III PomBase CDS 1580650 1581006 . - 2 transcript_id "SPCC162.04c.2"; gene_id "SPCC162.04c";
III PomBase CDS 1581055 1581337 . - 0 transcript_id "SPCC162.04c.2"; gene_id "SPCC162.04c";
III PomBase gene 1579591 1581928 . - . gene_id "SPCC162.04c"; gene_biotype "protein_coding";
III PomBase transcript 1579591 1581928 . - . transcript_id "SPCC162.04c.1"; gene_id "SPCC162.04c"; transcript_biotype "protein_coding";
III PomBase exon 1579591 1580268 . - . transcript_id "SPCC162.04c.1"; gene_id "SPCC162.04c";
III PomBase exon 1580319 1580381 . - . transcript_id "SPCC162.04c.1"; gene_id "SPCC162.04c";
Expand Down Expand Up @@ -51317,7 +51309,6 @@ III PomBase CDS 2019206 2019288 . + 2 transcript_id "SPCC1906.03.1"; gene_id "SP
III PomBase gene 2017583 2017791 . - . gene_id "SPNCRNA.7588"; gene_biotype "ncRNA";
III PomBase transcript 2017583 2017791 . - . transcript_id "SPNCRNA.7588.1"; gene_id "SPNCRNA.7588"; transcript_biotype "ncRNA";
III PomBase exon 2017583 2017791 . - 0 transcript_id "SPNCRNA.7588.1"; gene_id "SPNCRNA.7588";
III PomBase gene 2018018 2019686 . + . gene_id "SPCC1906.03"; gene_biotype "protein_coding";
III PomBase transcript 2018018 2019686 . + . transcript_id "SPCC1906.03.2"; gene_id "SPCC1906.03"; transcript_biotype "protein_coding";
III PomBase exon 2018018 2018397 . + . transcript_id "SPCC1906.03.2"; gene_id "SPCC1906.03";
III PomBase exon 2018447 2018824 . + . transcript_id "SPCC1906.03.2"; gene_id "SPCC1906.03";
Expand Down Expand Up @@ -51918,7 +51909,6 @@ III PomBase CDS 2146492 2146770 . + 2 transcript_id "SPCC1620.02.1"; gene_id "SP
III PomBase CDS 2146807 2146998 . + 2 transcript_id "SPCC1620.02.1"; gene_id "SPCC1620.02";
III PomBase CDS 2147039 2147101 . + 2 transcript_id "SPCC1620.02.1"; gene_id "SPCC1620.02";
III PomBase CDS 2147152 2147258 . + 2 transcript_id "SPCC1620.02.1"; gene_id "SPCC1620.02";
III PomBase gene 2146063 2147814 . + . gene_id "SPCC1620.02"; gene_biotype "protein_coding";
III PomBase transcript 2146063 2147814 . + . transcript_id "SPCC1620.02.2"; gene_id "SPCC1620.02"; transcript_biotype "protein_coding";
III PomBase exon 2146063 2146442 . + . transcript_id "SPCC1620.02.2"; gene_id "SPCC1620.02";
III PomBase exon 2146492 2146770 . + . transcript_id "SPCC1620.02.2"; gene_id "SPCC1620.02";
Expand Down
3 changes: 3 additions & 0 deletions set_up_transvar.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,6 @@ transvar ganno -i 'I:g.2832795T>A' --ensembl data/pombe_genome.gtf.transvardb --

# Hacky way to use the functions inside another script
cp $(which transvar) ./transvar_main_script.py

transvar panno -i 'SPBC1198.04c.1:p.T566S' --ensembl data/pombe_genome.gtf.transvardb --reference data/pombe_genome.fa
transvar panno -i 'SPAPB1A10.09:p.S372_N374delinsAAA' --ensembl data/pombe_genome.gtf.transvardb --reference data/pombe_genome.fa --gseq
14 changes: 14 additions & 0 deletions transvar_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,19 @@ def parse_transvar_string(transvar_str: str) -> list[TransvarAnnotation]:
return [TransvarAnnotation.from_list(t.split('\t')) for t in transvar_list]


class TransvarCustomString(str):
    """``str`` subclass whose :meth:`upper` is a no-op.

    Hacky class to circumvent https://github.com/zwdzwd/transvar/issues/59
    (presumably transvar upper-cases the variant description, which would
    break identifiers containing lowercase letters such as
    ``SPBC1198.04c`` -- confirm against the linked issue).

    ``strip`` and ``split`` are overridden as well so that the pieces they
    produce are still ``TransvarCustomString`` instances; otherwise the
    first ``strip()``/``split()`` would hand back plain ``str`` objects and
    a later ``upper()`` on them would change the case again.  Any other
    ``str`` method (slicing, ``replace``, ...) still returns a plain ``str``.
    """

    def upper(self):
        """Return the string itself, leaving the case untouched."""
        return self

    def strip(self, chars=None, /):
        """Like ``str.strip`` but return a ``TransvarCustomString``.

        ``chars`` is positional-only, mirroring the builtin signature.
        """
        return TransvarCustomString(super().strip(chars))

    def split(self, sep=None, maxsplit=-1):
        """Like ``str.split`` but return a list of ``TransvarCustomString``.

        ``sep`` and ``maxsplit`` may be passed positionally or by keyword,
        exactly as with the builtin ``str.split``.
        """
        # Re-wrap every piece so the upper() override survives the split.
        return [
            TransvarCustomString(piece)
            for piece in super().split(sep, maxsplit)
        ]


def get_transvar_str_annotation(variant_type: str, variant_description: str) -> str:

if variant_type not in ['ganno', 'canno', 'panno']:
Expand All @@ -56,6 +69,7 @@ def get_transvar_str_annotation(variant_type: str, variant_description: str) ->
p.set_defaults(func=partial(main_anno, at='p'))

args = parser.parse_args([variant_type, '-i', variant_description, '--ensembl', 'data/pombe_genome.gtf.transvardb', '--reference', 'data/pombe_genome.fa'])
args.i = TransvarCustomString(args.i)

output_stream = io.StringIO()
with redirect_stdout(output_stream):
Expand Down

0 comments on commit 14ce007

Please sign in to comment.