-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
#56 - Fix occasional UTA duplicated exons
- Loading branch information
Showing
3 changed files
with
18 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,15 @@ | ||
-- Rendered diff hunk for the GRCh37 export: both sides of the change are interleaved and the +/- markers are not shown.
-- The lines using es / origin / exon / exon_aln appear to be the removed side; the lines using aln_v (tx_exon_aln_v) the added side.
-- The query emits one CSV row per transcript.ac (group by) into 'uta_20210129_grch37.csv', keeping only
-- 'splign' alignments whose alt_ac is one of the NC_0000xx accessions listed in the WHERE clause.
-- NOTE(review): the aln_v exon aggregates are ordered by aln_v.alt_exon_id, whereas the es-based lines order by exon.ord;
--   confirm that id order matches exon order.
-- NOTE(review): the left outer join to associated_accessions can produce several rows per exon when a transcript has
--   more than one pro_ac; the non-distinct exon_starts / exon_ends / cigars aggregates would then repeat values. Confirm.
\copy (SELECT transcript.ac, string_agg(distinct transcript.hgnc, ',') as hgnc, string_agg(distinct origin.url, ',') as origin_url, | ||
string_agg(distinct es.alt_ac::varchar, ',') as contig, | ||
string_agg(distinct es.alt_strand::varchar, ',') as strand, | ||
\copy (SELECT transcript.ac, string_agg(distinct transcript.hgnc, ',') as hgnc, 'http://www.ncbi.nlm.nih.gov/refseq/' as origin_url, | ||
string_agg(distinct aln_v.alt_ac::varchar, ',') as contig, | ||
string_agg(distinct aln_v.alt_strand::varchar, ',') as strand, | ||
transcript.cds_start_i, | ||
transcript.cds_end_i, | ||
string_agg(exon.start_i::varchar, ',' order by exon.ord) as exon_starts, | ||
string_agg(exon.end_i::varchar, ',' order by exon.ord) as exon_ends, | ||
string_agg(exon_aln.cigar, ',' order by exon.ord) as cigars, | ||
string_agg(aln_v.alt_start_i::varchar, ',' order by aln_v.alt_exon_id) as exon_starts, | ||
string_agg(aln_v.alt_end_i::varchar, ',' order by aln_v.alt_exon_id) as exon_ends, | ||
string_agg(aln_v.cigar, ',' order by aln_v.alt_exon_id) as cigars, | ||
string_agg(distinct aa.pro_ac, ',' order by aa.pro_ac) as protein | ||
from uta_20210129.transcript transcript | ||
inner join uta_20210129.exon_set es on (transcript.ac = es.tx_ac AND alt_aln_method = 'splign') | ||
inner join uta_20210129.origin origin on (transcript.origin_id = origin.origin_id) | ||
inner join uta_20210129.exon as exon on (es.exon_set_id = exon.exon_set_id) | ||
inner join uta_20210129.exon_aln exon_aln on (exon_aln.alt_exon_id = exon.exon_id) | ||
inner join uta_20210129.tx_exon_aln_v aln_v on (transcript.ac = aln_v.tx_ac AND alt_aln_method = 'splign') | ||
left outer join uta_20210129.associated_accessions aa on (transcript.ac = aa.tx_ac) | ||
WHERE es.alt_ac in | ||
('NC_000001.10', 'NC_000002.11', 'NC_000003.11', 'NC_000004.11', 'NC_000005.9', 'NC_000006.11', 'NC_000007.13', 'NC_000008.10', 'NC_000009.11', 'NC_000010.10', 'NC_000011.9', 'NC_000012.11', 'NC_000013.10', 'NC_000014.8', 'NC_000015.9', 'NC_000016.9', 'NC_000017.10', 'NC_000018.9', 'NC_000019.9', 'NC_000020.10', 'NC_000021.8', 'NC_000022.10', 'NC_000023.10', 'NC_000024.9') and origin.origin_id not in (10, 11) | ||
WHERE aln_v.alt_ac in | ||
('NC_000001.10', 'NC_000002.11', 'NC_000003.11', 'NC_000004.11', 'NC_000005.9', 'NC_000006.11', 'NC_000007.13', 'NC_000008.10', 'NC_000009.11', 'NC_000010.10', 'NC_000011.9', 'NC_000012.11', 'NC_000013.10', 'NC_000014.8', 'NC_000015.9', 'NC_000016.9', 'NC_000017.10', 'NC_000018.9', 'NC_000019.9', 'NC_000020.10', 'NC_000021.8', 'NC_000022.10', 'NC_000023.10', 'NC_000024.9') | ||
group by transcript.ac) TO 'uta_20210129_grch37.csv' CSV HEADER; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,15 @@ | ||
-- Rendered diff hunk for the GRCh38 export: both sides of the change are interleaved and the +/- markers are not shown.
-- The lines using es / origin / exon / exon_aln appear to be the removed side; the lines using aln_v (tx_exon_aln_v) the added side.
-- The query emits one CSV row per transcript.ac (group by) into 'uta_20210129_grch38.csv', keeping only
-- 'splign' alignments whose alt_ac is one of the NC_0000xx accessions listed in the WHERE clause.
-- Fix: the accession list line is shared by both sides of the diff and used to end with
--   "and origin.origin_id not in (10, 11)". The aln_v version no longer joins uta_20210129.origin, so that
--   predicate referenced a table missing from the FROM clause. It is dropped here, matching the GRCh37
--   export's aln_v WHERE clause, which carries no origin filter.
-- NOTE(review): the aln_v exon aggregates are ordered by aln_v.alt_exon_id, whereas the es-based lines order by exon.ord;
--   confirm that id order matches exon order.
-- NOTE(review): the left outer join to associated_accessions can produce several rows per exon when a transcript has
--   more than one pro_ac; the non-distinct exon_starts / exon_ends / cigars aggregates would then repeat values. Confirm.
\copy (SELECT transcript.ac, string_agg(distinct transcript.hgnc, ',') as hgnc, string_agg(distinct origin.url, ',') as origin_url, | ||
string_agg(distinct es.alt_ac::varchar, ',') as contig, | ||
string_agg(distinct es.alt_strand::varchar, ',') as strand, | ||
\copy (SELECT transcript.ac, string_agg(distinct transcript.hgnc, ',') as hgnc, 'http://www.ncbi.nlm.nih.gov/refseq/' as origin_url, | ||
string_agg(distinct aln_v.alt_ac::varchar, ',') as contig, | ||
string_agg(distinct aln_v.alt_strand::varchar, ',') as strand, | ||
transcript.cds_start_i, | ||
transcript.cds_end_i, | ||
string_agg(exon.start_i::varchar, ',' order by exon.ord) as exon_starts, | ||
string_agg(exon.end_i::varchar, ',' order by exon.ord) as exon_ends, | ||
string_agg(exon_aln.cigar, ',' order by exon.ord) as cigars, | ||
string_agg(aln_v.alt_start_i::varchar, ',' order by aln_v.alt_exon_id) as exon_starts, | ||
string_agg(aln_v.alt_end_i::varchar, ',' order by aln_v.alt_exon_id) as exon_ends, | ||
string_agg(aln_v.cigar, ',' order by aln_v.alt_exon_id) as cigars, | ||
string_agg(distinct aa.pro_ac, ',' order by aa.pro_ac) as protein | ||
from uta_20210129.transcript transcript | ||
inner join uta_20210129.exon_set es on (transcript.ac = es.tx_ac AND alt_aln_method = 'splign') | ||
inner join uta_20210129.origin origin on (transcript.origin_id = origin.origin_id) | ||
inner join uta_20210129.exon as exon on (es.exon_set_id = exon.exon_set_id) | ||
inner join uta_20210129.exon_aln exon_aln on (exon_aln.alt_exon_id = exon.exon_id) | ||
inner join uta_20210129.tx_exon_aln_v aln_v on (transcript.ac = aln_v.tx_ac AND alt_aln_method = 'splign') | ||
left outer join uta_20210129.associated_accessions aa on (transcript.ac = aa.tx_ac) | ||
WHERE es.alt_ac in | ||
WHERE aln_v.alt_ac in | ||
('NC_000001.11', 'NC_000002.12', 'NC_000003.12', 'NC_000004.12', 'NC_000005.10', 'NC_000006.12', 'NC_000007.14', 'NC_000008.11', 'NC_000009.12', 'NC_000010.11', 'NC_000011.10', 'NC_000012.12', 'NC_000013.11', 'NC_000014.9', 'NC_000015.10', 'NC_000016.10', 'NC_000017.11', 'NC_000018.10', 'NC_000019.10', 'NC_000020.11', 'NC_000021.9', 'NC_000022.11', 'NC_000023.11', 'NC_000024.10') | ||
group by transcript.ac) TO 'uta_20210129_grch38.csv' CSV HEADER; |