-
Notifications
You must be signed in to change notification settings - Fork 1
/
test_sequence_indexing.py
55 lines (48 loc) · 1.9 KB
/
test_sequence_indexing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
"""
These tests only work with PomBase data.
"""
import unittest
import pickle
from genome_functions import get_nt_at_gene_coord
from Bio.SeqIO import parse
# Load the pickled genome once at import time; the tests below look genes up
# in it by ID. The pickle is a local project data file -- never point this at
# untrusted input, since unpickling can execute arbitrary code.
with open('data/genome.pickle', 'rb') as genome_file:
    contig_genome = pickle.load(genome_file)
class SequenceIndexingTest(unittest.TestCase):
    """Check sequence lookups in ``contig_genome`` against known sequences.

    Relies on the module-level ``contig_genome`` mapping (gene ID -> gene
    record) loaded from the genome pickle.
    """

    def test_nts(self):
        """Nucleotides at gene coordinates match the expected sequences.

        Positive coordinates 1, 2, ... are compared with ``downstream`` read
        left to right; negative coordinates -1, -2, ... are compared with
        ``upstream`` read right to left.
        """
        # Coding and non-coding examples, from the +1 and -1 strands
        test_genes = [
            {
                'id': 'SPAPB1A10.09',
                'downstream': 'ATGCAAACAGTAATGATGG',
                'upstream': 'TCATTTACATCAACCGGTTCA',
            },
            {
                'id': 'SPAPB1A10.10c',
                'downstream': 'ATGTCGGCTCAGAAAAGGG',
                'upstream': 'AGGTACGACAGAATATACTTCA',
            },
            {
                'id': 'SPNCRNA.2846',
                'downstream': 'ACTTCTTTTTGCTTGCAAAGTT',
                'upstream': 'CTTTTCTTTTTCAGCGGAAAAA',
            },
            {
                'id': 'SPNCRNA.2847',
                'downstream': 'ACATTATAAACAATTACACAACAATCGGCCCCTC',
                'upstream': 'ACTGAGTCAAAAGACTTCGAGTTATTC',
            },
        ]
        for g in test_genes:
            # One sub-test per gene, so a failure names the gene and the
            # remaining genes are still checked.
            with self.subTest(gene=g['id']):
                gene = contig_genome[g['id']]
                contig = gene['contig']

                # Check that downstream matches; gene coordinates start at 1.
                for coord, expected in enumerate(g['downstream'], start=1):
                    self.assertEqual(
                        get_nt_at_gene_coord(coord, gene, contig),
                        expected,
                        msg='gene coordinate {}'.format(coord),
                    )

                # Check that upstream matches.
                # We start at 1 because the first negative index is -1, and
                # walk the expected string backwards so that -1 is its last
                # character.
                for offset, expected in enumerate(g['upstream'][::-1], start=1):
                    self.assertEqual(
                        get_nt_at_gene_coord(-offset, gene, contig),
                        expected,
                        msg='gene coordinate {}'.format(-offset),
                    )

    def test_aas(self):
        """Stored peptide of each gene equals its record in the test FASTA."""
        for seq in parse('test_data/test_peptides.fasta', 'fasta'):
            # Sub-test per record so a mismatch reports the offending ID.
            with self.subTest(gene=seq.id):
                self.assertEqual(contig_genome[seq.id]['peptide'], seq.seq)