Skip to content
This repository has been archived by the owner on Aug 1, 2023. It is now read-only.

Commit

Permalink
added functions for storing agr primer based typing
Browse files Browse the repository at this point in the history
  • Loading branch information
rpetit3 committed May 24, 2018
1 parent 349ac86 commit 523ba1b
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 4 deletions.
3 changes: 3 additions & 0 deletions sample/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,7 @@ def get_file_list(is_paired):
'virulence_summary': '{0}/virulence/summary.csv',
'virulence_assembled_seqs': '{0}/virulence/assembled_seqs.fa.gz',
'virulence_debug_report': '{0}/virulence/debug.report.tsv',
'virulence_agr_primers': '{0}/agr/primers.json',

'timeline': '{0}/staphopia-timeline.html',
'version': '{0}/staphopia-version.txt'
Expand Down Expand Up @@ -429,6 +430,8 @@ def get_file_list(is_paired):

'variants': '{0}/variants/{1}.variants.vcf.gz',

'virulence_agr_primers': '{0}/agr/primers.json',

'timeline': '{0}/staphopia-timeline.html',
'version': '{0}/staphopia-version.txt'
}
6 changes: 3 additions & 3 deletions variant/management/commands/insert_variant_reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,11 +198,11 @@ def create_snp(self, record, reference, annotation, feature):

reference_codon=(
'.' if record.INFO['RefCodon'][0] is None
else record.INFO['RefCodon'][0]
else record.INFO['RefCodon']
),
alternate_codon=(
'.' if record.INFO['AltCodon'][0] is None
else record.INFO['AltCodon'][0]
else record.INFO['AltCodon']
),
reference_amino_acid=(
record.INFO['RefAminoAcid'] if record.INFO['RefAminoAcid']
Expand All @@ -222,7 +222,7 @@ def create_snp(self, record, reference, annotation, feature):
),
amino_acid_change=(
'.' if record.INFO['AminoAcidChange'][0] is None
else record.INFO['AminoAcidChange'][0]
else record.INFO['AminoAcidChange']
),
is_synonymous=record.INFO['IsSynonymous'],
is_transition=record.INFO['IsTransition'],
Expand Down
73 changes: 73 additions & 0 deletions variant/management/commands/update_snps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
"""Update SNPs in the database."""
import sys
from cyvcf2 import VCF
import time

from django.db import connection, transaction
from django.db.utils import IntegrityError
from django.core.management.base import BaseCommand, CommandError

from staphopia.utils import timeit
from variant.models import Reference, SNP


def get_reference_instance(reference_name):
"""Get reference instance."""
try:
return Reference.objects.get(name=reference_name)
except IntegrityError:
raise CommandError('Error getting/saving reference information')


class Command(BaseCommand):
"""Update SNPs in the database."""

help = 'Update SNPs in the database.'

def add_arguments(self, parser):
"""Command line arguements."""
parser.add_argument('input', metavar='INPUT_VCF',
help=('Annotated VCF formated file to '
'be inserted'))
parser.add_argument('--compressed', action='store_true',
help='Input VCF is gzipped.')

def handle(self, *args, **opts):
"""Insert results to database."""
# Open VCF for reading
try:
vcf_reader = VCF(opts['input'])
except IOError:
raise CommandError('{0} does not exist'.format(input))

# Get reference info
reference_obj = get_reference_instance(vcf_reader.seqnames[0])

# Read through VCF
start_time = time.time()
count = 0
with connection.cursor() as cursor:
for record in vcf_reader:
if record.is_snp:
sql = """UPDATE variant_snp
SET reference_codon='{0}', alternate_codon='{1}',
amino_acid_change='{2}'
WHERE reference_id={3} AND reference_position={4} AND
alternate_base='{5}';""".format(
'.' if record.INFO['RefCodon'][0] is None else record.INFO['RefCodon'],
'.' if record.INFO['AltCodon'][0] is None else record.INFO['AltCodon'],
'.' if record.INFO['AminoAcidChange'][0] is None else record.INFO['AminoAcidChange'],
reference_obj.pk,
record.POS,
record.ALT[0]
)
cursor.execute(sql)
count += 1
if count % 100000 == 0:
total_time = f'{time.time() - start_time:.2f}'
rate = f'{100000 / float(total_time):.2f}'
print(''.join([
f'Processed 100k, Total {count} SNPs ',
f'(took {total_time}s, {rate} snp/s)'
]))
start_time = time.time()
29 changes: 29 additions & 0 deletions virulence/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from django.contrib.postgres.fields import JSONField

from sample.models import Sample
from staphopia.models import BlastQuery
from version.models import Version


Expand Down Expand Up @@ -40,3 +41,31 @@ class AribaSequence(models.Model):

class Meta:
unique_together = ('sample', 'version')


class AgrPrimers(models.Model):
"""BLAST hits against SCCmec primers."""
sample = models.ForeignKey(Sample, on_delete=models.CASCADE)
version = models.ForeignKey(Version, on_delete=models.CASCADE,
related_name='agr_primers_version')
query = models.ForeignKey(BlastQuery, on_delete=models.CASCADE)
contig = models.PositiveIntegerField()

bitscore = models.PositiveSmallIntegerField()
evalue = models.DecimalField(max_digits=7, decimal_places=2)
identity = models.PositiveSmallIntegerField()
mismatch = models.PositiveSmallIntegerField()
gaps = models.PositiveSmallIntegerField()
hamming_distance = models.PositiveSmallIntegerField()
query_from = models.PositiveSmallIntegerField()
query_to = models.PositiveSmallIntegerField()
hit_from = models.PositiveIntegerField()
hit_to = models.PositiveIntegerField()
align_len = models.PositiveSmallIntegerField()

qseq = models.TextField()
hseq = models.TextField()
midline = models.TextField()

class Meta:
unique_together = ('sample', 'version', 'query')
11 changes: 10 additions & 1 deletion virulence/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@
from django.db.utils import IntegrityError
from django.core.management.base import CommandError

from assembly.tools import get_contigs
from sccmec.tools import insert_blast
from staphopia.utils import read_fasta, timeit
from virulence.models import Ariba, AribaSequence, Cluster
from virulence.models import Ariba, AribaSequence, Cluster, AgrPrimers


@timeit
Expand All @@ -23,6 +25,13 @@ def insert_virulence(sample, version, files, force=False):

results, sequences = read_report(files)

contigs = {}
for contig in get_contigs(sample, version):
contigs[contig.spades] = int(contig.spades.split('_')[1])

insert_blast(sample, version, files['virulence_agr_primers'], AgrPrimers,
contigs)

try:
Ariba.objects.create(sample=sample, version=version, results=results)
except IntegrityError as e:
Expand Down

0 comments on commit 523ba1b

Please sign in to comment.