Skip to content

Commit

Permalink
issue #76 - SeqAuto API - better handling of re-sending data + uniq c…
Browse files Browse the repository at this point in the history
…onstraints, update data_state properly, cleanup unused files
  • Loading branch information
davmlaw committed Oct 17, 2024
1 parent a005524 commit a27a859
Show file tree
Hide file tree
Showing 12 changed files with 52 additions and 382 deletions.
4 changes: 4 additions & 0 deletions seqauto/models/models_seqauto.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,10 @@ class SeqAutoRecord(TimeStampedModel):
file_last_modified = models.FloatField(default=0.0)
hash = models.TextField(blank=True) # Not used for everything
is_valid = models.BooleanField(default=False) # Set in save
# data_state was originally used to create 'expected' objects (e.g. VCFs for BAM files),
# and was then updated based on whether the file turned up. If the file disappeared, it was set to DELETED.
# With the API, we assume anything sent to us is COMPLETE.
# We will probably remove this field in the future as we go API-only.
data_state = models.CharField(max_length=1, choices=DataState.choices)

def save(self, force_insert=False, force_update=False, using=None,
Expand Down
40 changes: 24 additions & 16 deletions seqauto/serializers/seqauto_qc_serializers.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,22 @@
from Bio.Align import write
from rest_framework import serializers

from genes.models import CustomTextGeneList, ActiveSampleGeneList
from genes.serializers import SampleGeneListSerializer, GeneCoverageCollectionSerializer
from library.utils import Value
from seqauto.models import IlluminaFlowcellQC, QCGeneList, QC, QCGeneCoverage, QCExecSummary, FastQC, SequencingSample, \
SampleSheet, SequencingRun, BamFile, VCFFile
from seqauto.serializers.sequencing_serializers import SampleSheetLookupSerializer, FastqSerializer, SeqAutoRecordMixin, \
from seqauto.serializers.sequencing_serializers import SampleSheetLookupSerializer, FastqSerializer, \
BamFilePathSerializer, VCFFilePathSerializer, SequencingSampleLookupSerializer
from snpdb.models import DataState


class FastQCSerializer(SeqAutoRecordMixin, serializers.ModelSerializer):
class FastQCSerializer(serializers.ModelSerializer):
    """ Serializer for FastQC records, with the related fastq nested via FastqSerializer """
    # Nested representation of the fastq this FastQC record belongs to
    fastq = FastqSerializer()

    class Meta:
        model = FastQC
        # Expose every model field
        fields = "__all__"


class IlluminaFlowcellQCSerializer(SeqAutoRecordMixin, serializers.ModelSerializer):
class IlluminaFlowcellQCSerializer(serializers.ModelSerializer):
sample_sheet = SampleSheetLookupSerializer()

class Meta:
Expand All @@ -28,7 +25,7 @@ class Meta:
exclude = ("sequencing_run", ) # Already part of sample_sheet


class QCSerializer(SeqAutoRecordMixin, serializers.ModelSerializer):
class QCSerializer(serializers.ModelSerializer):
# Instead of dealing with all the bam/vcf etc - we'll just deal with sequencing_sample and
# assume we're using the latest ones associated with that
sequencing_sample = SequencingSampleLookupSerializer()
Expand All @@ -49,13 +46,24 @@ def get_object(data):
sample_name = sequencing_sample_data["sample_name"]
sequencing_sample = SequencingSample.objects.get(sample_sheet=sample_sheet, sample_name=sample_name)
bam_file_data = data.pop("bam_file")
bam_file = BamFile.objects.get(path=bam_file_data["path"],
sequencing_run=sequencing_run,
unaligned_reads__sequencing_sample=sequencing_sample)
# Occasionally there can be multiple matching records; we don't care which one, so take the first
bam_file_kwargs = {
"path": bam_file_data["path"],
"sequencing_run": sequencing_run,
"unaligned_reads__sequencing_sample": sequencing_sample
}
bam_file = BamFile.objects.filter(**bam_file_kwargs).first()
if not bam_file:
raise BamFile.DoesNotExist(f"No bam file for {bam_file_kwargs=}")

vcf_file_data = data.pop("vcf_file")
vcf_file = VCFFile.objects.get(path=vcf_file_data["path"],
bam_file=bam_file)
vcf_file_kwargs = {
"path": vcf_file_data["path"],
"bam_file": bam_file,
}
vcf_file = VCFFile.objects.filter(**vcf_file_kwargs).first()
if not vcf_file:
raise VCFFile.DoesNotExist(f"No vcf file for {vcf_file_kwargs=}")

defaults = {}
if qc_path := data.get("path"):
Expand All @@ -72,7 +80,7 @@ def get_object(data):
return qc


class QCGeneListSerializer(SeqAutoRecordMixin, serializers.ModelSerializer):
class QCGeneListSerializer(serializers.ModelSerializer):
""" When we retrieve this, we want to see linked sample gene list """
qc = QCSerializer()
sample_gene_list = SampleGeneListSerializer()
Expand All @@ -82,7 +90,7 @@ class Meta:
fields = ("path", "qc", "sample_gene_list")


class QCGeneListCreateSerializer(SeqAutoRecordMixin, serializers.ModelSerializer):
class QCGeneListCreateSerializer(serializers.ModelSerializer):
""" When we create, we just want to send up gene list
This also handles complexity of setting active gene list
Expand Down Expand Up @@ -126,7 +134,7 @@ def create(self, validated_data):
"records": created_records,
}

class QCGeneCoverageSerializer(SeqAutoRecordMixin, serializers.ModelSerializer):
class QCGeneCoverageSerializer(serializers.ModelSerializer):
qc = QCSerializer()
gene_coverage_collection = GeneCoverageCollectionSerializer()

Expand All @@ -135,7 +143,7 @@ class Meta:
fields = ("path", "qc", "gene_coverage_collection")


class QCExecSummarySerializer(SeqAutoRecordMixin, serializers.ModelSerializer):
class QCExecSummarySerializer(serializers.ModelSerializer):
qc = QCSerializer()

class Meta:
Expand Down
51 changes: 16 additions & 35 deletions seqauto/serializers/sequencing_serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,31 +123,7 @@ def create(self, validated_data):
return instance


class SeqAutoRecordMixin:
    """
    Serializer mixin that sets SeqAutoRecord.data_state to COMPLETE for anything
    created or updated via the API.

    SeqAutoRecord.data_state represents e.g. whether the file exists on disk, has been
    deleted, or is expected but not available yet.
    Now that we're moving to an API, the SeqAuto records should just match the disk and
    be updated via clients, or just be added, and if they are later deleted we don't care.

    TODO: We should consider removing the data_state field
    """

    def set_data_state_complete(self, validated_data):
        """ Mutates validated_data in place, overwriting any 'data_state' already present """
        validated_data["data_state"] = DataState.COMPLETE

    def create(self, validated_data):
        """ Marks the data COMPLETE, then defers to the next create() in the MRO """
        self.set_data_state_complete(validated_data)
        return super().create(validated_data)

    def update(self, instance, validated_data):
        """ Marks the data COMPLETE, then defers to the next update() in the MRO """
        self.set_data_state_complete(validated_data)
        return super().update(instance, validated_data)


class SequencingRunSerializer(SeqAutoRecordMixin, serializers.ModelSerializer):
class SequencingRunSerializer(serializers.ModelSerializer):
name = serializers.CharField(validators=[]) # disable UniqueValidator
sequencer = serializers.PrimaryKeyRelatedField(queryset=Sequencer.objects.all())
experiment = serializers.PrimaryKeyRelatedField(queryset=Experiment.objects.all())
Expand Down Expand Up @@ -214,7 +190,7 @@ class Meta:
fields = ['sample_id', 'sample_name', 'sample_project', 'sample_number', 'lane', 'barcode', 'enrichment_kit', 'is_control', 'failed', 'sequencingsampledata_set']


class SampleSheetSerializer(SeqAutoRecordMixin, serializers.ModelSerializer):
class SampleSheetSerializer(serializers.ModelSerializer):
sequencing_run = serializers.PrimaryKeyRelatedField(queryset=SequencingRun.objects.all())
sequencingsample_set = SequencingSampleSerializer(many=True)

Expand All @@ -241,6 +217,7 @@ def _create_sequencing_samples(sample_sheet, sequencing_samples_data):

def create(self, validated_data):
sequencing_samples_data = validated_data.pop('sequencingsample_set')
validated_data["data_state"] = DataState.COMPLETE
sequencing_run = validated_data["sequencing_run"]
sample_sheet, created = SampleSheet.objects.update_or_create(
sequencing_run=sequencing_run,
Expand All @@ -262,7 +239,7 @@ def update(self, instance, validated_data):
return instance


class FastqSerializer(SeqAutoRecordMixin, serializers.ModelSerializer):
class FastqSerializer(serializers.ModelSerializer):
name = serializers.CharField(read_only=True)
read = serializers.SerializerMethodField(read_only=True)

Expand Down Expand Up @@ -306,12 +283,13 @@ def create(self, validated_data):
else:
unaligned_reads_kwargs["fastq_r2"] = None # Be able to blank it out

# Unaligned reads isn't a file so doesn't have 'data_state'
instance, _created = UnalignedReads.objects.update_or_create(sequencing_sample=sequencing_sample,
defaults=unaligned_reads_kwargs)
return instance


class FlagstatsSerializer(SeqAutoRecordMixin, serializers.ModelSerializer):
class FlagstatsSerializer(serializers.ModelSerializer):
class Meta:
model = Flagstats
fields = ("total", "read1", "read2", "mapped", "properly_paired")
Expand All @@ -323,7 +301,7 @@ class Meta:
fields = ("path", )


class BamFileSerializer(SeqAutoRecordMixin, serializers.ModelSerializer):
class BamFileSerializer(serializers.ModelSerializer):
unaligned_reads = UnalignedReadsSerializer(required=False)
aligner = AlignerSerializer(required=False)
flagstats = FlagstatsSerializer(read_only=True, required=False) # 1-to-1 field
Expand All @@ -350,6 +328,7 @@ def create(self, validated_data):
defaults={"data_state": DataState.COMPLETE})

if flagstats_data:
flagstats_data["data_state"] = DataState.COMPLETE
Flagstats.objects.create(bam_file=bam_file, **flagstats_data)

return bam_file
Expand All @@ -371,7 +350,7 @@ class Meta:
fields = ("path", )


class VCFFileSerializer(SeqAutoRecordMixin, serializers.ModelSerializer):
class VCFFileSerializer(serializers.ModelSerializer):
bam_file = BamFileSerializer(required=False)
variant_caller = VariantCallerSerializer()

Expand Down Expand Up @@ -450,7 +429,7 @@ def create(self, validated_data):
}


class SampleSheetCombinedVCFFileSerializer(SeqAutoRecordMixin, serializers.ModelSerializer):
class SampleSheetCombinedVCFFileSerializer(serializers.ModelSerializer):
sample_sheet = SampleSheetLookupSerializer()
variant_caller = VariantCallerSerializer()

Expand All @@ -462,10 +441,12 @@ class Meta:
def create(self, validated_data):
    """
    Create (or refresh) the SampleSheetCombinedVCFFile described by validated_data.

    The nested 'sample_sheet' and 'variant_caller' entries are popped from validated_data
    and resolved via SampleSheetLookupSerializer.get_object and VariantCallerSerializer.create.
    The record is matched on sequencing run, sample sheet, variant caller and path.

    Returns the matched or newly created SampleSheetCombinedVCFFile.
    """
    sample_sheet = SampleSheetLookupSerializer.get_object(validated_data.pop('sample_sheet'))
    variant_caller = VariantCallerSerializer().create(validated_data.pop('variant_caller'))

    # A single update_or_create covers both first-time and re-sent payloads. A preceding
    # get_or_create on the same lookup would only add an extra query and could insert a
    # row without data_state before it is set here.
    defaults = {"data_state": DataState.COMPLETE}
    sscvcf, _ = SampleSheetCombinedVCFFile.objects.update_or_create(
        sequencing_run=sample_sheet.sequencing_run,
        sample_sheet=sample_sheet,
        variant_caller=variant_caller,
        path=validated_data["path"],
        defaults=defaults,
    )
    return sscvcf


3 changes: 0 additions & 3 deletions seqauto/test_data/api_client/experiment/exome_20_022.json

This file was deleted.

This file was deleted.

This file was deleted.

Loading

0 comments on commit a27a859

Please sign in to comment.