diff --git a/pyGeno/Chromosome.py b/pyGeno/Chromosome.py index fb79600..b0baf34 100644 --- a/pyGeno/Chromosome.py +++ b/pyGeno/Chromosome.py @@ -72,12 +72,23 @@ def getSequenceData(self, slic) : if sequenceModifier.__class__ is SF.SequenceDel : seqPos = seqPos + sequenceModifier.offset #To avoid to change the length of the sequence who can create some bug or side effect - data[seqPos:(seqPos + sequenceModifier.length)] = [''] * sequenceModifier.length + if (sequenceModifier.indel == 0): + data[seqPos:(seqPos + sequenceModifier.length)] = [''] * sequenceModifier.length + else: # indel + data[seqPos] = sequenceModifier.bases + seqPos += 1 + data[seqPos:(seqPos + sequenceModifier.indel)] = [''] * sequenceModifier.indel + elif sequenceModifier.__class__ is SF.SequenceSNP : data[seqPos] = sequenceModifier.alleles elif sequenceModifier.__class__ is SF.SequenceInsert : seqPos = seqPos + sequenceModifier.offset - data[seqPos] = "%s%s" % (data[seqPos], sequenceModifier.bases) + if (sequenceModifier.indel == 0): + data[seqPos] = "%s%s" % (data[seqPos], sequenceModifier.bases) + else: # indel + data[seqPos] = sequenceModifier.bases + seqPos += 1 + data[(seqPos):(seqPos + sequenceModifier.indel)] = [''] * sequenceModifier.indel else : raise TypeError("sequenceModifier on chromosome: %s starting at: %s is of unknown type: %s" % (self.chromosome.number, snp.start, sequenceModifier.__class__)) diff --git a/pyGeno/SNPFiltering.py b/pyGeno/SNPFiltering.py index b1694b0..bd778b8 100644 --- a/pyGeno/SNPFiltering.py +++ b/pyGeno/SNPFiltering.py @@ -29,16 +29,18 @@ def __init__(self, bases, sources = {}, ref = '-') : Sequence_modifiers.__init__(self, sources) self.bases = bases self.offset = 0 + self.indel = 0 # Allow to use format like C/CCTGGAA(dbSNP) or CCT/CCTGGAA(samtools) if ref != '-': if ref == bases[:len(ref)]: self.offset = len(ref) self.bases = self.bases[self.offset:] - #-1 because if the insertion are after the last nuc we go out of table + #-1 because if the insertion are after the last nuc we go out of table self.offset -= 1 - else: - raise NotImplemented("This format of Insetion is not accepted. Please change your format, or implement your format in pyGeno.") + # Allow indel like GG/GAG + else: + self.indel = len(ref) - 1 class SequenceDel(Sequence_modifiers) : @@ -47,6 +49,7 @@ def __init__(self, length, sources = {}, ref = None, alt = '-') : Sequence_modifiers.__init__(self, sources) self.length = length self.offset = 0 + self.indel = 0 # Allow to use format like CCTGGAA/C(dbSNP) or CCTGGAA/CCT(samtools) if alt != '-': @@ -54,8 +57,10 @@ def __init__(self, length, sources = {}, ref = None, alt = '-') : if alt == ref[:len(alt)]: self.offset = len(alt) self.length = self.length - len(alt) + # Allow indel like CATA/CAA else: - raise NotImplemented("This format of Deletion is not accepted. Please change your format, or implement your format in pyGeno.") + self.bases = alt + self.indel = len(ref) - 1 else: raise Exception("You need to add a ref sequence in your call of SequenceDel. Or implement your format in pyGeno.")