# Theo.py~

from typing import *
from eliot_main import get_genetic_code, write_csv, read_csv, read_fasta


def read_flat_file(filename):
    """Load a whole file into memory and return it as a single string.

    This function is written by Theo Gauvrit.

    Args:
        filename: path of the file to open (text mode, read-only).

    Returns:
        The complete content of the file as one string, newline
        characters included.
    """
    # The context manager closes the handle even when read() raises,
    # which a manual open()/close() pair does not guarantee.
    with open(filename, "r") as handle:
        return handle.read()  # e.g. 'LOCUS       NM_000518 ' ...


def reversed_complement(dna_seq):
    """Build the reverse complement of a DNA sequence.

    This function is written by Theo GAUVRIT.

    Args:
        dna_seq: DNA sequence to reverse-complement. It is upper-cased
            before processing, so lower-case input is accepted.

    Returns:
        The reverse complement, as an upper-case string.

    Raises:
        KeyError: if the sequence holds a symbol other than A, C, G or T.
    """
    pairing = {"A": "T", "C": "G", "G": "C", "T": "A"}
    # Walk the sequence from its last base to its first, swapping each
    # base for its partner on the way.
    return "".join(pairing[base] for base in reversed(dna_seq.upper()))


def find_orf(seq: str, code_table_id: int):
    """Locate the start and stop codons of a sequence in its six reading frames.

    The sequence and its reverse complement are each scanned codon by codon
    (steps of three bases) from offsets 0, 1 and 2.

    Args:
        seq: Sequence to analyse. It is upper-cased before scanning, like
            the reverse strand produced by reversed_complement.
        code_table_id: NCBI identifier of the translation table used on
            this sequence; passed to get_genetic_code.

    Returns:
        A tuple (stop_codons, starts, stops):
            stop_codons: codons that the translation table maps to '*'.
            starts: one dict per start codon found.
            stops: one dict per stop codon found.
        Each dict in starts/stops has two keys:
            pos: 0-based index of the first base of the codon, counted on
                the strand that was scanned (for negative frames this is
                an index into the reverse complement).
            frame: 1, 2, 3 on the given strand or -1, -2, -3 on the
                reverse complement.

    Raises:
        KeyError: raised by reversed_complement when the sequence holds a
            symbol other than A, C, G or T.
    """
    transl_table, start_table = get_genetic_code(code_table_id)
    # NOTE(review): start_table is only used through `codon in start_table`;
    # this assumes membership means "is a start codon" -- confirm against
    # get_genetic_code.

    # Stop codons are the entries that the translation table maps to '*'.
    listestop = [codon for codon, amino_acid in transl_table.items()
                 if amino_acid == '*']

    # Normalise the case of the forward strand so both strands are compared
    # to the codon tables in the same (upper) case.
    seq = seq.upper()
    rever = reversed_complement(seq)

    start_lidi = []
    stop_lidi = []
    for offset in range(3):
        # Frames are numbered from 1 so that the sign alone tells the
        # strand; numbering from 0 would make +0 and -0 indistinguishable.
        frame = offset + 1
        for strand, signed_frame in ((seq, frame), (rever, -frame)):
            # Step by 3 so a position is only reported in the frame it
            # belongs to; stop at len - 2 to skip a trailing partial codon.
            for pos in range(offset, len(strand) - 2, 3):
                codon = strand[pos:pos + 3]
                if codon in start_table:
                    start_lidi.append({"pos": pos, "frame": signed_frame})
                if codon in listestop:
                    stop_lidi.append({"pos": pos, "frame": signed_frame})

    # TODO: pair the starts and stops of a same frame into ORF records
    # ({"start", "stop", "length", "protein", "frame"}); this step is not
    # implemented yet, so the raw codon positions are returned instead.
    return listestop, start_lidi, stop_lidi


if __name__ == "__main__":
    # Manual check: scan the influenza sequence with translation table 11
    # and print the three values returned by find_orf, one per line.
    sequence = read_fasta("influenza.fasta")

    stop_codons, start_hits, stop_hits = find_orf(sequence, 11)
    for report in (stop_codons, start_hits, stop_hits):
        print(report)