-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_filtered_otus.py
executable file
·48 lines (36 loc) · 1.48 KB
/
get_filtered_otus.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Script to find which sequences were filtered by trimAl
Procudes a fasta file with the filtered seqs.
Created on Fri Aug 21 10:43:58 2015
@author: VanessaRM
"""
from argparse import ArgumentParser
from Bio import SeqIO
import sys
#from Bio.SeqRecord import SeqRecord
#import os
#os.chdir('/Users/VanessaRM/Documents/PhD/4_Merged_123Runs/05_alignments')
parser = ArgumentParser()
parser.add_argument('-oa', '--original_aln', help='The path to the input aligned pfiltered OTUs', required=True)
parser.add_argument('-fa', '--filtered_aln', help='The path to the filtered OTUs - output from trimAL.txt', required=True)
parser.add_argument('-o', '--output', help='The path and name of the output file', required=True)
args = parser.parse_args()
original_aln = args.original_aln
filtered_aln = args.filtered_aln
output_fp = args.output
all_seqs_in_filtered_aln = []
store_bad_otus = []
#original_aln = "UPA.nt_aligned.fas.reduced_pfiltered.fasta"
#filtered_aln = "test_trimal_UPA_80_80.fasta"
# get a list of OTUs in filtered alignment:
for seq_record in SeqIO.parse(filtered_aln, "fasta"):
all_seqs_in_filtered_aln.append(seq_record.id)
# Check which OTUs are not in the filtered aln
for seq_record in SeqIO.parse(original_aln, "fasta"):
if seq_record.id not in all_seqs_in_filtered_aln:
store_bad_otus.append(seq_record)
SeqIO.write(store_bad_otus, str(output_fp), "fasta")
print ""
print "%i sequences saved in %s" %(len(output_fp), output_fp)