-
Notifications
You must be signed in to change notification settings - Fork 2
/
Reanalyze_16S
executable file
·200 lines (186 loc) · 8.92 KB
/
Reanalyze_16S
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
#!/bin/sh
# Reanalyze_16S
#
# Script for redoing alpha and beta diversity analyses to a different sampling depth than the one used by the automatic analysis script.
# This script takes as input a users specified OTU table, sample mapping file, phylogenetic tree, and new sampling depth and redoes the standard alpha and beta diversity analyses.
# A time-stamped log file of all steps that are conducted is created, which also displays the input files and their MD5 checksums.
# The script utilizes a checkpointing feature to prevent it from proceeding to a subsequent step if a proceeding step has failed.
# This minimizes the number of error messages sent to the terminal window and hopefully makes it easier for users to troubleshoot when something fails.
# It must be noted that the script is currently written to utilize QIIME version 1.8.
# Previous and/or future versions of QIIME are not guaranteed to work with this script.
#
# Created by Michael C. Nelson on 2014-01-16.
# Last revised: 2015-04-16
# Revision #: 5
# Copyright 2015 Michael C. Nelson and the University of Connecticut. All rights reserved.
#
# This script is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#Set Script Name variable
SCRIPT=`basename ${BASH_SOURCE[0]}`
# Set up working environment. These values should be changed as needed depending on the system being used.
# If this script is being used on a system where the QIIME python scripts are included in the shell's $PATH, then comment or delete the next line.
source /macqiime/configs/bash_profile.txt
#Help function
function HELP {
echo "$SCRIPT"
echo "Required arguments."
echo "-o The path to your input OTU table file (e.g. otu_table.biom)."
echo "-m The path to your mapping file (e.g. Map.txt)."
echo "-t The path to your phylogenetic tree (e.g. Rep_Set_tree.tree)."
echo "-d Sets the value for rarefaction depth."
echo \\n"Optional arguments."
echo "-h Displays this help message. No further functions are performed."\\n
echo "Example usage: $SCRIPT -o otu_table.buom -m Map.txt -t Rep_Set_tree.tree -d 10000"\\n
exit 1
}
#Check the number of arguments. If none are passed, print help and exit.
if [ $# -eq 0 ]; then
echo \\n"ERROR: No arguments given."
HELP
fi
### Start getopts code ###
while getopts :o:m:t:d:h FLAG; do
case $FLAG in
o) # Set input sequence file path
TABLE=$OPTARG
;;
m) # Set mapping file path
MAPLOC=$OPTARG
;;
t) # Set the path to the tree tile
TREE=$OPTARG
;;
d) # Set optional rarefaction depth
DEPTH=$OPTARG
;;
h) # Show help
HELP
;;
'?') # Unrecognized option
echo \\n"ERROR: Option -$OPTARG not recognized."\\n
echo "Use $SCRIPT -h to see the help documentation."\\n
exit 2
;;
:) # No argument given for an option (s,m,d)
echo "Option -$OPTARG requires an argument. See help (-h)."\\n
;;
esac
done
shift $((OPTIND-1))
### End getopts code ###
# Check that the required options are given and that the files exist.
if [ ! $TABLE ]; then
echo "ERROR: No input OTU table file was given."\\n
HELP
elif [ ! -f $TABLE ]; then
echo "ERROR: Input OTU table could not be found, please check that your filepath is correct."\\n
exit 1
fi
if [ ! $MAPLOC ]; then
echo "ERROR: No input mapping file was given."\\n
HELP
elif [ ! -f $MAPLOC ]; then
echo "ERROR: Input mapping file could not be found, please check that your filepath is correct."\\n
exit 1
fi
if [ ! $TREE ]; then
echo "ERROR: No input mapping file was given."\\n
HELP
elif [ ! -f $TREE ]; then
echo "ERROR: Input tree file could not be found, please check that your filepath is correct."\\n
exit 1
fi
if [ ! $DEPTH ]; then
echo "ERROR: New sampling depth was not provided."\\n
HELP
fi
clear
# Initialize timestamp variables
DATE=`date +%Y-%m-%d`
TIME=`date +%H:%M:%S`
TM=`date +%Y%m%d-%H%M`
LOG=Reanalysis_log_$TM.txt
#Create log file
echo "Executing Reanalysis on $DATE at $TIME "\\n | tee $LOG
MD5TABLE=`md5 $TABLE`
MD5MAP=`md5 $MAPLOC`
MD5TREE=`md5 $TREE`
echo "Using $TABLE as the input otu table." | tee -a $LOG
echo $MD5TABLE | tee -a $LOG
echo "Using $MAPLOC as the input sample mapping file." | tee -a $LOG
echo $MD5MAP | tee -a $LOG
echo "Using $TREE as the input tree file." | tee -a $LOG
echo $MD5TREE | tee -a $LOG
echo "New rarefaction depth is $DEPTH. All new files/folders will state this sample depth." | tee -a $LOG
# Set up working environment
source /macqiime/configs/bash_profile.txt
DIR=Reanalysis_$TM_$DEPTH
mkdir $DIR
START=`date +%s`
### Step 1: Perform new rarefaction of OTU table to new sample depth
if [ ! -f Redo1.complete ]; then
echo \\n"Step 1: $DATE $TIME" | tee -a $LOG
echo "Re-rarefying your OTU table ($TABLE) to the new sampling depth: $DEPTH" | tee -a $LOG
single_rarefaction.py -i $TABLE -o $DIR/even_table_$DEPTH.biom -d $DEPTH
biom summarize-table -i $DIR/even_table_$DEPTH.biom -o $DIR/even_table_stats_$DEPTH.txt && touch Redo1.complete
fi
if [ ! -f Redo1.complete ]; then
echo \\n\\n"ERROR: $DATE $TIME" | tee -a $LOG
echo 'An error occured while rarefying the OTU table.' | tee -a $LOG
echo 'Please consult the error message(s) displayed in the terminal window.'\\n\\n | tee -a $LOG
exit 1
fi
## Step 2: Perform new alpha diversity calculations and also new taxa summaries
if [ -f $DIR/even_table_$DEPTH.biom ] && [ ! -f Redo2.complete ]; then
echo \\n'Step 2: Calculating alpha diversity metrics from the newly rarefied table and outputting absolute abundance taxa summaries.' | tee -a $LOG
alpha_diversity.py -i $DIR/even_table_$DEPTH.biom -o $DIR/alpha_even_$DEPTH.txt -m osd,simpson,shannon,PD_whole_tree -t $TREE
summarize_taxa.py -i $DIR/even_table_$DEPTH.biom -o $DIR/tax_summary_$DEPTH/
summarize_taxa.py -i $DIR/even_table_$DEPTH.biom -o $DIR/tax_summary_$DEPTH/abs -a && touch Redo2.complete
fi
if [ ! -f Redo2.complete ]; then
echo \\n\\n"ERROR: $DATE $TIME" | tee -a $LOG
echo 'An error occured while calculating alpha diversity metrics or summarizing taxonomy.' | tee -a $LOG
echo 'Please consult the error message(s) displayed in the terminal window.'\\n\\n | tee -a $LOG
exit 1
fi
### Step 2: Perform standard beta diversity calculations and create 3D PCoA plots
if [ -f $DIR/even_table_$DEPTH.biom ] && [ ! -f Redo3.complete ]; then
echo \\n"Step 3: $DATE $TIME" | tee -a $LOG
echo 'Computing new beta diversity statistics for the following metrics: Bray Curtis, Ochiai, weighted and unweighted UniFrac' | tee -a $LOG
beta_diversity.py -i $DIR/even_table_$DEPTH.biom -o $DIR/beta_div/ -m binary_ochiai,bray_curtis,weighted_unifrac,unweighted_unifrac -t $TREE
principal_coordinates.py -i $DIR/beta_div/weighted_unifrac_even_table_$DEPTH.txt -o $DIR/beta_div/weighted_unifrac_PCoA.txt
principal_coordinates.py -i $DIR/beta_div/unweighted_unifrac_even_table_$DEPTH.txt -o $DIR/beta_div/unweighted_unifrac_PCoA.txt
principal_coordinates.py -i $DIR/beta_div/bray_curtis_even_table_$DEPTH.txt -o $DIR/beta_div/bray_curtis_PCoA.txt
principal_coordinates.py -i $DIR/beta_div/binary_ochiai_even_table_$DEPTH.txt -o $DIR/beta_div/ochiai_PCoA.txt
echo \\n"$DATE $TIME" | tee -a $LOG
echo 'Making 3D PCoA plots, output is in the $DIR/beta_div/3D_plots directory' | tee -a $LOG
make_3d_plots.py -i $DIR/beta_div/weighted_unifrac_PCoA.txt -o $DIR/beta_div/weighted_unifrac/ -m $MAPLOC
make_3d_plots.py -i $DIR/beta_div/unweighted_unifrac_PCoA.txt -o $DIR/beta_div/unweighted_unifrac/ -m $MAPLOC
make_3d_plots.py -i $DIR/beta_div/bray_curtis_PCoA.txt -o $DIR/beta_div/Bray-Curtis/ -m $MAPLOC
make_3d_plots.py -i $DIR/beta_div/ochiai_PCoA.txt -o $DIR/beta_div/Ochiai/ -m $MAPLOC
mkdir $DIR/beta_div/3D_plots
find $DIR/ -name '*.kin' -exec mv {} $DIR/beta_div/3D_plots/ ';'
rm -r $DIR/beta_div/Bray-Curtis/ $DIR/beta_div/Ochiai/ $DIR/beta_div/unweighted_unifrac/ $DIR/beta_div/weighted_unifrac/ && touch Redo2.complete
fi
if [ ! -f Redo2.complete ]; then
echo \\n\\n"ERROR: $DATE $TIME" | tee -a $LOG
echo 'An error occured while performing beta diversity analyses.' | tee -a $LOG
echo 'Please consult the error message(s) displayed in the terminal window.'\\n\\n | tee -a $LOG
exit 1
else
rm Redo1.complete Redo2.complete Redo3.complete
END=`date +%s`
RUNTIME=$(( END - START ))
echo \\n'Reanalysis has finished!' | tee -a $LOG
echo "This job took $RUNTIME seconds to complete" | tee -a $LOG
fi