-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathssunique_settings.R
62 lines (53 loc) · 2.34 KB
/
ssunique_settings.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# Setting universal constants, loading required libraries, and sourcing SSUnique scripts
#####################
# REQUIRED PACKAGES #
#####################
# pacman installs any missing package and then loads it.
# Probe for pacman with requireNamespace() instead of require(): require()
# attaches pacman only when it is already installed, so a first run (fresh
# install) and later runs would end up with different search paths.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
# Attach explicitly so both paths behave the same; library() stops with an
# error here if the installation above did not succeed.
library(pacman)

# CRAN packages, R >= 3.1 (ggplot2)
# NOTE(review): "biom" may no longer be available from CRAN (biomformat on
# Bioconductor appears to be its successor) - confirm before a fresh install.
pacman::p_load(optparse, ape, biom, ggplot2, reshape)

# Bioconductor packages, R >= 3.2 (phyloseq)
# Must have Bioconductor installed, see: https://www.bioconductor.org/install/
pacman::p_load(Biostrings, phyloseq)
#########################
# LOAD SSUnique SOURCES #
#########################
# The scripts are sourced one after another in the order listed. Paths are
# relative, so they are resolved against the current working directory.
# source() evaluates each file in the global environment by default, and
# invisible() discards the list that lapply() returns.
invisible(lapply(
  c(
    "align_and_merge.R",
    "load_and_clean.R",
    "novelty_functions.R",
    "novelty_plotting_and_output.R",
    "rare_functions.R",
    "rare_plotting_and_output.R"
  ),
  source
))
####################
# SET UP CONSTANTS #
####################

# --- general ---
VERSION <- "0.1"
# Directory names keep their trailing "/" because every path below is built
# by plain string concatenation.
RESULTS_DIR <- "ssunique_results/"
FILTER_DIR <- paste0(RESULTS_DIR, "filter_results/")
LOG_FILE <- paste0(RESULTS_DIR, "ssunique.log")
CLEANUP <- FALSE
TAX_RANK <- "Order"
# exclude OTUs that sum below this threshold - useful for very large datasets
ABUNDANCE_THRESHOLD <- 1
# e.g., "HMPbodysubsite", "all", "ENV_FEATURE"
METADATA_CAT <- "SOURCE"
# NOTE(review): this is the character string "TRUE", not the logical TRUE
# (compare CLEANUP above) - confirm how callers test it before changing it.
BIOM <- "TRUE"

# --- program paths ---
FASTTREE <- "FastTree"
CMALIGN <- "cmalign"
ESL_ALIMERGE <- "esl-alimerge"
SSU_ALIGN <- "ssu-align"

# --- reference standards ---
REF_ALIGNMENT <- "ref_data/LTPs119_SSU.cmalign.stk"
REF_NOVEL_TREE <- paste0(RESULTS_DIR, "reference_plus_novel.tre")
CMALIGN_MODEL <- "ref_data/SSU_rRNA_bacteria.cm"

# --- SSUnique outputs ---
PREFILTER_FASTA <- paste0(RESULTS_DIR, "prefiltered.fasta")
FILTERED_FASTA <- paste0(FILTER_DIR, "filter_results.bacteria.fa")
TMP_FASTA <- paste0(RESULTS_DIR, "tmp.fasta")
ALIGN_OUT <- paste0(RESULTS_DIR, "tmp.stk")
MERGED_ALIGNMENT <- paste0(RESULTS_DIR, "merged.afa")
NOVEL_FASTA <- paste0(RESULTS_DIR, "novel.fasta")
NOVEL_ALIGNMENT <- paste0(RESULTS_DIR, "novel.stk")
TMP_TREE <- paste0(RESULTS_DIR, "tmp.tre")
REF_NOVEL_SUFFIX <- "ref_plus_novel"
CLADES_OUTPUT <- paste0(RESULTS_DIR, "clades.txt")
HIGHLIGHT_TREE <- paste0(RESULTS_DIR, "highlight_tree.xml")