-
Notifications
You must be signed in to change notification settings - Fork 2
/
config.yaml
90 lines (70 loc) · 3.67 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# Pipeline configuration
# Instructions for gathering a reference genome can be found at:
# https://helikarlab.github.io/FastqToGeneCounts/fastq_setup_genome.html
#
# Modify the settings below to reflect your own paths and preferences.
# Only change the values after the colon (':'); leave the key names as they are.
#
# Path to the CSV control file listing the samples to analyze.
# The file must have its columns in this order: srr, friendly_name, layout, method
#   layout: "PE" (paired-end sequencing) or "SE" (single-end sequencing)
#   method: "total" (total RNA sequencing) or "mrna" (mRNA/PolyA RNA sequencing)
MASTER_CONTROL: "controls/master_control.csv"
# Directory where results are saved
ROOTDIR: "results"
# Scratch directory of your computer or cluster
SCRATCH_DIR: "/scratch"
# Directory containing your FastQ files.
# Only needs to be set if you are processing local FastQ files
# (see also PERFORM_PREFETCH and PERFORM_DUMP_FASTQ).
LOCAL_FASTQ_FILES: "results/dump_fastq_input"
# Quality Control Options
# Each PERFORM_* setting switches one pipeline step on (True) or off (False).
#
# Trim RNA sequencing adapters before aligning? (default: True)
PERFORM_TRIM: True
# Screen RNA sequences for contamination? (default: True)
PERFORM_SCREEN: True
# Collect RNAseq metrics to obtain more information about the reads? (default: True)
PERFORM_GET_RNASEQ_METRICS: True
# Prefetch SRA files before writing them to FastQ? (default: True)
# Set to False if you are processing local FastQ files.
PERFORM_PREFETCH: True
# Get RNA information from SRA files? (default: True)
# Set to False if you are processing local FastQ files.
# NOTE(review): presumably this is the SRA-to-FastQ dump step -- confirm against the workflow.
PERFORM_DUMP_FASTQ: True
# Determine insert sizes, which are used for the fragment length calculation? (default: True)
PERFORM_GET_INSERT_SIZE: True
# Determine fragment sizes? Required for the zFPKM calculation. (default: True)
PERFORM_GET_FRAGMENT_SIZE: True
# Bypass (skip) the replicate validation?
# If you are using this tool to generate files for COMO, single replicates are not allowed.
# For example, a lone "S1R1" sample will result in an error in COMO.
# If you are creating a constraint-based model with COMO, it is recommended to leave this as False
# so that the validation still runs.
# https://github.com/HelikarLab/COMO
BYPASS_REPLICATE_VALIDATION: False
# Bypass (skip) the genome validation? (default: False)
# The validation checks that the genome-related files can be found before the pipeline starts.
# Bypassing it is not recommended: if these files are missing, errors will occur in the middle
# of the pipeline instead of at the start.
BYPASS_GENOME_VALIDATION: False
# Number of times each rule is executed per file for benchmarking.
# Benchmarks measure the amount of time it takes to execute each rule on each file.
# Increasing this number executes each rule more than once per file.
# For example, with 1 input file and a value of 3, that file goes through the entire pipeline three times.
BENCHMARK_TIMES: 1
# Reference genome settings.
# The keys below dictate the location (and source) of the reference genome.
# They must stay indented beneath GENOME so that they are read as part of this section.
GENOME:
  # The following files will be downloaded or created:
  #   - Genome FASTA file (i.e., primary assembly, downloaded from Ensembl)
  #   - GTF file (i.e., gene annotation, downloaded from Ensembl)
  #   - UCSC-style refFlat file (downloaded from UCSC)
  #   - BED file (created from the refFlat file)
  #   - rRNA Interval List (created from the GTF file)
  #   - STAR index (created from the FASTA file)

  # Directory where the genome generation data should be saved
  SAVE_DIR: "genome"

  # The species of the genome to download, given as an NCBI Taxonomy ID.
  # Look up your species at https://www.ncbi.nlm.nih.gov/Taxonomy
  # Examples:
  #   Homo sapiens: 9606
  #   Mus musculus: 10090
  #   Macaca mulatta: 9544
  TAXONOMY_ID: 9606

  # The version of the genome to download.
  # "latest" will get the most recent version.
  # To use a specific version, find it at: https://ftp.ensembl.org/pub/
  # Examples: "latest", "release-112", "release-111", etc.
  VERSION: "latest"

  # Should the download progress be shown?
  # If False, no progress is shown.
  SHOW_PROGRESS: True