-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathqtl.config
executable file
·323 lines (314 loc) · 14.3 KB
/
qtl.config
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
### CONFIGURATION FILE FOR QTLTOOLKIT ###
# Precede your comments with a #-sign.
#
# Set the directory variables, the order doesn't matter.
# Don't end the directory variables with '/' (forward-slash)!
#
### --- SYSTEM SETTINGS --- ###
# REQUIRED: Root directory under which the software is installed on the server.
# Alternative root (Rocky8):
# SOFTWARE="/hpc/local/Rocky8/dhl_ec/software"
SOFTWARE="/hpc/local/CentOS7/dhl_ec/software"
#
# REQUIRED: Location of QTLToolKit on the server.
# Alternative location:
# QTLTOOLKIT=/hpc/dhl_ec/llandsmeer/_atheroexpress/_eqtl/QTLToolKit
QTLTOOLKIT="${SOFTWARE}/QTLToolKit"
#
#
# REQUIRED: Locations of the support programs on the server.
# All of them are resolved relative to ${SOFTWARE}, except PYTHON (absolute path).
### SNPTEST v2.5.2 and the QCTOOL versions used.
### Note: QCTOOL v2.0 lacks the flags for filtering (-maf, -info, -hwe), so the
### extraction is done in two steps - a quick and dirty solution.
SNPTEST="${SOFTWARE}/snptest_v2.5.2_CentOS6.5_x86_64_static/snptest_v2.5.2"
QCTOOL="${SOFTWARE}/qctool_v1.5-linux-x86_64-static/qctool"
QCTOOL2="${SOFTWARE}/qctool_v204"
# Alternative QTLtools build:
# QTLTOOLS=${SOFTWARE}/QTLTools/QTLtools_1.0_CentOS6.8_x86_64
QTLTOOLS="${SOFTWARE}/QTLtools_1.1_source/bin/QTLtools"
LOCUSZOOM="${SOFTWARE}/locuszoom_1.3/bin/locuszoom"
BGZIP="${SOFTWARE}/bgzip_v1.6"
TABIX="${SOFTWARE}/tabix_v1.6"
PLINK="${SOFTWARE}/plink_v1.9"
PYTHON="/hpc/local/CentOS7/common/lang/python/2.7.10/bin/python"
#
#
### --- QSUB SETTINGS --- ###
# One QUEUE_*/VMEM_* pair per step (QCTOOL, NOM, PERM, CLUMP, SUMPARSER, FUNCENRICH, RTC, PLOT).
# NOTE(review): the QUEUE_* values look like HH:MM:SS run-time limits and the VMEM_* values
# like memory requests -- confirm against the script that submits the jobs.
QUEUE_QCTOOL_CONFIG="01:00:00"
VMEM_QCTOOL_CONFIG="8G"
QUEUE_NOM_CONFIG="08:00:00"
VMEM_NOM_CONFIG="8G"
QUEUE_PERM_CONFIG="32:00:00"
VMEM_PERM_CONFIG="8G"
QUEUE_CLUMP_CONFIG="00:30:00"
VMEM_CLUMP_CONFIG="12G"
QUEUE_SUMPARSER_CONFIG="00:15:00"
VMEM_SUMPARSER_CONFIG="8G"
QUEUE_FUNCENRICH_CONFIG="12:00:00"
VMEM_FUNCENRICH_CONFIG="64G"
QUEUE_RTC_CONFIG="00:30:00"
VMEM_RTC_CONFIG="12G"
QUEUE_PLOT_CONFIG="24:00:00"
VMEM_PLOT_CONFIG="16G"
#
### --- ANALYSIS SETTINGS --- ###
# REQUIRED: mailing settings
# Your e-mail address; you'll get an email when the job has ended or when it was aborted.
# MAILSETTINGS is a combination of the following letters:
# 'b' Mail is sent at the beginning of the job;
# 'e' Mail is sent at the end of the job;
# 'a' Mail is sent when the job is aborted or rescheduled.
# 's' Mail is sent when the job is suspended;
# 'n' No mail is sent.
# NOTE(review): the address below looks like a redacted placeholder -- set a real address before use.
YOUREMAIL="[email protected]"
MAILSETTINGS="beas"
#
# REQUIRED: Path_to where the main analysis directory resides.
ROOTDIR=/hpc/dhl_ec/SOMPROJECTDIR
#
# REQUIRED: Name of the project.
# PROJECTNAME="cardiogramplusc4d_57loci_4real_complete_with_4pcs_clumped"
# PROJECTNAME="cardiogramplusc4d_88loci_4real_complete_with_4pcs_clumped"
PROJECTNAME="2020-03-03_test_slurm-v3"
# PROJECTNAME="cardiogramplusc4d_small_4real_complete_with_4pcs"
# PROJECTNAME="cardiogramplusc4d_TEST_4real_complete_with_4pcs"
#
# REQUIRED: Analysis settings.
# You can choose one of these options [CIS/TRANS].
QTL_TYPE="CIS"
#
# REQUIRED: Set study and sample type
# Set the study type, choose one of these options [AEMS450K1/AEMS450K2/CTMM]
# NOTE(review): the value below (AERNA) is not in the documented list -- confirm it is supported.
STUDY_TYPE="AERNA"
#
# Set the sample type, choose one of these options [AE: PLAQUES/BLOOD; CTMM: MONOCYTES]
SAMPLE_TYPE="BLOOD"
#
# Set the analysis type, choose one of these options [AE: MQTL; CTMM: EQTL]
ANALYSIS_TYPE="EQTL"
#
# REQUIRED: Set the regions to analyse
REGIONS_FILE="/hpc/dhl_ec/llandsmeer/ae/fifth_try/variants_for_eqtl.10loci.txt"
#
# REQUIRED: Set the exclusion type and the list of covariates *not* to use in the model
# Superseded alternative, kept for reference only. It used to be assigned here and was
# then silently overridden by the assignment below, so it never took effect:
# EXCLUSION_COV="${ROOTDIR}/excl_cov.txt"
EXCLUSION_COV="/hpc/dhl_ec/SOMPROJECTDIR/excl_cov"
#
# REQUIRED: Set STUDY-specific directories and files
#
# Choose a name for file names, options: [aegs/ctmm]
STUDYNAME="aegs"
# Choose the study job-name, options: [AEMS450K1/AEMS450K2/CTMM]
# NOTE(review): the value below (AERNA) is not in the documented list -- confirm it is supported.
STUDYJOBNAME="AERNA"
#
# Choose the matching genotyping and expression/methylation data
#
### -- my config ###
# Genotype data, phenotype/covariate exclusions and omics (QTL) input for this run.
ORIGINALS="/hpc/dhl_ec/data/_ae_originals"
GENETICDATA=/hpc/dhl_ec/llandsmeer/_atheroexpress/_eqtl/_data/AEGS_COMBINED_QC2018
SNPTESTDATA="AEGS1_2_3.clean.v5.chr"
SNPTESTOUTPUTDATA="aegs_combined"
PHENOCOVEXCL="${ORIGINALS}/pheno_cov_exclusions"
OMICSDATA=/hpc/dhl_ec/SOMPROJECTDIR/_data
QTLDATA=${OMICSDATA}/counts.qc.bed.gz
# The index is derived from QTLDATA so the two cannot drift apart. It previously pointed at
# 'counts.gc.bed.gz.tbi' ('gc' instead of 'qc'), which does not match the data file above;
# every other QTLDATA/QTLINDEX pair in this file is '<data file>.tbi'.
QTLINDEX=${QTLDATA}.tbi
### --- ATHERO-EXPRESS --- ###
# #
# ORIGINALS="/hpc/dhl_ec/data/_ae_originals"
# GENETICDATA="${ORIGINALS}/AEGS_COMBINED_IMPUTE2_1000Gp3_GoNL5"
# OMICSDATA="${ORIGINALS}/${STUDYJOBNAME}"
# SNPTESTDATA="aegs_combo_1kGp3GoNL5_RAW_chr"
# SNPTESTOUTPUTDATA="aegs_1kGp3GoNL5"
# # for PLAQUES analysis
# # AEMS450K1
# QTLDATA="${OMICSDATA}/20170924.AEMS450K1.BvaluesQCplaque.bed.gz"
# QTLINDEX="${OMICSDATA}/20170924.AEMS450K1.BvaluesQCplaque.bed.gz.tbi"
# # AEMS450K2
# QTLDATA="${OMICSDATA}/20170924.AEMS450K2.BvaluesQCplaque.bed.gz"
# QTLINDEX="${OMICSDATA}/20170924.AEMS450K2.BvaluesQCplaque.bed.gz.tbi"
# # for BLOOD analysis
# # AEMS450K1
# QTLDATA="${OMICSDATA}/20170924.AEMS450K1.BvaluesQCblood.bed.gz"
# QTLINDEX="${OMICSDATA}/20170924.AEMS450K1.BvaluesQCblood.bed.gz.tbi"
# # AEMS450K2
# # There is no data for AEMS450K2
# #
# # Indicate the COVARIATE-data to use
# COVARIATES="${GENETICDATA}/covariates_aegs_combo.all.cov"
# #
# # Indicate the ANNOTATION-file to use
# ANNOTATIONFILE="${ORIGINALS}/IlluminaMethylation450K.annotation.txt.gz"
#
### --- CTMM --- ###
# ORIGINALS="/hpc/dhl_ec/data/_ctmm_originals"
# GENETICDATA="${ORIGINALS}/CTMMAxiomTX_IMPUTE2_1000Gp3_GoNL5"
# PHENOCOVEXCL="${ORIGINALS}/pheno_cov_exclusions"
# OMICSDATA="${ORIGINALS}/CTMMHumanHT12v4r2_15002873B"
# SNPTESTDATA="ctmm_1kGp3GoNL5_RAW_chr"
# SNPTESTOUTPUTDATA="ctmm_1kGp3GoNL5"
# QTLDATA="${OMICSDATA}/ctmm.humanhtv4r2.qtl.bed.gz"
# QTLINDEX="${OMICSDATA}/ctmm.humanhtv4r2.qtl.bed.gz.tbi"
#
# Indicate the COVARIATE-data to use
# Superseded alternative, kept for reference only. It used to be assigned here and was
# then silently overridden by the assignment below, so it never took effect:
# COVARIATES="${PHENOCOVEXCL}/covariates_aegs_combo.all.cov"
COVARIATES="/hpc/dhl_ec/SOMPROJECTDIR/_data/COVARIATES"
#
# Indicate the ANNOTATION-file to use
ANNOTATIONFILE="/hpc/dhl_ec/llandsmeer/_atheroexpress/_eqtl/_data/ANNOTATION"
#
### --- EXCLUSION TYPE & LIST--- ###
### Note: All analyses with ATHERO-EXPRESS data are presumed to be constrained to CEA-patients only.
### You can set the exclusion criteria 'NONAEGS/FEMALES/MALES' if you want to analyse
### all ATHERO-EXPRESS data!
# Set the exclusion type.
# For the ATHERO-EXPRESS, choose one of these options [DEFAULT/SMOKER/NONSMOKER/MALES/FEMALES/T2D/NONT2D/CKD/NONCKD/PRE2007/POST2007/NONAEGS/NONAEGSFEMALES/NONAEGSMALES]
# For the CTMM, choose one of these options [DEFAULT/SMOKER/NONSMOKER/MALES/FEMALES/T2D/NONT2D]
#
# ### ATHERO-EXPRESS specific exclusion lists
# EXCLUSION_TYPE="DEFAULT"
# EXCLUSION_LIST="${GENETICDATA}/exclusion_nonCEA.list"
# ### MALES ONLY ANALYSIS
# EXCLUSION_TYPE="EXCL_FEMALES"
# EXCLUSION_LIST="${GENETICDATA}/exclusion_nonCEA_Females.list"
# ### FEMALES ONLY ANALYSIS
# EXCLUSION_TYPE="EXCL_MALES"
# EXCLUSION_LIST="${GENETICDATA}/exclusion_nonCEA_Males.list"
# ### NON-CKD ONLY ANALYSIS
# EXCLUSION_TYPE="EXCL_CKD"
# EXCLUSION_LIST="${GENETICDATA}/exclusion_nonCEA_CKD.list"
# ### CKD ONLY ANALYSIS
# EXCLUSION_TYPE="EXCL_NONCKD"
# EXCLUSION_LIST="${GENETICDATA}/exclusion_nonCEA_nonCKD.list"
# ### NON-TYPE 2 DIABETES ONLY ANALYSIS
# EXCLUSION_TYPE="EXCL_T2D"
# EXCLUSION_LIST="${GENETICDATA}/exclusion_nonCEA_T2D.list"
# ### TYPE 2 DIABETES ONLY ANALYSIS
# EXCLUSION_TYPE="EXCL_NONT2D"
# EXCLUSION_LIST="${GENETICDATA}/exclusion_nonCEA_nonT2D.list"
# ### SMOKER ONLY ANALYSIS
# EXCLUSION_TYPE="EXCL_SMOKER"
# EXCLUSION_LIST="${GENETICDATA}/exclusion_nonCEA_nonSMOKER.list"
# ### NONSMOKER ONLY ANALYSIS
# EXCLUSION_TYPE="EXCL_NONSMOKER"
# EXCLUSION_LIST="${GENETICDATA}/exclusion_nonCEA_SMOKER.list"
# ### POST-2007 ONLY ANALYSIS
# EXCLUSION_TYPE="EXCL_PRE2007"
# EXCLUSION_LIST="${GENETICDATA}/exclusion_nonCEA_pre2007.list"
# ### PRE-2007 ONLY ANALYSIS
# EXCLUSION_TYPE="EXCL_POST2007"
# EXCLUSION_LIST="${GENETICDATA}/exclusion_nonCEA_post2007.list"
# ### NON-DIURETICS ONLY ANALYSIS
# EXCLUSION_TYPE="EXCL_DIURETICS"
# EXCLUSION_LIST="${GENETICDATA}/exclusion_nonCEA_DIURETICS.list"
# ### DIURETICS ONLY ANALYSIS
# EXCLUSION_TYPE="EXCL_NONDIURETICS"
# EXCLUSION_LIST="${GENETICDATA}/exclusion_nonCEA_nonDIURETICS.list"
#
### CTMMGS specific exclusion lists
### NOTE: despite the header, the exclusion list currently in effect is the AEGS one (exclusion_nonAEGS.list).
EXCLUSION_TYPE="EXCL_DEFAULT"
# Superseded alternatives, kept for reference only. They used to be assigned here and were
# then silently overridden by the last assignment, so they never took effect:
# EXCLUSION_LIST="${PHENOCOVEXCL}/exclusion_nonCTMM.list"
# EXCLUSION_LIST="${PHENOCOVEXCL}/exclusion_nonAEGS.list"
EXCLUSION_LIST="/hpc/dhl_ec/llandsmeer/ae/fifth_try/phenocov_excl/exclusion_nonAEGS.list"
# ### MALES ONLY ANALYSIS
# EXCLUSION_TYPE="EXCL_FEMALES"
# EXCLUSION_LIST="${PHENOCOVEXCL}/exclusion_nonCTMM_FEMALES.list"
# ### FEMALES ONLY ANALYSIS
# EXCLUSION_TYPE="EXCL_MALES"
# EXCLUSION_LIST="${PHENOCOVEXCL}/exclusion_nonCTMM_MALES.list"
# ### NON-CKD ONLY ANALYSIS
# EXCLUSION_TYPE="EXCL_CKD" #-- does not exist yet
# EXCLUSION_LIST="${PHENOCOVEXCL}/exclusion_nonCTMM_CKD.list"
# ### CKD ONLY ANALYSIS
# EXCLUSION_TYPE="EXCL_NONCKD" #-- does not exist yet
# EXCLUSION_LIST="${PHENOCOVEXCL}/exclusion_nonCTMM_nonCKD.list"
# ### NON-TYPE 2 DIABETES ONLY ANALYSIS
# EXCLUSION_TYPE="EXCL_T2D"
# EXCLUSION_LIST="${PHENOCOVEXCL}/exclusion_nonCTMM_T2D.list"
# ### TYPE 2 DIABETES ONLY ANALYSIS
# EXCLUSION_TYPE="EXCL_NONT2D"
# EXCLUSION_LIST="${PHENOCOVEXCL}/exclusion_nonCTMM_nonT2D.list"
# ### SMOKER ONLY ANALYSIS
# EXCLUSION_TYPE="EXCL_SMOKER"
# EXCLUSION_LIST="${PHENOCOVEXCL}/exclusion_nonCTMM_SMOKER.list"
# ### NONSMOKER ONLY ANALYSIS
# EXCLUSION_TYPE="EXCL_NONSMOKER"
# EXCLUSION_LIST="${PHENOCOVEXCL}/exclusion_nonCTMM_nonSMOKER.list"
# ### NON-DIURETICS ONLY ANALYSIS
# EXCLUSION_TYPE="EXCL_DIURETICS"
# EXCLUSION_LIST="${PHENOCOVEXCL}/exclusion_nonCTMM_DIURETICS.list"
# ### DIURETICS ONLY ANALYSIS
# EXCLUSION_TYPE="EXCL_NONDIURETICS"
# EXCLUSION_LIST="${PHENOCOVEXCL}/exclusion_nonCTMM_nonDIURETICS.list"
# ### ANALYSIS IN DATA WITH MONOCYTE COUNTS
# EXCLUSION_TYPE="EXCL_NONMONOCYTE"
# EXCLUSION_LIST="${PHENOCOVEXCL}/exclusion_nonCTMM_nonMONOCYTE.list"
### --- QTL SETTINGS --- ###
# NOTE(review): presumably the random seed, the permutation range and the nominal
# p-value threshold for the QTL analysis -- confirm against the QTLToolKit scripts.
SEEDNO_CONFIG=91149216
PERMSTART_CONFIG="1000"
PERMEND_CONFIG="10000"
NOMINAL_P="0.05"
### --- QCTOOL SETTINGS --- ###
# Variant filter thresholds (cf. the QCTOOL filtering flags -maf, -info, -hwe).
# NOTE(review): the scale of HWE_CONFIG (e.g. -log10 of the p-value) is not stated here -- confirm.
MAF_CONFIG="0.05"
INFO_CONFIG="0.8"
HWE_CONFIG="6"
### --- CLUMPING SETTINGS --- ###
CLUMP="Y" # plot clumped set? [Y/N]
CLUMP_THRESH="0.8" # Threshold for rsquared, optional
# GWAS summary file used for clumping.
# Superseded alternatives, kept for reference only. The 88-loci file used to be assigned
# here and was then silently overridden by the assignment below, so it never took effect:
# CLUMP_GWAS="${ROOTDIR}/cardiogramplusc4d.cad.57loci.forclumping.txt"
# CLUMP_GWAS="${ROOTDIR}/cardiogramplusc4d.cad.88loci.forclumping.txt"
CLUMP_GWAS="/hpc/dhl_ec/SOMPROJECTDIR/_data/cardiogramplusc4d.cad.164loci.forclumping.txt"
CLUMP_P1="5e-8"
CLUMP_P2="0.05"
CLUMP_KB="1000"
# Column names of the variant identifier and the p-value in CLUMP_GWAS.
CLUMP_GWAS_SNPFIELD="SNP"
CLUMP_GWAS_PVAL="P"
### --- LOCUSZOOM SETTINGS --- ###
# REGASSOCPLOT -- Do you want to plot regional associations using LocusZoom v1.3? default: N
# LDPOP -- Indicates the reference population for the LD map used by LocusZoom v1.3. default: EUR.
# LDBUILD -- Indicates the genome build used by LocusZoom v1.3. default: hg19.
# LDSOURCE -- Indicates the reference source used by LocusZoom v1.3. default: 1000G_March2012.
# LOCUSZOOM_SETTINGS -- Fill in specific settings to be used by LocusZoom v1.3 (previously documented here as LDSETTINGS).
# LZMARKERNAME -- Column name containing the variants used in the input data for LocusZoom v1.3. default: MarkerName.
# LZPVALUE -- Column name containing the p-values used in the input data for LocusZoom v1.3. default: P-value.
# LZDELIMITER -- Delimiter for the input data used by LocusZoom v1.3. default: tab.
# LZTHEME -- Theme used by LocusZoom v1.3 for plotting. default: publication
REGASSOCPLOT="Y"
LDPOP="EUR"
LDBUILD="hg19"
LDSOURCE="1000G_March2012"
LOCUSZOOM_SETTINGS="ldColors=\"#595A5C,#4C81BF,#1396D8,#C5D220,#F59D10,red,#9A3480\" showRecomb=TRUE drawMarkerNames=FALSE showRug=FALSE showAnnot=TRUE showRefsnpAnnot=TRUE showGenes=TRUE clean=TRUE bigDiamond=TRUE rfrows=10 refsnpLineWidth=2 refsnpTextSize=1.0 axisSize=1.25 axisTextSize=1.25 geneFontSize=1.25"
LZMARKERNAME="MarkerName"
LZPVALUE="P-value"
LZDELIMITER="tab"
LZTHEME="publication"
### --- RTC SETTINGS --- ###
# RTCCALCULATION -- Calculate RTC score? [Y/N]
# RTCTYPE -- When doing a genome-wide QTL analysis choose 'NORMAL', if using a selection of independent variants 'CONDITIONAL'
# RTCPVALLEVEL -- QTL p-value threshold to select variants for input to RTC analysis
# RTCGWAS -- Variants annotated to GWAS loci, the following versions are available:
# * 'GWAS.b37.1kGp3GoNL5.txt' includes rsID-based variant names, specifically for 1000G phase 3 + GoNL5 imputed data.
# * 'GWAS.b37.txt' includes chr_bp variant names
# * 'GWAS.b37.1kGp3GoNL5.handcurated_20180725.txt' includes rsID-based variant names (1kGp3 + GoNL5) and hand-curated from GWAS Catalog on 2018-07-25
# RTCHOTSPOTS -- File containing genome-wide (b37) hotspots
### --- FUNCTIONAL ENRICHMENT SETTINGS --- ###
# FUNCENRICH -- Perform functional enrichment?
# FUNCINFORMATION -- Some file with functional information, e.g. genome-wide transcription binding factors. The following files are available:
# * 'TFs.encode.sort.withzero.bed': with two digit chromosomes
# * 'TFs.encode.sort.bed': one or two digit chromosomes.
# * 'H3K27ac.encode.monocytescd14pos.bed': with two digit chromosomes; epigenetic marker of actively enhanced in monocytes CD14+
# * 'H3K4me1.encode.monocytescd14pos.bed': with two digit chromosomes; epigenetic marker of transcription in monocytes CD14+
# * 'H3K4me3.encode.monocytescd14pos.bed': with two digit chromosomes; epigenetic marker in monocytes CD14+
# * 'DNAse.encode.monocytescd14pos.bed': with two digit chromosomes; open chromatin markers in monocytes CD14+
# * 'H3K27ac.DNAse.active.encode.monocytescd14pos.bed': with two digit chromosomes; open chromatin markers intersected with actively enhanced H3K27ac markers in monocytes CD14+
# FUNCINFOFILENAME -- This will be printed as part of the functional enrichment analysis output-filename.
RTCCALCULATION="Y"
RTCTYPE="CONDITIONAL"
RTCPVALLEVEL="0.05"
RTCGWAS="GWAS.b37.1kGp3GoNL5.handcurated_20180725.txt"
RTCHOTSPOTS="hotspots_b37_hg19.bed"
FUNCENRICH="Y"
FUNCINFORMATION="H3K27ac.encode.monocytescd14pos.bed"
FUNCINFOFILENAME="H3K27ac.encode.monocytescd14pos"
# FUNCINFORMATION="H3K4me1.encode.monocytescd14pos.bed"
# FUNCINFOFILENAME="H3K4me1.encode.monocytescd14pos"
# FUNCINFORMATION="H3K4me3.encode.monocytescd14pos.bed"
# FUNCINFOFILENAME="H3K4me3.encode.monocytescd14pos"
# FUNCINFORMATION="DNAse.encode.monocytescd14pos.bed"
# FUNCINFOFILENAME="DNAse.encode.monocytescd14pos"
# FUNCINFORMATION="H3K27ac.DNAse.active.encode.monocytescd14pos.bed"
# FUNCINFOFILENAME="H3K27ac.DNAse.active.encode.monocytescd14pos"