Skip to content

Commit

Permalink
Updated the test suite for the GSEA Desktop 4.2.0 bugfix release
Browse files Browse the repository at this point in the history
  • Loading branch information
davideby committed Nov 3, 2021
1 parent feedfa4 commit ade2255
Show file tree
Hide file tree
Showing 17 changed files with 239 additions and 39 deletions.
6 changes: 3 additions & 3 deletions gpunit_breadthTest/other/Dressman_81.2_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ params:
collapse.dataset: "No_Collapse"
#chip.platform.file:
# Renaming result file for ease of testing.
output.file.name: "Dressman_81.zip"
output.file.name: "Dressman_81.2.zip"
scoring.scheme: "weighted"
metric.for.ranking.genes: "Signal2Noise"
gene.list.sorting.mode: "real"
Expand All @@ -40,6 +40,6 @@ params:
assertions:
jobStatus: success
files:
"Dressman_81.zip":
"Dressman_81.2.zip":
diffCmd: ../diffGseaResults.sh
diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/Dressman_81.2_test/Dressman_81.zip"
diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/Dressman_81.2_test/Dressman_81.2.zip"
6 changes: 3 additions & 3 deletions gpunit_breadthTest/other/Lin.et.al.2008.2_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ params:
collapse.dataset: "No_Collapse"
#chip.platform.file:
# Renaming result file for ease of testing.
output.file.name: "Lin.et.al.2008.zip"
output.file.name: "Lin.et.al.2008.2.zip"
scoring.scheme: "weighted"
metric.for.ranking.genes: "Signal2Noise"
gene.list.sorting.mode: "real"
Expand All @@ -40,6 +40,6 @@ params:
assertions:
jobStatus: success
files:
"Lin.et.al.2008.zip":
"Lin.et.al.2008.2.zip":
diffCmd: ../diffGseaResults.sh
diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/Lin.et.al.2008.2_test/Lin.et.al.2008.zip"
diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/Lin.et.al.2008.2_test/Lin.et.al.2008.2.zip"
6 changes: 3 additions & 3 deletions gpunit_breadthTest/other/MD.outcome.2_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ params:
collapse.dataset: "No_Collapse"
#chip.platform.file:
# Renaming result file for ease of testing.
output.file.name: "MD.MD.outcome.2.cls_0_versus_NA.zip"
output.file.name: "MD.outcome.MD.outcome.2.cls_0_versus_NA.zip"
scoring.scheme: "weighted"
metric.for.ranking.genes: "Signal2Noise"
gene.list.sorting.mode: "real"
Expand All @@ -40,6 +40,6 @@ params:
assertions:
jobStatus: success
files:
"MD.MD.outcome.2.cls_0_versus_NA.zip":
"MD.outcome.MD.outcome.2.cls_0_versus_NA.zip":
diffCmd: ../diffGseaResults.sh
diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/MD.outcome.2_test/MD.MD.outcome.2.cls_0_versus_NA.zip"
diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/MD.outcome.2_test/MD.outcome.MD.outcome.2.cls_0_versus_NA.zip"
6 changes: 3 additions & 3 deletions gpunit_breadthTest/other/MD.outcome.BCAT_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ params:
collapse.dataset: "No_Collapse"
#chip.platform.file:
# Renaming result file for ease of testing.
output.file.name: "MD.outcome.zip"
output.file.name: "MD.outcome.BCAT.zip"
scoring.scheme: "weighted"
metric.for.ranking.genes: "Signal2Noise"
gene.list.sorting.mode: "real"
Expand All @@ -40,6 +40,6 @@ params:
assertions:
jobStatus: success
files:
"MD.outcome.zip":
"MD.outcome.BCAT.zip":
diffCmd: ../diffGseaResults.sh
diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/MD.outcome.BCAT_test/MD.outcome.zip"
diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/MD.outcome.BCAT_test/MD.outcome.BCAT.zip"
6 changes: 3 additions & 3 deletions gpunit_breadthTest/other/Ross_et_al.3-class_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ params:
collapse.dataset: "No_Collapse"
#chip.platform.file:
# Renaming result file for ease of testing.
output.file.name: "Ross_et_al.zip"
output.file.name: "Ross_et_al.3-class.zip"
scoring.scheme: "weighted"
metric.for.ranking.genes: "Signal2Noise"
gene.list.sorting.mode: "real"
Expand All @@ -40,6 +40,6 @@ params:
assertions:
jobStatus: success
files:
"Ross_et_al.zip":
"Ross_et_al.3-class.zip":
diffCmd: ../diffGseaResults.sh
diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/Ross_et_al.3-class_test/Ross_et_al.zip"
diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/Ross_et_al.3-class_test/Ross_et_al.3-class.zip"
6 changes: 3 additions & 3 deletions gpunit_breadthTest/other/dfci.subset_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ params:
collapse.dataset: "No_Collapse"
#chip.platform.file:
# Renaming result file for ease of testing.
output.file.name: "dfci.zip"
output.file.name: "dfci.subset.zip"
scoring.scheme: "weighted"
metric.for.ranking.genes: "Signal2Noise"
gene.list.sorting.mode: "real"
Expand All @@ -40,6 +40,6 @@ params:
assertions:
jobStatus: success
files:
"dfci.zip":
"dfci.subset.zip":
diffCmd: ../diffGseaResults.sh
diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/dfci.subset_test/dfci.zip"
diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/dfci.subset_test/dfci.subset.zip"
6 changes: 3 additions & 3 deletions gpunit_breadthTest/other/meta.1.2_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ params:
collapse.dataset: "No_Collapse"
#chip.platform.file:
# Renaming result file for ease of testing.
output.file.name: "meta.meta.1.2.cls_breast_versus_colon.zip"
output.file.name: "meta.1.meta.1.2.cls_breast_versus_colon.zip"
scoring.scheme: "weighted"
metric.for.ranking.genes: "Signal2Noise"
gene.list.sorting.mode: "real"
Expand All @@ -40,6 +40,6 @@ params:
assertions:
jobStatus: success
files:
"meta.meta.1.2.cls_breast_versus_colon.zip":
"meta.1.meta.1.2.cls_breast_versus_colon.zip":
diffCmd: ../diffGseaResults.sh
diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/meta.1.2_test/meta.meta.1.2.cls_breast_versus_colon.zip"
diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/meta.1.2_test/meta.1.meta.1.2.cls_breast_versus_colon.zip"
6 changes: 3 additions & 3 deletions gpunit_breadthTest/other/primet1.2_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ params:
collapse.dataset: "No_Collapse"
#chip.platform.file:
# Renaming result file for ease of testing.
output.file.name: "primet1.zip"
output.file.name: "primet1.2.zip"
scoring.scheme: "weighted"
metric.for.ranking.genes: "Signal2Noise"
gene.list.sorting.mode: "real"
Expand All @@ -40,6 +40,6 @@ params:
assertions:
jobStatus: success
files:
"primet1.zip":
"primet1.2.zip":
diffCmd: ../diffGseaResults.sh
diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/primet1.2_test/primet1.zip"
diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/primet1.2_test/primet1.2.zip"
50 changes: 50 additions & 0 deletions gpunit_functionality/collapse/collapse_NaN_Missing_max_test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Copyright (c) 2003-2021 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California. All rights reserved.
#module: urn:lsid:broad.mit.edu:cancer.software.genepattern.module.analysis:00072:20
# Test definition for the GSEA module: the job is run with the values under 'params' and
# its outputs are checked against the entries under 'assertions'.
module: GSEA
name: GSEA collapse_NaN_Missing_max_test
description: Test the GSEA 'collapse dataset' function handling NaN and Missing values, collapse to max of probes. Tests are centered on HTR4, HTR7, NPAL2, NPAL3, GSTK1.
params:
  expression.dataset: "<%gpunit.testData%>gpunit/GSEA/v20/input/Diabetes_hgu133a_NaN_missing_vals.gct"
  gene.sets.database: [ "<%gpunit.testData%>gpunit/GSEA/v20/input/c1.symbols.reduced.gmt" ]
  number.of.permutations: "10"
  # Uses P53_6samples.cls because it happens to have a reasonable class template for this use
  phenotype.labels: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_6samples.cls"
  target.profile: ""
  permutation.type: "phenotype"
  collapse.dataset: "Collapse"
  chip.platform.file: "<%gpunit.testData%>gpunit/GSEA/v20/input/HG_U133A.chip"
  # Modifying the ZIP name here so that the diffCmd can find the RNK inside. We could modify the diffCmd
  # to be able to find it, but that makes the code somewhat complicated.
  output.file.name: "Diabetes_hgu133a_NaN_missing_vals_collapsed_to_symbols.zip"
  scoring.scheme: "weighted"
  metric.for.ranking.genes: "Signal2Noise"
  gene.list.sorting.mode: "real"
  gene.list.ordering.mode: "descending"
  max.gene.set.size: "500"
  min.gene.set.size: "15"
  collapsing.mode.for.probe.sets.with.more.than.one.match: "Max_probe"
  normalization.mode: "meandiv"
  randomization.mode: "no_balance"
  omit.features.with.no.symbol.match: "true"
  median.for.class.metrics: "false"
  number.of.markers: "100"
  # Note that we use a fixed random seed rather than the 'timestamp' default so that we'll have reproducible test results
  random.seed: "149"
  create.svgs: "false"
  create.gcts: "true"
  save.random.ranked.lists: "false"
  plot.graphs.for.the.top.sets.of.each.phenotype: "20"
  make.detailed.gene.set.report: "false"
  selected.gene.sets: ""
  dev.mode: "true"
  alt.delim: ""
  create.zip: "true"
assertions:
  jobStatus: success
  files:
    # Each entry names one job output file; 'diffCmd' is the script used to compare it
    # with the reference file given by 'diff'.
    "Diabetes_hgu133a_NaN_missing_vals_collapsed_to_symbols.zip":
      diffCmd: ../diffGseaResults.sh
      diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/collapse/collapse_NaN_Missing_max_test/Diabetes_hgu133a_NaN_missing_vals_collapsed_to_symbols.zip"
    # stdout is not diffed verbatim: grepMessages.sh greps it for a list of fixed strings
    # (presumably the lines of stdoutMatches.txt - confirm the argument order used by the runner).
    "stdout.txt":
      diffCmd: ../grepMessages.sh
      diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/collapse/collapse_NaN_Missing_max_test/stdoutMatches.txt"
10 changes: 10 additions & 0 deletions gpunit_functionality/grepMessages.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/sh
# Copyright (c) 2003-2021 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California. All rights reserved.
#
# Usage: grepMessages.sh <fixed-strings-file> <file-to-search>
#
# The file passed in $1 should be a list of fixed strings, one per line, to be checked if they are found in the $2 file.
# We verify by checking the count of matching lines in $2 against the count of fixed strings in $1.
# This is an imperfect check but should be good enough provided the fixed strings are sufficiently detailed to appear
# only *once* in the $2 file (and no single line of $2 contains two of them).
#
# Exit status: 0 when the counts agree, 1 when they differ, 2 when the arguments are missing or unreadable.

# Fail with a clear message instead of an arithmetic/grep error when called incorrectly.
if [ "$#" -lt 2 ] || [ ! -r "$1" ] || [ ! -r "$2" ]; then
    echo "usage: $0 <fixed-strings-file> <file-to-search>" >&2
    exit 2
fi

# grep -c still prints the count (0) when nothing matches; its own non-zero status is deliberately ignored.
grepOut=$(grep -c -F -f "$1" -- "$2")

# Count the fixed strings with grep -c '' rather than wc -l so that a final line
# lacking a trailing newline is still counted.
numChecks=$(grep -c '' -- "$1")

# Report a plain pass/fail status. Exiting with the raw difference could be negative
# (an error in some shells) or wrap around to 0 for a difference of 256.
if [ "$numChecks" -ne "$grepOut" ]; then
    echo "grepMessages: expected $numChecks matching line(s) but found $grepOut" >&2
    exit 1
fi
exit 0
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Copyright (c) 2003-2021 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California. All rights reserved.
#module: urn:lsid:broad.mit.edu:cancer.software.genepattern.module.analysis:00072:20
# Test definition for the GSEA module: the job is run with the values under 'params' and
# its outputs are checked against the entries under 'assertions'.
module: GSEA
name: GSEA continuous_metric_NaN_Missing_test
description: Test the GSEA continuous metric handling NaN and Missing values. Tests are centered on HTR4, HTR7, NPAL2, NPAL3, GSTK1.
params:
  # Using a dataset needing collapse because it happens to have missing & NaN values
  expression.dataset: "<%gpunit.testData%>gpunit/GSEA/v20/input/Diabetes_hgu133a_NaN_missing_vals.gct"
  gene.sets.database: [ "<%gpunit.testData%>gpunit/GSEA/v20/input/c1.symbols.reduced.gmt" ]
  number.of.permutations: "10"
  phenotype.labels: "<%gpunit.testData%>gpunit/GSEA/v20/input/Diabetes_6samples_fake_as_cont_pheno.cls"
  target.profile: ""
  permutation.type: "phenotype"
  collapse.dataset: "Collapse"
  chip.platform.file: "<%gpunit.testData%>gpunit/GSEA/v20/input/HG_U133A.chip"
  # Modifying the ZIP name here so that the diffCmd can find the RNK inside. We could modify the diffCmd
  # to be able to find it, but that makes the code somewhat complicated.
  output.file.name: "Diabetes_hgu133a_NaN_missing_vals_collapsed_to_symbols.zip"
  scoring.scheme: "weighted"
  # Spearman is used together with the continuous-phenotype CLS file given in phenotype.labels above.
  metric.for.ranking.genes: "Spearman"
  gene.list.sorting.mode: "real"
  gene.list.ordering.mode: "descending"
  max.gene.set.size: "500"
  min.gene.set.size: "15"
  collapsing.mode.for.probe.sets.with.more.than.one.match: "Max_probe"
  normalization.mode: "meandiv"
  randomization.mode: "no_balance"
  omit.features.with.no.symbol.match: "true"
  median.for.class.metrics: "false"
  number.of.markers: "100"
  # Note that we use a fixed random seed rather than the 'timestamp' default so that we'll have reproducible test results
  random.seed: "149"
  create.svgs: "false"
  create.gcts: "true"
  save.random.ranked.lists: "false"
  plot.graphs.for.the.top.sets.of.each.phenotype: "20"
  make.detailed.gene.set.report: "false"
  selected.gene.sets: ""
  dev.mode: "true"
  alt.delim: ""
  create.zip: "true"
assertions:
  jobStatus: success
  files:
    # Each entry names one job output file; 'diffCmd' is the script used to compare it
    # with the reference file given by 'diff'.
    "Diabetes_hgu133a_NaN_missing_vals_collapsed_to_symbols.zip":
      diffCmd: ../diffGseaResults.sh
      diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/metrics/continuous_metric_NaN_Missing_test/Diabetes_hgu133a_NaN_missing_vals_collapsed_to_symbols.zip"
    # stdout is not diffed verbatim: grepMessages.sh greps it for a list of fixed strings
    # (presumably the lines of stdoutMatches.txt - confirm the argument order used by the runner).
    "stdout.txt":
      diffCmd: ../grepMessages.sh
      diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/metrics/continuous_metric_NaN_Missing_test/stdoutMatches.txt"

44 changes: 44 additions & 0 deletions gpunit_functionality/metrics/metric_Diff_of_Classes_test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Copyright (c) 2003-2021 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California. All rights reserved.
#module: urn:lsid:broad.mit.edu:cancer.software.genepattern.module.analysis:00072:20
# Test definition for the GSEA module: the job is run with the values under 'params' and
# its outputs are checked against the entries under 'assertions'.
module: GSEA
name: GSEA metric_Diff_of_Classes_test
description: Test GSEA using the Diff_of_Classes metric. Variation of user_gene_set_test with a reduced dataset.
params:
  expression.dataset: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_14samples_collapsed_symbols.gct"
  gene.sets.database: [ "<%gpunit.testData%>gpunit/GSEA/v20/input/c1.symbols.reduced.gmt" ]
  number.of.permutations: "10"
  phenotype.labels: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_14samples.cls"
  target.profile: ""
  permutation.type: "phenotype"
  # No chip file is supplied because the dataset is not collapsed in this test.
  collapse.dataset: "No_Collapse"
  #chip.platform.file:
  # The ZIP name is derived from the expression dataset's base name; the assertion
  # below expects the result as P53_14samples_collapsed_symbols.zip.
  output.file.name: "<expression.dataset_basename>.zip"
  scoring.scheme: "weighted"
  metric.for.ranking.genes: "Diff_of_Classes"
  gene.list.sorting.mode: "real"
  gene.list.ordering.mode: "descending"
  max.gene.set.size: "500"
  min.gene.set.size: "15"
  collapsing.mode.for.probe.sets.with.more.than.one.match: "Max_probe"
  normalization.mode: "meandiv"
  randomization.mode: "no_balance"
  omit.features.with.no.symbol.match: "true"
  median.for.class.metrics: "false"
  number.of.markers: "100"
  # Note that we use a fixed random seed rather than the 'timestamp' default so that we'll have reproducible test results
  random.seed: "149"
  create.svgs: "false"
  create.gcts: "false"
  save.random.ranked.lists: "false"
  plot.graphs.for.the.top.sets.of.each.phenotype: "20"
  make.detailed.gene.set.report: "false"
  selected.gene.sets: ""
  dev.mode: "true"
  alt.delim: ""
  create.zip: "true"
assertions:
  jobStatus: success
  files:
    # The job output file named here is compared, using 'diffCmd', with the reference
    # file given by 'diff'.
    "P53_14samples_collapsed_symbols.zip":
      diffCmd: ../diffGseaResults.sh
      diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/metrics/metric_Diff_of_Classes_test/P53_14samples_collapsed_symbols.zip"
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Copyright (c) 2003-2019 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California. All rights reserved.
# Copyright (c) 2003-2021 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California. All rights reserved.
#module: urn:lsid:broad.mit.edu:cancer.software.genepattern.module.analysis:000142:15
module: GSEA
name: GSEA metric_Ratio_of_Means_test
description: Test GSEA using the Ratio_of_Means metric. Variation of user_gene_set_test with a reduced dataset.
name: GSEA metric_Ratio_of_Classes_test
description: Test GSEA using the Ratio_of_Classes metric. Variation of user_gene_set_test with a reduced dataset.
params:
expression.dataset: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_14samples_collapsed_symbols.gct"
gene.sets.database: [ "<%gpunit.testData%>gpunit/GSEA/v20/input/c1.symbols.reduced.gmt" ]
Expand All @@ -14,7 +14,7 @@ params:
#chip.platform.file:
output.file.name: "<expression.dataset_basename>.zip"
scoring.scheme: "weighted"
metric.for.ranking.genes: "Ratio_of_Means"
metric.for.ranking.genes: "Ratio_of_Classes"
gene.list.sorting.mode: "real"
gene.list.ordering.mode: "descending"
max.gene.set.size: "500"
Expand All @@ -41,4 +41,4 @@ assertions:
files:
"P53_14samples_collapsed_symbols.zip":
diffCmd: ../diffGseaResults.sh
diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/metrics/metric_Ratio_of_Means_test/P53_14samples_collapsed_symbols.zip"
diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/metrics/metric_Ratio_of_Classes_test/P53_14samples_collapsed_symbols.zip"
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
# Copyright (c) 2003-2019 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California. All rights reserved.
#module: urn:lsid:broad.mit.edu:cancer.software.genepattern.module.analysis:00072:20
module: GSEA
name: GSEA metric_Diff_of_Means_test
description: Test GSEA using the Diff_of_Means metric. Variation of user_gene_set_test with a reduced dataset.
name: GSEA metric_Spearman_test
description: Test GSEA using the Spearman metric. Variation of user_gene_set_test with a reduced dataset and a synthetic continuous phenotype CLS file.
params:
expression.dataset: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_14samples_collapsed_symbols.gct"
gene.sets.database: [ "<%gpunit.testData%>gpunit/GSEA/v20/input/c1.symbols.reduced.gmt" ]
number.of.permutations: "10"
phenotype.labels: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_14samples.cls"
phenotype.labels: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_14samples_fake_as_cont_pheno.cls"
target.profile: ""
permutation.type: "phenotype"
collapse.dataset: "No_Collapse"
#chip.platform.file:
output.file.name: "<expression.dataset_basename>.zip"
scoring.scheme: "weighted"
metric.for.ranking.genes: "Diff_of_Means"
metric.for.ranking.genes: "Spearman"
gene.list.sorting.mode: "real"
gene.list.ordering.mode: "descending"
max.gene.set.size: "500"
Expand All @@ -41,4 +41,4 @@ assertions:
files:
"P53_14samples_collapsed_symbols.zip":
diffCmd: ../diffGseaResults.sh
diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/metrics/metric_Diff_of_Means_test/P53_14samples_collapsed_symbols.zip"
diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/metrics/metric_Spearman_test/P53_14samples_collapsed_symbols.zip"
Loading

0 comments on commit ade2255

Please sign in to comment.