From ade2255856466459fe91a2c59d0df7df3670a7b2 Mon Sep 17 00:00:00 2001 From: eby Date: Tue, 2 Nov 2021 17:01:12 -0700 Subject: [PATCH 1/5] Updated the test suite for the GSEA Desktop 4.2.0 bugfix release --- .../other/Dressman_81.2_test.yml | 6 +-- .../other/Lin.et.al.2008.2_test.yml | 6 +-- .../other/MD.outcome.2_test.yml | 6 +-- .../other/MD.outcome.BCAT_test.yml | 6 +-- .../other/Ross_et_al.3-class_test.yml | 6 +-- gpunit_breadthTest/other/dfci.subset_test.yml | 6 +-- gpunit_breadthTest/other/meta.1.2_test.yml | 6 +-- gpunit_breadthTest/other/primet1.2_test.yml | 6 +-- .../collapse_NaN_Missing_max_test.yml | 50 ++++++++++++++++++ gpunit_functionality/grepMessages.sh | 10 ++++ .../continuous_metric_NaN_Missing_test.yml | 51 +++++++++++++++++++ .../metrics/metric_Diff_of_Classes_test.yml | 44 ++++++++++++++++ ...t.yml => metric_Ratio_of_Classes_test.yml} | 10 ++-- ...eans_test.yml => metric_Spearman_test.yml} | 10 ++-- ... => metric_log2_ratio_of_classes_test.yml} | 8 +-- ...lid_too_few_samples_gene_set_perm_test.yml | 45 ++++++++++++++++ .../invalid_too_few_samples_test.yml | 2 +- 17 files changed, 239 insertions(+), 39 deletions(-) create mode 100644 gpunit_functionality/collapse/collapse_NaN_Missing_max_test.yml create mode 100755 gpunit_functionality/grepMessages.sh create mode 100644 gpunit_functionality/metrics/continuous_metric_NaN_Missing_test.yml create mode 100644 gpunit_functionality/metrics/metric_Diff_of_Classes_test.yml rename gpunit_functionality/metrics/{metric_Ratio_of_Means_test.yml => metric_Ratio_of_Classes_test.yml} (84%) rename gpunit_functionality/metrics/{metric_Diff_of_Means_test.yml => metric_Spearman_test.yml} (84%) rename gpunit_functionality/metrics/{metric_log2_ratio_of_means_test.yml => metric_log2_ratio_of_classes_test.yml} (88%) create mode 100644 gpunit_functionality/phenotype_cls/invalid_too_few_samples_gene_set_perm_test.yml diff --git a/gpunit_breadthTest/other/Dressman_81.2_test.yml 
b/gpunit_breadthTest/other/Dressman_81.2_test.yml index 7040454..ef34a27 100644 --- a/gpunit_breadthTest/other/Dressman_81.2_test.yml +++ b/gpunit_breadthTest/other/Dressman_81.2_test.yml @@ -13,7 +13,7 @@ params: collapse.dataset: "No_Collapse" #chip.platform.file: # Renaming result file for ease of testing. - output.file.name: "Dressman_81.zip" + output.file.name: "Dressman_81.2.zip" scoring.scheme: "weighted" metric.for.ranking.genes: "Signal2Noise" gene.list.sorting.mode: "real" @@ -40,6 +40,6 @@ params: assertions: jobStatus: success files: - "Dressman_81.zip": + "Dressman_81.2.zip": diffCmd: ../diffGseaResults.sh - diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/Dressman_81.2_test/Dressman_81.zip" + diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/Dressman_81.2_test/Dressman_81.2.zip" diff --git a/gpunit_breadthTest/other/Lin.et.al.2008.2_test.yml b/gpunit_breadthTest/other/Lin.et.al.2008.2_test.yml index c4bc579..1272bfe 100644 --- a/gpunit_breadthTest/other/Lin.et.al.2008.2_test.yml +++ b/gpunit_breadthTest/other/Lin.et.al.2008.2_test.yml @@ -13,7 +13,7 @@ params: collapse.dataset: "No_Collapse" #chip.platform.file: # Renaming result file for ease of testing. 
- output.file.name: "Lin.et.al.2008.zip" + output.file.name: "Lin.et.al.2008.2.zip" scoring.scheme: "weighted" metric.for.ranking.genes: "Signal2Noise" gene.list.sorting.mode: "real" @@ -40,6 +40,6 @@ params: assertions: jobStatus: success files: - "Lin.et.al.2008.zip": + "Lin.et.al.2008.2.zip": diffCmd: ../diffGseaResults.sh - diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/Lin.et.al.2008.2_test/Lin.et.al.2008.zip" + diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/Lin.et.al.2008.2_test/Lin.et.al.2008.2.zip" diff --git a/gpunit_breadthTest/other/MD.outcome.2_test.yml b/gpunit_breadthTest/other/MD.outcome.2_test.yml index d4b7659..f40470d 100644 --- a/gpunit_breadthTest/other/MD.outcome.2_test.yml +++ b/gpunit_breadthTest/other/MD.outcome.2_test.yml @@ -13,7 +13,7 @@ params: collapse.dataset: "No_Collapse" #chip.platform.file: # Renaming result file for ease of testing. - output.file.name: "MD.MD.outcome.2.cls_0_versus_NA.zip" + output.file.name: "MD.outcome.MD.outcome.2.cls_0_versus_NA.zip" scoring.scheme: "weighted" metric.for.ranking.genes: "Signal2Noise" gene.list.sorting.mode: "real" @@ -40,6 +40,6 @@ params: assertions: jobStatus: success files: - "MD.MD.outcome.2.cls_0_versus_NA.zip": + "MD.outcome.MD.outcome.2.cls_0_versus_NA.zip": diffCmd: ../diffGseaResults.sh - diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/MD.outcome.2_test/MD.MD.outcome.2.cls_0_versus_NA.zip" + diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/MD.outcome.2_test/MD.outcome.MD.outcome.2.cls_0_versus_NA.zip" diff --git a/gpunit_breadthTest/other/MD.outcome.BCAT_test.yml b/gpunit_breadthTest/other/MD.outcome.BCAT_test.yml index 27a8c20..0a4ff67 100644 --- a/gpunit_breadthTest/other/MD.outcome.BCAT_test.yml +++ b/gpunit_breadthTest/other/MD.outcome.BCAT_test.yml @@ -13,7 +13,7 @@ params: collapse.dataset: "No_Collapse" #chip.platform.file: # Renaming result file for ease of testing. 
- output.file.name: "MD.outcome.zip" + output.file.name: "MD.outcome.BCAT.zip" scoring.scheme: "weighted" metric.for.ranking.genes: "Signal2Noise" gene.list.sorting.mode: "real" @@ -40,6 +40,6 @@ params: assertions: jobStatus: success files: - "MD.outcome.zip": + "MD.outcome.BCAT.zip": diffCmd: ../diffGseaResults.sh - diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/MD.outcome.BCAT_test/MD.outcome.zip" + diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/MD.outcome.BCAT_test/MD.outcome.BCAT.zip" diff --git a/gpunit_breadthTest/other/Ross_et_al.3-class_test.yml b/gpunit_breadthTest/other/Ross_et_al.3-class_test.yml index b71f077..8787932 100644 --- a/gpunit_breadthTest/other/Ross_et_al.3-class_test.yml +++ b/gpunit_breadthTest/other/Ross_et_al.3-class_test.yml @@ -13,7 +13,7 @@ params: collapse.dataset: "No_Collapse" #chip.platform.file: # Renaming result file for ease of testing. - output.file.name: "Ross_et_al.zip" + output.file.name: "Ross_et_al.3-class.zip" scoring.scheme: "weighted" metric.for.ranking.genes: "Signal2Noise" gene.list.sorting.mode: "real" @@ -40,6 +40,6 @@ params: assertions: jobStatus: success files: - "Ross_et_al.zip": + "Ross_et_al.3-class.zip": diffCmd: ../diffGseaResults.sh - diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/Ross_et_al.3-class_test/Ross_et_al.zip" + diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/Ross_et_al.3-class_test/Ross_et_al.3-class.zip" diff --git a/gpunit_breadthTest/other/dfci.subset_test.yml b/gpunit_breadthTest/other/dfci.subset_test.yml index f050811..3a7de26 100644 --- a/gpunit_breadthTest/other/dfci.subset_test.yml +++ b/gpunit_breadthTest/other/dfci.subset_test.yml @@ -13,7 +13,7 @@ params: collapse.dataset: "No_Collapse" #chip.platform.file: # Renaming result file for ease of testing. 
- output.file.name: "dfci.zip" + output.file.name: "dfci.subset.zip" scoring.scheme: "weighted" metric.for.ranking.genes: "Signal2Noise" gene.list.sorting.mode: "real" @@ -40,6 +40,6 @@ params: assertions: jobStatus: success files: - "dfci.zip": + "dfci.subset.zip": diffCmd: ../diffGseaResults.sh - diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/dfci.subset_test/dfci.zip" + diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/dfci.subset_test/dfci.subset.zip" diff --git a/gpunit_breadthTest/other/meta.1.2_test.yml b/gpunit_breadthTest/other/meta.1.2_test.yml index bcbd1d6..cb4837f 100644 --- a/gpunit_breadthTest/other/meta.1.2_test.yml +++ b/gpunit_breadthTest/other/meta.1.2_test.yml @@ -13,7 +13,7 @@ params: collapse.dataset: "No_Collapse" #chip.platform.file: # Renaming result file for ease of testing. - output.file.name: "meta.meta.1.2.cls_breast_versus_colon.zip" + output.file.name: "meta.1.meta.1.2.cls_breast_versus_colon.zip" scoring.scheme: "weighted" metric.for.ranking.genes: "Signal2Noise" gene.list.sorting.mode: "real" @@ -40,6 +40,6 @@ params: assertions: jobStatus: success files: - "meta.meta.1.2.cls_breast_versus_colon.zip": + "meta.1.meta.1.2.cls_breast_versus_colon.zip": diffCmd: ../diffGseaResults.sh - diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/meta.1.2_test/meta.meta.1.2.cls_breast_versus_colon.zip" + diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/meta.1.2_test/meta.1.meta.1.2.cls_breast_versus_colon.zip" diff --git a/gpunit_breadthTest/other/primet1.2_test.yml b/gpunit_breadthTest/other/primet1.2_test.yml index 917d239..356fc55 100644 --- a/gpunit_breadthTest/other/primet1.2_test.yml +++ b/gpunit_breadthTest/other/primet1.2_test.yml @@ -13,7 +13,7 @@ params: collapse.dataset: "No_Collapse" #chip.platform.file: # Renaming result file for ease of testing. 
- output.file.name: "primet1.zip" + output.file.name: "primet1.2.zip" scoring.scheme: "weighted" metric.for.ranking.genes: "Signal2Noise" gene.list.sorting.mode: "real" @@ -40,6 +40,6 @@ params: assertions: jobStatus: success files: - "primet1.zip": + "primet1.2.zip": diffCmd: ../diffGseaResults.sh - diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/primet1.2_test/primet1.zip" + diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/primet1.2_test/primet1.2.zip" diff --git a/gpunit_functionality/collapse/collapse_NaN_Missing_max_test.yml b/gpunit_functionality/collapse/collapse_NaN_Missing_max_test.yml new file mode 100644 index 0000000..2176d9a --- /dev/null +++ b/gpunit_functionality/collapse/collapse_NaN_Missing_max_test.yml @@ -0,0 +1,50 @@ +# Copyright (c) 2003-2021 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California. All rights reserved. +#module: urn:lsid:broad.mit.edu:cancer.software.genepattern.module.analysis:00072:20 +module: GSEA +name: GSEA collapse_NaN_Missing_max_test +description: Test the GSEA 'collapse dataset' function handling NaN and Missing values, collapse to max of probes. Tests are centered on HTR4, HTR7, NPAL2, NPAL3, GSTK1. +params: + expression.dataset: "<%gpunit.testData%>gpunit/GSEA/v20/input/Diabetes_hgu133a_NaN_missing_vals.gct" + gene.sets.database: [ "<%gpunit.testData%>gpunit/GSEA/v20/input/c1.symbols.reduced.gmt" ] + number.of.permutations: "10" + # Uses P53_6samples.cls because it happens to have a reasonable class template for this use + phenotype.labels: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_6samples.cls" + target.profile: "" + permutation.type: "phenotype" + collapse.dataset: "Collapse" + chip.platform.file: "<%gpunit.testData%>gpunit/GSEA/v20/input/HG_U133A.chip" + # Modifying the ZIP name here so that the diffCmd can find the RNK inside. We could modify the diffCmd + # to be able to find it, but that makes the code somewhat complicated. 
+ output.file.name: "Diabetes_hgu133a_NaN_missing_vals_collapsed_to_symbols.zip" + scoring.scheme: "weighted" + metric.for.ranking.genes: "Signal2Noise" + gene.list.sorting.mode: "real" + gene.list.ordering.mode: "descending" + max.gene.set.size: "500" + min.gene.set.size: "15" + collapsing.mode.for.probe.sets.with.more.than.one.match: "Max_probe" + normalization.mode: "meandiv" + randomization.mode: "no_balance" + omit.features.with.no.symbol.match: "true" + median.for.class.metrics: "false" + number.of.markers: "100" + # Note that we use a fixed random seed rather than the 'timestamp' default so that we'll have reproducible test results + random.seed: "149" + create.svgs: "false" + create.gcts: "true" + save.random.ranked.lists: "false" + plot.graphs.for.the.top.sets.of.each.phenotype: "20" + make.detailed.gene.set.report: "false" + selected.gene.sets: "" + dev.mode: "true" + alt.delim: "" + create.zip: "true" +assertions: + jobStatus: success + files: + "Diabetes_hgu133a_NaN_missing_vals_collapsed_to_symbols.zip": + diffCmd: ../diffGseaResults.sh + diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/collapse/collapse_NaN_Missing_max_test/Diabetes_hgu133a_NaN_missing_vals_collapsed_to_symbols.zip" + "stdout.txt": + diffCmd: ../grepMessages.sh + diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/collapse/collapse_NaN_Missing_max_test/stdoutMatches.txt" diff --git a/gpunit_functionality/grepMessages.sh b/gpunit_functionality/grepMessages.sh new file mode 100755 index 0000000..7775ea7 --- /dev/null +++ b/gpunit_functionality/grepMessages.sh @@ -0,0 +1,10 @@ +# Copyright (c) 2003-2021 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California. All rights reserved. +#!/bin/sh + +# The file passed in $1 should be a list of fixed strings, one per line, to be checked if they are found in the $2 file. 
+# We verify by checking the count of matches against the count of fixed strings. +# This is an imperfect check but should be good enough provided the fixed strings are sufficiently detailed to appear +# only *once* in the $2 file. +grepOut=`grep -c -F -f $1 $2` +numChecks=`cat $1 | wc -l` +exit $(( numChecks - grepOut )) diff --git a/gpunit_functionality/metrics/continuous_metric_NaN_Missing_test.yml b/gpunit_functionality/metrics/continuous_metric_NaN_Missing_test.yml new file mode 100644 index 0000000..b34e069 --- /dev/null +++ b/gpunit_functionality/metrics/continuous_metric_NaN_Missing_test.yml @@ -0,0 +1,51 @@ +# Copyright (c) 2003-2021 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California. All rights reserved. +#module: urn:lsid:broad.mit.edu:cancer.software.genepattern.module.analysis:00072:20 +module: GSEA +name: GSEA continuous_metric_NaN_Missing_test +description: Test the GSEA continuous metric handling NaN and Missing values. Tests are centered on HTR4, HTR7, NPAL2, NPAL3, GSTK1. +params: + # Using a dataset needing collapse because it happens to have missing & NaN values + expression.dataset: "<%gpunit.testData%>gpunit/GSEA/v20/input/Diabetes_hgu133a_NaN_missing_vals.gct" + gene.sets.database: [ "<%gpunit.testData%>gpunit/GSEA/v20/input/c1.symbols.reduced.gmt" ] + number.of.permutations: "10" + phenotype.labels: "<%gpunit.testData%>gpunit/GSEA/v20/input/Diabetes_6samples_fake_as_cont_pheno.cls" + target.profile: "" + permutation.type: "phenotype" + collapse.dataset: "Collapse" + chip.platform.file: "<%gpunit.testData%>gpunit/GSEA/v20/input/HG_U133A.chip" + # Modifying the ZIP name here so that the diffCmd can find the RNK inside. We could modify the diffCmd + # to be able to find it, but that makes the code somewhat complicated. 
+ output.file.name: "Diabetes_hgu133a_NaN_missing_vals_collapsed_to_symbols.zip" + scoring.scheme: "weighted" + metric.for.ranking.genes: "Spearman" + gene.list.sorting.mode: "real" + gene.list.ordering.mode: "descending" + max.gene.set.size: "500" + min.gene.set.size: "15" + collapsing.mode.for.probe.sets.with.more.than.one.match: "Max_probe" + normalization.mode: "meandiv" + randomization.mode: "no_balance" + omit.features.with.no.symbol.match: "true" + median.for.class.metrics: "false" + number.of.markers: "100" + # Note that we use a fixed random seed rather than the 'timestamp' default so that we'll have reproducible test results + random.seed: "149" + create.svgs: "false" + create.gcts: "true" + save.random.ranked.lists: "false" + plot.graphs.for.the.top.sets.of.each.phenotype: "20" + make.detailed.gene.set.report: "false" + selected.gene.sets: "" + dev.mode: "true" + alt.delim: "" + create.zip: "true" +assertions: + jobStatus: success + files: + "Diabetes_hgu133a_NaN_missing_vals_collapsed_to_symbols.zip": + diffCmd: ../diffGseaResults.sh + diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/metrics/continuous_metric_NaN_Missing_test/Diabetes_hgu133a_NaN_missing_vals_collapsed_to_symbols.zip" + "stdout.txt": + diffCmd: ../grepMessages.sh + diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/metrics/continuous_metric_NaN_Missing_test/stdoutMatches.txt" + \ No newline at end of file diff --git a/gpunit_functionality/metrics/metric_Diff_of_Classes_test.yml b/gpunit_functionality/metrics/metric_Diff_of_Classes_test.yml new file mode 100644 index 0000000..70779a8 --- /dev/null +++ b/gpunit_functionality/metrics/metric_Diff_of_Classes_test.yml @@ -0,0 +1,44 @@ +# Copyright (c) 2003-2021 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California. All rights reserved. 
+#module: urn:lsid:broad.mit.edu:cancer.software.genepattern.module.analysis:00072:20 +module: GSEA +name: GSEA metric_Diff_of_Classes_test +description: Test GSEA using the Diff_of_Classes metric. Variation of user_gene_set_test with a reduced dataset. +params: + expression.dataset: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_14samples_collapsed_symbols.gct" + gene.sets.database: [ "<%gpunit.testData%>gpunit/GSEA/v20/input/c1.symbols.reduced.gmt" ] + number.of.permutations: "10" + phenotype.labels: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_14samples.cls" + target.profile: "" + permutation.type: "phenotype" + collapse.dataset: "No_Collapse" + #chip.platform.file: + output.file.name: ".zip" + scoring.scheme: "weighted" + metric.for.ranking.genes: "Diff_of_Classes" + gene.list.sorting.mode: "real" + gene.list.ordering.mode: "descending" + max.gene.set.size: "500" + min.gene.set.size: "15" + collapsing.mode.for.probe.sets.with.more.than.one.match: "Max_probe" + normalization.mode: "meandiv" + randomization.mode: "no_balance" + omit.features.with.no.symbol.match: "true" + median.for.class.metrics: "false" + number.of.markers: "100" + # Note that we use a fixed random seed rather than the 'timestamp' default so that we'll have reproducible test results + random.seed: "149" + create.svgs: "false" + create.gcts: "false" + save.random.ranked.lists: "false" + plot.graphs.for.the.top.sets.of.each.phenotype: "20" + make.detailed.gene.set.report: "false" + selected.gene.sets: "" + dev.mode: "true" + alt.delim: "" + create.zip: "true" +assertions: + jobStatus: success + files: + "P53_14samples_collapsed_symbols.zip": + diffCmd: ../diffGseaResults.sh + diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/metrics/metric_Diff_of_Classes_test/P53_14samples_collapsed_symbols.zip" diff --git a/gpunit_functionality/metrics/metric_Ratio_of_Means_test.yml b/gpunit_functionality/metrics/metric_Ratio_of_Classes_test.yml similarity index 84% rename from 
gpunit_functionality/metrics/metric_Ratio_of_Means_test.yml rename to gpunit_functionality/metrics/metric_Ratio_of_Classes_test.yml index e389302..135fd5e 100644 --- a/gpunit_functionality/metrics/metric_Ratio_of_Means_test.yml +++ b/gpunit_functionality/metrics/metric_Ratio_of_Classes_test.yml @@ -1,8 +1,8 @@ -# Copyright (c) 2003-2019 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California. All rights reserved. +# Copyright (c) 2003-2021 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California. All rights reserved. #module: urn:lsid:broad.mit.edu:cancer.software.genepattern.module.analysis:000142:15 module: GSEA -name: GSEA metric_Ratio_of_Means_test -description: Test GSEA using the Ratio_of_Means metric. Variation of user_gene_set_test with a reduced dataset. +name: GSEA metric_Ratio_of_Classes_test +description: Test GSEA using the Ratio_of_Classes metric. Variation of user_gene_set_test with a reduced dataset. 
params: expression.dataset: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_14samples_collapsed_symbols.gct" gene.sets.database: [ "<%gpunit.testData%>gpunit/GSEA/v20/input/c1.symbols.reduced.gmt" ] @@ -14,7 +14,7 @@ params: #chip.platform.file: output.file.name: ".zip" scoring.scheme: "weighted" - metric.for.ranking.genes: "Ratio_of_Means" + metric.for.ranking.genes: "Ratio_of_Classes" gene.list.sorting.mode: "real" gene.list.ordering.mode: "descending" max.gene.set.size: "500" @@ -41,4 +41,4 @@ assertions: files: "P53_14samples_collapsed_symbols.zip": diffCmd: ../diffGseaResults.sh - diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/metrics/metric_Ratio_of_Means_test/P53_14samples_collapsed_symbols.zip" + diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/metrics/metric_Ratio_of_Classes_test/P53_14samples_collapsed_symbols.zip" diff --git a/gpunit_functionality/metrics/metric_Diff_of_Means_test.yml b/gpunit_functionality/metrics/metric_Spearman_test.yml similarity index 84% rename from gpunit_functionality/metrics/metric_Diff_of_Means_test.yml rename to gpunit_functionality/metrics/metric_Spearman_test.yml index 0e53ede..cc11981 100644 --- a/gpunit_functionality/metrics/metric_Diff_of_Means_test.yml +++ b/gpunit_functionality/metrics/metric_Spearman_test.yml @@ -1,20 +1,20 @@ # Copyright (c) 2003-2019 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California. All rights reserved. #module: urn:lsid:broad.mit.edu:cancer.software.genepattern.module.analysis:00072:20 module: GSEA -name: GSEA metric_Diff_of_Means_test -description: Test GSEA using the Diff_of_Means metric. Variation of user_gene_set_test with a reduced dataset. +name: GSEA metric_Spearman_test +description: Test GSEA using the Spearman metric. Variation of user_gene_set_test with a reduced dataset and a synthetic continuous phenotype CLS file. 
params: expression.dataset: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_14samples_collapsed_symbols.gct" gene.sets.database: [ "<%gpunit.testData%>gpunit/GSEA/v20/input/c1.symbols.reduced.gmt" ] number.of.permutations: "10" - phenotype.labels: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_14samples.cls" + phenotype.labels: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_14samples_fake_as_cont_pheno.cls" target.profile: "" permutation.type: "phenotype" collapse.dataset: "No_Collapse" #chip.platform.file: output.file.name: ".zip" scoring.scheme: "weighted" - metric.for.ranking.genes: "Diff_of_Means" + metric.for.ranking.genes: "Spearman" gene.list.sorting.mode: "real" gene.list.ordering.mode: "descending" max.gene.set.size: "500" @@ -41,4 +41,4 @@ assertions: files: "P53_14samples_collapsed_symbols.zip": diffCmd: ../diffGseaResults.sh - diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/metrics/metric_Diff_of_Means_test/P53_14samples_collapsed_symbols.zip" + diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/metrics/metric_Spearman_test/P53_14samples_collapsed_symbols.zip" diff --git a/gpunit_functionality/metrics/metric_log2_ratio_of_means_test.yml b/gpunit_functionality/metrics/metric_log2_ratio_of_classes_test.yml similarity index 88% rename from gpunit_functionality/metrics/metric_log2_ratio_of_means_test.yml rename to gpunit_functionality/metrics/metric_log2_ratio_of_classes_test.yml index 70358c2..61f2282 100644 --- a/gpunit_functionality/metrics/metric_log2_ratio_of_means_test.yml +++ b/gpunit_functionality/metrics/metric_log2_ratio_of_classes_test.yml @@ -1,7 +1,7 @@ -# Copyright (c) 2003-2019 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California. All rights reserved. +# Copyright (c) 2003-2021 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California. All rights reserved. 
#module: urn:lsid:broad.mit.edu:cancer.software.genepattern.module.analysis:00072:20 module: GSEA -name: GSEA metric_log2_ratio_of_means_test +name: GSEA metric_log2_ratio_of_classes_test description: Test GSEA using the log2_Ratio_of_Classes metric. Variation of user_gene_set_test with a reduced dataset. params: expression.dataset: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_14samples_collapsed_symbols.gct" @@ -14,7 +14,7 @@ params: #chip.platform.file: output.file.name: ".zip" scoring.scheme: "weighted" - metric.for.ranking.genes: "log2_ratio_of_means" + metric.for.ranking.genes: "log2_Ratio_of_Classes" gene.list.sorting.mode: "real" gene.list.ordering.mode: "descending" max.gene.set.size: "500" @@ -41,4 +41,4 @@ assertions: files: "P53_14samples_collapsed_symbols.zip": diffCmd: ../diffGseaResults.sh - diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/metrics/metric_log2_ratio_of_means_test/P53_14samples_collapsed_symbols.zip" + diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/metrics/metric_log2_ratio_of_classes_test/P53_14samples_collapsed_symbols.zip" diff --git a/gpunit_functionality/phenotype_cls/invalid_too_few_samples_gene_set_perm_test.yml b/gpunit_functionality/phenotype_cls/invalid_too_few_samples_gene_set_perm_test.yml new file mode 100644 index 0000000..05e3c77 --- /dev/null +++ b/gpunit_functionality/phenotype_cls/invalid_too_few_samples_gene_set_perm_test.yml @@ -0,0 +1,45 @@ +# Copyright (c) 2003-2021 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California. All rights reserved. 
+#module: urn:lsid:broad.mit.edu:cancer.software.genepattern.module.analysis:00072:20 +module: GSEA +name: GSEA too_few_samples_gene_set_perm_test +description: Provide a proper error message when there are too few samples for particular metrics, run with the gene_set permutation mode +params: + expression.dataset: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_6samples_collapsed_symbols.gct" + gene.sets.database: [ "<%gpunit.testData%>gpunit/GSEA/v20/input/c1.symbols.reduced.gmt" ] + number.of.permutations: "10" + phenotype.labels: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_too_few_samples.cls" + target.profile: "" + permutation.type: "gene_set" + collapse.dataset: "No_Collapse" + #chip.platform.file: + output.file.name: ".zip" + scoring.scheme: "weighted" + metric.for.ranking.genes: "Signal2Noise" + gene.list.sorting.mode: "real" + gene.list.ordering.mode: "descending" + max.gene.set.size: "500" + min.gene.set.size: "15" + collapsing.mode.for.probe.sets.with.more.than.one.match: "Max_probe" + normalization.mode: "meandiv" + randomization.mode: "no_balance" + omit.features.with.no.symbol.match: "true" + median.for.class.metrics: "false" + number.of.markers: "100" + # Note that we use a fixed random seed rather than the 'timestamp' default so that we'll have reproducible test results + random.seed: "149" + create.svgs: "false" + create.gcts: "false" + save.random.ranked.lists: "false" + plot.graphs.for.the.top.sets.of.each.phenotype: "20" + make.detailed.gene.set.report: "false" + selected.gene.sets: "" + dev.mode: "true" + alt.delim: "" + create.zip: "true" +assertions: + jobStatus: fail + files: + "stderr.txt": + diffCmd: ../grepMessage.sh xtools.api.param.BadParamException:.Too.few.samples.in.class.B.of.the.dataset.to.use.this.metric + # The following file is unused; keeping it just for reference. 
+ diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/phenotype_cls/too_few_samples_gene_set_perm_test/stderr.txt" diff --git a/gpunit_functionality/phenotype_cls/invalid_too_few_samples_test.yml b/gpunit_functionality/phenotype_cls/invalid_too_few_samples_test.yml index 39c98b8..4a94c02 100644 --- a/gpunit_functionality/phenotype_cls/invalid_too_few_samples_test.yml +++ b/gpunit_functionality/phenotype_cls/invalid_too_few_samples_test.yml @@ -40,6 +40,6 @@ assertions: jobStatus: fail files: "stderr.txt": - diffCmd: ../grepMessage.sh xtools.api.param.BadParamException:.One.of.the.classes.in.this.dataset.has.too.few.samples.in.one.of.the.classes.of.the.dataset.to.use.this.metric + diffCmd: ../grepMessage.sh xtools.api.param.BadParamException:.Too.few.samples.in.class.B.of.the.dataset.to.use.this.metric # The following file is unused; keeping it just for reference. diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/phenotype_cls/invalid_too_few_samples_test/stderr.txt" From 60b948c67400b67363a6fbd77aa180513025e62a Mon Sep 17 00:00:00 2001 From: eby Date: Tue, 2 Nov 2021 18:26:37 -0700 Subject: [PATCH 2/5] Updated manifest, docs, etc ahead of the GSEA 4.2.0 release Will be bumping the version to 20.3.x. --- docs/v20/index.html | 20 ++++++++++---------- docs/v20/test.md | 19 +++++++++---------- manifest | 33 ++++++++++----------------------- paramgroups.json | 2 +- release.properties | 4 ++-- 5 files changed, 32 insertions(+), 46 deletions(-) diff --git a/docs/v20/index.html b/docs/v20/index.html index dfd9560..e7a874d 100644 --- a/docs/v20/index.html +++ b/docs/v20/index.html @@ -1,7 +1,7 @@ - GSEA (v20.2.x) + GSEA (v20.3.x) @@ -15,7 +15,7 @@
-

GSEA (v20.2.x) GP Logo

+

GSEA (v20.3.x) GP Logo

@@ -33,7 +33,7 @@

GSEA (v20.2.x)

-

GSEA Version: 4.1.0

+

GSEA Version: 4.2.0

@@ -318,10 +318,6 @@

Parameters

create gcts * Whether to save the dataset subsets backing the GSEA report heatmaps as GCT files; these will be subsets of your original dataset corresponding only to the genes of the heatmap.  - - create zip * - Create a ZIP bundle of the output files. This is true by default, matching the former behavior where a ZIP bundle was always created. - @@ -358,11 +354,10 @@

Input Files

Output Files

-

1. Optional Enrichment Report archive: ZIP

+

1. Enrichment Report archive: ZIP

ZIP file containing the result files.  For more information on interpreting these results, see Interpreting GSEA Results in the GSEA User Guide. -Note that in prior versions the ZIP bundle was created as the only output file. This behavior has been changed to give direct access to the results without the need for a download. The default is to create the ZIP bundle, matching the former behavior, but the report files -will always be created directly.

+Note that in prior versions the ZIP bundle was created as the only output file. This behavior has been changed to give direct access to the results without the need for a download.

2. Enrichment Report: HTML and PNG images

@@ -410,6 +405,11 @@

Version Comments

+ + 20.3.0 + 2021-11-5 + Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes. Fixed some issues handling datasets with missing values. Introduced the Spearman metric. Fixed issue with the min-sample check with gene_set permutation mode. Changed the FDR q-value scale on the NES vs Significance plot. + 20.2.4 2021-4-22 diff --git a/docs/v20/test.md b/docs/v20/test.md index dd175d8..81ad8dd 100644 --- a/docs/v20/test.md +++ b/docs/v20/test.md @@ -1,4 +1,4 @@ -# GSEA (v20.2.x) +# GSEA (v20.3.x) Gene Set Enrichment Analysis @@ -14,7 +14,7 @@ for GSEA questions. team](http://software.broadinstitute.org/cancer/software/genepattern/contact) for GenePattern issues. -**GSEA Version:** 4.1.0 +**GSEA Version:** 4.2.0 ## Description @@ -367,10 +367,6 @@ For descriptions of the ranking metrics, see create gcts * Whether to save the dataset subsets backing the GSEA report heatmaps as GCT files; these will be subsets of your original dataset corresponding only to the genes of the heatmap.  - -create zip * -Create a ZIP bundle of the output files. This is true by default, matching the former behavior where a ZIP bundle was always created. - @@ -423,16 +419,14 @@ drop-down ## Output Files -1\. Optional Enrichment Report archive: ZIP +1\. Enrichment Report archive: ZIP ZIP file containing the result files.  For more information on interpreting these results, see [Interpreting GSEA Results](http://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideTEXT.htm#_Interpreting_GSEA_Results) in the GSEA User Guide. Note that in prior versions the ZIP bundle was created as the only output file. This behavior has been changed to give -direct access to the results without the need for a download. The -default is to create the ZIP bundle, matching the former behavior, but -the report files will always be created directly. +direct access to the results without the need for a download. 2\. 
Enrichment Report: HTML and PNG images @@ -476,6 +470,11 @@ Java + +20.3.0 +2021-11-2 +Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes. Fixed some issues handling datasets with missing values. Introduced the Spearman metric. Fixed issue with the min-sample check with gene_set permutation mode. Changed the FDR q-value scale on the NES vs Significance plot. + 20.2.4 2021-4-22 diff --git a/manifest b/manifest index 4d4af26..0db80ea 100644 --- a/manifest +++ b/manifest @@ -1,10 +1,10 @@ #Fri, 05 Oct 2018 04:51:00 +0900 -# Copyright (c) 2003-2020 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California. All rights reserved. +# Copyright (c) 2003-2021 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California. All rights reserved. #GSEA JVMLevel=11 LSID=urn\:lsid\:broad.mit.edu\:cancer.software.genepattern.module.analysis\:00072\:999999999 author=Aravind Subramanian, Pablo Tamayo, David Eby; Broad Institute -commandLine=/opt/gsea/GSEA_4.1.0/gsea-cli.sh GSEA -res -cls -collapse -mode -norm -nperm -permute -rnd_type -scoring_scheme -metric -sort -order -include_only_symbols -make_sets -median -num -plot_top_x -rnd_seed -save_rnd_lists -set_max -set_min -gmx_list -create_svgs -create_gcts -target_profile -dev_mode -zip_report -run_as_genepattern true +commandLine=/opt/gsea/GSEA_4.2.0/gsea-cli.sh GSEA -res -cls -collapse -mode -norm -nperm -permute -rnd_type -scoring_scheme -metric -sort -order -include_only_symbols -make_sets -median -num -plot_top_x -rnd_seed -save_rnd_lists -set_max -set_min -gmx_list -create_svgs -create_gcts -target_profile -dev_mode -zip_report true -run_as_genepattern true cpuType=any taskDoc=doc.html description=Gene Set Enrichment Analysis. If you are using GSEA on RNA-seq data, please read these guidelines. 
@@ -12,7 +12,7 @@ fileFormat=zip language=Java name=GSEA os=any -job.docker.image=genepattern/gsea_4.1.0\:0.1 +job.docker.image=genepattern/gsea_4.2.0\:0.1 p1_MODE=IN @@ -359,7 +359,7 @@ p26_value= p27_MODE= p27_TYPE=TEXT p27_default_value=.zip -p27_description=Name of the output file. Note that this only applies if create.zip\=true. +p27_description=Name of the output ZIP file. p27_fileFormat= p27_flag= p27_name=output.file.name @@ -384,11 +384,11 @@ p28_value= p29_MODE= p29_TYPE=TEXT -p29_default_value=true -p29_description=Create a ZIP bundle of the output files. +p29_default_value=false +p29_description=Whether to save the dataset subsets backing the GSEA report heatmaps as GCT files; these will be subsets of your original dataset corresponding only to the genes of the heatmap. p29_fileFormat= p29_flag= -p29_name=create.zip +p29_name=create.gcts p29_optional= p29_prefix= p29_prefix_when_specified= @@ -398,29 +398,16 @@ p29_value=false;true p30_MODE= p30_TYPE=TEXT p30_default_value=false -p30_description=Whether to save the dataset subsets backing the GSEA report heatmaps as GCT files; these will be subsets of your original dataset corresponding only to the genes of the heatmap. +p30_description=Enable developer mode. p30_fileFormat= p30_flag= -p30_name=create.gcts +p30_name=dev.mode p30_optional= p30_prefix= p30_prefix_when_specified= p30_type=java.lang.String p30_value=false;true -p31_MODE= -p31_TYPE=TEXT -p31_default_value=false -p31_description=Enable developer mode. -p31_fileFormat= -p31_flag= -p31_name=dev.mode -p31_optional= -p31_prefix= -p31_prefix_when_specified= -p31_type=java.lang.String -p31_value=false;true - pipelineModel= privacy=public quality=development @@ -430,6 +417,6 @@ serializedModel= taskType=Pathway Analysis categories=gsea;pathway analysis userid=eby@broadinstitute.org -version=Fixed minor typo. +version=Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes. Fixed some issues handling datasets with missing values. 
Introduced the Spearman metric. Fixed issue with the min-sample check with gene_set permutation mode. Changed the FDR q-value scale on the NES vs Significance plot. publicationDate=10/05/2018 04\:51 diff --git a/paramgroups.json b/paramgroups.json index e853e9f..6144062 100644 --- a/paramgroups.json +++ b/paramgroups.json @@ -1,5 +1,5 @@ [{name: "Basic parameters", description: "These parameters are essential for the analysis.", parameters: ["expression.dataset", "gene.sets.database", "number.of.permutations", "phenotype.labels", "target.profile", "permutation.type", "collapse.dataset", "chip.platform.file", "output.file.name" ]}, {name: "Advanced parameters", description: "Additional parameters with standard defaults; typically you will run GSEA with these parameters set to their default values.", hidden: false}, {name: "Advanced parameters/Algorithmic", hidden: true, parameters: ["scoring.scheme", "metric.for.ranking.genes", "gene.list.sorting.mode", "gene.list.ordering.mode", "max.gene.set.size", "min.gene.set.size", "collapsing.mode.for.probe.sets.with.more.than.one.match", "normalization.mode", "randomization.mode", "omit.features.with.no.symbol.match", "median.for.class.metrics", "number.of.markers", "random.seed", "save.random.ranked.lists", "selected.gene.sets", "dev.mode", "alt.delim"]}, - {name: "Advanced parameters/Reporting", hidden: true, parameters: ["plot.graphs.for.the.top.sets.of.each.phenotype", "make.detailed.gene.set.report", "create.svgs", "create.gcts", "create.zip"]} + {name: "Advanced parameters/Reporting", hidden: true, parameters: ["plot.graphs.for.the.top.sets.of.each.phenotype", "make.detailed.gene.set.report", "create.svgs", "create.gcts"]} ] \ No newline at end of file diff --git a/release.properties b/release.properties index 9187613..b69b05d 100644 --- a/release.properties +++ b/release.properties @@ -1,5 +1,5 @@ #Thu, 22 Apr 2021 11:20:36 -0700 
LSID.noVersion=urn\:lsid\:broad.mit.edu\:cancer.software.genepattern.module.analysis\:00072 -release.version=20.2 -build.number=4 +release.version=20.3 +build.number=-1 build.timestamp=Thu, 22 Apr 2021 11\:20\:36 -0700 From 12665cfaf08a4aba9f7798e1b747c6326e20fc34 Mon Sep 17 00:00:00 2001 From: eby Date: Wed, 1 Dec 2021 18:56:10 -0800 Subject: [PATCH 3/5] Added a test case for the new abs_max collapse mode --- .../collapse_NaN_Missing_abs_max_test.yml | 50 +++++++++++++++++++ .../collapse_NaN_Missing_max_test.yml | 2 +- 2 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 gpunit_functionality/collapse/collapse_NaN_Missing_abs_max_test.yml diff --git a/gpunit_functionality/collapse/collapse_NaN_Missing_abs_max_test.yml b/gpunit_functionality/collapse/collapse_NaN_Missing_abs_max_test.yml new file mode 100644 index 0000000..4c2b1e9 --- /dev/null +++ b/gpunit_functionality/collapse/collapse_NaN_Missing_abs_max_test.yml @@ -0,0 +1,50 @@ +# Copyright (c) 2003-2021 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California. All rights reserved. +#module: urn:lsid:broad.mit.edu:cancer.software.genepattern.module.analysis:00072:20 +module: GSEA +name: GSEA collapse_NaN_Missing_abs_max_test +description: Test the GSEA 'collapse dataset' function handling Infinite, NaN and Missing values, collapse to absolute max of probes. Tests are centered on HTR4, HTR6, FLJ22639, HTR7, NPAL2, NPAL3, GSTK1, BCR. 
+params: + expression.dataset: "<%gpunit.testData%>gpunit/GSEA/v20/input/Diabetes_hgu133a_NaN_missing_vals.gct" + gene.sets.database: [ "<%gpunit.testData%>gpunit/GSEA/v20/input/c1.symbols.reduced.gmt" ] + number.of.permutations: "10" + # Uses P53_6samples.cls because it happens to have a reasonable class template for this use + phenotype.labels: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_6samples.cls" + target.profile: "" + permutation.type: "phenotype" + collapse.dataset: "Collapse" + chip.platform.file: "<%gpunit.testData%>gpunit/GSEA/v20/input/HG_U133A.chip" + # Modifying the ZIP name here so that the diffCmd can find the RNK inside. We could modify the diffCmd + # to be able to find it, but that makes the code somewhat complicated. + output.file.name: "Diabetes_hgu133a_NaN_missing_vals_collapsed_to_symbols.zip" + scoring.scheme: "weighted" + metric.for.ranking.genes: "Signal2Noise" + gene.list.sorting.mode: "real" + gene.list.ordering.mode: "descending" + max.gene.set.size: "500" + min.gene.set.size: "15" + collapsing.mode.for.probe.sets.with.more.than.one.match: "Abs_max_of_probes" + normalization.mode: "meandiv" + randomization.mode: "no_balance" + omit.features.with.no.symbol.match: "true" + median.for.class.metrics: "false" + number.of.markers: "100" + # Note that we use a fixed random seed rather than the 'timestamp' default so that we'll have reproducible test results + random.seed: "149" + create.svgs: "false" + create.gcts: "true" + save.random.ranked.lists: "false" + plot.graphs.for.the.top.sets.of.each.phenotype: "20" + make.detailed.gene.set.report: "false" + selected.gene.sets: "" + dev.mode: "true" + alt.delim: "" + create.zip: "true" +assertions: + jobStatus: success + files: + "Diabetes_hgu133a_NaN_missing_vals_collapsed_to_symbols.zip": + diffCmd: ../diffGseaResults.sh + diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/collapse/collapse_NaN_Missing_abs_max_test/Diabetes_hgu133a_NaN_missing_vals_collapsed_to_symbols.zip" + "stdout.txt": + 
diffCmd: ../grepMessages.sh + diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/collapse/collapse_NaN_Missing_abs_max_test/stdoutMatches.txt" diff --git a/gpunit_functionality/collapse/collapse_NaN_Missing_max_test.yml b/gpunit_functionality/collapse/collapse_NaN_Missing_max_test.yml index 2176d9a..8fe181a 100644 --- a/gpunit_functionality/collapse/collapse_NaN_Missing_max_test.yml +++ b/gpunit_functionality/collapse/collapse_NaN_Missing_max_test.yml @@ -2,7 +2,7 @@ #module: urn:lsid:broad.mit.edu:cancer.software.genepattern.module.analysis:00072:20 module: GSEA name: GSEA collapse_NaN_Missing_max_test -description: Test the GSEA 'collapse dataset' function handling NaN and Missing values, collapse to max of probes. Tests are centered on HTR4, HTR7, NPAL2, NPAL3, GSTK1. +description: Test the GSEA 'collapse dataset' function handling Infinite, NaN and Missing values, collapse to max of probes. Tests are centered on HTR4, HTR6, FLJ22639, HTR7, NPAL2, NPAL3, GSTK1, BCR. params: expression.dataset: "<%gpunit.testData%>gpunit/GSEA/v20/input/Diabetes_hgu133a_NaN_missing_vals.gct" gene.sets.database: [ "<%gpunit.testData%>gpunit/GSEA/v20/input/c1.symbols.reduced.gmt" ] From 69356c830d13748828f7d401ffa9dd4d2cddaa1f Mon Sep 17 00:00:00 2001 From: eby Date: Fri, 3 Dec 2021 16:29:33 -0800 Subject: [PATCH 4/5] Edited docs and manifest ahead of GSEA 4.2.0 release New option(s), updated docs. --- docs/v20/index.html | 8 ++++++-- docs/v20/test.md | 7 ++++++- manifest | 6 +++--- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/docs/v20/index.html b/docs/v20/index.html index e7a874d..a4266fb 100644 --- a/docs/v20/index.html +++ b/docs/v20/index.html @@ -245,6 +245,9 @@

Parameters

  • Median_of_probes: For each sample, use the median expression value for the probe set.
  • Mean_of_probes: For each sample, use the mean expression value for the probe set.
  • Sum_of_probes: For each sample, sum all the expression values of the probe set.
  • +
  • Abs_max_of_probes: For each sample, use the expression value for the probe set with the maximum <strong>absolute value</strong>. Note that each value retains its original sign but is chosen based on absolute value. + In other words, the largest-magnitude value is used. While this method is useful with computational-based input datasets, it is generally <strong>not recommended</strong> for use with quantification-based expression + measures such as counts or microarray fluorescence.
  • @@ -369,7 +372,8 @@

    Output Files

    3. Optional GCTs

    -

    The datasets backing all the heatmap images from the Enrichment Report for use in external visualizers or analysis tools. These will have the same name as the corresponding image but instead with a GCT extension.

    +

    The datasets backing all the heatmap images from the Enrichment Report for use in external visualizers or analysis tools. Each file has the same name as the corresponding image, but with a GCT extension. +When Collapse or Remap_Only is set, the collapsed dataset is also saved as a GCT. These files will be created if the Create GCTs option is true.

    @@ -408,7 +412,7 @@

    Version Comments

    20.3.0 2021-11-5 - Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes. Fixed some issues handling datasets with missing values. Introduced the Spearman metric. Fixed issue with the min-sample check with gene_set permutation mode. Changed the FDR q-value scale on the NES vs Significance plot. + Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes. Adds the Abs_max_of_probes collapse mode. Fixed some issues handling datasets with missing values. Added the Spearman metric. Fixed issue with the min-sample check with gene_set permutation mode. Improved warnings and logging. Changed the FDR q-value scale on the NES vs Significance plot. 20.2.4 diff --git a/docs/v20/test.md b/docs/v20/test.md index 81ad8dd..345bf0c 100644 --- a/docs/v20/test.md +++ b/docs/v20/test.md @@ -301,6 +301,9 @@ For descriptions of the ranking metrics, see @@ -444,6 +447,8 @@ can be decompressed using 'gunzip' on Mac or Linux and 7-Zip on Windows The datasets backing all the heatmap images from the Enrichment Report for use in external visualizers or analysis tools. These will have the same name as the corresponding image but instead with a GCT extension. +When Collapse or Remap_Only is set, the collapsed dataset is also saved +as a GCT. These files will be created if the Create GCTs option is true. ## Platform Dependencies @@ -473,7 +478,7 @@ Java 20.3.0 2021-11-2 -Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes. Fixed some issues handling datasets with missing values. Introduced the Spearman metric. Fixed issue with the min-sample check with gene_set permutation mode. Changed the FDR q-value scale on the NES vs Significance plot. +Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes. Adds the Abs_max_of_probes collapse mode. Fixed some issues handling datasets with missing values. Added the Spearman metric. Fixed issue with the min-sample check with gene_set permutation mode. Improved warnings and logging. 
Changed the FDR q-value scale on the NES vs Significance plot. 20.2.4 diff --git a/manifest b/manifest index 0db80ea..532685f 100644 --- a/manifest +++ b/manifest @@ -146,7 +146,7 @@ p10_optional= p10_prefix= p10_prefix_when_specified= p10_type=java.lang.String -p10_value=Signal2Noise;tTest;Ratio_of_Classes;Diff_of_Classes;log2_Ratio_of_Classes;Pearson;Cosine;Manhatten\=Manhattan;Euclidean +p10_value=Signal2Noise;tTest;Ratio_of_Classes;Diff_of_Classes;log2_Ratio_of_Classes;Pearson;Spearman;Cosine;Manhatten\=Manhattan;Euclidean p11_MODE= p11_TYPE=TEXT @@ -211,7 +211,7 @@ p15_optional= p15_prefix= p15_prefix_when_specified= p15_type=java.lang.String -p15_value=Max_probe;Median_of_probes;Mean_of_probes;Sum_of_probes +p15_value=Max_probe;Median_of_probes;Mean_of_probes;Sum_of_probes;Abs_max_of_probes p16_MODE= p16_TYPE=TEXT @@ -417,6 +417,6 @@ serializedModel= taskType=Pathway Analysis categories=gsea;pathway analysis userid=eby@broadinstitute.org -version=Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes. Fixed some issues handling datasets with missing values. Introduced the Spearman metric. Fixed issue with the min-sample check with gene_set permutation mode. Changed the FDR q-value scale on the NES vs Significance plot. +version=Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes. Adds the Abs_max_of_probes collapse mode. Fixed some issues handling datasets with missing values. Added the Spearman metric. Fixed issue with the min-sample check with gene_set permutation mode. Improved warnings and logging. Changed the FDR q-value scale on the NES vs Significance plot. 
publicationDate=10/05/2018 04\:51 From 051c707e495c4e2ee2ac16b05c1d41b6aac2687a Mon Sep 17 00:00:00 2001 From: eby Date: Fri, 17 Dec 2021 15:10:50 -0800 Subject: [PATCH 5/5] Updated docs for the 4.2.0 --- docs/v20/index.html | 4 ++-- docs/v20/test.md | 4 ++-- manifest | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/v20/index.html b/docs/v20/index.html index a4266fb..7bf86ba 100644 --- a/docs/v20/index.html +++ b/docs/v20/index.html @@ -411,8 +411,8 @@

    Version Comments

    20.3.0 - 2021-11-5 - Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes. Adds the Abs_max_of_probes collapse mode. Fixed some issues handling datasets with missing values. Added the Spearman metric. Fixed issue with the min-sample check with gene_set permutation mode. Improved warnings and logging. Changed the FDR q-value scale on the NES vs Significance plot. + 2021-12-17 + Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes. Adds the Abs_max_of_probes collapse mode. Fixed some issues handling datasets with missing values. Added the Spearman metric. Fixed issue with the min-sample check with gene_set permutation mode. Improved warnings and logging. Changed the FDR q-value scale on the NES vs Significance plot. Fixed bugs in weighted_p1.5 scoring. 20.2.4 diff --git a/docs/v20/test.md b/docs/v20/test.md index 345bf0c..1a06a4e 100644 --- a/docs/v20/test.md +++ b/docs/v20/test.md @@ -477,8 +477,8 @@ Java 20.3.0 -2021-11-2 -Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes. Adds the Abs_max_of_probes collapse mode. Fixed some issues handling datasets with missing values. Added the Spearman metric. Fixed issue with the min-sample check with gene_set permutation mode. Improved warnings and logging. Changed the FDR q-value scale on the NES vs Significance plot. +2021-12-17 +Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes. Adds the Abs_max_of_probes collapse mode. Fixed some issues handling datasets with missing values. Added the Spearman metric. Fixed issue with the min-sample check with gene_set permutation mode. Improved warnings and logging. Changed the FDR q-value scale on the NES vs Significance plot. Fixed bugs in weighted_p1.5 scoring. 
20.2.4 diff --git a/manifest b/manifest index 532685f..4d39d89 100644 --- a/manifest +++ b/manifest @@ -417,6 +417,6 @@ serializedModel= taskType=Pathway Analysis categories=gsea;pathway analysis userid=eby@broadinstitute.org -version=Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes. Adds the Abs_max_of_probes collapse mode. Fixed some issues handling datasets with missing values. Added the Spearman metric. Fixed issue with the min-sample check with gene_set permutation mode. Improved warnings and logging. Changed the FDR q-value scale on the NES vs Significance plot. +version=Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes. Adds the Abs_max_of_probes collapse mode. Fixed some issues handling datasets with missing values. Added the Spearman metric. Fixed issue with the min-sample check with gene_set permutation mode. Improved warnings and logging. Changed the FDR q-value scale on the NES vs Significance plot. Fixed bugs in weighted_p1.5 scoring. publicationDate=10/05/2018 04\:51