From ade2255856466459fe91a2c59d0df7df3670a7b2 Mon Sep 17 00:00:00 2001
From: eby <eby@broadinstitute.org>
Date: Tue, 2 Nov 2021 17:01:12 -0700
Subject: [PATCH 1/5] Updated the test suite for the GSEA Desktop 4.2.0 bugfix
 release

---
 .../other/Dressman_81.2_test.yml              |  6 +--
 .../other/Lin.et.al.2008.2_test.yml           |  6 +--
 .../other/MD.outcome.2_test.yml               |  6 +--
 .../other/MD.outcome.BCAT_test.yml            |  6 +--
 .../other/Ross_et_al.3-class_test.yml         |  6 +--
 gpunit_breadthTest/other/dfci.subset_test.yml |  6 +--
 gpunit_breadthTest/other/meta.1.2_test.yml    |  6 +--
 gpunit_breadthTest/other/primet1.2_test.yml   |  6 +--
 .../collapse_NaN_Missing_max_test.yml         | 50 ++++++++++++++++++
 gpunit_functionality/grepMessages.sh          | 10 ++++
 .../continuous_metric_NaN_Missing_test.yml    | 51 +++++++++++++++++++
 .../metrics/metric_Diff_of_Classes_test.yml   | 44 ++++++++++++++++
 ...t.yml => metric_Ratio_of_Classes_test.yml} | 10 ++--
 ...eans_test.yml => metric_Spearman_test.yml} | 10 ++--
 ... => metric_log2_ratio_of_classes_test.yml} |  8 +--
 ...lid_too_few_samples_gene_set_perm_test.yml | 45 ++++++++++++++++
 .../invalid_too_few_samples_test.yml          |  2 +-
 17 files changed, 239 insertions(+), 39 deletions(-)
 create mode 100644 gpunit_functionality/collapse/collapse_NaN_Missing_max_test.yml
 create mode 100755 gpunit_functionality/grepMessages.sh
 create mode 100644 gpunit_functionality/metrics/continuous_metric_NaN_Missing_test.yml
 create mode 100644 gpunit_functionality/metrics/metric_Diff_of_Classes_test.yml
 rename gpunit_functionality/metrics/{metric_Ratio_of_Means_test.yml => metric_Ratio_of_Classes_test.yml} (84%)
 rename gpunit_functionality/metrics/{metric_Diff_of_Means_test.yml => metric_Spearman_test.yml} (84%)
 rename gpunit_functionality/metrics/{metric_log2_ratio_of_means_test.yml => metric_log2_ratio_of_classes_test.yml} (88%)
 create mode 100644 gpunit_functionality/phenotype_cls/invalid_too_few_samples_gene_set_perm_test.yml

diff --git a/gpunit_breadthTest/other/Dressman_81.2_test.yml b/gpunit_breadthTest/other/Dressman_81.2_test.yml
index 7040454..ef34a27 100644
--- a/gpunit_breadthTest/other/Dressman_81.2_test.yml
+++ b/gpunit_breadthTest/other/Dressman_81.2_test.yml
@@ -13,7 +13,7 @@ params:
        collapse.dataset: "No_Collapse"
        #chip.platform.file: 
        # Renaming result file for ease of testing.
-       output.file.name: "Dressman_81.zip"
+       output.file.name: "Dressman_81.2.zip"
        scoring.scheme: "weighted"
        metric.for.ranking.genes: "Signal2Noise"
        gene.list.sorting.mode: "real"
@@ -40,6 +40,6 @@ params:
 assertions:
         jobStatus: success
         files:
-            "Dressman_81.zip":
+            "Dressman_81.2.zip":
                 diffCmd: ../diffGseaResults.sh
-                diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/Dressman_81.2_test/Dressman_81.zip"
+                diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/Dressman_81.2_test/Dressman_81.2.zip"
diff --git a/gpunit_breadthTest/other/Lin.et.al.2008.2_test.yml b/gpunit_breadthTest/other/Lin.et.al.2008.2_test.yml
index c4bc579..1272bfe 100644
--- a/gpunit_breadthTest/other/Lin.et.al.2008.2_test.yml
+++ b/gpunit_breadthTest/other/Lin.et.al.2008.2_test.yml
@@ -13,7 +13,7 @@ params:
        collapse.dataset: "No_Collapse"
        #chip.platform.file: 
        # Renaming result file for ease of testing.
-       output.file.name: "Lin.et.al.2008.zip"
+       output.file.name: "Lin.et.al.2008.2.zip"
        scoring.scheme: "weighted"
        metric.for.ranking.genes: "Signal2Noise"
        gene.list.sorting.mode: "real"
@@ -40,6 +40,6 @@ params:
 assertions:
         jobStatus: success
         files:
-            "Lin.et.al.2008.zip":
+            "Lin.et.al.2008.2.zip":
                 diffCmd: ../diffGseaResults.sh
-                diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/Lin.et.al.2008.2_test/Lin.et.al.2008.zip"
+                diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/Lin.et.al.2008.2_test/Lin.et.al.2008.2.zip"
diff --git a/gpunit_breadthTest/other/MD.outcome.2_test.yml b/gpunit_breadthTest/other/MD.outcome.2_test.yml
index d4b7659..f40470d 100644
--- a/gpunit_breadthTest/other/MD.outcome.2_test.yml
+++ b/gpunit_breadthTest/other/MD.outcome.2_test.yml
@@ -13,7 +13,7 @@ params:
        collapse.dataset: "No_Collapse"
        #chip.platform.file: 
        # Renaming result file for ease of testing.
-       output.file.name: "MD.MD.outcome.2.cls_0_versus_NA.zip"
+       output.file.name: "MD.outcome.MD.outcome.2.cls_0_versus_NA.zip"
        scoring.scheme: "weighted"
        metric.for.ranking.genes: "Signal2Noise"
        gene.list.sorting.mode: "real"
@@ -40,6 +40,6 @@ params:
 assertions:
         jobStatus: success
         files:
-            "MD.MD.outcome.2.cls_0_versus_NA.zip":
+            "MD.outcome.MD.outcome.2.cls_0_versus_NA.zip":
                 diffCmd: ../diffGseaResults.sh
-                diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/MD.outcome.2_test/MD.MD.outcome.2.cls_0_versus_NA.zip"
+                diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/MD.outcome.2_test/MD.outcome.MD.outcome.2.cls_0_versus_NA.zip"
diff --git a/gpunit_breadthTest/other/MD.outcome.BCAT_test.yml b/gpunit_breadthTest/other/MD.outcome.BCAT_test.yml
index 27a8c20..0a4ff67 100644
--- a/gpunit_breadthTest/other/MD.outcome.BCAT_test.yml
+++ b/gpunit_breadthTest/other/MD.outcome.BCAT_test.yml
@@ -13,7 +13,7 @@ params:
        collapse.dataset: "No_Collapse"
        #chip.platform.file: 
        # Renaming result file for ease of testing.
-       output.file.name: "MD.outcome.zip"
+       output.file.name: "MD.outcome.BCAT.zip"
        scoring.scheme: "weighted"
        metric.for.ranking.genes: "Signal2Noise"
        gene.list.sorting.mode: "real"
@@ -40,6 +40,6 @@ params:
 assertions:
         jobStatus: success
         files:
-            "MD.outcome.zip":
+            "MD.outcome.BCAT.zip":
                 diffCmd: ../diffGseaResults.sh
-                diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/MD.outcome.BCAT_test/MD.outcome.zip"
+                diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/MD.outcome.BCAT_test/MD.outcome.BCAT.zip"
diff --git a/gpunit_breadthTest/other/Ross_et_al.3-class_test.yml b/gpunit_breadthTest/other/Ross_et_al.3-class_test.yml
index b71f077..8787932 100644
--- a/gpunit_breadthTest/other/Ross_et_al.3-class_test.yml
+++ b/gpunit_breadthTest/other/Ross_et_al.3-class_test.yml
@@ -13,7 +13,7 @@ params:
        collapse.dataset: "No_Collapse"
        #chip.platform.file: 
        # Renaming result file for ease of testing.
-       output.file.name: "Ross_et_al.zip"
+       output.file.name: "Ross_et_al.3-class.zip"
        scoring.scheme: "weighted"
        metric.for.ranking.genes: "Signal2Noise"
        gene.list.sorting.mode: "real"
@@ -40,6 +40,6 @@ params:
 assertions:
         jobStatus: success
         files:
-            "Ross_et_al.zip":
+            "Ross_et_al.3-class.zip":
                 diffCmd: ../diffGseaResults.sh
-                diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/Ross_et_al.3-class_test/Ross_et_al.zip"
+                diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/Ross_et_al.3-class_test/Ross_et_al.3-class.zip"
diff --git a/gpunit_breadthTest/other/dfci.subset_test.yml b/gpunit_breadthTest/other/dfci.subset_test.yml
index f050811..3a7de26 100644
--- a/gpunit_breadthTest/other/dfci.subset_test.yml
+++ b/gpunit_breadthTest/other/dfci.subset_test.yml
@@ -13,7 +13,7 @@ params:
        collapse.dataset: "No_Collapse"
        #chip.platform.file: 
        # Renaming result file for ease of testing.
-       output.file.name: "dfci.zip"
+       output.file.name: "dfci.subset.zip"
        scoring.scheme: "weighted"
        metric.for.ranking.genes: "Signal2Noise"
        gene.list.sorting.mode: "real"
@@ -40,6 +40,6 @@ params:
 assertions:
         jobStatus: success
         files:
-            "dfci.zip":
+            "dfci.subset.zip":
                 diffCmd: ../diffGseaResults.sh
-                diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/dfci.subset_test/dfci.zip"
+                diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/dfci.subset_test/dfci.subset.zip"
diff --git a/gpunit_breadthTest/other/meta.1.2_test.yml b/gpunit_breadthTest/other/meta.1.2_test.yml
index bcbd1d6..cb4837f 100644
--- a/gpunit_breadthTest/other/meta.1.2_test.yml
+++ b/gpunit_breadthTest/other/meta.1.2_test.yml
@@ -13,7 +13,7 @@ params:
        collapse.dataset: "No_Collapse"
        #chip.platform.file: 
        # Renaming result file for ease of testing.
-       output.file.name: "meta.meta.1.2.cls_breast_versus_colon.zip"
+       output.file.name: "meta.1.meta.1.2.cls_breast_versus_colon.zip"
        scoring.scheme: "weighted"
        metric.for.ranking.genes: "Signal2Noise"
        gene.list.sorting.mode: "real"
@@ -40,6 +40,6 @@ params:
 assertions:
         jobStatus: success
         files:
-            "meta.meta.1.2.cls_breast_versus_colon.zip":
+            "meta.1.meta.1.2.cls_breast_versus_colon.zip":
                 diffCmd: ../diffGseaResults.sh
-                diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/meta.1.2_test/meta.meta.1.2.cls_breast_versus_colon.zip"
+                diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/meta.1.2_test/meta.1.meta.1.2.cls_breast_versus_colon.zip"
diff --git a/gpunit_breadthTest/other/primet1.2_test.yml b/gpunit_breadthTest/other/primet1.2_test.yml
index 917d239..356fc55 100644
--- a/gpunit_breadthTest/other/primet1.2_test.yml
+++ b/gpunit_breadthTest/other/primet1.2_test.yml
@@ -13,7 +13,7 @@ params:
        collapse.dataset: "No_Collapse"
        #chip.platform.file: 
        # Renaming result file for ease of testing.
-       output.file.name: "primet1.zip"
+       output.file.name: "primet1.2.zip"
        scoring.scheme: "weighted"
        metric.for.ranking.genes: "Signal2Noise"
        gene.list.sorting.mode: "real"
@@ -40,6 +40,6 @@ params:
 assertions:
         jobStatus: success
         files:
-            "primet1.zip":
+            "primet1.2.zip":
                 diffCmd: ../diffGseaResults.sh
-                diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/primet1.2_test/primet1.zip"
+                diff: "<%gpunit.resultData%>gpunit/GSEA/breadthTest/output/other/primet1.2_test/primet1.2.zip"
diff --git a/gpunit_functionality/collapse/collapse_NaN_Missing_max_test.yml b/gpunit_functionality/collapse/collapse_NaN_Missing_max_test.yml
new file mode 100644
index 0000000..2176d9a
--- /dev/null
+++ b/gpunit_functionality/collapse/collapse_NaN_Missing_max_test.yml
@@ -0,0 +1,50 @@
+# Copyright (c) 2003-2021 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California.  All rights reserved.
+#module: urn:lsid:broad.mit.edu:cancer.software.genepattern.module.analysis:00072:20
+module: GSEA
+name: GSEA collapse_NaN_Missing_max_test
+description: Test the GSEA 'collapse dataset' function handling NaN and Missing values, collapse to max of probes.  Tests are centered on HTR4, HTR7, NPAL2, NPAL3, GSTK1.
+params:
+       expression.dataset: "<%gpunit.testData%>gpunit/GSEA/v20/input/Diabetes_hgu133a_NaN_missing_vals.gct"
+       gene.sets.database: [ "<%gpunit.testData%>gpunit/GSEA/v20/input/c1.symbols.reduced.gmt" ]
+       number.of.permutations: "10"
+       # Uses P53_6samples.cls because it happens to have a reasonable class template for this use
+       phenotype.labels: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_6samples.cls"
+       target.profile: ""
+       permutation.type: "phenotype"
+       collapse.dataset: "Collapse"
+       chip.platform.file: "<%gpunit.testData%>gpunit/GSEA/v20/input/HG_U133A.chip"
+       # Modifying the ZIP name here so that the diffCmd can find the RNK inside.  We could modify the diffCmd 
+       # to be able to find it, but that makes the code somewhat complicated. 
+       output.file.name: "Diabetes_hgu133a_NaN_missing_vals_collapsed_to_symbols.zip"
+       scoring.scheme: "weighted"
+       metric.for.ranking.genes: "Signal2Noise"
+       gene.list.sorting.mode: "real"
+       gene.list.ordering.mode: "descending"
+       max.gene.set.size: "500"
+       min.gene.set.size: "15"
+       collapsing.mode.for.probe.sets.with.more.than.one.match: "Max_probe"
+       normalization.mode: "meandiv"
+       randomization.mode: "no_balance"
+       omit.features.with.no.symbol.match: "true"
+       median.for.class.metrics: "false"
+       number.of.markers: "100"
+       # Note that we use a fixed random seed rather than the 'timestamp' default so that we'll have reproducible test results
+       random.seed: "149"
+       create.svgs: "false"
+       create.gcts: "true"
+       save.random.ranked.lists: "false"
+       plot.graphs.for.the.top.sets.of.each.phenotype: "20"
+       make.detailed.gene.set.report: "false"
+       selected.gene.sets: ""
+       dev.mode: "true"
+       alt.delim: ""
+       create.zip: "true"
+assertions:
+        jobStatus: success
+        files:
+            "Diabetes_hgu133a_NaN_missing_vals_collapsed_to_symbols.zip":
+                diffCmd: ../diffGseaResults.sh
+                diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/collapse/collapse_NaN_Missing_max_test/Diabetes_hgu133a_NaN_missing_vals_collapsed_to_symbols.zip"
+            "stdout.txt":
+                diffCmd: ../grepMessages.sh
+                diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/collapse/collapse_NaN_Missing_max_test/stdoutMatches.txt"
diff --git a/gpunit_functionality/grepMessages.sh b/gpunit_functionality/grepMessages.sh
new file mode 100755
index 0000000..7775ea7
--- /dev/null
+++ b/gpunit_functionality/grepMessages.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+# Copyright (c) 2003-2021 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California.  All rights reserved.
+
+# The file passed in $1 should be a list of fixed strings, one per line, to be checked if they are found in the $2 file.
+# We verify by checking the count of matching lines against the count of fixed strings; exit status is 0 only if equal.
+# This is an imperfect check but should be good enough provided the fixed strings are sufficiently detailed to appear
+# only *once* in the $2 file.  Lines of $1 are counted with grep -c '' so an unterminated final line is still counted.
+grepOut=`grep -c -F -f "$1" "$2"`
+numChecks=`grep -c '' "$1"`
+exit $(( numChecks - grepOut ))
diff --git a/gpunit_functionality/metrics/continuous_metric_NaN_Missing_test.yml b/gpunit_functionality/metrics/continuous_metric_NaN_Missing_test.yml
new file mode 100644
index 0000000..b34e069
--- /dev/null
+++ b/gpunit_functionality/metrics/continuous_metric_NaN_Missing_test.yml
@@ -0,0 +1,51 @@
+# Copyright (c) 2003-2021 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California.  All rights reserved.
+#module: urn:lsid:broad.mit.edu:cancer.software.genepattern.module.analysis:00072:20
+module: GSEA
+name: GSEA continuous_metric_NaN_Missing_test
+description: Test the GSEA continuous metric handling NaN and Missing values.  Tests are centered on HTR4, HTR7, NPAL2, NPAL3, GSTK1.
+params:
+       # Using a dataset needing collapse because it happens to have missing & NaN values
+       expression.dataset: "<%gpunit.testData%>gpunit/GSEA/v20/input/Diabetes_hgu133a_NaN_missing_vals.gct"
+       gene.sets.database: [ "<%gpunit.testData%>gpunit/GSEA/v20/input/c1.symbols.reduced.gmt" ]
+       number.of.permutations: "10"
+       phenotype.labels: "<%gpunit.testData%>gpunit/GSEA/v20/input/Diabetes_6samples_fake_as_cont_pheno.cls"
+       target.profile: ""
+       permutation.type: "phenotype"
+       collapse.dataset: "Collapse"
+       chip.platform.file: "<%gpunit.testData%>gpunit/GSEA/v20/input/HG_U133A.chip"
+       # Modifying the ZIP name here so that the diffCmd can find the RNK inside.  We could modify the diffCmd 
+       # to be able to find it, but that makes the code somewhat complicated. 
+       output.file.name: "Diabetes_hgu133a_NaN_missing_vals_collapsed_to_symbols.zip"
+       scoring.scheme: "weighted"
+       metric.for.ranking.genes: "Spearman"
+       gene.list.sorting.mode: "real"
+       gene.list.ordering.mode: "descending"
+       max.gene.set.size: "500"
+       min.gene.set.size: "15"
+       collapsing.mode.for.probe.sets.with.more.than.one.match: "Max_probe"
+       normalization.mode: "meandiv"
+       randomization.mode: "no_balance"
+       omit.features.with.no.symbol.match: "true"
+       median.for.class.metrics: "false"
+       number.of.markers: "100"
+       # Note that we use a fixed random seed rather than the 'timestamp' default so that we'll have reproducible test results
+       random.seed: "149"
+       create.svgs: "false"
+       create.gcts: "true"
+       save.random.ranked.lists: "false"
+       plot.graphs.for.the.top.sets.of.each.phenotype: "20"
+       make.detailed.gene.set.report: "false"
+       selected.gene.sets: ""
+       dev.mode: "true"
+       alt.delim: ""
+       create.zip: "true"
+assertions:
+        jobStatus: success
+        files:
+            "Diabetes_hgu133a_NaN_missing_vals_collapsed_to_symbols.zip":
+                diffCmd: ../diffGseaResults.sh
+                diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/metrics/continuous_metric_NaN_Missing_test/Diabetes_hgu133a_NaN_missing_vals_collapsed_to_symbols.zip"
+            "stdout.txt":
+                diffCmd: ../grepMessages.sh
+                diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/metrics/continuous_metric_NaN_Missing_test/stdoutMatches.txt"
+                
\ No newline at end of file
diff --git a/gpunit_functionality/metrics/metric_Diff_of_Classes_test.yml b/gpunit_functionality/metrics/metric_Diff_of_Classes_test.yml
new file mode 100644
index 0000000..70779a8
--- /dev/null
+++ b/gpunit_functionality/metrics/metric_Diff_of_Classes_test.yml
@@ -0,0 +1,44 @@
+# Copyright (c) 2003-2021 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California.  All rights reserved.
+#module: urn:lsid:broad.mit.edu:cancer.software.genepattern.module.analysis:00072:20
+module: GSEA
+name: GSEA metric_Diff_of_Classes_test
+description: Test GSEA using the Diff_of_Classes metric.  Variation of user_gene_set_test with a reduced dataset.
+params:
+       expression.dataset: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_14samples_collapsed_symbols.gct"
+       gene.sets.database: [ "<%gpunit.testData%>gpunit/GSEA/v20/input/c1.symbols.reduced.gmt" ]
+       number.of.permutations: "10"
+       phenotype.labels: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_14samples.cls"
+       target.profile: ""
+       permutation.type: "phenotype"
+       collapse.dataset: "No_Collapse"
+       #chip.platform.file: 
+       output.file.name: "<expression.dataset_basename>.zip"
+       scoring.scheme: "weighted"
+       metric.for.ranking.genes: "Diff_of_Classes"
+       gene.list.sorting.mode: "real"
+       gene.list.ordering.mode: "descending"
+       max.gene.set.size: "500"
+       min.gene.set.size: "15"
+       collapsing.mode.for.probe.sets.with.more.than.one.match: "Max_probe"
+       normalization.mode: "meandiv"
+       randomization.mode: "no_balance"
+       omit.features.with.no.symbol.match: "true"
+       median.for.class.metrics: "false"
+       number.of.markers: "100"
+       # Note that we use a fixed random seed rather than the 'timestamp' default so that we'll have reproducible test results
+       random.seed: "149"
+       create.svgs: "false"
+       create.gcts: "false"
+       save.random.ranked.lists: "false"
+       plot.graphs.for.the.top.sets.of.each.phenotype: "20"
+       make.detailed.gene.set.report: "false"
+       selected.gene.sets: ""
+       dev.mode: "true"
+       alt.delim: ""
+       create.zip: "true"
+assertions:
+        jobStatus: success
+        files:
+            "P53_14samples_collapsed_symbols.zip":
+                diffCmd: ../diffGseaResults.sh
+                diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/metrics/metric_Diff_of_Classes_test/P53_14samples_collapsed_symbols.zip"
diff --git a/gpunit_functionality/metrics/metric_Ratio_of_Means_test.yml b/gpunit_functionality/metrics/metric_Ratio_of_Classes_test.yml
similarity index 84%
rename from gpunit_functionality/metrics/metric_Ratio_of_Means_test.yml
rename to gpunit_functionality/metrics/metric_Ratio_of_Classes_test.yml
index e389302..135fd5e 100644
--- a/gpunit_functionality/metrics/metric_Ratio_of_Means_test.yml
+++ b/gpunit_functionality/metrics/metric_Ratio_of_Classes_test.yml
@@ -1,8 +1,8 @@
-# Copyright (c) 2003-2019 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California.  All rights reserved.
+# Copyright (c) 2003-2021 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California.  All rights reserved.
 #module: urn:lsid:broad.mit.edu:cancer.software.genepattern.module.analysis:000142:15
 module: GSEA
-name: GSEA metric_Ratio_of_Means_test
-description: Test GSEA using the Ratio_of_Means metric.  Variation of user_gene_set_test with a reduced dataset.
+name: GSEA metric_Ratio_of_Classes_test
+description: Test GSEA using the Ratio_of_Classes metric.  Variation of user_gene_set_test with a reduced dataset.
 params:
        expression.dataset: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_14samples_collapsed_symbols.gct"
        gene.sets.database: [ "<%gpunit.testData%>gpunit/GSEA/v20/input/c1.symbols.reduced.gmt" ]
@@ -14,7 +14,7 @@ params:
        #chip.platform.file: 
        output.file.name: "<expression.dataset_basename>.zip"
        scoring.scheme: "weighted"
-       metric.for.ranking.genes: "Ratio_of_Means"
+       metric.for.ranking.genes: "Ratio_of_Classes"
        gene.list.sorting.mode: "real"
        gene.list.ordering.mode: "descending"
        max.gene.set.size: "500"
@@ -41,4 +41,4 @@ assertions:
         files:
             "P53_14samples_collapsed_symbols.zip":
                 diffCmd: ../diffGseaResults.sh
-                diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/metrics/metric_Ratio_of_Means_test/P53_14samples_collapsed_symbols.zip"
+                diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/metrics/metric_Ratio_of_Classes_test/P53_14samples_collapsed_symbols.zip"
diff --git a/gpunit_functionality/metrics/metric_Diff_of_Means_test.yml b/gpunit_functionality/metrics/metric_Spearman_test.yml
similarity index 84%
rename from gpunit_functionality/metrics/metric_Diff_of_Means_test.yml
rename to gpunit_functionality/metrics/metric_Spearman_test.yml
index 0e53ede..cc11981 100644
--- a/gpunit_functionality/metrics/metric_Diff_of_Means_test.yml
+++ b/gpunit_functionality/metrics/metric_Spearman_test.yml
@@ -1,20 +1,20 @@
 # Copyright (c) 2003-2019 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California.  All rights reserved.
 #module: urn:lsid:broad.mit.edu:cancer.software.genepattern.module.analysis:00072:20
 module: GSEA
-name: GSEA metric_Diff_of_Means_test
-description: Test GSEA using the Diff_of_Means metric.  Variation of user_gene_set_test with a reduced dataset.
+name: GSEA metric_Spearman_test
+description: Test GSEA using the Spearman metric.  Variation of user_gene_set_test with a reduced dataset and a synthetic continuous phenotype CLS file.
 params:
        expression.dataset: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_14samples_collapsed_symbols.gct"
        gene.sets.database: [ "<%gpunit.testData%>gpunit/GSEA/v20/input/c1.symbols.reduced.gmt" ]
        number.of.permutations: "10"
-       phenotype.labels: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_14samples.cls"
+       phenotype.labels: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_14samples_fake_as_cont_pheno.cls"
        target.profile: ""
        permutation.type: "phenotype"
        collapse.dataset: "No_Collapse"
        #chip.platform.file: 
        output.file.name: "<expression.dataset_basename>.zip"
        scoring.scheme: "weighted"
-       metric.for.ranking.genes: "Diff_of_Means"
+       metric.for.ranking.genes: "Spearman"
        gene.list.sorting.mode: "real"
        gene.list.ordering.mode: "descending"
        max.gene.set.size: "500"
@@ -41,4 +41,4 @@ assertions:
         files:
             "P53_14samples_collapsed_symbols.zip":
                 diffCmd: ../diffGseaResults.sh
-                diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/metrics/metric_Diff_of_Means_test/P53_14samples_collapsed_symbols.zip"
+                diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/metrics/metric_Spearman_test/P53_14samples_collapsed_symbols.zip"
diff --git a/gpunit_functionality/metrics/metric_log2_ratio_of_means_test.yml b/gpunit_functionality/metrics/metric_log2_ratio_of_classes_test.yml
similarity index 88%
rename from gpunit_functionality/metrics/metric_log2_ratio_of_means_test.yml
rename to gpunit_functionality/metrics/metric_log2_ratio_of_classes_test.yml
index 70358c2..61f2282 100644
--- a/gpunit_functionality/metrics/metric_log2_ratio_of_means_test.yml
+++ b/gpunit_functionality/metrics/metric_log2_ratio_of_classes_test.yml
@@ -1,7 +1,7 @@
-# Copyright (c) 2003-2019 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California.  All rights reserved.
+# Copyright (c) 2003-2021 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California.  All rights reserved.
 #module: urn:lsid:broad.mit.edu:cancer.software.genepattern.module.analysis:00072:20
 module: GSEA
-name: GSEA metric_log2_ratio_of_means_test
+name: GSEA metric_log2_ratio_of_classes_test
 description: Test GSEA using the log2_Ratio_of_Classes metric.  Variation of user_gene_set_test with a reduced dataset.
 params:
        expression.dataset: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_14samples_collapsed_symbols.gct"
@@ -14,7 +14,7 @@ params:
        #chip.platform.file: 
        output.file.name: "<expression.dataset_basename>.zip"
        scoring.scheme: "weighted"
-       metric.for.ranking.genes: "log2_ratio_of_means"
+       metric.for.ranking.genes: "log2_Ratio_of_Classes"
        gene.list.sorting.mode: "real"
        gene.list.ordering.mode: "descending"
        max.gene.set.size: "500"
@@ -41,4 +41,4 @@ assertions:
         files:
             "P53_14samples_collapsed_symbols.zip":
                 diffCmd: ../diffGseaResults.sh
-                diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/metrics/metric_log2_ratio_of_means_test/P53_14samples_collapsed_symbols.zip"
+                diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/metrics/metric_log2_ratio_of_classes_test/P53_14samples_collapsed_symbols.zip"
diff --git a/gpunit_functionality/phenotype_cls/invalid_too_few_samples_gene_set_perm_test.yml b/gpunit_functionality/phenotype_cls/invalid_too_few_samples_gene_set_perm_test.yml
new file mode 100644
index 0000000..05e3c77
--- /dev/null
+++ b/gpunit_functionality/phenotype_cls/invalid_too_few_samples_gene_set_perm_test.yml
@@ -0,0 +1,45 @@
+# Copyright (c) 2003-2021 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California.  All rights reserved.
+#module: urn:lsid:broad.mit.edu:cancer.software.genepattern.module.analysis:00072:20
+module: GSEA
+name: GSEA too_few_samples_gene_set_perm_test
+description: Provide a proper error message when there are too few samples for particular metrics, run with the gene_set permutation mode
+params:
+       expression.dataset: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_6samples_collapsed_symbols.gct"
+       gene.sets.database: [ "<%gpunit.testData%>gpunit/GSEA/v20/input/c1.symbols.reduced.gmt" ]
+       number.of.permutations: "10"
+       phenotype.labels: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_too_few_samples.cls"
+       target.profile: ""
+       permutation.type: "gene_set"
+       collapse.dataset: "No_Collapse"
+       #chip.platform.file: 
+       output.file.name: "<expression.dataset_basename>.zip"
+       scoring.scheme: "weighted"
+       metric.for.ranking.genes: "Signal2Noise"
+       gene.list.sorting.mode: "real"
+       gene.list.ordering.mode: "descending"
+       max.gene.set.size: "500"
+       min.gene.set.size: "15"
+       collapsing.mode.for.probe.sets.with.more.than.one.match: "Max_probe"
+       normalization.mode: "meandiv"
+       randomization.mode: "no_balance"
+       omit.features.with.no.symbol.match: "true"
+       median.for.class.metrics: "false"
+       number.of.markers: "100"
+       # Note that we use a fixed random seed rather than the 'timestamp' default so that we'll have reproducible test results
+       random.seed: "149"
+       create.svgs: "false"
+       create.gcts: "false"
+       save.random.ranked.lists: "false"
+       plot.graphs.for.the.top.sets.of.each.phenotype: "20"
+       make.detailed.gene.set.report: "false"
+       selected.gene.sets: ""
+       dev.mode: "true"
+       alt.delim: ""
+       create.zip: "true"
+assertions:
+        jobStatus: fail
+        files:
+            "stderr.txt":
+                diffCmd: ../grepMessage.sh xtools.api.param.BadParamException:.Too.few.samples.in.class.B.of.the.dataset.to.use.this.metric
+                # The following file is unused; keeping it just for reference.
+                diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/phenotype_cls/too_few_samples_gene_set_perm_test/stderr.txt"
diff --git a/gpunit_functionality/phenotype_cls/invalid_too_few_samples_test.yml b/gpunit_functionality/phenotype_cls/invalid_too_few_samples_test.yml
index 39c98b8..4a94c02 100644
--- a/gpunit_functionality/phenotype_cls/invalid_too_few_samples_test.yml
+++ b/gpunit_functionality/phenotype_cls/invalid_too_few_samples_test.yml
@@ -40,6 +40,6 @@ assertions:
         jobStatus: fail
         files:
             "stderr.txt":
-                diffCmd: ../grepMessage.sh xtools.api.param.BadParamException:.One.of.the.classes.in.this.dataset.has.too.few.samples.in.one.of.the.classes.of.the.dataset.to.use.this.metric
+                diffCmd: ../grepMessage.sh xtools.api.param.BadParamException:.Too.few.samples.in.class.B.of.the.dataset.to.use.this.metric
                 # The following file is unused; keeping it just for reference.
                 diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/phenotype_cls/invalid_too_few_samples_test/stderr.txt"

From 60b948c67400b67363a6fbd77aa180513025e62a Mon Sep 17 00:00:00 2001
From: eby <eby@broadinstitute.org>
Date: Tue, 2 Nov 2021 18:26:37 -0700
Subject: [PATCH 2/5] Updated manifest, docs, etc ahead of the GSEA 4.2.0
 release

Will be bumping the version to 20.3.x.
---
 docs/v20/index.html | 20 ++++++++++----------
 docs/v20/test.md    | 19 +++++++++----------
 manifest            | 33 ++++++++++-----------------------
 paramgroups.json    |  2 +-
 release.properties  |  4 ++--
 5 files changed, 32 insertions(+), 46 deletions(-)

diff --git a/docs/v20/index.html b/docs/v20/index.html
index dfd9560..e7a874d 100644
--- a/docs/v20/index.html
+++ b/docs/v20/index.html
@@ -1,7 +1,7 @@
 <!DOCTYPE html>
 <!-- saved from url=(0083)http://software.broadinstitute.org/cancer/software/genepattern/modules/docs/GSEA/18 -->
 <html class=""><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
-  <title>GSEA (v20.2.x)</title>
+  <title>GSEA (v20.3.x)</title>
   <link href="./application.css" media="all" rel="stylesheet">
   <script src="./application.js"></script><style>.cke{visibility:hidden;}</style><style type="text/css"></style>
   <meta http-equiv="X-UA-Compatible" content="IE=edge">
@@ -15,7 +15,7 @@
 
 	<div class="gp-content-header fluid">
 	<div class="container">
-		<h1>GSEA (v20.2.x) <a style="float: right" href="https://www.genepattern.org"><img alt="GP Logo" src="gplogo.png" /></a></h1>
+		<h1>GSEA (v20.3.x) <a style="float: right" href="https://www.genepattern.org"><img alt="GP Logo" src="gplogo.png" /></a></h1>
 	</div>
 </div>
 <div class="container">
@@ -33,7 +33,7 @@ <h1>GSEA (v20.2.x) <a style="float: right" href="https://www.genepattern.org"><i
 <p></p>
 				</div>
 				<div class="col-sm-4">
-					<p><strong>GSEA Version: </strong> 4.1.0</p>
+					<p><strong>GSEA Version: </strong> 4.2.0</p>
 				</div>
 			</div>
 
@@ -318,10 +318,6 @@ <h2>Parameters</h2>
             <td valign="top">create gcts&nbsp;<span style="color:red;">*</span></td>
             <td valign="top">Whether to save the dataset subsets backing the GSEA report heatmaps as GCT files; these will be subsets of your original dataset corresponding only to the genes of the heatmap.&nbsp;</td>
         </tr>
-        <tr>
-            <td valign="top">create zip&nbsp;<span style="color:red;">*</span></td>
-            <td valign="top">Create a ZIP bundle of the output files.  This is true by default, matching the former behavior where a ZIP bundle was always created.</td>
-        </tr>
 	</tbody>
 </table>
 
@@ -358,11 +354,10 @@ <h2>Input Files</h2>
 
 <h2>Output Files</h2>
 
-<p>1. Optional Enrichment Report archive: ZIP</p>
+<p>1. Enrichment Report archive: ZIP</p>
 
 <p style="margin-left: 40px;">ZIP file containing the result files. &nbsp;For more information on interpreting these results, see <a href="http://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideTEXT.htm#_Interpreting_GSEA_Results">Interpreting GSEA Results</a> in the GSEA User Guide.
-Note that in prior versions the ZIP bundle was created as the only output file.  This behavior has been changed to give direct access to the results without the need for a download.  The default is to create the ZIP bundle, matching the former behavior, but the report files
-will always be created directly.</p>
+Note that in prior versions the ZIP bundle was created as the only output file.  This behavior has been changed to give direct access to the results without the need for a download.</p>
 
 <p>2. Enrichment Report: HTML and PNG images</p>
 
@@ -410,6 +405,11 @@ <h2>Version Comments</h2>
 					</tr>
 				</thead>
 				<tbody>
+                        <tr>
+                            <td>20.3.0</td>
+                            <td>2021-11-5</td>
+                            <td>Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes.  Fixed some issues handling datasets with missing values.  Introduced the Spearman metric.  Fixed issue with the min-sample check with gene_set permutation mode. Changed the FDR q-value scale on the NES vs Significance plot.</td>
+                        </tr>
                         <tr>
                             <td>20.2.4</td>
                             <td>2021-4-22</td>
diff --git a/docs/v20/test.md b/docs/v20/test.md
index dd175d8..81ad8dd 100644
--- a/docs/v20/test.md
+++ b/docs/v20/test.md
@@ -1,4 +1,4 @@
-# GSEA (v20.2.x)
+# GSEA (v20.3.x)
 
 Gene Set Enrichment Analysis
 
@@ -14,7 +14,7 @@ for GSEA questions.
 team](http://software.broadinstitute.org/cancer/software/genepattern/contact)
 for GenePattern issues.
 
-**GSEA Version:** 4.1.0
+**GSEA Version:** 4.2.0
 
 ## Description
 
@@ -367,10 +367,6 @@ For descriptions of the ranking metrics, see <a href="http://www.gsea-msigdb.org
 <td align="left">create gcts <span style="color:red;">*</span></td>
 <td align="left">Whether to save the dataset subsets backing the GSEA report heatmaps as GCT files; these will be subsets of your original dataset corresponding only to the genes of the heatmap. </td>
 </tr>
-<tr class="even">
-<td align="left">create zip <span style="color:red;">*</span></td>
-<td align="left">Create a ZIP bundle of the output files. This is true by default, matching the former behavior where a ZIP bundle was always created.</td>
-</tr>
 </tbody>
 </table>
 
@@ -423,16 +419,14 @@ drop-down
 
 ## Output Files
 
-1\. Optional Enrichment Report archive: ZIP
+1\. Enrichment Report archive: ZIP
 
 ZIP file containing the result files.  For more information on
 interpreting these results, see [Interpreting GSEA
 Results](http://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideTEXT.htm#_Interpreting_GSEA_Results)
 in the GSEA User Guide. Note that in prior versions the ZIP bundle was
 created as the only output file. This behavior has been changed to give
-direct access to the results without the need for a download. The
-default is to create the ZIP bundle, matching the former behavior, but
-the report files will always be created directly.
+direct access to the results without the need for a download.
 
 2\. Enrichment Report: HTML and PNG images
 
@@ -476,6 +470,11 @@ Java
 </tr>
 </thead>
 <tbody>
+<tr class="even">
+<td align="left">20.3.0</td>
+<td align="left">2021-11-2</td>
+<td align="left">Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes.  Fixed some issues handling datasets with missing values.  Introduced the Spearman metric.  Fixed issue with the min-sample check with gene_set permutation mode. Changed the FDR q-value scale on the NES vs Significance plot.</td>
+</tr>
 <tr class="odd">
 <td align="left">20.2.4</td>
 <td align="left">2021-4-22</td>
diff --git a/manifest b/manifest
index 4d4af26..0db80ea 100644
--- a/manifest
+++ b/manifest
@@ -1,10 +1,10 @@
 #Fri, 05 Oct 2018 04:51:00 +0900
-# Copyright (c) 2003-2020 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California.  All rights reserved.
+# Copyright (c) 2003-2021 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California.  All rights reserved.
 #GSEA
 JVMLevel=11
 LSID=urn\:lsid\:broad.mit.edu\:cancer.software.genepattern.module.analysis\:00072\:999999999
 author=Aravind Subramanian, Pablo Tamayo, David Eby; Broad Institute
-commandLine=/opt/gsea/GSEA_4.1.0/gsea-cli.sh GSEA -res <expression.dataset> -cls <phenotype.labels> -collapse <collapse.dataset> -mode <collapsing.mode.for.probe.sets.with.more.than.one.match> -norm <normalization.mode> -nperm <number.of.permutations> -permute <permutation.type> -rnd_type <randomization.mode> -scoring_scheme <scoring.scheme> -metric <metric.for.ranking.genes> -sort <gene.list.sorting.mode> -order <gene.list.ordering.mode> -include_only_symbols <omit.features.with.no.symbol.match> -make_sets <make.detailed.gene.set.report> -median <median.for.class.metrics> -num <number.of.markers> -plot_top_x <plot.graphs.for.the.top.sets.of.each.phenotype> -rnd_seed <random.seed> -save_rnd_lists <save.random.ranked.lists> -set_max <max.gene.set.size> -set_min <min.gene.set.size> <chip.platform.file> -gmx_list <gene.sets.database> -create_svgs <create.svgs> -create_gcts <create.gcts> -target_profile <target.profile>  <selected.gene.sets> <output.file.name> <alt.delim> -dev_mode <dev.mode> -zip_report <create.zip> -run_as_genepattern true
+commandLine=/opt/gsea/GSEA_4.2.0/gsea-cli.sh GSEA -res <expression.dataset> -cls <phenotype.labels> -collapse <collapse.dataset> -mode <collapsing.mode.for.probe.sets.with.more.than.one.match> -norm <normalization.mode> -nperm <number.of.permutations> -permute <permutation.type> -rnd_type <randomization.mode> -scoring_scheme <scoring.scheme> -metric <metric.for.ranking.genes> -sort <gene.list.sorting.mode> -order <gene.list.ordering.mode> -include_only_symbols <omit.features.with.no.symbol.match> -make_sets <make.detailed.gene.set.report> -median <median.for.class.metrics> -num <number.of.markers> -plot_top_x <plot.graphs.for.the.top.sets.of.each.phenotype> -rnd_seed <random.seed> -save_rnd_lists <save.random.ranked.lists> -set_max <max.gene.set.size> -set_min <min.gene.set.size> <chip.platform.file> -gmx_list <gene.sets.database> -create_svgs <create.svgs> -create_gcts <create.gcts> -target_profile <target.profile>  <selected.gene.sets> <output.file.name> <alt.delim> -dev_mode <dev.mode> -zip_report true -run_as_genepattern true
 cpuType=any
 taskDoc=doc.html
 description=Gene Set Enrichment Analysis.  <strong>If you are using GSEA on RNA-seq data, please read <a href="http://software.broadinstitute.org/cancer/software/gsea/wiki/index.php/Using_RNA-seq_Datasets_with_GSEA" target="_blank">these guidelines</a>.</strong>
@@ -12,7 +12,7 @@ fileFormat=zip
 language=Java
 name=GSEA
 os=any
-job.docker.image=genepattern/gsea_4.1.0\:0.1
+job.docker.image=genepattern/gsea_4.2.0\:0.1
 
 
 p1_MODE=IN
@@ -359,7 +359,7 @@ p26_value=
 p27_MODE=
 p27_TYPE=TEXT
 p27_default_value=<expression.dataset_basename>.zip
-p27_description=Name of the output file.  Note that this only applies if create.zip\=true.
+p27_description=Name of the output ZIP file.
 p27_fileFormat=
 p27_flag=
 p27_name=output.file.name
@@ -384,11 +384,11 @@ p28_value=
 
 p29_MODE=
 p29_TYPE=TEXT
-p29_default_value=true
-p29_description=Create a ZIP bundle of the output files.
+p29_default_value=false
+p29_description=Whether to save the dataset subsets backing the GSEA report heatmaps as GCT files; these will be subsets of your original dataset corresponding only to the genes of the heatmap.
 p29_fileFormat=
 p29_flag=
-p29_name=create.zip
+p29_name=create.gcts
 p29_optional=
 p29_prefix=
 p29_prefix_when_specified=
@@ -398,29 +398,16 @@ p29_value=false;true
 p30_MODE=
 p30_TYPE=TEXT
 p30_default_value=false
-p30_description=Whether to save the dataset subsets backing the GSEA report heatmaps as GCT files; these will be subsets of your original dataset corresponding only to the genes of the heatmap.
+p30_description=Enable developer mode.
 p30_fileFormat=
 p30_flag=
-p30_name=create.gcts
+p30_name=dev.mode
 p30_optional=
 p30_prefix=
 p30_prefix_when_specified=
 p30_type=java.lang.String
 p30_value=false;true
 
-p31_MODE=
-p31_TYPE=TEXT
-p31_default_value=false
-p31_description=Enable developer mode.
-p31_fileFormat=
-p31_flag=
-p31_name=dev.mode
-p31_optional=
-p31_prefix=
-p31_prefix_when_specified=
-p31_type=java.lang.String
-p31_value=false;true
-
 pipelineModel=
 privacy=public
 quality=development
@@ -430,6 +417,6 @@ serializedModel=
 taskType=Pathway Analysis
 categories=gsea;pathway analysis
 userid=eby@broadinstitute.org
-version=Fixed minor typo.
+version=Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes.  Fixed some issues handling datasets with missing values.  Introduced the Spearman metric.  Fixed issue with the min-sample check with gene_set permutation mode. Changed the FDR q-value scale on the NES vs Significance plot.
 
 publicationDate=10/05/2018 04\:51 
diff --git a/paramgroups.json b/paramgroups.json
index e853e9f..6144062 100644
--- a/paramgroups.json
+++ b/paramgroups.json
@@ -1,5 +1,5 @@
 [{name: "Basic parameters", description: "These parameters are essential for the analysis.", parameters: ["expression.dataset", "gene.sets.database", "number.of.permutations", "phenotype.labels", "target.profile", "permutation.type", "collapse.dataset", "chip.platform.file", "output.file.name" ]},
  {name: "Advanced parameters", description: "Additional parameters with standard defaults; typically you will run GSEA with these parameters set to their default values.", hidden: false},
    {name: "Advanced parameters/Algorithmic", hidden: true, parameters: ["scoring.scheme", "metric.for.ranking.genes", "gene.list.sorting.mode", "gene.list.ordering.mode", "max.gene.set.size", "min.gene.set.size", "collapsing.mode.for.probe.sets.with.more.than.one.match", "normalization.mode", "randomization.mode", "omit.features.with.no.symbol.match", "median.for.class.metrics", "number.of.markers", "random.seed", "save.random.ranked.lists", "selected.gene.sets", "dev.mode", "alt.delim"]},
-   {name: "Advanced parameters/Reporting", hidden: true, parameters: ["plot.graphs.for.the.top.sets.of.each.phenotype", "make.detailed.gene.set.report", "create.svgs", "create.gcts", "create.zip"]}
+   {name: "Advanced parameters/Reporting", hidden: true, parameters: ["plot.graphs.for.the.top.sets.of.each.phenotype", "make.detailed.gene.set.report", "create.svgs", "create.gcts"]}
 ]
\ No newline at end of file
diff --git a/release.properties b/release.properties
index 9187613..b69b05d 100644
--- a/release.properties
+++ b/release.properties
@@ -1,5 +1,5 @@
 #Thu, 22 Apr 2021 11:20:36 -0700
 LSID.noVersion=urn\:lsid\:broad.mit.edu\:cancer.software.genepattern.module.analysis\:00072
-release.version=20.2
-build.number=4
+release.version=20.3
+build.number=-1
 build.timestamp=Thu, 22 Apr 2021 11\:20\:36 -0700

From 12665cfaf08a4aba9f7798e1b747c6326e20fc34 Mon Sep 17 00:00:00 2001
From: eby <eby@broadinstitute.org>
Date: Wed, 1 Dec 2021 18:56:10 -0800
Subject: [PATCH 3/5] Added a test case for the new abs_max collapse mode

---
 .../collapse_NaN_Missing_abs_max_test.yml     | 50 +++++++++++++++++++
 .../collapse_NaN_Missing_max_test.yml         |  2 +-
 2 files changed, 51 insertions(+), 1 deletion(-)
 create mode 100644 gpunit_functionality/collapse/collapse_NaN_Missing_abs_max_test.yml

diff --git a/gpunit_functionality/collapse/collapse_NaN_Missing_abs_max_test.yml b/gpunit_functionality/collapse/collapse_NaN_Missing_abs_max_test.yml
new file mode 100644
index 0000000..4c2b1e9
--- /dev/null
+++ b/gpunit_functionality/collapse/collapse_NaN_Missing_abs_max_test.yml
@@ -0,0 +1,50 @@
+# Copyright (c) 2003-2021 Broad Institute, Inc., Massachusetts Institute of Technology, and Regents of the University of California.  All rights reserved.
+#module: urn:lsid:broad.mit.edu:cancer.software.genepattern.module.analysis:00072:20
+module: GSEA
+name: GSEA collapse_NaN_Missing_abs_max_test
+description: Test the GSEA 'collapse dataset' function handling Infinite, NaN and Missing values, collapse to absolute max of probes.  Tests are centered on HTR4, HTR6, FLJ22639, HTR7, NPAL2, NPAL3, GSTK1, BCR.
+params:
+       expression.dataset: "<%gpunit.testData%>gpunit/GSEA/v20/input/Diabetes_hgu133a_NaN_missing_vals.gct"
+       gene.sets.database: [ "<%gpunit.testData%>gpunit/GSEA/v20/input/c1.symbols.reduced.gmt" ]
+       number.of.permutations: "10"
+       # Uses P53_6samples.cls because it happens to have a reasonable class template for this use
+       phenotype.labels: "<%gpunit.testData%>gpunit/GSEA/v20/input/P53_6samples.cls"
+       target.profile: ""
+       permutation.type: "phenotype"
+       collapse.dataset: "Collapse"
+       chip.platform.file: "<%gpunit.testData%>gpunit/GSEA/v20/input/HG_U133A.chip"
+       # Modifying the ZIP name here so that the diffCmd can find the RNK inside.  We could modify the diffCmd 
+       # to be able to find it, but that makes the code somewhat complicated. 
+       output.file.name: "Diabetes_hgu133a_NaN_missing_vals_collapsed_to_symbols.zip"
+       scoring.scheme: "weighted"
+       metric.for.ranking.genes: "Signal2Noise"
+       gene.list.sorting.mode: "real"
+       gene.list.ordering.mode: "descending"
+       max.gene.set.size: "500"
+       min.gene.set.size: "15"
+       collapsing.mode.for.probe.sets.with.more.than.one.match: "Abs_max_of_probes"
+       normalization.mode: "meandiv"
+       randomization.mode: "no_balance"
+       omit.features.with.no.symbol.match: "true"
+       median.for.class.metrics: "false"
+       number.of.markers: "100"
+       # Note that we use a fixed random seed rather than the 'timestamp' default so that we'll have reproducible test results
+       random.seed: "149"
+       create.svgs: "false"
+       create.gcts: "true"
+       save.random.ranked.lists: "false"
+       plot.graphs.for.the.top.sets.of.each.phenotype: "20"
+       make.detailed.gene.set.report: "false"
+       selected.gene.sets: ""
+       dev.mode: "true"
+       alt.delim: ""
+       create.zip: "true"
+assertions:
+        jobStatus: success
+        files:
+            "Diabetes_hgu133a_NaN_missing_vals_collapsed_to_symbols.zip":
+                diffCmd: ../diffGseaResults.sh
+                diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/collapse/collapse_NaN_Missing_abs_max_test/Diabetes_hgu133a_NaN_missing_vals_collapsed_to_symbols.zip"
+            "stdout.txt":
+                diffCmd: ../grepMessages.sh
+                diff: "<%gpunit.resultData%>gpunit/GSEA/v20/output/collapse/collapse_NaN_Missing_abs_max_test/stdoutMatches.txt"
diff --git a/gpunit_functionality/collapse/collapse_NaN_Missing_max_test.yml b/gpunit_functionality/collapse/collapse_NaN_Missing_max_test.yml
index 2176d9a..8fe181a 100644
--- a/gpunit_functionality/collapse/collapse_NaN_Missing_max_test.yml
+++ b/gpunit_functionality/collapse/collapse_NaN_Missing_max_test.yml
@@ -2,7 +2,7 @@
 #module: urn:lsid:broad.mit.edu:cancer.software.genepattern.module.analysis:00072:20
 module: GSEA
 name: GSEA collapse_NaN_Missing_max_test
-description: Test the GSEA 'collapse dataset' function handling NaN and Missing values, collapse to max of probes.  Tests are centered on HTR4, HTR7, NPAL2, NPAL3, GSTK1.
+description: Test the GSEA 'collapse dataset' function handling Infinite, NaN and Missing values, collapse to max of probes.  Tests are centered on HTR4, HTR6, FLJ22639, HTR7, NPAL2, NPAL3, GSTK1, BCR.
 params:
        expression.dataset: "<%gpunit.testData%>gpunit/GSEA/v20/input/Diabetes_hgu133a_NaN_missing_vals.gct"
        gene.sets.database: [ "<%gpunit.testData%>gpunit/GSEA/v20/input/c1.symbols.reduced.gmt" ]

From 69356c830d13748828f7d401ffa9dd4d2cddaa1f Mon Sep 17 00:00:00 2001
From: eby <eby@broadinstitute.org>
Date: Fri, 3 Dec 2021 16:29:33 -0800
Subject: [PATCH 4/5] Edited docs and manifest ahead of GSEA 4.2.0 release

New option(s), updated docs.
---
 docs/v20/index.html | 8 ++++++--
 docs/v20/test.md    | 7 ++++++-
 manifest            | 6 +++---
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/docs/v20/index.html b/docs/v20/index.html
index e7a874d..a4266fb 100644
--- a/docs/v20/index.html
+++ b/docs/v20/index.html
@@ -245,6 +245,9 @@ <h2>Parameters</h2>
 				<li>Median_of_probes: For each sample, use the median expression value for the probe set.</li>
 				<li>Mean_of_probes: For each sample, use the mean expression value for the probe set.</li>
 				<li>Sum_of_probes: For each sample, sum all the expression values of the probe set.</li>
+				<li>Abs_max_of_probes: For each sample, use the expression value for the probe set with the maximum <strong>absolute value</strong>.  Note that each value retains its original sign but is chosen based on absolute value.
+                    In other words, the largest magnitude value is used.  While this method is useful with computational-based input datasets it is generally <strong>not recommended</strong> for use with quantification-based expression
+                    measures such as counts or microarray fluorescence.</li>
 			</ul>
 			</td>
 		</tr>
@@ -369,7 +372,8 @@ <h2>Output Files</h2>
 
 <p>3. Optional GCTs</p>
 
-<p style="margin-left: 40px;">The datasets backing all the heatmap images from the Enrichment Report for use in external visualizers or analysis tools.  These will have the same name as the corresponding image but instead with a GCT extension.</p>
+<p style="margin-left: 40px;">The datasets backing all the heatmap images from the Enrichment Report for use in external visualizers or analysis tools.  These will have the same name as the corresponding image but instead with a GCT extension.  
+When Collapse or Remap_Only is set, the collapsed dataset is also saved as a GCT.  These files will be created if the Create GCTs option is true.</p>
 
 				</div>
 			</div>
@@ -408,7 +412,7 @@ <h2>Version Comments</h2>
                         <tr>
                             <td>20.3.0</td>
                             <td>2021-11-5</td>
-                            <td>Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes.  Fixed some issues handling datasets with missing values.  Introduced the Spearman metric.  Fixed issue with the min-sample check with gene_set permutation mode. Changed the FDR q-value scale on the NES vs Significance plot.</td>
+                            <td>Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes.  Adds the Abs_max_of_probes collapse mode.  Fixed some issues handling datasets with missing values.  Added the Spearman metric.  Fixed issue with the min-sample check with gene_set permutation mode.  Improved warnings and logging.  Changed the FDR q-value scale on the NES vs Significance plot.</td>
                         </tr>
                         <tr>
                             <td>20.2.4</td>
diff --git a/docs/v20/test.md b/docs/v20/test.md
index 81ad8dd..345bf0c 100644
--- a/docs/v20/test.md
+++ b/docs/v20/test.md
@@ -301,6 +301,9 @@ For descriptions of the ranking metrics, see <a href="http://www.gsea-msigdb.org
 <li>Median_of_probes: For each sample, use the median expression value for the probe set.</li>
 <li>Mean_of_probes: For each sample, use the mean expression value for the probe set.</li>
 <li>Sum_of_probes: For each sample, sum all the expression values of the probe set.</li>
+<li>Abs_max_of_probes: For each sample, use the expression value for the probe set with the maximum <strong>absolute value</strong>.  Note that each value retains its original sign but is chosen based on absolute value.
+In other words, the largest magnitude value is used.  While this method is useful with computational-based input datasets it is generally <strong>not recommended</strong> for use with quantification-based expression
+measures such as counts or microarray fluorescence.</li>
 </ul></td>
 </tr>
 <tr class="even">
@@ -444,6 +447,8 @@ can be decompressed using 'gunzip' on Mac or Linux and 7-Zip on Windows
 The datasets backing all the heatmap images from the Enrichment Report
 for use in external visualizers or analysis tools. These will have the
 same name as the corresponding image but instead with a GCT extension.
+When Collapse or Remap_Only is set, the collapsed dataset is also saved 
+as a GCT.  These files will be created if the Create GCTs option is true.
 
 ## Platform Dependencies
 
@@ -473,7 +478,7 @@ Java
 <tr class="even">
 <td align="left">20.3.0</td>
 <td align="left">2021-11-2</td>
-<td align="left">Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes.  Fixed some issues handling datasets with missing values.  Introduced the Spearman metric.  Fixed issue with the min-sample check with gene_set permutation mode. Changed the FDR q-value scale on the NES vs Significance plot.</td>
+<td align="left">Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes.  Adds the Abs_max_of_probes collapse mode.  Fixed some issues handling datasets with missing values.  Added the Spearman metric.  Fixed issue with the min-sample check with gene_set permutation mode.  Improved warnings and logging.  Changed the FDR q-value scale on the NES vs Significance plot.</td>
 </tr>
 <tr class="odd">
 <td align="left">20.2.4</td>
diff --git a/manifest b/manifest
index 0db80ea..532685f 100644
--- a/manifest
+++ b/manifest
@@ -146,7 +146,7 @@ p10_optional=
 p10_prefix=
 p10_prefix_when_specified=
 p10_type=java.lang.String
-p10_value=Signal2Noise;tTest;Ratio_of_Classes;Diff_of_Classes;log2_Ratio_of_Classes;Pearson;Cosine;Manhatten\=Manhattan;Euclidean
+p10_value=Signal2Noise;tTest;Ratio_of_Classes;Diff_of_Classes;log2_Ratio_of_Classes;Pearson;Spearman;Cosine;Manhatten\=Manhattan;Euclidean
 
 p11_MODE=
 p11_TYPE=TEXT
@@ -211,7 +211,7 @@ p15_optional=
 p15_prefix=
 p15_prefix_when_specified=
 p15_type=java.lang.String
-p15_value=Max_probe;Median_of_probes;Mean_of_probes;Sum_of_probes
+p15_value=Max_probe;Median_of_probes;Mean_of_probes;Sum_of_probes;Abs_max_of_probes
 
 p16_MODE=
 p16_TYPE=TEXT
@@ -417,6 +417,6 @@ serializedModel=
 taskType=Pathway Analysis
 categories=gsea;pathway analysis
 userid=eby@broadinstitute.org
-version=Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes.  Fixed some issues handling datasets with missing values.  Introduced the Spearman metric.  Fixed issue with the min-sample check with gene_set permutation mode. Changed the FDR q-value scale on the NES vs Significance plot.
+version=Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes.  Adds the Abs_max_of_probes collapse mode.  Fixed some issues handling datasets with missing values.  Added the Spearman metric.  Fixed issue with the min-sample check with gene_set permutation mode.  Improved warnings and logging.  Changed the FDR q-value scale on the NES vs Significance plot.
 
 publicationDate=10/05/2018 04\:51 

From 051c707e495c4e2ee2ac16b05c1d41b6aac2687a Mon Sep 17 00:00:00 2001
From: eby <eby@broadinstitute.org>
Date: Fri, 17 Dec 2021 15:10:50 -0800
Subject: [PATCH 5/5] Updated docs for the 4.2.0

---
 docs/v20/index.html | 4 ++--
 docs/v20/test.md    | 4 ++--
 manifest            | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/v20/index.html b/docs/v20/index.html
index a4266fb..7bf86ba 100644
--- a/docs/v20/index.html
+++ b/docs/v20/index.html
@@ -411,8 +411,8 @@ <h2>Version Comments</h2>
 				<tbody>
                         <tr>
                             <td>20.3.0</td>
-                            <td>2021-11-5</td>
-                            <td>Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes.  Adds the Abs_max_of_probes collapse mode.  Fixed some issues handling datasets with missing values.  Added the Spearman metric.  Fixed issue with the min-sample check with gene_set permutation mode.  Improved warnings and logging.  Changed the FDR q-value scale on the NES vs Significance plot.</td>
+                            <td>2021-12-17</td>
+                            <td>Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes. Adds the Abs_max_of_probes collapse mode. Fixed some issues handling datasets with missing values. Added the Spearman metric. Fixed issue with the min-sample check with gene_set permutation mode. Improved warnings and logging. Changed the FDR q-value scale on the NES vs Significance plot. Fixed bugs in weighted_p1.5 scoring.</td>
                         </tr>
                         <tr>
                             <td>20.2.4</td>
diff --git a/docs/v20/test.md b/docs/v20/test.md
index 345bf0c..1a06a4e 100644
--- a/docs/v20/test.md
+++ b/docs/v20/test.md
@@ -477,8 +477,8 @@ Java
 <tbody>
 <tr class="even">
 <td align="left">20.3.0</td>
-<td align="left">2021-11-2</td>
-<td align="left">Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes.  Adds the Abs_max_of_probes collapse mode.  Fixed some issues handling datasets with missing values.  Added the Spearman metric.  Fixed issue with the min-sample check with gene_set permutation mode.  Improved warnings and logging.  Changed the FDR q-value scale on the NES vs Significance plot.</td>
+<td align="left">2021-12-17</td>
+<td align="left">Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes. Adds the Abs_max_of_probes collapse mode. Fixed some issues handling datasets with missing values. Added the Spearman metric. Fixed issue with the min-sample check with gene_set permutation mode. Improved warnings and logging. Changed the FDR q-value scale on the NES vs Significance plot. Fixed bugs in weighted_p1.5 scoring.</td>
 </tr>
 <tr class="odd">
 <td align="left">20.2.4</td>
diff --git a/manifest b/manifest
index 532685f..4d39d89 100644
--- a/manifest
+++ b/manifest
@@ -417,6 +417,6 @@ serializedModel=
 taskType=Pathway Analysis
 categories=gsea;pathway analysis
 userid=eby@broadinstitute.org
-version=Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes.  Adds the Abs_max_of_probes collapse mode.  Fixed some issues handling datasets with missing values.  Added the Spearman metric.  Fixed issue with the min-sample check with gene_set permutation mode.  Improved warnings and logging.  Changed the FDR q-value scale on the NES vs Significance plot.
+version=Updated with the GSEA Desktop 4.2.0 code base with numerous bug fixes. Adds the Abs_max_of_probes collapse mode. Fixed some issues handling datasets with missing values. Added the Spearman metric. Fixed issue with the min-sample check with gene_set permutation mode. Improved warnings and logging. Changed the FDR q-value scale on the NES vs Significance plot. Fixed bugs in weighted_p1.5 scoring.
 
 publicationDate=10/05/2018 04\:51