Merge pull request #14 from c3g/feat/configurable-assays

feat: configurable set of available assays for nodes
c3g · Jan 17, 2024 · d3e2a98 · d3e2a98
2 parents 62e0b3e + 1edf109
commit d3e2a98
Show file tree

Hide file tree

Showing 10 changed files with 86 additions and 33 deletions.
diff --git a/config.example.js b/config.example.js
@@ -4,11 +4,28 @@
  * This file configures the dataset that this EpiVar node is responsible for hosting.
  */
 
+const ASSAY_RNA_SEQ = "RNA-seq";
+const ASSAY_ATAC_SEQ = "ATAC-seq";
+const ASSAY_H3K4ME1 = "H3K4me1";
+const ASSAY_H3K4ME3 = "H3K4me3";
+const ASSAY_H3K27AC = "H3K27ac";
+const ASSAY_H3K27ME3 = "H3K27me3";
+
 // noinspection JSUnusedGlobalSymbols
 module.exports = {
   title: "Aracena 𝘦𝘵 𝘢𝘭.",
   assembly: "hg19",
 
+  // availableAssays must be a subset of (or the full set of) the assays supported by EpiVar:
+  availableAssays: [
+    ASSAY_RNA_SEQ,
+    ASSAY_ATAC_SEQ,
+    ASSAY_H3K4ME1,
+    ASSAY_H3K4ME3,
+    ASSAY_H3K27AC,
+    ASSAY_H3K27ME3,
+  ],
+
   conditions: [
     {id: "NI", name: "Non-infected"},
     {id: "Flu", name: "Flu"},

diff --git a/docs/setting_up_a_node.md b/docs/setting_up_a_node.md
@@ -17,12 +17,12 @@
 
 The following assay types can be ingested into an EpiVar node:
 
-- RNA-seq
-- ATAC-seq
-- H3K4me1
-- H3K4me3
-- H3K27ac
-- H3K27me3
+- `RNA-seq`
+- `ATAC-seq`
+- `H3K4me1`
+- `H3K4me3`
+- `H3K27ac`
+- `H3K27me3`
 
 ### Dataset metadata
 
@@ -53,7 +53,8 @@ The following assay types can be ingested into an EpiVar node:
     - `assay`
 
 - [ ] A dataset configuration file, which takes the form described in the
-  [example configuration file](/config.example.js).
+  [example configuration file](/config.example.js). Here, assays available in this node can be specified, as well as 
+  experimental conditions, population groups, and functions for interacting with the genotype VCF file.
 
   This file specifies information about the dataset being hosted by the EpiVar node, including dataset title,
   sample groups and experimental treatments (in both of these, each entry has an ID and a name), assembly ID (`hg19` or

diff --git a/epivar-prod/node1/config.js b/epivar-prod/node1/config.js
@@ -4,11 +4,28 @@
  * This file configures the dataset that this EpiVar node is responsible for hosting.
  */
 
+const ASSAY_RNA_SEQ = "RNA-seq";
+const ASSAY_ATAC_SEQ = "ATAC-seq";
+const ASSAY_H3K4ME1 = "H3K4me1";
+const ASSAY_H3K4ME3 = "H3K4me3";
+const ASSAY_H3K27AC = "H3K27ac";
+const ASSAY_H3K27ME3 = "H3K27me3";
+
 // noinspection JSUnusedGlobalSymbols
 module.exports = {
   title: "Aracena 𝘦𝘵 𝘢𝘭.",
   assembly: "hg19",
 
+  // availableAssays must be a subset of (or the full set of) the assays supported by EpiVar:
+  availableAssays: [
+    ASSAY_RNA_SEQ,
+    ASSAY_ATAC_SEQ,
+    ASSAY_H3K4ME1,
+    ASSAY_H3K4ME3,
+    ASSAY_H3K27AC,
+    ASSAY_H3K27ME3,
+  ],
+
   conditions: [
     {id: "NI", name: "Non-infected"},
     {id: "Flu", name: "Flu"},

diff --git a/epivar-prod/node2/config.js b/epivar-prod/node2/config.js
@@ -4,11 +4,16 @@
  * This file configures the dataset that this EpiVar node is responsible for hosting.
  */
 
+const ASSAY_RNA_SEQ = "RNA-seq";
+
 // noinspection JSUnusedGlobalSymbols
 module.exports = {
   title: "Aracena 𝘦𝘵 𝘢𝘭. RNA-seq liftover",
   assembly: "hg38",
 
+  // availableAssays must be a subset of (or the full set of) the assays supported by EpiVar:
+  availableAssays: [ASSAY_RNA_SEQ],
+
   conditions: [
     {id: "NI", name: "Non-infected"},
     {id: "Flu", name: "Flu"},

diff --git a/helpers/assays.mjs b/helpers/assays.mjs
@@ -0,0 +1,19 @@
+import config from "../config.js";
+// Name strings for the individual assay types, exported so other modules can avoid repeating the literals.
+export const ASSAY_RNA_SEQ = "RNA-seq";
+export const ASSAY_ATAC_SEQ = "ATAC-seq";
+export const ASSAY_H3K4ME1 = "H3K4me1";
+export const ASSAY_H3K4ME3 = "H3K4me3";
+export const ASSAY_H3K27AC = "H3K27ac";
+export const ASSAY_H3K27ME3 = "H3K27me3";
+// All of the assay constants declared above, collected into a single array.
+export const ALL_ASSAYS = [
+  ASSAY_RNA_SEQ,
+  ASSAY_ATAC_SEQ,
+  ASSAY_H3K4ME1,
+  ASSAY_H3K4ME3,
+  ASSAY_H3K27AC,
+  ASSAY_H3K27ME3,
+];
+// Assays enabled for this node: config.availableAssays when it is set, otherwise (null/undefined) ALL_ASSAYS.
+export const AVAILABLE_ASSAYS = config.availableAssays ?? ALL_ASSAYS;
diff --git a/models/assays.mjs b/models/assays.mjs
@@ -1,8 +1,11 @@
+import {AVAILABLE_ASSAYS} from "../helpers/assays.mjs";
 import db from "./db.mjs";
 
+const AVAILABLE_ASSAYS_SET = new Set(AVAILABLE_ASSAYS);
+// list(): resolves to the assay names in the DB (ordered by id) that are in AVAILABLE_ASSAYS_SET; Set membership uses has().
 export const list = () => {
   return db.findAll("SELECT id, name FROM assays ORDER BY id")
-    .then(rows => rows.map(r => r.name))
+    .then(rows => rows.map((r) => r.name).filter((a) => AVAILABLE_ASSAYS_SET.has(a)));
 };
 
 export default {

diff --git a/models/schema.sql b/models/schema.sql
@@ -69,7 +69,7 @@ create table if not exists features
     -- optional:
     --  pre-computed, (presumably batch-corrected etc.) array of treatment-arrays of points for the peak to render in
     --  the box plot, instead of pulling live from the bigWigs. treatments and samples MUST be in alphabetical order of
-    --  their IDs, eg [Flu: [<value for AF01>, ..., <value for EU01>], NI: [...]]
+    --  their IDs, e.g., [Flu: [<value for AF01>, ..., <value for EU01>], NI: [...]]
     -- if the array is NULL, points SHOULD be pulled from bigWigs instead
     "points" real[]      default null,
 
@@ -94,7 +94,7 @@ create table if not exists peaks
     "snp"       integer  not null,
     "feature"   integer  not null,
 
-    -- values as array of treatments (in alphabetical order): [<p_Flu>, <p_NI>]
+    -- values as array of treatments (in alphabetical order), e.g., [<p_Flu>, <p_NI>]
     "values"    real[]   not null,
 
     foreign key ("snp")     references snps     ("id"),

diff --git a/models/tracks.mjs b/models/tracks.mjs
@@ -8,19 +8,21 @@ import fs from "node:fs";
 import md5 from "md5";
 import {groupBy, map, path as prop} from "rambda";
 
-import bigWigMerge from "../helpers/bigwig-merge.js";
-import bigWigChromosomeLength from "../helpers/bigwig-chromosome-length.js";
-import {boxPlot, getDomain, PLOT_HEIGHT, PLOT_WIDTH} from "../helpers/boxplot.mjs";
-import cache from "../helpers/cache.mjs";
-import valueAt from "../helpers/value-at.mjs";
 import config from "../config.js";
 import {NODE_BASE_URL, MERGED_TRACKS_DIR} from "../envConfig.js";
 import Metadata from "./metadata.js";
 import Samples from "./samples.mjs";
+
+import {ASSAY_RNA_SEQ} from "../helpers/assays.mjs";
+import bigWigMerge from "../helpers/bigwig-merge.js";
+import bigWigChromosomeLength from "../helpers/bigwig-chromosome-length.js";
+import {boxPlot, getDomain, PLOT_HEIGHT, PLOT_WIDTH} from "../helpers/boxplot.mjs";
+import cache from "../helpers/cache.mjs";
+import {belowThreshold} from "../helpers/censorship.mjs";
 import {DEFAULT_CONDITIONS} from "../helpers/defaultValues.mjs";
 import {donorLookup} from "../helpers/donors.mjs";
 import {normalizeChrom, GENOTYPE_STATES, GENOTYPE_STATE_NAMES} from "../helpers/genome.mjs";
-import {belowThreshold} from "../helpers/censorship.mjs";
+import valueAt from "../helpers/value-at.mjs";
 
 
 export default {
@@ -84,7 +86,7 @@ async function values(peak, usePrecomputed = false) {
     // RNA-seq results are either forward or reverse strand; we only want tracks from the direction
     // of the selected peak (otherwise results will appear incorrectly, and we'll have 2x the # of
     // values we should in some cases.)
-    track.assay !== "RNA-Seq" || track.view === strandToView[peak.feature.strand]
+    track.assay !== ASSAY_RNA_SEQ || track.view === strandToView[peak.feature.strand]
   ).map(track =>
     getValueForTrack(track).then(value => (value === undefined ? undefined : {
       donor: track.donor,

diff --git a/scripts/_common.mjs b/scripts/_common.mjs
@@ -2,27 +2,18 @@ import fs from "fs";
 
 import envConfig from "../envConfig.js";
 
+import {AVAILABLE_ASSAYS} from "../helpers/assays.mjs";
 import {donorLookup} from "../helpers/donors.mjs";
 
 export const stripQuotes = str => str.replace(/"/g, "").trim();
 
-// TODO: configurable
-export const ASSAYS = [
-  'RNA-seq',
-  'ATAC-seq',
-  'H3K4me1',
-  'H3K4me3',
-  'H3K27ac',
-  'H3K27me3',
-];
-
 export const loadingPrecomputedPoints = !!envConfig.POINTS_TEMPLATE;
 export const precomputedPoints = {};
 
 if (loadingPrecomputedPoints) {
   console.log("Pre-loading precomputed point matrices");
 
-  ASSAYS.forEach(assay => {
+  AVAILABLE_ASSAYS.forEach(assay => {
     const fc = fs.readFileSync(envConfig.POINTS_TEMPLATE.replace(/\$ASSAY/g, assay))
       .toString()
       .split("\n");

diff --git a/scripts/import-genes.mjs b/scripts/import-genes.mjs
@@ -6,21 +6,19 @@ import config from "../config.js";
 
 import {genePathsByAssemblyID} from "../data/assemblies/index.mjs";
 
-const ASSAY_NAME_RNASEQ = "RNA-seq";
-
 const genesPath = genePathsByAssemblyID[config.assembly];
 const genesFeaturesPath = process.argv[2] || "/dev/stdin";
 
 import {precomputedPoints} from "./_common.mjs";
 
+import {ASSAY_RNA_SEQ} from "../helpers/assays.mjs";
 import db from "../models/db.mjs";
 import Gene from "../models/genes.mjs";
 
-const rnaSeqPrecomputed = precomputedPoints[ASSAY_NAME_RNASEQ];
+const rnaSeqPrecomputed = precomputedPoints[ASSAY_RNA_SEQ];
 
 const assaysByName = Object.fromEntries((await db.findAll("SELECT * FROM assays")).map(r => [r.name, r.id]));
-
-const rnaSeq = assaysByName[ASSAY_NAME_RNASEQ];
+const rnaSeq = assaysByName[ASSAY_RNA_SEQ];
 
 const parseGene = line => {
   const fields = line.trim().split('\t');