diff --git a/config.example.js b/config.example.js index de1497ea..108a606f 100644 --- a/config.example.js +++ b/config.example.js @@ -4,11 +4,28 @@ * This file configures the dataset that this EpiVar node is responsible for hosting. */ +const ASSAY_RNA_SEQ = "RNA-seq"; +const ASSAY_ATAC_SEQ = "ATAC-seq"; +const ASSAY_H3K4ME1 = "H3K4me1"; +const ASSAY_H3K4ME3 = "H3K4me3"; +const ASSAY_H3K27AC = "H3K27ac"; +const ASSAY_H3K27ME3 = "H3K27me3"; + // noinspection JSUnusedGlobalSymbols module.exports = { title: "Aracena 𝘦𝘵 𝘢𝘭.", assembly: "hg19", + // availableAssays must be a subset of the set of assays supported by EpiVar: + availableAssays: [ + ASSAY_RNA_SEQ, + ASSAY_ATAC_SEQ, + ASSAY_H3K4ME1, + ASSAY_H3K4ME3, + ASSAY_H3K27AC, + ASSAY_H3K27ME3, + ], + conditions: [ {id: "NI", name: "Non-infected"}, {id: "Flu", name: "Flu"}, diff --git a/docs/setting_up_a_node.md b/docs/setting_up_a_node.md index c501fdd5..adb7a859 100644 --- a/docs/setting_up_a_node.md +++ b/docs/setting_up_a_node.md @@ -17,12 +17,12 @@ The following assay types can be ingested into an EpiVar node: -- RNA-seq -- ATAC-seq -- H3K4me1 -- H3K4me3 -- H3K27ac -- H3K27me3 +- `RNA-seq` +- `ATAC-seq` +- `H3K4me1` +- `H3K4me3` +- `H3K27ac` +- `H3K27me3` ### Dataset metadata @@ -53,7 +53,8 @@ The following assay types can be ingested into an EpiVar node: - `assay` - [ ] A dataset configuration file, which takes the form described in the - [example configuration file](/config.example.js). + [example configuration file](/config.example.js). Here, assays available in this node can be specified, as well as + experimental conditions, population groups, and functions for interacting with the genotype VCF file.
This file specifies information about the dataset being hosted by the EpiVar node, including dataset title, sample groups and experimental treatments (in both of these, each entry has an ID and a name), assembly ID (`hg19` or diff --git a/epivar-prod/node1/config.js b/epivar-prod/node1/config.js index 3d1ea207..f8e861af 100644 --- a/epivar-prod/node1/config.js +++ b/epivar-prod/node1/config.js @@ -4,11 +4,28 @@ * This file configures the dataset that this EpiVar node is responsible for hosting. */ +const ASSAY_RNA_SEQ = "RNA-seq"; +const ASSAY_ATAC_SEQ = "ATAC-seq"; +const ASSAY_H3K4ME1 = "H3K4me1"; +const ASSAY_H3K4ME3 = "H3K4me3"; +const ASSAY_H3K27AC = "H3K27ac"; +const ASSAY_H3K27ME3 = "H3K27me3"; + // noinspection JSUnusedGlobalSymbols module.exports = { title: "Aracena 𝘦𝘵 𝘢𝘭.", assembly: "hg19", + // availableAssays must be a subset of the set of assays supported by EpiVar: + availableAssays: [ + ASSAY_RNA_SEQ, + ASSAY_ATAC_SEQ, + ASSAY_H3K4ME1, + ASSAY_H3K4ME3, + ASSAY_H3K27AC, + ASSAY_H3K27ME3, + ], + conditions: [ {id: "NI", name: "Non-infected"}, {id: "Flu", name: "Flu"}, diff --git a/epivar-prod/node2/config.js b/epivar-prod/node2/config.js index 75b14f8d..9488d5ff 100644 --- a/epivar-prod/node2/config.js +++ b/epivar-prod/node2/config.js @@ -4,11 +4,16 @@ * This file configures the dataset that this EpiVar node is responsible for hosting. */ +const ASSAY_RNA_SEQ = "RNA-seq"; + // noinspection JSUnusedGlobalSymbols module.exports = { title: "Aracena 𝘦𝘵 𝘢𝘭.
RNA-seq liftover", assembly: "hg38", + // availableAssays must be a subset of the set of assays supported by EpiVar: + availableAssays: [ASSAY_RNA_SEQ], + conditions: [ {id: "NI", name: "Non-infected"}, {id: "Flu", name: "Flu"}, diff --git a/helpers/assays.mjs b/helpers/assays.mjs new file mode 100644 index 00000000..835091bc --- /dev/null +++ b/helpers/assays.mjs @@ -0,0 +1,19 @@ +import config from "../config.js"; + +export const ASSAY_RNA_SEQ = "RNA-seq"; +export const ASSAY_ATAC_SEQ = "ATAC-seq"; +export const ASSAY_H3K4ME1 = "H3K4me1"; +export const ASSAY_H3K4ME3 = "H3K4me3"; +export const ASSAY_H3K27AC = "H3K27ac"; +export const ASSAY_H3K27ME3 = "H3K27me3"; + +export const ALL_ASSAYS = [ + ASSAY_RNA_SEQ, + ASSAY_ATAC_SEQ, + ASSAY_H3K4ME1, + ASSAY_H3K4ME3, + ASSAY_H3K27AC, + ASSAY_H3K27ME3, +]; + +export const AVAILABLE_ASSAYS = config.availableAssays ?? ALL_ASSAYS; diff --git a/models/assays.mjs b/models/assays.mjs index 7f9beb94..25631229 100644 --- a/models/assays.mjs +++ b/models/assays.mjs @@ -1,8 +1,11 @@ +import {AVAILABLE_ASSAYS} from "../helpers/assays.mjs"; import db from "./db.mjs"; +const AVAILABLE_ASSAYS_SET = new Set(AVAILABLE_ASSAYS); + export const list = () => { return db.findAll("SELECT id, name FROM assays ORDER BY id") - .then(rows => rows.map(r => r.name)) + .then(rows => rows.map((r) => r.name).filter((a) => AVAILABLE_ASSAYS_SET.has(a))); }; export default { diff --git a/models/schema.sql b/models/schema.sql index 48c64eb7..630cfc78 100644 --- a/models/schema.sql +++ b/models/schema.sql @@ -69,7 +69,7 @@ create table if not exists features -- optional: -- pre-computed, (presumably batch-corrected etc.) array of treatment-arrays of points for the peak to render in -- the box plot, instead of pulling live from the bigWigs.
treatments and samples MUST be in alphabetical order of - -- their IDs, eg [Flu: [, ..., ], NI: [...]] + -- their IDs, e.g., [Flu: [, ..., ], NI: [...]] -- if the array is NULL, points SHOULD be pulled from bigWigs instead "points" real[] default null, @@ -94,7 +94,7 @@ create table if not exists peaks "snp" integer not null, "feature" integer not null, - -- values as array of treatments (in alphabetical order): [, ] + -- values as array of treatments (in alphabetical order), e.g., [, ] "values" real[] not null, foreign key ("snp") references snps ("id"), diff --git a/models/tracks.mjs b/models/tracks.mjs index 573d9154..15f78ecc 100644 --- a/models/tracks.mjs +++ b/models/tracks.mjs @@ -8,19 +8,21 @@ import fs from "node:fs"; import md5 from "md5"; import {groupBy, map, path as prop} from "rambda"; -import bigWigMerge from "../helpers/bigwig-merge.js"; -import bigWigChromosomeLength from "../helpers/bigwig-chromosome-length.js"; -import {boxPlot, getDomain, PLOT_HEIGHT, PLOT_WIDTH} from "../helpers/boxplot.mjs"; -import cache from "../helpers/cache.mjs"; -import valueAt from "../helpers/value-at.mjs"; import config from "../config.js"; import {NODE_BASE_URL, MERGED_TRACKS_DIR} from "../envConfig.js"; import Metadata from "./metadata.js"; import Samples from "./samples.mjs"; + +import {ASSAY_RNA_SEQ} from "../helpers/assays.mjs"; +import bigWigMerge from "../helpers/bigwig-merge.js"; +import bigWigChromosomeLength from "../helpers/bigwig-chromosome-length.js"; +import {boxPlot, getDomain, PLOT_HEIGHT, PLOT_WIDTH} from "../helpers/boxplot.mjs"; +import cache from "../helpers/cache.mjs"; +import {belowThreshold} from "../helpers/censorship.mjs"; import {DEFAULT_CONDITIONS} from "../helpers/defaultValues.mjs"; import {donorLookup} from "../helpers/donors.mjs"; import {normalizeChrom, GENOTYPE_STATES, GENOTYPE_STATE_NAMES} from "../helpers/genome.mjs"; -import {belowThreshold} from "../helpers/censorship.mjs"; +import valueAt from "../helpers/value-at.mjs"; export 
default { @@ -84,7 +86,7 @@ async function values(peak, usePrecomputed = false) { // RNA-seq results are either forward or reverse strand; we only want tracks from the direction // of the selected peak (otherwise results will appear incorrectly, and we'll have 2x the # of // values we should in some cases.) - track.assay !== "RNA-Seq" || track.view === strandToView[peak.feature.strand] + track.assay !== ASSAY_RNA_SEQ || track.view === strandToView[peak.feature.strand] ).map(track => getValueForTrack(track).then(value => (value === undefined ? undefined : { donor: track.donor, diff --git a/scripts/_common.mjs b/scripts/_common.mjs index 63c1229e..aef186ec 100644 --- a/scripts/_common.mjs +++ b/scripts/_common.mjs @@ -2,27 +2,18 @@ import fs from "fs"; import envConfig from "../envConfig.js"; +import {AVAILABLE_ASSAYS} from "../helpers/assays.mjs"; import {donorLookup} from "../helpers/donors.mjs"; export const stripQuotes = str => str.replace(/"/g, "").trim(); -// TODO: configurable -export const ASSAYS = [ - 'RNA-seq', - 'ATAC-seq', - 'H3K4me1', - 'H3K4me3', - 'H3K27ac', - 'H3K27me3', -]; - export const loadingPrecomputedPoints = !!envConfig.POINTS_TEMPLATE; export const precomputedPoints = {}; if (loadingPrecomputedPoints) { console.log("Pre-loading precomputed point matrices"); - ASSAYS.forEach(assay => { + AVAILABLE_ASSAYS.forEach(assay => { const fc = fs.readFileSync(envConfig.POINTS_TEMPLATE.replace(/\$ASSAY/g, assay)) .toString() .split("\n"); diff --git a/scripts/import-genes.mjs b/scripts/import-genes.mjs index 3b86f53a..124f1272 100644 --- a/scripts/import-genes.mjs +++ b/scripts/import-genes.mjs @@ -6,21 +6,19 @@ import config from "../config.js"; import {genePathsByAssemblyID} from "../data/assemblies/index.mjs"; -const ASSAY_NAME_RNASEQ = "RNA-seq"; - const genesPath = genePathsByAssemblyID[config.assembly]; const genesFeaturesPath = process.argv[2] || "/dev/stdin"; import {precomputedPoints} from "./_common.mjs"; +import {ASSAY_RNA_SEQ} from 
"../helpers/assays.mjs"; import db from "../models/db.mjs"; import Gene from "../models/genes.mjs"; -const rnaSeqPrecomputed = precomputedPoints[ASSAY_NAME_RNASEQ]; +const rnaSeqPrecomputed = precomputedPoints[ASSAY_RNA_SEQ]; const assaysByName = Object.fromEntries((await db.findAll("SELECT * FROM assays")).map(r => [r.name, r.id])); - -const rnaSeq = assaysByName[ASSAY_NAME_RNASEQ]; +const rnaSeq = assaysByName[ASSAY_RNA_SEQ]; const parseGene = line => { const fields = line.trim().split('\t');