Skip to content

Commit

Permalink
Merge pull request #14 from c3g/feat/configurable-assays
Browse files Browse the repository at this point in the history
feat: configurable set of available assays for nodes
  • Loading branch information
davidlougheed authored Jan 17, 2024
2 parents 62e0b3e + 1edf109 commit d3e2a98
Show file tree
Hide file tree
Showing 10 changed files with 86 additions and 33 deletions.
17 changes: 17 additions & 0 deletions config.example.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,28 @@
* This file configures the dataset that this EpiVar node is responsible for hosting.
*/

// Assay name strings that can be listed in `availableAssays` below.
const ASSAY_RNA_SEQ = "RNA-seq";
const ASSAY_ATAC_SEQ = "ATAC-seq";
const ASSAY_H3K4ME1 = "H3K4me1";
const ASSAY_H3K4ME3 = "H3K4me3";
const ASSAY_H3K27AC = "H3K27ac";
const ASSAY_H3K27ME3 = "H3K27me3";

// noinspection JSUnusedGlobalSymbols
module.exports = {
title: "Aracena 𝘦𝘵 𝘢𝘭.",
assembly: "hg19",

// availableAssays must be a subset of the set of assays supported by EpiVar:
availableAssays: [
ASSAY_RNA_SEQ,
ASSAY_ATAC_SEQ,
ASSAY_H3K4ME1,
ASSAY_H3K4ME3,
ASSAY_H3K27AC,
ASSAY_H3K27ME3,
],

conditions: [
{id: "NI", name: "Non-infected"},
{id: "Flu", name: "Flu"},
Expand Down
15 changes: 8 additions & 7 deletions docs/setting_up_a_node.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@

The following assay types can be ingested into an EpiVar node:

- RNA-seq
- ATAC-seq
- H3K4me1
- H3K4me3
- H3K27ac
- H3K27me3
- `RNA-seq`
- `ATAC-seq`
- `H3K4me1`
- `H3K4me3`
- `H3K27ac`
- `H3K27me3`

### Dataset metadata

Expand Down Expand Up @@ -53,7 +53,8 @@ The following assay types can be ingested into an EpiVar node:
- `assay`

- [ ] A dataset configuration file, which takes the form described in the
[example configuration file](/config.example.js).
[example configuration file](/config.example.js). Here, assays available in this node can be specified, as well as
experimental conditions, population groups, and functions for interacting with the genotype VCF file.

This file specifies information about the dataset being hosted by the EpiVar node, including dataset title,
sample groups and experimental treatments (in both of these, each entry has an ID and a name), assembly ID (`hg19` or
Expand Down
17 changes: 17 additions & 0 deletions epivar-prod/node1/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,28 @@
* This file configures the dataset that this EpiVar node is responsible for hosting.
*/

// Assay name strings that can be listed in `availableAssays` below.
const ASSAY_RNA_SEQ = "RNA-seq";
const ASSAY_ATAC_SEQ = "ATAC-seq";
const ASSAY_H3K4ME1 = "H3K4me1";
const ASSAY_H3K4ME3 = "H3K4me3";
const ASSAY_H3K27AC = "H3K27ac";
const ASSAY_H3K27ME3 = "H3K27me3";

// noinspection JSUnusedGlobalSymbols
module.exports = {
title: "Aracena 𝘦𝘵 𝘢𝘭.",
assembly: "hg19",

// availableAssays must be a subset of the set of assays supported by EpiVar:
availableAssays: [
ASSAY_RNA_SEQ,
ASSAY_ATAC_SEQ,
ASSAY_H3K4ME1,
ASSAY_H3K4ME3,
ASSAY_H3K27AC,
ASSAY_H3K27ME3,
],

conditions: [
{id: "NI", name: "Non-infected"},
{id: "Flu", name: "Flu"},
Expand Down
5 changes: 5 additions & 0 deletions epivar-prod/node2/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,16 @@
* This file configures the dataset that this EpiVar node is responsible for hosting.
*/

// Assay name string for the only assay listed in `availableAssays` below.
const ASSAY_RNA_SEQ = "RNA-seq";

// noinspection JSUnusedGlobalSymbols
module.exports = {
title: "Aracena 𝘦𝘵 𝘢𝘭. RNA-seq liftover",
assembly: "hg38",

// availableAssays must be a subset of the set of assays supported by EpiVar:
availableAssays: [ASSAY_RNA_SEQ],

conditions: [
{id: "NI", name: "Non-infected"},
{id: "Flu", name: "Flu"},
Expand Down
19 changes: 19 additions & 0 deletions helpers/assays.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import config from "../config.js";

// Canonical assay name strings, exported so other modules can reference them
// by constant instead of repeating the literal.
export const ASSAY_RNA_SEQ = "RNA-seq";
export const ASSAY_ATAC_SEQ = "ATAC-seq";
export const ASSAY_H3K4ME1 = "H3K4me1";
export const ASSAY_H3K4ME3 = "H3K4me3";
export const ASSAY_H3K27AC = "H3K27ac";
export const ASSAY_H3K27ME3 = "H3K27me3";

// Every assay name defined above, collected into a single array.
export const ALL_ASSAYS = [
ASSAY_RNA_SEQ,
ASSAY_ATAC_SEQ,
ASSAY_H3K4ME1,
ASSAY_H3K4ME3,
ASSAY_H3K27AC,
ASSAY_H3K27ME3,
];

// Assays enabled for this node: the `availableAssays` value from config.js, or
// ALL_ASSAYS when that value is null/undefined (`??` only falls back on nullish,
// so an explicitly empty array is kept as-is).
// NOTE(review): nothing here checks that config.availableAssays only contains
// names from ALL_ASSAYS — confirm whether validation happens elsewhere.
export const AVAILABLE_ASSAYS = config.availableAssays ?? ALL_ASSAYS;
5 changes: 4 additions & 1 deletion models/assays.mjs
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import {AVAILABLE_ASSAYS} from "../helpers/assays.mjs";
import db from "./db.mjs";

const AVAILABLE_ASSAYS_SET = new Set(AVAILABLE_ASSAYS);

/**
 * Lists the names of the assays stored in the database that are also enabled
 * for this node.
 *
 * @returns {Promise<string[]>} assay names in ascending order of their database
 *   `id`, restricted to the members of AVAILABLE_ASSAYS_SET.
 */
export const list = () => {
  return db.findAll("SELECT id, name FROM assays ORDER BY id")
    // Membership on a Set is tested with .has(); Set has no .includes() method,
    // so calling it would throw a TypeError for every request.
    .then(rows => rows.map((r) => r.name).filter((a) => AVAILABLE_ASSAYS_SET.has(a)));
};

export default {
Expand Down
4 changes: 2 additions & 2 deletions models/schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ create table if not exists features
-- optional:
-- pre-computed, (presumably batch-corrected etc.) array of treatment-arrays of points for the peak to render in
-- the box plot, instead of pulling live from the bigWigs. treatments and samples MUST be in alphabetical order of
-- their IDs, eg [Flu: [<value for AF01>, ..., <value for EU01>], NI: [...]]
-- their IDs, e.g., [Flu: [<value for AF01>, ..., <value for EU01>], NI: [...]]
-- if the array is NULL, points SHOULD be pulled from bigWigs instead
"points" real[] default null,

Expand All @@ -94,7 +94,7 @@ create table if not exists peaks
"snp" integer not null,
"feature" integer not null,

-- values as array of treatments (in alphabetical order): [<p_Flu>, <p_NI>]
-- values as array of treatments (in alphabetical order), e.g., [<p_Flu>, <p_NI>]
"values" real[] not null,

foreign key ("snp") references snps ("id"),
Expand Down
16 changes: 9 additions & 7 deletions models/tracks.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,21 @@ import fs from "node:fs";
import md5 from "md5";
import {groupBy, map, path as prop} from "rambda";

import bigWigMerge from "../helpers/bigwig-merge.js";
import bigWigChromosomeLength from "../helpers/bigwig-chromosome-length.js";
import {boxPlot, getDomain, PLOT_HEIGHT, PLOT_WIDTH} from "../helpers/boxplot.mjs";
import cache from "../helpers/cache.mjs";
import valueAt from "../helpers/value-at.mjs";
import config from "../config.js";
import {NODE_BASE_URL, MERGED_TRACKS_DIR} from "../envConfig.js";
import Metadata from "./metadata.js";
import Samples from "./samples.mjs";

import {ASSAY_RNA_SEQ} from "../helpers/assays.mjs";
import bigWigMerge from "../helpers/bigwig-merge.js";
import bigWigChromosomeLength from "../helpers/bigwig-chromosome-length.js";
import {boxPlot, getDomain, PLOT_HEIGHT, PLOT_WIDTH} from "../helpers/boxplot.mjs";
import cache from "../helpers/cache.mjs";
import {belowThreshold} from "../helpers/censorship.mjs";
import {DEFAULT_CONDITIONS} from "../helpers/defaultValues.mjs";
import {donorLookup} from "../helpers/donors.mjs";
import {normalizeChrom, GENOTYPE_STATES, GENOTYPE_STATE_NAMES} from "../helpers/genome.mjs";
import {belowThreshold} from "../helpers/censorship.mjs";
import valueAt from "../helpers/value-at.mjs";


export default {
Expand Down Expand Up @@ -84,7 +86,7 @@ async function values(peak, usePrecomputed = false) {
// RNA-seq results are either forward or reverse strand; we only want tracks from the direction
// of the selected peak (otherwise results will appear incorrectly, and we'll have 2x the # of
// values we should in some cases.)
track.assay !== "RNA-Seq" || track.view === strandToView[peak.feature.strand]
track.assay !== ASSAY_RNA_SEQ || track.view === strandToView[peak.feature.strand]
).map(track =>
getValueForTrack(track).then(value => (value === undefined ? undefined : {
donor: track.donor,
Expand Down
13 changes: 2 additions & 11 deletions scripts/_common.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,18 @@ import fs from "fs";

import envConfig from "../envConfig.js";

import {AVAILABLE_ASSAYS} from "../helpers/assays.mjs";
import {donorLookup} from "../helpers/donors.mjs";

/**
 * Removes every double-quote character from a string and then trims leading
 * and trailing whitespace from what remains.
 *
 * @param {string} str - text that may contain `"` characters.
 * @returns {string} the unquoted, trimmed text.
 */
export const stripQuotes = (str) => {
  const unquoted = str.replace(/"/g, "");
  return unquoted.trim();
};

// TODO: configurable
export const ASSAYS = [
'RNA-seq',
'ATAC-seq',
'H3K4me1',
'H3K4me3',
'H3K27ac',
'H3K27me3',
];

// True when envConfig.POINTS_TEMPLATE is set to a truthy value; gates the
// pre-loading step below.
export const loadingPrecomputedPoints = !!envConfig.POINTS_TEMPLATE;
// Starts empty. scripts/import-genes.mjs reads it by assay name
// (e.g. precomputedPoints[ASSAY_RNA_SEQ]).
export const precomputedPoints = {};

if (loadingPrecomputedPoints) {
console.log("Pre-loading precomputed point matrices");

ASSAYS.forEach(assay => {
AVAILABLE_ASSAYS.forEach(assay => {
const fc = fs.readFileSync(envConfig.POINTS_TEMPLATE.replace(/\$ASSAY/g, assay))
.toString()
.split("\n");
Expand Down
8 changes: 3 additions & 5 deletions scripts/import-genes.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,19 @@ import config from "../config.js";

import {genePathsByAssemblyID} from "../data/assemblies/index.mjs";

const ASSAY_NAME_RNASEQ = "RNA-seq";

const genesPath = genePathsByAssemblyID[config.assembly];
const genesFeaturesPath = process.argv[2] || "/dev/stdin";

import {precomputedPoints} from "./_common.mjs";

import {ASSAY_RNA_SEQ} from "../helpers/assays.mjs";
import db from "../models/db.mjs";
import Gene from "../models/genes.mjs";

const rnaSeqPrecomputed = precomputedPoints[ASSAY_NAME_RNASEQ];
const rnaSeqPrecomputed = precomputedPoints[ASSAY_RNA_SEQ];

const assaysByName = Object.fromEntries((await db.findAll("SELECT * FROM assays")).map(r => [r.name, r.id]));

const rnaSeq = assaysByName[ASSAY_NAME_RNASEQ];
const rnaSeq = assaysByName[ASSAY_RNA_SEQ];

const parseGene = line => {
const fields = line.trim().split('\t');
Expand Down

0 comments on commit d3e2a98

Please sign in to comment.