You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
I am currently using some of the functionality implemented in scLVM to analyse your single-cell data.
I've made a few modifications to the plotting/identifying function getVariableGenes(... method="fdr" ...) that might be of interest as extra features (I've commented out the sections I have not updated):
# -----------------------------------------------------------------
# modification of getVariableGenes() to add more parameters:
# - (required) ERCC counts for overlay on plot
# - modifiable min bio dispersion parameter
# - adjustable ylim
# - possibility to have an interactive plot to identify points (genes)
# within a hand-drawn polygon
# - returns plotted X and Y values
# (& interactively selected points if applicable)
# for futher manipulation
getVariableGenes <- function (
nCountsEndo, nCountsERCC, fit, method = "fit", threshold = 0.1, minBiolDisp=0.5,
ylim = NULL,
fit_type = NULL, sfEndo = NULL, sfERCC = NULL, plot = T,
interactive=FALSE) {
if (!(method %in% c("fdr", "fit"))) {
stop("'method' needs to be either 'fdr' or 'fit'")
}
if (is.null(fit_type)) {
print("No 'fit_type' specified. Trying to guess its from parameter names")
if ("a0" %in% names(coefficients(fit)) & "a1tilde" %in%
names(coefficients(fit))) {
fit_type = "counts"
}
else {
if ("a" %in% names(coefficients(fit)) & "k" %in%
names(coefficients(fit))) {
fit_type = "log"
}
else {
if (is.call(fit$call)) {
fit_type = "logvar"
}
}
}
print(paste("Assuming 'fit_type' is ", "'", fit_type,
"'", sep = ""))
}
if (is.null(fit_type)) {
stop("Couldn't guess fit_type. Please specify it or run the getTechincalNoise \n function to obtain the fit")
}
if (!(fit_type %in% c("counts", "log", "logvar")) & !is.null(fit_type)) {
stop("'fit_type' needs to be either 'fdr' or 'fit'")
}
if (method == "fdr" & fit_type != "counts") {
stop("method='fdr', can only be used with fit_type 'counts'")
}
if (method == "fdr" & (is.null(sfERCC) | is.null(sfEndo))) {
stop("Please specify sfERCC and sfEndo when using method='fdr'")
}
if (method == "fdr") {
meansEndo <- rowMeans(nCountsEndo)
varsEndo <- rowVars(nCountsEndo)
cv2Endo <- varsEndo/meansEndo^2
meansERCC <- rowMeans(nCountsERCC)
varsERCC <- rowVars(nCountsERCC)
cv2ERCC <- varsERCC/meansERCC^2
minBiolDisp <- (minBiolDisp^2)
xi <- mean(1/sfERCC)
m <- ncol(nCountsEndo)
psia1thetaA <- mean(1/sfERCC) + (coefficients(fit)["a1tilde"] - xi) * mean(sfERCC/sfEndo)
cv2thA <- coefficients(fit)["a0"] + minBiolDisp + coefficients(fit)["a0"] * minBiolDisp
testDenomA <- (meansEndo * psia1thetaA + meansEndo^2 * cv2thA)/(1 + cv2thA/m)
pA <- 1 - pchisq(varsEndo * (m - 1)/testDenomA, m - 1)
padjA <- p.adjust(pA, "BH")
print(table(padjA < 0.1))
is_het = padjA < threshold
is_het[is.na(is_het)] = FALSE
if (plot == TRUE) {
if (is.null(ylim)) ylim <- c(0.1, 250)
plot(meansEndo, cv2Endo, log = "xy", col = 1 + is_het,
ylim = ylim, xlab = "Mean Counts", ylab = "CV2 Counts")
xg <- 10^seq(-3, 5, length.out = 100)
lines(xg, coefficients(fit)[1] + coefficients(fit)[2]/xg,
lwd = 2, col = "green")
try(points(meansERCC, cv2ERCC, pch = 20, cex = 1,
col = "blue"))
legend("bottomleft", c("Endo. genes", "Var. genes",
"ERCCs", "Fit"), pch = c(1, 1, 20, NA),
lty = c(NA, NA, NA, 1),
col = c("black", "red", "blue", "green"),
cex = 0.7)
}
}
# if (method == "fit" & fit_type == "log") {
# LCountsEndo <- log10(nCountsEndo + 1)
# LmeansEndo <- rowMeans(LCountsEndo)
# Lcv2Endo = rowVars(LCountsEndo)/LmeansEndo^2
# is_het = (fit$opts$offset * coefficients(fit)["a"] *
# 10^(-coefficients(fit)["k"] * LmeansEndo) < Lcv2Endo) &
# LmeansEndo > fit$opts$minmean
# if (plot == TRUE) {
# plot(LmeansEndo, Lcv2Endo, log = "y", col = 1 +
# is_het, xlab = "meansLogEndo", ylab = "cv2LogEndo")
# xg <- seq(0, 5.5, length.out = 100)
# lines(xg, fit$opts$offset * coefficients(fit)[1] *
# 10^(-coefficients(fit)[2] * xg), lwd = 2, col = "green")
# legend("bottomright", c("Endo. genes", "Var. genes",
# "Fit"), pch = c(1, 1, NA), lty = c(NA, NA, 1),
# col = c("black", "red", "blue"), cex = 0.7)
# }
# }
# if (method == "fit" & fit_type == "counts") {
# meansEndo <- rowMeans(nCountsEndo)
# varsEndo <- rowVars(nCountsEndo)
# cv2Endo <- varsEndo/meansEndo^2
# is_het = (coefficients(fit)[[1]] + coefficients(fit)[[2]]/meansEndo) <
# cv2Endo
# if (plot == TRUE) {
# if (is.null(ylim)) ylim <- c(0.1, 95)
# plot(meansEndo, cv2Endo, log = "xy", col = 1 + is_het,
# ylim = ylim, xlab = "Mean Counts", ylab = "CV2 Counts")
# xg <- 10^seq(-3, 5, length.out = 100)
# lines(xg, coefficients(fit)[1] + coefficients(fit)[2]/xg,
# lwd = 2, col = "green")
# legend("bottomright", c("Endo. genes", "Var. genes",
# "Fit"), pch = c(1, 1, NA), lty = c(NA, NA, 1),
# col = c("black", "red", "green"), cex = 0.7)
# }
# }
# if (method == "fit" & fit_type == "logvar") {
# LCountsEndo <- log10(nCountsEndo + 1)
# LmeansEndo <- rowMeans(LCountsEndo)
# LVarsEndo <- rowVars(LCountsEndo)
# xg = LmeansEndo
# Var_techEndo_logfit_loess = predict(fit, LmeansEndo)
# minVar_Endo = min(LVarsEndo[LmeansEndo > 2.5])
# if (any(xg > 2.5 & Var_techEndo_logfit_loess < 0.6 *
# minVar_Endo)) {
# idx = which(xg > 2.5 & Var_techEndo_logfit_loess <
# 0.6 * minVar_Endo)
# Var_techEndo_logfit_loess[idx] = 0.6 * minVar_Endo
# }
# is_het = (Var_techEndo_logfit_loess < LVarsEndo) & LmeansEndo >
# 0.3
# print(sum(is_het))
# if (plot == TRUE) {
# plot(LmeansEndo, LVarsEndo, log = "y", col = 1 +
# is_het, xlab = "meansLogEndo", ylab = "varsLogEndo")
# xg <- seq(0, 5.5, length.out = 100)
# Var_techEndo_logfit_loess = predict(fit, xg)
# if (any(xg > 2.5 & Var_techEndo_logfit_loess < 0.6 *
# minVar_Endo)) {
# idx_1 = which(xg > 2.5 & Var_techEndo_logfit_loess <
# 0.6 * minVar_Endo)[1]
# idx_end = length(Var_techEndo_logfit_loess)
# Var_techEndo_logfit_loess[idx_1:idx_end] = 0.6 *
# minVar_Endo
# }
# lines(xg, Var_techEndo_logfit_loess, lwd = 2, col = "green")
# legend("bottomright", c("Endo. genes", "Var. genes",
# "Fit"), pch = c(1, 1, NA), lty = c(NA, NA, 1),
# col = c("black", "red", "green"), cex = 0.7)
# }
# }
if (exists("meansEndo")) X <- meansEndo
if (exists("LmeansEndo")) X <- LmeansEndo
if (exists("cv2Endo")) Y <- cv2Endo
if (exists("Lcv2Endo")) Y <- Lcv2Endo
selected <- NULL
if (interactive) {
if(!require(sp)) stop("Interactiveness requires package sp")
if(!require(splancs)) stop("Interactiveness requires package splancs.")
poly <- getpoly( quiet=TRUE )
selected <- names(X)[point.in.polygon(X, Y, poly[,1], poly[,2])>0]
}
return( list(
X = X,
Y = Y,
is_het = is_het,
selected = selected
))
}
Hope this helps in some way!
Best regards,
-- Alex
The text was updated successfully, but these errors were encountered:
Dear scLVM team,
I am currently using some of the functionality implemented in scLVM to analyse your single-cell data.
I've made a few modifications to the plotting/identifying function getVariableGenes(... method="fdr" ...) that might be of interest as extra features (I've commented out the sections I have not updated):
Hope this helps in some way!
Best regards,
-- Alex
The text was updated successfully, but these errors were encountered: