Feature request: more parameters to function getVariableGenes() #9

skiaphrene · 2016-04-29T14:18:45Z

Dear scLVM team,

I am currently using some of the functionality implemented in scLVM to analyse your single-cell data.
I've made a few modifications to the plotting/identifying function getVariableGenes(... method="fdr" ...) that might be of interest as extra features (I've commented out the sections I have not updated):

# -----------------------------------------------------------------
# modification of getVariableGenes() to add more parameters:
# - (required) ERCC counts for overlay on plot
# - modifiable min bio dispersion parameter
# - adjustable ylim
# - possibility to have an interactive plot to identify points (genes)
#   within a hand-drawn polygon
# - returns plotted X and Y values
#   (& interactively selected points if applicable)
#   for futher manipulation
getVariableGenes <- function (
  nCountsEndo, nCountsERCC, fit, method = "fit", threshold = 0.1, minBiolDisp=0.5,
  ylim = NULL,
  fit_type = NULL, sfEndo = NULL, sfERCC = NULL, plot = T,
  interactive=FALSE) {

  if (!(method %in% c("fdr", "fit"))) {
    stop("'method' needs to be either 'fdr' or 'fit'")
  }
  if (is.null(fit_type)) {
    print("No 'fit_type' specified. Trying to guess its from parameter names")
    if ("a0" %in% names(coefficients(fit)) & "a1tilde" %in% 
        names(coefficients(fit))) {
      fit_type = "counts"
    }
    else {
      if ("a" %in% names(coefficients(fit)) & "k" %in% 
          names(coefficients(fit))) {
        fit_type = "log"
      }
      else {
        if (is.call(fit$call)) {
          fit_type = "logvar"
        }
      }
    }
    print(paste("Assuming 'fit_type' is ", "'", fit_type, 
                "'", sep = ""))
  }
  if (is.null(fit_type)) {
    stop("Couldn't guess fit_type. Please specify it or run the getTechincalNoise \n                           function to obtain the fit")
  }
  if (!(fit_type %in% c("counts", "log", "logvar")) & !is.null(fit_type)) {
    stop("'fit_type' needs to be either 'fdr' or 'fit'")
  }
  if (method == "fdr" & fit_type != "counts") {
    stop("method='fdr', can only be used with fit_type 'counts'")
  }
  if (method == "fdr" & (is.null(sfERCC) | is.null(sfEndo))) {
    stop("Please specify sfERCC and sfEndo when using method='fdr'")
  }
  if (method == "fdr") {
    meansEndo <- rowMeans(nCountsEndo)
    varsEndo <- rowVars(nCountsEndo)
    cv2Endo <- varsEndo/meansEndo^2

    meansERCC <- rowMeans(nCountsERCC)
    varsERCC <- rowVars(nCountsERCC)
    cv2ERCC <- varsERCC/meansERCC^2

    minBiolDisp <- (minBiolDisp^2)
    xi <- mean(1/sfERCC)
    m <- ncol(nCountsEndo)
    psia1thetaA <- mean(1/sfERCC) + (coefficients(fit)["a1tilde"] - xi) * mean(sfERCC/sfEndo)
    cv2thA <- coefficients(fit)["a0"] + minBiolDisp + coefficients(fit)["a0"] * minBiolDisp
    testDenomA <- (meansEndo * psia1thetaA + meansEndo^2 * cv2thA)/(1 + cv2thA/m)
    pA <- 1 - pchisq(varsEndo * (m - 1)/testDenomA, m - 1)
    padjA <- p.adjust(pA, "BH")
    print(table(padjA < 0.1))
    is_het = padjA < threshold
    is_het[is.na(is_het)] = FALSE
    if (plot == TRUE) {
      if (is.null(ylim)) ylim <- c(0.1, 250)
      plot(meansEndo, cv2Endo, log = "xy", col = 1 + is_het, 
           ylim = ylim, xlab = "Mean Counts", ylab = "CV2 Counts")
      xg <- 10^seq(-3, 5, length.out = 100)
      lines(xg, coefficients(fit)[1] + coefficients(fit)[2]/xg, 
            lwd = 2, col = "green")
      try(points(meansERCC, cv2ERCC, pch = 20, cex = 1, 
                 col = "blue"))
      legend("bottomleft", c("Endo. genes", "Var. genes", 
                             "ERCCs", "Fit"), pch = c(1, 1, 20, NA), 
                             lty = c(NA, NA, NA, 1), 
                             col = c("black", "red", "blue", "green"), 
                             cex = 0.7)
    }
  }
#   if (method == "fit" & fit_type == "log") {
#     LCountsEndo <- log10(nCountsEndo + 1)
#     LmeansEndo <- rowMeans(LCountsEndo)
#     Lcv2Endo = rowVars(LCountsEndo)/LmeansEndo^2
#     is_het = (fit$opts$offset * coefficients(fit)["a"] * 
#                 10^(-coefficients(fit)["k"] * LmeansEndo) < Lcv2Endo) & 
#       LmeansEndo > fit$opts$minmean
#     if (plot == TRUE) {
#       plot(LmeansEndo, Lcv2Endo, log = "y", col = 1 + 
#              is_het, xlab = "meansLogEndo", ylab = "cv2LogEndo")
#       xg <- seq(0, 5.5, length.out = 100)
#       lines(xg, fit$opts$offset * coefficients(fit)[1] * 
#               10^(-coefficients(fit)[2] * xg), lwd = 2, col = "green")
#       legend("bottomright", c("Endo. genes", "Var. genes", 
#                               "Fit"), pch = c(1, 1, NA), lty = c(NA, NA, 1), 
#              col = c("black", "red", "blue"), cex = 0.7)
#     }
#   }
#   if (method == "fit" & fit_type == "counts") {
#     meansEndo <- rowMeans(nCountsEndo)
#     varsEndo <- rowVars(nCountsEndo)
#     cv2Endo <- varsEndo/meansEndo^2
#     is_het = (coefficients(fit)[[1]] + coefficients(fit)[[2]]/meansEndo) < 
#       cv2Endo
#     if (plot == TRUE) {
#       if (is.null(ylim)) ylim <- c(0.1, 95)
#       plot(meansEndo, cv2Endo, log = "xy", col = 1 + is_het, 
#            ylim = ylim, xlab = "Mean Counts", ylab = "CV2 Counts")
#       xg <- 10^seq(-3, 5, length.out = 100)
#       lines(xg, coefficients(fit)[1] + coefficients(fit)[2]/xg, 
#             lwd = 2, col = "green")
#       legend("bottomright", c("Endo. genes", "Var. genes", 
#                               "Fit"), pch = c(1, 1, NA), lty = c(NA, NA, 1), 
#              col = c("black", "red", "green"), cex = 0.7)
#     }
#   }
#   if (method == "fit" & fit_type == "logvar") {
#     LCountsEndo <- log10(nCountsEndo + 1)
#     LmeansEndo <- rowMeans(LCountsEndo)
#     LVarsEndo <- rowVars(LCountsEndo)
#     xg = LmeansEndo
#     Var_techEndo_logfit_loess = predict(fit, LmeansEndo)
#     minVar_Endo = min(LVarsEndo[LmeansEndo > 2.5])
#     if (any(xg > 2.5 & Var_techEndo_logfit_loess < 0.6 * 
#             minVar_Endo)) {
#       idx = which(xg > 2.5 & Var_techEndo_logfit_loess < 
#                     0.6 * minVar_Endo)
#       Var_techEndo_logfit_loess[idx] = 0.6 * minVar_Endo
#     }
#     is_het = (Var_techEndo_logfit_loess < LVarsEndo) & LmeansEndo > 
#       0.3
#     print(sum(is_het))
#     if (plot == TRUE) {
#       plot(LmeansEndo, LVarsEndo, log = "y", col = 1 + 
#              is_het, xlab = "meansLogEndo", ylab = "varsLogEndo")
#       xg <- seq(0, 5.5, length.out = 100)
#       Var_techEndo_logfit_loess = predict(fit, xg)
#       if (any(xg > 2.5 & Var_techEndo_logfit_loess < 0.6 * 
#               minVar_Endo)) {
#         idx_1 = which(xg > 2.5 & Var_techEndo_logfit_loess < 
#                         0.6 * minVar_Endo)[1]
#         idx_end = length(Var_techEndo_logfit_loess)
#         Var_techEndo_logfit_loess[idx_1:idx_end] = 0.6 * 
#           minVar_Endo
#       }
#       lines(xg, Var_techEndo_logfit_loess, lwd = 2, col = "green")
#       legend("bottomright", c("Endo. genes", "Var. genes", 
#                               "Fit"), pch = c(1, 1, NA), lty = c(NA, NA, 1), 
#              col = c("black", "red", "green"), cex = 0.7)
#     }
#   }

  if (exists("meansEndo"))  X <- meansEndo
  if (exists("LmeansEndo")) X <- LmeansEndo
  if (exists("cv2Endo"))    Y <- cv2Endo
  if (exists("Lcv2Endo"))   Y <- Lcv2Endo

  selected <- NULL
  if (interactive) {
    if(!require(sp))      stop("Interactiveness requires package sp")
    if(!require(splancs)) stop("Interactiveness requires package splancs.")
    poly <- getpoly( quiet=TRUE )
    selected <- names(X)[point.in.polygon(X, Y, poly[,1], poly[,2])>0]
  }

  return( list(
    X        = X,
    Y        = Y,
    is_het   = is_het,
    selected = selected
  ))
}

Hope this helps in some way!

Best regards,

-- Alex

The text was updated successfully, but these errors were encountered:

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Feature request: more parameters to function getVariableGenes() #9

Feature request: more parameters to function getVariableGenes() #9

skiaphrene commented Apr 29, 2016

Feature request: more parameters to function getVariableGenes() #9

Feature request: more parameters to function getVariableGenes() #9

Comments

skiaphrene commented Apr 29, 2016