Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature request: more parameters to function getVariableGenes() #9

Open
skiaphrene opened this issue Apr 29, 2016 · 0 comments
Open

Comments

@skiaphrene
Copy link

Dear scLVM team,

I am currently using some of the functionality implemented in scLVM to analyse your single-cell data.
I've made a few modifications to the plotting/identifying function getVariableGenes(... method="fdr" ...) that might be of interest as extra features (I've commented out the sections I have not updated):

# -----------------------------------------------------------------
# modification of getVariableGenes() to add more parameters:
# - (required) ERCC counts for overlay on plot
# - modifiable min bio dispersion parameter
# - adjustable ylim
# - possibility to have an interactive plot to identify points (genes)
#   within a hand-drawn polygon
# - returns plotted X and Y values
#   (& interactively selected points if applicable)
#   for futher manipulation
getVariableGenes <- function (
  nCountsEndo, nCountsERCC, fit, method = "fit", threshold = 0.1, minBiolDisp=0.5,
  ylim = NULL,
  fit_type = NULL, sfEndo = NULL, sfERCC = NULL, plot = T,
  interactive=FALSE) {

  if (!(method %in% c("fdr", "fit"))) {
    stop("'method' needs to be either 'fdr' or 'fit'")
  }
  if (is.null(fit_type)) {
    print("No 'fit_type' specified. Trying to guess its from parameter names")
    if ("a0" %in% names(coefficients(fit)) & "a1tilde" %in% 
        names(coefficients(fit))) {
      fit_type = "counts"
    }
    else {
      if ("a" %in% names(coefficients(fit)) & "k" %in% 
          names(coefficients(fit))) {
        fit_type = "log"
      }
      else {
        if (is.call(fit$call)) {
          fit_type = "logvar"
        }
      }
    }
    print(paste("Assuming 'fit_type' is ", "'", fit_type, 
                "'", sep = ""))
  }
  if (is.null(fit_type)) {
    stop("Couldn't guess fit_type. Please specify it or run the getTechincalNoise \n                           function to obtain the fit")
  }
  if (!(fit_type %in% c("counts", "log", "logvar")) & !is.null(fit_type)) {
    stop("'fit_type' needs to be either 'fdr' or 'fit'")
  }
  if (method == "fdr" & fit_type != "counts") {
    stop("method='fdr', can only be used with fit_type 'counts'")
  }
  if (method == "fdr" & (is.null(sfERCC) | is.null(sfEndo))) {
    stop("Please specify sfERCC and sfEndo when using method='fdr'")
  }
  if (method == "fdr") {
    meansEndo <- rowMeans(nCountsEndo)
    varsEndo <- rowVars(nCountsEndo)
    cv2Endo <- varsEndo/meansEndo^2

    meansERCC <- rowMeans(nCountsERCC)
    varsERCC <- rowVars(nCountsERCC)
    cv2ERCC <- varsERCC/meansERCC^2

    minBiolDisp <- (minBiolDisp^2)
    xi <- mean(1/sfERCC)
    m <- ncol(nCountsEndo)
    psia1thetaA <- mean(1/sfERCC) + (coefficients(fit)["a1tilde"] - xi) * mean(sfERCC/sfEndo)
    cv2thA <- coefficients(fit)["a0"] + minBiolDisp + coefficients(fit)["a0"] * minBiolDisp
    testDenomA <- (meansEndo * psia1thetaA + meansEndo^2 * cv2thA)/(1 + cv2thA/m)
    pA <- 1 - pchisq(varsEndo * (m - 1)/testDenomA, m - 1)
    padjA <- p.adjust(pA, "BH")
    print(table(padjA < 0.1))
    is_het = padjA < threshold
    is_het[is.na(is_het)] = FALSE
    if (plot == TRUE) {
      if (is.null(ylim)) ylim <- c(0.1, 250)
      plot(meansEndo, cv2Endo, log = "xy", col = 1 + is_het, 
           ylim = ylim, xlab = "Mean Counts", ylab = "CV2 Counts")
      xg <- 10^seq(-3, 5, length.out = 100)
      lines(xg, coefficients(fit)[1] + coefficients(fit)[2]/xg, 
            lwd = 2, col = "green")
      try(points(meansERCC, cv2ERCC, pch = 20, cex = 1, 
                 col = "blue"))
      legend("bottomleft", c("Endo. genes", "Var. genes", 
                             "ERCCs", "Fit"), pch = c(1, 1, 20, NA), 
                             lty = c(NA, NA, NA, 1), 
                             col = c("black", "red", "blue", "green"), 
                             cex = 0.7)
    }
  }
#   if (method == "fit" & fit_type == "log") {
#     LCountsEndo <- log10(nCountsEndo + 1)
#     LmeansEndo <- rowMeans(LCountsEndo)
#     Lcv2Endo = rowVars(LCountsEndo)/LmeansEndo^2
#     is_het = (fit$opts$offset * coefficients(fit)["a"] * 
#                 10^(-coefficients(fit)["k"] * LmeansEndo) < Lcv2Endo) & 
#       LmeansEndo > fit$opts$minmean
#     if (plot == TRUE) {
#       plot(LmeansEndo, Lcv2Endo, log = "y", col = 1 + 
#              is_het, xlab = "meansLogEndo", ylab = "cv2LogEndo")
#       xg <- seq(0, 5.5, length.out = 100)
#       lines(xg, fit$opts$offset * coefficients(fit)[1] * 
#               10^(-coefficients(fit)[2] * xg), lwd = 2, col = "green")
#       legend("bottomright", c("Endo. genes", "Var. genes", 
#                               "Fit"), pch = c(1, 1, NA), lty = c(NA, NA, 1), 
#              col = c("black", "red", "blue"), cex = 0.7)
#     }
#   }
#   if (method == "fit" & fit_type == "counts") {
#     meansEndo <- rowMeans(nCountsEndo)
#     varsEndo <- rowVars(nCountsEndo)
#     cv2Endo <- varsEndo/meansEndo^2
#     is_het = (coefficients(fit)[[1]] + coefficients(fit)[[2]]/meansEndo) < 
#       cv2Endo
#     if (plot == TRUE) {
#       if (is.null(ylim)) ylim <- c(0.1, 95)
#       plot(meansEndo, cv2Endo, log = "xy", col = 1 + is_het, 
#            ylim = ylim, xlab = "Mean Counts", ylab = "CV2 Counts")
#       xg <- 10^seq(-3, 5, length.out = 100)
#       lines(xg, coefficients(fit)[1] + coefficients(fit)[2]/xg, 
#             lwd = 2, col = "green")
#       legend("bottomright", c("Endo. genes", "Var. genes", 
#                               "Fit"), pch = c(1, 1, NA), lty = c(NA, NA, 1), 
#              col = c("black", "red", "green"), cex = 0.7)
#     }
#   }
#   if (method == "fit" & fit_type == "logvar") {
#     LCountsEndo <- log10(nCountsEndo + 1)
#     LmeansEndo <- rowMeans(LCountsEndo)
#     LVarsEndo <- rowVars(LCountsEndo)
#     xg = LmeansEndo
#     Var_techEndo_logfit_loess = predict(fit, LmeansEndo)
#     minVar_Endo = min(LVarsEndo[LmeansEndo > 2.5])
#     if (any(xg > 2.5 & Var_techEndo_logfit_loess < 0.6 * 
#             minVar_Endo)) {
#       idx = which(xg > 2.5 & Var_techEndo_logfit_loess < 
#                     0.6 * minVar_Endo)
#       Var_techEndo_logfit_loess[idx] = 0.6 * minVar_Endo
#     }
#     is_het = (Var_techEndo_logfit_loess < LVarsEndo) & LmeansEndo > 
#       0.3
#     print(sum(is_het))
#     if (plot == TRUE) {
#       plot(LmeansEndo, LVarsEndo, log = "y", col = 1 + 
#              is_het, xlab = "meansLogEndo", ylab = "varsLogEndo")
#       xg <- seq(0, 5.5, length.out = 100)
#       Var_techEndo_logfit_loess = predict(fit, xg)
#       if (any(xg > 2.5 & Var_techEndo_logfit_loess < 0.6 * 
#               minVar_Endo)) {
#         idx_1 = which(xg > 2.5 & Var_techEndo_logfit_loess < 
#                         0.6 * minVar_Endo)[1]
#         idx_end = length(Var_techEndo_logfit_loess)
#         Var_techEndo_logfit_loess[idx_1:idx_end] = 0.6 * 
#           minVar_Endo
#       }
#       lines(xg, Var_techEndo_logfit_loess, lwd = 2, col = "green")
#       legend("bottomright", c("Endo. genes", "Var. genes", 
#                               "Fit"), pch = c(1, 1, NA), lty = c(NA, NA, 1), 
#              col = c("black", "red", "green"), cex = 0.7)
#     }
#   }

  if (exists("meansEndo"))  X <- meansEndo
  if (exists("LmeansEndo")) X <- LmeansEndo
  if (exists("cv2Endo"))    Y <- cv2Endo
  if (exists("Lcv2Endo"))   Y <- Lcv2Endo

  selected <- NULL
  if (interactive) {
    if(!require(sp))      stop("Interactiveness requires package sp")
    if(!require(splancs)) stop("Interactiveness requires package splancs.")
    poly <- getpoly( quiet=TRUE )
    selected <- names(X)[point.in.polygon(X, Y, poly[,1], poly[,2])>0]
  }

  return( list(
    X        = X,
    Y        = Y,
    is_het   = is_het,
    selected = selected
  ))
}

Hope this helps in some way!

Best regards,

-- Alex

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant