Skip to content

Commit

Permalink
Merge pull request #6 from ParkerICI/negative-values
Browse files Browse the repository at this point in the history
Negative values
  • Loading branch information
pfgherardini authored Jun 21, 2019
2 parents 17d3058 + 834fd2a commit 74260c4
Show file tree
Hide file tree
Showing 13 changed files with 210 additions and 47 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: grappolo
Type: Package
Title: Feature generation from single-cell data
Version: 0.4.4
Version: 0.5.1
Authors@R: "Pier Federico Gherardini <[email protected]> [aut, cre]"
Description: This is a package to cluster single-cell flow data and generate
features that can be used for model building
Expand Down
23 changes: 14 additions & 9 deletions R/cluster.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ cluster_data <- function(tab, col.names, k, ...) {
#' @inheritParams cluster_fcs_files
#'
#'
process_files_groups <- function(files, col.names, num.clusters, num.samples, asinh.cofactor, downsample.to, output.dir) {
process_files_groups <- function(files, col.names, num.clusters, num.samples, asinh.cofactor,
downsample.to, output.dir, negative.values, quantile.prob) {
tab <- NULL
orig.data <- NULL

Expand All @@ -81,7 +82,7 @@ process_files_groups <- function(files, col.names, num.clusters, num.samples, as

fcs.file <- flowCore::read.FCS(f)
temp.orig.data <- flowCore::exprs(fcs.file)
temp.tab <- convert_fcs(fcs.file, asinh.cofactor)
temp.tab <- convert_fcs(fcs.file, asinh.cofactor, negative.values = negative.values, quantile.prob = quantile.prob)

if(downsample.to > 0) {
x <- NULL
Expand Down Expand Up @@ -123,10 +124,10 @@ process_files_groups <- function(files, col.names, num.clusters, num.samples, as
#' @param f The file path
#' @inheritParams cluster_fcs_files
#'
process_file <- function(f, col.names, num.clusters, num.samples, asinh.cofactor, output.dir) {
process_file <- function(f, col.names, num.clusters, num.samples, asinh.cofactor, output.dir, negative.values, quantile.prob) {
fcs.file <- flowCore::read.FCS(f)
orig.data <- flowCore::exprs(fcs.file)
tab <- convert_fcs(fcs.file, asinh.cofactor)
tab <- convert_fcs(fcs.file, asinh.cofactor, negative.values = negative.values, quantile.prob = quantile.prob)

m <- grappolo:::cluster_data(tab, col.names, k = num.clusters, sampsize = min(nrow(tab), 1000), samples = num.samples)
colnames(m) <- gsub("groups", "cellType", colnames(m))
Expand Down Expand Up @@ -199,18 +200,20 @@ cluster_fcs_files_in_dir <- function(wd, ...) {
#' @param num.cores Number of CPU cores to use
#' @param col.names A vector of column names indicating which columns should be used for clustering
#' @param num.clusters The desired number of clusters
#' @param asinh.cofactor Cofactor for asinh transformation. If this is \code{NULL} no transformation is performed (see \code{convert_fcs})
#' @param num.samples Number of samples to be used for the CLARA algorithm (see \code{cluster::clara})
#' @param output.dir The name of the output directory, it will be created if it does not exist
#' @inheritParams convert_fcs
#' @return Returns either \code{NULL} or a \code{try-error} object if some error occurred during the computation
#' @export
cluster_fcs_files <- function(files.list, num.cores, col.names, num.clusters, asinh.cofactor, num.samples = 50, output.dir = ".") {
cluster_fcs_files <- function(files.list, num.cores, col.names, num.clusters, asinh.cofactor,
num.samples = 50, output.dir = ".", negative.values = "truncate", quantile.prob = 0.05) {
if(!dir.exists(output.dir))
dir.create(output.dir, recursive = TRUE, showWarnings = TRUE)

parallel::mclapply(files.list, mc.cores = num.cores, mc.preschedule = FALSE,
process_file, col.names = col.names, num.clusters = num.clusters,
num.samples = num.samples, asinh.cofactor = asinh.cofactor, output.dir = output.dir)
num.samples = num.samples, asinh.cofactor = asinh.cofactor,
output.dir = output.dir, negative.values = negative.values, quantile.prob = quantile.prob)
}


Expand All @@ -227,7 +230,8 @@ cluster_fcs_files <- function(files.list, num.cores, col.names, num.clusters, as
#'
#' @export
cluster_fcs_files_groups <- function(files.list, num.cores, col.names, num.clusters, asinh.cofactor,
num.samples = 50, downsample.to = 0, output.dir = ".") {
num.samples = 50, downsample.to = 0, output.dir = ".", negative.values = "truncate",
quantile.prob = 0.05) {

files.list <- lapply(names(files.list), function(x) {
c(x, files.list[[x]])
Expand All @@ -238,7 +242,8 @@ cluster_fcs_files_groups <- function(files.list, num.cores, col.names, num.clust

parallel::mclapply(files.list, mc.cores = num.cores, mc.preschedule = FALSE,
process_files_groups, col.names = col.names, num.clusters = num.clusters, num.samples = num.samples,
asinh.cofactor = asinh.cofactor, downsample.to = downsample.to, output.dir = output.dir)
asinh.cofactor = asinh.cofactor, downsample.to = downsample.to,
output.dir = output.dir, negative.values = negative.values, quantile.prob = quantile.prob)

}

Expand Down
24 changes: 14 additions & 10 deletions R/features.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,10 @@ multistep_normalize <- function(tab, norm.template, subject.var) {
#' stimulation conditions.
#' In this case the \code{metadata.tab} would look like this
#' \itemize{
#' \item{\code{file}}{The names of the data files that contain data for each sample. These must match the names in the clustering results (see above)}
#' \item{\code{timepoint}}{The timepoint information}
#' \item{\code{condition}}{The stimulation condition}
#' \item{\code{subject}}{The subjet each file was derived from}
#' \item{\code{file}}{ The names of the data files that contain data for each sample. These must match the names in the clustering results (see above)}
#' \item{\code{timepoint}}{ The timepoint information}
#' \item{\code{condition}}{ The stimulation condition}
#' \item{\code{subject}}{ The subjet each file was derived from}
#' }
#' Let's assume a few different scenarios.
#' \enumerate{
Expand All @@ -111,20 +111,24 @@ multistep_normalize <- function(tab, norm.template, subject.var) {
#' (see Details). The combination of \code{predictors} and \code{endpoint.grouping} must uniquely identify every row in \code{metadata.tab}.
#' The function will throw an error if this is not the case.
#' @param filename.col The name of the column in \code{metadata.tab} that is used to identify the file names in tab
#' @return Returns a data frame whose format depends on the value of the \code{format} parameter
#' @return Returns a data frame whose format depends on the value of the \code{out.format} parameter
#' \itemize{
#' \item{table}: each row corresponds to a combination of the levels of the variables specified in \code{endpoint.grouping}, and the columns are
#' cluster features, which are combinations of the levels of the \code{predictors} for each feature specified in \code{features.names}
#' \item{tidy}: there is a single numeric column, and all the other columns represent variables whose combinations uniquely identify each observation (i.e. each row)
#' }
#' @export

get_cluster_features <- function(tab, metadata.tab, features.names, out.format = "table", predictors = NULL, endpoint.grouping = NULL, filename.col = "file") {
get_cluster_features <- function(tab, metadata.tab, features.names, out.format = "table", predictors = NULL,
endpoint.grouping = NULL, filename.col = "file", transform.popsize = TRUE) {
out.format <- match.arg(out.format, c("table", "tidy"))
m <- reshape_cluster_features(tab, features.names)
m <- reshape_cluster_features(tab, features.names, transform.popsize)

df <- reshape::melt(m, varnames = c(filename.col, "variable"))

# Restore the original value of filename.col
names(df) <- gsub(make.names(filename.col), filename.col, names(df))

df <- merge(df, metadata.tab, by = filename.col)
ret <- NULL

Expand All @@ -143,7 +147,7 @@ get_cluster_features <- function(tab, metadata.tab, features.names, out.format =
ret <- reshape::cast(df, formula.exp)
}

return(ret)
return(data.frame(ret, check.names = FALSE, stringsAsFactors = FALSE))
}


Expand Down Expand Up @@ -171,7 +175,7 @@ transpose_feature_matrix <- function(m) {
}


reshape_cluster_features <- function(input.tab, features) {
reshape_cluster_features <- function(input.tab, features, transform.popsize = TRUE) {
col.names <- sapply(features, paste, "@", sep = "")
col.names <- paste(col.names, collapse = "|")
col.names <- grep(col.names, names(input.tab), value = T)
Expand All @@ -183,7 +187,7 @@ reshape_cluster_features <- function(input.tab, features) {
temp <- m[, grep(sprintf("%s@", s), colnames(m))]
temp[is.na(temp)] <- 0

if(s == "popsize") {
if(s == "popsize" && transform.popsize) {
temp <- t(temp)
temp <- temp / rowSums(temp)
temp <- t(temp)
Expand Down
54 changes: 48 additions & 6 deletions R/io.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,22 @@
#'
#' @param f The \code{flowFrame} to convert
#' @param asinh.cofactor Cofactor for \code{asinh} transformation. If this is \code{NULL} no transformation is performed
#' @param clip.at.zero Wether to clip negative values (after transformation) at zero
#' @param compensate Wether to compensate the data using the compensation matrix embedded in the \code{flowFrame} (if any)
#'
#' @param negative.values How to deal with negative values in the data. If this is \code{NULL} negative values
#' are left as is. Otherwise two options are possible:
#' \itemize{
#' \item{\code{truncate}}: Negative values will be truncated (i.e. replaced with 0)
#' \item{\code{shift}}: The data will be shifted so that only \code{quantile.prob} of the values for each channel will
#' be truncated to 0. This option is useful in cases where the range of data significantly extends
#' in the negatives, for instance due to compensation.
#' }
#' @param quantile.prob Only used if \code{negative.value} is set to \code{shift}. The quantile of measurements
#' that are going to be truncated to 0. For instance if this is 0.05, the data will be shifted so that
#' only 5 percent of the values are negative and will be truncated to 0
#' @return Returns a \code{data.frame} corresponding to the data in \code{flowCore::exprs(f)} after compensation
#' and transformation
#'
#' @export
convert_fcs <- function(f, asinh.cofactor = NULL, clip.at.zero = T, compensate = T) {
convert_fcs <- function(f, asinh.cofactor = NULL, compensate = T, negative.values = "truncate", quantile.prob = 0.05) {
comp <- grep("SPILL", names(flowCore::description(f)), value = T)

if(compensate && (length(comp) > 0)) {
Expand All @@ -35,8 +43,13 @@ convert_fcs <- function(f, asinh.cofactor = NULL, clip.at.zero = T, compensate =
if(!is.null(asinh.cofactor))
m <- asinh(m / asinh.cofactor)

if(clip.at.zero)
m[m < 0] <- 0
if(!is.null(negative.values)) {
negative.values <- match.arg(negative.values, choices = c("truncate", "shift"))
if(negative.values == "truncate")
m[m < 0] <- 0
else if(negative.values == "shift")
m <- shift_negative_values(m, quantile.prob)
}

tab <- data.frame(m, check.names = F, stringsAsFactors = F)

Expand All @@ -51,6 +64,35 @@ convert_fcs <- function(f, asinh.cofactor = NULL, clip.at.zero = T, compensate =
}


#' Shift negative values in a matrix
#'
#' This function shifts negative values in a data matrix. For each column vector
#' the procedure is as follows:
#' \enumerate{
#' \item A specific quantile is calculated from the vector
#' \item If the quantile is negative then its absolute value is added to the vector
#' \item Values that are still negative are truncated at 0
#' }
#'
#' @param m The data matrix
#' @param quantile.prob The quantile probability to use
#' @return Return the transformed data matrix
#'
shift_negative_values <- function(m, quantile.prob) {
apply(m, 2, function(x) {
qq <- quantile(x, quantile.prob)
ret <- NULL
if(qq < 0)
ret <- x + abs(qq)
else
ret <- x
ret[ret < 0] <- 0
return(ret)
})
}



#' Get the columns that are common to a set of input tabular files
#'
#' @param files.list A vector of input file names. If these are text files, each file should be a tab-separated table,
Expand Down
14 changes: 12 additions & 2 deletions inst/shinyGUI/server.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,15 @@ render_clustering_ui <- function(working.directory, ...) {renderUI({
),
fluidRow(
column(12,

numericInput("clusteringui_num_clusters", "Number of clusters", value = 200, min = 1, max = 2000),
numericInput("clusteringui_num_samples", "Number of samples (lower numbers lead to faster but less accurate results)", value = 50, min = 2),
numericInput("clusteringui_asinh_cofactor", "Cofactor for asinh transformation", value = 5),
numericInput("clusteringui_num_cores", "Number of CPU cores to use", value = 1),
selectInput("clusteringui_negative_values", "Negative vaues", choices = c("truncate", "shift")),
conditionalPanel(
condition = "input.clusteringui_negative_values == 'shift'",
numericInput("clusteringui_quantile_prob", "Quantile probability", value = 0.05, min = 0, max = 1, step = 0.01)
),
actionButton("clusteringui_start", "Start clustering")
)
)
Expand Down Expand Up @@ -106,6 +110,8 @@ shinyServer(function(input, output, session) {
num.samples <- force(input$clusteringui_num_samples)
downsample.to <- force(input$clusteringui_downsample_to)
output.dir <- force(working.directory)
negative.values <- force(input$clusteringui_negative_values)
quantile.prob <- force(input$clusteringui_quantile_prob)

if(input$clusteringui_clustering_mode == "Pooled") {
input.files <- lapply(clusteringui_reactive_values$clustering_groups, function(s) {file.path(working.directory, s)})
Expand All @@ -116,6 +122,8 @@ shinyServer(function(input, output, session) {
asinh.cofactor = asinh.cofactor,
num.samples = num.samples,
downsample.to = downsample.to,
negative.values = negative.values,
quantile.prob = quantile.prob,
output.dir = output.dir
)
} else {
Expand All @@ -125,7 +133,9 @@ shinyServer(function(input, output, session) {
num.clusters = num.clusters,
asinh.cofactor = asinh.cofactor,
num.samples = num.samples,
output.dir = output.dir
output.dir = output.dir,
negative.values = negative.values,
quantile.prob = quantile.prob
)
}

Expand Down
18 changes: 16 additions & 2 deletions man/cluster_fcs_files.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 16 additions & 2 deletions man/cluster_fcs_files_groups.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 74260c4

Please sign in to comment.