From 313f5060d6a27d752ab46f8cf5c598ba7ba3535b Mon Sep 17 00:00:00 2001
From: Shantanu Singh <shsingh@broadinstitute.org>
Date: Thu, 28 May 2020 09:04:40 -0400
Subject: [PATCH 1/5] Create write_gct.R

---
 R/write_gct.R | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 127 insertions(+)
 create mode 100644 R/write_gct.R

diff --git a/R/write_gct.R b/R/write_gct.R
new file mode 100644
index 0000000..79fcef2
--- /dev/null
+++ b/R/write_gct.R
@@ -0,0 +1,127 @@
+#' Write CellProfiler data to gct file.
+#'
+#'  @param x ...
+#'  @param path ...
+#'  @param channels ...
+#'
+#'
+#' @return The input \code{x}, invisibly.
+#'
+write_gct <- function(x, path, channels = NULL, feature_regex = "^Nuclei_|^Cells_|^Cytoplasm_", create_row_annotations = TRUE) {
+  stopifnot(is.data.frame(x))
+  path <- normalizePath(path, mustWork = FALSE)
+
+  if(file.exists(path)) {
+    file.remove(path)
+  }
+
+  # id is hash of metadata columns
+  x %<>%
+    tidyr::unite("id", matches("Metadata_"), remove = F) %>%
+    dplyr::rowwise() %>%
+    dplyr::mutate(id = digest::digest(id)) %>%
+    dplyr::ungroup()
+
+  # change hash to an sequential id because some sig tools fail if not
+  x %<>%
+    dplyr::mutate(id = paste0("SAMPLE_", dense_rank(id)))
+
+  # write.gctx does not handle Date
+  x %<>% dplyr::mutate_if(is.numeric.Date, as.character)
+
+  feature_cols <-
+    colnames(x) %>%
+    stringr::str_subset(feature_regex)
+
+  measurements <-
+    x %>%
+    dplyr::select_(.dots = feature_cols) %>%
+    data.matrix() %>%
+    t()
+
+  column_annotations <-
+    x %>%
+    dplyr::select(matches("^id$|^Metadata_"))
+
+  row_annotations <-
+    tibble::data_frame(cp_feature_name = row.names(measurements))
+
+  if (create_row_annotations) {
+      row_annotations %<>%
+        tidyr::separate(col = "cp_feature_name",
+                        into = c("compartment", "feature_group", "feature_name"),
+                        sep = "_", extra = "merge", remove = FALSE)
+  }
+
+  if (!is.null(channels)) {
+    # get all combinations of channels
+    channels <- stringr::str_split(channels, ",")[[1]]
+
+    channels <- c(as.vector(outer(channels, channels, FUN = paste, sep = "_")),
+                  channels)
+
+    # get channel name
+    channel_name <- function(feature_name) {
+      name <- channels[which(stringr::str_detect(feature_name, channels))[1]]
+
+      if (is.na(name)) {
+        name <- "None"
+      }
+
+      name
+    }
+
+    # add channel name to row annotations
+    row_annotations %<>%
+      dplyr::rowwise() %>%
+      dplyr::mutate(channel_name = channel_name(feature_name)) %>%
+      ungroup()
+  }
+
+  column_annotations_df <-
+    column_annotations %>%
+    t() %>%
+    as.data.frame() %>%
+    tibble::rownames_to_column() %>%
+    dplyr::mutate(rowname = stringr::str_replace(rowname, "Metadata_", ""))
+
+  filler <- row_annotations %>% slice(0)
+  filler[1,] <- colnames(filler)
+  filler[2:nrow(column_annotations_df),] <- NA
+
+  column_annotations_df <-
+    dplyr::bind_cols(column_annotations_df[1],
+                     filler,
+                     column_annotations_df[2:ncol(column_annotations_df)]
+    )
+
+  measurements_df <-
+    measurements %>%
+    as.data.frame() %>%
+    tibble::rownames_to_column()
+
+  measurements_df <-
+    dplyr::bind_cols(measurements_df[1],
+                     row_annotations,
+                     measurements_df[2:ncol(measurements_df)]
+    )
+
+  f <- file(path, "w")
+
+  writeLines("#1.3", con = f)
+
+  writeLines(sprintf("%d\t%d\t%d\t%d",
+                     nrow(measurements),
+                     ncol(measurements),
+                     ncol(row_annotations),
+                     ncol(column_annotations) - 1),
+             con = f)
+
+  close(f)
+
+  readr::write_tsv(x = column_annotations_df, path = path, append = TRUE, col_names = FALSE)
+
+  readr::write_tsv(x = measurements_df, path = path, append = TRUE)
+
+  invisible(x)
+}

From 05c94348fa2428c52b843b9cb6bf28a7c023c35e Mon Sep 17 00:00:00 2001
From: Shantanu Singh <shsingh@broadinstitute.org>
Date: Thu, 28 May 2020 09:11:56 -0400
Subject: [PATCH 2/5] Update docs

---
 DESCRIPTION      |  2 +-
 man/aggregate.Rd | 12 +++++++++---
 man/annotate.Rd  | 13 ++++++++++---
 man/normalize.Rd | 18 +++++++++++++-----
 man/preselect.Rd | 13 ++++++++++---
 man/sample.Rd    |  3 +--
 6 files changed, 44 insertions(+), 17 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index b58287f..fd0a868 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -33,4 +33,4 @@ Imports:
     stringr (>= 1.2.0)
 Remotes: cytomining/cytominer
 VignetteBuilder: knitr
-RoxygenNote: 6.1.1
+RoxygenNote: 7.1.0
diff --git a/man/aggregate.Rd b/man/aggregate.Rd
index b11da6d..e8b608b 100644
--- a/man/aggregate.Rd
+++ b/man/aggregate.Rd
@@ -4,10 +4,16 @@
 \alias{aggregate}
 \title{Aggregate data based on given grouping.}
 \usage{
-aggregate(sqlite_file, output_file, compartments = c("cells",
-  "cytoplasm", "nuclei"), operation = "mean",
+aggregate(
+  sqlite_file,
+  output_file,
+  compartments = c("cells", "cytoplasm", "nuclei"),
+  operation = "mean",
   strata = c("Metadata_Plate", "Metadata_Well"),
-  image_variables = NULL, variables = "all", univariate = TRUE)
+  image_variables = NULL,
+  variables = "all",
+  univariate = TRUE
+)
 }
 \arguments{
 \item{sqlite_file}{SQLite database storing morphological profiles.}
diff --git a/man/annotate.Rd b/man/annotate.Rd
index aa19ca7..5fc0c5d 100644
--- a/man/annotate.Rd
+++ b/man/annotate.Rd
@@ -4,9 +4,16 @@
 \alias{annotate}
 \title{Add plate and well metadata.}
 \usage{
-annotate(batch_id, plate_id, cell_id = NULL, external_metadata = NULL,
-  format_broad_cmap = FALSE, output = NULL,
-  perturbation_mode = "chemical", workspace_dir = ".")
+annotate(
+  batch_id,
+  plate_id,
+  cell_id = NULL,
+  external_metadata = NULL,
+  format_broad_cmap = FALSE,
+  output = NULL,
+  perturbation_mode = "chemical",
+  workspace_dir = "."
+)
 }
 \arguments{
 \item{batch_id}{Batch ID.}
diff --git a/man/normalize.Rd b/man/normalize.Rd
index 02210c8..8859be3 100644
--- a/man/normalize.Rd
+++ b/man/normalize.Rd
@@ -4,14 +4,22 @@
 \alias{normalize}
 \title{Normalize profiles based on given control subset}
 \usage{
-normalize(input_file = NULL, output_file = NULL, subset = NULL,
-  sample_single_cell = FALSE, input_sqlite_file = NULL,
+normalize(
+  input_file = NULL,
+  output_file = NULL,
+  subset = NULL,
+  sample_single_cell = FALSE,
+  input_sqlite_file = NULL,
   compartments = c("cells", "cytoplasm", "nuclei"),
-  operation = "robustize", strata = c("Metadata_Plate"),
-  batch_id = NULL, plate_id = NULL, workspace_dir = ".",
+  operation = "robustize",
+  strata = c("Metadata_Plate"),
+  batch_id = NULL,
+  plate_id = NULL,
+  workspace_dir = ".",
   image_object_join_columns = c("TableNumber", "ImageNumber"),
   well_unique_id_columns = c("Metadata_Plate", "Metadata_Well"),
-  well_unique_id_columns_db_prefix = "")
+  well_unique_id_columns_db_prefix = ""
+)
 }
 \arguments{
 \item{input_file}{Input file with profiles to be normalized. If \code{NULL}, reads from \code{workspace_dir/backend/batch_id/plate_id/plate_id_augmented.csv}. default: \code{NULL}.}
diff --git a/man/preselect.Rd b/man/preselect.Rd
index 3557789..0d8bb07 100644
--- a/man/preselect.Rd
+++ b/man/preselect.Rd
@@ -4,9 +4,16 @@
 \alias{preselect}
 \title{Create a list of variables using various variable selection methods}
 \usage{
-preselect(input, operations, replicates = NULL, batch_id = NULL,
-  subset = NULL, cores = NULL, output_dir = NULL,
-  workspace_dir = ".")
+preselect(
+  input,
+  operations,
+  replicates = NULL,
+  batch_id = NULL,
+  subset = NULL,
+  cores = NULL,
+  output_dir = NULL,
+  workspace_dir = "."
+)
 }
 \arguments{
 \item{input}{Test data on which to perform variable selection operations. Must be CSV, rds, or feather.}
diff --git a/man/sample.Rd b/man/sample.Rd
index 70ffdfc..c1e5e2c 100644
--- a/man/sample.Rd
+++ b/man/sample.Rd
@@ -4,8 +4,7 @@
 \alias{sample}
 \title{Sample selects replicates across specified plates.}
 \usage{
-sample(batch_id, pattern, output, replicates = NULL,
-  workspace_dir = ".")
+sample(batch_id, pattern, output, replicates = NULL, workspace_dir = ".")
 }
 \arguments{
 \item{batch_id}{Batch ID.}

From 810e916e0b9ac0aac76e3efe75a338070688970f Mon Sep 17 00:00:00 2001
From: Shantanu Singh <shsingh@broadinstitute.org>
Date: Thu, 28 May 2020 09:12:08 -0400
Subject: [PATCH 3/5] Update docs

---
 man/write_gct.Rd | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 man/write_gct.Rd

diff --git a/man/write_gct.Rd b/man/write_gct.Rd
new file mode 100644
index 0000000..66b2fee
--- /dev/null
+++ b/man/write_gct.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/write_gct.R
+\name{write_gct}
+\alias{write_gct}
+\title{Write CellProfiler data to gct file.}
+\usage{
+write_gct(
+  x,
+  path,
+  channels = NULL,
+  feature_regex = "^Nuclei_|^Cells_|^Cytoplasm_",
+  create_row_annotations = TRUE
+)
+}
+\value{
+The input \code{x}, invisibly.
+}
+\description{
+@param x ...
+ @param path ...
+ @param channels ...
+}

From 8e6c0339d042892f96681b7dfc53e5ee4eea803d Mon Sep 17 00:00:00 2001
From: Shantanu Singh <shsingh@broadinstitute.org>
Date: Thu, 28 May 2020 09:12:53 -0400
Subject: [PATCH 4/5] Format code

---
 R/write_gct.R | 214 +++++++++++++++++++++++++++-----------------------
 1 file changed, 116 insertions(+), 98 deletions(-)

diff --git a/R/write_gct.R b/R/write_gct.R
index 79fcef2..143428b 100644
--- a/R/write_gct.R
+++ b/R/write_gct.R
@@ -7,121 +7,139 @@
 #'
 #' @return The input \code{x}, invisibly.
 #'
-write_gct <- function(x, path, channels = NULL, feature_regex = "^Nuclei_|^Cells_|^Cytoplasm_", create_row_annotations = TRUE) {
-  stopifnot(is.data.frame(x))
-  path <- normalizePath(path, mustWork = FALSE)
-
-  if(file.exists(path)) {
-    file.remove(path)
-  }
+write_gct <-
+  function(x,
+           path,
+           channels = NULL,
+           feature_regex = "^Nuclei_|^Cells_|^Cytoplasm_",
+           create_row_annotations = TRUE) {
+    stopifnot(is.data.frame(x))
+    path <- normalizePath(path, mustWork = FALSE)
+
+    if (file.exists(path)) {
+      file.remove(path)
+    }
 
-  # id is hash of metadata columns
-  x %<>%
-    tidyr::unite("id", matches("Metadata_"), remove = F) %>%
-    dplyr::rowwise() %>%
-    dplyr::mutate(id = digest::digest(id)) %>%
-    dplyr::ungroup()
+    # id is hash of metadata columns
+    x %<>%
+      tidyr::unite("id", matches("Metadata_"), remove = F) %>%
+      dplyr::rowwise() %>%
+      dplyr::mutate(id = digest::digest(id)) %>%
+      dplyr::ungroup()
 
-  # change hash to an sequential id because some sig tools fail if not
-  x %<>%
-    dplyr::mutate(id = paste0("SAMPLE_", dense_rank(id)))
+    # change hash to an sequential id because some sig tools fail if not
+    x %<>%
+      dplyr::mutate(id = paste0("SAMPLE_", dense_rank(id)))
 
-  # write.gctx does not handle Date
-  x %<>% dplyr::mutate_if(is.numeric.Date, as.character)
+    # write.gctx does not handle Date
+    x %<>% dplyr::mutate_if(is.numeric.Date, as.character)
 
-  feature_cols <-
-    colnames(x) %>%
-    stringr::str_subset(feature_regex)
+    feature_cols <-
+      colnames(x) %>%
+      stringr::str_subset(feature_regex)
 
-  measurements <-
-    x %>%
-    dplyr::select_(.dots = feature_cols) %>%
-    data.matrix() %>%
-    t()
+    measurements <-
+      x %>%
+      dplyr::select_(.dots = feature_cols) %>%
+      data.matrix() %>%
+      t()
 
-  column_annotations <-
-    x %>%
-    dplyr::select(matches("^id$|^Metadata_"))
+    column_annotations <-
+      x %>%
+      dplyr::select(matches("^id$|^Metadata_"))
 
-  row_annotations <-
-    tibble::data_frame(cp_feature_name = row.names(measurements))
+    row_annotations <-
+      tibble::data_frame(cp_feature_name = row.names(measurements))
 
-  if (create_row_annotations) {
+    if (create_row_annotations) {
       row_annotations %<>%
-        tidyr::separate(col = "cp_feature_name",
-                        into = c("compartment", "feature_group", "feature_name"),
-                        sep = "_", extra = "merge", remove = FALSE)
-  }
+        tidyr::separate(
+          col = "cp_feature_name",
+          into = c("compartment", "feature_group", "feature_name"),
+          sep = "_",
+          extra = "merge",
+          remove = FALSE
+        )
+    }
+
+    if (!is.null(channels)) {
+      # get all combinations of channels
+      channels <- stringr::str_split(channels, ",")[[1]]
 
-  if (!is.null(channels)) {
-    # get all combinations of channels
-    channels <- stringr::str_split(channels, ",")[[1]]
+      channels <-
+        c(as.vector(outer(
+          channels, channels, FUN = paste, sep = "_"
+        )),
+        channels)
 
-    channels <- c(as.vector(outer(channels, channels, FUN = paste, sep = "_")),
-                  channels)
+      # get channel name
+      channel_name <- function(feature_name) {
+        name <-
+          channels[which(stringr::str_detect(feature_name, channels))[1]]
 
-    # get channel name
-    channel_name <- function(feature_name) {
-      name <- channels[which(stringr::str_detect(feature_name, channels))[1]]
+        if (is.na(name)) {
+          name <- "None"
+        }
 
-      if (is.na(name)) {
-        name <- "None"
+        name
       }
 
-      name
+      # add channel name to row annotations
+      row_annotations %<>%
+        dplyr::rowwise() %>%
+        dplyr::mutate(channel_name = channel_name(feature_name)) %>%
+        ungroup()
     }
 
-    # add channel name to row annotations
-    row_annotations %<>%
-      dplyr::rowwise() %>%
-      dplyr::mutate(channel_name = channel_name(feature_name)) %>%
-      ungroup()
-  }
-
-  column_annotations_df <-
-    column_annotations %>%
-    t() %>%
-    as.data.frame() %>%
-    tibble::rownames_to_column() %>%
-    dplyr::mutate(rowname = stringr::str_replace(rowname, "Metadata_", ""))
-
-  filler <- row_annotations %>% slice(0)
-  filler[1,] <- colnames(filler)
-  filler[2:nrow(column_annotations_df),] <- NA
-
-  column_annotations_df <-
-    dplyr::bind_cols(column_annotations_df[1],
-                     filler,
-                     column_annotations_df[2:ncol(column_annotations_df)]
+    column_annotations_df <-
+      column_annotations %>%
+      t() %>%
+      as.data.frame() %>%
+      tibble::rownames_to_column() %>%
+      dplyr::mutate(rowname = stringr::str_replace(rowname, "Metadata_", ""))
+
+    filler <- row_annotations %>% slice(0)
+    filler[1, ] <- colnames(filler)
+    filler[2:nrow(column_annotations_df), ] <- NA
+
+    column_annotations_df <-
+      dplyr::bind_cols(column_annotations_df[1],
+                       filler,
+                       column_annotations_df[2:ncol(column_annotations_df)])
+
+    measurements_df <-
+      measurements %>%
+      as.data.frame() %>%
+      tibble::rownames_to_column()
+
+    measurements_df <-
+      dplyr::bind_cols(measurements_df[1],
+                       row_annotations,
+                       measurements_df[2:ncol(measurements_df)])
+
+    f <- file(path, "w")
+
+    writeLines("#1.3", con = f)
+
+    writeLines(sprintf(
+      "%d\t%d\t%d\t%d",
+      nrow(measurements),
+      ncol(measurements),
+      ncol(row_annotations),
+      ncol(column_annotations) - 1
+    ),
+    con = f)
+
+    close(f)
+
+    readr::write_tsv(
+      x = column_annotations_df,
+      path = path,
+      append = TRUE,
+      col_names = FALSE
     )
 
-  measurements_df <-
-    measurements %>%
-    as.data.frame() %>%
-    tibble::rownames_to_column()
-
-  measurements_df <-
-    dplyr::bind_cols(measurements_df[1],
-                     row_annotations,
-                     measurements_df[2:ncol(measurements_df)]
-    )
-
-  f <- file(path, "w")
-
-  writeLines("#1.3", con = f)
+    readr::write_tsv(x = measurements_df, path = path, append = TRUE)
 
-  writeLines(sprintf("%d\t%d\t%d\t%d",
-                     nrow(measurements),
-                     ncol(measurements),
-                     ncol(row_annotations),
-                     ncol(column_annotations) - 1),
-             con = f)
-
-  close(f)
-
-  readr::write_tsv(x = column_annotations_df, path = path, append = TRUE, col_names = FALSE)
-
-  readr::write_tsv(x = measurements_df, path = path, append = TRUE)
-
-  invisible(x)
-}
+    invisible(x)
+  }

From e2628c6d81fb6cc6cd6cbf06e64edc8ee4e4fbdf Mon Sep 17 00:00:00 2001
From: Shantanu Singh <shsingh@broadinstitute.org>
Date: Sun, 15 Nov 2020 13:05:41 -0500
Subject: [PATCH 5/5] unverified changes

---
 NAMESPACE        |  1 +
 R/write_gct.R    | 16 +++++++++-------
 man/write_gct.Rd |  6 +++---
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index e529dd8..2aa0ea3 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -6,6 +6,7 @@ export(normalize)
 export(preselect)
 export(regularize)
 export(sample)
+export(write_gct)
 importFrom(magrittr,"%<>%")
 importFrom(magrittr,"%>%")
 importFrom(magrittr,extract2)
diff --git a/R/write_gct.R b/R/write_gct.R
index 143428b..63f42b2 100644
--- a/R/write_gct.R
+++ b/R/write_gct.R
@@ -1,12 +1,14 @@
 #' Write CellProfiler data to gct file.
 #'
-#'  @param x ...
-#'  @param path ...
-#'  @param channels ...
-#'
+#'  @param x A data frame of CellProfiler readouts to write to disk
+#'  @param path Path or connection to write to
+#'  @param channels Image channels present in the CellProfiler data frame
 #'
 #' @return The input \code{x}, invisibly.
 #'
+#' @importFrom magrittr %<>%
+#' @importFrom magrittr %>%
+#' @export
 write_gct <-
   function(x,
            path,
@@ -22,7 +24,7 @@ write_gct <-
 
     # id is hash of metadata columns
     x %<>%
-      tidyr::unite("id", matches("Metadata_"), remove = F) %>%
+      tidyr::unite("id", dplyr::matches("Metadata_"), remove = F) %>%
       dplyr::rowwise() %>%
       dplyr::mutate(id = digest::digest(id)) %>%
       dplyr::ungroup()
@@ -46,7 +48,7 @@ write_gct <-
 
     column_annotations <-
       x %>%
-      dplyr::select(matches("^id$|^Metadata_"))
+      dplyr::select(dplyr::matches("^id$|^Metadata_"))
 
     row_annotations <-
       tibble::data_frame(cp_feature_name = row.names(measurements))
@@ -98,7 +100,7 @@ write_gct <-
       tibble::rownames_to_column() %>%
       dplyr::mutate(rowname = stringr::str_replace(rowname, "Metadata_", ""))
 
-    filler <- row_annotations %>% slice(0)
+    filler <- row_annotations %>% dplyr::slice(0)
     filler[1, ] <- colnames(filler)
     filler[2:nrow(column_annotations_df), ] <- NA
 
diff --git a/man/write_gct.Rd b/man/write_gct.Rd
index 66b2fee..5549ce7 100644
--- a/man/write_gct.Rd
+++ b/man/write_gct.Rd
@@ -16,7 +16,7 @@ write_gct(
 The input \code{x}, invisibly.
 }
 \description{
-@param x ...
- @param path ...
- @param channels ...
+@param x A data frame of CellProfiler readouts to write to disk
+ @param path Path or connection to write to
+ @param channels Image channels present in the CellProfiler data frame
 }