shahcompbio · marcjwilliams1 · Oct 10, 2023 · Jun 13, 2023 · Oct 10, 2023 · Oct 10, 2023
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -41,7 +41,7 @@ Imports:
     grid,
     ggforce,
     ggtree
-RoxygenNote: 7.2.0
+RoxygenNote: 7.2.3
 Suggests: 
     knitr,
     rmarkdown,

diff --git a/NAMESPACE b/NAMESPACE
@@ -37,6 +37,7 @@ export(fixjitter)
 export(format_haplotypes)
 export(format_haplotypes_dlp)
 export(format_haplotypes_rna)
+export(format_tree_labels)
 export(getBins)
 export(get_clone_label_pos)
 export(getphase)

diff --git a/R/callHSCN.R b/R/callHSCN.R
@@ -920,6 +920,10 @@ callHaplotypeSpecificCN <- function(CNbins,
   return(out)
 }
 
+# TODO: This is horrible and needs to be rewritten!
+# The basic idea is to remove singletons (single bins whose copy number differs from that of their neighbours).
+# This is done by checking whether the log-likelihood of the read counts in bin i better supports
+# the copy number in bin i-1 or bin i+1.
 #' @export
 fix_assignments <- function(hscn) {
   if (hscn$likelihood$likelihood == "binomial") {

diff --git a/R/clustering.R b/R/clustering.R
@@ -51,17 +51,53 @@ umap_clustering <- function(CNbins,
     pca <- NULL
     fast_sgd <- FALSE
   }
-  umapresults <- uwot::umap(cnmatrix,
-    metric = umapmetric,
-    n_neighbors = n_neighbors,
-    n_components = 2,
-    min_dist = min_dist,
-    ret_model = TRUE,
-    ret_nn = TRUE,
-    pca = pca,
-    fast_sgd = fast_sgd
-  )
+  # umapresults <- uwot::umap(cnmatrix,
+  #   metric = umapmetric,
+  #   n_neighbors = n_neighbors,
+  #   n_components = 2,
+  #   min_dist = min_dist,
+  #   ret_model = TRUE,
+  #   ret_nn = TRUE,
+  #   pca = pca,
+  #   fast_sgd = fast_sgd
+  # )
 
+  # TODO: find out why uwot::umap errors in some cases; this seems to be a new bug
+  umapresults <- tryCatch(
+    {
+      umapresults <- uwot::umap(cnmatrix,
+                                metric = umapmetric,
+                                n_neighbors = n_neighbors,
+                                n_components = 2,
+                                min_dist = min_dist,
+                                ret_model = TRUE,
+                                ret_nn = TRUE,
+                                pca = pca,
+                                fast_sgd = fast_sgd)
+    },
+    error = function(e) {
+      # Handle the error by rerunning UMAP with the same parameters on slightly jittered data
+      message("An error occurred in umap calculation: ", e$message)
+      message("Rerunning UMAP after adding small jitter to data points...")
+
+      mat <- cnmatrix + matrix(runif(nrow(cnmatrix) * ncol(cnmatrix),
+                                     min=-0.005, max=0.005), 
+                               nrow=nrow(cnmatrix), ncol=ncol(cnmatrix))
+
+      umapresults <- uwot::umap(mat,
+                                metric = umapmetric,
+                                n_neighbors = n_neighbors,
+                                n_components = 2,
+                                min_dist = min_dist,
+                                ret_model = TRUE,
+                                ret_nn = TRUE,
+                                pca = pca,
+                                fast_sgd = fast_sgd)
+    }
+  )
+
+
+
   dfumap <- data.frame(
     umap1 = umapresults$embedding[, 1],
     umap2 = umapresults$embedding[, 2],

diff --git a/R/col_palettes.R b/R/col_palettes.R
@@ -13,6 +13,20 @@ scCN_colors <- c(
   `CN11` = "#D4B9DA"
 )
 
+cyto_colors <- c(  # named vector of hex colours keyed by band label (presumably cytoband stain types)
+  "gpos100" = rgb(0, 0, 0, maxColorValue = 255),
+  "gpos" = rgb(0, 0, 0, maxColorValue = 255),
+  "gpos75" = rgb(130, 130, 130, maxColorValue = 255),
+  "gpos66" = rgb(160, 160, 160, maxColorValue = 255),
+  "gpos50" = rgb(200, 200, 200, maxColorValue = 255),
+  "gpos33" = rgb(210, 210, 210, maxColorValue = 255),
+  "gpos25" = rgb(200, 200, 200, maxColorValue = 255),  # NOTE(review): same grey as gpos50, darker than gpos33 - confirm intended
+  "gvar" = rgb(220, 220, 220, maxColorValue = 255),
+  "gneg" = rgb(255, 255, 255, maxColorValue = 255),
+  "acen" = rgb(217, 47, 39, maxColorValue = 255),
+  "stalk" = rgb(100, 127, 164, maxColorValue = 255)
+)
+
 scCNstate_colors <- c(
   `0` = "#3182BD",
   `1` = "#9ECAE1",

diff --git a/R/heatmap_plot.R b/R/heatmap_plot.R
@@ -966,7 +966,7 @@ plotHeatmap <- function(cn,
                         annotation_height = NULL, 
                         annofontsize = 10,
                         na_col = "white",
-                        linkheight = 5,
+                        linkheight = 2.5,
                         newlegendname = NULL,
                         str_to_remove = NULL,
                         maxCNcol = 11,
@@ -1066,12 +1066,26 @@ plotHeatmap <- function(cn,
   }
 
   ncells <- length(unique(CNbins$cell_id))
+
+  if (!is.null(clusters) && !is.null(tree)) {  # scalar test, so use short-circuit &&
+    cells_clusters <- unique(clusters$cell_id)
+    cells_data <- unique(CNbins$cell_id)
+    cells_tree <- unique(tree$tip.label)
+    check_cells <- setequal(cells_tree, cells_clusters) && setequal(cells_tree, cells_data)  # compare the ids themselves; equal counts can still hide different cells
+    if (!check_cells) {
+      warning("Trees, clusters and copy number data have different numbers of cells, removing non-overlapping cells.")
+      cells_to_keep <- intersect(intersect(cells_clusters, cells_data), cells_tree)  # cells present in all three inputs
+      CNbins <- dplyr::filter(CNbins, cell_id %in% cells_to_keep)
+      clusters <- dplyr::filter(clusters, cell_id %in% cells_to_keep)
+      cells_to_remove <- setdiff(cells_tree, cells_to_keep)  # tips with no matching cell in clusters/CNbins
+      tree <- ape::drop.tip(tree, cells_to_remove, collapse.singles = FALSE, trim.internal = FALSE)
+      tree <- format_tree_labels(tree)
+    }
+  }
 
   if (is.null(clusters) & !is.null(tree)) {
     ordered_cell_ids <- paste0(unique(CNbins$cell_id))
     clusters <- data.frame(cell_id = unique(CNbins$cell_id), clone_id = "0")
-  } else {
-    ordered_cell_ids <- paste0(clusters$cell_id)
   }
 
   if (is.null(tree) & is.null(clusters)) {
@@ -1097,7 +1111,10 @@ plotHeatmap <- function(cn,
     cells_clusters <- length(unique(clusters$cell_id))
     cells_data <- length(unique(CNbins$cell_id))
     if (cells_data != cells_clusters){
-      warning("Number of cells in clusters dataframe !=  number of cells in the bins data!")
+      warning("Number of cells in clusters dataframe !=  number of cells in the bins data! Removing some cells")
+      cells_to_keep <- intersect(clusters$cell_id, CNbins$cell_id)  # intersect the cell ids; cells_clusters/cells_data above are only counts
+      CNbins <- dplyr::filter(CNbins, cell_id %in% cells_to_keep)
+      clusters <- dplyr::filter(clusters, cell_id %in% cells_to_keep)
     }
     if (!"clone_id" %in% names(clusters)) {
       stop("No clone_id columns in clusters dataframe, you might need to rename your clusters")