From 3decd73cc99e94b4f825891d26c13583be30dd29 Mon Sep 17 00:00:00 2001 From: fherreazcue Date: Mon, 18 Mar 2024 10:14:22 +0000 Subject: [PATCH 1/9] browser where x.trunc is failing --- app/R/plot_functions.R | 1 + 1 file changed, 1 insertion(+) diff --git a/app/R/plot_functions.R b/app/R/plot_functions.R index ad279ab..fbb4768 100644 --- a/app/R/plot_functions.R +++ b/app/R/plot_functions.R @@ -64,6 +64,7 @@ pca_plot <- function(matrix){ generate_heatmap_colors <- function(matrix){ x <- as.matrix(matrix) x.trunc <- as.vector(unique(x)) + browser() x.trim <- x.trunc[x.trunc >= quantile(x.trunc, probs = 0.01, na.rm = T) & x.trunc <= quantile(x.trunc, probs = 0.99, na.rm = T)] x.max <- which.max(x.trim) From 3f6ff27e8062b4c496a1fe82160bd9b3daaf9c9d Mon Sep 17 00:00:00 2001 From: fherreazcue Date: Mon, 25 Mar 2024 16:09:54 +0000 Subject: [PATCH 2/9] save as options: hides format, width and height, shows resolution - problems with label alignment and image fidelity to website image --- app/R/app_server.R | 10 ++++---- app/R/app_ui.R | 12 +++++---- app/R/save_as_button.R | 56 +++++++++++++++++++++++++++++++----------- 3 files changed, 53 insertions(+), 25 deletions(-) diff --git a/app/R/app_server.R b/app/R/app_server.R index b8164ca..b3b505c 100644 --- a/app/R/app_server.R +++ b/app/R/app_server.R @@ -265,12 +265,12 @@ app_server <- function(input, output, session) { #### Download buttons #### shinyjs::hide("subheat_save_as-save_as_button") - save_as_Server("pca_save_as", input$dataset, .pca_plot(), "PCA") - save_as_Server("heatmap_save_as", input$dataset, .heatmap_plot(), + save_as_server("pca_save_as", input$dataset, .pca_plot(), "PCA") + save_as_server("heatmap_save_as", input$dataset, .heatmap_plot(), "HeatMap") - save_as_Server("subheat_save_as", input$dataset, .subheat_plot(), + save_as_server("subheat_save_as", input$dataset, .subheat_plot(), "SubHeatMap") - save_as_Server("bar_save_as", input$dataset, .bar_plot(), "Bar") - save_as_Server("box_save_as", input$dataset, .box_plot(), "Box") + save_as_server("bar_save_as", input$dataset, .bar_plot(), "Bar") + save_as_server("box_save_as", input$dataset, .box_plot(), "Box") } diff --git a/app/R/app_ui.R b/app/R/app_ui.R index 4991bbe..420cccb 100644 --- a/app/R/app_ui.R +++ b/app/R/app_ui.R @@ -115,7 +115,9 @@ app_ui <- function(request) { #### PCA #### tabPanel("PCA", plotlyOutput("PCA_plot", width = 600, height = 500), - save_as_UI("pca_save_as", 600, 500) + div( + save_as_ui("pca_save_as", 600, 500) + ), ), #### HeatMap #### tabPanel("HeatMap", @@ -145,11 +147,11 @@ app_ui <- function(request) { height = 500, brush = "heatmap_brush" ), - save_as_UI("heatmap_save_as", 250, 500), + save_as_ui("heatmap_save_as", 250, 500), ), div( uiOutput("sub_heat"), - save_as_UI("subheat_save_as", 600, 500), + save_as_ui("subheat_save_as", 600, 500), ) ) ), @@ -160,11 +162,11 @@ app_ui <- function(request) { div(class = "container", div( plotlyOutput("plot_bar", width = 500, height = 500), - save_as_UI("bar_save_as", 500, 500) + save_as_ui("bar_save_as", 500, 500) ), div( plotlyOutput("plot_box", width = 300, height = 500), - save_as_UI("box_save_as", 300, 500) + save_as_ui("box_save_as", 300, 500) ), ) ), diff --git a/app/R/save_as_button.R b/app/R/save_as_button.R index cfc51ca..230b428 100644 --- a/app/R/save_as_button.R +++ b/app/R/save_as_button.R @@ -1,4 +1,11 @@ -save_as_UI <- function(id, default_width = 600, default_height = 450) { +library(shiny) +library(shinyjs) +library(ggplot2) + +save_as_ui <- function(id, + default_width = 600, + default_height = 450, + default_dpi = 300) { tagList( tags$head( tags$style(HTML( @@ -17,15 +24,15 @@ save_as_UI <- function(id, default_width = 600, default_height = 450) { padding: 10px; margin-left: 0px; margin-bottom: 20px; - max-width: 500px; + max-width: 300px; } " )), ), #### Download Button #### actionButton( - NS(id,"save_as_button"), - label = HTML( ' Save as...'), + NS(id, "save_as_button"), + label = HTML(' Save as...'), tooltip = "Save image as...", class = "save-as", style = " @@ -48,6 +55,7 @@ save_as_UI <- function(id, default_width = 600, default_height = 450) { ) ), div( + style = "display: none;", radioButtons( NS(id,"download_format"), label = "Format:", @@ -59,6 +67,15 @@ save_as_UI <- function(id, default_width = 600, default_height = 450) { ) ), div( + numericInput( + NS(id,"download_image_resolution"), + label = "Resolution [dpi]", + value = default_dpi, + width = "120px" + ) + ), + div( + style = "display: none;", numericInput( NS(id,"download_image_width"), label = "Width [px]", @@ -67,6 +84,7 @@ save_as_UI <- function(id, default_width = 600, default_height = 450) { ) ), div( + style = "display: none;", numericInput( NS(id,"download_image_height"), label = "Height [px]", @@ -78,7 +96,10 @@ save_as_UI <- function(id, default_width = 600, default_height = 450) { ) } -save_as_Server <- function(id, dataset_name = NULL, plot = NULL, plot_tag = NULL) { +save_as_server <- function(id, + dataset_name = NULL, + plot = NULL, + plot_tag = NULL) { moduleServer(id, function(input, output, session) { observeEvent(input$save_as_button, { # Toggle this "save_as_options" @@ -94,17 +115,21 @@ save_as_Server <- function(id, dataset_name = NULL, plot = NULL, plot_tag = NULL paste(dataset_name,"_",plot_tag,"_plot.",input$download_format,sep = "") }, content = function(file) { - w = input$download_image_width - h = input$download_image_height - format = input$download_format + screen_dpi <- 72 + dpi <- input$download_image_resolution + w <- input$download_image_width + h <- input$download_image_height + w <- as.integer(dpi * w / screen_dpi) + h <- as.integer(dpi * h / screen_dpi) + format <- input$download_format if (format == "png") { - png(file, width = w, height = h, units = "px") + png(file, width = w, height = h, units = "px", res = dpi) } else if (format == "pdf") { pdf(file, width = w, height = h) } else if (format == "svg") { svg(file, width = w, height = h) } - if (class(plot) %in% c("Heatmap", "HeatmapList", "ComplexHeatmap")) { + if (class(plot)[1] %in% c("Heatmap", "HeatmapList", "ComplexHeatmap")) { print(plot) dev.off() return() @@ -112,11 +137,12 @@ save_as_Server <- function(id, dataset_name = NULL, plot = NULL, plot_tag = NULL ggsave( filename = file, plot = plot, - dpi = "screen", - units = "px", - width = w, - height = h, - device = format) + dpi = dpi, + # units = "px", + # width = w, + # height = h, + device = format + ) dev.off() } ) From ec42450a64d5cb3663d1098754038a50b5658742 Mon Sep 17 00:00:00 2001 From: fherreazcue Date: Mon, 25 Mar 2024 16:19:12 +0000 Subject: [PATCH 3/9] Removes nginx cloudflared dependency and exposes 80 443 --- docker-compose.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 3510107..2831aa8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,12 +10,12 @@ services: proxy: image: jc21/nginx-proxy-manager:latest container_name: pB-proxy - depends_on: - - cloudflared + # depends_on: + # - cloudflared restart: unless-stopped ports: - # - '80:80' # Public HTTP Port - # - '443:443' # Public HTTPS Port + - '80:80' # Public HTTP Port + - '443:443' # Public HTTPS Port - '81:81' # Admin Web Port2 volumes: - nginx-proxy:/data @@ -92,4 +92,4 @@ volumes: driver: local minio_s3data: nginx-proxy: - nginx-certs: \ No newline at end of file + nginx-certs: From 8b45a851d9f738c37ad54a88176de35af4ac9050 Mon Sep 17 00:00:00 2001 From: fherreazcue Date: Tue, 26 Mar 2024 14:44:25 +0000 Subject: [PATCH 4/9] Uses random pass for minio, and fixes bootstrapping script --- README.md | 17 ++++++++++------- db/config/bootstrap.sh | 2 +- docker-compose.yml | 10 +++++----- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 8358d74..6540014 100644 --- a/README.md +++ b/README.md @@ -47,9 +47,16 @@ docker pull jc21/nginx-proxy-manager:latest ``` ### One time setup -Navigate to the cloned repo directory, and setup minio for the first time: +Navigate to the cloned repo directory. ``` cd proteinBASE +``` +Create a password for the root user of minio. You can generate a random one with: +``` +echo "MINIO_ROOT_PASSWORD=$(cat /dev/urandom | tr -dc 'a-zA-Z0-9' | fold -w 10 | head -n 1)" > ./db/config/minio/.secret_minio_passwd +``` +Now we can spin up minio for the first time: +``` docker compose up minio proxy -d ``` #### Minio keys @@ -58,12 +65,8 @@ For example, if the ip is 10.20.30.40, in a browser go to ``` http://10.20.30.40:9001/access-keys ``` -Log in using the temporary credentials -``` -u: ROOTUSER -p: CHANGEME123 -``` -Update your credentials. +Log in using the root user credentials ( user: ROOTUSER and the password you just generated). + Once logged in, use the 'create' button. Hit 'Create' again, and then 'Download for import'. diff --git a/db/config/bootstrap.sh b/db/config/bootstrap.sh index 57f0f5b..23aad50 100755 --- a/db/config/bootstrap.sh +++ b/db/config/bootstrap.sh @@ -21,7 +21,7 @@ set -e # Change the current directory to the directory of the script cd "$(dirname "$0")" # Fix secret credentials url. Should be "http://localhost:9000" -sed -i 's#9001/api/v1/service-account-credentials#9000#g' minio/.secret_credentials.json +sed -i 's#http.*api/v1/service-account-credentials#http://localhost:9000#g' minio/.secret_credentials.json # Generate random password cat /dev/urandom | tr -dc 'a-zA-Z0-9' | fold -w 10 | head -n 1 > .secret_passwd diff --git a/docker-compose.yml b/docker-compose.yml index 2831aa8..e2676f0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,14 +4,12 @@ services: container_name: pB-cloudflared restart: unless-stopped environment: - - TUNNEL_TOKEN=eyJhIjoiZTk3ZjM1OGM3NTc5ZjdkYjJlNDVmNTI2OTE1MzgxMzYiLCJ0IjoiZDNiZWUxM2MtZWI3Ny00ZDY4LTllMTMtYzRlZGM5Yjg4NjFmIiwicyI6IlpHTmpPVEF6TXpNdFpEUTJZeTAwTURsakxXSXhZak10WldZNE16UTROemxsT0ROayJ9 + - TUNNEL_TOKEN=not-the-real-token command: tunnel --no-autoupdate run proxy: image: jc21/nginx-proxy-manager:latest container_name: pB-proxy - # depends_on: - # - cloudflared restart: unless-stopped ports: - '80:80' # Public HTTP Port @@ -19,9 +17,10 @@ services: - '81:81' # Admin Web Port2 volumes: - nginx-proxy:/data + # - ${PWD}/proxy_data:/data - nginx-certs:/etc/letsencrypt healthcheck: - test: ["CMD", "/bin/check-health"] + test: [ "CMD", "/bin/check-health" ] interval: 10s timeout: 3s @@ -34,7 +33,8 @@ services: - "9001:9001" environment: - MINIO_ROOT_USER=ROOTUSER - - MINIO_ROOT_PASSWORD=CHANGEME123 + env_file: + - ${PWD}/db/config/minio/.secret_minio_passwd volumes: - minio_s3data:/s3data - ${PWD}/app/data:/data From c39126365b03898f237c744d4c341bcaee28b5d8 Mon Sep 17 00:00:00 2001 From: Mychel Morais Date: Wed, 27 Mar 2024 15:09:33 +0000 Subject: [PATCH 5/9] fixed x.trunc heatmap_color, quantile normalization before log transform --- R/.DS_Store | Bin 0 -> 6148 bytes R/data_processing.R | 195 ++++++++++++++++++++++++++++++++++++++++ R/plot_functions.R | 212 ++++++++++++++++++++++++++++++++++++++++++++ data/.DS_Store | Bin 0 -> 6148 bytes proteomics-web-app | 1 + 5 files changed, 408 insertions(+) create mode 100644 R/.DS_Store create mode 100644 R/data_processing.R create mode 100644 R/plot_functions.R create mode 100644 data/.DS_Store create mode 160000 proteomics-web-app diff --git a/R/.DS_Store b/R/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..cb84adba3cf7e2a9e51c98301b39829efe7e30e8 GIT binary patch literal 6148 zcmeHK!Ab)$5PhjVR21paV}3x3e!*JmO*~lCqpfYxg56!(UC`Tpx^E^!X{q8tkRmfM zd6UV^Bzb|&20-Z7(+i*rphFdG9kTgCWL$JdTILZ9TH_64RG8rjmpjq!*hdEB>`wH| z7AUcKe&_VQqbjTCn|f6(8TW`GKCrUr9v)CIqsD?KzCsTJJ^eAQd-i8os<$><#tTd= zK4%rldBL1kkCk6(<*a|jb(A<` zXScSqW39Z&fbD)CkIdKP3^)VMz^@tLoh{NiO6aXK;0!neYX;=|5UGNhVUtil9c=Uo zKy1^kLR;Pvk`oOx!zLj|C?Ta1Q);j$Mo8)OM~ll0n}jJHL1$(in>pAECFtz*M-4~F z5_;(1unu8q_YRYc;NgzFR< hdKEKPuHsXw3jI+GVrJMRq=(`^0!o87&cL5C@Cm`qM4tcv literal 0 HcmV?d00001 diff --git a/R/data_processing.R b/R/data_processing.R new file mode 100644 index 0000000..406e2e5 --- /dev/null +++ b/R/data_processing.R @@ -0,0 +1,195 @@ +#### Load libraries #### +library(MSnbase) +library(reshape2) +library(QFeatures) +library(tidyverse) +library(openxlsx) +library(readxl) + + +#### Full pre-processing pipeline #### +preprocess_data <- function(raw_data_path = "", dataset_name = "", dataset_path = "") { + # This function triggers the full data pre-processing pipeline. + # To go from the raw txt file with the data, provide the `raw_data_path`. + # The `dataset_name` is optional, and it is used to name the output files. + # To skip the data preparation, provide the `dataset_path` instead of the `raw_data_path`. + + # Set paths + data_path <- "data/" + if (dataset_name == "") { + dataset_name <- gsub(".txt", "", basename(raw_data_path)) + } + pca_data_path <- paste(data_path, "pcaplots/", dataset_name, "_pca.xlsx", sep="") + heatmaps_path <- paste(data_path, "heatmaps/", dataset_name, "_heatmap.xlsx", sep="") + + # Prepare dataset + if (dataset_path == "") { + dataset_path <- paste(data_path, "datasets/", dataset_name, ".xlsx", sep="") + prepare_dataset(raw_data_path, dataset_path) + } + + # Load protein data + protein_data <- tryCatch({ + df <- read_excel(dataset_path) + ############################################################################# + #### This should not be needed once we stop using mixed old and new data #### + col_names <- colnames(df) + if ("UniprotID" %in% col_names && "gene.names" %in% col_names) { + df <- df %>% + rename(Identifiers = UniprotID, Gene = gene.names) + } + ############################################################################# + df <- df |> + pivot_longer(cols = !(Identifiers:Gene), + names_to = "experiment", + values_to = "expression") + }, error = function(e) { + message("Error reading the Excel file:", conditionMessage(e)) + }) + + # Preprocess data for plots + pca_data(protein_data, pca_data_path) + heatmap_data(protein_data, heatmaps_path) +} + + +#### Generates dataset from raw txt #### +prepare_dataset <- function(raw_data_path, dataset_path) { + # This function reads the raw txt file with the data and prepares it as protein data. + # The `raw_data_path` is the path to the raw txt file with the data. + # The `dataset_path` is the path to save the prepared dataset. + + peptides <- read.delim(raw_data_path) + # Identify columns with quantitative expression values + ecols <- grep("Intensity.", names(peptides)) + # Identify pooled samples from the data if present and remove it + if (any(grepl("pool", names(peptides[ecols]), ignore.case = TRUE))) { + ecols <- ecols[!grepl("pool", names(peptides[ecols]), ignore.case = TRUE)] + } + # Create an S4 object, QFeatures type + s4 <- QFeatures::readQFeatures( + table = peptides, # peptide data #### ****** modified here + ecol = ecols, # expression indexes + fnames = 1, # feature names + name = "raw", # raw assay name + sep = "\t" # separator for tabular data + ) + # Add column with number of non-zero values to assay array + rowData(s4[[1]])$nNonZero <- rowSums(assay(s4[[1]]) > 0) + # Replace zeroes with NA in S4 object + s4 <- QFeatures::zeroIsNA(object = s4, i = "raw") + # Remove contaminant peptides + s4 <- QFeatures::filterFeatures( + object = s4, + filter = ~Potential.contaminant != "+" + ) + # Remove decoys + s4 <- QFeatures::filterFeatures(object = s4, filter = ~Reverse != "+") + # Remove proteins present in < 2 replicates + s4 <- QFeatures::filterFeatures(object = s4, filter = ~nNonZero > 1) + # Remove overlapping proteins + filter <- rowData(s4[[1]])$Proteins %in% + msqrob2::smallestUniqueGroups(rowData(s4[[1]])$Proteins) + s4 <- s4[filter, , ] + # Normalize by quantiles + s4 <- normalize( + object = s4, # S4 peptide data + i = "raw", # log expression data + name = "norm", # norm assay name + method = "quantiles" # normalization method #### ****** modified here + ) + # Log2-transform peptide data #### ****** modified here + s4 <- QFeatures::logTransform( + object = s4, # S4 peptide data + i = "norm", # raw expression matrix + name = "log", # log assay name + base = 2 + ) + # Summarize peptide to protein + s4 <- QFeatures::aggregateFeatures( + object = s4, # S4 peptide data + i = "log", # normalized expression data + name = "prot", # protein assay name + fcol = "Proteins", # rowData variable for aggregation + fun = MsCoreUtils::robustSummary, # quantitative aggregation function + ) + # Extract summarized protein data + protein_data <- dplyr::as_tibble(assay(s4[[4]])) + protein_data$Protein <- rowData(s4[[4]])$Leading.razor.protein + protein_data$Gene <- rowData(s4[[4]])$Gene.names + protein_data$Identifiers <- rowData(s4[[4]])$Proteins + colnames(protein_data) <- gsub("Intensity.", "", colnames(protein_data)) + protein_data <- protein_data |> + dplyr::relocate(Identifiers, Protein, Gene) |> + dplyr::arrange(Protein) + # Save to file + openxlsx::write.xlsx( + x = protein_data, + file = protein_data.xlsx + ) +} + + +#### PCA plot #### +pca_data <- function(protein_data, pca_data_path) { + # This function prepares the protein data for PCA plot. + # The `protein_data` is the already loaded and prepared protein data. + # The `pca_data_path` is the output file path. + + protein_pca <- protein_data |> + dplyr::select_if(is.numeric) + # Identify and remove rows with missing values + protein_pca$nZero <- apply(protein_pca, 1, function(x) sum(is.na(x))) + protein_pca <- protein_pca |> + dplyr::filter(!nZero > 0) + # Format data set as a transposed matrix + protein_pca_matrix <- t(protein_pca[, 1:(ncol(protein_pca) - 1)]) + # Save to file + protein_pca_df <- as.data.frame(protein_pca_matrix) + openxlsx::write.xlsx(x = protein_pca_df, + file = pca_data_path, + rowNames = TRUE, overwrite = TRUE) +} + + +#### Heatmap plot #### +heatmap_data <- function(protein_data, heatmaps_path) { + # This function prepares the protein data for Heatmap plots. + # The `protein_data` is the already loaded and prepared protein data. + # The `heatmaps_path` is the output file path. + + protein_heatmap <- protein_data + # Handle missing values in the data set + protein_heatmap$nZero <- rowSums(is.na( + dplyr::select_if(protein_heatmap, is.numeric) + )) + # Exclude variables with only missing values + total_nVar <- ncol(dplyr::select_if(protein_heatmap, is.numeric)) - 1 + if (any(protein_heatmap$nZero == total_nVar)) { + protein_heatmap <- protein_heatmap[!protein_heatmap$nZero == total_nVar, ] + } + # Exclude variables with missing values > 25% + if (any(protein_heatmap$nZero > (total_nVar * 0.25))) { + protein_heatmap <- + protein_heatmap[protein_heatmap$nZero < (total_nVar * 0.25), ] + } + # Handling multiple identifiers + if (any(grepl(";", protein_heatmap$Gene) == TRUE)) { + mult_ids <- grep(";", protein_heatmap$Gene) + ids <- sub(";.*", "", protein_heatmap$Gene[mult_ids]) + protein_heatmap$Gene[mult_ids] <- ids + } + # Handling missing identifiers + missing_ids <- which(is.na(protein_heatmap$Gene) | protein_heatmap$Gene == "") + protein_heatmap$Gene[missing_ids] <- protein_heatmap$Protein[missing_ids] + + protein_heatmap <- protein_heatmap |> + dplyr::group_by(Protein) |> + dplyr::arrange(Identifiers) |> + dplyr::ungroup() |> + dplyr::select(!nZero) + # Save to file + openxlsx::write.xlsx(x = protein_heatmap, + file = heatmaps_path, + overwrite = TRUE) +} diff --git a/R/plot_functions.R b/R/plot_functions.R new file mode 100644 index 0000000..64687d0 --- /dev/null +++ b/R/plot_functions.R @@ -0,0 +1,212 @@ +#### Load libraries #### +library(ggplot2) +library(mixOmics) +library(ComplexHeatmap) +library(circlize) + +#### Settings #### +font_family <- 'sans' +title_fontsize <- 13 +label_fontsize <- 11 +# Fonts in complex heatmaps are smaller, so added an extra pt. +title_font_gp <- function() { + title_fonts <- gpar(fontsize = title_fontsize + 1, + fontface = "bold", + fontfamily = font_family) + return(title_fonts) +} +label_font_gp <- function() { + label_fonts <- gpar(fontsize = label_fontsize + 1, + fontfamily = font_family) + return(label_fonts) +} + +##### Plotting scripts #### + +#### TAB 1 #### +pca_plot <- function(matrix){ + # Run PCA + pca_protein <- mixOmics::pca( + X = matrix, + ncomp = nrow(matrix), + center = TRUE + ) + # Extract from 'pca_protein' coordinates for the PCA plot + coord <- as.data.frame(round(pca_protein$variates$X, digits = 2)) + coord$group <- as.factor(gsub("_\\d+", "", rownames(coord))) + coord$sampleName <- gsub("_", " ", rownames(coord)) + # Extract labels for x and y axes + labels.pca <- as.vector(pca_protein$cum.var) + xlabel <- paste0("PC1, ", + (round((labels.pca[1]*100), + digits = 2)),"%") + ylabel <- paste0("PC2, ", + (round(((labels.pca[2]-labels.pca[1])*100), + digits = 2)),"%") + + # Generate 2D PCA plot + pca_plot <- ggplot(coord, + aes(x = PC1, y = PC2, + color = group, + text = paste("", sampleName))) + + geom_point(size = 4) + + labs(x = xlabel, y = ylabel, color = "") + + xlim((min(coord$PC1)-10), max(coord$PC1)+10) + + ylim((min(coord$PC2)-10), max(coord$PC2)+10) + + theme_light() + + theme(text = element_text(family = font_family, size = title_fontsize), + axis.text.x = element_text(size = label_fontsize), + axis.text.y = element_text(size = label_fontsize)) + + guides(shape = "none") + return(pca_plot) +} + +#### TAB 2 #### +generate_heatmap_colors <- function(data){ + x <- as.matrix(select_if(data, is.numeric)) # modified here + x.trunc <- as.vector(unique(x)) + x.trim <- x.trunc[x.trunc >= quantile(x.trunc, probs = 0.01, na.rm = T) & + x.trunc <= quantile(x.trunc, probs = 0.99, na.rm = T)] + x.max <- which.max(x.trim) + x.min <- which.min(x.trim) + # Create a color function for the heatmap values + col_fun <- colorRamp2(breaks = c(x.trim[x.min], 0, x.trim[x.max]), # modified here + colors = c("blue", "white", "red"), + space = "sRGB") + # Create color lists for samples and groups labels + samples_names <- gsub("_", " ", colnames(x)) # for sample annotation + samples_colors <- rainbow(length(samples_names)) + col_samples <- setNames(samples_colors, samples_names) + group_names <- gsub("_\\d+", "", colnames(x)) |> as.factor() |> levels() + group_colors <- hcl.colors(length(group_names)) + col_group <- setNames(group_colors, group_names) + + return(list(col_fun = col_fun, col_samples = col_samples, col_group = col_group)) +} + +# ### added this just to test the generate_heatmap_color function above +heatmap_colors <- generate_heatmap_colors(protein_heatmap) + +top_annotation <- function(data, heatmap_colors, legend = TRUE) { + samples_lab <- gsub("_", " ", colnames(data)) + groups_lab <- gsub("_\\d+", "", colnames(data)) + top_annotation = HeatmapAnnotation(Samples = samples_lab, + Groups = groups_lab, + col = list(Samples = heatmap_colors$col_samples, + Groups = heatmap_colors$col_group), + show_annotation_name = T, + annotation_name_side = "left", + annotation_name_gp = label_font_gp(), + annotation_legend_param = list( + title_gp = title_font_gp(), + labels_gp = label_font_gp() + ), + show_legend = legend) + return(top_annotation) +} + +make_heatmap <- function(data, heatmap_colors){ + data_m <- as.matrix(select_if(data, is.numeric)) # modified here + set.seed(3) + ht <- ComplexHeatmap::Heatmap(data_m, + name = "P. Level", + cluster_rows = TRUE, + cluster_columns = TRUE, + col = heatmap_colors$col_fun, + show_row_names = F, + show_column_names = F, + row_title = "Proteins", + row_title_gp = title_font_gp(), + column_title = "", + row_dend_width = unit(1.5, "cm"), + top_annotation = top_annotation(data_m, heatmap_colors), + heatmap_legend_param = list( + title_gp = title_font_gp(), + labels_gp = label_font_gp() + ) + ) + dht = draw(ht,merge_legend = TRUE,newpage = FALSE) + return(dht) +} + +calculate_row_fontsize <- function(n_rows) { + # Uses at most 12pt and at least 3pt font size for row names + # Starting at 20 rows, the font size decreases by 1pt for every 10 rows + return(max(12 - floor((n_rows - 20) / 10), 3)) +} + +make_sub_heatmap <- function(data, heatmap_colors){ + data_m <- as.matrix(data) + row_lab <- gsub(".*,\\s*", "", rownames(data_m)) + row_font_size <- calculate_row_fontsize(length(row_lab)) + set.seed(3) + ht <- ComplexHeatmap::Heatmap(data_m, + name = "P. Level", + cluster_rows = FALSE, + cluster_columns = FALSE, + col = heatmap_colors$col_fun, + show_row_names = T, + row_labels = row_lab, + row_names_gp = gpar(fontsize = row_font_size, fontfamily = font_family), + show_column_names = F, + row_title = "Proteins", + row_title_gp = title_font_gp(), + column_title = "", + top_annotation = top_annotation(data_m, heatmap_colors, legend = FALSE), + show_heatmap_legend = FALSE) + return(draw(ht)) +} + +#### TAB 3 #### +bar_plot <- function(gene_dropdown, df){ + ## TODO: Sort out Warning: Removed x rows containing missing values + if (!(gene_dropdown %in% df$Gene)) { + return() + } + df.plot <- df |> + filter(Gene == gene_dropdown) |> + mutate(experiment_type = str_extract(experiment, "[A-Z]+")) |> + filter(!is.na(expression)) + p <- ggplot(data = df.plot, aes(x = experiment, y = expression)) + p <- p + + geom_bar(mapping = aes(x = experiment, + y = expression, + fill = experiment_type), + stat = "identity", + col = "black") + + xlab("") + + scale_y_continuous(name = "Normalized Log2-protein intensity") + + theme_light() + + theme(text = element_text(family = font_family, size = title_fontsize), + axis.text.x = element_text(size = label_fontsize), + axis.text.y = element_text(size = label_fontsize), + legend.position = "none") + + labs(NULL) + return(p) +} +box_plot <- function(gene_dropdown, df){ + if (!(gene_dropdown %in% df$Gene)) { + return() + } + df.plot <- df |> + filter(Gene == gene_dropdown) |> + mutate(experiment_type = str_extract(experiment, "[A-Z]+")) |> + filter(!is.na(expression)) + p <- ggplot(data = df.plot, aes(x = experiment_type, y = expression)) + p <- p + + geom_boxplot(mapping = aes(x = experiment_type, + y = expression, + fill = experiment_type), + col = "black") + + geom_point(mapping = aes(x = experiment_type, + y = expression), + pch = 20, size = 1.5) + + xlab("") + + scale_y_continuous(name = "Normalized Log2-protein intensity") + + theme_light() + + theme(text = element_text(family = font_family, size = title_fontsize), + axis.text.x = element_text(size = label_fontsize), + axis.text.y = element_text(size = label_fontsize), + legend.position = "none") + return(p) +} \ No newline at end of file diff --git a/data/.DS_Store b/data/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..177c640ed997720a1952088d338917d8a4c83b7f GIT binary patch literal 6148 zcmeHKF=_)r43uJn4QX7u+%Mz@i*a7y4+I>OCY&MIuCL0w^0drIBE+221k!{NNVBWe z>~T|^PG;tt^Yeq*%FLEx4G-aiL6p#W^Knh5KUn;=f3tQeNDoOzhz!h=3QVfkh~Y^`zExc>921jnUc-milf5PskEi4Q7UkwWQBev=folb3 zxm~gUKf-^Q|F22fNdYPFuN3h4cDLQ&m8!Q+UXH!C!Jpu)`GC`~4hllFV_>vnY}k(1 cQIvI!Yn=DOF)`@K2OX%N0qP=?0)MT*CxGe|wEzGB literal 0 HcmV?d00001 diff --git a/proteomics-web-app b/proteomics-web-app new file mode 160000 index 0000000..2a7b8b2 --- /dev/null +++ b/proteomics-web-app @@ -0,0 +1 @@ +Subproject commit 2a7b8b2365adef8dd38ba0f7996af78c53dcce56 From 63426b0200bd019fdc4fb76b90fc19559ea42fe6 Mon Sep 17 00:00:00 2001 From: fherreazcue Date: Thu, 28 Mar 2024 20:43:22 +0000 Subject: [PATCH 6/9] fixes heatmaps and subheatmaps --- app/R/app_server.R | 21 ++++++++++++++++++--- app/R/plot_functions.R | 19 +++++++++++-------- db/R/data_preprocessing.R | 29 +++++++++++++---------------- 3 files changed, 42 insertions(+), 27 deletions(-) diff --git a/app/R/app_server.R b/app/R/app_server.R index b724f82..208453b 100644 --- a/app/R/app_server.R +++ b/app/R/app_server.R @@ -95,13 +95,28 @@ app_server <- function(input, output, session) { type = "error", duration = 10 ) }) - ht_colors(generate_heatmap_colors(excel_ok)) + # Set row names + row_names <- paste(excel_ok$Gene, + " (", + excel_ok$Protein, + ")", + sep = "") + # Update autocomplete choices for search bar updateSelectizeInput( session, "subh_gene", - choices = sort(unique(unlist(strsplit(rownames(excel_ok), ",")))), + choices = sort(unique(unlist(row_names))), server = TRUE ) + # Check for duplicated row_names and add a number to make unique + duplicates <- duplicated(row_names) + if (any(duplicates)) { + sequence <- ave(seq_along(row_names), row_names, FUN = function(x) seq_along(x) - 1) + row_names[duplicates] <- paste(row_names[duplicates], sequence[duplicates], sep = " ") + } + rownames(excel_ok) <- row_names + # Calculate colors used for heatmap and subheatmap + ht_colors(generate_heatmap_colors(excel_ok)) return(excel_ok) }) @@ -185,7 +200,7 @@ app_server <- function(input, output, session) { verbose = FALSE, calibrate = FALSE) sub_rows <- unlist(selection$row_index) - sub_cols <- unlist(selection$column_index) + sub_cols <- unlist(selection$column_label) sub_data(heatmap_data()[sub_rows, sub_cols, drop = FALSE]) if (nrow(sub_data()) == 0 || ncol(sub_data()) == 0) { .subheat_plot(NULL) diff --git a/app/R/plot_functions.R b/app/R/plot_functions.R index fbb4768..a87bd8e 100644 --- a/app/R/plot_functions.R +++ b/app/R/plot_functions.R @@ -1,7 +1,11 @@ #### Load libraries #### library(ggplot2) library(mixOmics) +library(ComplexHeatmap) +library(circlize) library(stringr) +library(grid) +library(dplyr) #### Settings #### font_family <- 'sans' @@ -61,17 +65,16 @@ pca_plot <- function(matrix){ } #### TAB 2 #### -generate_heatmap_colors <- function(matrix){ - x <- as.matrix(matrix) +generate_heatmap_colors <- function(data){ + x <- as.matrix(select_if(data, is.numeric)) x.trunc <- as.vector(unique(x)) - browser() x.trim <- x.trunc[x.trunc >= quantile(x.trunc, probs = 0.01, na.rm = T) & x.trunc <= quantile(x.trunc, probs = 0.99, na.rm = T)] x.max <- which.max(x.trim) x.min <- which.min(x.trim) # Create a color function for the heatmap values - col_fun <- circlize::colorRamp2(c(x.trim[x.min], 0, x.trim[x.max]), - c("blue", "white", "red"), + col_fun <- circlize::colorRamp2(breaks = c(x.trim[x.min], 0, x.trim[x.max]), + colors = c("blue", "white", "red"), space = "sRGB") # Create color lists for samples and groups labels samples_names <- gsub("_", " ", colnames(x)) # for sample annotation @@ -103,7 +106,7 @@ top_annotation <- function(data, heatmap_colors, legend = TRUE) { } make_heatmap <- function(data, heatmap_colors){ - data_m <- as.matrix(data) + data_m <- as.matrix(select_if(data, is.numeric)) set.seed(3) ht <- ComplexHeatmap::Heatmap(data_m, name = "P. Level", @@ -133,8 +136,8 @@ calculate_row_fontsize <- function(n_rows) { } make_sub_heatmap <- function(data, heatmap_colors){ - data_m <- as.matrix(data) - row_lab <- gsub(".*,\\s*", "", rownames(data_m)) + data_m <- as.matrix(select_if(data, is.numeric)) + row_lab <- rownames(data_m) row_font_size <- calculate_row_fontsize(length(row_lab)) set.seed(3) ht <- ComplexHeatmap::Heatmap(data_m, diff --git a/db/R/data_preprocessing.R b/db/R/data_preprocessing.R index c7982f7..120d32b 100644 --- a/db/R/data_preprocessing.R +++ b/db/R/data_preprocessing.R @@ -1,14 +1,11 @@ #### Load libraries #### library(MSnbase) -library(msqrob2) library(reshape2) library(QFeatures) library(tidyverse) library(openxlsx) library(readxl) -# library(tidyr) -# library(SummarizedExperiment) -# library(dplyr) +library(igraph) #### Full pre-processing pipeline #### @@ -70,11 +67,11 @@ preprocess_data <- function(raw_data_path = "", dataset_name = "", dataset_path print("Preprocessing data for pca plots...") pca_data(protein_data, pca_data_path) print("Preprocessing data for heatmaps...") + heatmap_data(protein_data, heatmaps_path) protein_data <- protein_data |> pivot_longer(cols = !(Identifiers:Gene), names_to = "experiment", values_to = "expression") - heatmap_data(protein_data, heatmaps_path) return(files) } @@ -105,13 +102,6 @@ prepare_dataset <- function(raw_data_path, dataset_path) { rowData(s4[[1]])$nNonZero <- rowSums(assay(s4[[1]]) > 0) # Replace zeroes with NA in S4 object s4 <- QFeatures::zeroIsNA(object = s4, i = "raw") - # Log2-transform peptide data - s4 <- QFeatures::logTransform( - object = s4, # S4 peptide data - i = "raw", # raw expression matrix - name = "log", # log assay name - base = 2 - ) # Remove contaminant peptides s4 <- QFeatures::filterFeatures( object = s4, @@ -125,17 +115,24 @@ prepare_dataset <- function(raw_data_path, dataset_path) { filter <- rowData(s4[[1]])$Proteins %in% msqrob2::smallestUniqueGroups(rowData(s4[[1]])$Proteins) s4 <- s4[filter, , ] - # Normalize by median centering + # Normalize by quantiles s4 <- normalize( object = s4, # S4 peptide data - i = "log", # log expression data + i = "raw", # log expression data name = "norm", # norm assay name - method = "center.median" # normalization method + method = "quantiles" # normalization method + ) + # Log2-transform peptide data #### ****** modified here + s4 <- QFeatures::logTransform( + object = s4, # S4 peptide data + i = "norm", # raw expression matrix + name = "log", # log assay name + base = 2 ) # Summarize peptide to protein s4 <- QFeatures::aggregateFeatures( object = s4, # S4 peptide data - i = "norm", # normalized expression data + i = "log", # normalized expression data name = "prot", # protein assay name fcol = "Proteins", # rowData variable for aggregation fun = MsCoreUtils::robustSummary, # quantitative aggregation function From 156a47024605c56c619183942b74c28e28b4c94d Mon Sep 17 00:00:00 2001 From: fherreazcue Date: Thu, 28 Mar 2024 20:43:45 +0000 Subject: [PATCH 7/9] Revert "fixed x.trunc heatmap_color, quantile normalization before log transform" This reverts commit c39126365b03898f237c744d4c341bcaee28b5d8. --- R/.DS_Store | Bin 6148 -> 0 bytes R/data_processing.R | 195 ---------------------------------------- R/plot_functions.R | 212 -------------------------------------------- data/.DS_Store | Bin 6148 -> 0 bytes proteomics-web-app | 1 - 5 files changed, 408 deletions(-) delete mode 100644 R/.DS_Store delete mode 100644 R/data_processing.R delete mode 100644 R/plot_functions.R delete mode 100644 data/.DS_Store delete mode 160000 proteomics-web-app diff --git a/R/.DS_Store b/R/.DS_Store deleted file mode 100644 index cb84adba3cf7e2a9e51c98301b39829efe7e30e8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHK!Ab)$5PhjVR21paV}3x3e!*JmO*~lCqpfYxg56!(UC`Tpx^E^!X{q8tkRmfM zd6UV^Bzb|&20-Z7(+i*rphFdG9kTgCWL$JdTILZ9TH_64RG8rjmpjq!*hdEB>`wH| z7AUcKe&_VQqbjTCn|f6(8TW`GKCrUr9v)CIqsD?KzCsTJJ^eAQd-i8os<$><#tTd= zK4%rldBL1kkCk6(<*a|jb(A<` zXScSqW39Z&fbD)CkIdKP3^)VMz^@tLoh{NiO6aXK;0!neYX;=|5UGNhVUtil9c=Uo zKy1^kLR;Pvk`oOx!zLj|C?Ta1Q);j$Mo8)OM~ll0n}jJHL1$(in>pAECFtz*M-4~F z5_;(1unu8q_YRYc;NgzFR< hdKEKPuHsXw3jI+GVrJMRq=(`^0!o87&cL5C@Cm`qM4tcv diff --git a/R/data_processing.R b/R/data_processing.R deleted file mode 100644 index 406e2e5..0000000 --- a/R/data_processing.R +++ /dev/null @@ -1,195 +0,0 @@ -#### Load libraries #### -library(MSnbase) -library(reshape2) -library(QFeatures) -library(tidyverse) -library(openxlsx) -library(readxl) - - -#### Full pre-processing pipeline #### -preprocess_data <- function(raw_data_path = "", dataset_name = "", dataset_path = "") { - # This function triggers the full data pre-processing pipeline. - # To go from the raw txt file with the data, provide the `raw_data_path`. - # The `dataset_name` is optional, and it is used to name the output files. - # To skip the data preparation, provide the `dataset_path` instead of the `raw_data_path`. - - # Set paths - data_path <- "data/" - if (dataset_name == "") { - dataset_name <- gsub(".txt", "", basename(raw_data_path)) - } - pca_data_path <- paste(data_path, "pcaplots/", dataset_name, "_pca.xlsx", sep="") - heatmaps_path <- paste(data_path, "heatmaps/", dataset_name, "_heatmap.xlsx", sep="") - - # Prepare dataset - if (dataset_path == "") { - dataset_path <- paste(data_path, "datasets/", dataset_name, ".xlsx", sep="") - prepare_dataset(raw_data_path, dataset_path) - } - - # Load protein data - protein_data <- tryCatch({ - df <- read_excel(dataset_path) - ############################################################################# - #### This should not be needed once we stop using mixed old and new data #### - col_names <- colnames(df) - if ("UniprotID" %in% col_names && "gene.names" %in% col_names) { - df <- df %>% - rename(Identifiers = UniprotID, Gene = gene.names) - } - ############################################################################# - df <- df |> - pivot_longer(cols = !(Identifiers:Gene), - names_to = "experiment", - values_to = "expression") - }, error = function(e) { - message("Error reading the Excel file:", conditionMessage(e)) - }) - - # Preprocess data for plots - pca_data(protein_data, pca_data_path) - heatmap_data(protein_data, heatmaps_path) -} - - -#### Generates dataset from raw txt #### -prepare_dataset <- function(raw_data_path, dataset_path) { - # This function reads the raw txt file with the data and prepares it as protein data. - # The `raw_data_path` is the path to the raw txt file with the data. - # The `dataset_path` is the path to save the prepared dataset. - - peptides <- read.delim(raw_data_path) - # Identify columns with quantitative expression values - ecols <- grep("Intensity.", names(peptides)) - # Identify pooled samples from the data if present and remove it - if (any(grepl("pool", names(peptides[ecols]), ignore.case = TRUE))) { - ecols <- ecols[!grepl("pool", names(peptides[ecols]), ignore.case = TRUE)] - } - # Create an S4 object, QFeatures type - s4 <- QFeatures::readQFeatures( - table = peptides, # peptide data #### ****** modified here - ecol = ecols, # expression indexes - fnames = 1, # feature names - name = "raw", # raw assay name - sep = "\t" # separator for tabular data - ) - # Add column with number of non-zero values to assay array - rowData(s4[[1]])$nNonZero <- rowSums(assay(s4[[1]]) > 0) - # Replace zeroes with NA in S4 object - s4 <- QFeatures::zeroIsNA(object = s4, i = "raw") - # Remove contaminant peptides - s4 <- QFeatures::filterFeatures( - object = s4, - filter = ~Potential.contaminant != "+" - ) - # Remove decoys - s4 <- QFeatures::filterFeatures(object = s4, filter = ~Reverse != "+") - # Remove proteins present in < 2 replicates - s4 <- QFeatures::filterFeatures(object = s4, filter = ~nNonZero > 1) - # Remove overlapping proteins - filter <- rowData(s4[[1]])$Proteins %in% - msqrob2::smallestUniqueGroups(rowData(s4[[1]])$Proteins) - s4 <- s4[filter, , ] - # Normalize by quantiles - s4 <- normalize( - object = s4, # S4 peptide data - i = "raw", # log expression data - name = "norm", # norm assay name - method = "quantiles" # normalization method #### ****** modified here - ) - # Log2-transform peptide data #### ****** modified here - s4 <- QFeatures::logTransform( - object = s4, # S4 peptide data - i = "norm", # raw expression matrix - name = "log", # log assay name - base = 2 - ) - # Summarize peptide to protein - s4 <- QFeatures::aggregateFeatures( - object = s4, # S4 peptide data - i = "log", # normalized expression data - name = "prot", # protein assay name - fcol = "Proteins", # rowData variable for aggregation - fun = MsCoreUtils::robustSummary, # quantitative aggregation function - ) - # Extract summarized protein data - protein_data <- dplyr::as_tibble(assay(s4[[4]])) - protein_data$Protein <- rowData(s4[[4]])$Leading.razor.protein - protein_data$Gene <- rowData(s4[[4]])$Gene.names - protein_data$Identifiers <- rowData(s4[[4]])$Proteins - colnames(protein_data) <- gsub("Intensity.", "", colnames(protein_data)) - protein_data <- protein_data |> - dplyr::relocate(Identifiers, Protein, Gene) |> - dplyr::arrange(Protein) - # Save to file - openxlsx::write.xlsx( - x = protein_data, - file = protein_data.xlsx - ) -} - - -#### PCA plot #### -pca_data <- function(protein_data, pca_data_path) { - # This function prepares the protein data for PCA plot. - # The `protein_data` is the already loaded and prepared protein data. - # The `pca_data_path` is the output file path. - - protein_pca <- protein_data |> - dplyr::select_if(is.numeric) - # Identify and remove rows with missing values - protein_pca$nZero <- apply(protein_pca, 1, function(x) sum(is.na(x))) - protein_pca <- protein_pca |> - dplyr::filter(!nZero > 0) - # Format data set as a transposed matrix - protein_pca_matrix <- t(protein_pca[, 1:(ncol(protein_pca) - 1)]) - # Save to file - protein_pca_df <- as.data.frame(protein_pca_matrix) - openxlsx::write.xlsx(x = protein_pca_df, - file = pca_data_path, - rowNames = TRUE, overwrite = TRUE) -} - - -#### Heatmap plot #### -heatmap_data <- function(protein_data, heatmaps_path) { - # This function prepares the protein data for Heatmap plots. - # The `protein_data` is the already loaded and prepared protein data. - # The `heatmaps_path` is the output file path. - - protein_heatmap <- protein_data - # Handle missing values in the data set - protein_heatmap$nZero <- rowSums(is.na( - dplyr::select_if(protein_heatmap, is.numeric) - )) - # Exclude variables with only missing values - total_nVar <- ncol(dplyr::select_if(protein_heatmap, is.numeric)) - 1 - if (any(protein_heatmap$nZero == total_nVar)) { - protein_heatmap <- protein_heatmap[!protein_heatmap$nZero == total_nVar, ] - } - # Exclude variables with missing values > 25% - if (any(protein_heatmap$nZero > (total_nVar * 0.25))) { - protein_heatmap <- - protein_heatmap[protein_heatmap$nZero < (total_nVar * 0.25), ] - } - # Handling multiple identifiers - if (any(grepl(";", protein_heatmap$Gene) == TRUE)) { - mult_ids <- grep(";", protein_heatmap$Gene) - ids <- sub(";.*", "", protein_heatmap$Gene[mult_ids]) - protein_heatmap$Gene[mult_ids] <- ids - } - # Handling missing identifiers - missing_ids <- which(is.na(protein_heatmap$Gene) | protein_heatmap$Gene == "") - protein_heatmap$Gene[missing_ids] <- protein_heatmap$Protein[missing_ids] - - protein_heatmap <- protein_heatmap |> - dplyr::group_by(Protein) |> - dplyr::arrange(Identifiers) |> - dplyr::ungroup() |> - dplyr::select(!nZero) - # Save to file - openxlsx::write.xlsx(x = protein_heatmap, - file = heatmaps_path, - overwrite = TRUE) -} diff --git a/R/plot_functions.R b/R/plot_functions.R deleted file mode 100644 index 64687d0..0000000 --- a/R/plot_functions.R +++ /dev/null @@ -1,212 +0,0 @@ -#### Load libraries #### -library(ggplot2) -library(mixOmics) -library(ComplexHeatmap) -library(circlize) - -#### Settings #### -font_family <- 'sans' -title_fontsize <- 13 -label_fontsize <- 11 -# Fonts in complex heatmaps are smaller, so added an extra pt. -title_font_gp <- function() { - title_fonts <- gpar(fontsize = title_fontsize + 1, - fontface = "bold", - fontfamily = font_family) - return(title_fonts) -} -label_font_gp <- function() { - label_fonts <- gpar(fontsize = label_fontsize + 1, - fontfamily = font_family) - return(label_fonts) -} - -##### Plotting scripts #### - -#### TAB 1 #### -pca_plot <- function(matrix){ - # Run PCA - pca_protein <- mixOmics::pca( - X = matrix, - ncomp = nrow(matrix), - center = TRUE - ) - # Extract from 'pca_protein' coordinates for the PCA plot - coord <- as.data.frame(round(pca_protein$variates$X, digits = 2)) - coord$group <- as.factor(gsub("_\\d+", "", rownames(coord))) - coord$sampleName <- gsub("_", " ", rownames(coord)) - # Extract labels for x and y axes - labels.pca <- as.vector(pca_protein$cum.var) - xlabel <- paste0("PC1, ", - (round((labels.pca[1]*100), - digits = 2)),"%") - ylabel <- paste0("PC2, ", - (round(((labels.pca[2]-labels.pca[1])*100), - digits = 2)),"%") - - # Generate 2D PCA plot - pca_plot <- ggplot(coord, - aes(x = PC1, y = PC2, - color = group, - text = paste("", sampleName))) + - geom_point(size = 4) + - labs(x = xlabel, y = ylabel, color = "") + - xlim((min(coord$PC1)-10), max(coord$PC1)+10) + - ylim((min(coord$PC2)-10), max(coord$PC2)+10) + - theme_light() + - theme(text = element_text(family = font_family, size = title_fontsize), - axis.text.x = element_text(size = label_fontsize), - axis.text.y = element_text(size = label_fontsize)) + - guides(shape = "none") - return(pca_plot) -} - -#### TAB 2 #### -generate_heatmap_colors <- function(data){ - x <- as.matrix(select_if(data, is.numeric)) # modified here - x.trunc <- as.vector(unique(x)) - x.trim <- x.trunc[x.trunc >= quantile(x.trunc, probs = 0.01, na.rm = T) & - x.trunc <= quantile(x.trunc, probs = 0.99, na.rm = T)] - x.max <- which.max(x.trim) - x.min <- which.min(x.trim) - # Create a color function for the heatmap values - col_fun <- colorRamp2(breaks = c(x.trim[x.min], 0, x.trim[x.max]), # modified here - colors = c("blue", "white", "red"), - space = "sRGB") - # Create color lists for samples and groups labels - samples_names <- gsub("_", " ", colnames(x)) # for sample annotation - samples_colors <- rainbow(length(samples_names)) - col_samples <- setNames(samples_colors, samples_names) - group_names <- gsub("_\\d+", "", colnames(x)) |> as.factor() |> levels() - group_colors <- hcl.colors(length(group_names)) - col_group <- setNames(group_colors, group_names) - - return(list(col_fun = col_fun, col_samples = col_samples, col_group = col_group)) -} - -# ### added this just to test the generate_heatmap_color function above -heatmap_colors <- generate_heatmap_colors(protein_heatmap) - -top_annotation <- function(data, heatmap_colors, legend = TRUE) { - samples_lab <- gsub("_", " ", colnames(data)) - groups_lab <- gsub("_\\d+", "", colnames(data)) - top_annotation = HeatmapAnnotation(Samples = samples_lab, - Groups = groups_lab, - col = list(Samples = heatmap_colors$col_samples, - Groups = heatmap_colors$col_group), - show_annotation_name = T, - annotation_name_side = "left", - annotation_name_gp = label_font_gp(), - annotation_legend_param = list( - title_gp = title_font_gp(), - labels_gp = label_font_gp() - ), - show_legend = legend) - return(top_annotation) -} - -make_heatmap <- function(data, heatmap_colors){ - data_m <- as.matrix(select_if(data, is.numeric)) # modified here - set.seed(3) - ht <- ComplexHeatmap::Heatmap(data_m, - name = "P. Level", - cluster_rows = TRUE, - cluster_columns = TRUE, - col = heatmap_colors$col_fun, - show_row_names = F, - show_column_names = F, - row_title = "Proteins", - row_title_gp = title_font_gp(), - column_title = "", - row_dend_width = unit(1.5, "cm"), - top_annotation = top_annotation(data_m, heatmap_colors), - heatmap_legend_param = list( - title_gp = title_font_gp(), - labels_gp = label_font_gp() - ) - ) - dht = draw(ht,merge_legend = TRUE,newpage = FALSE) - return(dht) -} - -calculate_row_fontsize <- function(n_rows) { - # Uses at most 12pt and at least 3pt font size for row names - # Starting at 20 rows, the font size decreases by 1pt for every 10 rows - return(max(12 - floor((n_rows - 20) / 10), 3)) -} - -make_sub_heatmap <- function(data, heatmap_colors){ - data_m <- as.matrix(data) - row_lab <- gsub(".*,\\s*", "", rownames(data_m)) - row_font_size <- calculate_row_fontsize(length(row_lab)) - set.seed(3) - ht <- ComplexHeatmap::Heatmap(data_m, - name = "P. Level", - cluster_rows = FALSE, - cluster_columns = FALSE, - col = heatmap_colors$col_fun, - show_row_names = T, - row_labels = row_lab, - row_names_gp = gpar(fontsize = row_font_size, fontfamily = font_family), - show_column_names = F, - row_title = "Proteins", - row_title_gp = title_font_gp(), - column_title = "", - top_annotation = top_annotation(data_m, heatmap_colors, legend = FALSE), - show_heatmap_legend = FALSE) - return(draw(ht)) -} - -#### TAB 3 #### -bar_plot <- function(gene_dropdown, df){ - ## TODO: Sort out Warning: Removed x rows containing missing values - if (!(gene_dropdown %in% df$Gene)) { - return() - } - df.plot <- df |> - filter(Gene == gene_dropdown) |> - mutate(experiment_type = str_extract(experiment, "[A-Z]+")) |> - filter(!is.na(expression)) - p <- ggplot(data = df.plot, aes(x = experiment, y = expression)) - p <- p + - geom_bar(mapping = aes(x = experiment, - y = expression, - fill = experiment_type), - stat = "identity", - col = "black") + - xlab("") + - scale_y_continuous(name = "Normalized Log2-protein intensity") + - theme_light() + - theme(text = element_text(family = font_family, size = title_fontsize), - axis.text.x = element_text(size = label_fontsize), - axis.text.y = element_text(size = label_fontsize), - legend.position = "none") + - labs(NULL) - return(p) -} -box_plot <- function(gene_dropdown, df){ - if (!(gene_dropdown %in% df$Gene)) { - return() - } - df.plot <- df |> - filter(Gene == gene_dropdown) |> - mutate(experiment_type = str_extract(experiment, "[A-Z]+")) |> - filter(!is.na(expression)) - p <- ggplot(data = df.plot, aes(x = experiment_type, y = expression)) - p <- p + - geom_boxplot(mapping = aes(x = experiment_type, - y = expression, - fill = experiment_type), - col = "black") + - geom_point(mapping = aes(x = experiment_type, - y = expression), - pch = 20, size = 1.5) + - xlab("") + - scale_y_continuous(name = "Normalized Log2-protein intensity") + - theme_light() + - theme(text = element_text(family = font_family, size = title_fontsize), - axis.text.x = element_text(size = label_fontsize), - axis.text.y = element_text(size = label_fontsize), - legend.position = "none") - return(p) -} \ No newline at end of file diff --git a/data/.DS_Store b/data/.DS_Store deleted file mode 100644 index 177c640ed997720a1952088d338917d8a4c83b7f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKF=_)r43uJn4QX7u+%Mz@i*a7y4+I>OCY&MIuCL0w^0drIBE+221k!{NNVBWe z>~T|^PG;tt^Yeq*%FLEx4G-aiL6p#W^Knh5KUn;=f3tQeNDoOzhz!h=3QVfkh~Y^`zExc>921jnUc-milf5PskEi4Q7UkwWQBev=folb3 zxm~gUKf-^Q|F22fNdYPFuN3h4cDLQ&m8!Q+UXH!C!Jpu)`GC`~4hllFV_>vnY}k(1 cQIvI!Yn=DOF)`@K2OX%N0qP=?0)MT*CxGe|wEzGB diff --git a/proteomics-web-app b/proteomics-web-app deleted file mode 160000 index 2a7b8b2..0000000 --- a/proteomics-web-app +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 2a7b8b2365adef8dd38ba0f7996af78c53dcce56 From b2194ca5418f80c159213cab00c10505e54b6e55 Mon Sep 17 00:00:00 2001 From: fherreazcue Date: Thu, 28 Mar 2024 21:07:12 +0000 Subject: [PATCH 8/9] Fix linting issues --- app/R/app_server.R | 8 ++- app/R/plot_functions.R | 143 +++++++++++++++++++++-------------------- app/R/save_as_button.R | 28 +++++--- 3 files changed, 100 insertions(+), 79 deletions(-) diff --git a/app/R/app_server.R b/app/R/app_server.R index b5fd55a..9c06387 100644 --- a/app/R/app_server.R +++ b/app/R/app_server.R @@ -122,8 +122,12 @@ app_server <- function(input, output, session) { # Check for duplicated row_names and add a number to make unique duplicates <- duplicated(row_names) if (any(duplicates)) { - sequence <- ave(seq_along(row_names), row_names, FUN = function(x) seq_along(x) - 1) - row_names[duplicates] <- paste(row_names[duplicates], sequence[duplicates], sep = " ") + sequence <- ave( + seq_along(row_names), row_names, FUN = function(x) seq_along(x) - 1 + ) + row_names[duplicates] <- paste(row_names[duplicates], + sequence[duplicates], + sep = " ") } rownames(excel_ok) <- row_names # Calculate colors used for heatmap and subheatmap diff --git a/app/R/plot_functions.R b/app/R/plot_functions.R index 3eff2c9..bed04c9 100644 --- a/app/R/plot_functions.R +++ b/app/R/plot_functions.R @@ -8,7 +8,7 @@ library(grid) library(dplyr) #### Settings #### -font_family <- 'sans' +font_family <- "sans" title_fontsize <- 13 label_fontsize <- 11 # Fonts in complex heatmaps are smaller, so added an extra pt. @@ -27,7 +27,7 @@ label_font_gp <- function() { ##### Plotting scripts #### #### TAB 1 #### -pca_plot <- function(matrix){ +pca_plot <- function(matrix) { # Run PCA pca_protein <- mixOmics::pca( X = matrix, @@ -41,11 +41,11 @@ pca_plot <- function(matrix){ # Extract labels for x and y axes labels.pca <- as.vector(pca_protein$cum.var) xlabel <- paste0("PC1, ", - (round((labels.pca[1]*100), - digits = 2)),"%") + (round((labels.pca[1] * 100), + digits = 2)), "%") ylabel <- paste0("PC2, ", - (round(((labels.pca[2]-labels.pca[1])*100), - digits = 2)),"%") + (round(((labels.pca[2] - labels.pca[1]) * 100), + digits = 2)), "%") # Generate 2D PCA plot pca_plot <- ggplot(coord, @@ -54,8 +54,8 @@ pca_plot <- function(matrix){ text = paste("", sampleName))) + geom_point(size = 4) + labs(x = xlabel, y = ylabel, color = "") + - xlim((min(coord$PC1)-10), max(coord$PC1)+10) + - ylim((min(coord$PC2)-10), max(coord$PC2)+10) + + xlim((min(coord$PC1) - 10), max(coord$PC1) + 10) + + ylim((min(coord$PC2) - 10), max(coord$PC2) + 10) + theme_light() + theme(text = element_text(family = font_family, size = title_fontsize), axis.text.x = element_text(size = label_fontsize), @@ -65,15 +65,15 @@ pca_plot <- function(matrix){ } #### TAB 2 #### -generate_heatmap_colors <- function(data){ +generate_heatmap_colors <- function(data) { x <- as.matrix(select_if(data, is.numeric)) - x.trunc <- as.vector(unique(x)) - x.trim <- x.trunc[x.trunc >= quantile(x.trunc, probs = 0.01, na.rm = T) & - x.trunc <= quantile(x.trunc, probs = 0.99, na.rm = T)] - x.max <- which.max(x.trim) - x.min <- which.min(x.trim) + x_trunc <- as.vector(unique(x)) + x_trim <- x_trunc[x_trunc >= quantile(x_trunc, probs = 0.01, na.rm = TRUE) & + x_trunc <= quantile(x_trunc, probs = 0.99, na.rm = TRUE)] + x_max <- which.max(x_trim) + x_min <- which.min(x_trim) # Create a color function for the heatmap values - col_fun <- circlize::colorRamp2(breaks = c(x.trim[x.min], 0, x.trim[x.max]), + col_fun <- circlize::colorRamp2(breaks = c(x_trim[x_min], 0, x_trim[x_max]), colors = c("blue", "white", "red"), space = "sRGB") # Create color lists for samples and groups labels @@ -84,48 +84,53 @@ generate_heatmap_colors <- function(data){ group_colors <- hcl.colors(length(group_names)) col_group <- setNames(group_colors, group_names) - return(list(col_fun = col_fun, col_samples = col_samples, col_group = col_group)) + return(list(col_fun = col_fun, + col_samples = col_samples, + col_group = col_group)) } top_annotation <- function(data, heatmap_colors, legend = TRUE) { samples_lab <- gsub("_", " ", colnames(data)) groups_lab <- gsub("_\\d+", "", colnames(data)) - top_annotation = HeatmapAnnotation(Samples = samples_lab, - Groups = groups_lab, - col = list(Samples = heatmap_colors$col_samples, - Groups = heatmap_colors$col_group), - show_annotation_name = T, - annotation_name_side = "left", - annotation_name_gp = label_font_gp(), - annotation_legend_param = list( - title_gp = title_font_gp(), - labels_gp = label_font_gp() - ), - show_legend = legend) + top_annotation <- HeatmapAnnotation( + Samples = samples_lab, + Groups = groups_lab, + col = list(Samples = heatmap_colors$col_samples, + Groups = heatmap_colors$col_group), + show_annotation_name = TRUE, + annotation_name_side = "left", + annotation_name_gp = label_font_gp(), + annotation_legend_param = list( + title_gp = title_font_gp(), + labels_gp = label_font_gp() + ), + show_legend = legend + ) return(top_annotation) } -make_heatmap <- function(data, heatmap_colors){ +make_heatmap <- function(data, heatmap_colors) { data_m <- as.matrix(select_if(data, is.numeric)) set.seed(3) - ht <- ComplexHeatmap::Heatmap(data_m, - name = "P. Level", - cluster_rows = TRUE, - cluster_columns = TRUE, - col = heatmap_colors$col_fun, - show_row_names = F, - show_column_names = F, - row_title = "Proteins", - row_title_gp = title_font_gp(), - column_title = "", - row_dend_width = unit(1.5, "cm"), - top_annotation = top_annotation(data_m, heatmap_colors), - heatmap_legend_param = list( - title_gp = title_font_gp(), - labels_gp = label_font_gp() - ) - ) - dht = draw(ht,merge_legend = TRUE,newpage = FALSE) + ht <- ComplexHeatmap::Heatmap( + data_m, + name = "P. Level", + cluster_rows = TRUE, + cluster_columns = TRUE, + col = heatmap_colors$col_fun, + show_row_names = FALSE, + show_column_names = FALSE, + row_title = "Proteins", + row_title_gp = title_font_gp(), + column_title = "", + row_dend_width = unit(1.5, "cm"), + top_annotation = top_annotation(data_m, heatmap_colors), + heatmap_legend_param = list( + title_gp = title_font_gp(), + labels_gp = label_font_gp() + ) + ) + dht <- draw(ht, merge_legend = TRUE, newpage = FALSE) return(dht) } @@ -135,39 +140,41 @@ calculate_row_fontsize <- function(n_rows) { return(max(12 - floor((n_rows - 20) / 10), 3)) } -make_sub_heatmap <- function(data, heatmap_colors){ +make_sub_heatmap <- function(data, heatmap_colors) { data_m <- as.matrix(select_if(data, is.numeric)) row_lab <- rownames(data_m) row_font_size <- calculate_row_fontsize(length(row_lab)) set.seed(3) - ht <- ComplexHeatmap::Heatmap(data_m, - name = "P. Level", - cluster_rows = FALSE, - cluster_columns = FALSE, - col = heatmap_colors$col_fun, - show_row_names = T, - row_labels = row_lab, - row_names_gp = gpar(fontsize = row_font_size, fontfamily = font_family), - show_column_names = F, - row_title = "Proteins", - row_title_gp = title_font_gp(), - column_title = "", - top_annotation = top_annotation(data_m, heatmap_colors, legend = FALSE), - show_heatmap_legend = FALSE) + ht <- ComplexHeatmap::Heatmap( + data_m, + name = "P. Level", + cluster_rows = FALSE, + cluster_columns = FALSE, + col = heatmap_colors$col_fun, + show_row_names = TRUE, + row_labels = row_lab, + row_names_gp = gpar(fontsize = row_font_size, fontfamily = font_family), + show_column_names = FALSE, + row_title = "Proteins", + row_title_gp = title_font_gp(), + column_title = "", + top_annotation = top_annotation(data_m, heatmap_colors, legend = FALSE), + show_heatmap_legend = FALSE + ) return(draw(ht)) } #### TAB 3 #### -bar_plot <- function(gene_dropdown, df){ +bar_plot <- function(gene_dropdown, df) { ## TODO: Sort out Warning: Removed x rows containing missing values if (!(gene_dropdown %in% df$Gene)) { return() } - df.plot <- df |> + df_plot <- df |> filter(Gene == gene_dropdown) |> mutate(experiment_type = str_extract(experiment, "[A-Z]+")) |> filter(!is.na(expression)) - p <- ggplot(data = df.plot, aes(x = experiment, y = expression)) + p <- ggplot(data = df_plot, aes(x = experiment, y = expression)) p <- p + geom_bar(mapping = aes(x = experiment, y = expression, @@ -184,15 +191,15 @@ bar_plot <- function(gene_dropdown, df){ labs(NULL) return(p) } -box_plot <- function(gene_dropdown, df){ +box_plot <- function(gene_dropdown, df) { if (!(gene_dropdown %in% df$Gene)) { return() } - df.plot <- df |> + df_plot <- df |> filter(Gene == gene_dropdown) |> mutate(experiment_type = str_extract(experiment, "[A-Z]+")) |> filter(!is.na(expression)) - p <- ggplot(data = df.plot, aes(x = experiment_type, y = expression)) + p <- ggplot(data = df_plot, aes(x = experiment_type, y = expression)) p <- p + geom_boxplot(mapping = aes(x = experiment_type, y = expression, diff --git a/app/R/save_as_button.R b/app/R/save_as_button.R index 230b428..adb2094 100644 --- a/app/R/save_as_button.R +++ b/app/R/save_as_button.R @@ -48,7 +48,7 @@ save_as_ui <- function(id, style = "display: none;", div( downloadButton( - NS(id,'download_button'), + NS(id, "download_button"), "Save image", class = "btn-info", style = "padding: 20px 10px;" @@ -57,18 +57,18 @@ save_as_ui <- function(id, div( style = "display: none;", radioButtons( - NS(id,"download_format"), + NS(id, "download_format"), label = "Format:", choices = list("png", - "pdf", - "svg"), + "pdf", + "svg"), selected = "png", inline = TRUE ) ), div( numericInput( - NS(id,"download_image_resolution"), + NS(id, "download_image_resolution"), label = "Resolution [dpi]", value = default_dpi, width = "120px" @@ -77,7 +77,7 @@ save_as_ui <- function(id, div( style = "display: none;", numericInput( - NS(id,"download_image_width"), + NS(id, "download_image_width"), label = "Width [px]", value = default_width, width = "80px" @@ -86,7 +86,7 @@ save_as_ui <- function(id, div( style = "display: none;", numericInput( - NS(id,"download_image_height"), + NS(id, "download_image_height"), label = "Height [px]", value = default_height, width = "80px" @@ -105,14 +105,24 @@ save_as_server <- function(id, # Toggle this "save_as_options" shinyjs::toggle("save_as_options") # Hide all other "save_as_options" - sel <- paste("[id*=save_as_options]:not(#",id,"-save_as_options)",sep = "") + sel <- paste( + "[id*=save_as_options]:not(#", id, "-save_as_options)", + sep = "" + ) shinyjs::hide(selector = sel) # Scroll to the bottom of the page shinyjs::runjs("window.scrollTo(0,document.body.scrollHeight);") }) output$download_button <- downloadHandler( filename = function() { - paste(dataset_name,"_",plot_tag,"_plot.",input$download_format,sep = "") + paste( + dataset_name, + "_", + plot_tag, + "_plot.", + input$download_format, + sep = "" + ) }, content = function(file) { screen_dpi <- 72 From f73bab0dd95517475959ba17265f09254979bdb4 Mon Sep 17 00:00:00 2001 From: fherreazcue Date: Thu, 28 Mar 2024 21:14:53 +0000 Subject: [PATCH 9/9] loads cloudflared tunnel token from secret file --- docker-compose.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index e2676f0..89564f4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,8 +3,8 @@ services: image: cloudflare/cloudflared:latest container_name: pB-cloudflared restart: unless-stopped - environment: - - TUNNEL_TOKEN=not-the-real-token + env_file: + - ${PWD}/db/config/cloudflared/.secret_tunnel_token command: tunnel --no-autoupdate run proxy: