diff --git a/src/r/make_manuscript_figures.Rmd b/src/r/make_manuscript_figures.Rmd index 4a247a6..faf6b22 100644 --- a/src/r/make_manuscript_figures.Rmd +++ b/src/r/make_manuscript_figures.Rmd @@ -185,7 +185,7 @@ final_figure dev.off() ``` #### vii. Create the accessory function -This accessory function `pathway_heatmap` will create a heatmap for specific metabolic pathways using the `data` variable constructed in the previous code blocks. You can also subset a data frame manually and visualize it that way. +This accessory function `flux_heatmap` will create a heatmap for specific metabolic pathways using the `data` variable constructed in the previous code blocks. You can also subset a data frame manually and visualize it that way. ```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100} flux_heatmap = function(df, pathway){ @@ -210,6 +210,7 @@ flux_heatmap = function(df, pathway){ mid2_data = abs(data[, c("GSE17708_24hrs", "Garcia_24hrs", "GSE147405_24hrs")]) late_data = abs(data[, c("GSE17708_72hrs", "GSE17518_72hrs", "Garcia_48hrs", "Keshamouni_72hrs", "GSE147405_72hrs")]) + tmp = cbind(early_data, mid1_data, mid2_data, late_data) # Create column annotation bars h1_col = HeatmapAnnotation(h1=anno_block(gp=gpar(fill="#CAC4CE"), @@ -246,7 +247,10 @@ flux_heatmap = function(df, pathway){ ) # Construct the heatmaps - col_fun = colorRamp2(c(0, median(data), max(data)), + col_fun = colorRamp2(c(0, + median(as.matrix(tmp), + na.rm=TRUE), + max(as.matrix(tmp), na.rm=TRUE)), c("Gray", "#FD7470", "#DC1C13")) h1 = Heatmap(row_labels=data$name, row_names_side="left", @@ -388,7 +392,7 @@ ko_heatmap = function(df, pathway){ mid2_data = data[, c("GSE17708_24hrs", "Garcia_24hrs", "GSE147405_24hrs")] - 1 late_data = data[, c("GSE17708_72hrs", "GSE17518_72hrs", "Garcia_48hrs", "Keshamouni_72hrs", "GSE147405_72hrs")] - 1 - tmp = cbind(early_data, mid1_data, mid2_data, late_data) - 1 + tmp = cbind(early_data, mid1_data, mid2_data, late_data) # Create column annotation bars h1_col = HeatmapAnnotation(h1=anno_block(gp=gpar(fill="#CAC4CE"), @@ -411,7 +415,7 @@ ko_heatmap = function(df, pathway){ # Construct experiment heatmap annotation hm = Heatmap(as.matrix(expt_data), col = colorRamp2(c(1, 5), - c("Gray", "#95C623")), + c("#FFFFFF", "#5BC0BE", "#3A506B", "#1C2541", "#0B132B")), heatmap_legend_param=list(title="Experiments", color_bar="discrete"), rect_gp = gpar(col="black", @@ -427,7 +431,7 @@ ko_heatmap = function(df, pathway){ ) # Construct the heatmaps - col_fun = colorRamp2(c(min(tmp), 0, max(tmp)), + col_fun = colorRamp2(c(min(as.matrix(tmp)), 0, max(as.matrix(tmp))), c("#073B4C", "#FFFFFF", "#EF476F")) h1 = Heatmap(row_labels=data$name, row_names_side="left", @@ -472,7 +476,7 @@ ko_heatmap = function(df, pathway){ width = ncol(late_data)*unit(5, "mm"), height = nrow(late_data)*unit(5, "mm"), heatmap_legend_param=list(title="Normalized KO Growth Score", - at = c(min(tmp), 0, max(tmp)), + at = c(min(as.matrix(tmp)), 0, max(as.matrix(tmp))), labels = c("Decr. Fitness", "No change", "Incr. fitness"))) # Return the final figure @@ -482,7 +486,7 @@ ko_heatmap = function(df, pathway){ } ``` -### B. Plot the top 50 reactions based on priority score. +### C. Plot the top 50 reactions based on priority score. ```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100} # Sort data by priority score data = data[order(data$`Absolute Effect * N expts`, decreasing=TRUE), ] @@ -504,7 +508,7 @@ dev.off top50_figures ``` -### C. Grab high confidence reactions (2 or more in same sample) +### D. Grab high confidence reactions (2 or more in same sample) There's not that much for high confidence reactions. ```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100} @@ -530,7 +534,7 @@ specific_figures ``` -### D. Grab low confidence reactions (1 in same sample) +### E. Grab low confidence reactions (1 in same sample) The list of low confidence reactions I'd like to visualize is high. ```{r} @@ -634,6 +638,9 @@ hm = Heatmap(as.matrix(data[, "Std_All"]), ### D. Create the Heatmap with additional figures ```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100} final_data = as.matrix(tmp) +col_fun = colorRamp2(c(min(as.matrix(tmp)), 0, max(as.matrix(tmp))), + c("#073B4C", "#FFFFFF", "#EF476F")) + ht = Heatmap(row_labels=data$name, final_data, row_names_side="left", @@ -641,10 +648,9 @@ ht = Heatmap(row_labels=data$name, lwd = 1), cluster_columns=FALSE, cluster_rows=FALSE, - col = colorRamp2(c(-0.5, 0, 0.1), - c("blue", "white", "red")), + col = col_fun, heatmap_legend_param=list(title="Normalized KO Growth Score", - at = c(-0.6, 0, 0.2), + at = c(min(as.matrix(tmp)), 0, max(as.matrix(tmp))), labels = c("Decr. Fitness", "No change", "Incr. fitness")), width = ncol(final_data)*unit(5, "mm"), height = nrow(final_data)*unit(5, "mm")) + @@ -652,11 +658,11 @@ ht = Heatmap(row_labels=data$name, ht ``` - +Save figures ```{r} png(filename="D:/Analysis/EMT/figures/scRNASeq_avg_rxn_ko.png", - width=7.5, - height=10, + width=20, + height=12.5, units='in', res=1200) ht diff --git a/src/r/make_manuscript_figures.nb.html b/src/r/make_manuscript_figures.nb.html index c73ff69..8b79383 100644 --- a/src/r/make_manuscript_figures.nb.html +++ b/src/r/make_manuscript_figures.nb.html @@ -1755,14 +1755,80 @@

Load Libraries

Let’s load some plotting libraries.

- -
library(tidyverse)
-library(ggplot2)
-library(reshape2)
-library(extrafont)
-library(ComplexHeatmap)
-library(circlize)
-#font_import()
+
+
library(tidyverse)
+ + +
Registered S3 methods overwritten by 'dbplyr':
+  method         from
+  print.tbl_lazy     
+  print.tbl_sql      
+-- Attaching packages ----------------------------------------------------------------------------------------------- tidyverse 1.3.1 --
+v ggplot2 3.3.3     v purrr   0.3.4
+v tibble  3.1.1     v dplyr   1.0.5
+v tidyr   1.1.3     v stringr 1.4.0
+v readr   1.4.0     v forcats 0.5.1
+-- Conflicts -------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
+x dplyr::filter() masks stats::filter()
+x dplyr::lag()    masks stats::lag()
+ + +
library(ggplot2)
+library(reshape2)
+ + +

+Attaching package: 㤼㸱reshape2㤼㸲
+
+The following object is masked from 㤼㸱package:tidyr㤼㸲:
+
+    smiths
+ + +
library(extrafont)
+ + +
Registering fonts with R
+ + +
library(ComplexHeatmap)
+ + +
Loading required package: grid
+========================================
+ComplexHeatmap version 2.6.2
+Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
+Github page: https://github.com/jokergoo/ComplexHeatmap
+Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
+
+If you use it in published research, please cite:
+Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
+  genomic data. Bioinformatics 2016.
+
+This message can be suppressed by:
+  suppressPackageStartupMessages(library(ComplexHeatmap))
+========================================
+ + +
library(circlize)
+ + +
========================================
+circlize version 0.4.12
+CRAN page: https://cran.r-project.org/package=circlize
+Github page: https://github.com/jokergoo/circlize
+Documentation: https://jokergoo.github.io/circlize_book/book/
+
+If you use it in published research, please cite:
+Gu, Z. circlize implements and enhances circular visualization
+  in R. Bioinformatics 2014.
+
+This message can be suppressed by:
+  suppressPackageStartupMessages(library(circlize))
+========================================
+ + +
#font_import()
 #loadfonts(device = "win")
@@ -1996,11 +2062,11 @@

vi. Save the figure

vii. Create the accessory function

-

This accessory function pathway_heatmap will create a heatmap for specific metabolic pathways using the data variable constructed in the previous code blocks. You can also subset a data frame manually and visualize it that way.

+

This accessory function flux_heatmap will create a heatmap for specific metabolic pathways using the data variable constructed in the previous code blocks. You can also subset a data frame manually and visualize it that way.

- -
pathway_heatmap = function(df, pathway){
+
+
flux_heatmap = function(df, pathway){
   
   # If pathway variable is missing, just visualize everything
   if(missing(pathway)){
@@ -2022,6 +2088,7 @@ 

vii. Create the accessory function

mid2_data = abs(data[, c("GSE17708_24hrs", "Garcia_24hrs", "GSE147405_24hrs")]) late_data = abs(data[, c("GSE17708_72hrs", "GSE17518_72hrs", "Garcia_48hrs", "Keshamouni_72hrs", "GSE147405_72hrs")]) + tmp = cbind(early_data, mid1_data, mid2_data, late_data) # Create column annotation bars h1_col = HeatmapAnnotation(h1=anno_block(gp=gpar(fill="#CAC4CE"), @@ -2058,7 +2125,10 @@

vii. Create the accessory function

) # Construct the heatmaps - col_fun = colorRamp2(c(0, 5, 20), + col_fun = colorRamp2(c(0, + median(as.matrix(tmp), + na.rm=TRUE), + max(as.matrix(tmp), na.rm=TRUE)), c("Gray", "#FD7470", "#DC1C13")) h1 = Heatmap(row_labels=data$name, row_names_side="left", @@ -2102,7 +2172,7 @@

vii. Create the accessory function

bottom_annotation=h4_col, width = ncol(late_data)*unit(5, "mm"), height = nrow(late_data)*unit(5, "mm"), - heatmap_legend_param=list(title="Absolute Flux")) + heatmap_legend_param=list(title="Absolute flux (gDW/mmol*hr)")) # Return the final figure final_figure = h1+h2+h3+h4+hm+bp @@ -2119,8 +2189,8 @@

C. Visualize Glycolytic Reactions

Now let’s specifically get reactions related to Glycolysis.

- -
glycolysis_figures = pathway_heatmap(data, pathway="Glycolysis/Gluconeogenesis")
+ +
glycolysis_figures = flux_heatmap(data, pathway="Glycolysis/Gluconeogenesis")
The input is a data frame, convert it to the matrix.The input is a data frame, convert it to the matrix.The input is a data frame, convert it to the matrix.The input is a data frame, convert it to the matrix.
@@ -2145,8 +2215,8 @@

C. Visualize Glycolytic Reactions

D. Visualize Pentose Phosphate Pathway Reactions

- -
ppp_figures = pathway_heatmap(data, pathway="Pentose Phosphate Pathway")
+ +
ppp_figures = flux_heatmap(data, pathway="Pentose Phosphate Pathway")
The input is a data frame, convert it to the matrix.The input is a data frame, convert it to the matrix.The input is a data frame, convert it to the matrix.The input is a data frame, convert it to the matrix.
@@ -2172,7 +2242,7 @@

E. Visualize Specific Reactions

Now let’s specifically get reactions that are unique to each state.

- +
# Manually subset reactions
 reactions_of_interest = c("ribulose 5-phosphate 3-epimerase",
                           "Biotin transport via sodium symport",
@@ -2183,7 +2253,7 @@ 

E. Visualize Specific Reactions

specific_data = data[idx, ] # Create figures -specific_figures = pathway_heatmap(specific_data)
+specific_figures = flux_heatmap(specific_data)
The input is a data frame, convert it to the matrix.The input is a data frame, convert it to the matrix.The input is a data frame, convert it to the matrix.The input is a data frame, convert it to the matrix.
@@ -2246,7 +2316,7 @@

A. Load Data

B. Create accessory function

- +
ko_heatmap = function(df, pathway){
   
   # If pathway variable is missing, just visualize everything
@@ -2269,7 +2339,7 @@ 

B. Create accessory function

mid2_data = data[, c("GSE17708_24hrs", "Garcia_24hrs", "GSE147405_24hrs")] - 1 late_data = data[, c("GSE17708_72hrs", "GSE17518_72hrs", "Garcia_48hrs", "Keshamouni_72hrs", "GSE147405_72hrs")] - 1 - tmp = cbind(early_data, mid1_data, mid2_data, late_data) - 1 + tmp = cbind(early_data, mid1_data, mid2_data, late_data) # Create column annotation bars h1_col = HeatmapAnnotation(h1=anno_block(gp=gpar(fill="#CAC4CE"), @@ -2308,7 +2378,7 @@

B. Create accessory function

) # Construct the heatmaps - col_fun = colorRamp2(c(min(tmp), 0, max(tmp)), + col_fun = colorRamp2(c(min(as.matrix(tmp)), 0, max(as.matrix(tmp))), c("#073B4C", "#FFFFFF", "#EF476F")) h1 = Heatmap(row_labels=data$name, row_names_side="left", @@ -2353,7 +2423,7 @@

B. Create accessory function

width = ncol(late_data)*unit(5, "mm"), height = nrow(late_data)*unit(5, "mm"), heatmap_legend_param=list(title="Normalized KO Growth Score", - at = c(min(tmp), 0, max(tmp)), + at = c(min(as.matrix(tmp)), 0, max(as.matrix(tmp))), labels = c("Decr. Fitness", "No change", "Incr. fitness"))) # Return the final figure @@ -2365,18 +2435,18 @@

B. Create accessory function

-
-

B. Plot the top 50 reactions based on priority score.

+
+

C. Plot the top 50 reactions based on priority score.

-

+

-
-

C. Grab high confidence reactions (2 or more in same sample)

+
+

D. Grab high confidence reactions (2 or more in same sample)

There’s not that much for high confidence reactions.

@@ -2406,11 +2476,22 @@

C. Grab high confidence reactions (2 or more in same sample)

null device 
           1 
+ +
# Show
+specific_figures
+ + +

+ + +
NA
+NA
+
-
-

D. Grab low confidence reactions (1 in same sample)

+
+

E. Grab low confidence reactions (1 in same sample)

The list of low confidence reactions I’d like to visualize is high.

@@ -2459,20 +2540,9 @@

D. Grab low confidence reactions (1 in same sample)

Subset and create the plot.

- -
idx = data$`BiGG Reaction ID` %in% reactions_of_interest
-specific_data = data[idx, ]
-
-# Create figures
-specific_figures = ko_heatmap(specific_data)
-png(filename="D:/Analysis/EMT/figures/RECON1_rxnko_low_priority_heatmap.png",
-       width=20,
-       height=12.5,
-       units='in',
-    res=1200)
-specific_figures
-dev.off()
- + +

+
@@ -2491,11 +2561,33 @@

3. Single-Cell Reaction KOs

A. Load Data

- +
filename = "D:/Analysis/EMT/data/scRNASeq Reaction KO.csv"
-data = read_csv(filename)
-
-# Create new variable for row names
+data = read_csv(filename)
+ + +

+-- Column specification ----------------------------------------------------------------------------------------------------------------
+cols(
+  `Reaction Name` = col_character(),
+  `BiGG Reaction ID` = col_character(),
+  `KEGG Subsystem` = col_character(),
+  Average_All = col_double(),
+  Average_Day0 = col_double(),
+  Average_Hour8 = col_double(),
+  Average_Day1 = col_double(),
+  Average_Day3 = col_double(),
+  Average_Day7 = col_double(),
+  Std_All = col_double(),
+  Std_Day0 = col_double(),
+  Std_Hour8 = col_double(),
+  Std_Day1 = col_double(),
+  Std_Day3 = col_double(),
+  Std_Day7 = col_double()
+)
+ + +
# Create new variable for row names
 data$name = paste0(data$`BiGG Reaction ID`, 
                     ', ',
                     data$`KEGG Subsystem`)
@@ -2521,12 +2613,14 @@

B. Subset the average data columns

C. Create an annotation heatmap showing the average standard deviation for each reaction across all cells

- +
hm = Heatmap(as.matrix(data[, "Std_All"]),
              rect_gp = gpar(col = "black", 
                             lwd = 1),
              col = colorRamp2(c(0, 0.3), 
                               c("#FFFFFF", "orange")),
+             cluster_columns=FALSE,
+             cluster_rows=FALSE,
              heatmap_legend_param=list(title="Average Std",
                                        color_bar="continuous"),
              width = ncol(data[, "Std_All"])*unit(5, "mm"), 
@@ -2539,8 +2633,11 @@ 

C. Create an annotation heatmap showing the average standard deviation for e

D. Create the Heatmap with additional figures

- +
final_data = as.matrix(tmp)
+col_fun = colorRamp2(c(min(as.matrix(tmp)), 0, max(as.matrix(tmp))), 
+                       c("#073B4C", "#FFFFFF", "#EF476F"))
+
 ht = Heatmap(row_labels=data$name,
              final_data, 
              row_names_side="left",
@@ -2548,10 +2645,9 @@ 

D. Create the Heatmap with additional figures

lwd = 1), cluster_columns=FALSE, cluster_rows=FALSE, - col = colorRamp2(c(-0.5, 0, 0.1), - c("blue", "white", "red")), + col = col_fun, heatmap_legend_param=list(title="Normalized KO Growth Score", - at = c(-0.6, 0, 0.2), + at = c(min(as.matrix(tmp)), 0, max(as.matrix(tmp))), labels = c("Decr. Fitness", "No change", "Incr. fitness")), width = ncol(final_data)*unit(5, "mm"), height = nrow(final_data)*unit(5, "mm")) + @@ -2560,21 +2656,16 @@

D. Create the Heatmap with additional figures

ht
-

+

+

Save figures

- -
png(filename="D:/Analysis/EMT/figures/scRNASeq_avg_rxn_ko.png",
-       width=7.5,
-       height=10,
-       units='in',
-    res=1200)
-ht
-dev.off()
- + +

+
@@ -2584,7 +2675,7 @@

Conclusion

-
---
title: "EMT Manuscript Figures"
output: html_notebook
author: "Scott"
---

## Summary
This notebook makes several figures for the COBRA EMT publication.

```{r}
rm(list=ls())
```

## Load Libraries
Let's load some plotting libraries.

```{r}
library(tidyverse)
library(ggplot2)
library(reshape2)
library(extrafont)
library(ComplexHeatmap)
library(circlize)
#font_import()
#loadfonts(device = "win")
```

## 1. Visualize Bulk Fluxes

### A. Load Data
First, let's load the data
```{r}
filename = "D:/Analysis/EMT/data/Bulk Flux Intersection.csv"
data = read_csv(filename)
```

### B. Visualize the top 50 reaction knockouts from bulk/aggreggated studies
Here's a description of each variable created:
  * `top50`: Top 50 reactions ranked by priority score
  * `bardata`: data used in Bar Plot
  * `expt_data`: data used in discrete heatmap
  
```{r}
# Sort data by priority score
data = data[order(data$`Absolute Effect * N expts`, decreasing=TRUE), ]

# Create new variable for row names
data$name = paste0(data$`BiGG Reaction ID`, 
                    ', ',
                    data$`KEGG Subsystem`)

# Grab top 50
top50 = data[1:50, ]

# Create new data for barplots and discrete heatmap
bardata = top50[, 4]
expt_data = top50[, 5]
```

#### i. Reformat the data
We're going to grab selected time points that have replicate values across multiple studies and tell the EMT story. The selected time points will be stored 4 separate dataframes corresponding to each time point.

```{r}
# Split by hours
early_data = abs(top50[, c("GSE17708_1hrs", "Garcia_1hrs")])
mid1_data  = abs(top50[, c("GSE17708_8hrs", "GSE147405_8hrs")])
mid2_data  = abs(top50[, c("GSE17708_24hrs", "Garcia_24hrs", "GSE147405_24hrs")])
late_data  = abs(top50[, c("GSE17708_72hrs", "GSE17518_72hrs", "Garcia_48hrs", 
                       "Keshamouni_72hrs", "GSE147405_72hrs")])
```

#### ii. Create the column annotation heatmap for each time-point
This creates individual annotation bars for each of the corresponding time points.

```{r}
h1_col = HeatmapAnnotation(h1=anno_block(gp=gpar(fill="#CAC4CE"),
                                      labels=c("Early"), 
                                      labels_gp=gpar(col="black", 
                                                     fontsize=10)))
h2_col = HeatmapAnnotation(h2=anno_block(gp=gpar(fill="#8D86C9"),
                                      labels=c("Int. 1"), 
                                      labels_gp=gpar(col="white", 
                                                     fontsize=10)))
h3_col = HeatmapAnnotation(h3=anno_block(gp=gpar(fill="#725AC1"),
                                      labels=c("Int. 2"), 
                                      labels_gp=gpar(col="white", 
                                                     fontsize=10)))
h4_col = HeatmapAnnotation(h4=anno_block(gp=gpar(fill="#242038"),
                                      labels=c("Late"), 
                                      labels_gp=gpar(col="white", 
                                                     fontsize=10)))
```

#### iii. Create the barplot that shows the priority score
The annotation barplot will show the priority scores on the righthand side.

```{r}
bp = rowAnnotation(
  Priority=anno_barplot(
    bardata
  )
)
```

#### iv. Create a heatmap showing the number of experiments for each reaction
We'll also create an annotation heatmap corresponding to the number of experiments for each reaction.

```{r, fig.width=10, fig.height=7.5}
hm = Heatmap(as.matrix(expt_data),
             col = colorRamp2(c(1,5), 
                              c("Gray", "#95C623")),
             heatmap_legend_param=list(title="Experiments",
                                       color_bar="discrete"),
             rect_gp = gpar(col="black", 
                              lwd=1),
             width = ncol(expt_data)*unit(5, "mm"), 
             height = nrow(expt_data)*unit(5, "mm"))
```

#### v. Create the figure
Finally, we'll create the entire figure.

```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100}
col_fun = colorRamp2(c(0, 5, 20), 
                     c("Gray", "#FD7470", "#DC1C13"))
h1 = Heatmap(row_labels=top50$name,
             row_names_side="left",
             early_data, 
             col=col_fun, 
             cluster_columns=FALSE, 
               rect_gp = gpar(col="black", 
                              lwd=1),
             cluster_rows=FALSE,
             show_heatmap_legend=FALSE,
             bottom_annotation=h1_col, 
             width = ncol(early_data)*unit(5, "mm"), 
             height = nrow(early_data)*unit(5, "mm"))
h2 = Heatmap(mid1_data, 
             col=col_fun, 
             cluster_columns=FALSE, 
               rect_gp = gpar(col="black", 
                              lwd=1),
             cluster_rows=FALSE,
             show_heatmap_legend=FALSE,
             bottom_annotation=h2_col, 
             width = ncol(mid1_data)*unit(5, "mm"), 
             height = nrow(mid1_data)*unit(5, "mm"))
h3 = Heatmap(mid2_data, 
             col=col_fun, 
             cluster_columns=FALSE, 
               rect_gp = gpar(col="black", 
                              lwd=1),
             
             cluster_rows=FALSE,
             show_heatmap_legend=FALSE,
             bottom_annotation=h3_col, 
             width = ncol(mid2_data)*unit(5, "mm"), 
             height = nrow(mid2_data)*unit(5, "mm"))
h4 = Heatmap(late_data, 
             col=col_fun, 
             cluster_columns=FALSE, 
               rect_gp = gpar(col="black", 
                              lwd=1),
             cluster_rows=FALSE,
             show_heatmap_legend=TRUE,
             bottom_annotation=h4_col,
             width = ncol(late_data)*unit(5, "mm"), 
             height = nrow(late_data)*unit(5, "mm"),
             heatmap_legend_param=list(title="Absolute Flux"))

final_figure = h1+h2+h3+h4+hm+bp
final_figure
```

#### vi. Save the figure
Finally, we'll save the entire figure with high resolution.

```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100}
png(filename="D:/Analysis/EMT/figures/RECON1_flux_heatmap.png",
       width=20,
       height=12.5,
       units='in',
    res=1200)
final_figure
dev.off()
```
#### vii. Create the accessory function
This accessory function `pathway_heatmap` will create a heatmap for specific metabolic pathways using the `data` variable constructed in the previous code blocks. You can also subset a data frame manually and visualize it that way.

```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100}
flux_heatmap = function(df, pathway){
  
  # If pathway variable is missing, just visualize everything
  if(missing(pathway)){
    data = df
  }else{
    idx = (df$`KEGG Subsystem` == pathway)
    idx[is.na(idx)] = FALSE
    data = df[idx, ]
  }

  # Create new data for barplots and discrete heatmap
  bardata = data[, 4]
  bardata[is.na(bardata)] = 0
  expt_data = data[, 5]
  
  # Split by hours
  early_data = abs(data[, c("GSE17708_1hrs", "Garcia_1hrs")])
  mid1_data  = abs(data[, c("GSE17708_8hrs", "GSE147405_8hrs")])
  mid2_data  = abs(data[, c("GSE17708_24hrs", "Garcia_24hrs", "GSE147405_24hrs")])
  late_data  = abs(data[, c("GSE17708_72hrs", "GSE17518_72hrs", "Garcia_48hrs", 
                                     "Keshamouni_72hrs", "GSE147405_72hrs")])
  
  # Create column annotation bars
  h1_col = HeatmapAnnotation(h1=anno_block(gp=gpar(fill="#CAC4CE"),
                                        labels=c("Early"), 
                                        labels_gp=gpar(col="black", 
                                                       fontsize=10)))
  h2_col = HeatmapAnnotation(h2=anno_block(gp=gpar(fill="#8D86C9"),
                                        labels=c("Int. 1"), 
                                        labels_gp=gpar(col="white", 
                                                       fontsize=10)))
  h3_col = HeatmapAnnotation(h3=anno_block(gp=gpar(fill="#725AC1"),
                                        labels=c("Int. 2"), 
                                        labels_gp=gpar(col="white", 
                                                       fontsize=10)))
  h4_col = HeatmapAnnotation(h4=anno_block(gp=gpar(fill="#242038"),
                                        labels=c("Late"), 
                                        labels_gp=gpar(col="white", 
                                                     fontsize=10)))
  
  # Construct experiment heatmap annotation
  hm = Heatmap(as.matrix(expt_data),
             col = colorRamp2(c(1,5), 
                              c("Gray", "#95C623")),
             heatmap_legend_param=list(title="Experiments",
                                       color_bar="discrete"), 
             width = ncol(expt_data)*unit(5, "mm"), 
             height = nrow(expt_data)*unit(5, "mm"))
  
  # Construct priority score barplot annotation
  bp = rowAnnotation(
    Priority=anno_barplot(
      bardata
    )
  )
  
  # Construct the heatmaps
  col_fun = colorRamp2(c(0, median(data), max(data)), 
                       c("Gray", "#FD7470", "#DC1C13"))
  h1 = Heatmap(row_labels=data$name,
               row_names_side="left",
               early_data, 
               col=col_fun, 
               cluster_columns=FALSE, 
               rect_gp = gpar(col="black", 
                              lwd=1),
               cluster_rows=FALSE,
               show_heatmap_legend=FALSE,
               bottom_annotation=h1_col, 
               width = ncol(early_data)*unit(5, "mm"), 
               height = nrow(early_data)*unit(5, "mm"))
  h2 = Heatmap(mid1_data, 
               col=col_fun, 
               cluster_columns=FALSE, 
               rect_gp = gpar(col="black", 
                              lwd=1),
               cluster_rows=FALSE,
               show_heatmap_legend=FALSE,
               bottom_annotation=h2_col, 
               width = ncol(mid1_data)*unit(5, "mm"), 
               height = nrow(mid1_data)*unit(5, "mm"))
  h3 = Heatmap(mid2_data, 
               col=col_fun, 
               cluster_columns=FALSE, 
               rect_gp = gpar(col="black", 
                              lwd=1),
               cluster_rows=FALSE,
               show_heatmap_legend=FALSE,
               bottom_annotation=h3_col, 
               width = ncol(mid2_data)*unit(5, "mm"), 
               height = nrow(mid2_data)*unit(5, "mm"))
  h4 = Heatmap(late_data, 
               col=col_fun, 
               cluster_columns=FALSE, 
               rect_gp = gpar(col="black", 
                              lwd=1),
               cluster_rows=FALSE,
               show_heatmap_legend=TRUE,
               bottom_annotation=h4_col,
               width = ncol(late_data)*unit(5, "mm"), 
               height = nrow(late_data)*unit(5, "mm"),
               heatmap_legend_param=list(title="Absolute flux (gDW/mmol*hr)"))
  
  # Return the final figure
  final_figure = h1+h2+h3+h4+hm+bp
  return(final_figure)
    
}
```

### C. Visualize Glycolytic Reactions
Now let's specifically get reactions related to Glycolysis.
```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100}
glycolysis_figures = flux_heatmap(data, pathway="Glycolysis/Gluconeogenesis")
png(filename="D:/Analysis/EMT/figures/glycolysis_flux_heatmap.png",
       width=20,
       height=12.5,
       units='in',
    res=1200)
glycolysis_figures
dev.off()
```

### D. Visualize Pentose Phosphate Pathway Reactions
```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100}
ppp_figures = flux_heatmap(data, pathway="Pentose Phosphate Pathway")
png(filename="D:/Analysis/EMT/figures/ppp_flux_heatmap.png",
       width=20,
       height=12.5,
       units='in',
    res=1200)
ppp_figures
dev.off()
```


### E. Visualize Specific Reactions
Now let's specifically get reactions that are unique to each state.
```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100}
# Manually subset reactions
reactions_of_interest = c("ribulose 5-phosphate 3-epimerase",
                          "Biotin transport via sodium symport",
                          "Biotin reversible transport via proton symport",
                          "NADH dehydrogenase, mitochondrial",
                          "L-lactate dehydrogenase")
idx = data$`BiGG Reaction ID` %in% reactions_of_interest
specific_data = data[idx, ]

# Create figures
specific_figures = flux_heatmap(specific_data)
png(filename="D:/Analysis/EMT/figures/specific_flux_heatmap.png",
       width=20,
       height=12.5,
       units='in',
    res=1200)
specific_figures
dev.off()
```

## 2. Visualize the Bulk Reaction KOs
```{r}
rm(list=setdiff(ls(), "flux_heatmap"))
```

### A. Load Data
```{r}
filename = "D:/Analysis/EMT/data/Bulk Reaction KO Intersection.csv"
data = read_csv(filename)

# Create new variable for row names
data$name = paste0(data$`BiGG Reaction ID`, 
                    ', ',
                    data$`KEGG Subsystem`)
```

### B. Create accessory function
```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100}
ko_heatmap = function(df, pathway){
  
  # If pathway variable is missing, just visualize everything
  if(missing(pathway)){
    data = df
  }else{
    idx = (df$`KEGG Subsystem` == pathway)
    idx[is.na(idx)] = FALSE
    data = df[idx, ]
  }

  # Create new data for barplots and discrete heatmap
  bardata = data[, 4]
  bardata[is.na(bardata)] = 0
  expt_data = data[, 5]
  
  # Split by hours
  early_data = data[, c("GSE17708_1hrs", "Garcia_1hrs")] - 1
  mid1_data  = data[, c("GSE17708_8hrs", "GSE147405_8hrs")] - 1
  mid2_data  = data[, c("GSE17708_24hrs", "Garcia_24hrs", "GSE147405_24hrs")] - 1
  late_data  = data[, c("GSE17708_72hrs", "GSE17518_72hrs", "Garcia_48hrs", 
                                     "Keshamouni_72hrs", "GSE147405_72hrs")] - 1
  tmp = cbind(early_data, mid1_data, mid2_data, late_data) - 1
  
  # Create column annotation bars
  h1_col = HeatmapAnnotation(h1=anno_block(gp=gpar(fill="#CAC4CE"),
                                        labels=c("Early"), 
                                        labels_gp=gpar(col="black", 
                                                       fontsize=10)))
  h2_col = HeatmapAnnotation(h2=anno_block(gp=gpar(fill="#8D86C9"),
                                        labels=c("Int. 1"), 
                                        labels_gp=gpar(col="white", 
                                                       fontsize=10)))
  h3_col = HeatmapAnnotation(h3=anno_block(gp=gpar(fill="#725AC1"),
                                        labels=c("Int. 2"), 
                                        labels_gp=gpar(col="white", 
                                                       fontsize=10)))
  h4_col = HeatmapAnnotation(h4=anno_block(gp=gpar(fill="#242038"),
                                        labels=c("Late"), 
                                        labels_gp=gpar(col="white", 
                                                     fontsize=10)))
  
  # Construct experiment heatmap annotation
  hm = Heatmap(as.matrix(expt_data),
             col = colorRamp2(c(1, 5), 
                              c("Gray", "#95C623")),
             heatmap_legend_param=list(title="Experiments",
                                       color_bar="discrete"), 
             rect_gp = gpar(col="black", 
                              lwd=1),
             width = ncol(expt_data)*unit(5, "mm"), 
             height = nrow(expt_data)*unit(5, "mm"))
  
  # Construct priority score barplot annotation
  bp = rowAnnotation(
    Priority=anno_barplot(
      bardata
    )
  )
  
  # Construct the heatmaps
  col_fun = colorRamp2(c(min(tmp), 0, max(tmp)), 
                       c("#073B4C", "#FFFFFF", "#EF476F"))
  h1 = Heatmap(row_labels=data$name,
               row_names_side="left",
               early_data, 
               col=col_fun, 
               cluster_columns=FALSE, 
               rect_gp = gpar(col="black", 
                              lwd=1),
               cluster_rows=FALSE,
               show_heatmap_legend=FALSE,
               bottom_annotation=h1_col, 
               width = ncol(early_data)*unit(5, "mm"), 
               height = nrow(early_data)*unit(5, "mm"))
  h2 = Heatmap(mid1_data, 
               col=col_fun, 
               cluster_columns=FALSE, 
               rect_gp = gpar(col="black", 
                              lwd=1),
               cluster_rows=FALSE,
               show_heatmap_legend=FALSE,
               bottom_annotation=h2_col, 
               width = ncol(mid1_data)*unit(5, "mm"), 
               height = nrow(mid1_data)*unit(5, "mm"))
  h3 = Heatmap(mid2_data, 
               col=col_fun, 
               cluster_columns=FALSE, 
               rect_gp = gpar(col="black", 
                              lwd=1),
               cluster_rows=FALSE,
               show_heatmap_legend=FALSE,
               bottom_annotation=h3_col, 
               width = ncol(mid2_data)*unit(5, "mm"), 
               height = nrow(mid2_data)*unit(5, "mm"))
  h4 = Heatmap(late_data, 
               col=col_fun, 
               cluster_columns=FALSE, 
               rect_gp = gpar(col="black", 
                              lwd=1),
               cluster_rows=FALSE,
               show_heatmap_legend=TRUE,
               bottom_annotation=h4_col,
               width = ncol(late_data)*unit(5, "mm"), 
               height = nrow(late_data)*unit(5, "mm"),
               heatmap_legend_param=list(title="Normalized KO Growth Score",
                                       at = c(min(tmp), 0, max(tmp)),
                                       labels = c("Decr. Fitness", "No change", "Incr. fitness")))
  
  # Return the final figure
  final_figure = h1+h2+h3+h4+hm+bp
  return(final_figure)
    
}
```

### B. Plot the top 50 reactions based on priority score.
```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100}
# Sort data by priority score
data = data[order(data$`Absolute Effect * N expts`, decreasing=TRUE), ]

# Grab top 50
top50 = data[1:50, ]

# Create figures
top50_figures = ko_heatmap(top50)
png(filename="D:/Analysis/EMT/figures/RECON1_rxnko_heatmap.png",
       width=20,
       height=12.5,
       units='in',
    res=1200)
top50_figures
dev.off

# Show
top50_figures
```

### C. Grab high confidence reactions (2 or more in same sample)
There's not that much for high confidence reactions.

```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100}
# Subset Reactions
reactions_of_interest = c("alpha-ketoglutarate/malate transporter",
                          "carnitine O-acetyltransferase")
idx = data$`BiGG Reaction ID` %in% reactions_of_interest
specific_data = data[idx, ]

# Create figures
specific_figures = ko_heatmap(specific_data)
png(filename="D:/Analysis/EMT/figures/RECON1_rxnko_high_priority_heatmap.png",
       width=20,
       height=12.5,
       units='in',
    res=1200)
specific_figures
dev.off()

# Show
specific_figures


```

### D. Grab low confidence reactions (1 in same sample)
The list of low confidence reactions I'd like to visualize is high.

```{r}
reactions_of_interest = c("enolase",
                          "fatty-acyl-CoA elongation (n-C20:4CoA)",
                          "fatty acyl-CoA synthase (n-C10:0CoA)",
                          "fatty-acyl-CoA synthase (n-C12:0CoA)",
                          "fatty-acyl-CoA synthase (n-C14:0CoA)",
                          "hydroxymethylglutaryl-CoA lyase",
                          "dGTP transport via ATP antiport",
                          "UDPglucose 4-epimerase",
                          "adenosine kinase",
                          "pantetheine-phosphate adenylyltransferase",
                          " 4-Hydroxyphenylpyruvate:oxygen oxidoreductase",
                          "fumarylacetoacetase",
                          "Homogentisate:oxygen 1,2-oxidoreductase (decyclizing)",
                          "maleylacetoacetate isomerase",
                          "Malate:sulfite antiport, mitochondrial",
                          "chondroitin 6-sulfotransferase, Golgi apparatus",
                          "nicotinate-nucleotide adenylyltransferase",
                          "malate dehydrogenase",
                          "Norepinephrine uniport",
                          "Hydroxymethylglutaryl-CoA reversible mitochondrial transport",
                          "carnitine-acetylcarnitine carrier, peroxisomal",
                          "carnitine O-acetyltransferase, reverse direction, peroxisomal",
                          "UDP-GlcNAc:betaGal beta-1,3-N-acetylglucosaminyltransferase 1",
                          "UDP-GlcNAc:betaGal beta-1,3-N-acetylglucosaminyltransferase 3, Golgi apparatus",
                          "Hydroxymethylglutaryl CoA synthase (ir)",
                          "citrate transport, mitochondrial",
                          "Aconitate hydratase",
                          "ATP-Citrate lyase",
                          "fumarase, mitochondrial",
                          "triose-phosphate isomerase",
                          "acetone monooxygenase",
                          "fructose-bisphosphatase",
                          "ornithine transaminase reversible (m)",
                          "succinate dehydrogenase",
                          "carnitine O-aceyltransferase, mitochondrial",
                          "citrate synthase",
                          "Isocitrate dehydrogenase (NADP+)",
                          "L-lactate dehydrogenase")
```

Subset and create the plot.
```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100}
idx = data$`BiGG Reaction ID` %in% reactions_of_interest
specific_data = data[idx, ]

# Create figures
specific_figures = ko_heatmap(specific_data)
png(filename="D:/Analysis/EMT/figures/RECON1_rxnko_low_priority_heatmap.png",
       width=20,
       height=12.5,
       units='in',
    res=1200)
specific_figures
dev.off()
```

## 3. Single-Cell Reaction KOs
Finally, let's create a visualization for the single-cell reaction knockouts.

```{r}
rm(list=ls())
```

### A. Load Data
```{r}
filename = "D:/Analysis/EMT/data/scRNASeq Reaction KO.csv"
data = read_csv(filename)

# Create new variable for row names
data$name = paste0(data$`BiGG Reaction ID`, 
                    ', ',
                    data$`KEGG Subsystem`)
```
### B. Subset the average data columns
The data has already been averaged across all cells.
```{r}
tmp = data[, c("Average_All",	"Average_Day0",
               "Average_Hour8",	"Average_Day1",
               "Average_Day3",	"Average_Day7")]
tmp = tmp-1
```

### C. Create an annotation heatmap showing the average standard deviation for each reaction across all cells
```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100}
hm = Heatmap(as.matrix(data[, "Std_All"]),
             rect_gp = gpar(col = "black", 
                            lwd = 1),
             col = colorRamp2(c(0, 0.3), 
                              c("#FFFFFF", "orange")),
             cluster_columns=FALSE,
             cluster_rows=FALSE,
             heatmap_legend_param=list(title="Average Std",
                                       color_bar="continuous"),
             width = ncol(data[, "Std_All"])*unit(5, "mm"), 
             height = nrow(data[, "Std_All"])*unit(5, "mm"))
```

### D. Create the Heatmap with additional figures
```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100}
final_data = as.matrix(tmp)
ht = Heatmap(row_labels=data$name,
             final_data, 
             row_names_side="left",
             rect_gp = gpar(col = "black", 
                            lwd = 1),
             cluster_columns=FALSE,
             cluster_rows=FALSE,
             col = colorRamp2(c(-0.5, 0, 0.1), 
                              c("blue", "white", "red")),
             heatmap_legend_param=list(title="Normalized KO Growth Score",
                                       at = c(-0.6, 0, 0.2),
                                       labels = c("Decr. Fitness", "No change", "Incr. fitness")),
             width = ncol(final_data)*unit(5, "mm"), 
             height = nrow(final_data)*unit(5, "mm")) +
     hm 

ht
```

```{r}
png(filename="D:/Analysis/EMT/figures/scRNASeq_avg_rxn_ko.png",
       width=7.5,
       height=10,
       units='in',
    res=1200)
ht
dev.off()
```

## Conclusion
+
---
title: "EMT Manuscript Figures"
output: html_notebook
author: "Scott"
---

## Summary
This notebook makes several figures for the COBRA EMT publication.

```{r}
rm(list=ls())
```

## Load Libraries
Let's load some plotting libraries.

```{r}
library(tidyverse)
library(ggplot2)
library(reshape2)
library(extrafont)
library(ComplexHeatmap)
library(circlize)
#font_import()
#loadfonts(device = "win")
```

## 1. Visualize Bulk Fluxes

### A. Load Data
First, let's load the data
```{r}
filename = "D:/Analysis/EMT/data/Bulk Flux Intersection.csv"
data = read_csv(filename)
```

### B. Visualize the top 50 reaction knockouts from bulk/aggreggated studies
Here's a description of each variable created:
  * `top50`: Top 50 reactions ranked by priority score
  * `bardata`: data used in Bar Plot
  * `expt_data`: data used in discrete heatmap
  
```{r}
# Sort data by priority score
data = data[order(data$`Absolute Effect * N expts`, decreasing=TRUE), ]

# Create new variable for row names
data$name = paste0(data$`BiGG Reaction ID`, 
                    ', ',
                    data$`KEGG Subsystem`)

# Grab top 50
top50 = data[1:50, ]

# Create new data for barplots and discrete heatmap
bardata = top50[, 4]
expt_data = top50[, 5]
```

#### i. Reformat the data
We're going to grab selected time points that have replicate values across multiple studies and tell the EMT story. The selected time points will be stored 4 separate dataframes corresponding to each time point.

```{r}
# Split by hours
early_data = abs(top50[, c("GSE17708_1hrs", "Garcia_1hrs")])
mid1_data  = abs(top50[, c("GSE17708_8hrs", "GSE147405_8hrs")])
mid2_data  = abs(top50[, c("GSE17708_24hrs", "Garcia_24hrs", "GSE147405_24hrs")])
late_data  = abs(top50[, c("GSE17708_72hrs", "GSE17518_72hrs", "Garcia_48hrs", 
                       "Keshamouni_72hrs", "GSE147405_72hrs")])
```

#### ii. Create the column annotation heatmap for each time-point
This creates individual annotation bars for each of the corresponding time points.

```{r}
h1_col = HeatmapAnnotation(h1=anno_block(gp=gpar(fill="#CAC4CE"),
                                      labels=c("Early"), 
                                      labels_gp=gpar(col="black", 
                                                     fontsize=10)))
h2_col = HeatmapAnnotation(h2=anno_block(gp=gpar(fill="#8D86C9"),
                                      labels=c("Int. 1"), 
                                      labels_gp=gpar(col="white", 
                                                     fontsize=10)))
h3_col = HeatmapAnnotation(h3=anno_block(gp=gpar(fill="#725AC1"),
                                      labels=c("Int. 2"), 
                                      labels_gp=gpar(col="white", 
                                                     fontsize=10)))
h4_col = HeatmapAnnotation(h4=anno_block(gp=gpar(fill="#242038"),
                                      labels=c("Late"), 
                                      labels_gp=gpar(col="white", 
                                                     fontsize=10)))
```

#### iii. Create the barplot that shows the priority score
The annotation barplot will show the priority scores on the righthand side.

```{r}
bp = rowAnnotation(
  Priority=anno_barplot(
    bardata
  )
)
```

#### iv. Create a heatmap showing the number of experiments for each reaction
We'll also create an annotation heatmap corresponding to the number of experiments for each reaction.

```{r, fig.width=10, fig.height=7.5}
hm = Heatmap(as.matrix(expt_data),
             col = colorRamp2(c(1,5), 
                              c("Gray", "#95C623")),
             heatmap_legend_param=list(title="Experiments",
                                       color_bar="discrete"),
             rect_gp = gpar(col="black", 
                              lwd=1),
             width = ncol(expt_data)*unit(5, "mm"), 
             height = nrow(expt_data)*unit(5, "mm"))
```

#### v. Create the figure
Finally, we'll create the entire figure.

```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100}
col_fun = colorRamp2(c(0, 5, 20), 
                     c("Gray", "#FD7470", "#DC1C13"))
h1 = Heatmap(row_labels=top50$name,
             row_names_side="left",
             early_data, 
             col=col_fun, 
             cluster_columns=FALSE, 
               rect_gp = gpar(col="black", 
                              lwd=1),
             cluster_rows=FALSE,
             show_heatmap_legend=FALSE,
             bottom_annotation=h1_col, 
             width = ncol(early_data)*unit(5, "mm"), 
             height = nrow(early_data)*unit(5, "mm"))
h2 = Heatmap(mid1_data, 
             col=col_fun, 
             cluster_columns=FALSE, 
               rect_gp = gpar(col="black", 
                              lwd=1),
             cluster_rows=FALSE,
             show_heatmap_legend=FALSE,
             bottom_annotation=h2_col, 
             width = ncol(mid1_data)*unit(5, "mm"), 
             height = nrow(mid1_data)*unit(5, "mm"))
h3 = Heatmap(mid2_data, 
             col=col_fun, 
             cluster_columns=FALSE, 
               rect_gp = gpar(col="black", 
                              lwd=1),
             
             cluster_rows=FALSE,
             show_heatmap_legend=FALSE,
             bottom_annotation=h3_col, 
             width = ncol(mid2_data)*unit(5, "mm"), 
             height = nrow(mid2_data)*unit(5, "mm"))
h4 = Heatmap(late_data, 
             col=col_fun, 
             cluster_columns=FALSE, 
               rect_gp = gpar(col="black", 
                              lwd=1),
             cluster_rows=FALSE,
             show_heatmap_legend=TRUE,
             bottom_annotation=h4_col,
             width = ncol(late_data)*unit(5, "mm"), 
             height = nrow(late_data)*unit(5, "mm"),
             heatmap_legend_param=list(title="Absolute Flux"))

final_figure = h1+h2+h3+h4+hm+bp
final_figure
```

#### vi. Save the figure
Finally, we'll save the entire figure with high resolution.

```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100}
png(filename="D:/Analysis/EMT/figures/RECON1_flux_heatmap.png",
       width=20,
       height=12.5,
       units='in',
    res=1200)
final_figure
dev.off()
```
#### vii. Create the accessory function
This accessory function `flux_heatmap` will create a heatmap for specific metabolic pathways using the `data` variable constructed in the previous code blocks. You can also subset a data frame manually and visualize it that way.

```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100}
flux_heatmap = function(df, pathway){
  
  # If pathway variable is missing, just visualize everything
  if(missing(pathway)){
    data = df
  }else{
    idx = (df$`KEGG Subsystem` == pathway)
    idx[is.na(idx)] = FALSE
    data = df[idx, ]
  }

  # Create new data for barplots and discrete heatmap
  bardata = data[, 4]
  bardata[is.na(bardata)] = 0
  expt_data = data[, 5]
  
  # Split by hours
  early_data = abs(data[, c("GSE17708_1hrs", "Garcia_1hrs")])
  mid1_data  = abs(data[, c("GSE17708_8hrs", "GSE147405_8hrs")])
  mid2_data  = abs(data[, c("GSE17708_24hrs", "Garcia_24hrs", "GSE147405_24hrs")])
  late_data  = abs(data[, c("GSE17708_72hrs", "GSE17518_72hrs", "Garcia_48hrs", 
                                     "Keshamouni_72hrs", "GSE147405_72hrs")])
  tmp = cbind(early_data, mid1_data, mid2_data, late_data)
  
  # Create column annotation bars
  h1_col = HeatmapAnnotation(h1=anno_block(gp=gpar(fill="#CAC4CE"),
                                        labels=c("Early"), 
                                        labels_gp=gpar(col="black", 
                                                       fontsize=10)))
  h2_col = HeatmapAnnotation(h2=anno_block(gp=gpar(fill="#8D86C9"),
                                        labels=c("Int. 1"), 
                                        labels_gp=gpar(col="white", 
                                                       fontsize=10)))
  h3_col = HeatmapAnnotation(h3=anno_block(gp=gpar(fill="#725AC1"),
                                        labels=c("Int. 2"), 
                                        labels_gp=gpar(col="white", 
                                                       fontsize=10)))
  h4_col = HeatmapAnnotation(h4=anno_block(gp=gpar(fill="#242038"),
                                        labels=c("Late"), 
                                        labels_gp=gpar(col="white", 
                                                     fontsize=10)))
  
  # Construct experiment heatmap annotation
  hm = Heatmap(as.matrix(expt_data),
             col = colorRamp2(c(1,5), 
                              c("Gray", "#95C623")),
             heatmap_legend_param=list(title="Experiments",
                                       color_bar="discrete"), 
             width = ncol(expt_data)*unit(5, "mm"), 
             height = nrow(expt_data)*unit(5, "mm"))
  
  # Construct priority score barplot annotation
  bp = rowAnnotation(
    Priority=anno_barplot(
      bardata
    )
  )
  
  # Construct the heatmaps
  col_fun = colorRamp2(c(0, 
                         median(as.matrix(tmp), 
                                   na.rm=TRUE), 
                         max(as.matrix(tmp), na.rm=TRUE)), 
                       c("Gray", "#FD7470", "#DC1C13"))
  h1 = Heatmap(row_labels=data$name,
               row_names_side="left",
               early_data, 
               col=col_fun, 
               cluster_columns=FALSE, 
               rect_gp = gpar(col="black", 
                              lwd=1),
               cluster_rows=FALSE,
               show_heatmap_legend=FALSE,
               bottom_annotation=h1_col, 
               width = ncol(early_data)*unit(5, "mm"), 
               height = nrow(early_data)*unit(5, "mm"))
  h2 = Heatmap(mid1_data, 
               col=col_fun, 
               cluster_columns=FALSE, 
               rect_gp = gpar(col="black", 
                              lwd=1),
               cluster_rows=FALSE,
               show_heatmap_legend=FALSE,
               bottom_annotation=h2_col, 
               width = ncol(mid1_data)*unit(5, "mm"), 
               height = nrow(mid1_data)*unit(5, "mm"))
  h3 = Heatmap(mid2_data, 
               col=col_fun, 
               cluster_columns=FALSE, 
               rect_gp = gpar(col="black", 
                              lwd=1),
               cluster_rows=FALSE,
               show_heatmap_legend=FALSE,
               bottom_annotation=h3_col, 
               width = ncol(mid2_data)*unit(5, "mm"), 
               height = nrow(mid2_data)*unit(5, "mm"))
  h4 = Heatmap(late_data, 
               col=col_fun, 
               cluster_columns=FALSE, 
               rect_gp = gpar(col="black", 
                              lwd=1),
               cluster_rows=FALSE,
               show_heatmap_legend=TRUE,
               bottom_annotation=h4_col,
               width = ncol(late_data)*unit(5, "mm"), 
               height = nrow(late_data)*unit(5, "mm"),
               heatmap_legend_param=list(title="Absolute flux (gDW/mmol*hr)"))
  
  # Return the final figure
  final_figure = h1+h2+h3+h4+hm+bp
  return(final_figure)
    
}
```

### C. Visualize Glycolytic Reactions
Now let's specifically get reactions related to Glycolysis.
```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100}
glycolysis_figures = flux_heatmap(data, pathway="Glycolysis/Gluconeogenesis")
png(filename="D:/Analysis/EMT/figures/glycolysis_flux_heatmap.png",
       width=20,
       height=12.5,
       units='in',
    res=1200)
glycolysis_figures
dev.off()
```

### D. Visualize Pentose Phosphate Pathway Reactions
```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100}
ppp_figures = flux_heatmap(data, pathway="Pentose Phosphate Pathway")
png(filename="D:/Analysis/EMT/figures/ppp_flux_heatmap.png",
       width=20,
       height=12.5,
       units='in',
    res=1200)
ppp_figures
dev.off()
```


### E. Visualize Specific Reactions
Now let's specifically get reactions that are unique to each state.
```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100}
# Manually subset reactions
reactions_of_interest = c("ribulose 5-phosphate 3-epimerase",
                          "Biotin transport via sodium symport",
                          "Biotin reversible transport via proton symport",
                          "NADH dehydrogenase, mitochondrial",
                          "L-lactate dehydrogenase")
idx = data$`BiGG Reaction ID` %in% reactions_of_interest
specific_data = data[idx, ]

# Create figures
specific_figures = flux_heatmap(specific_data)
png(filename="D:/Analysis/EMT/figures/specific_flux_heatmap.png",
       width=20,
       height=12.5,
       units='in',
    res=1200)
specific_figures
dev.off()
```

## 2. Visualize the Bulk Reaction KOs
```{r}
rm(list=setdiff(ls(), "flux_heatmap"))
```

### A. Load Data
```{r}
filename = "D:/Analysis/EMT/data/Bulk Reaction KO Intersection.csv"
data = read_csv(filename)

# Create new variable for row names
data$name = paste0(data$`BiGG Reaction ID`, 
                    ', ',
                    data$`KEGG Subsystem`)
```

### B. Create accessory function
```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100}
ko_heatmap = function(df, pathway){
  
  # If pathway variable is missing, just visualize everything
  if(missing(pathway)){
    data = df
  }else{
    idx = (df$`KEGG Subsystem` == pathway)
    idx[is.na(idx)] = FALSE
    data = df[idx, ]
  }

  # Create new data for barplots and discrete heatmap
  bardata = data[, 4]
  bardata[is.na(bardata)] = 0
  expt_data = data[, 5]
  
  # Split by hours
  early_data = data[, c("GSE17708_1hrs", "Garcia_1hrs")] - 1
  mid1_data  = data[, c("GSE17708_8hrs", "GSE147405_8hrs")] - 1
  mid2_data  = data[, c("GSE17708_24hrs", "Garcia_24hrs", "GSE147405_24hrs")] - 1
  late_data  = data[, c("GSE17708_72hrs", "GSE17518_72hrs", "Garcia_48hrs", 
                                     "Keshamouni_72hrs", "GSE147405_72hrs")] - 1
  tmp = cbind(early_data, mid1_data, mid2_data, late_data)
  
  # Create column annotation bars
  h1_col = HeatmapAnnotation(h1=anno_block(gp=gpar(fill="#CAC4CE"),
                                        labels=c("Early"), 
                                        labels_gp=gpar(col="black", 
                                                       fontsize=10)))
  h2_col = HeatmapAnnotation(h2=anno_block(gp=gpar(fill="#8D86C9"),
                                        labels=c("Int. 1"), 
                                        labels_gp=gpar(col="white", 
                                                       fontsize=10)))
  h3_col = HeatmapAnnotation(h3=anno_block(gp=gpar(fill="#725AC1"),
                                        labels=c("Int. 2"), 
                                        labels_gp=gpar(col="white", 
                                                       fontsize=10)))
  h4_col = HeatmapAnnotation(h4=anno_block(gp=gpar(fill="#242038"),
                                        labels=c("Late"), 
                                        labels_gp=gpar(col="white", 
                                                     fontsize=10)))
  
  # Construct experiment heatmap annotation
  hm = Heatmap(as.matrix(expt_data),
             col = colorRamp2(c(1, 5), 
                              c("#FFFFFF", "#5BC0BE", "#3A506B", "#1C2541", "#0B132B")),
             heatmap_legend_param=list(title="Experiments",
                                       color_bar="discrete"), 
             rect_gp = gpar(col="black", 
                              lwd=1),
             width = ncol(expt_data)*unit(5, "mm"), 
             height = nrow(expt_data)*unit(5, "mm"))
  
  # Construct priority score barplot annotation
  bp = rowAnnotation(
    Priority=anno_barplot(
      bardata
    )
  )
  
  # Construct the heatmaps
  col_fun = colorRamp2(c(min(as.matrix(tmp)), 0, max(as.matrix(tmp))), 
                       c("#073B4C", "#FFFFFF", "#EF476F"))
  h1 = Heatmap(row_labels=data$name,
               row_names_side="left",
               early_data, 
               col=col_fun, 
               cluster_columns=FALSE, 
               rect_gp = gpar(col="black", 
                              lwd=1),
               cluster_rows=FALSE,
               show_heatmap_legend=FALSE,
               bottom_annotation=h1_col, 
               width = ncol(early_data)*unit(5, "mm"), 
               height = nrow(early_data)*unit(5, "mm"))
  h2 = Heatmap(mid1_data, 
               col=col_fun, 
               cluster_columns=FALSE, 
               rect_gp = gpar(col="black", 
                              lwd=1),
               cluster_rows=FALSE,
               show_heatmap_legend=FALSE,
               bottom_annotation=h2_col, 
               width = ncol(mid1_data)*unit(5, "mm"), 
               height = nrow(mid1_data)*unit(5, "mm"))
  h3 = Heatmap(mid2_data, 
               col=col_fun, 
               cluster_columns=FALSE, 
               rect_gp = gpar(col="black", 
                              lwd=1),
               cluster_rows=FALSE,
               show_heatmap_legend=FALSE,
               bottom_annotation=h3_col, 
               width = ncol(mid2_data)*unit(5, "mm"), 
               height = nrow(mid2_data)*unit(5, "mm"))
  h4 = Heatmap(late_data, 
               col=col_fun, 
               cluster_columns=FALSE, 
               rect_gp = gpar(col="black", 
                              lwd=1),
               cluster_rows=FALSE,
               show_heatmap_legend=TRUE,
               bottom_annotation=h4_col,
               width = ncol(late_data)*unit(5, "mm"), 
               height = nrow(late_data)*unit(5, "mm"),
               heatmap_legend_param=list(title="Normalized KO Growth Score",
                                       at = c(min(as.matrix(tmp)), 0, max(as.matrix(tmp))),
                                       labels = c("Decr. Fitness", "No change", "Incr. fitness")))
  
  # Return the final figure
  final_figure = h1+h2+h3+h4+hm+bp
  return(final_figure)
    
}
```

### C. Plot the top 50 reactions based on priority score.
```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100}
# Sort data by priority score
data = data[order(data$`Absolute Effect * N expts`, decreasing=TRUE), ]

# Grab top 50
top50 = data[1:50, ]

# Create figures
top50_figures = ko_heatmap(top50)
png(filename="D:/Analysis/EMT/figures/RECON1_rxnko_heatmap.png",
       width=20,
       height=12.5,
       units='in',
    res=1200)
top50_figures
dev.off

# Show
top50_figures
```

### D. Grab high confidence reactions (2 or more in same sample)
There's not that much for high confidence reactions.

```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100}
# Subset Reactions
reactions_of_interest = c("alpha-ketoglutarate/malate transporter",
                          "carnitine O-acetyltransferase")
idx = data$`BiGG Reaction ID` %in% reactions_of_interest
specific_data = data[idx, ]

# Create figures
specific_figures = ko_heatmap(specific_data)
png(filename="D:/Analysis/EMT/figures/RECON1_rxnko_high_priority_heatmap.png",
       width=20,
       height=12.5,
       units='in',
    res=1200)
specific_figures
dev.off()

# Show
specific_figures


```

### E. Grab low confidence reactions (1 in same sample)
The list of low confidence reactions I'd like to visualize is high.

```{r}
reactions_of_interest = c("enolase",
                          "fatty-acyl-CoA elongation (n-C20:4CoA)",
                          "fatty acyl-CoA synthase (n-C10:0CoA)",
                          "fatty-acyl-CoA synthase (n-C12:0CoA)",
                          "fatty-acyl-CoA synthase (n-C14:0CoA)",
                          "hydroxymethylglutaryl-CoA lyase",
                          "dGTP transport via ATP antiport",
                          "UDPglucose 4-epimerase",
                          "adenosine kinase",
                          "pantetheine-phosphate adenylyltransferase",
                          " 4-Hydroxyphenylpyruvate:oxygen oxidoreductase",
                          "fumarylacetoacetase",
                          "Homogentisate:oxygen 1,2-oxidoreductase (decyclizing)",
                          "maleylacetoacetate isomerase",
                          "Malate:sulfite antiport, mitochondrial",
                          "chondroitin 6-sulfotransferase, Golgi apparatus",
                          "nicotinate-nucleotide adenylyltransferase",
                          "malate dehydrogenase",
                          "Norepinephrine uniport",
                          "Hydroxymethylglutaryl-CoA reversible mitochondrial transport",
                          "carnitine-acetylcarnitine carrier, peroxisomal",
                          "carnitine O-acetyltransferase, reverse direction, peroxisomal",
                          "UDP-GlcNAc:betaGal beta-1,3-N-acetylglucosaminyltransferase 1",
                          "UDP-GlcNAc:betaGal beta-1,3-N-acetylglucosaminyltransferase 3, Golgi apparatus",
                          "Hydroxymethylglutaryl CoA synthase (ir)",
                          "citrate transport, mitochondrial",
                          "Aconitate hydratase",
                          "ATP-Citrate lyase",
                          "fumarase, mitochondrial",
                          "triose-phosphate isomerase",
                          "acetone monooxygenase",
                          "fructose-bisphosphatase",
                          "ornithine transaminase reversible (m)",
                          "succinate dehydrogenase",
                          "carnitine O-aceyltransferase, mitochondrial",
                          "citrate synthase",
                          "Isocitrate dehydrogenase (NADP+)",
                          "L-lactate dehydrogenase")
```

Subset and create the plot.
```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100}
idx = data$`BiGG Reaction ID` %in% reactions_of_interest
specific_data = data[idx, ]

# Create figures
specific_figures = ko_heatmap(specific_data)
png(filename="D:/Analysis/EMT/figures/RECON1_rxnko_low_priority_heatmap.png",
       width=20,
       height=12.5,
       units='in',
    res=1200)
specific_figures
dev.off()
```

## 3. Single-Cell Reaction KOs
Finally, let's create a visualization for the single-cell reaction knockouts.

```{r}
rm(list=ls())
```

### A. Load Data
```{r}
filename = "D:/Analysis/EMT/data/scRNASeq Reaction KO.csv"
data = read_csv(filename)

# Create new variable for row names
data$name = paste0(data$`BiGG Reaction ID`, 
                    ', ',
                    data$`KEGG Subsystem`)
```
### B. Subset the average data columns
The data has already been averaged across all cells.
```{r}
tmp = data[, c("Average_All",	"Average_Day0",
               "Average_Hour8",	"Average_Day1",
               "Average_Day3",	"Average_Day7")]
tmp = tmp-1
```

### C. Create an annotation heatmap showing the average standard deviation for each reaction across all cells
```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100}
hm = Heatmap(as.matrix(data[, "Std_All"]),
             rect_gp = gpar(col = "black", 
                            lwd = 1),
             col = colorRamp2(c(0, 0.3), 
                              c("#FFFFFF", "orange")),
             cluster_columns=FALSE,
             cluster_rows=FALSE,
             heatmap_legend_param=list(title="Average Std",
                                       color_bar="continuous"),
             width = ncol(data[, "Std_All"])*unit(5, "mm"), 
             height = nrow(data[, "Std_All"])*unit(5, "mm"))
```

### D. Create the Heatmap with additional figures
```{r, fig.width=20, fig.height=12.5, fig.align='right', dpi=100}
final_data = as.matrix(tmp)
col_fun = colorRamp2(c(min(as.matrix(tmp)), 0, max(as.matrix(tmp))), 
                       c("#073B4C", "#FFFFFF", "#EF476F"))

ht = Heatmap(row_labels=data$name,
             final_data, 
             row_names_side="left",
             rect_gp = gpar(col = "black", 
                            lwd = 1),
             cluster_columns=FALSE,
             cluster_rows=FALSE,
             col = col_fun,
             heatmap_legend_param=list(title="Normalized KO Growth Score",
                                       at = c(min(as.matrix(tmp)), 0, max(as.matrix(tmp))),
                                       labels = c("Decr. Fitness", "No change", "Incr. fitness")),
             width = ncol(final_data)*unit(5, "mm"), 
             height = nrow(final_data)*unit(5, "mm")) +
     hm 

ht
```
Save figures
```{r}
png(filename="D:/Analysis/EMT/figures/scRNASeq_avg_rxn_ko.png",
       width=20,
       height=12.5,
       units='in',
    res=1200)
ht
dev.off()
```

## Conclusion