Skip to content

Commit

Permalink
add mapping rate to genecatalog
Browse files Browse the repository at this point in the history
  • Loading branch information
SilasK committed Jul 18, 2023
1 parent db28f82 commit 6a49fd2
Showing 1 changed file with 48 additions and 30 deletions.
78 changes: 48 additions & 30 deletions R/Analyze_genecatalog.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -32,38 +32,27 @@ library(ggplot2)
library(ggbeeswarm)
library(pheatmap)
library(grid)
#library(microbiome)
#library(ape)
#library(vegan)
library(useful)
library(kableExtra)
#library(ggtree)
library(rhdf5)
library(dplyr)
library(tibble)
library(arrow)
library(yaml)
#library(phyloseq)
#library(microViz)
library(SummarizedExperiment)
```

Atlas output files are stored in the `Example` folder.

```{r}
Atlas output files are stored in the `DiarrheaExample` folder.

```{r, file_paths}
data_dir <- "../NewExample/"
data_dir <- "../DiarrheaExample/"
atlas_version <- "v2.17"
```


```{r, file_paths}
files <- yaml::yaml.load_file("../atlas_output_files.yaml")[[atlas_version]]
Expand All @@ -88,6 +77,7 @@ genecatalog_files <-files[["genecatalog"]]
abundance_file <- genecatalog_files$coverage
```


Expand All @@ -103,7 +93,44 @@ head(metadata)
```


```{r}
# Load gene stats per sample
sample_stats <- read.table(genecatalog_files$sample_stats,sep='\t',header=T,row.names=1)
assertthat::assert_that("Sum_coverage" %in% colnames(sample_stats))
head(sample_stats)
```


```{r, mapping_rate, fig.width=2.5, fig.height=4}
read_stats<-read_tsv( files[["readcounts"]], show_col_types = FALSE) %>%
filter(Step == "QC" ) %>% mutate(Total_reads= Reads_pe *2 +Reads_se) %>% .[,c("Sample","Total_reads","Reads_pe","Reads_se")]
read_stats <- read_stats %>% left_join( rownames_to_column(sample_stats,"Sample") ) %>%
mutate(Mapping_rate= Total_counts / Total_reads *100)
read_stats[,"x"]="Genecatalog"
plt <- ggplot(read_stats, aes(y = Mapping_rate, x=x,text=paste("Sample:",Sample))) +
ylim(c(50, 100))+
#xlim(c(-0.1,0.1)) +
labs(x=NULL)+
geom_beeswarm(cex = 2,size=2) +
theme_minimal()
print(plt)
#ggplotly(plt,tooltip = c('text','y' ))
```

```{r}
Expand Down Expand Up @@ -142,12 +169,7 @@ return(data)
```{r}
# Load gene stats per sample
sample_stats <- read.table(genecatalog_files$sample_stats,sep='\t',header=T,row.names=1)
assertthat::assert_that("Sum_coverage" %in% colnames(sample_stats))
# calculate copies per million
total_covarage <- sample_stats[,"Sum_coverage"]
Expand Down Expand Up @@ -208,7 +230,8 @@ gene_gcpm[1:5,1:5]
```{r }
library(SummarizedExperiment)
rownames(annotations) <- NULL
Expand All @@ -224,23 +247,18 @@ SE= SummarizedExperiment(
```


```

```{r}

```{}
library(tidybulk)
```






```{}
# Load full genecatalog matrix,
Expand Down

0 comments on commit 6a49fd2

Please sign in to comment.