forked from microbiome/course_2021_radboud
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimport.Rmd
61 lines (41 loc) · 1.95 KB
/
import.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
biom_file_path <- "data/Aggregated_humanization2.biom"
sample_meta_file_path <- "data/Mapping_file_ADHD_aggregated.csv"
tree_file_path <- "data/Data_humanization_phylo_aggregation.tre"
library(mia)
# Imports the data
se <- loadFromBiom(biom_file_path)
names(rowData(se)) <- c("Kingdom", "Phylum", "Class", "Order",
"Family", "Genus")
# Goes through the whole DataFrame. Removes '.*[kpcofg]__' from strings, where [kpcofg]
# is any character from listed ones, and .* any character.
rowdata_modified <- BiocParallel::bplapply(rowData(se),
FUN = stringr::str_remove,
pattern = '.*[kpcofg]__')
# Genus level has additional '\"', so let's delete that also
rowdata_modified <- BiocParallel::bplapply(rowdata_modified,
FUN = stringr::str_remove,
pattern = '\"')
# rowdata_modified is a list, so it is converted back to DataFrame format.
rowdata_modified <- DataFrame(rowdata_modified)
# And then assigned back to the SE object
rowData(se) <- rowdata_modified
# We use this to check what type of data it is
# read.table(sample_meta_file_path)
# It seems like a comma separated file and it does not include headers
# Let us read it and then convert from data.frame to DataFrame
# (required for our purposes)
sample_meta <- DataFrame(read.table(sample_meta_file_path, sep = ",", header = FALSE))
# Add sample names to rownames
rownames(sample_meta) <- sample_meta[,1]
# Delete column that included sample names
sample_meta[,1] <- NULL
# We can add headers
colnames(sample_meta) <- c("patient_status", "cohort", "patient_status_vs_cohort", "sample_name")
# Then it can be added to colData
colData(se) <- sample_meta
# Convert to tse format
tse <- as(se, "TreeSummarizedExperiment")
# Reads the tree file
tree <- ape::read.tree(tree_file_path)
# Add tree to rowTree
rowTree(tse) <- tree