forked from idr-contrib/serrano-remining
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_HDF5.Rmd
63 lines (53 loc) · 1.5 KB
/
test_HDF5.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
---
title: "Test HDF5"
output: html_document
---
#### 1) Install and load *rhdf5*
```{r, echo=FALSE}
# Install rhdf5 from Bioconductor when it is missing, then attach it.
# BiocManager replaces the legacy biocLite.R script, which used to be
# sourced over plain HTTP and has been retired by Bioconductor.
if (!requireNamespace("rhdf5", quietly = TRUE)) {
  if (!requireNamespace("BiocManager", quietly = TRUE)) {
    # An explicit mirror is needed because knitting runs non-interactively
    install.packages("BiocManager", repos = "https://cloud.r-project.org")
  }
  # Never prompt: there is nobody to answer while the document is knitted
  BiocManager::install("rhdf5", update = FALSE, ask = FALSE)
}
library(rhdf5)
```
#### 2) Explore the structure
```{r}
# Relative location of the HDF5 file to inspect
filename <- file.path("..", "h5files", "plate1_1_013.h5")

# Listing of the file content; its `group` and `name` columns are used
# further down to address the objects to read
fields <- h5ls(filename)
str(fields)
```
#### 3) Read the first object of *plate1_1_013.h5*
```{r}
# Full HDF5 path of every object listed in `fields`.
# Group and name have to be joined with "/": plain concatenation is only
# right for objects sitting directly under the root group "/" and would turn
# group "/a" + name "b" into "/ab". Always joining with "/" and collapsing
# the doubled leading slash covers both the root and the nested case.
group_name <- sub("^//", "/", paste(fields$group, fields$name, sep = "/"))

# Read the first listed object; compoundAsDataFrame = FALSE asks rhdf5 not to
# coerce compound datasets into a data.frame.
data <- h5read(filename, group_name[1], compoundAsDataFrame = FALSE)

# Close the HDF5 handles left open by this session
H5close()
```
#### 4) Get metadata
```{r}
# Measurement record of the object read from the HDF5 file
measures <- data$Measurements

# Identifiers shown in the heading of the next section
wellID <- measures$WellID
imageID <- measures$ImageID
```
#### 5) Get feature values (imageID: `r imageID`; well: `r wellID`)
```{r}
# Build a two-column table (feature id, value) from the per-feature arrays
# stored in `measures`.

# Entries from position 11 onwards are treated as features; the first 10 are
# skipped (presumably metadata such as ImageID / WellID -- confirm against
# the HDF5 layout). Negative indexing avoids the reversed sequence that
# 11:length(measures) yields when there are fewer than 11 entries.
n_skipped <- 10L
featureListOfMatrices <- measures[-seq_len(n_skipped)]
stopifnot(length(featureListOfMatrices) > 0)

## 1) Feature values
# Flatten every feature on its own and concatenate them in list order, so
# the values always line up with the ids built below, whatever the shape of
# the individual arrays (stacking with rbind() only gives this order for
# single-column matrices).
# The original author reports 2919 values for this file.
featureVector <- unlist(
  lapply(featureListOfMatrices, as.vector),
  use.names = FALSE
)

## 2) Feature names: build a new id "<feature name>_<position>" per value
# Number of values held by each feature
feature_sizes <- lengths(featureListOfMatrices)
feat_size <- data.frame(
  name = names(feature_sizes),
  size = unname(feature_sizes)
)

# Feature name repeated once per value
feature_names <- rep(feat_size$name, feat_size$size)
# 1-based position of each value inside its feature; unlike seq(1:n),
# sequence() contributes nothing for a feature of size 0
value_index <- sequence(feat_size$size)

feature_value <- data.frame(
  feature = paste(feature_names, value_index, sep = "_"),
  value = featureVector
)
feature_value
```