-
Notifications
You must be signed in to change notification settings - Fork 0
/
1_impute_aCGH.R
executable file
·60 lines (43 loc) · 2.08 KB
/
1_impute_aCGH.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# TODO this script was used to impute some of our datasets but it hasn't been updated since 2016
# This program imputes missing values in aCGH data with lowess, using the
# aCGH package from Bioconductor
# (http://www.bioconductor.org/packages//2.11/bioc/html/aCGH.html)
# INPUT
# aCGH file. It must be a tab delimited text file (.txt)
# the file must follow the format as specified in README.txt
# Use a short name for the file, e.g. "set", and save it under ~/Research/Data/set
# OTHER REQUIREMENTS
# the library aCGH from Bioconductor must be installed in advance
# Arguments
# 1. fileName without including the txt extension (e.g. "set")
# OUTPUT
# a file named set_lowess.txt will be saved in ~/Research/Data/set
# Example of the command in the terminal (use --vanilla instead of
# --slave to have the input echoed in the terminal)
# R --slave --args set < 1_impute_aCGH.R
# Get the command line arguments.
# Only the arguments that follow "--args" are considered, so the dataset name
# is found no matter how many R options (--slave, --vanilla, ...) precede it.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 1L || is.na(args[1L]) || !nzchar(args[1L])) {
  stop(
    "Missing dataset name. Usage: R --slave --args <set> < 1_impute_aCGH.R",
    call. = FALSE
  )
}
fileName <- args[1L]

library(aCGH)

begPath <- "~/Research"
# Index of the first column holding log2 ratios; every column before it is
# treated as clone annotation.
CGH_start <- 6

# Read the data from <begPath>/Data/<fileName>/<fileName>.txt
dataDir <- file.path(begPath, "Data", fileName)
dataPath <- file.path(dataDir, paste0(fileName, ".txt"))
if (!file.exists(dataPath)) {
  stop("Input file not found: ", dataPath, call. = FALSE)
}
data <- read.table(dataPath, sep = "\t", header = TRUE)
if (ncol(data) < CGH_start) {
  stop(
    "Expected at least ", CGH_start, " columns in ", dataPath,
    " but found ", ncol(data),
    call. = FALSE
  )
}

annotation_cols <- seq_len(CGH_start - 1)

# Format log2.ratio file from data so aCGH package can be used.
# drop = FALSE keeps a data frame even when there is a single sample column,
# so that row names can still be assigned.
log2ratios <- data[, CGH_start:ncol(data), drop = FALSE]
rownames(log2ratios) <- data$Clone

# Create clones.info for the aCGH package: mapping information including but
# not limited to clone name, chromosome and kb relative to the chromosome
clones_info <- data[, annotation_cols, drop = FALSE]
if (!"bp" %in% names(clones_info)) {
  stop(
    "Column 'bp' not found among the first ", CGH_start - 1,
    " columns of ", dataPath,
    call. = FALSE
  )
}

# Change bp to kb (value and column name)
clones_info[["bp"]] <- round(clones_info[["bp"]] / 1000)
names(clones_info)[names(clones_info) == "bp"] <- "kb"

# Create aCGH object
ex_acgh <- create.aCGH(log2ratios, clones_info)

# Impute missing data
# NOTE(review): the cbind below assumes impute.lowess returns the clones in
# the same row order as the input file -- confirm against the aCGH docs.
log2ratios_imputed <- impute.lowess(ex_acgh)

# Put imputed data back together with the front matter. The annotation columns
# come from the original input, so positions in the output are still in bp.
lowess_imputed_data <- cbind(data[, annotation_cols], log2ratios_imputed)

# Write the lowess imputed data to <fileName>_lowess.txt next to the input file
write.table(
  lowess_imputed_data,
  file.path(dataDir, paste0(fileName, "_lowess.txt")),
  sep = "\t",
  row.names = FALSE
)