diff --git a/R/DNA_sequence_cos.R b/R/DNA_sequence_cos.R index 2b8db8a..8df55c3 100644 --- a/R/DNA_sequence_cos.R +++ b/R/DNA_sequence_cos.R @@ -1,4 +1,4 @@ -#' Obtain all mutations codons + #' Obtain all mutations codons #' #' This function will load all the CDS from the mutated genes in the input file. Then recovery the 3' and #' 5' nucleotide aside of the mutation site. @@ -7,8 +7,11 @@ #' @param num The number of patient you want to analyse. Default will go through all the dataset. #' @return DNA_seq = CDS sequence for all the genes used, codon_part = codon around mutation. #' @export -DNA_sequence_cos = function(cosmic,num = length(cosmic$GENE_NAME)){ - +DNA_sequence_cos = function(cosmic,num = length(cosmic$GENE_NAME),random = FALSE){ + if (random == TRUE){ + idx = sort(sample(1:length(cosmic$GENE_NAME),num, replace = F)) + cosmic = cosmic[idx,] + } genes_mut_names = sapply(strsplit(cosmic$GENE_NAME[1:num],"[\\_]+"), `[`, 1) genes_mut_site1 = sapply(strsplit(cosmic$MUTATION_CDS[1:num],"[\\c.]+"), `[`, 2) genes_mut_site = as.numeric(str_extract(genes_mut_site1, "[0-9]+")) diff --git a/R/Signature_algoV2.R b/R/Signature_algoV2.R index 95e7bd1..8c8e1b9 100644 --- a/R/Signature_algoV2.R +++ b/R/Signature_algoV2.R @@ -8,21 +8,21 @@ #' @return change_nu = the change of nucleotide due to the mutation, mut_to = the codon paste to #' the nucleotide after mutation. #' @export -Signature_algoV2 = function(file,num){ +Signature_algoV2 = function(file,num, random = FALSE){ for(i in 1:length(file)){ if( i == 1){ print("Running into file 1") data = import_data_cos(file[i]) - seq = DNA_sequence_cos(data,num) + seq = DNA_sequence_cos(data,num,random) mut_to = Site_mut_cos(seq) - mut_signa = mut_signature_cos(mut_to, data, i) + mut_signa = mut_signature_cos(mut_to, i) next } print(c("Running into file", i)) data = import_data_cos(file[i]) - seq = DNA_sequence_cos(data,num) + seq = DNA_sequence_cos(data,num,random) mut_to = Site_mut_cos(seq) - mut_signa1 = mut_signature_cos(mut_to, data, i) + mut_signa1 = mut_signature_cos(mut_to, i) mut_signa = rbind(mut_signa,mut_signa1) } mut_signa$cancer = as.factor(mut_signa$cancer)