Merge branch 'main' of https://github.com/AliYoussef96/LimROTS

AliYoussef96 · Oct 5, 2024 · df56293 · df56293
2 parents 0735673 + d3325a9
commit df56293
Show file tree

Hide file tree

Showing 9 changed files with 61 additions and 41 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -14,3 +14,6 @@ CONTRIBUTING.md
 ^_pkgdown\.yml$
 ^docs$
 ^pkgdown$
+^.*\.bib$
+^\.github$
+^vignettes/(?!reference.bib$).*\.bib$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,5 +1,7 @@
 Package: LimROTS
-Title: An Extension of the ROTS Method with Limma Integration
+Title: A Hybrid Method Integrating Empirical Bayes and 
+        Reproducibility-Optimized Statistics for Robust Analysis of Proteomics 
+        and Metabolomics Data
 Version: 0.99.0
 Authors@R: 
     c(person(given = "Ali", family = "Youssef", role = c("aut", "cre"), 
@@ -8,24 +10,27 @@ Authors@R:
     person(given = "Leo", family = "Lahti", role = c("aut" ,"ths"),
     email = "[email protected]",
     comment = c(ORCID = "0000-0001-5537-637X")),
-    person(given = "Akewak", family = "Jeba", role = c("ctb"),
+    person(given = "Akewak", family = "Jeba", role = c("aut","ctb"),
     email = "[email protected]",
     comment = c(ORCID = "0009-0007-1347-7552")),
     person(given = "Eleanor", family = "Coffey", role = c("aut", "ths"),
     email = "[email protected]",
     comment = c(ORCID = "0000-0002-9717-5610")))
 Description: Differential expression analysis is a prevalent method utilised in 
-            the examination of diverse biological data.The 
+            the examination of diverse biological data. The 
             reproducibility-optimized test statistic (ROTS) modifies a 
             t-statistic based on the data's intrinsic characteristics and ranks 
             features according to their statistical significance for 
             differential expression between two or more groups (f-statistic). 
             Focussing on proteomics and metabolomics, the current ROTS 
             implementation cannot account for technical or biological 
             covariates such as MS batches or gender differences among 
-            the samples.Consequently, we developed LimROTS, which employs a 
+            the samples. Consequently, we developed LimROTS, which employs a 
             reproducibility-optimized test statistic utilising the limma 
-            methodology to simulate complex experimental designs.
+            methodology to simulate complex experimental designs. LimROTS is a
+            hybrid method integrating empirical Bayes and 
+            reproducibility-optimized statistics for robust analysis 
+            of proteomics and metabolomics data.
 License: Artistic-2.0
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
@@ -48,11 +53,11 @@ Imports:
     utils,
     stats,
     doRNG,
-    magick,
     dplyr
 Suggests: 
     BiocStyle,
     ggplot2,
+    magick,
     testthat (>= 3.0.0),
     knitr,
     rmarkdown,

diff --git a/R/LM_with_limma_permuting.R b/R/LM_with_limma_permuting.R
@@ -50,8 +50,9 @@
 #' @importFrom stringr str_split_fixed fixed
 #'
 testStatistic_with_covariates_permutating <-
-    function(x, group.name, meta.info, formula.str, trend, robust,
-             permutating.group) {
+    function(x, group.name, meta.info,
+                formula.str, trend, robust,
+                permutating.group) {
         data <- x
         combined_data <- data.frame(
             check.rows = FALSE,

diff --git a/R/calculateFalseDiscoveryRate.R b/R/calculateFalseDiscoveryRate.R
@@ -53,11 +53,11 @@ calculateFalseDiscoveryRate <- function(observedValues, permutedValues,
     falseDiscoveryRate <- apply(FDRmatrix, 1, median)
     falseDiscoveryRate[falseDiscoveryRate > 1] <- 1
     falseDiscoveryRate[ord.obs] <-
-        rev(sapply(
+        rev(vapply(
             length(falseDiscoveryRate):1,
             function(x)
-                return(min(falseDiscoveryRate
-                [ord.obs][x:length(falseDiscoveryRate)]))
+                min(falseDiscoveryRate[ord.obs][x:length(falseDiscoveryRate)]),
+            numeric(1)
         ))
     return(falseDiscoveryRate)
 }

diff --git a/R/testStatOptimized.R b/R/testStatOptimized.R
@@ -64,21 +64,23 @@ testStatOptimized <- function(isPaired, x) {
     } else if (length(sampleGroups) > 2) {
         allSamples <- do.call("cbind", sampleGroups)
         if (!isPaired) {
-            factorScaling <- sum(sapply(sampleGroups, ncol)) /
-                prod(sapply(sampleGroups, ncol))
-            rowVariance <- rowSums(sapply(sampleGroups, function(group)
+            factorScaling <- sum(vapply(sampleGroups, ncol, as.numeric(1))) /
+                prod(vapply(sampleGroups, ncol, numeric(1)))
+            rowVariance <- rowSums(vapply(sampleGroups, function(group)
                 (
                     rowMeans(group, na.rm = TRUE) - rowMeans(allSamples,
                         na.rm = TRUE
                     )
-                )^2))
+                )^2, numeric(1)))
             meanDiff <- sqrt(factorScaling * rowVariance)
-            scalingFactor <- 1 / sum(sapply(sampleGroups, ncol) - 1) *
-                sum(1 / sapply(sampleGroups, ncol))
-            totalVariance <- rowSums(sapply(sampleGroups, function(group)
+            scalingFactor <- 1 / sum(vapply(sampleGroups,
+                                            ncol, numeric(1)) - 1) *
+                sum(1 / vapply(sampleGroups, ncol, numeric(1)))
+            totalVariance <- rowSums(vapply(sampleGroups,
+                                            function(group)
                 rowSums((group - rowMeans(group, na.rm = TRUE))^2,
                     na.rm = TRUE
-                )))
+                ), numeric(1)))
             standardDev <- sqrt(scalingFactor * totalVariance)
         } else {
             stop("Multiple paired groups are not supported!")

diff --git a/_pkgdown.yml → pkgdown/_pkgdown.yml b/_pkgdown.yml → pkgdown/_pkgdown.yml
diff --git a/vignettes/LimROTS.Rmd b/vignettes/LimROTS.Rmd
@@ -14,7 +14,6 @@ vignette: >
     %\VignetteIndexEntry{LimROTS}
     %\VignetteEngine{knitr::rmarkdown}
     %\VignetteEncoding{UTF-8}
-bibliography: reference.bib
 ---
 
 ```{r, include = FALSE}
@@ -198,7 +197,9 @@ limrots.result <- LimROTS(
     x = UPS1.Case4,
     B = B, K = K, meta.info = meta.info,
     cluster = cluster, group.name = group.name,
-    formula.str = formula.str, trend = TRUE, robust = TRUE, permutating.group = FALSE, seed.cl = 1234
+    formula.str = formula.str, trend = TRUE, 
+    robust = TRUE, permutating.group = FALSE, 
+    seed.cl = 1234
 )
 ```
 

diff --git a/vignettes/reference.bib b/vignettes/reference.bib
@@ -12,49 +12,57 @@ @Article{limma
 
 @Article{rots,
     author = {Tomi Suomi and Fatemeh Seyednasrollah and Maria Jaakkola and Thomas Faux and Laura Elo},
-    doi = {10.1371/journal.pcbi.1005562},
+    title = {ROTS: An R package for reproducibility-optimized statistical testing.},
     journal = {PLoS computational biology},
+    year = {2017},
     month = {May},
+    volume = {13},
     number = {5},
     pages = {e1005562},
     pmid = {28542205},
-    title = {ROTS: An R package for reproducibility-optimized statistical testing.},
     url = {http://www.ncbi.nlm.nih.gov/pubmed/28542205},
-    volume = {13},
-    year = {2017},
+    doi = {10.1371/journal.pcbi.1005562},
   }
 
 
-@article{elo2008reproducibility,
-  title={Reproducibility-optimized test statistic for ranking genes in microarray studies},
+@Article{elo2008reproducibility,
   author={Elo, Laura L and Fil{\'e}n, Sanna and Lahesmaa, Riitta and Aittokallio, Tero},
+  title={Reproducibility-optimized test statistic for ranking genes in microarray studies},
   journal={IEEE/ACM transactions on computational biology and bioinformatics},
   volume={5},
   number={3},
   pages={423--431},
   year={2008},
-  publisher={IEEE}
+  publisher={IEEE},
 }
 
 
-@article{GOTTI2022107829,
-title = {DIA proteomics data from a UPS1-spiked E.coli protein mixture processed with six software tools},
-journal = {Data in Brief},
-volume = {41},
-pages = {107829},
-year = {2022},
-issn = {2352-3409},
-doi = {https://doi.org/10.1016/j.dib.2022.107829},
-url = {https://www.sciencedirect.com/science/article/pii/S2352340922000415},
-author = {Clarisse Gotti and Florence Roux-Dalvai and Charles Joly-Beauparlant and Loïc Mangnier and Mickaël Leclercq and Arnaud Droit},
-keywords = {Data Independent Acquisition, Complex proteomic standard, Software tools benchmark, Spiked UPS1 human proteins},
-abstract = {In this article, we provide a proteomic reference dataset that has been initially generated for a benchmarking of software tools for Data-Independent Acquisition (DIA) analysis. This large dataset includes 96 DIA .raw files acquired from a complex proteomic standard composed of an E.coli protein background spiked-in with 8 different concentrations of 48 human proteins (UPS1 Sigma). These 8 samples were analyzed in triplicates on an Orbitrap mass spectrometer with 4 different DIA window schemes. We also provide the spectral libraries and FASTA file used for their analysis and the software outputs of the six tools used in this study: DIA-NN, Spectronaut, ScaffoldDIA, DIA-Umpire, Skyline and OpenSWATH. This dataset also contains post-processed quantification tables where the peptides and proteins have been validated, their intensities normalized and the missing values imputed with a noise value. All the files are available on ProteomeXchange. Altogether, these files represent the most comprehensive DIA reference dataset acquired on an Orbitrap instrument ever published. It will be a very useful resource to the proteomic scientists in order to assess the performance of DIA software tools or to test their processing pipelines, to the software developers to improve their tools or develop new ones and to the students for their training on proteomics data analysis.}
+@Article{GOTTI2022107829,
+  author = {Clarisse Gotti and Florence Roux-Dalvai and Charles Joly-Beauparlant and Loïc Mangnier and Mickaël Leclercq and Arnaud Droit},
+  title = {DIA proteomics data from a UPS1-spiked E.coli protein mixture processed with six software tools},
+  journal = {Data in Brief},
+  volume = {41},
+  pages = {107829},
+  year = {2022},
+  issn = {2352-3409},
+  doi = {10.1016/j.dib.2022.107829},
+  url = {https://www.sciencedirect.com/science/article/pii/S2352340922000415},
+  keywords = {Data Independent Acquisition, Complex proteomic standard, Software tools benchmark, Spiked UPS1 human proteins},
+  abstract = {In this article, we provide a proteomic reference dataset that has been initially generated for a benchmarking of software tools for Data-Independent Acquisition (DIA) analysis. This large dataset includes 96 DIA .raw files acquired from a complex proteomic standard composed of an E.coli protein
+        background spiked-in with 8 different concentrations of 48 human proteins (UPS1 Sigma). These 8 samples were analyzed in triplicates on an Orbitrap mass
+        spectrometer with 4 different DIA window schemes. We also provide the spectral libraries and FASTA file used for their analysis and the software outputs of
+        the six tools used in this study: DIA-NN, Spectronaut, ScaffoldDIA, DIA-Umpire, Skyline and OpenSWATH. This dataset also contains post-processed
+        quantification tables where the peptides and proteins have been validated, their intensities normalized and the missing values imputed with a noise value.
+        All the files are available on ProteomeXchange. Altogether, these files represent the most comprehensive DIA reference dataset acquired on an Orbitrap
+        instrument ever published. It will be a very useful resource to the proteomic scientists in order to assess the performance of DIA software tools or to test
+        their processing pipelines, to the software developers to improve their tools or develop new ones and to the students for their training on proteomics data
+        analysis.},
 }
 
 
 @Manual{qvalue,
-    title = {qvalue: Q-value estimation for false discovery rate control},
     author = {John D. Storey and Andrew J. Bass and Alan Dabney and David Robinson},
+    title = {qvalue: Q-value estimation for false discovery rate control},
     year = {2024},
     note = {R package version 2.36.0},
     url = {https://bioconductor.org/packages/qvalue},