Merge pull request #168 from thackl/cran-ready

Make gggenomes ready for CRAN submission
thackl · Mar 5, 2024 · 88f81c6 · 88f81c6
2 parents f4c97b6 + dfa3324
commit 88f81c6
Show file tree

Hide file tree

Showing 170 changed files with 3,209 additions and 2,445,067 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -3,3 +3,9 @@
 ^.*\.Rproj$
 ^\.Rproj\.user$
 ^\.github$
+^doc$
+^Meta$
+^R-deprecated$
+^data-raw$
+^pkgdown$
+^LICENSE.md$
diff --git a/.gitignore b/.gitignore
@@ -7,3 +7,6 @@ R-deprecated/
 data-raw/
 tests/
 vignettes-manual-export/
+/doc/
+/Meta/
+gggenomes.Rcheck
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,44 +1,52 @@
 Package: gggenomes
 Title: A Grammar of Graphics for Comparative Genomics
-Version: 0.9.13.9000
+Version: 1.0.0
 Authors@R: c(
   person("Thomas", "Hackl", email = "[email protected]", role = c("aut", "cre")),
   person("Markus J.", "Ankenbrand", email = "[email protected]", role = c("aut")),
   person("Bart", "van Adrichem", role = c("aut")),
   person("Kristina", "Haslinger", email = "[email protected]", role = c("ctb", "sad")))
-Description: gggenomes is an extension of ggplot2 for creating complex genomic
+Description: An extension of ggplot2 for creating complex genomic
  maps. It builds on the power of ggplot2 and tidyverse adding new ggplot-style
  geoms & positions and dplyr-style verbs to manipulate the underlying data. It
  implements a layout concept inspired by ggraph and introduces tracks to bring
 tidiness to the mess that is genomics data.
-License: MIT
+License: MIT + file LICENSE
 URL: https://github.com/thackl/gggenomes
 BugReports: https://github.com/thackl/gggenomes/issues
 Encoding: UTF-8
 LazyData: true
 RoxygenNote: 7.2.3
 Roxygen: list(markdown = TRUE)
-Remotes: github::thackl/thacklr, bioc::release/rtracklayer
+VignetteBuilder: knitr
 Depends:
     R (>= 3.4.2),
     ggplot2 (>= 3.5.0),
 Imports:
     vctrs,
     rlang,
-    ggplot2,
     dplyr,
     tidyr,
     readr (>= 2.0.0),
     purrr,
     tibble,
     stringr,
     grid,
-    gggenes,
     jsonlite,
-    snakecase
+    snakecase,
+    magrittr,
+    scales,
+    tidyselect,
+    colorspace,
+    methods,
+    utils
 Suggests:
     testthat,
     rtracklayer,
     ggtree,
     patchwork,
-    Hmisc
+    Hmisc,
+    knitr,
+    ggrepel,
+    IRanges,
+    ellipsis
diff --git a/LICENSE b/LICENSE
@@ -1,21 +1,2 @@
-MIT License
-
-Copyright (c) 2018 Thomas Hackl
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+YEAR: 2018
+COPYRIGHT HOLDER: Thomas Hackl
diff --git a/LICENSE.md b/LICENSE.md
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2018 Thomas Hackl
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/NAMESPACE b/NAMESPACE
@@ -157,6 +157,7 @@ export(pull_links)
 export(pull_seqs)
 export(pull_track)
 export(qc)
+export(qw)
 export(read_alitv)
 export(read_bed)
 export(read_fai)
@@ -177,10 +178,12 @@ export(seqs)
 export(set_class)
 export(set_seqs)
 export(shift)
+export(split_by)
 export(strand_chr)
 export(strand_int)
 export(strand_lgl)
 export(strip_class)
+export(swap_if)
 export(swap_query)
 export(sync)
 export(theme_gggenomes_clean)
@@ -202,6 +205,10 @@ importFrom(ggplot2,theme_bw)
 importFrom(jsonlite,fromJSON)
 importFrom(magrittr,"%<>%")
 importFrom(magrittr,"%>%")
+importFrom(methods,is)
 importFrom(readr,read_tsv)
+importFrom(rlang,.data)
+importFrom(stats,na.omit)
 importFrom(tidyr,unnest)
 importFrom(tidyr,unnest_wider)
+importFrom(utils,type.convert)
diff --git a/R/aaa.R b/R/aaa.R
@@ -1,5 +1,6 @@
 #' Swap values of two columns based on a condition
 #'
+#' @export
 #' @param x a tibble
 #' @param condition an expression to be evaluated in data context returning a
 #' TRUE/FALSE vector
@@ -8,7 +9,7 @@
 #' @examples
 #' x <- tibble::tibble(start = c(10,100), end=c(30, 50))
 #' # ensure start of a range is always smaller than the end
-#' gggenomes:::swap_if(x, start > end, start, end)
+#' swap_if(x, start > end, start, end)
 swap_if <- function(x, condition, ...){
   i <- tidyselect::eval_select(rlang::expr(c(...)), x)
   if(length(i) != 2 || length(unique(i)) != 2)
@@ -42,7 +43,7 @@ ex <- function(file = NULL) {
 
 # are there any arguments in ...
 has_dots <- function(env = parent.frame()){
-  length(ellipsis:::dots(env)) > 0
+  length(ellipsis__dots(env)) > 0
 }
 
 shared_names <- function(x, ...){
@@ -76,10 +77,11 @@ magrittr::`%<>%`
 #' occurrence. R base split converts keys to factors, changing default order to
 #' alphanumeric.
 #'
+#' @export
 #' @param key variable to split by
 #' @keywords internal
 #' @examples
-#' tibble(x=c(1,1,1,2), y=c("B", "A", "B", "B"), z="foo") %>%
+#' tibble::tibble(x=c(1,1,1,2), y=c("B", "A", "B", "B"), z="foo") %>%
 #'   split_by(x)
 split_by <- function(.data, key){
   keys <- pull(.data, !!enquo(key))
@@ -100,6 +102,7 @@ split_by <- function(.data, key){
 #' \href{https://stackoverflow.com/questions/520810/does-r-have-quote-like-operators-like-perls-qw}{stackoverflow/qw}
 #' and \href{https://github.com/jebyrnes/multifunc/blob/master/R/qw.R}{github/Jarrett Byrnes}
 #' 
+#' @export
 #' @param x A single string of elements to be split at whitespace chars.
 #' @return A vector of quoted words.
 #' @keywords internal
@@ -112,3 +115,15 @@ qw <- function(x) unlist(strsplit(x, "[[:space:]]+"))
 #' @param ... Unquoted words, separated by commas.
 #' @export
 qc <- function(...) sapply(match.call()[-1], deparse)
+
+
+# CRAN Workaround for unexported useful tidyverse internals
+# https://stackoverflow.com/questions/32535773/using-un-exported-function-from-another-r-package
+ggplot2__ggname <- utils::getFromNamespace("ggname", "ggplot2")
+ggplot2__rd_aesthetics <- utils::getFromNamespace("rd_aesthetics", "ggplot2")
+ggplot2__scales_list <- utils::getFromNamespace("scales_list", "ggplot2")
+ggplot2__guides_list <- utils::getFromNamespace("guides_list", "ggplot2")
+ggplot2__make_labels <- utils::getFromNamespace("make_labels", "ggplot2")
+ellipsis__dots <- utils::getFromNamespace("dots", "ellipsis")
+scales__force_all <- utils::getFromNamespace("force_all", "scales")
+purrr__as_mapper.default <- utils::getFromNamespace("as_mapper.default", "purrr")
diff --git a/R/add_tracks.R b/R/add_tracks.R
@@ -1,6 +1,7 @@
 #' Add different types of tracks
 #' 
 #' @name add_tracks
+#' @param x object to add the tracks to (e.g. gggenomes, gggenomes_layout)
 #' @param ... named data.frames, i.e. genes=gene_df, snps=snp_df
 #' @param .track_id track_id of the feats that subfeats, sublinks or clusters map to.
 #' @param .transform one of "aa2nuc", "none", "nuc2aa"

diff --git a/R/clusters.R b/R/clusters.R
@@ -28,6 +28,7 @@ add_clusters.gggenomes <- function(x, ..., .track_id = "genes"){
   x
 }
 
+#' @importFrom rlang .data
 #' @export
 add_clusters.gggenomes_layout <- function(x, ..., .track_id = "genes"){
   if(!has_dots()){
@@ -42,7 +43,7 @@ add_clusters.gggenomes_layout <- function(x, ..., .track_id = "genes"){
   tracks <- purrr::map(tracks, function(track){
     require_vars(track, c("feat_id", "cluster_id"))
 
-    track <- filter(track, feat_id %in% x$feats[[pid]]$feat_id)
+    track <- dplyr::filter(track, .data$feat_id %in% x$feats[[pid]]$feat_id)
     if(nrow(track) < 1){
       warn(str_glue("No matches between clusters and parent track based on ",
           "`track_id`. Check your IDs and arguments"))
@@ -79,10 +80,10 @@ add_clusters.gggenomes_layout <- function(x, ..., .track_id = "genes"){
 }
 
 cluster2sublinks <- function(x, parent_track){
-  x %>% split_by(cluster_id) %>%
+  x %>% split_by(.data$cluster_id) %>%
     purrr::keep(~nrow(.) > 1) %>% # links need at least 2 members, ignore singletons
     purrr::map_df(.id = "cluster_id", function(g){
-      mat <- combn(g$feat_id, 2, simplify=TRUE)
+      mat <- utils::combn(g$feat_id, 2, simplify=TRUE)
       tibble(feat_id = mat[1,], feat_id2 = mat[2,])
     })
 }
diff --git a/R/data.R b/R/data.R
@@ -13,7 +13,7 @@
 #'   \item{length}{length of the sequence}
 #' }
 #' @source
-#' * Publication: \url{http://dx.doi.org/10.1101/2020.11.30.404863}
+#' * Publication: \doi{10.1101/2020.11.30.404863}
 #' * Raw data: \url{https://github.com/thackl/cb-emales}
 #' * Derived & bundled data: `ex("emales/emales.fna")`
 "emale_seqs"
@@ -44,7 +44,7 @@
 #'   \item{geom_id}{an identifier telling which features should be plotted as one item (usually CDS and mRNA of the same gene)}
 #' }
 #' @source
-#' * Publication: \url{http://dx.doi.org/10.1101/2020.11.30.404863}
+#' * Publication: \doi{10.1101/2020.11.30.404863}
 #' * Raw data: \url{https://github.com/thackl/cb-emales}
 #' * Derived & bundled data: `ex("emales/emales.gff")`
 "emale_genes"
@@ -66,10 +66,11 @@
 #'   \item{score}{score of the annotation}
 #'   \item{phase}{For "CDS" features indicates where the next codon begins relative to the 5' start}
 #'   \item{name}{name of the feature}
+#'   \item{width}{end-start+1}
 #'   \item{geom_id}{an identifier telling which features should be plotted as one item (usually CDS and mRNA of the same gene)}
 #' }
 #' @source
-#' * Publication: \url{http://dx.doi.org/10.1101/2020.11.30.404863}
+#' * Publication: \doi{10.1101/2020.11.30.404863}
 #' * Raw data: \url{https://github.com/thackl/cb-emales}
 #' * Derived & bundled data: `ex("emales/emales-tirs.gff")`
 "emale_tirs"
@@ -94,7 +95,7 @@
 #'   \item{geom_id}{an identifier telling which features should be plotted as one item (usually CDS and mRNA of the same gene)}
 #' }
 #' @source
-#' * Publication: \url{http://dx.doi.org/10.1101/2020.11.30.404863}
+#' * Publication: \doi{10.1101/2020.11.30.404863}
 #' * Raw data: \url{https://github.com/thackl/cb-emales}
 #' * Derived & bundled data: `ex("emales/emales-ngaros.gff")`
 "emale_ngaros"
@@ -132,7 +133,7 @@
 #'   \item{length2}{length of the sequence}
 #'   \item{start2}{start of the feature on the sequence}
 #'   \item{end2}{end of the feature on the sequence}
-#'   \item{...}{see \url{https://github.com/lh3/miniasm/blob/master/PAF.md} for additional columns}
+#'   \item{map_match, map_length, map_quality, NM, ms, AS, nn, tp, cm, s1, de, rl, cg}{see \url{https://github.com/lh3/miniasm/blob/master/PAF.md} for additional columns}
 #' }
 #' @source
 #' * Derived & bundled data: `ex("emales/emales.paf")`
@@ -147,31 +148,12 @@
 #'   \item{file_id}{name of the file the data was read from}
 #'   \item{feat_id}{identifier of the first feature in the alignment}
 #'   \item{feat_id2}{identifier of the second feature in the alignment}
-#'   \item{pident, ...}{see \url{https://github.com/seqan/lambda/wiki/BLAST-Output-Formats} for BLAST-tabular format columns}
+#'   \item{pident, length, mismatch, gapopen, start, end, start2, end2, evalue, bitscore}{see \url{https://github.com/seqan/lambda/wiki/BLAST-Output-Formats} for BLAST-tabular format columns}
 #' }
 #' @source
 #' * Derived & bundled data: `ex("emales/emales-prot-ava.o6")`
 "emale_prot_ava"
 
-#' Alignments of 6 EMALE proteomes against Uniref50
-#'
-#' One row per alignment. Alignments were computed with mmseqs2 (blast-like), and
-#' filtered by evalue (<1e-20).
-#'
-#' @format A data frame with 509 rows and 16 columns
-#' \describe{
-#'   \item{file_id}{name of the file the data was read from}
-#'   \item{feat_id}{identifier of the first feature in the alignment}
-#'   \item{feat_id2}{identifier of the second feature in the alignment}
-#'   \item{pident, ...}{see \url{https://github.com/seqan/lambda/wiki/BLAST-Output-Formats} for BLAST-tabular format columns}
-#'   \item{seq_head}{full sequence header of the emale protein}
-#'   \item{seq_head2}{full sequence header of the Uniref50 protein}
-#'   \item{taxname}{one of the 4 most abundant taxonomic names among the Uniref50 hits or NA}
-#' }
-#' @source
-#' * Derived & bundled data: `ex("emales/emales-prot-uniref50.tsv")`
-"emale_prot_uniref50"
-
 #' Clusters of orthologs of 6 EMALE proteomes
 #'
 #' One row per feature. Clusters are based on manual curation.