From 2d9acb8b335a9dc281b7a4b460706e90e1a3d218 Mon Sep 17 00:00:00 2001 From: Awa Synthia Date: Sat, 5 Oct 2024 08:58:46 +0300 Subject: [PATCH 01/61] Add parameter definitions to summarize.R Signed-off-by: Awa Synthia --- NAMESPACE | 1 - R/summarize.R | 157 +++++++++++++++++++++++++++++++---------- man/count_bycol.Rd | 22 +++++- man/elements2words.Rd | 23 ++++-- man/filter_freq.Rd | 10 ++- man/summ.DA.Rd | 13 +++- man/summ.DA.byLin.Rd | 9 ++- man/summ.GC.Rd | 14 +++- man/summ.GC.byDALin.Rd | 15 +++- man/summarize_bylin.Rd | 15 +++- man/total_counts.Rd | 24 +++++-- man/words2wc.Rd | 11 ++- 12 files changed, 249 insertions(+), 65 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 16cf0813..9d73120a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -131,7 +131,6 @@ importFrom(dplyr,if_else) importFrom(dplyr,left_join) importFrom(dplyr,mutate) importFrom(dplyr,n) -importFrom(dplyr,n_distinct) importFrom(dplyr,pull) importFrom(dplyr,relocate) importFrom(dplyr,right_join) diff --git a/R/summarize.R b/R/summarize.R index a9b13e43..e03ca463 100644 --- a/R/summarize.R +++ b/R/summarize.R @@ -91,18 +91,31 @@ filter_by_doms <- function(prot, column = "DomArch", doms_keep = c(), doms_remov ## Function to obtain element counts (DA, GC) #' Count Bycol #' -#' @param prot -#' @param column -#' @param min.freq +#' @param prot A data frame containing the dataset to analyze, typically with +#' multiple columns including the one specified by the `column` parameter. +#' @param column A character string specifying the name of the column to analyze. +#' The default is "DomArch". +#' @param min.freq An integer specifying the minimum frequency an element must +#' have to be included in the output. Default is 1. 
#' #' @importFrom dplyr arrange as_tibble filter select #' -#' @return Describe return, in detail +#' @return A tibble with two columns: +#' \describe{ +#' \item{`column`}{The unique elements from the specified column +#' (e.g., "DomArch").} +#' \item{`freq`}{The frequency of each element, i.e., the number of times +#' each element appears in the specified column.} +#' } +#' The tibble is filtered to only include elements that have a frequency +#' greater than or equal to `min.freq` and does not include elements with `NA` +#' values or those starting with a hyphen ("-"). +#' #' @export #' #' @examples #' \dontrun{ -#' count_bycol() +#' count_bycol(prot = my_data, column = "DomArch", min.freq = 10) #' } count_bycol <- function(prot = prot, column = "DomArch", min.freq = 1) { counts <- prot %>% @@ -123,19 +136,30 @@ count_bycol <- function(prot = prot, column = "DomArch", min.freq = 1) { #' Break string ELEMENTS into WORDS for domain architecture (DA) and genomic #' context (GC) #' -#' @param prot [dataframe] -#' @param column [string] column name -#' @param conversion_type [string] type of conversion: 'da2doms': domain architectures to -#' domains. 'gc2da' genomic context to domain architectures +#' @param prot A dataframe containing the dataset to analyze. The specified +#' `column` contains the string elements to be processed. +#' @param column A character string specifying the name of the column to analyze. +#' Default is "DomArch". +#' @param conversion_type A character string specifying the type of conversion. 
+#' Two options are available: +#' \describe{ +#' \item{`da2doms`}{Convert domain architectures into individual domains by +#' replacing `+` symbols with spaces.} +#' \item{`gc2da`}{Convert genomic context into domain architectures by +#' replacing directional symbols (`<-`, `->`, and `|`) with spaces.} +#' } #' #' @importFrom dplyr pull #' @importFrom stringr str_replace_all #' -#' @return [string] with words delimited by spaces +#' @return A single string where elements are delimited by spaces. The function +#' performs necessary substitutions based on the `conversion_type` and cleans up +#' extraneous characters like newlines, tabs, and multiple spaces. #' #' @examples #' \dontrun{ -#' tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> elements2words() +#' tibble::tibble(DomArch = c("aaa+bbb", +#' "a+b", "b+c", "b-c")) |> elements2words() #' } #' elements2words <- function(prot, column = "DomArch", conversion_type = "da2doms") { @@ -175,11 +199,19 @@ elements2words <- function(prot, column = "DomArch", conversion_type = "da2doms" #' @description #' Get word counts (wc) [DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)] #' -#' @param string +#' @param string A character string containing the elements (words) to count. +#' This would typically be a space-delimited string representing domain +#' architectures or genomic contexts. 
#' -#' @importFrom dplyr as_tibble filter +#' @importFrom dplyr as_tibble filter arrange +#' @importFrom stringr str_replace_all #' -#' @return [tbl_df] table with 2 columns: 1) words & 2) counts/frequency +#' @return A tibble (tbl_df) with two columns: +#' \describe{ +#' \item{`words`}{A column containing the individual words +#' (domains or domain architectures).} +#' \item{`freq`}{A column containing the frequency counts for each word.} +#' } #' #' @examples #' \dontrun{ @@ -219,10 +251,15 @@ words2wc <- function(string) { ## Function to filter based on frequencies #' Filter Frequency #' -#' @param x -#' @param min.freq +#' @param x A tibble (tbl_df) containing at least two columns: one for +#' elements (e.g., `words`) and one for their frequency (e.g., `freq`). +#' @param min.freq A numeric value specifying the minimum frequency threshold. +#' Only elements with frequencies greater than or equal to this value will be +#' retained. +#' +#' @return A tibble with the same structure as `x`, but filtered to include +#' only rows where the frequency is greater than or equal to `min.freq`. #' -#' @return Describe return, in detail #' @export #' #' @examples @@ -239,15 +276,20 @@ filter_freq <- function(x, min.freq) { ######################### #' Summarize by Lineage #' -#' @param prot -#' @param column -#' @param by -#' @param query +#' @param prot A dataframe or tibble containing the data. +#' @param column A string representing the column to be summarized +#' (e.g., `DomArch`). Default is "DomArch". +#' @param by A string representing the grouping column (e.g., `Lineage`). +#' Default is "Lineage". +#' @param query A string specifying the query pattern for filtering the target +#' column. Use "all" to skip filtering and include all rows. 
#' #' @importFrom dplyr arrange filter group_by summarise #' @importFrom rlang sym #' -#' @return Describe return, in detail +#' @return A tibble summarizing the counts of occurrences of elements in +#' the `column`, grouped by the `by` column. The result includes the number +#' of occurrences (`count`) and is arranged in descending order of count. #' @export #' #' @examples @@ -283,11 +325,17 @@ summarize_bylin <- function(prot = "prot", column = "DomArch", by = "Lineage", #' Function to summarize and retrieve counts by Domains & Domains+Lineage #' #' -#' @param x +#' @param x A dataframe or tibble containing the data. It must have columns +#' named `DomArch` and `Lineage`. #' #' @importFrom dplyr arrange count desc filter group_by summarise #' -#' @return Describe return, in detail +#' @return A tibble summarizing the counts of unique domain architectures +#' (`DomArch`) per lineage (`Lineage`). The resulting table contains three +#' columns: `DomArch`, `Lineage`, and `count`, which indicates the frequency +#' of each domain architecture for each lineage. The results are arranged in +#' descending order of `count`. +#' #' @export #' #' @examples @@ -309,11 +357,18 @@ summ.DA.byLin <- function(x) { #' @description #' Function to retrieve counts of how many lineages a DomArch appears in #' -#' @param x +#' @param x A dataframe or tibble containing the data. It must have a column +#' named `DomArch` and a count column, such as `count`, which represents the +#' occurrences of each architecture in various lineages. #' #' @importFrom dplyr arrange group_by filter summarise #' -#' @return Describe return, in detail +#' @return A tibble summarizing each unique `DomArch`, along with the following +#' columns: +#' - `totalcount`: The total occurrences of each `DomArch` across all lineages. +#' - `totallin`: The total number of unique lineages in which each `DomArch` +#' appears. +#' The results are arranged in descending order of `totallin` and `totalcount`. 
#' @export #' #' @examples @@ -332,11 +387,20 @@ summ.DA <- function(x) { #' summ.GC.byDALin #' -#' @param x +#' @param x A dataframe or tibble containing the data. It must have columns +#' named `GenContext`, `DomArch`, and `Lineage`. #' #' @importFrom dplyr arrange desc filter group_by n summarise #' -#' @return Define return, in detail +#' @return A tibble summarizing each unique combination of `GenContext`, +#' `DomArch`, and `Lineage`, along with the following columns: +#' - `GenContext`: The genomic context for each entry. +#' - `DomArch`: The domain architecture for each entry. +#' - `Lineage`: The lineage associated with each entry. +#' - `count`: The total number of occurrences for each combination of +#' `GenContext`, `DomArch`, and `Lineage`. +#' +#' The results are arranged in descending order of `count`. #' @export #' #' @examples @@ -382,11 +446,19 @@ summ.GC.byLin <- function(x) { #' summ.GC #' -#' @param x +#' @param x A dataframe or tibble containing the data. It must have columns +#' named `GenContext`, `DomArch`, and `Lineage`. #' -#' @importFrom dplyr arrange desc filter group_by n_distinct summarise +#' @importFrom dplyr arrange desc filter group_by n summarise #' -#' @return Describe return, in detail +#' @return A tibble summarizing each unique combination of `GenContext` and +#' `Lineage`, along with the following columns: +#' - `GenContext`: The genomic context for each entry. +#' - `Lineage`: The lineage associated with each entry. +#' - `count`: The total number of occurrences for each combination of +#' `GenContext` and `Lineage`. +#' +#' The results are arranged in descending order of `count`. #' @export #' #' @examples @@ -419,16 +491,27 @@ summ.GC <- function(x) { #' #' @param prot A data frame that must contain columns: #' \itemize{\item Either 'GenContext' or 'DomArch.norep' \item count} -#' @param column Character. The column to summarize -#' @param lineage_col -#' @param cutoff Numeric. Cutoff for total count. 
Counts below cutoff value will not be shown. Default is 0. -#' @param RowsCutoff -#' @param digits +#' @param column Character. The column to summarize, default is "DomArch". +#' @param lineage_col Character. The name of the lineage column, default is +#' "Lineage". +#' @param cutoff Numeric. Cutoff for total count. Counts below this cutoff value +#' will not be shown. Default is 0. +#' @param RowsCutoff Logical. If TRUE, filters based on cumulative percentage +#' cutoff. Default is FALSE. +#' @param digits Numeric. Number of decimal places for percentage columns. +#' Default is 2. +#' #' #' @importFrom dplyr arrange distinct filter group_by left_join mutate select summarise ungroup #' @importFrom rlang as_string sym #' -#' @return Define return, in detail +#' @return A data frame with the following columns: +#' - `{{ column }}`: Unique values from the specified column. +#' - `totalcount`: The total count of occurrences for each unique value in +#' the specified column. +#' - `IndividualCountPercent`: The percentage of each `totalcount` relative to +#' the overall count. +#' - `CumulativePercent`: The cumulative percentage of total counts. #' @export #' #' @note Please refer to the source code if you have alternate file formats and/or diff --git a/man/count_bycol.Rd b/man/count_bycol.Rd index 884c0f0f..946a7ea2 100644 --- a/man/count_bycol.Rd +++ b/man/count_bycol.Rd @@ -7,16 +7,32 @@ count_bycol(prot = prot, column = "DomArch", min.freq = 1) } \arguments{ -\item{min.freq}{} +\item{prot}{A data frame containing the dataset to analyze, typically with +multiple columns including the one specified by the \code{column} parameter.} + +\item{column}{A character string specifying the name of the column to analyze. +The default is "DomArch".} + +\item{min.freq}{An integer specifying the minimum frequency an element must +have to be included in the output. 
Default is 1.} } \value{ -Describe return, in detail +A tibble with two columns: +\describe{ +\item{\code{column}}{The unique elements from the specified column +(e.g., "DomArch").} +\item{\code{freq}}{The frequency of each element, i.e., the number of times +each element appears in the specified column.} +} +The tibble is filtered to only include elements that have a frequency +greater than or equal to \code{min.freq} and does not include elements with \code{NA} +values or those starting with a hyphen ("-"). } \description{ Count Bycol } \examples{ \dontrun{ -count_bycol() +count_bycol(prot = my_data, column = "DomArch", min.freq = 10) } } diff --git a/man/elements2words.Rd b/man/elements2words.Rd index 80fcbafb..bda447db 100644 --- a/man/elements2words.Rd +++ b/man/elements2words.Rd @@ -7,15 +7,25 @@ elements2words(prot, column = "DomArch", conversion_type = "da2doms") } \arguments{ -\item{prot}{\link{dataframe}} +\item{prot}{A dataframe containing the dataset to analyze. The specified +\code{column} contains the string elements to be processed.} -\item{column}{\link{string} column name} +\item{column}{A character string specifying the name of the column to analyze. +Default is "DomArch".} -\item{conversion_type}{\link{string} type of conversion: 'da2doms': domain architectures to -domains. 'gc2da' genomic context to domain architectures} +\item{conversion_type}{A character string specifying the type of conversion. +Two options are available: +\describe{ +\item{\code{da2doms}}{Convert domain architectures into individual domains by +replacing \code{+} symbols with spaces.} +\item{\code{gc2da}}{Convert genomic context into domain architectures by +replacing directional symbols (\verb{<-}, \verb{->}, and \code{|}) with spaces.} +}} } \value{ -\link{string} with words delimited by spaces +A single string where elements are delimited by spaces. 
The function +performs necessary substitutions based on the \code{conversion_type} and cleans up +extraneous characters like newlines, tabs, and multiple spaces. } \description{ Break string ELEMENTS into WORDS for domain architecture (DA) and genomic @@ -23,7 +33,8 @@ context (GC) } \examples{ \dontrun{ -tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> elements2words() +tibble::tibble(DomArch = c("aaa+bbb", +"a+b", "b+c", "b-c")) |> elements2words() } } diff --git a/man/filter_freq.Rd b/man/filter_freq.Rd index ce4db5ac..9dfba73b 100644 --- a/man/filter_freq.Rd +++ b/man/filter_freq.Rd @@ -7,10 +7,16 @@ filter_freq(x, min.freq) } \arguments{ -\item{min.freq}{} +\item{x}{A tibble (tbl_df) containing at least two columns: one for +elements (e.g., \code{words}) and one for their frequency (e.g., \code{freq}).} + +\item{min.freq}{A numeric value specifying the minimum frequency threshold. +Only elements with frequencies greater than or equal to this value will be +retained.} } \value{ -Describe return, in detail +A tibble with the same structure as \code{x}, but filtered to include +only rows where the frequency is greater than or equal to \code{min.freq}. } \description{ Filter Frequency diff --git a/man/summ.DA.Rd b/man/summ.DA.Rd index 13717140..01d15b3c 100644 --- a/man/summ.DA.Rd +++ b/man/summ.DA.Rd @@ -7,10 +7,19 @@ summ.DA(x) } \arguments{ -\item{x}{} +\item{x}{A dataframe or tibble containing the data. It must have a column +named \code{DomArch} and a count column, such as \code{count}, which represents the +occurrences of each architecture in various lineages.} } \value{ -Describe return, in detail +A tibble summarizing each unique \code{DomArch}, along with the following +columns: +\itemize{ +\item \code{totalcount}: The total occurrences of each \code{DomArch} across all lineages. +\item \code{totallin}: The total number of unique lineages in which each \code{DomArch} +appears. 
+The results are arranged in descending order of \code{totallin} and \code{totalcount}. +} } \description{ Function to retrieve counts of how many lineages a DomArch appears in diff --git a/man/summ.DA.byLin.Rd b/man/summ.DA.byLin.Rd index 66555fd5..d88e5d37 100644 --- a/man/summ.DA.byLin.Rd +++ b/man/summ.DA.byLin.Rd @@ -7,10 +7,15 @@ summ.DA.byLin(x) } \arguments{ -\item{x}{} +\item{x}{A dataframe or tibble containing the data. It must have columns +named \code{DomArch} and \code{Lineage}.} } \value{ -Describe return, in detail +A tibble summarizing the counts of unique domain architectures +(\code{DomArch}) per lineage (\code{Lineage}). The resulting table contains three +columns: \code{DomArch}, \code{Lineage}, and \code{count}, which indicates the frequency +of each domain architecture for each lineage. The results are arranged in +descending order of \code{count}. } \description{ Function to summarize and retrieve counts by Domains & Domains+Lineage diff --git a/man/summ.GC.Rd b/man/summ.GC.Rd index fa52a6bf..2ec4d651 100644 --- a/man/summ.GC.Rd +++ b/man/summ.GC.Rd @@ -7,10 +7,20 @@ summ.GC(x) } \arguments{ -\item{x}{} +\item{x}{A dataframe or tibble containing the data. It must have columns +named \code{GenContext}, \code{DomArch}, and \code{Lineage}.} } \value{ -Describe return, in detail +A tibble summarizing each unique combination of \code{GenContext} and +\code{Lineage}, along with the following columns: +\itemize{ +\item \code{GenContext}: The genomic context for each entry. +\item \code{Lineage}: The lineage associated with each entry. +\item \code{count}: The total number of occurrences for each combination of +\code{GenContext} and \code{Lineage}. +} + +The results are arranged in descending order of \code{count}. 
} \description{ summ.GC diff --git a/man/summ.GC.byDALin.Rd b/man/summ.GC.byDALin.Rd index 34c9f84d..7fc8d443 100644 --- a/man/summ.GC.byDALin.Rd +++ b/man/summ.GC.byDALin.Rd @@ -7,10 +7,21 @@ summ.GC.byDALin(x) } \arguments{ -\item{x}{} +\item{x}{A dataframe or tibble containing the data. It must have columns +named \code{GenContext}, \code{DomArch}, and \code{Lineage}.} } \value{ -Define return, in detail +A tibble summarizing each unique combination of \code{GenContext}, +\code{DomArch}, and \code{Lineage}, along with the following columns: +\itemize{ +\item \code{GenContext}: The genomic context for each entry. +\item \code{DomArch}: The domain architecture for each entry. +\item \code{Lineage}: The lineage associated with each entry. +\item \code{count}: The total number of occurrences for each combination of +\code{GenContext}, \code{DomArch}, and \code{Lineage}. +} + +The results are arranged in descending order of \code{count}. } \description{ summ.GC.byDALin diff --git a/man/summarize_bylin.Rd b/man/summarize_bylin.Rd index a94c54c1..92b93652 100644 --- a/man/summarize_bylin.Rd +++ b/man/summarize_bylin.Rd @@ -7,10 +7,21 @@ summarize_bylin(prot = "prot", column = "DomArch", by = "Lineage", query) } \arguments{ -\item{query}{} +\item{prot}{A dataframe or tibble containing the data.} + +\item{column}{A string representing the column to be summarized +(e.g., \code{DomArch}). Default is "DomArch".} + +\item{by}{A string representing the grouping column (e.g., \code{Lineage}). +Default is "Lineage".} + +\item{query}{A string specifying the query pattern for filtering the target +column. Use "all" to skip filtering and include all rows.} } \value{ -Describe return, in detail +A tibble summarizing the counts of occurrences of elements in +the \code{column}, grouped by the \code{by} column. The result includes the number +of occurrences (\code{count}) and is arranged in descending order of count. 
} \description{ Summarize by Lineage diff --git a/man/total_counts.Rd b/man/total_counts.Rd index 49db8822..53d70096 100644 --- a/man/total_counts.Rd +++ b/man/total_counts.Rd @@ -17,14 +17,30 @@ total_counts( \item{prot}{A data frame that must contain columns: \itemize{\item Either 'GenContext' or 'DomArch.norep' \item count}} -\item{column}{Character. The column to summarize} +\item{column}{Character. The column to summarize, default is "DomArch".} -\item{cutoff}{Numeric. Cutoff for total count. Counts below cutoff value will not be shown. Default is 0.} +\item{lineage_col}{Character. The name of the lineage column, default is +"Lineage".} -\item{digits}{} +\item{cutoff}{Numeric. Cutoff for total count. Counts below this cutoff value +will not be shown. Default is 0.} + +\item{RowsCutoff}{Logical. If TRUE, filters based on cumulative percentage +cutoff. Default is FALSE.} + +\item{digits}{Numeric. Number of decimal places for percentage columns. +Default is 2.} } \value{ -Define return, in detail +A data frame with the following columns: +\itemize{ +\item \code{{{ column }}}: Unique values from the specified column. +\item \code{totalcount}: The total count of occurrences for each unique value in +the specified column. +\item \code{IndividualCountPercent}: The percentage of each \code{totalcount} relative to +the overall count. +\item \code{CumulativePercent}: The cumulative percentage of total counts. +} } \description{ Creates a data frame with a totalcount column diff --git a/man/words2wc.Rd b/man/words2wc.Rd index 1eba5dc4..69d006d5 100644 --- a/man/words2wc.Rd +++ b/man/words2wc.Rd @@ -7,10 +7,17 @@ words2wc(string) } \arguments{ -\item{string}{} +\item{string}{A character string containing the elements (words) to count. 
+This would typically be a space-delimited string representing domain +architectures or genomic contexts.} } \value{ -\link{tbl_df} table with 2 columns: 1) words & 2) counts/frequency +A tibble (tbl_df) with two columns: +\describe{ +\item{\code{words}}{A column containing the individual words +(domains or domain architectures).} +\item{\code{freq}}{A column containing the frequency counts for each word.} +} } \description{ Get word counts (wc) \link{DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)} From 30d4bf3ab57c6296a81d6f792911c87586ca896e Mon Sep 17 00:00:00 2001 From: Seyi Kuforiji Date: Sat, 5 Oct 2024 12:29:37 +0100 Subject: [PATCH 02/61] usethis::pr_init("Implement error handling in acc2lin.R functions - Added validation checks for input parameters (accessions, ipg_file, assembly_path, lineagelookup_path). - Included error messages for missing or invalid inputs and file existence checks. - Wrapped main logic in tryCatch for graceful error handling during execution. ") --- R/acc2lin.R | 267 ++++++++++++++++++++++++++++++++++------------ man/acc2lin.Rd | 3 +- man/efetch_ipg.Rd | 3 +- man/ipg2lin.Rd | 3 +- man/sink.reset.Rd | 1 + 5 files changed, 207 insertions(+), 70 deletions(-) diff --git a/R/acc2lin.R b/R/acc2lin.R index f8d71949..dfb33da9 100644 --- a/R/acc2lin.R +++ b/R/acc2lin.R @@ -10,6 +10,8 @@ #' Sink Reset #' #' @return No return, but run to close all outstanding `sink()`s +#' and handles any errors or warnings that occur during the process. 
+#' #' @export #' #' @examples @@ -17,9 +19,19 @@ #' sink.reset() #' } sink.reset <- function() { + # Handle all errors and warnings + tryCatch({ for (i in seq_len(sink.number())) { - sink(NULL) + sink(NULL) } + print("All sinks closed") + }, error = function(e) { + print(paste("Error: ", e$message)) + }, warning = function(w) { + print(paste("Warning: ", w$message)) + }, finally = { + print("resetSink function execution completed.") + }) } @@ -44,23 +56,61 @@ sink.reset <- function() { #' add_lins() #' } add_lins <- function(df, acc_col = "AccNum", assembly_path, - lineagelookup_path, ipgout_path = NULL, plan = "sequential") { - s_acc_col <- sym(acc_col) - accessions <- df %>% pull(acc_col) - lins <- acc2lin(accessions, assembly_path, lineagelookup_path, ipgout_path, plan) + lineagelookup_path, ipgout_path = NULL, + plan = "sequential") { + # check for validate inputs + if (!is.data.frame(df)) { + stop("Input 'df' must be a data frame.") + } + + if (!acc_col %in% colnames(df)) { + stop(paste("Column", acc_col, "not found in data frame.")) + } + + # Ensure paths are character strings + if (!is.character(assembly_path) || !is.character(lineagelookup_path)) { + stop("Both 'assembly_path' and + 'lineagelookup_path' must be character strings.") + } + + # Ensure paths exist + if (!file.exists(assembly_path)) { + stop(paste("Assembly file not found at:", assembly_path)) + } - # Drop a lot of the unimportant columns for now? will make merging much easier - lins <- lins[, c( + if (!file.exists(lineagelookup_path)) { + stop(paste("Lineage lookup file not found at:", lineagelookup_path)) + } + tryCatch({ + # Attempt to add lineages + acc_col <- sym(acc_col) + accessions <- df %>% pull(acc_col) + lins <- acc2lin( + accessions, assembly_path, lineagelookup_path, ipgout_path, plan + ) + + # Drop a lot of the unimportant columns for now? 
+ # will make merging much easier + lins <- lins[, c( "Strand", "Start", "Stop", "Nucleotide Accession", "Source", "Id", "Strain" - ) := NULL] - lins <- unique(lins) + ) := NULL] + lins <- unique(lins) + + # dup <- lins %>% group_by(Protein) %>% + # summarize(count = n()) %>% filter(count > 1) %>% + # pull(Protein) - # dup <- lins %>% group_by(Protein) %>% summarize(count = n()) %>% filter(count > 1) %>% - # pull(Protein) + merged <- merge(df, lins, by.x = acc_col, by.y = "Protein", all.x = TRUE) + return(merged) + }, error = function(e) { + print(paste("Error: ", e$message)) + }, warning = function(w) { + print(paste("Warning: ", w$message)) + }, finally = { + print("addLineages function execution completed.") + }) - merged <- merge(df, lins, by.x = acc_col, by.y = "Protein", all.x = TRUE) - return(merged) } @@ -68,7 +118,8 @@ add_lins <- function(df, acc_col = "AccNum", assembly_path, #' #' @author Samuel Chen, Janani Ravi #' -#' @description This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set +#' @description This function combines 'efetch_ipg()' +#' and 'ipg2lin()' to map a set #' of protein accessions to their assembly (GCA_ID), tax ID, and lineage. #' #' @param accessions Character vector of protein accessions @@ -76,7 +127,8 @@ add_lins <- function(df, acc_col = "AccNum", assembly_path, #' This file can be generated using the "DownloadAssemblySummary()" function #' @param lineagelookup_path String of the path to the lineage lookup file #' (taxid to lineage mapping). This file can be generated using the -#' @param ipgout_path Path to write the results of the efetch run of the accessions +#' @param ipgout_path Path to write the results +#' of the efetch run of the accessions #' on the ipg database. If NULL, the file will not be written. 
Defaults to NULL #' @param plan #' @@ -87,27 +139,43 @@ add_lins <- function(df, acc_col = "AccNum", assembly_path, #' \dontrun{ #' acc2lin() #' } -acc2lin <- function(accessions, assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "sequential") { - tmp_ipg <- F - if (is.null(ipgout_path)) { - tmp_ipg <- T - ipgout_path <- tempfile("ipg", fileext = ".txt") - } +acc2lin <- function(accessions, assembly_path, + lineagelookup_path, ipgout_path = NULL, + plan = "sequential") { + tmp_ipg <- F + if (is.null(ipgout_path)) { + tmp_ipg <- T + ipgout_path <- tempfile("ipg", fileext = ".txt") + } + + lins <- NULL + tryCatch({ + # Attempt to fetch IPG efetch_ipg(accessions, out_path = ipgout_path, plan) + # Attempt to process IPG to lineages lins <- ipg2lin(accessions, ipgout_path, assembly_path, lineagelookup_path) + }, error = function(e) { + print(paste("An error occurred: ", e$message)) + }, warning = function(w) { + print(paste("Warning: ", w$message)) + }, finally = { + print("acc2lin function execution completed.") + }) - if (tmp_ipg) { - unlink(tempdir(), recursive = T) - } - return(lins) + if (tmp_ipg) { + unlink(tempdir(), recursive = T) + } + return(lins) } + #' efetch_ipg #' #' @author Samuel Chen, Janani Ravi #' -#' @description Perform efetch on the ipg database and write the results to out_path +#' @description Perform efetch on the ipg database +#' and write the results to out_path #' #' @param accnums Character vector containing the accession numbers to query on #' the ipg database @@ -126,57 +194,84 @@ acc2lin <- function(accessions, assembly_path, lineagelookup_path, ipgout_path = #' efetch_ipg() #' } efetch_ipg <- function(accnums, out_path, plan = "sequential") { - if (length(accnums) > 0) { - partition <- function(in_data, groups) { - # \\TODO This function should be defined outside of efetch_ipg(). 
It can be non-exported/internal - # Partition data to limit number of queries per second for rentrez fetch: - # limit of 10/second w/ key - l <- length(in_data) - - partitioned <- list() - for (i in 1:groups) - { - partitioned[[i]] <- in_data[seq.int(i, l, groups)] - } - - return(partitioned) - } + # Argument validation + if (!is.character(accnums) || length(accnums) == 0) { + stop("Error: 'accnums' must be a non-empty character vector.") + } + + if (!is.character(out_path) || nchar(out_path) == 0) { + stop("Error: 'out_path' must be a non-empty string.") + } + + if (!is.function(plan)) { + stop("Error: 'plan' must be a valid plan function.") + } + if (length(accnums) > 0) { + partition <- function(in_data, groups) { + # \\TODO This function should be defined outside of efetch_ipg(). + # It can be non-exported/internal + # Partition data to limit number of queries per second for rentrez fetch: + # limit of 10/second w/ key + l <- length(in_data) - plan(strategy = plan, .skip = T) - - - min_groups <- length(accnums) / 200 - groups <- min(max(min_groups, 15), length(accnums)) - partitioned_acc <- partition(accnums, groups) - sink(out_path) - - a <- future_map(1:length(partitioned_acc), function(x) { - # Avoid hitting the rate API limit - if (x %% 9 == 0) { - Sys.sleep(1) - } - cat( - entrez_fetch( - id = partitioned_acc[[x]], - db = "ipg", - rettype = "xml", - api_key = "YOUR_KEY_HERE" ## Can this be included in public package? 
- ) - ) - }) - sink(NULL) + partitioned <- list() + for (i in 1:groups){ + partitioned[[i]] <- in_data[seq.int(i, l, groups)] + } + + return(partitioned) } + tryCatch({ + # Set the future plan strategy + plan(strategy = plan, .skip = T) + + + min_groups <- length(accnums) / 200 + groups <- min(max(min_groups, 15), length(accnums)) + partitioned_acc <- partition(accnums, groups) + + # Open the sink to the output path + sink(out_path) + + a <- future_map(1:length(partitioned_acc), function(x) { + # Avoid hitting the rate API limit + if (x %% 9 == 0) { + Sys.sleep(1) + } + cat( + entrez_fetch( + id = partitioned_acc[[x]], + db = "ipg", + rettype = "xml", + api_key = "YOUR_KEY_HERE" ## Can this be included in public package? + ) + ) + }) + sink(NULL) + }, error = function(e) { + print(paste("An error occurred: ", e$message)) + }, warning = function(w) { + print(paste("Warning: ", w$message)) + }, finally = { + print("efetch_ipg function execution completed.") + }) + } } + + #' ipg2lin #' #' @author Samuel Chen, Janani Ravi #' -#' @description Takes the resulting file of an efetch run on the ipg database and +#' @description Takes the resulting file +#' of an efetch run on the ipg database and #' #' @param accessions Character vector of protein accessions -#' @param ipg_file Filepath to the file containing results of an efetch run on the -#' ipg database. The protein accession in 'accessions' should be contained in this +#' @param ipg_file Filepath to the file +#' containing results of an efetch run on the +#' ipg database. 
The protein accession in +#' 'accessions' should be contained in this #' file #' @param assembly_path String of the path to the assembly_summary path #' This file can be generated using the "DownloadAssemblySummary()" function @@ -195,16 +290,54 @@ efetch_ipg <- function(accnums, out_path, plan = "sequential") { #' } #' ipg2lin <- function(accessions, ipg_file, assembly_path, lineagelookup_path) { + # Argument validation for accessions + if (!is.character(accessions) || length(accessions) == 0) { + stop("Input 'accessions' must be a non-empty character vector.") + } + + # check for validate inputs + if (!is.character(ipg_file)) { + stop("Input 'ipg_file' must be a character string.") + } + # Ensure paths are character strings + if (!is.character(assembly_path) || !is.character(lineagelookup_path)) { + stop("Both 'assembly_path' and + 'lineagelookup_path' must be character strings.") + } + + # Ensure paths exist + if (!file.exists(assembly_path)) { + stop(paste("Assembly file not found at:", assembly_path)) + } + + if (!file.exists(lineagelookup_path)) { + stop(paste("Lineage lookup file not found at:", lineagelookup_path)) + } + + try({ + # Attempt to read the IPG file ipg_dt <- fread(ipg_file, sep = "\t", fill = T) + # Filter the IPG data table to only include the accessions ipg_dt <- ipg_dt[Protein %in% accessions] + # Rename the 'Assembly' column to 'GCA_ID' ipg_dt <- setnames(ipg_dt, "Assembly", "GCA_ID") + # Convert the IPG data table to a lineage data table lins <- GCA2Lins(prot_data = ipg_dt, assembly_path, lineagelookup_path) + + # Filter out rows with missing lineage information lins <- lins[!is.na(Lineage)] %>% unique() return(lins) + }, error = function(e) { + print(paste("An error occurred: ", e$message)) + }, warning = function(w) { + print(paste("Warning: ", w$message)) + }, finally = { + print("ipg2lin function execution completed.") + }) } diff --git a/man/acc2lin.Rd b/man/acc2lin.Rd index 6255b290..d3f2468b 100644 --- a/man/acc2lin.Rd +++ 
b/man/acc2lin.Rd @@ -38,7 +38,8 @@ on the ipg database. If NULL, the file will not be written. Defaults to NULL} Describe return, in detail } \description{ -This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set +This function combines 'efetch_ipg()' +and 'ipg2lin()' to map a set of protein accessions to their assembly (GCA_ID), tax ID, and lineage. Function to map protein accession numbers to lineage diff --git a/man/efetch_ipg.Rd b/man/efetch_ipg.Rd index ec5b6bcb..1fbb9d92 100644 --- a/man/efetch_ipg.Rd +++ b/man/efetch_ipg.Rd @@ -23,7 +23,8 @@ the ipg database} Describe return, in detail } \description{ -Perform efetch on the ipg database and write the results to out_path +Perform efetch on the ipg database +and write the results to out_path Perform efetch on the ipg database and write the results to out_path } diff --git a/man/ipg2lin.Rd b/man/ipg2lin.Rd index 3a14eada..453668b0 100644 --- a/man/ipg2lin.Rd +++ b/man/ipg2lin.Rd @@ -38,7 +38,8 @@ This file can be generated using the "DownloadAssemblySummary()" function} Describe return, in detail } \description{ -Takes the resulting file of an efetch run on the ipg database and +Takes the resulting file +of an efetch run on the ipg database and Takes the resulting file of an efetch run on the ipg database and append lineage, and taxid columns diff --git a/man/sink.reset.Rd b/man/sink.reset.Rd index a31b841d..64087c49 100644 --- a/man/sink.reset.Rd +++ b/man/sink.reset.Rd @@ -8,6 +8,7 @@ sink.reset() } \value{ No return, but run to close all outstanding \code{sink()}s +and handles any errors or warnings that occur during the process. } \description{ Sink Reset From aff97e433e5a0c367dfbb8f284ea200e1876a5da Mon Sep 17 00:00:00 2001 From: teddyCodex <15376476+teddyCodex@users.noreply.github.com> Date: Sat, 5 Oct 2024 16:16:51 +0100 Subject: [PATCH 03/61] Update CONTRIBUTING.md Added a couple of clearer steps to the pull request process. 
--- .github/CONTRIBUTING.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 180ecf6c..5f240176 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -18,8 +18,17 @@ See our guide on [how to create a great issue](https://code-review.tidyverse.org ### Pull request process -* Fork the package and clone onto your computer. If you haven't done this before, we recommend using `usethis::create_from_github("JRaviLab/MolEvolvR", fork = TRUE)`. - +* Fork the package and clone onto your computer. If you haven't done this before, we recommend using `usethis`. + +* Install and load the `usethis` package with: + ``` + install.packages("usethis") + library(usethis) + ``` +* Clone and fork the MolEvolvR package using: + ``` + usethis::create_from_github("JRaviLab/MolEvolvR", fork = TRUE) + ``` * Install all development dependencies with `devtools::install_dev_deps()`, and then make sure the package passes R CMD check by running `devtools::check()`. If R CMD check doesn't pass cleanly, it's a good idea to ask for help before continuing. * Create a Git branch for your pull request (PR). We recommend using `usethis::pr_init("brief-description-of-change")`. 
From 3a0376fc7024f6069580ce2059c27510bffb16d0 Mon Sep 17 00:00:00 2001 From: teddyCodex <15376476+teddyCodex@users.noreply.github.com> Date: Sat, 5 Oct 2024 16:29:11 +0100 Subject: [PATCH 04/61] Update CONTRIBUTING.md --- .github/CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 5f240176..9465c683 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -23,7 +23,7 @@ See our guide on [how to create a great issue](https://code-review.tidyverse.org * Install and load the `usethis` package with: ``` install.packages("usethis") - library(usethis) + library("usethis") ``` * Clone and fork the MolEvolvR package using: ``` From 67a6d0eaeded334e6869621a70c781cd917dd3bc Mon Sep 17 00:00:00 2001 From: teddyCodex <15376476+teddyCodex@users.noreply.github.com> Date: Sun, 6 Oct 2024 09:30:55 +0100 Subject: [PATCH 05/61] Update CONTRIBUTING.md to include explicit installation steps and improved clarity for development process - Added explicit instructions for installing and loading the `usethis`, `devtools`, and `lintr` packages. - Overall improvements to make the documentation more user-friendly, especially for new contributors. --- .github/CONTRIBUTING.md | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 9465c683..5db3f961 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -23,15 +23,29 @@ See our guide on [how to create a great issue](https://code-review.tidyverse.org * Install and load the `usethis` package with: ``` install.packages("usethis") + library("usethis") ``` * Clone and fork the MolEvolvR package using: ``` usethis::create_from_github("JRaviLab/MolEvolvR", fork = TRUE) ``` -* Install all development dependencies with `devtools::install_dev_deps()`, and then make sure the package passes R CMD check by running `devtools::check()`. 
- If R CMD check doesn't pass cleanly, it's a good idea to ask for help before continuing. -* Create a Git branch for your pull request (PR). We recommend using `usethis::pr_init("brief-description-of-change")`. +* Install all development dependencies and then make sure the package passes R CMD check using `devtools`: + ``` + install.packages("devtools") + + library("devtools") + + devtools::install_dev_deps() + + devtools::check() + ``` + _If R CMD check doesn't pass cleanly, it's a good idea to ask for help before continuing._ + +* Create a Git branch for your pull request (PR). We recommend using + ``` + usethis::pr_init("brief-description-of-change") + ``` * Make your changes, commit to git, and then create a PR by running `usethis::pr_push()`, and following the prompts in your browser. The title of your PR should briefly describe the change. @@ -44,7 +58,14 @@ See our guide on [how to create a great issue](https://code-review.tidyverse.org * New code should follow the tidyverse [style guide](https://style.tidyverse.org). You can use the [styler](https://CRAN.R-project.org/package=styler) package to apply these styles, but please don't restyle code that has nothing to do with your PR. -* Lint Your Code: Ensure your code adheres to our style guidelines by using [lintr](https://lintr.r-lib.org/): `lintr::lint("path/to/your/file.R")` +* Lint Your Code: Ensure your code adheres to our style guidelines by using [lintr](https://lintr.r-lib.org/): + ``` + install.packages("lintr") + + library("lintr") + + lintr::lint("path/to/your/file.R") + ``` * We use [roxygen2](https://cran.r-project.org/package=roxygen2), with [Markdown syntax](https://cran.r-project.org/web/packages/roxygen2/vignettes/rd-formatting.html), for documentation. 
From d9fa04bc729586ab336275083d67fb75420ac138 Mon Sep 17 00:00:00 2001 From: Awa Synthia Date: Mon, 7 Oct 2024 07:42:08 +0300 Subject: [PATCH 06/61] use one documentation file Signed-off-by: Awa Synthia --- R/summarize.R | 16 ++- man/count_bycol.Rd | 38 ------ man/elements2words.Rd | 40 ------- man/filter_by_doms.Rd | 44 ------- man/filter_freq.Rd | 28 ----- man/summ.DA.Rd | 31 ----- man/summ.DA.byLin.Rd | 27 ----- man/summ.GC.Rd | 32 ----- man/summ.GC.byDALin.Rd | 33 ------ man/summ.GC.byLin.Rd | 22 ---- man/summarize.Rd | 260 +++++++++++++++++++++++++++++++++++++++++ man/summarize_bylin.Rd | 36 ------ man/total_counts.Rd | 58 --------- man/words2wc.Rd | 32 ----- 14 files changed, 274 insertions(+), 423 deletions(-) delete mode 100644 man/count_bycol.Rd delete mode 100644 man/elements2words.Rd delete mode 100644 man/filter_by_doms.Rd delete mode 100644 man/filter_freq.Rd delete mode 100644 man/summ.DA.Rd delete mode 100644 man/summ.DA.byLin.Rd delete mode 100644 man/summ.GC.Rd delete mode 100644 man/summ.GC.byDALin.Rd delete mode 100644 man/summ.GC.byLin.Rd create mode 100644 man/summarize.Rd delete mode 100644 man/summarize_bylin.Rd delete mode 100644 man/total_counts.Rd delete mode 100644 man/words2wc.Rd diff --git a/R/summarize.R b/R/summarize.R index e03ca463..0580c15d 100644 --- a/R/summarize.R +++ b/R/summarize.R @@ -29,6 +29,7 @@ #' #' @return Filtered data frame #' @note There is no need to make the domains 'regex safe', that will be handled by this function +#' @name summarize #' @export #' #' @examples @@ -110,7 +111,7 @@ filter_by_doms <- function(prot, column = "DomArch", doms_keep = c(), doms_remov #' The tibble is filtered to only include elements that have a frequency #' greater than or equal to `min.freq` and does not include elements with `NA` #' values or those starting with a hyphen ("-"). 
-#' +#' @name summarize #' @export #' #' @examples @@ -155,6 +156,7 @@ count_bycol <- function(prot = prot, column = "DomArch", min.freq = 1) { #' @return A single string where elements are delimited by spaces. The function #' performs necessary substitutions based on the `conversion_type` and cleans up #' extraneous characters like newlines, tabs, and multiple spaces. +#' @name summarize #' #' @examples #' \dontrun{ @@ -212,6 +214,8 @@ elements2words <- function(prot, column = "DomArch", conversion_type = "da2doms" #' (domains or domain architectures).} #' \item{`freq`}{A column containing the frequency counts for each word.} #' } +#' +#' @name summarize #' #' @examples #' \dontrun{ @@ -259,6 +263,7 @@ words2wc <- function(string) { #' #' @return A tibble with the same structure as `x`, but filtered to include #' only rows where the frequency is greater than or equal to `min.freq`. +#' @name summarize #' #' @export #' @@ -290,6 +295,7 @@ filter_freq <- function(x, min.freq) { #' @return A tibble summarizing the counts of occurrences of elements in #' the `column`, grouped by the `by` column. The result includes the number #' of occurrences (`count`) and is arranged in descending order of count. +#' @name summarize #' @export #' #' @examples @@ -335,6 +341,7 @@ summarize_bylin <- function(prot = "prot", column = "DomArch", by = "Lineage", #' columns: `DomArch`, `Lineage`, and `count`, which indicates the frequency #' of each domain architecture for each lineage. The results are arranged in #' descending order of `count`. +#' @name summarize #' #' @export #' @@ -369,6 +376,7 @@ summ.DA.byLin <- function(x) { #' - `totallin`: The total number of unique lineages in which each `DomArch` #' appears. #' The results are arranged in descending order of `totallin` and `totalcount`. +#' @name summarize #' @export #' #' @examples @@ -401,6 +409,7 @@ summ.DA <- function(x) { #' `GenContext`, `DomArch`, and `Lineage`. 
#' #' The results are arranged in descending order of `count`. +#' @name summarize #' @export #' #' @examples @@ -421,11 +430,12 @@ summ.GC.byDALin <- function(x) { #' summ.GC.byLin #' -#' @param x +#' @param x A dataframe or tibble containing the data. #' #' @importFrom dplyr arrange desc filter group_by n summarise #' #' @return Describe return, in detail +#' @name summarize #' @export #' #' @examples @@ -459,6 +469,7 @@ summ.GC.byLin <- function(x) { #' `GenContext` and `Lineage`. #' #' The results are arranged in descending order of `count`. +#' @name summarize #' @export #' #' @examples @@ -512,6 +523,7 @@ summ.GC <- function(x) { #' - `IndividualCountPercent`: The percentage of each `totalcount` relative to #' the overall count. #' - `CumulativePercent`: The cumulative percentage of total counts. +#' @name summarize #' @export #' #' @note Please refer to the source code if you have alternate file formats and/or diff --git a/man/count_bycol.Rd b/man/count_bycol.Rd deleted file mode 100644 index 946a7ea2..00000000 --- a/man/count_bycol.Rd +++ /dev/null @@ -1,38 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{count_bycol} -\alias{count_bycol} -\title{Count Bycol} -\usage{ -count_bycol(prot = prot, column = "DomArch", min.freq = 1) -} -\arguments{ -\item{prot}{A data frame containing the dataset to analyze, typically with -multiple columns including the one specified by the \code{column} parameter.} - -\item{column}{A character string specifying the name of the column to analyze. -The default is "DomArch".} - -\item{min.freq}{An integer specifying the minimum frequency an element must -have to be included in the output. 
Default is 1.} -} -\value{ -A tibble with two columns: -\describe{ -\item{\code{column}}{The unique elements from the specified column -(e.g., "DomArch").} -\item{\code{freq}}{The frequency of each element, i.e., the number of times -each element appears in the specified column.} -} -The tibble is filtered to only include elements that have a frequency -greater than or equal to \code{min.freq} and does not include elements with \code{NA} -values or those starting with a hyphen ("-"). -} -\description{ -Count Bycol -} -\examples{ -\dontrun{ -count_bycol(prot = my_data, column = "DomArch", min.freq = 10) -} -} diff --git a/man/elements2words.Rd b/man/elements2words.Rd deleted file mode 100644 index bda447db..00000000 --- a/man/elements2words.Rd +++ /dev/null @@ -1,40 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{elements2words} -\alias{elements2words} -\title{Elements 2 Words} -\usage{ -elements2words(prot, column = "DomArch", conversion_type = "da2doms") -} -\arguments{ -\item{prot}{A dataframe containing the dataset to analyze. The specified -\code{column} contains the string elements to be processed.} - -\item{column}{A character string specifying the name of the column to analyze. -Default is "DomArch".} - -\item{conversion_type}{A character string specifying the type of conversion. -Two options are available: -\describe{ -\item{\code{da2doms}}{Convert domain architectures into individual domains by -replacing \code{+} symbols with spaces.} -\item{\code{gc2da}}{Convert genomic context into domain architectures by -replacing directional symbols (\verb{<-}, \verb{->}, and \code{|}) with spaces.} -}} -} -\value{ -A single string where elements are delimited by spaces. The function -performs necessary substitutions based on the \code{conversion_type} and cleans up -extraneous characters like newlines, tabs, and multiple spaces. 
-} -\description{ -Break string ELEMENTS into WORDS for domain architecture (DA) and genomic -context (GC) -} -\examples{ -\dontrun{ -tibble::tibble(DomArch = c("aaa+bbb", -"a+b", "b+c", "b-c")) |> elements2words() -} - -} diff --git a/man/filter_by_doms.Rd b/man/filter_by_doms.Rd deleted file mode 100644 index cfe255ca..00000000 --- a/man/filter_by_doms.Rd +++ /dev/null @@ -1,44 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{filter_by_doms} -\alias{filter_by_doms} -\title{Filter by Domains} -\usage{ -filter_by_doms( - prot, - column = "DomArch", - doms_keep = c(), - doms_remove = c(), - ignore.case = FALSE -) -} -\arguments{ -\item{prot}{Dataframe to filter} - -\item{column}{Column to search for domains in (DomArch column)} - -\item{doms_keep}{Vector of domains that must be identified within column in order for -observation to be kept} - -\item{doms_remove}{Vector of domains that, if found within an observation, will be removed} - -\item{ignore.case}{Should the matching be non case sensitive} -} -\value{ -Filtered data frame -} -\description{ -filter_by_doms filters a data frame by identifying exact domain matches -and either keeping or removing rows with the identified domain -} -\note{ -There is no need to make the domains 'regex safe', that will be handled by this function -} -\examples{ -\dontrun{ -filter_by_doms() -} -} -\author{ -Samuel Chen, Janani Ravi -} diff --git a/man/filter_freq.Rd b/man/filter_freq.Rd deleted file mode 100644 index 9dfba73b..00000000 --- a/man/filter_freq.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{filter_freq} -\alias{filter_freq} -\title{Filter Frequency} -\usage{ -filter_freq(x, min.freq) -} -\arguments{ -\item{x}{A tibble (tbl_df) containing at least two columns: one for -elements (e.g., \code{words}) and one for their frequency (e.g., \code{freq}).} - -\item{min.freq}{A numeric 
value specifying the minimum frequency threshold. -Only elements with frequencies greater than or equal to this value will be -retained.} -} -\value{ -A tibble with the same structure as \code{x}, but filtered to include -only rows where the frequency is greater than or equal to \code{min.freq}. -} -\description{ -Filter Frequency -} -\examples{ -\dontrun{ -filter_freq() -} -} diff --git a/man/summ.DA.Rd b/man/summ.DA.Rd deleted file mode 100644 index 01d15b3c..00000000 --- a/man/summ.DA.Rd +++ /dev/null @@ -1,31 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{summ.DA} -\alias{summ.DA} -\title{summ.DA} -\usage{ -summ.DA(x) -} -\arguments{ -\item{x}{A dataframe or tibble containing the data. It must have a column -named \code{DomArch} and a count column, such as \code{count}, which represents the -occurrences of each architecture in various lineages.} -} -\value{ -A tibble summarizing each unique \code{DomArch}, along with the following -columns: -\itemize{ -\item \code{totalcount}: The total occurrences of each \code{DomArch} across all lineages. -\item \code{totallin}: The total number of unique lineages in which each \code{DomArch} -appears. -The results are arranged in descending order of \code{totallin} and \code{totalcount}. -} -} -\description{ -Function to retrieve counts of how many lineages a DomArch appears in -} -\examples{ -\dontrun{ -summ.DA() -} -} diff --git a/man/summ.DA.byLin.Rd b/man/summ.DA.byLin.Rd deleted file mode 100644 index d88e5d37..00000000 --- a/man/summ.DA.byLin.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{summ.DA.byLin} -\alias{summ.DA.byLin} -\title{summ.DA.byLin} -\usage{ -summ.DA.byLin(x) -} -\arguments{ -\item{x}{A dataframe or tibble containing the data. 
It must have columns -named \code{DomArch} and \code{Lineage}.} -} -\value{ -A tibble summarizing the counts of unique domain architectures -(\code{DomArch}) per lineage (\code{Lineage}). The resulting table contains three -columns: \code{DomArch}, \code{Lineage}, and \code{count}, which indicates the frequency -of each domain architecture for each lineage. The results are arranged in -descending order of \code{count}. -} -\description{ -Function to summarize and retrieve counts by Domains & Domains+Lineage -} -\examples{ -\dontrun{ -summ.DA.byLin() -} -} diff --git a/man/summ.GC.Rd b/man/summ.GC.Rd deleted file mode 100644 index 2ec4d651..00000000 --- a/man/summ.GC.Rd +++ /dev/null @@ -1,32 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{summ.GC} -\alias{summ.GC} -\title{summ.GC} -\usage{ -summ.GC(x) -} -\arguments{ -\item{x}{A dataframe or tibble containing the data. It must have columns -named \code{GenContext}, \code{DomArch}, and \code{Lineage}.} -} -\value{ -A tibble summarizing each unique combination of \code{GenContext} and -\code{Lineage}, along with the following columns: -\itemize{ -\item \code{GenContext}: The genomic context for each entry. -\item \code{Lineage}: The lineage associated with each entry. -\item \code{count}: The total number of occurrences for each combination of -\code{GenContext} and \code{Lineage}. -} - -The results are arranged in descending order of \code{count}. -} -\description{ -summ.GC -} -\examples{ -\dontrun{ -summ.GC() -} -} diff --git a/man/summ.GC.byDALin.Rd b/man/summ.GC.byDALin.Rd deleted file mode 100644 index 7fc8d443..00000000 --- a/man/summ.GC.byDALin.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{summ.GC.byDALin} -\alias{summ.GC.byDALin} -\title{summ.GC.byDALin} -\usage{ -summ.GC.byDALin(x) -} -\arguments{ -\item{x}{A dataframe or tibble containing the data. 
It must have columns -named \code{GenContext}, \code{DomArch}, and \code{Lineage}.} -} -\value{ -A tibble summarizing each unique combination of \code{GenContext}, -\code{DomArch}, and \code{Lineage}, along with the following columns: -\itemize{ -\item \code{GenContext}: The genomic context for each entry. -\item \code{DomArch}: The domain architecture for each entry. -\item \code{Lineage}: The lineage associated with each entry. -\item \code{count}: The total number of occurrences for each combination of -\code{GenContext}, \code{DomArch}, and \code{Lineage}. -} - -The results are arranged in descending order of \code{count}. -} -\description{ -summ.GC.byDALin -} -\examples{ -\dontrun{ -summ.GC.byDALin -} -} diff --git a/man/summ.GC.byLin.Rd b/man/summ.GC.byLin.Rd deleted file mode 100644 index df2a8fb8..00000000 --- a/man/summ.GC.byLin.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{summ.GC.byLin} -\alias{summ.GC.byLin} -\title{summ.GC.byLin} -\usage{ -summ.GC.byLin(x) -} -\arguments{ -\item{x}{} -} -\value{ -Describe return, in detail -} -\description{ -summ.GC.byLin -} -\examples{ -\dontrun{ -summ.GC.byLin() -} -} diff --git a/man/summarize.Rd b/man/summarize.Rd new file mode 100644 index 00000000..f149f686 --- /dev/null +++ b/man/summarize.Rd @@ -0,0 +1,260 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/summarize.R +\name{summarize} +\alias{summarize} +\alias{filter_by_doms} +\alias{count_bycol} +\alias{elements2words} +\alias{words2wc} +\alias{filter_freq} +\alias{summarize_bylin} +\alias{summ.DA.byLin} +\alias{summ.DA} +\alias{summ.GC.byDALin} +\alias{summ.GC.byLin} +\alias{summ.GC} +\alias{total_counts} +\title{Filter by Domains} +\usage{ +filter_by_doms( + prot, + column = "DomArch", + doms_keep = c(), + doms_remove = c(), + ignore.case = FALSE +) + +count_bycol(prot = prot, column = "DomArch", min.freq = 1) + +elements2words(prot, 
column = "DomArch", conversion_type = "da2doms") + +words2wc(string) + +filter_freq(x, min.freq) + +summarize_bylin(prot = "prot", column = "DomArch", by = "Lineage", query) + +summ.DA.byLin(x) + +summ.DA(x) + +summ.GC.byDALin(x) + +summ.GC.byLin(x) + +summ.GC(x) + +total_counts( + prot, + column = "DomArch", + lineage_col = "Lineage", + cutoff = 90, + RowsCutoff = FALSE, + digits = 2 +) +} +\arguments{ +\item{prot}{A data frame that must contain columns: +\itemize{\item Either 'GenContext' or 'DomArch.norep' \item count}} + +\item{column}{Character. The column to summarize, default is "DomArch".} + +\item{doms_keep}{Vector of domains that must be identified within column in order for +observation to be kept} + +\item{doms_remove}{Vector of domains that, if found within an observation, will be removed} + +\item{ignore.case}{Should the matching be non case sensitive} + +\item{min.freq}{A numeric value specifying the minimum frequency threshold. +Only elements with frequencies greater than or equal to this value will be +retained.} + +\item{conversion_type}{A character string specifying the type of conversion. +Two options are available: +\describe{ +\item{\code{da2doms}}{Convert domain architectures into individual domains by +replacing \code{+} symbols with spaces.} +\item{\code{gc2da}}{Convert genomic context into domain architectures by +replacing directional symbols (\verb{<-}, \verb{->}, and \code{|}) with spaces.} +}} + +\item{string}{A character string containing the elements (words) to count. +This would typically be a space-delimited string representing domain +architectures or genomic contexts.} + +\item{x}{A dataframe or tibble containing the data. It must have columns +named \code{GenContext}, \code{DomArch}, and \code{Lineage}.} + +\item{by}{A string representing the grouping column (e.g., \code{Lineage}). +Default is "Lineage".} + +\item{query}{A string specifying the query pattern for filtering the target +column. 
Use "all" to skip filtering and include all rows.} + +\item{lineage_col}{Character. The name of the lineage column, default is +"Lineage".} + +\item{cutoff}{Numeric. Cutoff for total count. Counts below this cutoff value +will not be shown. Default is 0.} + +\item{RowsCutoff}{Logical. If TRUE, filters based on cumulative percentage +cutoff. Default is FALSE.} + +\item{digits}{Numeric. Number of decimal places for percentage columns. +Default is 2.} +} +\value{ +Filtered data frame + +A tibble with two columns: +\describe{ +\item{\code{column}}{The unique elements from the specified column +(e.g., "DomArch").} +\item{\code{freq}}{The frequency of each element, i.e., the number of times +each element appears in the specified column.} +} +The tibble is filtered to only include elements that have a frequency +greater than or equal to \code{min.freq} and does not include elements with \code{NA} +values or those starting with a hyphen ("-"). + +A single string where elements are delimited by spaces. The function +performs necessary substitutions based on the \code{conversion_type} and cleans up +extraneous characters like newlines, tabs, and multiple spaces. + +A tibble (tbl_df) with two columns: +\describe{ +\item{\code{words}}{A column containing the individual words +(domains or domain architectures).} +\item{\code{freq}}{A column containing the frequency counts for each word.} +} + +A tibble with the same structure as \code{x}, but filtered to include +only rows where the frequency is greater than or equal to \code{min.freq}. + +A tibble summarizing the counts of occurrences of elements in +the \code{column}, grouped by the \code{by} column. The result includes the number +of occurrences (\code{count}) and is arranged in descending order of count. + +A tibble summarizing the counts of unique domain architectures +(\code{DomArch}) per lineage (\code{Lineage}). 
The resulting table contains three +columns: \code{DomArch}, \code{Lineage}, and \code{count}, which indicates the frequency +of each domain architecture for each lineage. The results are arranged in +descending order of \code{count}. + +A tibble summarizing each unique \code{DomArch}, along with the following +columns: +\itemize{ +\item \code{totalcount}: The total occurrences of each \code{DomArch} across all lineages. +\item \code{totallin}: The total number of unique lineages in which each \code{DomArch} +appears. +The results are arranged in descending order of \code{totallin} and \code{totalcount}. +} + +A tibble summarizing each unique combination of \code{GenContext}, +\code{DomArch}, and \code{Lineage}, along with the following columns: +\itemize{ +\item \code{GenContext}: The genomic context for each entry. +\item \code{DomArch}: The domain architecture for each entry. +\item \code{Lineage}: The lineage associated with each entry. +\item \code{count}: The total number of occurrences for each combination of +\code{GenContext}, \code{DomArch}, and \code{Lineage}. +} + +The results are arranged in descending order of \code{count}. + +Describe return, in detail + +A tibble summarizing each unique combination of \code{GenContext} and +\code{Lineage}, along with the following columns: +\itemize{ +\item \code{GenContext}: The genomic context for each entry. +\item \code{Lineage}: The lineage associated with each entry. +\item \code{count}: The total number of occurrences for each combination of +\code{GenContext} and \code{Lineage}. +} + +The results are arranged in descending order of \code{count}. + +A data frame with the following columns: +\itemize{ +\item \code{{{ column }}}: Unique values from the specified column. +\item \code{totalcount}: The total count of occurrences for each unique value in +the specified column. +\item \code{IndividualCountPercent}: The percentage of each \code{totalcount} relative to +the overall count. 
+\item \code{CumulativePercent}: The cumulative percentage of total counts. +} +} +\description{ +filter_by_doms filters a data frame by identifying exact domain matches +and either keeping or removing rows with the identified domain + +Break string ELEMENTS into WORDS for domain architecture (DA) and genomic +context (GC) + +Get word counts (wc) \link{DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)} + +Function to summarize and retrieve counts by Domains & Domains+Lineage + +Function to retrieve counts of how many lineages a DomArch appears in + +Creates a data frame with a totalcount column + +This function is designed to sum the counts column by either Genomic Context or Domain Architecture and creates a totalcount column from those sums. +} +\note{ +There is no need to make the domains 'regex safe', that will be handled by this function + +Please refer to the source code if you have alternate file formats and/or +column names. +} +\examples{ +\dontrun{ +filter_by_doms() +} +\dontrun{ +count_bycol(prot = my_data, column = "DomArch", min.freq = 10) +} +\dontrun{ +tibble::tibble(DomArch = c("aaa+bbb", +"a+b", "b+c", "b-c")) |> elements2words() +} + +\dontrun{ +tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> + elements2words() |> + words2wc() +} + +\dontrun{ +filter_freq() +} +\dontrun{ +library(tidyverse) +tibble(DomArch = c("a+b", "a+b", "b+c", "a+b"), Lineage = c("l1", "l1", "l1", "l2")) |> + summarize_bylin(query = "all") +} + +\dontrun{ +summ.DA.byLin() +} +\dontrun{ +summ.DA() +} +\dontrun{ +summ.GC.byDALin +} +\dontrun{ +summ.GC.byLin() +} +\dontrun{ +summ.GC() +} +\dontrun{ +total_counts(pspa - gc_lin_counts, 0, "GC") +} +} +\author{ +Samuel Chen, Janani Ravi +} diff --git a/man/summarize_bylin.Rd b/man/summarize_bylin.Rd deleted file mode 100644 index 92b93652..00000000 --- a/man/summarize_bylin.Rd +++ /dev/null @@ -1,36 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{summarize_bylin} 
-\alias{summarize_bylin} -\title{Summarize by Lineage} -\usage{ -summarize_bylin(prot = "prot", column = "DomArch", by = "Lineage", query) -} -\arguments{ -\item{prot}{A dataframe or tibble containing the data.} - -\item{column}{A string representing the column to be summarized -(e.g., \code{DomArch}). Default is "DomArch".} - -\item{by}{A string representing the grouping column (e.g., \code{Lineage}). -Default is "Lineage".} - -\item{query}{A string specifying the query pattern for filtering the target -column. Use "all" to skip filtering and include all rows.} -} -\value{ -A tibble summarizing the counts of occurrences of elements in -the \code{column}, grouped by the \code{by} column. The result includes the number -of occurrences (\code{count}) and is arranged in descending order of count. -} -\description{ -Summarize by Lineage -} -\examples{ -\dontrun{ -library(tidyverse) -tibble(DomArch = c("a+b", "a+b", "b+c", "a+b"), Lineage = c("l1", "l1", "l1", "l2")) |> - summarize_bylin(query = "all") -} - -} diff --git a/man/total_counts.Rd b/man/total_counts.Rd deleted file mode 100644 index 53d70096..00000000 --- a/man/total_counts.Rd +++ /dev/null @@ -1,58 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{total_counts} -\alias{total_counts} -\title{Total Counts} -\usage{ -total_counts( - prot, - column = "DomArch", - lineage_col = "Lineage", - cutoff = 90, - RowsCutoff = FALSE, - digits = 2 -) -} -\arguments{ -\item{prot}{A data frame that must contain columns: -\itemize{\item Either 'GenContext' or 'DomArch.norep' \item count}} - -\item{column}{Character. The column to summarize, default is "DomArch".} - -\item{lineage_col}{Character. The name of the lineage column, default is -"Lineage".} - -\item{cutoff}{Numeric. Cutoff for total count. Counts below this cutoff value -will not be shown. Default is 0.} - -\item{RowsCutoff}{Logical. If TRUE, filters based on cumulative percentage -cutoff. 
Default is FALSE.} - -\item{digits}{Numeric. Number of decimal places for percentage columns. -Default is 2.} -} -\value{ -A data frame with the following columns: -\itemize{ -\item \code{{{ column }}}: Unique values from the specified column. -\item \code{totalcount}: The total count of occurrences for each unique value in -the specified column. -\item \code{IndividualCountPercent}: The percentage of each \code{totalcount} relative to -the overall count. -\item \code{CumulativePercent}: The cumulative percentage of total counts. -} -} -\description{ -Creates a data frame with a totalcount column - -This function is designed to sum the counts column by either Genomic Context or Domain Architecture and creates a totalcount column from those sums. -} -\note{ -Please refer to the source code if you have alternate file formats and/or -column names. -} -\examples{ -\dontrun{ -total_counts(pspa - gc_lin_counts, 0, "GC") -} -} diff --git a/man/words2wc.Rd b/man/words2wc.Rd deleted file mode 100644 index 69d006d5..00000000 --- a/man/words2wc.Rd +++ /dev/null @@ -1,32 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{words2wc} -\alias{words2wc} -\title{Words 2 Word Counts} -\usage{ -words2wc(string) -} -\arguments{ -\item{string}{A character string containing the elements (words) to count. 
-This would typically be a space-delimited string representing domain -architectures or genomic contexts.} -} -\value{ -A tibble (tbl_df) with two columns: -\describe{ -\item{\code{words}}{A column containing the individual words -(domains or domain architectures).} -\item{\code{freq}}{A column containing the frequency counts for each word.} -} -} -\description{ -Get word counts (wc) \link{DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)} -} -\examples{ -\dontrun{ -tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> - elements2words() |> - words2wc() -} - -} From 5758ad993fa2b80cba9297a83786a4a59556e544 Mon Sep 17 00:00:00 2001 From: Awa Synthia Date: Tue, 8 Oct 2024 00:29:13 +0300 Subject: [PATCH 07/61] add error handling in tree.R Signed-off-by: Awa Synthia --- R/tree.R | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/R/tree.R b/R/tree.R index 01e9ead5..9386bbfe 100755 --- a/R/tree.R +++ b/R/tree.R @@ -51,6 +51,30 @@ convert_fa2tre <- function(fa_path = here("data/alns/pspa_snf7.fa"), # fa_path=here("data/alns/pspa_snf7.fa") # tre_path=here("data/alns/pspa_snf7.tre") # fasttree_path=here("src/FastTree") + + # Check if the FASTA file exists + if (!file.exists(fa_path)) { + stop(paste("Error: The FASTA file does not exist at:", fa_path)) + } + + # Check if the FastTree executable exists + if (!file.exists(fasttree_path)) { + stop(paste("Error: The FastTree executable does not exist at:", + fasttree_path)) + } + + # Check if the output directory exists + tre_dir <- dirname(tre_path) + if (!dir.exists(tre_dir)) { + stop(paste("Error: The output directory does not exist:", tre_dir)) + } + + # Check if the output file already exists + if (file.exists(tre_path)) { + cat("Warning: The output file already exists and will be overwritten:", + tre_path, "\n") + } + print(fa_path) system2( command = fasttree_path, @@ -83,8 +107,18 @@ convert_fa2tre <- function(fa_path = here("data/alns/pspa_snf7.fa"), #' #' @examples 
generate_trees <- function(aln_path = here("data/alns/")) { + + # Check if the alignment directory exists + if (!dir.exists(aln_path)) { + stop(paste("Error: The alignment directory does not exist:", aln_path)) + } # finding all fasta alignment files fa_filenames <- list.files(path = aln_path, pattern = "*.fa") + # Check if any FASTA files were found + if (length(fa_filenames) == 0) { + stop("Error: No FASTA files found in the specified directory.") + } + fa_paths <- paste0(aln_path, fa_filenames) variable <- str_replace_all(basename(fa_filenames), pattern = ".fa", replacement = "" @@ -139,6 +173,23 @@ generate_fa2tre <- function(fa_file = "data/alns/pspa_snf7.fa", ## SAMPLE ARGS # fa_file="data/alns/pspa_snf7.fa" # out_file="data/alns/pspa_snf7.tre" + + # Check if the FASTA file exists + if (!file.exists(fa_file)) { + stop(paste("Error: The FASTA file does not exist at:", fa_file)) + } + + # Check if the output directory exists + out_dir <- dirname(out_file) + if (!dir.exists(out_dir)) { + stop(paste("Error: The output directory does not exist:", out_dir)) + } + + # Check if the output file already exists + if (file.exists(out_file)) { + cat("Warning: The output file already exists and will be overwritten:", + out_file, "\n") + } ########################### ## Approach 1 From bf40f2da6cb35beb466a92dadf5e39c943b35d5d Mon Sep 17 00:00:00 2001 From: Awa Synthia Date: Tue, 8 Oct 2024 00:45:02 +0300 Subject: [PATCH 08/61] add error handling Signed-off-by: Awa Synthia --- R/summarize.R | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) diff --git a/R/summarize.R b/R/summarize.R index a9b13e43..4b0eaa55 100644 --- a/R/summarize.R +++ b/R/summarize.R @@ -41,6 +41,23 @@ filter_by_doms <- function(prot, column = "DomArch", doms_keep = c(), doms_remov # Any row containing a domain in doms_remove will be removed # ^word$|(?<=\+)word$|(?<=\+)word(?=\+)|word(?=\+) + + # Check if prot is a data frame + if (!is.data.frame(prot)) { + 
stop("Error: 'prot' must be a data frame.") + } + + # Check if the specified column exists in the data frame + if (!column %in% names(prot)) { + stop(paste("Error: The specified column '", column, "' does not exist + in the data frame.", sep = "")) + } + + # If doms_keep or doms_remove are not provided, inform the user + if (length(doms_keep) == 0 && length(doms_remove) == 0) { + warning("Warning: No domains specified to keep or remove. Returning the + original data frame.") + } # Make regex safe doms_keep <- str_replace_all(string = doms_keep, pattern = "\\(", replacement = "\\\\(") @@ -105,6 +122,23 @@ filter_by_doms <- function(prot, column = "DomArch", doms_keep = c(), doms_remov #' count_bycol() #' } count_bycol <- function(prot = prot, column = "DomArch", min.freq = 1) { + + # Check if 'prot' is a data frame + if (!is.data.frame(prot)) { + stop("Error: 'prot' must be a data frame.") + } + + # Check if the specified column exists in the data frame + if (!column %in% names(prot)) { + stop(paste("Error: The specified column '", column, "' does not exist in + the data frame.", sep = "")) + } + + # Check if min.freq is a positive integer + if (!is.numeric(min.freq) || length(min.freq) != 1 || min.freq < 1 || + floor(min.freq) != min.freq) { + stop("Error: 'min.freq' must be a positive integer.") + } counts <- prot %>% select(column) %>% table() %>% @@ -139,6 +173,24 @@ count_bycol <- function(prot = prot, column = "DomArch", min.freq = 1) { #' } #' elements2words <- function(prot, column = "DomArch", conversion_type = "da2doms") { + # Check if 'prot' is a data frame + if (!is.data.frame(prot)) { + stop("Error: 'prot' must be a data frame.") + } + + # Check if the specified column exists in the data frame + if (!column %in% names(prot)) { + stop(paste("Error: The specified column '", column, "' does not exist in + the data frame.", sep = "")) + } + + # Check for valid conversion_type values + valid_types <- c("da2doms", "doms2da") + if (!conversion_type %in% 
valid_types) { + stop(paste("Error: Invalid 'conversion_type'. Must be one of:", + paste(valid_types, collapse = ", "))) + } + z1 <- prot %>% dplyr::pull(column) %>% str_replace_all("\\,", " ") %>% @@ -189,6 +241,11 @@ elements2words <- function(prot, column = "DomArch", conversion_type = "da2doms" #' } #' words2wc <- function(string) { + # Check if 'string' is a character vector of length 1 + if (!is.character(string) || length(string) != 1) { + stop("Error: 'string' must be a single character vector.") + } + df_word_count <- string %>% # reduce spaces with length 2 or greater to a single space str_replace_all("\\s{2,}", " ") %>% @@ -230,6 +287,22 @@ words2wc <- function(string) { #' filter_freq() #' } filter_freq <- function(x, min.freq) { + + # Check if 'x' is a data frame + if (!is.data.frame(x)) { + stop("Error: 'x' must be a data frame.") + } + + # Check if 'min.freq' is a positive integer + if (!is.numeric(min.freq) || length(min.freq) != 1 || min.freq < 1 || + floor(min.freq) != min.freq) { + stop("Error: 'min.freq' must be a positive integer.") + } + + # Check if the 'freq' column exists in the data frame + if (!"freq" %in% names(x)) { + stop("Error: The data frame must contain a 'freq' column.") + } x %>% filter(freq >= min.freq) } @@ -259,6 +332,23 @@ filter_freq <- function(x, min.freq) { #' summarize_bylin <- function(prot = "prot", column = "DomArch", by = "Lineage", query) { + # Check if 'prot' is a data frame + if (!is.data.frame(prot)) { + stop("Error: 'prot' must be a data frame.") + } + + # Check if the specified column exists in the data frame + if (!column %in% names(prot)) { + stop(paste("Error: The specified column '", column, "' does not exist in + the data frame.", sep = "")) + } + + # Check if the 'by' column exists in the data frame + if (!by %in% names(prot)) { + stop(paste("Error: The specified 'by' column '", by, "' does not exist + n the data frame.", sep = "")) + } + column <- sym(column) by <- sym(by) if (query == "all") { @@ -295,6 
+385,19 @@ summarize_bylin <- function(prot = "prot", column = "DomArch", by = "Lineage", #' summ.DA.byLin() #' } summ.DA.byLin <- function(x) { + # Check if 'x' is a data frame + if (!is.data.frame(x)) { + stop("Error: 'x' must be a data frame.") + } + + # Check if required columns exist in the data frame + required_columns <- c("DomArch", "Lineage") + missing_columns <- setdiff(required_columns, names(x)) + + if (length(missing_columns) > 0) { + stop(paste("Error: The following required columns are + missing:", paste(missing_columns, collapse = ", "))) + } ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_' x %>% filter(!grepl("^-$", DomArch)) %>% @@ -321,6 +424,10 @@ summ.DA.byLin <- function(x) { #' summ.DA() #' } summ.DA <- function(x) { + # Check if 'x' is a data frame + if (!is.data.frame(x)) { + stop("Error: 'x' must be a data frame.") + } ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_' x %>% group_by(DomArch) %>% @@ -344,6 +451,10 @@ summ.DA <- function(x) { #' summ.GC.byDALin #' } summ.GC.byDALin <- function(x) { + # Check if 'x' is a data frame + if (!is.data.frame(x)) { + stop("Error: 'x' must be a data frame.") + } ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_' x %>% filter(!grepl("^-$", GenContext)) %>% @@ -369,6 +480,10 @@ summ.GC.byDALin <- function(x) { #' summ.GC.byLin() #' } summ.GC.byLin <- function(x) { + # Check if 'x' is a data frame + if (!is.data.frame(x)) { + stop("Error: 'x' must be a data frame.") + } ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_' x %>% filter(!grepl("^-$", GenContext)) %>% @@ -394,6 +509,10 @@ summ.GC.byLin <- function(x) { #' summ.GC() #' } summ.GC <- function(x) { + # Check if 'x' is a data frame + if (!is.data.frame(x)) { + stop("Error: 'x' must be a data frame.") + } ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' 
with '_' x %>% group_by(GenContext) %>% @@ -442,6 +561,31 @@ total_counts <- function(prot, column = "DomArch", lineage_col = "Lineage", cutoff = 90, RowsCutoff = FALSE, digits = 2 # type = "GC" ) { + # Check if 'prot' is a data frame + if (!is.data.frame(prot)) { + stop("Error: 'prot' must be a data frame.") + } + + # Check if the specified columns exist in the data frame + required_columns <- c(column, lineage_col) + missing_columns <- setdiff(required_columns, names(prot)) + + if (length(missing_columns) > 0) { + stop(paste("Error: The following required columns are missing:", + paste(missing_columns, collapse = ", "))) + } + + # Check that cutoff is a numeric value between 0 and 100 + if (!is.numeric(cutoff) || length(cutoff) != 1 || cutoff < 0 || cutoff > 100) { + stop("Error: 'cutoff' must be a numeric value between 0 and 100.") + } + + # Check that digits is a non-negative integer + if (!is.numeric(digits) || length(digits) != 1 || digits < 0 || + floor(digits) != digits) { + stop("Error: 'digits' must be a non-negative integer.") + } + column <- sym(column) prot <- select(prot, {{ column }}, {{ lineage_col }}) %>% @@ -601,6 +745,11 @@ total_counts <- function(prot, column = "DomArch", lineage_col = "Lineage", #' find_paralogs(pspa) #' } find_paralogs <- function(prot) { + # Check if 'prot' is a data frame + if (!is.data.frame(prot)) { + stop("Error: 'prot' must be a data frame.") + } + # Remove eukaryotes prot <- prot %>% filter(!grepl("^eukaryota", Lineage)) paralogTable <- prot %>% From 4aeaa113927b6f94b21c9f0dd0956bb7e48004a5 Mon Sep 17 00:00:00 2001 From: Seyi Kuforiji Date: Mon, 7 Oct 2024 22:50:16 +0100 Subject: [PATCH 09/61] Add error handling to multiple functions - Implement error handling for mapOption2Process, get_proc_medians, write_proc_medians_table, get_proc_weights, advanced_opts2est_walltime, assign_job_queue, and plot_estimated_walltimes . - Validate input arguments for each function to ensure they meet expected criteria. 
- Use tryCatch blocks to gracefully handle errors and warnings. - Provide informative error messages and detailed logging where appropriate. - Ensure functions fail gracefully and provide useful feedback. Also renamed the functions to the following; assign_job_queue -> assignJobQueue make_opts2procs -> mapOption2Process map_advanced_opts2procs -> mapAdvOption2Process get_proc_medians - calculateProcessRuntime write_proc_medians_table -> writeProcessRuntime2TSV write_proc_medians_yml -> writeProcessRuntime2YML get_proc_weights -> getProcessRuntimeWeights advanced_opts2est_walltime -> calculateEstimatedWallTimeFromOpts plot_estimated_walltimes -> plotEstimatedWallTimes --- NAMESPACE | 26 +- R/assign_job_queue.R | 484 ++++++++++++------ R/clean_clust_file.R | 4 +- R/combine_analysis.R | 4 +- R/combine_files.R | 10 +- R/create_lineage_lookup.R | 6 +- ...{assign_job_queue.Rd => assignJobQueue.Rd} | 13 +- ... calculateEstimatedWallTimeFromOptions.Rd} | 12 +- ..._medians.Rd => calculateProcessRuntime.Rd} | 10 +- ...lean_clust_file.Rd => cleanClusterFile.Rd} | 8 +- man/{combine_files.Rd => combineFiles.Rd} | 6 +- ...combine_full.Rd => combineFullAnalysis.Rd} | 6 +- man/{combine_ipr.Rd => combineIPR.Rd} | 6 +- ...neage_lookup.Rd => createLineageLookup.Rd} | 6 +- ...weights.Rd => getProcessRuntimeWeights.Rd} | 8 +- ..._opts2procs.Rd => mapAdvOption2Process.Rd} | 8 +- ...ake_opts2procs.Rd => mapOption2Process.Rd} | 8 +- ...walltimes.Rd => plotEstimatedWallTimes.Rd} | 11 +- ...ns_table.Rd => writeProcessRuntime2TSV.Rd} | 8 +- ...ans_yml.Rd => writeProcessRuntimeToYML.Rd} | 13 +- 20 files changed, 416 insertions(+), 241 deletions(-) rename man/{assign_job_queue.Rd => assignJobQueue.Rd} (64%) rename man/{advanced_opts2est_walltime.Rd => calculateEstimatedWallTimeFromOptions.Rd} (68%) rename man/{get_proc_medians.Rd => calculateProcessRuntime.Rd} (76%) rename man/{clean_clust_file.Rd => cleanClusterFile.Rd} (82%) rename man/{combine_files.Rd => combineFiles.Rd} (92%) rename 
man/{combine_full.Rd => combineFullAnalysis.Rd} (69%) rename man/{combine_ipr.Rd => combineIPR.Rd} (74%) rename man/{create_lineage_lookup.Rd => createLineageLookup.Rd} (91%) rename man/{get_proc_weights.Rd => getProcessRuntimeWeights.Rd} (73%) rename man/{map_advanced_opts2procs.Rd => mapAdvOption2Process.Rd} (76%) rename man/{make_opts2procs.Rd => mapOption2Process.Rd} (75%) rename man/{plot_estimated_walltimes.Rd => plotEstimatedWallTimes.Rd} (55%) rename man/{write_proc_medians_table.Rd => writeProcessRuntime2TSV.Rd} (77%) rename man/{write_proc_medians_yml.Rd => writeProcessRuntimeToYML.Rd} (61%) diff --git a/NAMESPACE b/NAMESPACE index 16cf0813..9c038631 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,26 +12,27 @@ export(add_leaves) export(add_lins) export(add_name) export(add_tax) -export(advanced_opts2est_walltime) export(alignFasta) export(assert_count_df) -export(assign_job_queue) +export(assignJobQueue) +export(calculateEstimatedWallTimeFromOptions) +export(calculateProcessRuntime) export(cleanup_GeneDesc) export(cleanup_clust) export(cleanup_domarch) export(cleanup_gencontext) export(cleanup_lineage) export(cleanup_species) -export(combine_files) -export(combine_full) -export(combine_ipr) +export(combineFiles) +export(combineFullAnalysis) +export(combineIPR) export(convert_aln2fa) export(convert_fa2tre) export(count_bycol) export(count_to_sunburst) export(count_to_treemap) +export(createLineageLookup) export(create_all_col_params) -export(create_lineage_lookup) export(create_one_col_params) export(domain_network) export(efetch_ipg) @@ -45,10 +46,9 @@ export(generate_all_aln2fa) export(generate_fa2tre) export(generate_msa) export(generate_trees) +export(getProcessRuntimeWeights) export(get_accnums_from_fasta_file) export(get_job_message) -export(get_proc_medians) -export(get_proc_weights) export(ipg2lin) export(ipr2viz) export(ipr2viz_web) @@ -58,12 +58,12 @@ export(lineage.domain_repeats.plot) export(lineage.neighbors.plot) export(lineage_sunburst) 
export(make_job_results_url) -export(make_opts2procs) +export(mapAdvOption2Process) +export(mapOption2Process) export(map_acc2name) -export(map_advanced_opts2procs) export(msa_pdf) export(pick_longer_duplicate) -export(plot_estimated_walltimes) +export(plotEstimatedWallTimes) export(prot2tax) export(prot2tax_old) export(remove_astrk) @@ -95,8 +95,8 @@ export(wordcloud2_element) export(wordcloud3) export(wordcloud_element) export(write.MsaAAMultipleAlignment) -export(write_proc_medians_table) -export(write_proc_medians_yml) +export(writeProcessRuntime2TSV) +export(writeProcessRuntimeToYML) importFrom(Biostrings,AAStringSet) importFrom(Biostrings,readAAStringSet) importFrom(Biostrings,toString) diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R index bc5253d4..f1fcb6db 100644 --- a/R/assign_job_queue.R +++ b/R/assign_job_queue.R @@ -3,22 +3,32 @@ # pipeline. # to use this, construct paths like so: file.path(common_root, "path", "to", "file.R") # for example, the reference for this file would be: -# file.path(common_root, "molevol_scripts", "R", "assign_job_queue.R") +# file.path(common_root, "molevol_scripts", "R", "assignJobQueue.R") common_root <- Sys.getenv("COMMON_SRC_ROOT") #' Construct list where names (MolEvolvR advanced options) point to processes #' #' @return list where names (MolEvolvR advanced options) point to processes #' -#' example: list_opts2procs <- make_opts2procs +#' example: list_opts2procs <- mapOption2Process #' @export -make_opts2procs <- function() { +mapOption2Process <- function() { + tryCatch({ opts2processes <- list( - "homology_search" = c("dblast", "dblast_cleanup"), - "domain_architecture" = c("iprscan", "ipr2lineage", "ipr2da"), - "always" = c("blast_clust", "clust2table") # processes always present agnostic of advanced options + "homology_search" = c("dblast", "dblast_cleanup"), + "domain_architecture" = c("iprscan", "ipr2lineage", "ipr2da"), + # processes always present agnostic of advanced options + "always" = 
c("blast_clust", "clust2table") ) return(opts2processes) + }, error = function(e) { + message(paste("Encountered an error: ", e$message)) + }, warning = function(w) { + message(paste("Warning: ", w$message)) + }, finally = { + message("mapOption2Process function execution completed.") + }) + } #' Use MolEvolvR advanced options to get associated processes @@ -30,17 +40,29 @@ make_opts2procs <- function() { #' #' example: #' advanced_opts <- c("homology_search", "domain_architecture") -#' procs <- map_advanced_opts2procs(advanced_opts) +#' procs <- mapAdvOption2Process(advanced_opts) #' @export -map_advanced_opts2procs <- function(advanced_opts) { +mapAdvOption2Process <- function(advanced_opts) { + if (!is.character(advanced_opts)) { + stop("Argument must be a character vector!") + } + tryCatch({ # append 'always' to add procs that always run advanced_opts <- c(advanced_opts, "always") - opts2proc <- make_opts2procs() + opts2proc <- mapOption2Process() # setup index for opts2proc based on advanced options idx <- which(names(opts2proc) %in% advanced_opts) # extract processes that will run procs <- opts2proc[idx] |> unlist() return(procs) + }, error = function(e) { + message(paste("Encountered an error: ", e$message)) + }, warning = function(w) { + message(paste("Warning: ", w$message)) + }, finally = { + message("mapOption2Process function execution completed.") + }) + } #' Scrape MolEvolvR logs and calculate median processes @@ -58,47 +80,68 @@ map_advanced_opts2procs <- function(advanced_opts) { #' #' 1) #' dir_job_results <- "/data/scratch/janani/molevolvr_out" -#' list_proc_medians <- get_proc_medians(dir_job_results) +#' list_proc_medians <- calculateProcessRuntime(dir_job_results) #' #' 2) from outside container environment #' common_root <- "/data/molevolvr_transfer/molevolvr_dev" #' dir_job_results <- "/data/molevolvr_transfer/molevolvr_dev/job_results" -#' list_proc_medians <- get_proc_medians(dir_job_results) +#' list_proc_medians <- 
calculateProcessRuntime(dir_job_results) #' @export -get_proc_medians <- function(dir_job_results) { +calculateProcessRuntime <- function(dir_job_results) { + tryCatch({ + # Check if dir_job_results is a character string + if (!is.character(dir_job_results) || length(dir_job_results) != 1) { + stop("Input 'dir_job_results' must be a single character string.") + } + + # Check if dir_job_results exists + if (!dir.exists(dir_job_results)) { + stop(paste("The directory", dir_job_results, "does not exist.")) + } + source(file.path(common_root, "molevol_scripts", "R", "metrics.R")) # aggregate logs from - path_log_data <- file.path(common_root, "molevol_scripts", "log_data", "prod_logs.rda") + path_log_data <- file.path(common_root, + "molevol_scripts", "log_data", "prod_logs.rda") # ensure the folder exists to the location if (!dir.exists(path_log_data)) { - dir.create(dirname(path_log_data), recursive = TRUE, showWarnings = FALSE) + dir.create(dirname(path_log_data), + recursive = TRUE, showWarnings = FALSE) } # attempt to load pre-generated logdata if (!file.exists(path_log_data)) { - logs <- aggregate_logs(dir_job_results, latest_date = Sys.Date() - 60) - save(logs, file = path_log_data) + logs <- aggregate_logs(dir_job_results, latest_date = Sys.Date() - 60) + save(logs, file = path_log_data) } else { - load(path_log_data) # loads the logs object + load(path_log_data) # loads the logs object } df_log <- logs$df_log procs <- c( - "dblast", "dblast_cleanup", "iprscan", - "ipr2lineage", "ipr2da", "blast_clust", - "clust2table" + "dblast", "dblast_cleanup", "iprscan", + "ipr2lineage", "ipr2da", "blast_clust", + "clust2table" ) list_proc_medians <- df_log |> - dplyr::select(dplyr::all_of(procs)) |> - dplyr::summarise( - dplyr::across( - dplyr::everything(), - \(x) median(x, na.rm = TRUE) - ) - ) |> - as.list() + dplyr::select(dplyr::all_of(procs)) |> + dplyr::summarise( + dplyr::across( + dplyr::everything(), + \(x) median(x, na.rm = TRUE) + ) + ) |> + as.list() 
return(list_proc_medians) + }, error = function(e) { + message(paste("Encountered an error: ", e$message)) + }, warning = function(w) { + message(paste("Warning: ", w$message)) + }, finally = { + message("calculateProcessRuntime function execution completed.") + }) + } #' Write a table of 2 columns: 1) process and 2) median seconds @@ -113,51 +156,99 @@ get_proc_medians <- function(dir_job_results) { #' #' @return [tbl_df] 2 columns: 1) process and 2) median seconds #' -#' example: write_proc_medians_table( +#' example: writeProcessRuntime2TSV( #' "/data/scratch/janani/molevolvr_out/", #' "/data/scratch/janani/molevolvr_out/log_tbl.tsv" #' ) #' @export -write_proc_medians_table <- function(dir_job_results, filepath) { - df_proc_medians <- get_proc_medians(dir_job_results) |> - tibble::as_tibble() |> - tidyr::pivot_longer( - dplyr::everything(), - names_to = "process", - values_to = "median_seconds" - ) |> - dplyr::arrange(dplyr::desc(median_seconds)) +writeProcessRuntime2TSV <- function(dir_job_results, filepath) { + tryCatch({ + # Error handling for input arguments + if (!is.character(dir_job_results) || length(dir_job_results) != 1) { + stop("Input 'dir_job_results' must be a single character string.") + } + + if (!dir.exists(dir_job_results)) { + stop(paste("The directory", dir_job_results, "does not exist.")) + } + + if (!is.character(filepath) || length(filepath) != 1) { + stop("Input 'filepath' must be a single character string.") + } + df_proc_medians <- calculateProcessRuntime(dir_job_results) |> + tibble::as_tibble() |> + tidyr::pivot_longer( + dplyr::everything(), + names_to = "process", + values_to = "median_seconds" + ) |> + dplyr::arrange(dplyr::desc(median_seconds)) + + # Write the resulting tibble to a TSV file readr::write_tsv(df_proc_medians, file = filepath) return(df_proc_medians) + }, error = function(e) { + message(paste("Encountered an error: ", e$message)) + }, warning = function(w) { + message(paste("Warning: ", w$message)) + }, finally = { 
+ message("writeProcessRuntime2TSV function execution completed.") + }) + } #' Compute median process runtimes, then write a YAML list of the processes and #' their median runtimes in seconds to the path specified by 'filepath'. #' #' The default value of filepath is the value of the env var -#' MOLEVOLVR_PROC_WEIGHTS, which get_proc_weights() also uses as its default +#' MOLEVOLVR_PROC_WEIGHTS, which writeProcessRuntimeToYML() also uses as its default #' read location. #' #' @param dir_job_results [chr] path to MolEvolvR job_results directory -#' @param filepath [chr] path to save YAML file; if NULL, uses ./molevol_scripts/log_data/job_proc_weights.yml +#' @param filepath [chr] path to save YAML file; if NULL, +#' uses ./molevol_scripts/log_data/job_proc_weights.yml #' #' @importFrom yaml write_yaml #' #' @examples #' \dontrun{ -#' write_proc_medians_yml( +#' writeProcessRuntimeToYML( #' "/data/scratch/janani/molevolvr_out/", #' "/data/scratch/janani/molevolvr_out/log_tbl.yml" #' ) #' } #' @export -write_proc_medians_yml <- function(dir_job_results, filepath = NULL) { +writeProcessRuntimeToYML <- function(dir_job_results, filepath = NULL) { + tryCatch({ + # Error handling for dir_job_results arguments + if (!is.character(dir_job_results) || length(dir_job_results) != 1) { + stop("Input 'dir_job_results' must be a single character string.") + } + + if (!dir.exists(dir_job_results)) { + stop(paste("The directory", dir_job_results, "does not exist.")) + } if (is.null(filepath)) { - filepath <- file.path(common_root, "molevol_scripts", "log_data", "job_proc_weights.yml") + filepath <- file.path(common_root, + "molevol_scripts", + "log_data", + "job_proc_weights.yml") + } + if (!is.character(filepath) || length(filepath) != 1) { + stop("Input 'filepath' must be a single character string.") } - medians <- get_proc_medians(dir_job_results) + medians <- calculateProcessRuntime(dir_job_results) yaml::write_yaml(medians, filepath) + }, error = function(e) { + 
message(paste("Encountered an error: "), e$message) + }, warning = function(w) { + message(paste("Warning: "), w$message) + }, finally = { + message("write_proc_medians_table function execution completed.") + } + ) + } #' Quickly get the runtime weights for MolEvolvR backend processes @@ -170,50 +261,52 @@ write_proc_medians_yml <- function(dir_job_results, filepath = NULL) { #' #' @return [list] names: processes; values: median runtime (seconds) #' -#' example: get_proc_weights() +#' example: writeProcessRuntimeToYML() #' @export -get_proc_weights <- function(medians_yml_path = NULL) { - if (is.null(medians_yml_path)) { - medians_yml_path <- file.path(common_root, "molevol_scripts", "log_data", "job_proc_weights.yml") +getProcessRuntimeWeights <- function(medians_yml_path = NULL) { + if (is.null(medians_yml_path)) { + medians_yml_path <- file.path(common_root, + "molevol_scripts", + "log_data", + "job_proc_weights.yml") + } + + proc_weights <- tryCatch({ + # attempt to read the weights from the YAML file produced by + # writeProcessRuntimeToYML() + if (stringr::str_trim(medians_yml_path) == "") { + stop( + stringr::str_glue("medians_yml_path is empty + ({medians_yml_path}), returning default weights") + ) } - proc_weights <- tryCatch( - { - # attempt to read the weights from the YAML file produced by - # write_proc_medians_yml() - if (stringr::str_trim(medians_yml_path) == "") { - stop( - stringr::str_glue("medians_yml_path is empty ({medians_yml_path}), returning default weights") - ) - } - - proc_weights <- yaml::read_yaml(medians_yml_path) - }, - # to avoid fatal errors in reading the proc weights yaml, - # some median process runtimes have been hardcoded based on - # the result of get_proc_medians() from Jan 2024 - error = function(cond) { - proc_weights <- list( - "dblast" = 2810, - "iprscan" = 1016, - "dblast_cleanup" = 79, - "ipr2lineage" = 18, - "ipr2da" = 12, - "blast_clust" = 2, - "clust2table" = 2 - ) - proc_weights - } + proc_weights <- 
yaml::read_yaml(medians_yml_path) + }, + # to avoid fatal errors in reading the proc weights yaml, + # some median process runtimes have been hardcoded based on + # the result of calculateProcessRuntime() from Jan 2024 + error = function(cond) { + proc_weights <- list( + "dblast" = 2810, + "iprscan" = 1016, + "dblast_cleanup" = 79, + "ipr2lineage" = 18, + "ipr2da" = 12, + "blast_clust" = 2, + "clust2table" = 2 ) + proc_weights + }) - return(proc_weights) + return(proc_weights) } #' Given MolEvolvR advanced options and number of inputs, #' calculate the total estimated walltime for the job #' #' @param advanced_opts character vector of MolEvolvR advanced options -#' (see make_opts2procs for the options) +#' (see mapOption2Process for the options) #' @param n_inputs total number of input proteins #' #' @importFrom dplyr if_else @@ -221,68 +314,129 @@ get_proc_weights <- function(medians_yml_path = NULL) { #' #' @return total estimated number of seconds a job will process (walltime) #' -#' example: advanced_opts2est_walltime(c("homology_search", "domain_architecture"), n_inputs = 3, n_hits = 50L) +#' example: calculateEstimatedWallTimeFromOptions(c("homology_search", +#' "domain_architecture"), +#' n_inputs = 3, n_hits = 50L) #' @export -advanced_opts2est_walltime <- function(advanced_opts, n_inputs = 1L, n_hits = NULL, verbose = FALSE) { +calculateEstimatedWallTimeFromOptions <- function(advanced_opts, + n_inputs = 1L, + n_hits = NULL, + verbose = FALSE) { + + tryCatch({ # to calculate est walltime for a homology search job, the number of hits # must be provided validation_fail <- is.null(n_hits) && "homology_search" %in% advanced_opts stopifnot(!validation_fail) - proc_weights <- get_proc_weights() + # Validate advanced_opts + if (!is.character(advanced_opts)) { + stop("Argument 'advanced_opts' must be a character vector.") + } + + # Validate n_inputs + if (!is.numeric(n_inputs) || length(n_inputs) != 1 || n_inputs <= 0) { + stop("Argument 'n_inputs' must be a 
single positive numeric value.") + } + + # Validate n_hits if homology_search is in advanced_opts + if ("homology_search" %in% advanced_opts && + (is.null(n_hits)|| !is.numeric(n_hits) + || length(n_hits) != 1 || n_hits < 0)) { + stop("Argument 'n_hits' must be a single non-negative numeric value when + 'homology_search' is in 'advanced_opts'.") + } + + # Get process weights + proc_weights <- writeProcessRuntimeToYML() + if (!is.list(proc_weights)) { + stop("Process weights could not be retrieved correctly.") + } + # sort process weights by names and convert to vec proc_weights <- proc_weights[order(names(proc_weights))] |> unlist() all_procs <- names(proc_weights) |> sort() # get processes from advanced options and sort by names - procs_from_opts <- map_advanced_opts2procs(advanced_opts) + procs_from_opts <- mapAdvOption2Process(advanced_opts) procs_from_opts <- sort(procs_from_opts) # binary encode: yes proc will run (1); else 0 binary_proc_vec <- dplyr::if_else(all_procs %in% procs_from_opts, 1L, 0L) # dot product of weights and procs to run; scaled by the number of inputs est_walltime <- (n_inputs * (binary_proc_vec %*% proc_weights)) |> - as.numeric() + as.numeric() # calculate the additional processes to run for the homologous hits if ("homology_search" %in% advanced_opts) { - opts2procs <- make_opts2procs() - # exclude the homology search processes for the homologous hits - procs2exclude_for_homologs <- opts2procs[["homology_search"]] - procs_homologs <- procs_from_opts[!(procs_from_opts %in% procs2exclude_for_homologs)] - binary_proc_vec_homolog <- dplyr::if_else(all_procs %in% procs_homologs, 1L, 0L) - # add the estimated walltime for processes run on the homologous hits - est_walltime <- est_walltime + - (n_hits * (binary_proc_vec_homolog %*% proc_weights) |> as.numeric()) + opts2procs <- mapOption2Process() + # exclude the homology search processes for the homologous hits + procs2exclude_for_homologs <- opts2procs[["homology_search"]] + procs_homologs <- 
procs_from_opts[!(procs_from_opts + %in% procs2exclude_for_homologs)] + binary_proc_vec_homolog <- dplyr::if_else(all_procs + %in% procs_homologs, 1L, 0L) + # add the estimated walltime for processes run on the homologous hits + est_walltime <- est_walltime + + (n_hits * (binary_proc_vec_homolog + %*% proc_weights) |> as.numeric()) } if (verbose) { - msg <- stringr::str_glue( - "warnings from advanced_opts2est_walltime():\n", - "\tn_inputs={n_inputs}\n", - "\tn_hits={ifelse(is.null(n_hits), 'null', n_hits)}\n", - "\test_walltime={est_walltime}\n\n" - ) - cat(file = stderr(), msg) + msg <- stringr::str_glue( + "warnings from calculateEstimatedWallTimeFromOptions():\n", + "\tn_inputs={n_inputs}\n", + "\tn_hits={ifelse(is.null(n_hits), 'null', n_hits)}\n", + "\test_walltime={est_walltime}\n\n" + ) + cat(file = stderr(), msg) } return(est_walltime) + }, error = function(e) { + message(paste("Encountered an error: ", e$message)) + }, warning = function(w) { + message(paste("Warning: ", w$message)) + }, finally = { + message("calculateEstimatedWallTimeFromOptions + function execution completed.") + }) + } + #' Decision function to assign job queue #' #' @param t_sec_estimate estimated number of seconds a job will process -#' (from advanced_opts2est_walltime()) +#' (from calculateEstimatedWallTimeFromOptions()) #' @param t_long threshold value that defines the lower bound for assigning a #' job to the "long queue" #' #' @return a string of "short" or "long" #' #' example: -#' advanced_opts2est_walltime(c("homology_search", "domain_architecture"), 3) |> -#' assign_job_queue() +#' calculateEstimatedWallTimeFromOptions(c("homology_search", +#' "domain_architecture"), 3) |> +#' assignJobQueue() #' @export -assign_job_queue <- function( - t_sec_estimate, - t_cutoff = 21600 # 6 hours - ) { +assignJobQueue <- function( + t_sec_estimate, + t_cutoff = 21600 # 6 hours +) { + tryCatch({ + if (!is.numeric(t_sec_estimate) || length(t_sec_estimate) != 1) { + stop("Argument 
't_sec_estimate' must be a single numeric value.") + } + + if (!is.numeric(t_cutoff) || length(t_cutoff) != 1 || t_cutoff < 0) { + stop("Argument 't_cutoff' must be a single non-negative numeric value.") + } + queue <- ifelse(t_sec_estimate > t_cutoff, "long", "short") return(queue) + }, error = function(e) { + message(paste("Encountered an error: ", e$message)) + }, warning = function(w) { + message(paste("Warning: ", w$message)) + }, finally = { + message("assignJobQueue function execution completed.") + }) + } #' Plot the estimated runtimes for different advanced options and number @@ -297,81 +451,97 @@ assign_job_queue <- function( #' @return line plot object #' #' example: -#' p <- plot_estimated_walltimes() -#' ggplot2::ggsave(filename = "/data/molevolvr_transfer/molevolvr_dev/molevol_scripts/docs/estimate_walltimes.png", plot = p) +#' p <- plotEstimatedWallTimes() +#' ggplot2::ggsave(filename = "/data/molevolvr_transfer/molevolvr_ +#' dev/molevol_scripts/docs/estimate_walltimes.png", plot = p) #' @export -plot_estimated_walltimes <- function() { - opts <- make_opts2procs() |> names() +plotEstimatedWallTimes <- function() { + tryCatch({ + opts <- mapOption2Process() |> names() # get all possible submission permutations (powerset) get_powerset <- function(vec) { - # generate powerset (do not include empty set) - n <- length(vec) - indices <- 1:n - powerset <- lapply(1:n, function(x) combn(indices, x, simplify = FALSE)) - powerset <- unlist(powerset, recursive = FALSE) - powerset <- lapply(powerset, function(index) vec[index]) - powerset + # generate powerset (do not include empty set) + n <- length(vec) + indices <- 1:n + powerset <- lapply(1:n, function(x) combn(indices, x, simplify = FALSE)) + powerset <- unlist(powerset, recursive = FALSE) + powerset <- lapply(powerset, function(index) vec[index]) + powerset } opts_power_set <- get_powerset(opts) est_walltimes <- list() for (i in 1:20) { - est_walltimes <- append( - x = est_walltimes, - values = sapply( - 
opts_power_set, - FUN = function(advanced_opts) { - # for simplicity, assume the default number of homologus hits (100) - n_hits <- if ("homology_search" %in% advanced_opts) { - 100 - } else { - NULL - } - est_walltime <- advanced_opts2est_walltime( - advanced_opts, - n_inputs = i, - n_hits = n_hits, - verbose = TRUE - ) - names(est_walltime) <- paste0(advanced_opts, collapse = "_") - est_walltime - } + est_walltimes <- append( + x = est_walltimes, + values = sapply( + opts_power_set, + FUN = function(advanced_opts) { + # for simplicity, assume the default number of homologus hits (100) + n_hits <- if ("homology_search" %in% advanced_opts) { + 100 + } else { + NULL + } + est_walltime <- calculateEstimatedWallTimeFromOptions( + advanced_opts, + n_inputs = i, + n_hits = n_hits, + verbose = TRUE ) + names(est_walltime) <- paste0(advanced_opts, collapse = "_") + est_walltime + } ) + ) } # concat all results to their unique names est_walltimes <- tapply( - unlist( - est_walltimes, - use.names = FALSE - ), - rep( - names(est_walltimes), - lengths(est_walltimes) - ), - FUN = c + unlist( + est_walltimes, + use.names = FALSE + ), + rep( + names(est_walltimes), + lengths(est_walltimes) + ), + FUN = c ) df_walltimes <- est_walltimes |> - unlist() |> - matrix(nrow = length(est_walltimes[[1]]), ncol = length(names(est_walltimes))) + unlist() |> + matrix(nrow = length(est_walltimes[[1]]), + ncol = length(names(est_walltimes))) colnames(df_walltimes) <- names(est_walltimes) df_walltimes <- df_walltimes |> tibble::as_tibble() # rm always col or powerset outcome without the "always" processes col_idx_keep <- grep(pattern = "always$", x = names(df_walltimes)) df_walltimes <- df_walltimes |> - dplyr::select(col_idx_keep) + dplyr::select(col_idx_keep) # bind n_inputs df_walltimes <- df_walltimes |> - dplyr::mutate(n_inputs = 1:20) - df_walltimes <- tidyr::gather(df_walltimes, key = "advanced_opts", value = "est_walltime", -n_inputs) + dplyr::mutate(n_inputs = 1:20) + df_walltimes <- 
tidyr::gather(df_walltimes, + key = "advanced_opts", + value = "est_walltime", + n_inputs) # sec to hrs df_walltimes <- df_walltimes |> - dplyr::mutate(est_walltime = est_walltime / 3600) - p <- ggplot2::ggplot(df_walltimes, ggplot2::aes(x = n_inputs, y = est_walltime, color = advanced_opts)) + - ggplot2::geom_line() + - ggplot2::labs( - title = "MolEvolvR estimated runtimes", - x = "Number of inputs", - y = "Estimated walltime (hours)" - ) + dplyr::mutate(est_walltime = est_walltime / 3600) + p <- ggplot2::ggplot(df_walltimes, ggplot2::aes(x = n_inputs, + y = est_walltime, + color = advanced_opts)) + + ggplot2::geom_line() + + ggplot2::labs( + title = "MolEvolvR estimated runtimes", + x = "Number of inputs", + y = "Estimated walltime (hours)" + ) return(p) + }, error = function(e) { + message(paste("Encountered an error: ", e$message)) + }, warning = function(w) { + message(paste("Warning: ", w$message)) + }, finally = { + message("plotEstimatedWallTimes function execution completed.") + }) + } diff --git a/R/clean_clust_file.R b/R/clean_clust_file.R index d3f813e5..87dcde70 100755 --- a/R/clean_clust_file.R +++ b/R/clean_clust_file.R @@ -55,9 +55,9 @@ #' #' @examples #' \dontrun{ -#' clean_clust_file("data/pspa.op_ins_cls", writepath = NULL, query = "pspa") +#' cleanClusterFile("data/pspa.op_ins_cls", writepath = NULL, query = "pspa") #' } -clean_clust_file <- function(path, writepath = NULL, query) { +cleanClusterFile <- function(path, writepath = NULL, query) { # ?? does the following line need to be changed to read_lines()? 
prot <- read_tsv(path, col_names = F) diff --git a/R/combine_analysis.R b/R/combine_analysis.R index bb3b3ce2..58ce1f14 100755 --- a/R/combine_analysis.R +++ b/R/combine_analysis.R @@ -17,7 +17,7 @@ #' @export #' #' @examples -combine_full <- function(inpath, ret = FALSE) { +combineFullAnalysis <- function(inpath, ret = FALSE) { ## Combining full_analysis files full_combnd <- combine_files(inpath, pattern = "*.full_analysis.tsv", skip = 0, @@ -44,7 +44,7 @@ combine_full <- function(inpath, ret = FALSE) { #' @export #' #' @examples -combine_ipr <- function(inpath, ret = FALSE) { +combineIPR <- function(inpath, ret = FALSE) { ## Combining clean ipr files ipr_combnd <- combine_files(inpath, pattern = "*.iprscan_cln.tsv", skip = 0, diff --git a/R/combine_files.R b/R/combine_files.R index 76c5fa09..455ddd53 100755 --- a/R/combine_files.R +++ b/R/combine_files.R @@ -38,7 +38,7 @@ #' @export #' #' @examples -combine_files <- function(inpath = c("../molevol_data/project_data/phage_defense/"), +combineFiles <- function(inpath = c("../molevol_data/project_data/phage_defense/"), pattern = "*full_analysis.tsv", delim = "\t", skip = 0, col_names = T) { @@ -67,7 +67,7 @@ combine_files <- function(inpath = c("../molevol_data/project_data/phage_defense ## Sample Runs ## ################# # ## Combining full_analysis files -# full_combnd <- combine_files(inpath, +# full_combnd <- combineFiles(inpath, # pattern="*full_analysis.txt", skip=0, # col_names=T) # @@ -75,7 +75,7 @@ combine_files <- function(inpath = c("../molevol_data/project_data/phage_defense # path="../molevol_data/project_data/slps/full_combined.tsv") # # ## Combining clean files -# cln_combnd <- combine_files(inpath, +# cln_combnd <- combineFiles(inpath, # pattern="^.*cln.txt", skip=0, # col_names=T) # @@ -86,14 +86,14 @@ combine_files <- function(inpath = c("../molevol_data/project_data/phage_defense # ## Less helpful examples! 
# ## Combining BLAST files # ## Likely makes no sense since clustering is done per query -# cl_blast_combnd <- combine_files(inpath, +# cl_blast_combnd <- combineFiles(inpath, # pattern="^.*refseq.1e-5.txt", skip=0, # col_names=cl_blast_colnames) %>% # select(-PcPositive, -ClusterID) # # ## Combining IPR files # ## Likely makes no sense since there may be repeated AccNum from indiv. files! -# ipr_combnd <- combine_files(inpath, +# ipr_combnd <- combineFiles(inpath, # pattern="*iprscan.lins*", skip=0, # col_names=ipr_colnames) # diff --git a/R/create_lineage_lookup.R b/R/create_lineage_lookup.R index e7374df3..d911934a 100644 --- a/R/create_lineage_lookup.R +++ b/R/create_lineage_lookup.R @@ -26,9 +26,9 @@ #' @export #' #' @examples -create_lineage_lookup <- function(lineage_file = here("data/rankedlineage.dmp"), +createLineageLookup <- function(lineage_file = here("data/rankedlineage.dmp"), outfile, taxonomic_rank = "phylum") { - shorten_NA <- function(Lineage) { + .shortenNA <- function(Lineage) { first_NA <- str_locate(Lineage, "NA")[1] if (is.na(first_NA)) { # No NAs @@ -92,7 +92,7 @@ create_lineage_lookup <- function(lineage_file = here("data/rankedlineage.dmp"), # Takes a while (2million rows after all) rankedLinsCombined <- rankedLins %>% unite(col = "Lineage", all_of(combined_taxonomy), sep = ">") %>% - mutate(Lineage = unlist(map(Lineage, shorten_NA))) + mutate(Lineage = unlist(map(Lineage, .shortenNA))) diff --git a/man/assign_job_queue.Rd b/man/assignJobQueue.Rd similarity index 64% rename from man/assign_job_queue.Rd rename to man/assignJobQueue.Rd index ceb6fa77..27511b6a 100644 --- a/man/assign_job_queue.Rd +++ b/man/assignJobQueue.Rd @@ -1,14 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{assign_job_queue} -\alias{assign_job_queue} +\name{assignJobQueue} +\alias{assignJobQueue} \title{Decision function to assign job queue} \usage{ -assign_job_queue(t_sec_estimate, t_cutoff = 21600) 
+assignJobQueue(t_sec_estimate, t_cutoff = 21600) } \arguments{ \item{t_sec_estimate}{estimated number of seconds a job will process -(from advanced_opts2est_walltime())} +(from calculateEstimatedWallTimeFromOptions())} \item{t_long}{threshold value that defines the lower bound for assigning a job to the "long queue"} @@ -17,8 +17,9 @@ job to the "long queue"} a string of "short" or "long" example: -advanced_opts2est_walltime(c("homology_search", "domain_architecture"), 3) |> -assign_job_queue() +calculateEstimatedWallTimeFromOptions(c("homology_search", +"domain_architecture"), 3) |> +assignJobQueue() } \description{ Decision function to assign job queue diff --git a/man/advanced_opts2est_walltime.Rd b/man/calculateEstimatedWallTimeFromOptions.Rd similarity index 68% rename from man/advanced_opts2est_walltime.Rd rename to man/calculateEstimatedWallTimeFromOptions.Rd index ea4b29e6..e4eec3fd 100644 --- a/man/advanced_opts2est_walltime.Rd +++ b/man/calculateEstimatedWallTimeFromOptions.Rd @@ -1,11 +1,11 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{advanced_opts2est_walltime} -\alias{advanced_opts2est_walltime} +\name{calculateEstimatedWallTimeFromOptions} +\alias{calculateEstimatedWallTimeFromOptions} \title{Given MolEvolvR advanced options and number of inputs, calculate the total estimated walltime for the job} \usage{ -advanced_opts2est_walltime( +calculateEstimatedWallTimeFromOptions( advanced_opts, n_inputs = 1L, n_hits = NULL, @@ -14,14 +14,16 @@ advanced_opts2est_walltime( } \arguments{ \item{advanced_opts}{character vector of MolEvolvR advanced options -(see make_opts2procs for the options)} +(see mapOption2Process for the options)} \item{n_inputs}{total number of input proteins} } \value{ total estimated number of seconds a job will process (walltime) -example: advanced_opts2est_walltime(c("homology_search", "domain_architecture"), n_inputs = 3, n_hits = 50L) +example: 
calculateEstimatedWallTimeFromOptions(c("homology_search", +"domain_architecture"), +n_inputs = 3, n_hits = 50L) } \description{ Given MolEvolvR advanced options and number of inputs, diff --git a/man/get_proc_medians.Rd b/man/calculateProcessRuntime.Rd similarity index 76% rename from man/get_proc_medians.Rd rename to man/calculateProcessRuntime.Rd index b6db0b56..bb6dd1ed 100644 --- a/man/get_proc_medians.Rd +++ b/man/calculateProcessRuntime.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{get_proc_medians} -\alias{get_proc_medians} +\name{calculateProcessRuntime} +\alias{calculateProcessRuntime} \title{Scrape MolEvolvR logs and calculate median processes} \usage{ -get_proc_medians(dir_job_results) +calculateProcessRuntime(dir_job_results) } \arguments{ \item{dir_job_results}{\link{chr} path to MolEvolvR job_results @@ -21,12 +21,12 @@ examples: } dir_job_results <- "/data/scratch/janani/molevolvr_out" -list_proc_medians <- get_proc_medians(dir_job_results) +list_proc_medians <- calculateProcessRuntime(dir_job_results) \enumerate{ \item from outside container environment common_root <- "/data/molevolvr_transfer/molevolvr_dev" dir_job_results <- "/data/molevolvr_transfer/molevolvr_dev/job_results" -list_proc_medians <- get_proc_medians(dir_job_results) +list_proc_medians <- calculateProcessRuntime(dir_job_results) } } \description{ diff --git a/man/clean_clust_file.Rd b/man/cleanClusterFile.Rd similarity index 82% rename from man/clean_clust_file.Rd rename to man/cleanClusterFile.Rd index bba3072e..d2818662 100644 --- a/man/clean_clust_file.Rd +++ b/man/cleanClusterFile.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/clean_clust_file.R -\name{clean_clust_file} -\alias{clean_clust_file} +\name{cleanClusterFile} +\alias{cleanClusterFile} \title{Clean Cluster File} \usage{ -clean_clust_file(path, writepath = NULL, query) 
+cleanClusterFile(path, writepath = NULL, query) } \arguments{ \item{path}{A character to the path of the cluster file to be cleaned} @@ -24,6 +24,6 @@ This function reads a space-separated cluster file and converts it to a cleaned } \examples{ \dontrun{ -clean_clust_file("data/pspa.op_ins_cls", writepath = NULL, query = "pspa") +cleanClusterFile("data/pspa.op_ins_cls", writepath = NULL, query = "pspa") } } diff --git a/man/combine_files.Rd b/man/combineFiles.Rd similarity index 92% rename from man/combine_files.Rd rename to man/combineFiles.Rd index 4126eb9e..3b56b923 100644 --- a/man/combine_files.Rd +++ b/man/combineFiles.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/combine_files.R -\name{combine_files} -\alias{combine_files} +\name{combineFiles} +\alias{combineFiles} \title{Download the combined assembly summaries of genbank and refseq} \usage{ -combine_files( +combineFiles( inpath = c("../molevol_data/project_data/phage_defense/"), pattern = "*full_analysis.tsv", delim = "\\t", diff --git a/man/combine_full.Rd b/man/combineFullAnalysis.Rd similarity index 69% rename from man/combine_full.Rd rename to man/combineFullAnalysis.Rd index f4e6597b..35925e86 100644 --- a/man/combine_full.Rd +++ b/man/combineFullAnalysis.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/combine_analysis.R -\name{combine_full} -\alias{combine_full} +\name{combineFullAnalysis} +\alias{combineFullAnalysis} \title{Combining full_analysis files} \usage{ -combine_full(inpath, ret = FALSE) +combineFullAnalysis(inpath, ret = FALSE) } \arguments{ \item{ret}{} diff --git a/man/combine_ipr.Rd b/man/combineIPR.Rd similarity index 74% rename from man/combine_ipr.Rd rename to man/combineIPR.Rd index 52aa3057..035c4274 100644 --- a/man/combine_ipr.Rd +++ b/man/combineIPR.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/combine_analysis.R 
-\name{combine_ipr} -\alias{combine_ipr} +\name{combineIPR} +\alias{combineIPR} \title{Combining clean ipr files} \usage{ -combine_ipr(inpath, ret = FALSE) +combineIPR(inpath, ret = FALSE) } \arguments{ \item{ret}{} diff --git a/man/create_lineage_lookup.Rd b/man/createLineageLookup.Rd similarity index 91% rename from man/create_lineage_lookup.Rd rename to man/createLineageLookup.Rd index 51670f35..5dbab978 100644 --- a/man/create_lineage_lookup.Rd +++ b/man/createLineageLookup.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/create_lineage_lookup.R -\name{create_lineage_lookup} -\alias{create_lineage_lookup} +\name{createLineageLookup} +\alias{createLineageLookup} \title{Create a look up table that goes from TaxID, to Lineage} \usage{ -create_lineage_lookup( +createLineageLookup( lineage_file = here("data/rankedlineage.dmp"), outfile, taxonomic_rank = "phylum" diff --git a/man/get_proc_weights.Rd b/man/getProcessRuntimeWeights.Rd similarity index 73% rename from man/get_proc_weights.Rd rename to man/getProcessRuntimeWeights.Rd index 0f4beb57..8eff0347 100644 --- a/man/get_proc_weights.Rd +++ b/man/getProcessRuntimeWeights.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{get_proc_weights} -\alias{get_proc_weights} +\name{getProcessRuntimeWeights} +\alias{getProcessRuntimeWeights} \title{Quickly get the runtime weights for MolEvolvR backend processes} \usage{ -get_proc_weights(medians_yml_path = NULL) +getProcessRuntimeWeights(medians_yml_path = NULL) } \arguments{ \item{dir_job_results}{\link{chr} path to MolEvolvR job_results @@ -13,7 +13,7 @@ directory} \value{ \link{list} names: processes; values: median runtime (seconds) -example: get_proc_weights() +example: writeProcessRuntimeToYML() } \description{ Quickly get the runtime weights for MolEvolvR backend processes diff --git a/man/map_advanced_opts2procs.Rd b/man/mapAdvOption2Process.Rd 
similarity index 76% rename from man/map_advanced_opts2procs.Rd rename to man/mapAdvOption2Process.Rd index 631708b4..5bd9ee65 100644 --- a/man/map_advanced_opts2procs.Rd +++ b/man/mapAdvOption2Process.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{map_advanced_opts2procs} -\alias{map_advanced_opts2procs} +\name{mapAdvOption2Process} +\alias{mapAdvOption2Process} \title{Use MolEvolvR advanced options to get associated processes} \usage{ -map_advanced_opts2procs(advanced_opts) +mapAdvOption2Process(advanced_opts) } \arguments{ \item{advanced_opts}{character vector of MolEvolvR advanced options} @@ -15,7 +15,7 @@ the advanced options example: advanced_opts <- c("homology_search", "domain_architecture") -procs <- map_advanced_opts2procs(advanced_opts) +procs <- mapAdvOption2Process(advanced_opts) } \description{ Use MolEvolvR advanced options to get associated processes diff --git a/man/make_opts2procs.Rd b/man/mapOption2Process.Rd similarity index 75% rename from man/make_opts2procs.Rd rename to man/mapOption2Process.Rd index 07e208b2..ff6905c5 100644 --- a/man/make_opts2procs.Rd +++ b/man/mapOption2Process.Rd @@ -1,15 +1,15 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{make_opts2procs} -\alias{make_opts2procs} +\name{mapOption2Process} +\alias{mapOption2Process} \title{Construct list where names (MolEvolvR advanced options) point to processes} \usage{ -make_opts2procs() +mapOption2Process() } \value{ list where names (MolEvolvR advanced options) point to processes -example: list_opts2procs <- make_opts2procs +example: list_opts2procs <- mapOption2Process } \description{ Construct list where names (MolEvolvR advanced options) point to processes diff --git a/man/plot_estimated_walltimes.Rd b/man/plotEstimatedWallTimes.Rd similarity index 55% rename from man/plot_estimated_walltimes.Rd rename to man/plotEstimatedWallTimes.Rd index 
3669e0e0..0d53cb32 100644 --- a/man/plot_estimated_walltimes.Rd +++ b/man/plotEstimatedWallTimes.Rd @@ -1,18 +1,19 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{plot_estimated_walltimes} -\alias{plot_estimated_walltimes} +\name{plotEstimatedWallTimes} +\alias{plotEstimatedWallTimes} \title{Plot the estimated runtimes for different advanced options and number of inputs} \usage{ -plot_estimated_walltimes() +plotEstimatedWallTimes() } \value{ line plot object example: -p <- plot_estimated_walltimes() -ggplot2::ggsave(filename = "/data/molevolvr_transfer/molevolvr_dev/molevol_scripts/docs/estimate_walltimes.png", plot = p) +p <- plotEstimatedWallTimes() +ggplot2::ggsave(filename = "/data/molevolvr_transfer/molevolvr_ +dev/molevol_scripts/docs/estimate_walltimes.png", plot = p) } \description{ this function was just for fun; very, very messy code diff --git a/man/write_proc_medians_table.Rd b/man/writeProcessRuntime2TSV.Rd similarity index 77% rename from man/write_proc_medians_table.Rd rename to man/writeProcessRuntime2TSV.Rd index 2ae7a97b..03cbbd68 100644 --- a/man/write_proc_medians_table.Rd +++ b/man/writeProcessRuntime2TSV.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{write_proc_medians_table} -\alias{write_proc_medians_table} +\name{writeProcessRuntime2TSV} +\alias{writeProcessRuntime2TSV} \title{Write a table of 2 columns: 1) process and 2) median seconds} \usage{ -write_proc_medians_table(dir_job_results, filepath) +writeProcessRuntime2TSV(dir_job_results, filepath) } \arguments{ \item{dir_job_results}{\link{chr} path to MolEvolvR job_results} @@ -14,7 +14,7 @@ write_proc_medians_table(dir_job_results, filepath) \value{ \link{tbl_df} 2 columns: 1) process and 2) median seconds -example: write_proc_medians_table( +example: writeProcessRuntime2TSV( "/data/scratch/janani/molevolvr_out/", 
"/data/scratch/janani/molevolvr_out/log_tbl.tsv" ) diff --git a/man/write_proc_medians_yml.Rd b/man/writeProcessRuntimeToYML.Rd similarity index 61% rename from man/write_proc_medians_yml.Rd rename to man/writeProcessRuntimeToYML.Rd index a3d8ee5f..e4a5c8ad 100644 --- a/man/write_proc_medians_yml.Rd +++ b/man/writeProcessRuntimeToYML.Rd @@ -1,25 +1,26 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{write_proc_medians_yml} -\alias{write_proc_medians_yml} +\name{writeProcessRuntimeToYML} +\alias{writeProcessRuntimeToYML} \title{Compute median process runtimes, then write a YAML list of the processes and their median runtimes in seconds to the path specified by 'filepath'.} \usage{ -write_proc_medians_yml(dir_job_results, filepath = NULL) +writeProcessRuntimeToYML(dir_job_results, filepath = NULL) } \arguments{ \item{dir_job_results}{\link{chr} path to MolEvolvR job_results directory} -\item{filepath}{\link{chr} path to save YAML file; if NULL, uses ./molevol_scripts/log_data/job_proc_weights.yml} +\item{filepath}{\link{chr} path to save YAML file; if NULL, +uses ./molevol_scripts/log_data/job_proc_weights.yml} } \description{ The default value of filepath is the value of the env var -MOLEVOLVR_PROC_WEIGHTS, which get_proc_weights() also uses as its default +MOLEVOLVR_PROC_WEIGHTS, which writeProcessRuntimeToYML() also uses as its default read location. 
} \examples{ \dontrun{ -write_proc_medians_yml( +writeProcessRuntimeToYML( "/data/scratch/janani/molevolvr_out/", "/data/scratch/janani/molevolvr_out/log_tbl.yml" ) From 091d32ebb31b6f295268b4e0a38ef0fab1066358 Mon Sep 17 00:00:00 2001 From: Seyi Kuforiji Date: Tue, 8 Oct 2024 07:17:56 +0100 Subject: [PATCH 10/61] fixing merge issue in NAMESPACE --- NAMESPACE | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 739c76d7..d2ef5463 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -29,6 +29,9 @@ export(cleanSpecies) export(combineFiles) export(combineFullAnalysis) export(combineIPR) +export(condenseRepeatedDomains) +export(convert2TitleCase) +export(convertAlignment2FA) export(convert_aln2fa) export(convert_fa2tre) export(count_bycol) @@ -63,13 +66,15 @@ export(lineage.domain_repeats.plot) export(lineage.neighbors.plot) export(lineage_sunburst) export(make_job_results_url) +export(mapAcc2Name) export(mapAdvOption2Process) export(mapOption2Process) -export(mapAcc2Name) +export(map_acc2name) export(msa_pdf) export(pick_longer_duplicate) export(plotEstimatedWallTimes) export(prot2tax) +export(prot2tax_old) export(removeAsterisks) export(removeEmptyRows) export(removeTails) From fc63187c4985d8a9fad15582691b4ee4f9c273e6 Mon Sep 17 00:00:00 2001 From: Seyi Kuforiji Date: Tue, 8 Oct 2024 08:18:42 +0100 Subject: [PATCH 11/61] Added updated function name to NAMESPACE and removed unused argument in readAAStringSet --- NAMESPACE | 3 +-- R/msa.R | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index d2ef5463..cd135cc8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,9 +20,9 @@ export(assert_count_df) export(assignJobQueue) export(calculateEstimatedWallTimeFromOptions) export(calculateProcessRuntime) -export(cleanGeneDescription) export(cleanClusters) export(cleanDomainArchitecture) +export(cleanGeneDescription) export(cleanGenomicContext) export(cleanLineage) 
export(cleanSpecies) @@ -71,7 +71,6 @@ export(mapAdvOption2Process) export(mapOption2Process) export(map_acc2name) export(msa_pdf) -export(pick_longer_duplicate) export(plotEstimatedWallTimes) export(prot2tax) export(prot2tax_old) diff --git a/R/msa.R b/R/msa.R index e56cc32c..0b1b6e34 100644 --- a/R/msa.R +++ b/R/msa.R @@ -197,21 +197,21 @@ msa_pdf <- function(fasta_path, out_path = NULL, #' #' @examples generate_msa <- function(fa_file = "", outfile = "") { - prot_aa <- readAAStringSet( - path = fa_file, - format = "fasta" - ) - prot_aa + prot_aa <- readAAStringSet( + fa_file, + format = "fasta" + ) + prot_aa - ## Install kalign ?rMSA_INSTALL - ## Messed up! Reimplement from kalign.R - ## https://github.com/mhahsler/rMSA/blob/master/R/kalign.R + ## Install kalign ?rMSA_INSTALL + ## Messed up! Reimplement from kalign.R + ## https://github.com/mhahsler/rMSA/blob/master/R/kalign.R - # source("scripts/c2r.R") + # source("scripts/c2r.R") - ## align the sequences - al <- kalign(prot_aa) # !! won't work! - al + ## align the sequences + al <- kalign(prot_aa) # !! won't work! 
+ al } ############################ From 208b9e02d0bedfd6d16d663dfb109fcce23040ac Mon Sep 17 00:00:00 2001 From: teddyCodex Date: Tue, 8 Oct 2024 18:42:40 +0100 Subject: [PATCH 12/61] refactor function names in R/ipr2vis.R --- R/ipr2viz.R | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/R/ipr2viz.R b/R/ipr2viz.R index bf3650f7..5d8a0a03 100644 --- a/R/ipr2viz.R +++ b/R/ipr2viz.R @@ -13,7 +13,7 @@ ################################# ## Modified gggenes::theme_genes ################################# -## theme_genes2 adapted from theme_genes (w/o strip.text()) +## themeGenes2 adapted from theme_genes (w/o strip.text()) ## https://github.com/wilkox/gggenes/blob/master/R/theme_genes.R #' Theme Genes2 #' @@ -23,7 +23,7 @@ #' @export #' #' @examples -theme_genes2 <- function() { +themeGenes2 <- function() { ggplot2::theme_grey() + ggplot2::theme( panel.background = ggplot2::element_blank(), panel.grid.major.y = ggplot2::element_line(colour = "grey80", size = 0.2), @@ -58,7 +58,7 @@ theme_genes2 <- function() { #' @export #' #' @examples -find_top_acc <- function(infile_full, +getTopAccByLinDomArch <- function(infile_full, DA_col = "DomArch.Pfam", lin_col = "Lineage_short", n = 20, @@ -113,7 +113,7 @@ find_top_acc <- function(infile_full, #' @export #' #' @examples -ipr2viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(), +plotIPR2Viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(), analysis = c("Pfam", "Phobius", "TMHMM", "Gene3D"), group_by = "Analysis", # "Analysis" topn = 20, name = "Name", text_size = 15, query = "All") { @@ -141,8 +141,8 @@ ipr2viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(), ## To filter by Analysis analysis <- paste(analysis, collapse = "|") ## @SAM: This can't be set in stone since the analysis may change! 
- ## Getting top n accession numbers using find_top_acc() - top_acc <- find_top_acc( + ## Getting top n accession numbers using getTopAccByLinDomArch() + top_acc <- getTopAccByLinDomArch( infile_full = infile_full, DA_col = "DomArch.Pfam", ## @SAM, you could pick by the Analysis w/ max rows! @@ -202,7 +202,7 @@ ipr2viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(), # , ncol = 1 + #scales = "free", scale_fill_manual(values = CPCOLS, na.value = "#A9A9A9") + theme_minimal() + - theme_genes2() + + themeGenes2() + theme( legend.position = "bottom", legend.box = "horizontal", @@ -232,7 +232,7 @@ ipr2viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(), ) + scale_fill_manual(values = CPCOLS, na.value = "#A9A9A9") + theme_minimal() + - theme_genes2() + + themeGenes2() + theme( legend.position = "bottom", legend.box = "horizontal", @@ -268,7 +268,7 @@ ipr2viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(), #' @export #' #' @examples -ipr2viz_web <- function(infile_ipr, +plotIPR2VizWeb <- function(infile_ipr, accessions, analysis = c("Pfam", "Phobius", "TMHMM", "Gene3D"), group_by = "Analysis", name = "Name", @@ -344,7 +344,7 @@ ipr2viz_web <- function(infile_ipr, # , ncol = 1 + #scales = "free", scale_fill_manual(values = CPCOLS, na.value = "#A9A9A9") + theme_minimal() + - theme_genes2() + + themeGenes2() + theme( legend.position = "bottom", legend.box = "horizontal", @@ -374,7 +374,7 @@ ipr2viz_web <- function(infile_ipr, ) + scale_fill_manual(values = CPCOLS, na.value = "#A9A9A9") + theme_minimal() + - theme_genes2() + + themeGenes2() + theme( legend.position = "bottom", legend.box = "horizontal", From 44f0a766f29b36cdab6d7fbddc9c31cd4d0df20d Mon Sep 17 00:00:00 2001 From: teddyCodex Date: Tue, 8 Oct 2024 18:51:23 +0100 Subject: [PATCH 13/61] update namespace and rd files with roxygen2 --- NAMESPACE | 8 ++-- man/countbycolumn.Rd | 22 ---------- man/filterbydomains.Rd | 44 ------------------- 
man/filterbyfrequency.Rd | 22 ---------- man/findparalogs.Rd | 26 ----------- ...nd_top_acc.Rd => getTopAccByLinDomArch.Rd} | 6 +-- man/{ipr2viz.Rd => plotIPR2Viz.Rd} | 6 +-- man/{ipr2viz_web.Rd => plotIPR2VizWeb.Rd} | 6 +-- man/summarizebylineage.Rd | 25 ----------- man/{theme_genes2.Rd => themeGenes2.Rd} | 6 +-- man/totalgencontextordomarchcounts.Rd | 42 ------------------ man/words2wordcounts.Rd | 25 ----------- 12 files changed, 16 insertions(+), 222 deletions(-) delete mode 100644 man/countbycolumn.Rd delete mode 100644 man/filterbydomains.Rd delete mode 100644 man/filterbyfrequency.Rd delete mode 100644 man/findparalogs.Rd rename man/{find_top_acc.Rd => getTopAccByLinDomArch.Rd} (79%) rename man/{ipr2viz.Rd => plotIPR2Viz.Rd} (87%) rename man/{ipr2viz_web.Rd => plotIPR2VizWeb.Rd} (85%) delete mode 100644 man/summarizebylineage.Rd rename man/{theme_genes2.Rd => themeGenes2.Rd} (72%) delete mode 100644 man/totalgencontextordomarchcounts.Rd delete mode 100644 man/words2wordcounts.Rd diff --git a/NAMESPACE b/NAMESPACE index 53332439..ddbd1dd5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -46,22 +46,22 @@ export(extractAccNum) export(filterByDomains) export(filterByFrequency) export(findParalogs) -export(find_top_acc) export(formatJobArgumentsHTML) export(gc_undirected_network) export(generateAllAlignments2FA) export(generate_all_aln2fa) export(generate_msa) +export(getTopAccByLinDomArch) export(get_accnums_from_fasta_file) export(get_proc_medians) export(get_proc_weights) -export(ipr2viz) -export(ipr2viz_web) export(make_opts2procs) export(mapAcc2Name) export(map_acc2name) export(map_advanced_opts2procs) export(msa_pdf) +export(plotIPR2Viz) +export(plotIPR2VizWeb) export(plotLineageDA) export(plotLineageDomainRepeats) export(plotLineageHeatmap) @@ -97,7 +97,7 @@ export(summarizeDomArch_ByLineage) export(summarizeGenContext) export(summarizeGenContext_ByDomArchLineage) export(summarizeGenContext_ByLineage) -export(theme_genes2) +export(themeGenes2) 
export(to_titlecase) export(totalGenContextOrDomArchCounts) export(validateCountDF) diff --git a/man/countbycolumn.Rd b/man/countbycolumn.Rd deleted file mode 100644 index 34fcc3e0..00000000 --- a/man/countbycolumn.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{countByColumn} -\alias{countByColumn} -\title{Count By Column} -\usage{ -countByColumn(prot = prot, column = "DomArch", min.freq = 1) -} -\arguments{ -\item{min.freq}{} -} -\value{ -Describe return, in detail -} -\description{ -Count By Column -} -\examples{ -\dontrun{ -countByColumn() -} -} diff --git a/man/filterbydomains.Rd b/man/filterbydomains.Rd deleted file mode 100644 index 8c885363..00000000 --- a/man/filterbydomains.Rd +++ /dev/null @@ -1,44 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{filterByDomains} -\alias{filterByDomains} -\title{Filter by Domains} -\usage{ -filterByDomains( - prot, - column = "DomArch", - doms_keep = c(), - doms_remove = c(), - ignore.case = FALSE -) -} -\arguments{ -\item{prot}{Dataframe to filter} - -\item{column}{Column to search for domains in (DomArch column)} - -\item{doms_keep}{Vector of domains that must be identified within column in order for -observation to be kept} - -\item{doms_remove}{Vector of domains that, if found within an observation, will be removed} - -\item{ignore.case}{Should the matching be non case sensitive} -} -\value{ -Filtered data frame -} -\description{ -filterByDomains filters a data frame by identifying exact domain matches -and either keeping or removing rows with the identified domain -} -\note{ -There is no need to make the domains 'regex safe', that will be handled by this function -} -\examples{ -\dontrun{ -filterByDomains() -} -} -\author{ -Samuel Chen, Janani Ravi -} diff --git a/man/filterbyfrequency.Rd b/man/filterbyfrequency.Rd deleted file mode 100644 index d2c5f9cd..00000000 --- 
a/man/filterbyfrequency.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{filterByFrequency} -\alias{filterByFrequency} -\title{Filter Frequency} -\usage{ -filterByFrequency(x, min.freq) -} -\arguments{ -\item{min.freq}{} -} -\value{ -Describe return, in detail -} -\description{ -Filter Frequency -} -\examples{ -\dontrun{ -filterByFrequency() -} -} diff --git a/man/findparalogs.Rd b/man/findparalogs.Rd deleted file mode 100644 index 4b5edbcf..00000000 --- a/man/findparalogs.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{findParalogs} -\alias{findParalogs} -\title{Find Paralogs} -\usage{ -findParalogs(prot) -} -\arguments{ -\item{prot}{A data frame filtered by a Query, containing columns Species and Lineage} -} -\value{ -returns a dataframe containing paralogs and the counts. -} -\description{ -Creates a data frame of paralogs. -} -\note{ -Please refer to the source code if you have alternate file formats and/or -column names. 
-} -\examples{ -\dontrun{ -findParalogs(pspa) -} -} diff --git a/man/find_top_acc.Rd b/man/getTopAccByLinDomArch.Rd similarity index 79% rename from man/find_top_acc.Rd rename to man/getTopAccByLinDomArch.Rd index 780cde11..a00da5c7 100644 --- a/man/find_top_acc.Rd +++ b/man/getTopAccByLinDomArch.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ipr2viz.R -\name{find_top_acc} -\alias{find_top_acc} +\name{getTopAccByLinDomArch} +\alias{getTopAccByLinDomArch} \title{Group by lineage + DA then take top 20} \usage{ -find_top_acc( +getTopAccByLinDomArch( infile_full, DA_col = "DomArch.Pfam", lin_col = "Lineage_short", diff --git a/man/ipr2viz.Rd b/man/plotIPR2Viz.Rd similarity index 87% rename from man/ipr2viz.Rd rename to man/plotIPR2Viz.Rd index 79063497..22297312 100644 --- a/man/ipr2viz.Rd +++ b/man/plotIPR2Viz.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ipr2viz.R -\name{ipr2viz} -\alias{ipr2viz} +\name{plotIPR2Viz} +\alias{plotIPR2Viz} \title{IPR2Viz} \usage{ -ipr2viz( +plotIPR2Viz( infile_ipr = NULL, infile_full = NULL, accessions = c(), diff --git a/man/ipr2viz_web.Rd b/man/plotIPR2VizWeb.Rd similarity index 85% rename from man/ipr2viz_web.Rd rename to man/plotIPR2VizWeb.Rd index 896445bd..4b4394ad 100644 --- a/man/ipr2viz_web.Rd +++ b/man/plotIPR2VizWeb.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ipr2viz.R -\name{ipr2viz_web} -\alias{ipr2viz_web} +\name{plotIPR2VizWeb} +\alias{plotIPR2VizWeb} \title{IPR2Viz Web} \usage{ -ipr2viz_web( +plotIPR2VizWeb( infile_ipr, accessions, analysis = c("Pfam", "Phobius", "TMHMM", "Gene3D"), diff --git a/man/summarizebylineage.Rd b/man/summarizebylineage.Rd deleted file mode 100644 index 2e445913..00000000 --- a/man/summarizebylineage.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R 
-\name{summarizeByLineage} -\alias{summarizeByLineage} -\title{Summarize by Lineage} -\usage{ -summarizeByLineage(prot = "prot", column = "DomArch", by = "Lineage", query) -} -\arguments{ -\item{query}{} -} -\value{ -Describe return, in detail -} -\description{ -Summarize by Lineage -} -\examples{ -\dontrun{ -library(tidyverse) -tibble(DomArch = c("a+b", "a+b", "b+c", "a+b"), Lineage = c("l1", "l1", "l1", "l2")) |> - summarizeByLineage(query = "all") -} - -} diff --git a/man/theme_genes2.Rd b/man/themeGenes2.Rd similarity index 72% rename from man/theme_genes2.Rd rename to man/themeGenes2.Rd index 29f79673..1553e019 100644 --- a/man/theme_genes2.Rd +++ b/man/themeGenes2.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ipr2viz.R -\name{theme_genes2} -\alias{theme_genes2} +\name{themeGenes2} +\alias{themeGenes2} \title{Theme Genes2} \usage{ -theme_genes2() +themeGenes2() } \description{ Theme Genes2 diff --git a/man/totalgencontextordomarchcounts.Rd b/man/totalgencontextordomarchcounts.Rd deleted file mode 100644 index f457cb6a..00000000 --- a/man/totalgencontextordomarchcounts.Rd +++ /dev/null @@ -1,42 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{totalGenContextOrDomArchCounts} -\alias{totalGenContextOrDomArchCounts} -\title{Total Counts} -\usage{ -totalGenContextOrDomArchCounts( - prot, - column = "DomArch", - lineage_col = "Lineage", - cutoff = 90, - RowsCutoff = FALSE, - digits = 2 -) -} -\arguments{ -\item{prot}{A data frame that must contain columns: -\itemize{\item Either 'GenContext' or 'DomArch.norep' \item count}} - -\item{column}{Character. The column to summarize} - -\item{cutoff}{Numeric. Cutoff for total count. Counts below cutoff value will not be shown. 
Default is 0.} - -\item{digits}{} -} -\value{ -Define return, in detail -} -\description{ -Creates a data frame with a totalcount column - -This function is designed to sum the counts column by either Genomic Context or Domain Architecture and creates a totalcount column from those sums. -} -\note{ -Please refer to the source code if you have alternate file formats and/or -column names. -} -\examples{ -\dontrun{ -totalGenContextOrDomArchCounts(pspa - gc_lin_counts, 0, "GC") -} -} diff --git a/man/words2wordcounts.Rd b/man/words2wordcounts.Rd deleted file mode 100644 index 7f60f226..00000000 --- a/man/words2wordcounts.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{words2WordCounts} -\alias{words2WordCounts} -\title{Words 2 Word Counts} -\usage{ -words2WordCounts(string) -} -\arguments{ -\item{string}{} -} -\value{ -\link{tbl_df} table with 2 columns: 1) words & 2) counts/frequency -} -\description{ -Get word counts (wc) \link{DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)} -} -\examples{ -\dontrun{ -tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> - elements2Words() |> - words2WordCounts() -} - -} From ae9e737616acc95e03ee4b7f4ca997e68675cc0d Mon Sep 17 00:00:00 2001 From: teddyCodex Date: Tue, 8 Oct 2024 22:20:07 +0100 Subject: [PATCH 14/61] refactor: externalize internal functions for global use --- .gitignore | 1 + R/plotting.R | 87 +++++++++++++++++++++++++++++----------------------- 2 files changed, 49 insertions(+), 39 deletions(-) diff --git a/.gitignore b/.gitignore index 50d1aa13..ef11006e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .Rproj.user docs .Rhistory +.DS_Store \ No newline at end of file diff --git a/R/plotting.R b/R/plotting.R index da95ea5f..5d949cd5 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -18,6 +18,47 @@ # suppressPackageStartupMessages(library(d3r)) # suppressPackageStartupMessages(library(viridis)) +######################## +## 
Internal Functions ## +######################## +#' +#' +.LevelReduction <- function(lin, level) { + if (level == 1) { + gt_loc <- str_locate(lin, ">")[[1]] + if (is.na(gt_loc)) { + # No '>' in lineage + return(lin) + } else { + lin <- substring(lin, first = 0, last = (gt_loc - 1)) + return(lin) + } + } + # Out of bounds guard + gt_loc <- str_locate_all(lin, ">")[[1]] + l <- length(gt_loc) / 2 + if (level > l) { + # Not enough '>' in lineage + return(lin) + } else { + gt_loc <- gt_loc[level, ][1] %>% as.numeric() + lin <- substring(lin, first = 0, last = (gt_loc - 1)) + return(lin) + } +} + +.GetKingdom <- function(lin) { + gt_loc <- str_locate(lin, ">")[, "start"] + if (is.na(gt_loc)) { + # No '>' in lineage + return(lin) + } else { + kingdom <- substring(lin, first = 0, last = (gt_loc - 1)) + return(kingdom) + } +} + + #' Shorten Lineage #' #' @param data @@ -665,30 +706,6 @@ plotLineageDomainRepeats <- function(query_data, colname) { #' } #' plotLineageHeatmap <- function(prot, domains_of_interest, level = 3, label.size = 8) { - .LevelReduction <- function(lin) { - if (level == 1) { - gt_loc <- str_locate(lin, ">")[[1]] - if (is.na(gt_loc)) { - # No '>' in lineage - return(lin) - } else { - lin <- substring(lin, first = 0, last = (gt_loc - 1)) - return(lin) - } - } - #### Add guard here to protect from out of bounds - gt_loc <- str_locate_all(lin, ">")[[1]] # [(level-1),][1] - l <- length(gt_loc) / 2 - if (level > l) { - # Not enough '>' in lineage - return(lin) - } else { - gt_loc <- gt_loc[level, ][1] %>% as.numeric() - lin <- substring(lin, first = 0, last = (gt_loc - 1)) - return(lin) - } - } - all_grouped <- data.frame("Query" = character(0), "Lineage" = character(0), "count" = integer()) for (dom in domains_of_interest) { @@ -703,19 +720,7 @@ plotLineageHeatmap <- function(prot, domains_of_interest, level = 3, label.size all_grouped <- dplyr::union(all_grouped, domSub) } - .GetKingdom <- function(lin) { - gt_loc <- str_locate(lin, ">")[, "start"] - - if 
(is.na(gt_loc)) { - # No '>' in lineage - return(lin) - } else { - kingdom <- substring(lin, first = 0, last = (gt_loc - 1)) - return(kingdom) - } - } - - all_grouped <- all_grouped %>% mutate(ReducedLin = unlist(purrr::map(Lineage, .LevelReduction))) + all_grouped <- all_grouped %>% mutate(ReducedLin = unlist(purrr::map(Lineage, ~.LevelReduction(.x, level)))) all_grouped_reduced <- all_grouped %>% group_by(Query, ReducedLin) %>% @@ -739,6 +744,10 @@ plotLineageHeatmap <- function(prot, domains_of_interest, level = 3, label.size append(eukaryota_colors) %>% append(virus_colors) + if (length(colors) < length(unique(all_grouped_reduced$ReducedLin))) { + colors <- rep("black", length(unique(all_grouped_reduced$ReducedLin))) # Fallback to black + } + all_grouped_reduced$ReducedLin <- map( all_grouped_reduced$ReducedLin, function(lin) { @@ -766,7 +775,7 @@ plotLineageHeatmap <- function(prot, domains_of_interest, level = 3, label.size ) ggplot( data = all_grouped_reduced, - aes_string(x = "ReducedLin", y = "Query") + aes(x = "ReducedLin", y = "Query") ) + geom_tile( data = subset( @@ -774,7 +783,7 @@ plotLineageHeatmap <- function(prot, domains_of_interest, level = 3, label.size !is.na(count) ), aes(fill = count), - colour = "darkred", size = 0.3 + colour = "darkred", linewidth = 0.3 ) + # , width=0.7, height=0.7), scale_fill_gradient(low = "white", high = "darkred") + # scale_x_discrete(position="top") + From a246339f47d37ff60bdfb76a6861807b546c93f3 Mon Sep 17 00:00:00 2001 From: teddyCodex Date: Tue, 8 Oct 2024 23:36:16 +0100 Subject: [PATCH 15/61] refactor function names in R/pre-msa-tree and R/reverse-operons.R --- NAMESPACE | 7 ++- R/pre-msa-tree.R | 16 +++---- R/reverse_operons.R | 12 ++--- man/RepresentativeAccNums.Rd | 4 +- man/countbycolumn.Rd | 22 ---------- man/createRepresentativeAccNum.Rd | 27 ++++++++++++ man/filterbydomains.Rd | 44 ------------------- man/filterbyfrequency.Rd | 22 ---------- man/findparalogs.Rd | 26 ----------- man/getAccNumFromFA.Rd | 14 
++++++ man/get_accnums_from_fasta_file.Rd | 6 +-- man/{reveql.Rd => reverseOperonSeq.Rd} | 10 ++--- ...verse_operon.Rd => straightenOperonSeq.Rd} | 10 ++--- man/summarizebylineage.Rd | 25 ----------- man/totalgencontextordomarchcounts.Rd | 42 ------------------ man/words2wordcounts.Rd | 25 ----------- man/write.MsaAAMultipleAlignment.Rd | 8 +--- man/writeMSA_AA2FA.Rd | 21 +++++++++ 18 files changed, 94 insertions(+), 247 deletions(-) delete mode 100644 man/countbycolumn.Rd create mode 100644 man/createRepresentativeAccNum.Rd delete mode 100644 man/filterbydomains.Rd delete mode 100644 man/filterbyfrequency.Rd delete mode 100644 man/findparalogs.Rd create mode 100644 man/getAccNumFromFA.Rd rename man/{reveql.Rd => reverseOperonSeq.Rd} (56%) rename man/{reverse_operon.Rd => straightenOperonSeq.Rd} (53%) delete mode 100644 man/summarizebylineage.Rd delete mode 100644 man/totalgencontextordomarchcounts.Rd delete mode 100644 man/words2wordcounts.Rd create mode 100644 man/writeMSA_AA2FA.Rd diff --git a/NAMESPACE b/NAMESPACE index 53332439..fe2ad999 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -36,6 +36,7 @@ export(countByColumn) export(createFA2Tree) export(createJobResultsURL) export(createJobStatusEmailMessage) +export(createRepresentativeAccNum) export(createWordCloud2Element) export(createWordCloudElement) export(create_lineage_lookup) @@ -52,6 +53,7 @@ export(gc_undirected_network) export(generateAllAlignments2FA) export(generate_all_aln2fa) export(generate_msa) +export(getAccNumFromFA) export(get_accnums_from_fasta_file) export(get_proc_medians) export(get_proc_weights) @@ -83,14 +85,14 @@ export(removeTails) export(renameFA) export(rename_fasta) export(replaceQuestionMarks) -export(reveql) -export(reverse_operon) +export(reverseOperonSeq) export(run_deltablast) export(run_rpsblast) export(selectLongestDuplicate) export(sendJobStatusEmail) export(shortenLineage) export(sinkReset) +export(straightenOperonSeq) export(summarizeByLineage) export(summarizeDomArch) 
export(summarizeDomArch_ByLineage) @@ -103,6 +105,7 @@ export(totalGenContextOrDomArchCounts) export(validateCountDF) export(wordcloud3) export(write.MsaAAMultipleAlignment) +export(writeMSA_AA2FA) export(write_proc_medians_table) export(write_proc_medians_yml) importFrom(Biostrings,AAStringSet) diff --git a/R/pre-msa-tree.R b/R/pre-msa-tree.R index 44979c3c..fed495f4 100644 --- a/R/pre-msa-tree.R +++ b/R/pre-msa-tree.R @@ -546,7 +546,7 @@ acc2fa <- function(accessions, outpath, plan = "sequential") { return(result) } -#' RepresentativeAccNums +#' createRepresentativeAccNum #' #' @description #' Function to generate a vector of one Accession number per distinct observation from 'reduced' column @@ -566,7 +566,7 @@ acc2fa <- function(accessions, outpath, plan = "sequential") { #' @export #' #' @examples -RepresentativeAccNums <- function(prot_data, +createRepresentativeAccNum <- function(prot_data, reduced = "Lineage", accnum_col = "AccNum") { # Get Unique reduced column and then bind the AccNums back to get one AccNum per reduced column @@ -623,15 +623,15 @@ alignFasta <- function(fasta_file, tool = "Muscle", outpath = NULL) { ) if (typeof(outpath) == "character") { - write.MsaAAMultipleAlignment(aligned, outpath) + writeMSA_AA2FA(aligned, outpath) } return(aligned) } -#' write.MsaAAMultipleAlignment +#' writeMSA_AA2FA #' #' @description -#' Write MsaAAMultpleAlignment Objects as algined fasta sequence +#' Write MsaAAMultpleAlignment Objects as aligned fasta sequence #' MsaAAMultipleAlignment Objects are generated from calls to msaClustalOmega #' and msaMuscle from the 'msa' package #' @@ -647,7 +647,7 @@ alignFasta <- function(fasta_file, tool = "Muscle", outpath = NULL) { #' @export #' #' @examples -write.MsaAAMultipleAlignment <- function(alignment, outpath) { +writeMSA_AA2FA <- function(alignment, outpath) { l <- length(rownames(alignment)) fasta <- "" for (i in 1:l) @@ -660,7 +660,7 @@ write.MsaAAMultipleAlignment <- function(alignment, outpath) { 
return(fasta) } -#' get_accnums_from_fasta_file +#' getAccNumFromFA #' #' @param fasta_file #' @@ -671,7 +671,7 @@ write.MsaAAMultipleAlignment <- function(alignment, outpath) { #' @export #' #' @examples -get_accnums_from_fasta_file <- function(fasta_file) { +getAccNumFromFA <- function(fasta_file) { txt <- read_file(fasta_file) accnums <- stringi::stri_extract_all_regex(fasta_file, "(?<=>)[\\w,.]+")[[1]] return(accnums) diff --git a/R/reverse_operons.R b/R/reverse_operons.R index e4bbd50e..a2570e8d 100755 --- a/R/reverse_operons.R +++ b/R/reverse_operons.R @@ -3,7 +3,7 @@ # Modified by Janani Ravi and Samuel Chen -#' reveql +#' straightenOperonSeq #' #' @param prot #' @@ -11,7 +11,7 @@ #' @export #' #' @examples -reveql <- function(prot) { +straightenOperonSeq <- function(prot) { w <- prot # $GenContext.orig # was 'x' y <- rep(NA, length(w)) @@ -57,7 +57,7 @@ reveql <- function(prot) { ## The function to reverse operons -#' reverse_operon +#' reverseOperonSeq #' #' @param prot #' @@ -65,7 +65,7 @@ reveql <- function(prot) { #' @export #' #' @examples -reverse_operon <- function(prot) { +reverseOperonSeq <- function(prot) { gencontext <- prot$GenContext gencontext <- gsub(pattern = ">", replacement = ">|", x = gencontext) @@ -108,7 +108,7 @@ reverse_operon <- function(prot) { - ge <- lapply(1:length(ge), function(x) reveql(ge[[x]])) + ge <- lapply(1:length(ge), function(x) straightenOperonSeq(ge[[x]])) ye <- te[withouteq] @@ -141,4 +141,4 @@ reverse_operon <- function(prot) { # colnames(prot) <- c("AccNum","GenContext.orig","len", "GeneName","TaxID","Species") ## ??? 
straighten operons -# prot$GenContext.orig <- reverse_operon(prot) +# prot$GenContext.orig <- reverseOperonSeq(prot) diff --git a/man/RepresentativeAccNums.Rd b/man/RepresentativeAccNums.Rd index f617cde4..57d1f1ab 100644 --- a/man/RepresentativeAccNums.Rd +++ b/man/RepresentativeAccNums.Rd @@ -1,11 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R +% Please edit documentation in R/CHANGED-pre-msa-tree.R \name{RepresentativeAccNums} \alias{RepresentativeAccNums} \title{Function to generate a vector of one Accession number per distinct observation from 'reduced' column} \usage{ -RepresentativeAccNums(prot_data, reduced = "Lineage", accnum_col = "AccNum") - RepresentativeAccNums(prot_data, reduced = "Lineage", accnum_col = "AccNum") } \arguments{ diff --git a/man/countbycolumn.Rd b/man/countbycolumn.Rd deleted file mode 100644 index 34fcc3e0..00000000 --- a/man/countbycolumn.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{countByColumn} -\alias{countByColumn} -\title{Count By Column} -\usage{ -countByColumn(prot = prot, column = "DomArch", min.freq = 1) -} -\arguments{ -\item{min.freq}{} -} -\value{ -Describe return, in detail -} -\description{ -Count By Column -} -\examples{ -\dontrun{ -countByColumn() -} -} diff --git a/man/createRepresentativeAccNum.Rd b/man/createRepresentativeAccNum.Rd new file mode 100644 index 00000000..3703fe1a --- /dev/null +++ b/man/createRepresentativeAccNum.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pre-msa-tree.R +\name{createRepresentativeAccNum} +\alias{createRepresentativeAccNum} +\title{createRepresentativeAccNum} +\usage{ +createRepresentativeAccNum( + prot_data, + reduced = "Lineage", + accnum_col = "AccNum" +) +} +\arguments{ +\item{prot_data}{Data frame containing Accession Numbers} + +\item{reduced}{Column from 
prot_data from which distinct observations +will be generated from. +One accession number will be assigned for each of these observations} + +\item{accnum_col}{Column from prot_data that contains Accession Numbers} +} +\description{ +Function to generate a vector of one Accession number per distinct observation from 'reduced' column +} +\author{ +Samuel Chen, Janani Ravi +} diff --git a/man/filterbydomains.Rd b/man/filterbydomains.Rd deleted file mode 100644 index 8c885363..00000000 --- a/man/filterbydomains.Rd +++ /dev/null @@ -1,44 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{filterByDomains} -\alias{filterByDomains} -\title{Filter by Domains} -\usage{ -filterByDomains( - prot, - column = "DomArch", - doms_keep = c(), - doms_remove = c(), - ignore.case = FALSE -) -} -\arguments{ -\item{prot}{Dataframe to filter} - -\item{column}{Column to search for domains in (DomArch column)} - -\item{doms_keep}{Vector of domains that must be identified within column in order for -observation to be kept} - -\item{doms_remove}{Vector of domains that, if found within an observation, will be removed} - -\item{ignore.case}{Should the matching be non case sensitive} -} -\value{ -Filtered data frame -} -\description{ -filterByDomains filters a data frame by identifying exact domain matches -and either keeping or removing rows with the identified domain -} -\note{ -There is no need to make the domains 'regex safe', that will be handled by this function -} -\examples{ -\dontrun{ -filterByDomains() -} -} -\author{ -Samuel Chen, Janani Ravi -} diff --git a/man/filterbyfrequency.Rd b/man/filterbyfrequency.Rd deleted file mode 100644 index d2c5f9cd..00000000 --- a/man/filterbyfrequency.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{filterByFrequency} -\alias{filterByFrequency} -\title{Filter Frequency} -\usage{ -filterByFrequency(x, min.freq) 
-} -\arguments{ -\item{min.freq}{} -} -\value{ -Describe return, in detail -} -\description{ -Filter Frequency -} -\examples{ -\dontrun{ -filterByFrequency() -} -} diff --git a/man/findparalogs.Rd b/man/findparalogs.Rd deleted file mode 100644 index 4b5edbcf..00000000 --- a/man/findparalogs.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{findParalogs} -\alias{findParalogs} -\title{Find Paralogs} -\usage{ -findParalogs(prot) -} -\arguments{ -\item{prot}{A data frame filtered by a Query, containing columns Species and Lineage} -} -\value{ -returns a dataframe containing paralogs and the counts. -} -\description{ -Creates a data frame of paralogs. -} -\note{ -Please refer to the source code if you have alternate file formats and/or -column names. -} -\examples{ -\dontrun{ -findParalogs(pspa) -} -} diff --git a/man/getAccNumFromFA.Rd b/man/getAccNumFromFA.Rd new file mode 100644 index 00000000..f2409965 --- /dev/null +++ b/man/getAccNumFromFA.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pre-msa-tree.R +\name{getAccNumFromFA} +\alias{getAccNumFromFA} +\title{getAccNumFromFA} +\usage{ +getAccNumFromFA(fasta_file) +} +\arguments{ +\item{fasta_file}{} +} +\description{ +getAccNumFromFA +} diff --git a/man/get_accnums_from_fasta_file.Rd b/man/get_accnums_from_fasta_file.Rd index 84c163cc..f545d1a0 100644 --- a/man/get_accnums_from_fasta_file.Rd +++ b/man/get_accnums_from_fasta_file.Rd @@ -1,11 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R +% Please edit documentation in R/CHANGED-pre-msa-tree.R \name{get_accnums_from_fasta_file} \alias{get_accnums_from_fasta_file} \title{Get accnums from fasta file} \usage{ -get_accnums_from_fasta_file(fasta_file) - get_accnums_from_fasta_file(fasta_file) } \arguments{ @@ -13,6 +11,4 @@ 
get_accnums_from_fasta_file(fasta_file) } \description{ Get accnums from fasta file - -get_accnums_from_fasta_file } diff --git a/man/reveql.Rd b/man/reverseOperonSeq.Rd similarity index 56% rename from man/reveql.Rd rename to man/reverseOperonSeq.Rd index 9dc2bcb8..d61ec5f2 100644 --- a/man/reveql.Rd +++ b/man/reverseOperonSeq.Rd @@ -1,14 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/reverse_operons.R -\name{reveql} -\alias{reveql} -\title{reveql} +\name{reverseOperonSeq} +\alias{reverseOperonSeq} +\title{reverseOperonSeq} \usage{ -reveql(prot) +reverseOperonSeq(prot) } \arguments{ \item{prot}{} } \description{ -reveql +reverseOperonSeq } diff --git a/man/reverse_operon.Rd b/man/straightenOperonSeq.Rd similarity index 53% rename from man/reverse_operon.Rd rename to man/straightenOperonSeq.Rd index 270e2a62..fcd0c923 100644 --- a/man/reverse_operon.Rd +++ b/man/straightenOperonSeq.Rd @@ -1,14 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/reverse_operons.R -\name{reverse_operon} -\alias{reverse_operon} -\title{reverse_operon} +\name{straightenOperonSeq} +\alias{straightenOperonSeq} +\title{straightenOperonSeq} \usage{ -reverse_operon(prot) +straightenOperonSeq(prot) } \arguments{ \item{prot}{} } \description{ -reverse_operon +straightenOperonSeq } diff --git a/man/summarizebylineage.Rd b/man/summarizebylineage.Rd deleted file mode 100644 index 2e445913..00000000 --- a/man/summarizebylineage.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{summarizeByLineage} -\alias{summarizeByLineage} -\title{Summarize by Lineage} -\usage{ -summarizeByLineage(prot = "prot", column = "DomArch", by = "Lineage", query) -} -\arguments{ -\item{query}{} -} -\value{ -Describe return, in detail -} -\description{ -Summarize by Lineage -} -\examples{ -\dontrun{ -library(tidyverse) -tibble(DomArch = c("a+b", "a+b", "b+c", "a+b"), 
Lineage = c("l1", "l1", "l1", "l2")) |> - summarizeByLineage(query = "all") -} - -} diff --git a/man/totalgencontextordomarchcounts.Rd b/man/totalgencontextordomarchcounts.Rd deleted file mode 100644 index f457cb6a..00000000 --- a/man/totalgencontextordomarchcounts.Rd +++ /dev/null @@ -1,42 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{totalGenContextOrDomArchCounts} -\alias{totalGenContextOrDomArchCounts} -\title{Total Counts} -\usage{ -totalGenContextOrDomArchCounts( - prot, - column = "DomArch", - lineage_col = "Lineage", - cutoff = 90, - RowsCutoff = FALSE, - digits = 2 -) -} -\arguments{ -\item{prot}{A data frame that must contain columns: -\itemize{\item Either 'GenContext' or 'DomArch.norep' \item count}} - -\item{column}{Character. The column to summarize} - -\item{cutoff}{Numeric. Cutoff for total count. Counts below cutoff value will not be shown. Default is 0.} - -\item{digits}{} -} -\value{ -Define return, in detail -} -\description{ -Creates a data frame with a totalcount column - -This function is designed to sum the counts column by either Genomic Context or Domain Architecture and creates a totalcount column from those sums. -} -\note{ -Please refer to the source code if you have alternate file formats and/or -column names. 
-} -\examples{ -\dontrun{ -totalGenContextOrDomArchCounts(pspa - gc_lin_counts, 0, "GC") -} -} diff --git a/man/words2wordcounts.Rd b/man/words2wordcounts.Rd deleted file mode 100644 index 7f60f226..00000000 --- a/man/words2wordcounts.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{words2WordCounts} -\alias{words2WordCounts} -\title{Words 2 Word Counts} -\usage{ -words2WordCounts(string) -} -\arguments{ -\item{string}{} -} -\value{ -\link{tbl_df} table with 2 columns: 1) words & 2) counts/frequency -} -\description{ -Get word counts (wc) \link{DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)} -} -\examples{ -\dontrun{ -tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> - elements2Words() |> - words2WordCounts() -} - -} diff --git a/man/write.MsaAAMultipleAlignment.Rd b/man/write.MsaAAMultipleAlignment.Rd index 17a05f50..e26f26e7 100644 --- a/man/write.MsaAAMultipleAlignment.Rd +++ b/man/write.MsaAAMultipleAlignment.Rd @@ -1,11 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R +% Please edit documentation in R/CHANGED-pre-msa-tree.R \name{write.MsaAAMultipleAlignment} \alias{write.MsaAAMultipleAlignment} \title{Write MsaAAMultpleAlignment Objects as algined fasta sequence} \usage{ -write.MsaAAMultipleAlignment(alignment, outpath) - write.MsaAAMultipleAlignment(alignment, outpath) } \arguments{ @@ -16,10 +14,6 @@ write.MsaAAMultipleAlignment(alignment, outpath) \description{ MsaAAMultipleAlignment Objects are generated from calls to msaClustalOmega and msaMuscle from the 'msa' package - -Write MsaAAMultpleAlignment Objects as algined fasta sequence -MsaAAMultipleAlignment Objects are generated from calls to msaClustalOmega -and msaMuscle from the 'msa' package } \author{ Samuel Chen, Janani Ravi diff --git a/man/writeMSA_AA2FA.Rd b/man/writeMSA_AA2FA.Rd new file mode 100644 index 00000000..068e5b63 --- 
/dev/null +++ b/man/writeMSA_AA2FA.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pre-msa-tree.R +\name{writeMSA_AA2FA} +\alias{writeMSA_AA2FA} +\title{writeMSA_AA2FA} +\usage{ +writeMSA_AA2FA(alignment, outpath) +} +\arguments{ +\item{alignment}{MsaAAMultipleAlignment object to be written as a fasta} + +\item{outpath}{Where the resulting FASTA file should be written to} +} +\description{ +Write MsaAAMultpleAlignment Objects as aligned fasta sequence +MsaAAMultipleAlignment Objects are generated from calls to msaClustalOmega +and msaMuscle from the 'msa' package +} +\author{ +Samuel Chen, Janani Ravi +} From 38f3cb000ddf35028c1e7c940920dd051db1a2dc Mon Sep 17 00:00:00 2001 From: Seyi Kuforiji Date: Wed, 9 Oct 2024 11:32:03 +0100 Subject: [PATCH 16/61] added error handling functionality for the run_deltablast and run_rpsblast functions. This includes arguments check before wrapping code logic in a tryCatch block. --- R/blastWrappers.R | 109 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 84 insertions(+), 25 deletions(-) diff --git a/R/blastWrappers.R b/R/blastWrappers.R index 552b1ff6..15484a1b 100755 --- a/R/blastWrappers.R +++ b/R/blastWrappers.R @@ -18,25 +18,56 @@ #' #' @examples run_deltablast <- function(deltablast_path, db_search_path, - db = "refseq", query, evalue = "1e-5", - out, num_alignments, num_threads = 1) { - start <- Sys.time() + db = "refseq", query, evalue = "1e-5", + out, num_alignments, num_threads = 1) { + # Argument validation + if (!file.exists(deltablast_path)) { + stop("The DELTABLAST executable path is invalid: ", deltablast_path) + } + if (!dir.exists(db_search_path)) { + stop("The database search path is invalid: ", db_search_path) + } + if (!file.exists(query)) { + stop("The query file path is invalid: ", query) + } + if (!is.numeric(as.numeric(evalue)) || as.numeric(evalue) <= 0) { + stop("The evalue must be a positive number: ", evalue) + } + if 
(!is.numeric(num_alignments) || num_alignments <= 0) { + stop("The number of alignments must be a + positive integer: ", num_alignments) + } + if (!is.numeric(num_threads) || num_threads <= 0) { + stop("The number of threads must be a positive integer: ", num_threads) + } + + start <- Sys.time() + + tryCatch({ system(paste0("export BLASTDB=/", db_search_path)) system2( - command = deltablast_path, - args = c( - "-db", db, - "-query", query, - "-evalue", evalue, - "-out", out, - "-num_threads", num_threads, - "-num_alignments", num_alignments - # ,"-outfmt", outfmt - ) + command = deltablast_path, + args = c( + "-db", db, + "-query", query, + "-evalue", evalue, + "-out", out, + "-num_threads", num_threads, + "-num_alignments", num_alignments + # ,"-outfmt", outfmt + ) ) print(Sys.time() - start) + }, error = function(e) { + message(paste("Error in run_deltablast: ", e)) + }, warning = function(w) { + message(paste("Warning in run_deltablast: ", w)) + }, finally = { + message("run_deltablast completed") + }) + } @@ -55,20 +86,48 @@ run_deltablast <- function(deltablast_path, db_search_path, #' #' @examples run_rpsblast <- function(rpsblast_path, db_search_path, - db = "refseq", query, evalue = "1e-5", - out, num_threads = 1) { - start <- Sys.time() + db = "refseq", query, evalue = "1e-5", + out, num_threads = 1) { + # Argument validation + if (!file.exists(rpsblast_path)) { + stop("The RPSBLAST executable path is invalid: ", rpsblast_path) + } + if (!dir.exists(db_search_path)) { + stop("The database search path is invalid: ", db_search_path) + } + if (!file.exists(query)) { + stop("The query file path is invalid: ", query) + } + if (!is.numeric(as.numeric(evalue)) || as.numeric(evalue) <= 0) { + stop("The evalue must be a positive number: ", evalue) + } + if (!is.numeric(num_threads) || num_threads <= 0) { + stop("The number of threads must be a positive integer: ", num_threads) + } + + start <- Sys.time() + + tryCatch({ + system(paste0("export BLASTDB=/", 
db_search_path)) + system2( - command = rpsblast_path, - args = c( - "-db", db, - "-query", query, - "-evalue", evalue, - "-out", out, - "-num_threads", num_threads - # , "-outfmt", outfmt - ) + command = rpsblast_path, + args = c( + "-db", db, + "-query", query, + "-evalue", evalue, + "-out", out, + "-num_threads", num_threads + ) ) print(Sys.time() - start) + }, error = function(e) { + message(paste("Error in run_rpsblast: ", e)) + }, warning = function(w) { + message(paste("Warning in run_rpsblast: ", w)) + }, finally = { + message("run_rpsblast completed") + }) + } From 527c470104805b093f7da3e9f45335f53945cb1a Mon Sep 17 00:00:00 2001 From: teddyCodex Date: Wed, 9 Oct 2024 22:55:15 +0100 Subject: [PATCH 17/61] update CONTRIBUTING.md --- .github/CONTRIBUTING.md | 123 ++++++++++++++++++++++------------------ 1 file changed, 69 insertions(+), 54 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 5db3f961..9fcd6b7f 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -5,72 +5,87 @@ For a detailed discussion on contributing to this and other tidyverse packages, ## Fixing typos -You can fix typos, spelling mistakes, or grammatical errors in the documentation directly using the GitHub web interface, as long as the changes are made in the _source_ file. -This generally means you'll need to edit [roxygen2 comments](https://roxygen2.r-lib.org/articles/roxygen2.html) in an `.R`, not a `.Rd` file. +You can fix typos, spelling mistakes, or grammatical errors in the documentation directly using the GitHub web interface, as long as the changes are made in the _source_ file. +This generally means you'll need to edit [roxygen2 comments](https://roxygen2.r-lib.org/articles/roxygen2.html) in an `.R`, not a `.Rd` file. You can find the `.R` file that generates the `.Rd` by reading the comment in the first line. 
## Bigger changes -If you want to make a bigger change, it's a good idea to first file an issue and make sure someone from the team agrees that it’s needed. -If you’ve found a bug, please file an issue that illustrates the bug with a minimal +If you want to make a bigger change, it's a good idea to first file an issue and make sure someone from the team agrees that it’s needed. +If you’ve found a bug, please file an issue that illustrates the bug with a minimal [reprex](https://www.tidyverse.org/help/#reprex) (this will also help you write a unit test, if needed). See our guide on [how to create a great issue](https://code-review.tidyverse.org/issues/) for more advice. ### Pull request process -* Fork the package and clone onto your computer. If you haven't done this before, we recommend using `usethis`. - -* Install and load the `usethis` package with: - ``` - install.packages("usethis") - - library("usethis") - ``` -* Clone and fork the MolEvolvR package using: - ``` - usethis::create_from_github("JRaviLab/MolEvolvR", fork = TRUE) - ``` -* Install all development dependencies and then make sure the package passes R CMD check using `devtools`: - ``` - install.packages("devtools") - - library("devtools") - - devtools::install_dev_deps() - - devtools::check() - ``` - _If R CMD check doesn't pass cleanly, it's a good idea to ask for help before continuing._ - -* Create a Git branch for your pull request (PR). We recommend using - ``` - usethis::pr_init("brief-description-of-change") - ``` - -* Make your changes, commit to git, and then create a PR by running `usethis::pr_push()`, and following the prompts in your browser. - The title of your PR should briefly describe the change. - The body of your PR should contain `Fixes #issue-number`. - -* For user-facing changes, add a bullet to the top of `NEWS.md` (i.e. just below the first header). Follow the style described in . +- Fork the package and clone onto your computer. 
If you haven't done this before, we recommend using `usethis`. + +- Install and load the `usethis` package with: + + ``` + install.packages("usethis") + + library("usethis") + ``` + +- Clone and fork the MolEvolvR package using: + ``` + usethis::create_from_github("JRaviLab/MolEvolvR", fork = TRUE) + ``` +- Install Bioconductor dependencies: + + ``` + if (!require("BiocManager", quietly = TRUE)) + install.packages("BiocManager") + BiocManager::install(version = "3.19") + ``` + +- Install other development dependencies and then ensure that the package passes R CMD check using `devtools`: + + ``` + install.packages("devtools") + + library("devtools") + + devtools::install_dev_deps() + + devtools::check() + ``` + + _If R CMD check doesn't pass cleanly, it's a good idea to ask for help before continuing._ + +- Create a Git branch for your pull request (PR). We recommend using: + + ``` + usethis::pr_init("brief-description-of-change") + ``` + +- Make your changes, commit to git, and then create a PR by running `usethis::pr_push()`, and following the prompts in your browser. + The title of your PR should briefly describe the change. + The body of your PR should contain `Fixes #issue-number`. + + + + ### Code style -* New code should follow the tidyverse [style guide](https://style.tidyverse.org). - You can use the [styler](https://CRAN.R-project.org/package=styler) package to apply these styles, but please don't restyle code that has nothing to do with your PR. - -* Lint Your Code: Ensure your code adheres to our style guidelines by using [lintr](https://lintr.r-lib.org/): - ``` - install.packages("lintr") - - library("lintr") - - lintr::lint("path/to/your/file.R") - ``` - -* We use [roxygen2](https://cran.r-project.org/package=roxygen2), with [Markdown syntax](https://cran.r-project.org/web/packages/roxygen2/vignettes/rd-formatting.html), for documentation. - -* We use [testthat](https://cran.r-project.org/package=testthat) for unit tests. 
- Contributions with test cases included are easier to accept. +- New code should follow the tidyverse [style guide](https://style.tidyverse.org). + You can use the [styler](https://CRAN.R-project.org/package=styler) package to apply these styles, but please don't restyle code that has nothing to do with your PR. +- Lint Your Code: Ensure your code adheres to our style guidelines by using [lintr](https://lintr.r-lib.org/): + + ``` + install.packages("lintr") + + library("lintr") + + lintr::lint("path/to/your/file.R") + ``` + +- We use [roxygen2](https://cran.r-project.org/package=roxygen2), with [Markdown syntax](https://cran.r-project.org/web/packages/roxygen2/vignettes/rd-formatting.html), for documentation. + +- We use [testthat](https://cran.r-project.org/package=testthat) for unit tests. + Contributions with test cases included are easier to accept. ## Code of Conduct From 4ff68fb06395842093879dea47e45aaae1967225 Mon Sep 17 00:00:00 2001 From: Seyi Kuforiji Date: Thu, 10 Oct 2024 08:27:02 +0100 Subject: [PATCH 18/61] Reverting to old function names for the following functions to create a separate pr for their updates and on a different branch: R/combine_analysis.R combine_full combine_ipr R/combine_files.R combine_files R/create_lineage_lookup.R create_lineage_lookup shorten_NA --- R/combine_analysis.R | 4 ++-- R/combine_files.R | 10 +++++----- R/create_lineage_lookup.R | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/R/combine_analysis.R b/R/combine_analysis.R index 58ce1f14..bb3b3ce2 100755 --- a/R/combine_analysis.R +++ b/R/combine_analysis.R @@ -17,7 +17,7 @@ #' @export #' #' @examples -combineFullAnalysis <- function(inpath, ret = FALSE) { +combine_full <- function(inpath, ret = FALSE) { ## Combining full_analysis files full_combnd <- combine_files(inpath, pattern = "*.full_analysis.tsv", skip = 0, @@ -44,7 +44,7 @@ combineFullAnalysis <- function(inpath, ret = FALSE) { #' @export #' #' @examples -combineIPR <- function(inpath, 
ret = FALSE) { +combine_ipr <- function(inpath, ret = FALSE) { ## Combining clean ipr files ipr_combnd <- combine_files(inpath, pattern = "*.iprscan_cln.tsv", skip = 0, diff --git a/R/combine_files.R b/R/combine_files.R index 455ddd53..76c5fa09 100755 --- a/R/combine_files.R +++ b/R/combine_files.R @@ -38,7 +38,7 @@ #' @export #' #' @examples -combineFiles <- function(inpath = c("../molevol_data/project_data/phage_defense/"), +combine_files <- function(inpath = c("../molevol_data/project_data/phage_defense/"), pattern = "*full_analysis.tsv", delim = "\t", skip = 0, col_names = T) { @@ -67,7 +67,7 @@ combineFiles <- function(inpath = c("../molevol_data/project_data/phage_defense/ ## Sample Runs ## ################# # ## Combining full_analysis files -# full_combnd <- combineFiles(inpath, +# full_combnd <- combine_files(inpath, # pattern="*full_analysis.txt", skip=0, # col_names=T) # @@ -75,7 +75,7 @@ combineFiles <- function(inpath = c("../molevol_data/project_data/phage_defense/ # path="../molevol_data/project_data/slps/full_combined.tsv") # # ## Combining clean files -# cln_combnd <- combineFiles(inpath, +# cln_combnd <- combine_files(inpath, # pattern="^.*cln.txt", skip=0, # col_names=T) # @@ -86,14 +86,14 @@ combineFiles <- function(inpath = c("../molevol_data/project_data/phage_defense/ # ## Less helpful examples! # ## Combining BLAST files # ## Likely makes no sense since clustering is done per query -# cl_blast_combnd <- combineFiles(inpath, +# cl_blast_combnd <- combine_files(inpath, # pattern="^.*refseq.1e-5.txt", skip=0, # col_names=cl_blast_colnames) %>% # select(-PcPositive, -ClusterID) # # ## Combining IPR files # ## Likely makes no sense since there may be repeated AccNum from indiv. files! 
-# ipr_combnd <- combineFiles(inpath, +# ipr_combnd <- combine_files(inpath, # pattern="*iprscan.lins*", skip=0, # col_names=ipr_colnames) # diff --git a/R/create_lineage_lookup.R b/R/create_lineage_lookup.R index d911934a..8e365cbb 100644 --- a/R/create_lineage_lookup.R +++ b/R/create_lineage_lookup.R @@ -26,9 +26,9 @@ #' @export #' #' @examples -createLineageLookup <- function(lineage_file = here("data/rankedlineage.dmp"), +create_lineage_lookup <- function(lineage_file = here("data/rankedlineage.dmp"), outfile, taxonomic_rank = "phylum") { - .shortenNA <- function(Lineage) { + shorten_NA <- function(Lineage) { first_NA <- str_locate(Lineage, "NA")[1] if (is.na(first_NA)) { # No NAs @@ -92,7 +92,7 @@ createLineageLookup <- function(lineage_file = here("data/rankedlineage.dmp"), # Takes a while (2million rows after all) rankedLinsCombined <- rankedLins %>% unite(col = "Lineage", all_of(combined_taxonomy), sep = ">") %>% - mutate(Lineage = unlist(map(Lineage, .shortenNA))) + mutate(Lineage = unlist(map(Lineage, shorten_NA))) @@ -101,7 +101,7 @@ createLineageLookup <- function(lineage_file = here("data/rankedlineage.dmp"), -#' CreateLineageLookup <- function(assembly_path, updateAssembly = FALSE, file_type = "tsv") +#' create_lineage_lookup <- function(assembly_path, updateAssembly = FALSE, file_type = "tsv") #' { #' #' Create a look up table that goes from GCA_ID, to TaxID, to Lineage #' #' @author Samuel Chen From 035c5e13b4cfe54b4ba7ff1d5c7618ade13720d1 Mon Sep 17 00:00:00 2001 From: Seyi Kuforiji Date: Thu, 10 Oct 2024 08:41:47 +0100 Subject: [PATCH 19/61] minor updates to namespace and Rd files after running devtool::check() --- NAMESPACE | 8 ++++---- man/{combineFiles.Rd => combine_files.Rd} | 6 +++--- man/{combineFullAnalysis.Rd => combine_full.Rd} | 6 +++--- man/{combineIPR.Rd => combine_ipr.Rd} | 6 +++--- man/{createLineageLookup.Rd => create_lineage_lookup.Rd} | 6 +++--- 5 files changed, 16 insertions(+), 16 deletions(-) rename man/{combineFiles.Rd => 
combine_files.Rd} (92%) rename man/{combineFullAnalysis.Rd => combine_full.Rd} (69%) rename man/{combineIPR.Rd => combine_ipr.Rd} (74%) rename man/{createLineageLookup.Rd => create_lineage_lookup.Rd} (91%) diff --git a/NAMESPACE b/NAMESPACE index cd135cc8..f49975b4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -26,9 +26,9 @@ export(cleanGeneDescription) export(cleanGenomicContext) export(cleanLineage) export(cleanSpecies) -export(combineFiles) -export(combineFullAnalysis) -export(combineIPR) +export(combine_files) +export(combine_full) +export(combine_ipr) export(condenseRepeatedDomains) export(convert2TitleCase) export(convertAlignment2FA) @@ -37,8 +37,8 @@ export(convert_fa2tre) export(count_bycol) export(count_to_sunburst) export(count_to_treemap) -export(createLineageLookup) export(create_all_col_params) +export(create_lineage_lookup) export(create_one_col_params) export(domain_network) export(efetch_ipg) diff --git a/man/combineFiles.Rd b/man/combine_files.Rd similarity index 92% rename from man/combineFiles.Rd rename to man/combine_files.Rd index 3b56b923..4126eb9e 100644 --- a/man/combineFiles.Rd +++ b/man/combine_files.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/combine_files.R -\name{combineFiles} -\alias{combineFiles} +\name{combine_files} +\alias{combine_files} \title{Download the combined assembly summaries of genbank and refseq} \usage{ -combineFiles( +combine_files( inpath = c("../molevol_data/project_data/phage_defense/"), pattern = "*full_analysis.tsv", delim = "\\t", diff --git a/man/combineFullAnalysis.Rd b/man/combine_full.Rd similarity index 69% rename from man/combineFullAnalysis.Rd rename to man/combine_full.Rd index 35925e86..f4e6597b 100644 --- a/man/combineFullAnalysis.Rd +++ b/man/combine_full.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/combine_analysis.R -\name{combineFullAnalysis} -\alias{combineFullAnalysis} +\name{combine_full} 
+\alias{combine_full} \title{Combining full_analysis files} \usage{ -combineFullAnalysis(inpath, ret = FALSE) +combine_full(inpath, ret = FALSE) } \arguments{ \item{ret}{} diff --git a/man/combineIPR.Rd b/man/combine_ipr.Rd similarity index 74% rename from man/combineIPR.Rd rename to man/combine_ipr.Rd index 035c4274..52aa3057 100644 --- a/man/combineIPR.Rd +++ b/man/combine_ipr.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/combine_analysis.R -\name{combineIPR} -\alias{combineIPR} +\name{combine_ipr} +\alias{combine_ipr} \title{Combining clean ipr files} \usage{ -combineIPR(inpath, ret = FALSE) +combine_ipr(inpath, ret = FALSE) } \arguments{ \item{ret}{} diff --git a/man/createLineageLookup.Rd b/man/create_lineage_lookup.Rd similarity index 91% rename from man/createLineageLookup.Rd rename to man/create_lineage_lookup.Rd index 5dbab978..51670f35 100644 --- a/man/createLineageLookup.Rd +++ b/man/create_lineage_lookup.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/create_lineage_lookup.R -\name{createLineageLookup} -\alias{createLineageLookup} +\name{create_lineage_lookup} +\alias{create_lineage_lookup} \title{Create a look up table that goes from TaxID, to Lineage} \usage{ -createLineageLookup( +create_lineage_lookup( lineage_file = here("data/rankedlineage.dmp"), outfile, taxonomic_rank = "phylum" From fb5ac23f8a3e8e5709498aa24308a950802d1c29 Mon Sep 17 00:00:00 2001 From: Seyi Kuforiji Date: Thu, 10 Oct 2024 09:20:22 +0100 Subject: [PATCH 20/61] Renamed the following function; R/combine_analysis.R combine_full combine_ipr R/combine_files.R combine_files R/create_lineage_lookup.R create_lineage_lookup shorten_NA with approved names from #44 --- NAMESPACE | 8 ++++---- R/acc2lin.R | 2 +- R/combine_analysis.R | 8 ++++---- R/combine_files.R | 10 +++++----- R/create_lineage_lookup.R | 8 ++++---- R/lineage.R | 4 ++-- man/GCA2lin.Rd | 2 +- man/{combine_files.Rd => 
combineFiles.Rd} | 6 +++--- man/{combine_full.Rd => combineFullAnalysis.Rd} | 6 +++--- man/{combine_ipr.Rd => combineIPR.Rd} | 6 +++--- ...create_lineage_lookup.Rd => createLineageLookup.Rd} | 6 +++--- man/ipg2lin.Rd | 2 +- 12 files changed, 34 insertions(+), 34 deletions(-) rename man/{combine_files.Rd => combineFiles.Rd} (92%) rename man/{combine_full.Rd => combineFullAnalysis.Rd} (69%) rename man/{combine_ipr.Rd => combineIPR.Rd} (74%) rename man/{create_lineage_lookup.Rd => createLineageLookup.Rd} (91%) diff --git a/NAMESPACE b/NAMESPACE index f49975b4..cd135cc8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -26,9 +26,9 @@ export(cleanGeneDescription) export(cleanGenomicContext) export(cleanLineage) export(cleanSpecies) -export(combine_files) -export(combine_full) -export(combine_ipr) +export(combineFiles) +export(combineFullAnalysis) +export(combineIPR) export(condenseRepeatedDomains) export(convert2TitleCase) export(convertAlignment2FA) @@ -37,8 +37,8 @@ export(convert_fa2tre) export(count_bycol) export(count_to_sunburst) export(count_to_treemap) +export(createLineageLookup) export(create_all_col_params) -export(create_lineage_lookup) export(create_one_col_params) export(domain_network) export(efetch_ipg) diff --git a/R/acc2lin.R b/R/acc2lin.R index dfb33da9..a6551247 100644 --- a/R/acc2lin.R +++ b/R/acc2lin.R @@ -277,7 +277,7 @@ efetch_ipg <- function(accnums, out_path, plan = "sequential") { #' This file can be generated using the "DownloadAssemblySummary()" function #' @param lineagelookup_path String of the path to the lineage lookup file #' (taxid to lineage mapping). 
This file can be generated using the -#' "create_lineage_lookup()" function +#' "createLineageLookup()" function #' #' @importFrom data.table fread #' diff --git a/R/combine_analysis.R b/R/combine_analysis.R index bb3b3ce2..55e36925 100755 --- a/R/combine_analysis.R +++ b/R/combine_analysis.R @@ -17,9 +17,9 @@ #' @export #' #' @examples -combine_full <- function(inpath, ret = FALSE) { +combineFullAnalysis <- function(inpath, ret = FALSE) { ## Combining full_analysis files - full_combnd <- combine_files(inpath, + full_combnd <- combineFiles(inpath, pattern = "*.full_analysis.tsv", skip = 0, col_names = T ) @@ -44,9 +44,9 @@ combine_full <- function(inpath, ret = FALSE) { #' @export #' #' @examples -combine_ipr <- function(inpath, ret = FALSE) { +combineIPR <- function(inpath, ret = FALSE) { ## Combining clean ipr files - ipr_combnd <- combine_files(inpath, + ipr_combnd <- combineFiles(inpath, pattern = "*.iprscan_cln.tsv", skip = 0, col_names = T ) diff --git a/R/combine_files.R b/R/combine_files.R index 76c5fa09..455ddd53 100755 --- a/R/combine_files.R +++ b/R/combine_files.R @@ -38,7 +38,7 @@ #' @export #' #' @examples -combine_files <- function(inpath = c("../molevol_data/project_data/phage_defense/"), +combineFiles <- function(inpath = c("../molevol_data/project_data/phage_defense/"), pattern = "*full_analysis.tsv", delim = "\t", skip = 0, col_names = T) { @@ -67,7 +67,7 @@ combine_files <- function(inpath = c("../molevol_data/project_data/phage_defense ## Sample Runs ## ################# # ## Combining full_analysis files -# full_combnd <- combine_files(inpath, +# full_combnd <- combineFiles(inpath, # pattern="*full_analysis.txt", skip=0, # col_names=T) # @@ -75,7 +75,7 @@ combine_files <- function(inpath = c("../molevol_data/project_data/phage_defense # path="../molevol_data/project_data/slps/full_combined.tsv") # # ## Combining clean files -# cln_combnd <- combine_files(inpath, +# cln_combnd <- combineFiles(inpath, # pattern="^.*cln.txt", skip=0, # 
col_names=T) # @@ -86,14 +86,14 @@ combine_files <- function(inpath = c("../molevol_data/project_data/phage_defense # ## Less helpful examples! # ## Combining BLAST files # ## Likely makes no sense since clustering is done per query -# cl_blast_combnd <- combine_files(inpath, +# cl_blast_combnd <- combineFiles(inpath, # pattern="^.*refseq.1e-5.txt", skip=0, # col_names=cl_blast_colnames) %>% # select(-PcPositive, -ClusterID) # # ## Combining IPR files # ## Likely makes no sense since there may be repeated AccNum from indiv. files! -# ipr_combnd <- combine_files(inpath, +# ipr_combnd <- combineFiles(inpath, # pattern="*iprscan.lins*", skip=0, # col_names=ipr_colnames) # diff --git a/R/create_lineage_lookup.R b/R/create_lineage_lookup.R index 8e365cbb..78e79048 100644 --- a/R/create_lineage_lookup.R +++ b/R/create_lineage_lookup.R @@ -26,9 +26,9 @@ #' @export #' #' @examples -create_lineage_lookup <- function(lineage_file = here("data/rankedlineage.dmp"), +createLineageLookup <- function(lineage_file = here("data/rankedlineage.dmp"), outfile, taxonomic_rank = "phylum") { - shorten_NA <- function(Lineage) { + .shortenNA <- function(Lineage) { first_NA <- str_locate(Lineage, "NA")[1] if (is.na(first_NA)) { # No NAs @@ -92,7 +92,7 @@ create_lineage_lookup <- function(lineage_file = here("data/rankedlineage.dmp"), # Takes a while (2million rows after all) rankedLinsCombined <- rankedLins %>% unite(col = "Lineage", all_of(combined_taxonomy), sep = ">") %>% - mutate(Lineage = unlist(map(Lineage, shorten_NA))) + mutate(Lineage = unlist(map(Lineage, .shortenNA))) @@ -101,7 +101,7 @@ create_lineage_lookup <- function(lineage_file = here("data/rankedlineage.dmp"), -#' create_lineage_lookup <- function(assembly_path, updateAssembly = FALSE, file_type = "tsv") +#' createLineageLookup <- function(assembly_path, updateAssembly = FALSE, file_type = "tsv") #' { #' #' Create a look up table that goes from GCA_ID, to TaxID, to Lineage #' #' @author Samuel Chen diff --git a/R/lineage.R 
b/R/lineage.R index 20acec04..7ceed847 100644 --- a/R/lineage.R +++ b/R/lineage.R @@ -77,7 +77,7 @@ DownloadAssemblySummary <- function(outpath, #' This file can be generated using the "DownloadAssemblySummary()" function #' @param lineagelookup_path String of the path to the lineage lookup file #' (taxid to lineage mapping). This file can be generated using the -#' "create_lineage_lookup()" function +#' "createLineageLookup()" function #' @param acc_col #' #' @importFrom dplyr pull @@ -309,7 +309,7 @@ efetch_ipg <- function(accessions, out_path, plan = "multicore") { #' @param genbank_assembly_path #' @param lineagelookup_path String of the path to the lineage lookup file #' (taxid to lineage mapping). This file can be generated using the -#' "create_lineage_lookup()" function +#' "createLineageLookup()" function #' #' @importFrom data.table fread setnames #' diff --git a/man/GCA2lin.Rd b/man/GCA2lin.Rd index ad83ca39..47acc3d7 100644 --- a/man/GCA2lin.Rd +++ b/man/GCA2lin.Rd @@ -19,7 +19,7 @@ This file can be generated using the "DownloadAssemblySummary()" function} \item{lineagelookup_path}{String of the path to the lineage lookup file (taxid to lineage mapping). 
This file can be generated using the -"create_lineage_lookup()" function} +"createLineageLookup()" function} \item{acc_col}{} } diff --git a/man/combine_files.Rd b/man/combineFiles.Rd similarity index 92% rename from man/combine_files.Rd rename to man/combineFiles.Rd index 4126eb9e..3b56b923 100644 --- a/man/combine_files.Rd +++ b/man/combineFiles.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/combine_files.R -\name{combine_files} -\alias{combine_files} +\name{combineFiles} +\alias{combineFiles} \title{Download the combined assembly summaries of genbank and refseq} \usage{ -combine_files( +combineFiles( inpath = c("../molevol_data/project_data/phage_defense/"), pattern = "*full_analysis.tsv", delim = "\\t", diff --git a/man/combine_full.Rd b/man/combineFullAnalysis.Rd similarity index 69% rename from man/combine_full.Rd rename to man/combineFullAnalysis.Rd index f4e6597b..35925e86 100644 --- a/man/combine_full.Rd +++ b/man/combineFullAnalysis.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/combine_analysis.R -\name{combine_full} -\alias{combine_full} +\name{combineFullAnalysis} +\alias{combineFullAnalysis} \title{Combining full_analysis files} \usage{ -combine_full(inpath, ret = FALSE) +combineFullAnalysis(inpath, ret = FALSE) } \arguments{ \item{ret}{} diff --git a/man/combine_ipr.Rd b/man/combineIPR.Rd similarity index 74% rename from man/combine_ipr.Rd rename to man/combineIPR.Rd index 52aa3057..035c4274 100644 --- a/man/combine_ipr.Rd +++ b/man/combineIPR.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/combine_analysis.R -\name{combine_ipr} -\alias{combine_ipr} +\name{combineIPR} +\alias{combineIPR} \title{Combining clean ipr files} \usage{ -combine_ipr(inpath, ret = FALSE) +combineIPR(inpath, ret = FALSE) } \arguments{ \item{ret}{} diff --git a/man/create_lineage_lookup.Rd b/man/createLineageLookup.Rd 
similarity index 91% rename from man/create_lineage_lookup.Rd rename to man/createLineageLookup.Rd index 51670f35..5dbab978 100644 --- a/man/create_lineage_lookup.Rd +++ b/man/createLineageLookup.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/create_lineage_lookup.R -\name{create_lineage_lookup} -\alias{create_lineage_lookup} +\name{createLineageLookup} +\alias{createLineageLookup} \title{Create a look up table that goes from TaxID, to Lineage} \usage{ -create_lineage_lookup( +createLineageLookup( lineage_file = here("data/rankedlineage.dmp"), outfile, taxonomic_rank = "phylum" diff --git a/man/ipg2lin.Rd b/man/ipg2lin.Rd index 453668b0..5850e86c 100644 --- a/man/ipg2lin.Rd +++ b/man/ipg2lin.Rd @@ -29,7 +29,7 @@ file} \item{lineagelookup_path}{String of the path to the lineage lookup file (taxid to lineage mapping). This file can be generated using the -"create_lineage_lookup()" function} +"createLineageLookup()" function} \item{assembly_path}{String of the path to the assembly_summary path This file can be generated using the "DownloadAssemblySummary()" function} From 106eb14b4e2eace66737a07cf5840011e490d116 Mon Sep 17 00:00:00 2001 From: Seyi Kuforiji Date: Thu, 10 Oct 2024 10:24:49 +0100 Subject: [PATCH 21/61] reverting to old function names; make_opts2procs, map_advanced_opts2procs, get_proc_medians, write_proc_medians_table, write_proc_medians_yml, get_proc_weights, advanced_opts2est_walltime in R/assign_job_queue.R to be updated in a separate full request --- NAMESPACE | 18 ++-- R/assign_job_queue.R | 84 +++++++++---------- ...tions.Rd => advanced_opts2est_walltime.Rd} | 10 +-- ...{assignJobQueue.Rd => assign_job_queue.Rd} | 12 +-- ...eProcessRuntime.Rd => get_proc_medians.Rd} | 10 +-- ...sRuntimeWeights.Rd => get_proc_weights.Rd} | 8 +- ...apOption2Process.Rd => make_opts2procs.Rd} | 8 +- ...2Process.Rd => map_advanced_opts2procs.Rd} | 8 +- ...llTimes.Rd => plot_estimated_walltimes.Rd} | 8 +- ...ime2TSV.Rd 
=> write_proc_medians_table.Rd} | 8 +- ...timeToYML.Rd => write_proc_medians_yml.Rd} | 10 +-- 11 files changed, 92 insertions(+), 92 deletions(-) rename man/{calculateEstimatedWallTimeFromOptions.Rd => advanced_opts2est_walltime.Rd} (73%) rename man/{assignJobQueue.Rd => assign_job_queue.Rd} (68%) rename man/{calculateProcessRuntime.Rd => get_proc_medians.Rd} (76%) rename man/{getProcessRuntimeWeights.Rd => get_proc_weights.Rd} (73%) rename man/{mapOption2Process.Rd => make_opts2procs.Rd} (75%) rename man/{mapAdvOption2Process.Rd => map_advanced_opts2procs.Rd} (76%) rename man/{plotEstimatedWallTimes.Rd => plot_estimated_walltimes.Rd} (77%) rename man/{writeProcessRuntime2TSV.Rd => write_proc_medians_table.Rd} (77%) rename man/{writeProcessRuntimeToYML.Rd => write_proc_medians_yml.Rd} (74%) diff --git a/NAMESPACE b/NAMESPACE index f49975b4..b4be51ec 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,11 +15,10 @@ export(add_leaves) export(add_lins) export(add_name) export(add_tax) +export(advanced_opts2est_walltime) export(alignFasta) export(assert_count_df) -export(assignJobQueue) -export(calculateEstimatedWallTimeFromOptions) -export(calculateProcessRuntime) +export(assign_job_queue) export(cleanClusters) export(cleanDomainArchitecture) export(cleanGeneDescription) @@ -54,9 +53,10 @@ export(generate_all_aln2fa) export(generate_fa2tre) export(generate_msa) export(generate_trees) -export(getProcessRuntimeWeights) export(get_accnums_from_fasta_file) export(get_job_message) +export(get_proc_medians) +export(get_proc_weights) export(ipg2lin) export(ipr2viz) export(ipr2viz_web) @@ -66,12 +66,12 @@ export(lineage.domain_repeats.plot) export(lineage.neighbors.plot) export(lineage_sunburst) export(make_job_results_url) +export(make_opts2procs) export(mapAcc2Name) -export(mapAdvOption2Process) -export(mapOption2Process) export(map_acc2name) +export(map_advanced_opts2procs) export(msa_pdf) -export(plotEstimatedWallTimes) +export(plot_estimated_walltimes) export(prot2tax) 
export(prot2tax_old) export(removeAsterisks) @@ -103,8 +103,8 @@ export(wordcloud2_element) export(wordcloud3) export(wordcloud_element) export(write.MsaAAMultipleAlignment) -export(writeProcessRuntime2TSV) -export(writeProcessRuntimeToYML) +export(write_proc_medians_table) +export(write_proc_medians_yml) importFrom(Biostrings,AAStringSet) importFrom(Biostrings,readAAStringSet) importFrom(Biostrings,toString) diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R index f1fcb6db..c531fb09 100644 --- a/R/assign_job_queue.R +++ b/R/assign_job_queue.R @@ -3,16 +3,16 @@ # pipeline. # to use this, construct paths like so: file.path(common_root, "path", "to", "file.R") # for example, the reference for this file would be: -# file.path(common_root, "molevol_scripts", "R", "assignJobQueue.R") +# file.path(common_root, "molevol_scripts", "R", "assign_job_queue.R") common_root <- Sys.getenv("COMMON_SRC_ROOT") #' Construct list where names (MolEvolvR advanced options) point to processes #' #' @return list where names (MolEvolvR advanced options) point to processes #' -#' example: list_opts2procs <- mapOption2Process +#' example: list_opts2procs <- make_opts2procs #' @export -mapOption2Process <- function() { +make_opts2procs <- function() { tryCatch({ opts2processes <- list( "homology_search" = c("dblast", "dblast_cleanup"), @@ -26,7 +26,7 @@ mapOption2Process <- function() { }, warning = function(w) { message(paste("Warning: ", w$message)) }, finally = { - message("mapOption2Process function execution completed.") + message("make_opts2procs function execution completed.") }) } @@ -40,16 +40,16 @@ mapOption2Process <- function() { #' #' example: #' advanced_opts <- c("homology_search", "domain_architecture") -#' procs <- mapAdvOption2Process(advanced_opts) +#' procs <- map_advanced_opts2procs(advanced_opts) #' @export -mapAdvOption2Process <- function(advanced_opts) { +map_advanced_opts2procs <- function(advanced_opts) { if (!is.character(advanced_opts)) { stop("Argument must 
be a character vector!") } tryCatch({ # append 'always' to add procs that always run advanced_opts <- c(advanced_opts, "always") - opts2proc <- mapOption2Process() + opts2proc <- make_opts2procs() # setup index for opts2proc based on advanced options idx <- which(names(opts2proc) %in% advanced_opts) # extract processes that will run @@ -60,7 +60,7 @@ mapAdvOption2Process <- function(advanced_opts) { }, warning = function(w) { message(paste("Warning: ", w$message)) }, finally = { - message("mapOption2Process function execution completed.") + message("make_opts2procs function execution completed.") }) } @@ -80,14 +80,14 @@ mapAdvOption2Process <- function(advanced_opts) { #' #' 1) #' dir_job_results <- "/data/scratch/janani/molevolvr_out" -#' list_proc_medians <- calculateProcessRuntime(dir_job_results) +#' list_proc_medians <- get_proc_medians(dir_job_results) #' #' 2) from outside container environment #' common_root <- "/data/molevolvr_transfer/molevolvr_dev" #' dir_job_results <- "/data/molevolvr_transfer/molevolvr_dev/job_results" -#' list_proc_medians <- calculateProcessRuntime(dir_job_results) +#' list_proc_medians <- get_proc_medians(dir_job_results) #' @export -calculateProcessRuntime <- function(dir_job_results) { +get_proc_medians <- function(dir_job_results) { tryCatch({ # Check if dir_job_results is a character string if (!is.character(dir_job_results) || length(dir_job_results) != 1) { @@ -139,7 +139,7 @@ calculateProcessRuntime <- function(dir_job_results) { }, warning = function(w) { message(paste("Warning: ", w$message)) }, finally = { - message("calculateProcessRuntime function execution completed.") + message("get_proc_medians function execution completed.") }) } @@ -156,12 +156,12 @@ calculateProcessRuntime <- function(dir_job_results) { #' #' @return [tbl_df] 2 columns: 1) process and 2) median seconds #' -#' example: writeProcessRuntime2TSV( +#' example: write_proc_medians_table( #' "/data/scratch/janani/molevolvr_out/", #' 
"/data/scratch/janani/molevolvr_out/log_tbl.tsv" #' ) #' @export -writeProcessRuntime2TSV <- function(dir_job_results, filepath) { +write_proc_medians_table <- function(dir_job_results, filepath) { tryCatch({ # Error handling for input arguments if (!is.character(dir_job_results) || length(dir_job_results) != 1) { @@ -175,7 +175,7 @@ writeProcessRuntime2TSV <- function(dir_job_results, filepath) { if (!is.character(filepath) || length(filepath) != 1) { stop("Input 'filepath' must be a single character string.") } - df_proc_medians <- calculateProcessRuntime(dir_job_results) |> + df_proc_medians <- get_proc_medians(dir_job_results) |> tibble::as_tibble() |> tidyr::pivot_longer( dplyr::everything(), @@ -192,7 +192,7 @@ writeProcessRuntime2TSV <- function(dir_job_results, filepath) { }, warning = function(w) { message(paste("Warning: ", w$message)) }, finally = { - message("writeProcessRuntime2TSV function execution completed.") + message("write_proc_medians_table function execution completed.") }) } @@ -201,7 +201,7 @@ writeProcessRuntime2TSV <- function(dir_job_results, filepath) { #' their median runtimes in seconds to the path specified by 'filepath'. #' #' The default value of filepath is the value of the env var -#' MOLEVOLVR_PROC_WEIGHTS, which writeProcessRuntimeToYML() also uses as its default +#' MOLEVOLVR_PROC_WEIGHTS, which write_proc_medians_yml() also uses as its default #' read location. 
#' #' @param dir_job_results [chr] path to MolEvolvR job_results directory @@ -212,13 +212,13 @@ writeProcessRuntime2TSV <- function(dir_job_results, filepath) { #' #' @examples #' \dontrun{ -#' writeProcessRuntimeToYML( +#' write_proc_medians_yml( #' "/data/scratch/janani/molevolvr_out/", #' "/data/scratch/janani/molevolvr_out/log_tbl.yml" #' ) #' } #' @export -writeProcessRuntimeToYML <- function(dir_job_results, filepath = NULL) { +write_proc_medians_yml <- function(dir_job_results, filepath = NULL) { tryCatch({ # Error handling for dir_job_results arguments if (!is.character(dir_job_results) || length(dir_job_results) != 1) { @@ -238,7 +238,7 @@ writeProcessRuntimeToYML <- function(dir_job_results, filepath = NULL) { stop("Input 'filepath' must be a single character string.") } - medians <- calculateProcessRuntime(dir_job_results) + medians <- get_proc_medians(dir_job_results) yaml::write_yaml(medians, filepath) }, error = function(e) { message(paste("Encountered an error: "), e$message) @@ -261,9 +261,9 @@ writeProcessRuntimeToYML <- function(dir_job_results, filepath = NULL) { #' #' @return [list] names: processes; values: median runtime (seconds) #' -#' example: writeProcessRuntimeToYML() +#' example: write_proc_medians_yml() #' @export -getProcessRuntimeWeights <- function(medians_yml_path = NULL) { +get_proc_weights <- function(medians_yml_path = NULL) { if (is.null(medians_yml_path)) { medians_yml_path <- file.path(common_root, "molevol_scripts", @@ -273,7 +273,7 @@ getProcessRuntimeWeights <- function(medians_yml_path = NULL) { proc_weights <- tryCatch({ # attempt to read the weights from the YAML file produced by - # writeProcessRuntimeToYML() + # write_proc_medians_yml() if (stringr::str_trim(medians_yml_path) == "") { stop( stringr::str_glue("medians_yml_path is empty @@ -285,7 +285,7 @@ getProcessRuntimeWeights <- function(medians_yml_path = NULL) { }, # to avoid fatal errors in reading the proc weights yaml, # some median process runtimes have been 
hardcoded based on - # the result of calculateProcessRuntime() from Jan 2024 + # the result of get_proc_medians() from Jan 2024 error = function(cond) { proc_weights <- list( "dblast" = 2810, @@ -306,7 +306,7 @@ getProcessRuntimeWeights <- function(medians_yml_path = NULL) { #' calculate the total estimated walltime for the job #' #' @param advanced_opts character vector of MolEvolvR advanced options -#' (see mapOption2Process for the options) +#' (see make_opts2procs for the options) #' @param n_inputs total number of input proteins #' #' @importFrom dplyr if_else @@ -314,11 +314,11 @@ getProcessRuntimeWeights <- function(medians_yml_path = NULL) { #' #' @return total estimated number of seconds a job will process (walltime) #' -#' example: calculateEstimatedWallTimeFromOptions(c("homology_search", +#' example: advanced_opts2est_walltime (c("homology_search", #' "domain_architecture"), #' n_inputs = 3, n_hits = 50L) #' @export -calculateEstimatedWallTimeFromOptions <- function(advanced_opts, +advanced_opts2est_walltime <- function(advanced_opts, n_inputs = 1L, n_hits = NULL, verbose = FALSE) { @@ -348,7 +348,7 @@ calculateEstimatedWallTimeFromOptions <- function(advanced_opts, } # Get process weights - proc_weights <- writeProcessRuntimeToYML() + proc_weights <- write_proc_medians_yml() if (!is.list(proc_weights)) { stop("Process weights could not be retrieved correctly.") } @@ -357,7 +357,7 @@ calculateEstimatedWallTimeFromOptions <- function(advanced_opts, proc_weights <- proc_weights[order(names(proc_weights))] |> unlist() all_procs <- names(proc_weights) |> sort() # get processes from advanced options and sort by names - procs_from_opts <- mapAdvOption2Process(advanced_opts) + procs_from_opts <- map_advanced_opts2procs(advanced_opts) procs_from_opts <- sort(procs_from_opts) # binary encode: yes proc will run (1); else 0 binary_proc_vec <- dplyr::if_else(all_procs %in% procs_from_opts, 1L, 0L) @@ -366,7 +366,7 @@ calculateEstimatedWallTimeFromOptions <- 
function(advanced_opts, as.numeric() # calculate the additional processes to run for the homologous hits if ("homology_search" %in% advanced_opts) { - opts2procs <- mapOption2Process() + opts2procs <- make_opts2procs() # exclude the homology search processes for the homologous hits procs2exclude_for_homologs <- opts2procs[["homology_search"]] procs_homologs <- procs_from_opts[!(procs_from_opts @@ -380,7 +380,7 @@ calculateEstimatedWallTimeFromOptions <- function(advanced_opts, } if (verbose) { msg <- stringr::str_glue( - "warnings from calculateEstimatedWallTimeFromOptions():\n", + "warnings from advanced_opts2est_walltime ():\n", "\tn_inputs={n_inputs}\n", "\tn_hits={ifelse(is.null(n_hits), 'null', n_hits)}\n", "\test_walltime={est_walltime}\n\n" @@ -393,7 +393,7 @@ calculateEstimatedWallTimeFromOptions <- function(advanced_opts, }, warning = function(w) { message(paste("Warning: ", w$message)) }, finally = { - message("calculateEstimatedWallTimeFromOptions + message("advanced_opts2est_walltime function execution completed.") }) @@ -403,18 +403,18 @@ calculateEstimatedWallTimeFromOptions <- function(advanced_opts, #' Decision function to assign job queue #' #' @param t_sec_estimate estimated number of seconds a job will process -#' (from calculateEstimatedWallTimeFromOptions()) +#' (from advanced_opts2est_walltime ()) #' @param t_long threshold value that defines the lower bound for assigning a #' job to the "long queue" #' #' @return a string of "short" or "long" #' #' example: -#' calculateEstimatedWallTimeFromOptions(c("homology_search", +#' advanced_opts2est_walltime (c("homology_search", #' "domain_architecture"), 3) |> -#' assignJobQueue() +#' assign_job_queue() #' @export -assignJobQueue <- function( +assign_job_queue <- function( t_sec_estimate, t_cutoff = 21600 # 6 hours ) { @@ -434,7 +434,7 @@ assignJobQueue <- function( }, warning = function(w) { message(paste("Warning: ", w$message)) }, finally = { - message("assignJobQueue function execution 
completed.") + message("assign_job_queue function execution completed.") }) } @@ -451,13 +451,13 @@ assignJobQueue <- function( #' @return line plot object #' #' example: -#' p <- plotEstimatedWallTimes() +#' p <- plot_estimated_walltimes() #' ggplot2::ggsave(filename = "/data/molevolvr_transfer/molevolvr_ #' dev/molevol_scripts/docs/estimate_walltimes.png", plot = p) #' @export -plotEstimatedWallTimes <- function() { +plot_estimated_walltimes <- function() { tryCatch({ - opts <- mapOption2Process() |> names() + opts <- make_opts2procs() |> names() # get all possible submission permutations (powerset) get_powerset <- function(vec) { # generate powerset (do not include empty set) @@ -482,7 +482,7 @@ plotEstimatedWallTimes <- function() { } else { NULL } - est_walltime <- calculateEstimatedWallTimeFromOptions( + est_walltime <- advanced_opts2est_walltime ( advanced_opts, n_inputs = i, n_hits = n_hits, @@ -541,7 +541,7 @@ plotEstimatedWallTimes <- function() { }, warning = function(w) { message(paste("Warning: ", w$message)) }, finally = { - message("plotEstimatedWallTimes function execution completed.") + message("plot_estimated_walltimes function execution completed.") }) } diff --git a/man/calculateEstimatedWallTimeFromOptions.Rd b/man/advanced_opts2est_walltime.Rd similarity index 73% rename from man/calculateEstimatedWallTimeFromOptions.Rd rename to man/advanced_opts2est_walltime.Rd index e4eec3fd..02ae9621 100644 --- a/man/calculateEstimatedWallTimeFromOptions.Rd +++ b/man/advanced_opts2est_walltime.Rd @@ -1,11 +1,11 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{calculateEstimatedWallTimeFromOptions} -\alias{calculateEstimatedWallTimeFromOptions} +\name{advanced_opts2est_walltime} +\alias{advanced_opts2est_walltime} \title{Given MolEvolvR advanced options and number of inputs, calculate the total estimated walltime for the job} \usage{ -calculateEstimatedWallTimeFromOptions( 
+advanced_opts2est_walltime( advanced_opts, n_inputs = 1L, n_hits = NULL, @@ -14,14 +14,14 @@ calculateEstimatedWallTimeFromOptions( } \arguments{ \item{advanced_opts}{character vector of MolEvolvR advanced options -(see mapOption2Process for the options)} +(see make_opts2procs for the options)} \item{n_inputs}{total number of input proteins} } \value{ total estimated number of seconds a job will process (walltime) -example: calculateEstimatedWallTimeFromOptions(c("homology_search", +example: advanced_opts2est_walltime (c("homology_search", "domain_architecture"), n_inputs = 3, n_hits = 50L) } diff --git a/man/assignJobQueue.Rd b/man/assign_job_queue.Rd similarity index 68% rename from man/assignJobQueue.Rd rename to man/assign_job_queue.Rd index 27511b6a..d2650fed 100644 --- a/man/assignJobQueue.Rd +++ b/man/assign_job_queue.Rd @@ -1,14 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{assignJobQueue} -\alias{assignJobQueue} +\name{assign_job_queue} +\alias{assign_job_queue} \title{Decision function to assign job queue} \usage{ -assignJobQueue(t_sec_estimate, t_cutoff = 21600) +assign_job_queue(t_sec_estimate, t_cutoff = 21600) } \arguments{ \item{t_sec_estimate}{estimated number of seconds a job will process -(from calculateEstimatedWallTimeFromOptions())} +(from advanced_opts2est_walltime ())} \item{t_long}{threshold value that defines the lower bound for assigning a job to the "long queue"} @@ -17,9 +17,9 @@ job to the "long queue"} a string of "short" or "long" example: -calculateEstimatedWallTimeFromOptions(c("homology_search", +advanced_opts2est_walltime (c("homology_search", "domain_architecture"), 3) |> -assignJobQueue() +assign_job_queue() } \description{ Decision function to assign job queue diff --git a/man/calculateProcessRuntime.Rd b/man/get_proc_medians.Rd similarity index 76% rename from man/calculateProcessRuntime.Rd rename to man/get_proc_medians.Rd index bb6dd1ed..b6db0b56 100644 --- 
a/man/calculateProcessRuntime.Rd +++ b/man/get_proc_medians.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{calculateProcessRuntime} -\alias{calculateProcessRuntime} +\name{get_proc_medians} +\alias{get_proc_medians} \title{Scrape MolEvolvR logs and calculate median processes} \usage{ -calculateProcessRuntime(dir_job_results) +get_proc_medians(dir_job_results) } \arguments{ \item{dir_job_results}{\link{chr} path to MolEvolvR job_results @@ -21,12 +21,12 @@ examples: } dir_job_results <- "/data/scratch/janani/molevolvr_out" -list_proc_medians <- calculateProcessRuntime(dir_job_results) +list_proc_medians <- get_proc_medians(dir_job_results) \enumerate{ \item from outside container environment common_root <- "/data/molevolvr_transfer/molevolvr_dev" dir_job_results <- "/data/molevolvr_transfer/molevolvr_dev/job_results" -list_proc_medians <- calculateProcessRuntime(dir_job_results) +list_proc_medians <- get_proc_medians(dir_job_results) } } \description{ diff --git a/man/getProcessRuntimeWeights.Rd b/man/get_proc_weights.Rd similarity index 73% rename from man/getProcessRuntimeWeights.Rd rename to man/get_proc_weights.Rd index 8eff0347..f48585cc 100644 --- a/man/getProcessRuntimeWeights.Rd +++ b/man/get_proc_weights.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{getProcessRuntimeWeights} -\alias{getProcessRuntimeWeights} +\name{get_proc_weights} +\alias{get_proc_weights} \title{Quickly get the runtime weights for MolEvolvR backend processes} \usage{ -getProcessRuntimeWeights(medians_yml_path = NULL) +get_proc_weights(medians_yml_path = NULL) } \arguments{ \item{dir_job_results}{\link{chr} path to MolEvolvR job_results @@ -13,7 +13,7 @@ directory} \value{ \link{list} names: processes; values: median runtime (seconds) -example: writeProcessRuntimeToYML() +example: write_proc_medians_yml() } \description{ Quickly get 
the runtime weights for MolEvolvR backend processes diff --git a/man/mapOption2Process.Rd b/man/make_opts2procs.Rd similarity index 75% rename from man/mapOption2Process.Rd rename to man/make_opts2procs.Rd index ff6905c5..07e208b2 100644 --- a/man/mapOption2Process.Rd +++ b/man/make_opts2procs.Rd @@ -1,15 +1,15 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{mapOption2Process} -\alias{mapOption2Process} +\name{make_opts2procs} +\alias{make_opts2procs} \title{Construct list where names (MolEvolvR advanced options) point to processes} \usage{ -mapOption2Process() +make_opts2procs() } \value{ list where names (MolEvolvR advanced options) point to processes -example: list_opts2procs <- mapOption2Process +example: list_opts2procs <- make_opts2procs } \description{ Construct list where names (MolEvolvR advanced options) point to processes diff --git a/man/mapAdvOption2Process.Rd b/man/map_advanced_opts2procs.Rd similarity index 76% rename from man/mapAdvOption2Process.Rd rename to man/map_advanced_opts2procs.Rd index 5bd9ee65..631708b4 100644 --- a/man/mapAdvOption2Process.Rd +++ b/man/map_advanced_opts2procs.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{mapAdvOption2Process} -\alias{mapAdvOption2Process} +\name{map_advanced_opts2procs} +\alias{map_advanced_opts2procs} \title{Use MolEvolvR advanced options to get associated processes} \usage{ -mapAdvOption2Process(advanced_opts) +map_advanced_opts2procs(advanced_opts) } \arguments{ \item{advanced_opts}{character vector of MolEvolvR advanced options} @@ -15,7 +15,7 @@ the advanced options example: advanced_opts <- c("homology_search", "domain_architecture") -procs <- mapAdvOption2Process(advanced_opts) +procs <- map_advanced_opts2procs(advanced_opts) } \description{ Use MolEvolvR advanced options to get associated processes diff --git a/man/plotEstimatedWallTimes.Rd 
b/man/plot_estimated_walltimes.Rd similarity index 77% rename from man/plotEstimatedWallTimes.Rd rename to man/plot_estimated_walltimes.Rd index 0d53cb32..884fed50 100644 --- a/man/plotEstimatedWallTimes.Rd +++ b/man/plot_estimated_walltimes.Rd @@ -1,17 +1,17 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{plotEstimatedWallTimes} -\alias{plotEstimatedWallTimes} +\name{plot_estimated_walltimes} +\alias{plot_estimated_walltimes} \title{Plot the estimated runtimes for different advanced options and number of inputs} \usage{ -plotEstimatedWallTimes() +plot_estimated_walltimes() } \value{ line plot object example: -p <- plotEstimatedWallTimes() +p <- plot_estimated_walltimes() ggplot2::ggsave(filename = "/data/molevolvr_transfer/molevolvr_ dev/molevol_scripts/docs/estimate_walltimes.png", plot = p) } diff --git a/man/writeProcessRuntime2TSV.Rd b/man/write_proc_medians_table.Rd similarity index 77% rename from man/writeProcessRuntime2TSV.Rd rename to man/write_proc_medians_table.Rd index 03cbbd68..2ae7a97b 100644 --- a/man/writeProcessRuntime2TSV.Rd +++ b/man/write_proc_medians_table.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{writeProcessRuntime2TSV} -\alias{writeProcessRuntime2TSV} +\name{write_proc_medians_table} +\alias{write_proc_medians_table} \title{Write a table of 2 columns: 1) process and 2) median seconds} \usage{ -writeProcessRuntime2TSV(dir_job_results, filepath) +write_proc_medians_table(dir_job_results, filepath) } \arguments{ \item{dir_job_results}{\link{chr} path to MolEvolvR job_results} @@ -14,7 +14,7 @@ writeProcessRuntime2TSV(dir_job_results, filepath) \value{ \link{tbl_df} 2 columns: 1) process and 2) median seconds -example: writeProcessRuntime2TSV( +example: write_proc_medians_table( "/data/scratch/janani/molevolvr_out/", "/data/scratch/janani/molevolvr_out/log_tbl.tsv" ) diff --git 
a/man/writeProcessRuntimeToYML.Rd b/man/write_proc_medians_yml.Rd similarity index 74% rename from man/writeProcessRuntimeToYML.Rd rename to man/write_proc_medians_yml.Rd index e4a5c8ad..74757f1f 100644 --- a/man/writeProcessRuntimeToYML.Rd +++ b/man/write_proc_medians_yml.Rd @@ -1,11 +1,11 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{writeProcessRuntimeToYML} -\alias{writeProcessRuntimeToYML} +\name{write_proc_medians_yml} +\alias{write_proc_medians_yml} \title{Compute median process runtimes, then write a YAML list of the processes and their median runtimes in seconds to the path specified by 'filepath'.} \usage{ -writeProcessRuntimeToYML(dir_job_results, filepath = NULL) +write_proc_medians_yml(dir_job_results, filepath = NULL) } \arguments{ \item{dir_job_results}{\link{chr} path to MolEvolvR job_results directory} @@ -15,12 +15,12 @@ uses ./molevol_scripts/log_data/job_proc_weights.yml} } \description{ The default value of filepath is the value of the env var -MOLEVOLVR_PROC_WEIGHTS, which writeProcessRuntimeToYML() also uses as its default +MOLEVOLVR_PROC_WEIGHTS, which write_proc_medians_yml() also uses as its default read location. 
} \examples{ \dontrun{ -writeProcessRuntimeToYML( +write_proc_medians_yml( "/data/scratch/janani/molevolvr_out/", "/data/scratch/janani/molevolvr_out/log_tbl.yml" ) From a543898c8579065cbe3125f40b8cdf66200fc06f Mon Sep 17 00:00:00 2001 From: Seyi Kuforiji Date: Thu, 10 Oct 2024 11:00:41 +0100 Subject: [PATCH 22/61] Renamed the following functions in R/assign_job_queue.R; MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit | Original | Modified | User Facing | |---------------------------------|----------------------------------|----------------------------------| | assign_job_queue | assignJobQueue | ✔️ | | make_opts2procs | mapOption2Process | ✔️ | | map_advanced_opts2procs | mapAdvOption2Process | ✔️ | | get_proc_medians | calculateProcessRuntime | ✔️ | | write_proc_medians_table | writeProcessRuntime2TSV | ✔️ | | write_proc_medians_yml | writeProcessRuntime2YML | ✔️ | | get_proc_weights | getProcessRuntimeWeights | ✔️ | | advanced_opts2est_walltime | calculateEstimatedWallTimeFromOpts| ✔️ | | plot_estimated_walltimes | plotEstimatedWallTimes | ✔️ | --- NAMESPACE | 18 ++-- R/assign_job_queue.R | 86 +++++++++---------- ...{assign_job_queue.Rd => assignJobQueue.Rd} | 12 +-- ... 
=> calculateEstimatedWallTimeFromOpts.Rd} | 10 +-- ..._medians.Rd => calculateProcessRuntime.Rd} | 10 +-- ...weights.Rd => getProcessRuntimeWeights.Rd} | 8 +- ..._opts2procs.Rd => mapAdvOption2Process.Rd} | 8 +- ...ake_opts2procs.Rd => mapOption2Process.Rd} | 8 +- ...walltimes.Rd => plotEstimatedWallTimes.Rd} | 8 +- ...ns_table.Rd => writeProcessRuntime2TSV.Rd} | 8 +- ...ians_yml.Rd => writeProcessRuntime2YML.Rd} | 10 +-- 11 files changed, 93 insertions(+), 93 deletions(-) rename man/{assign_job_queue.Rd => assignJobQueue.Rd} (68%) rename man/{advanced_opts2est_walltime.Rd => calculateEstimatedWallTimeFromOpts.Rd} (74%) rename man/{get_proc_medians.Rd => calculateProcessRuntime.Rd} (76%) rename man/{get_proc_weights.Rd => getProcessRuntimeWeights.Rd} (73%) rename man/{map_advanced_opts2procs.Rd => mapAdvOption2Process.Rd} (76%) rename man/{make_opts2procs.Rd => mapOption2Process.Rd} (75%) rename man/{plot_estimated_walltimes.Rd => plotEstimatedWallTimes.Rd} (77%) rename man/{write_proc_medians_table.Rd => writeProcessRuntime2TSV.Rd} (77%) rename man/{write_proc_medians_yml.Rd => writeProcessRuntime2YML.Rd} (74%) diff --git a/NAMESPACE b/NAMESPACE index c811bac3..65cc791e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,10 +15,11 @@ export(add_leaves) export(add_lins) export(add_name) export(add_tax) -export(advanced_opts2est_walltime) export(alignFasta) export(assert_count_df) -export(assign_job_queue) +export(assignJobQueue) +export(calculateEstimatedWallTimeFromOpts) +export(calculateProcessRuntime) export(cleanClusters) export(cleanDomainArchitecture) export(cleanGeneDescription) @@ -53,10 +54,9 @@ export(generate_all_aln2fa) export(generate_fa2tre) export(generate_msa) export(generate_trees) +export(getProcessRuntimeWeights) export(get_accnums_from_fasta_file) export(get_job_message) -export(get_proc_medians) -export(get_proc_weights) export(ipg2lin) export(ipr2viz) export(ipr2viz_web) @@ -66,12 +66,12 @@ export(lineage.domain_repeats.plot) 
export(lineage.neighbors.plot) export(lineage_sunburst) export(make_job_results_url) -export(make_opts2procs) export(mapAcc2Name) +export(mapAdvOption2Process) +export(mapOption2Process) export(map_acc2name) -export(map_advanced_opts2procs) export(msa_pdf) -export(plot_estimated_walltimes) +export(plotEstimatedWallTimes) export(prot2tax) export(prot2tax_old) export(removeAsterisks) @@ -103,8 +103,8 @@ export(wordcloud2_element) export(wordcloud3) export(wordcloud_element) export(write.MsaAAMultipleAlignment) -export(write_proc_medians_table) -export(write_proc_medians_yml) +export(writeProcessRuntime2TSV) +export(writeProcessRuntime2YML) importFrom(Biostrings,AAStringSet) importFrom(Biostrings,readAAStringSet) importFrom(Biostrings,toString) diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R index c531fb09..10df1e3a 100644 --- a/R/assign_job_queue.R +++ b/R/assign_job_queue.R @@ -3,16 +3,16 @@ # pipeline. # to use this, construct paths like so: file.path(common_root, "path", "to", "file.R") # for example, the reference for this file would be: -# file.path(common_root, "molevol_scripts", "R", "assign_job_queue.R") +# file.path(common_root, "molevol_scripts", "R", "assignJobQueue.R") common_root <- Sys.getenv("COMMON_SRC_ROOT") #' Construct list where names (MolEvolvR advanced options) point to processes #' #' @return list where names (MolEvolvR advanced options) point to processes #' -#' example: list_opts2procs <- make_opts2procs +#' example: list_opts2procs <- mapOption2Process #' @export -make_opts2procs <- function() { +mapOption2Process <- function() { tryCatch({ opts2processes <- list( "homology_search" = c("dblast", "dblast_cleanup"), @@ -26,7 +26,7 @@ make_opts2procs <- function() { }, warning = function(w) { message(paste("Warning: ", w$message)) }, finally = { - message("make_opts2procs function execution completed.") + message("mapOption2Process function execution completed.") }) } @@ -40,16 +40,16 @@ make_opts2procs <- function() { #' #' example: 
#' advanced_opts <- c("homology_search", "domain_architecture") -#' procs <- map_advanced_opts2procs(advanced_opts) +#' procs <- mapAdvOption2Process(advanced_opts) #' @export -map_advanced_opts2procs <- function(advanced_opts) { +mapAdvOption2Process <- function(advanced_opts) { if (!is.character(advanced_opts)) { stop("Argument must be a character vector!") } tryCatch({ # append 'always' to add procs that always run advanced_opts <- c(advanced_opts, "always") - opts2proc <- make_opts2procs() + opts2proc <- mapOption2Process() # setup index for opts2proc based on advanced options idx <- which(names(opts2proc) %in% advanced_opts) # extract processes that will run @@ -60,7 +60,7 @@ map_advanced_opts2procs <- function(advanced_opts) { }, warning = function(w) { message(paste("Warning: ", w$message)) }, finally = { - message("make_opts2procs function execution completed.") + message("mapOption2Process function execution completed.") }) } @@ -80,14 +80,14 @@ map_advanced_opts2procs <- function(advanced_opts) { #' #' 1) #' dir_job_results <- "/data/scratch/janani/molevolvr_out" -#' list_proc_medians <- get_proc_medians(dir_job_results) +#' list_proc_medians <- calculateProcessRuntime(dir_job_results) #' #' 2) from outside container environment #' common_root <- "/data/molevolvr_transfer/molevolvr_dev" #' dir_job_results <- "/data/molevolvr_transfer/molevolvr_dev/job_results" -#' list_proc_medians <- get_proc_medians(dir_job_results) +#' list_proc_medians <- calculateProcessRuntime(dir_job_results) #' @export -get_proc_medians <- function(dir_job_results) { +calculateProcessRuntime <- function(dir_job_results) { tryCatch({ # Check if dir_job_results is a character string if (!is.character(dir_job_results) || length(dir_job_results) != 1) { @@ -139,7 +139,7 @@ get_proc_medians <- function(dir_job_results) { }, warning = function(w) { message(paste("Warning: ", w$message)) }, finally = { - message("get_proc_medians function execution completed.") + 
message("calculateProcessRuntime function execution completed.") }) } @@ -156,12 +156,12 @@ get_proc_medians <- function(dir_job_results) { #' #' @return [tbl_df] 2 columns: 1) process and 2) median seconds #' -#' example: write_proc_medians_table( +#' example: writeProcessRuntime2TSV( #' "/data/scratch/janani/molevolvr_out/", #' "/data/scratch/janani/molevolvr_out/log_tbl.tsv" #' ) #' @export -write_proc_medians_table <- function(dir_job_results, filepath) { +writeProcessRuntime2TSV <- function(dir_job_results, filepath) { tryCatch({ # Error handling for input arguments if (!is.character(dir_job_results) || length(dir_job_results) != 1) { @@ -175,7 +175,7 @@ write_proc_medians_table <- function(dir_job_results, filepath) { if (!is.character(filepath) || length(filepath) != 1) { stop("Input 'filepath' must be a single character string.") } - df_proc_medians <- get_proc_medians(dir_job_results) |> + df_proc_medians <- calculateProcessRuntime(dir_job_results) |> tibble::as_tibble() |> tidyr::pivot_longer( dplyr::everything(), @@ -192,7 +192,7 @@ write_proc_medians_table <- function(dir_job_results, filepath) { }, warning = function(w) { message(paste("Warning: ", w$message)) }, finally = { - message("write_proc_medians_table function execution completed.") + message("writeProcessRuntime2TSV function execution completed.") }) } @@ -201,7 +201,7 @@ write_proc_medians_table <- function(dir_job_results, filepath) { #' their median runtimes in seconds to the path specified by 'filepath'. #' #' The default value of filepath is the value of the env var -#' MOLEVOLVR_PROC_WEIGHTS, which write_proc_medians_yml() also uses as its default +#' MOLEVOLVR_PROC_WEIGHTS, which writeProcessRuntime2YML() also uses as its default #' read location. 
#' #' @param dir_job_results [chr] path to MolEvolvR job_results directory @@ -212,13 +212,13 @@ write_proc_medians_table <- function(dir_job_results, filepath) { #' #' @examples #' \dontrun{ -#' write_proc_medians_yml( +#' writeProcessRuntime2YML( #' "/data/scratch/janani/molevolvr_out/", #' "/data/scratch/janani/molevolvr_out/log_tbl.yml" #' ) #' } #' @export -write_proc_medians_yml <- function(dir_job_results, filepath = NULL) { +writeProcessRuntime2YML <- function(dir_job_results, filepath = NULL) { tryCatch({ # Error handling for dir_job_results arguments if (!is.character(dir_job_results) || length(dir_job_results) != 1) { @@ -238,14 +238,14 @@ write_proc_medians_yml <- function(dir_job_results, filepath = NULL) { stop("Input 'filepath' must be a single character string.") } - medians <- get_proc_medians(dir_job_results) + medians <- calculateProcessRuntime(dir_job_results) yaml::write_yaml(medians, filepath) }, error = function(e) { message(paste("Encountered an error: "), e$message) }, warning = function(w) { message(paste("Warning: "), w$message) }, finally = { - message("write_proc_medians_table function execution completed.") + message("writeProcessRuntime2TSV function execution completed.") } ) @@ -261,9 +261,9 @@ write_proc_medians_yml <- function(dir_job_results, filepath = NULL) { #' #' @return [list] names: processes; values: median runtime (seconds) #' -#' example: write_proc_medians_yml() +#' example: writeProcessRuntime2YML() #' @export -get_proc_weights <- function(medians_yml_path = NULL) { +getProcessRuntimeWeights <- function(medians_yml_path = NULL) { if (is.null(medians_yml_path)) { medians_yml_path <- file.path(common_root, "molevol_scripts", @@ -273,7 +273,7 @@ get_proc_weights <- function(medians_yml_path = NULL) { proc_weights <- tryCatch({ # attempt to read the weights from the YAML file produced by - # write_proc_medians_yml() + # writeProcessRuntime2YML() if (stringr::str_trim(medians_yml_path) == "") { stop( 
stringr::str_glue("medians_yml_path is empty @@ -285,7 +285,7 @@ get_proc_weights <- function(medians_yml_path = NULL) { }, # to avoid fatal errors in reading the proc weights yaml, # some median process runtimes have been hardcoded based on - # the result of get_proc_medians() from Jan 2024 + # the result of calculateProcessRuntime() from Jan 2024 error = function(cond) { proc_weights <- list( "dblast" = 2810, @@ -306,7 +306,7 @@ get_proc_weights <- function(medians_yml_path = NULL) { #' calculate the total estimated walltime for the job #' #' @param advanced_opts character vector of MolEvolvR advanced options -#' (see make_opts2procs for the options) +#' (see mapOption2Process for the options) #' @param n_inputs total number of input proteins #' #' @importFrom dplyr if_else @@ -314,11 +314,11 @@ get_proc_weights <- function(medians_yml_path = NULL) { #' #' @return total estimated number of seconds a job will process (walltime) #' -#' example: advanced_opts2est_walltime (c("homology_search", +#' example: calculateEstimatedWallTimeFromOpts (c("homology_search", #' "domain_architecture"), #' n_inputs = 3, n_hits = 50L) #' @export -advanced_opts2est_walltime <- function(advanced_opts, +calculateEstimatedWallTimeFromOpts <- function(advanced_opts, n_inputs = 1L, n_hits = NULL, verbose = FALSE) { @@ -348,7 +348,7 @@ advanced_opts2est_walltime <- function(advanced_opts, } # Get process weights - proc_weights <- write_proc_medians_yml() + proc_weights <- writeProcessRuntime2YML() if (!is.list(proc_weights)) { stop("Process weights could not be retrieved correctly.") } @@ -357,7 +357,7 @@ advanced_opts2est_walltime <- function(advanced_opts, proc_weights <- proc_weights[order(names(proc_weights))] |> unlist() all_procs <- names(proc_weights) |> sort() # get processes from advanced options and sort by names - procs_from_opts <- map_advanced_opts2procs(advanced_opts) + procs_from_opts <- mapAdvOption2Process(advanced_opts) procs_from_opts <- sort(procs_from_opts) # binary 
encode: yes proc will run (1); else 0 binary_proc_vec <- dplyr::if_else(all_procs %in% procs_from_opts, 1L, 0L) @@ -366,7 +366,7 @@ advanced_opts2est_walltime <- function(advanced_opts, as.numeric() # calculate the additional processes to run for the homologous hits if ("homology_search" %in% advanced_opts) { - opts2procs <- make_opts2procs() + opts2procs <- mapOption2Process() # exclude the homology search processes for the homologous hits procs2exclude_for_homologs <- opts2procs[["homology_search"]] procs_homologs <- procs_from_opts[!(procs_from_opts @@ -380,7 +380,7 @@ advanced_opts2est_walltime <- function(advanced_opts, } if (verbose) { msg <- stringr::str_glue( - "warnings from advanced_opts2est_walltime ():\n", + "warnings from calculateEstimatedWallTimeFromOpts ():\n", "\tn_inputs={n_inputs}\n", "\tn_hits={ifelse(is.null(n_hits), 'null', n_hits)}\n", "\test_walltime={est_walltime}\n\n" @@ -393,7 +393,7 @@ advanced_opts2est_walltime <- function(advanced_opts, }, warning = function(w) { message(paste("Warning: ", w$message)) }, finally = { - message("advanced_opts2est_walltime + message("calculateEstimatedWallTimeFromOpts function execution completed.") }) @@ -403,18 +403,18 @@ advanced_opts2est_walltime <- function(advanced_opts, #' Decision function to assign job queue #' #' @param t_sec_estimate estimated number of seconds a job will process -#' (from advanced_opts2est_walltime ()) +#' (from calculateEstimatedWallTimeFromOpts ()) #' @param t_long threshold value that defines the lower bound for assigning a #' job to the "long queue" #' #' @return a string of "short" or "long" #' #' example: -#' advanced_opts2est_walltime (c("homology_search", +#' calculateEstimatedWallTimeFromOpts (c("homology_search", #' "domain_architecture"), 3) |> -#' assign_job_queue() +#' assignJobQueue() #' @export -assign_job_queue <- function( +assignJobQueue <- function( t_sec_estimate, t_cutoff = 21600 # 6 hours ) { @@ -434,7 +434,7 @@ assign_job_queue <- function( }, warning = 
function(w) { message(paste("Warning: ", w$message)) }, finally = { - message("assign_job_queue function execution completed.") + message("assignJobQueue function execution completed.") }) } @@ -451,13 +451,13 @@ assign_job_queue <- function( #' @return line plot object #' #' example: -#' p <- plot_estimated_walltimes() +#' p <- plotEstimatedWallTimes() #' ggplot2::ggsave(filename = "/data/molevolvr_transfer/molevolvr_ #' dev/molevol_scripts/docs/estimate_walltimes.png", plot = p) #' @export -plot_estimated_walltimes <- function() { +plotEstimatedWallTimes <- function() { tryCatch({ - opts <- make_opts2procs() |> names() + opts <- mapOption2Process() |> names() # get all possible submission permutations (powerset) get_powerset <- function(vec) { # generate powerset (do not include empty set) @@ -482,7 +482,7 @@ plot_estimated_walltimes <- function() { } else { NULL } - est_walltime <- advanced_opts2est_walltime ( + est_walltime <- calculateEstimatedWallTimeFromOpts ( advanced_opts, n_inputs = i, n_hits = n_hits, @@ -541,7 +541,7 @@ plot_estimated_walltimes <- function() { }, warning = function(w) { message(paste("Warning: ", w$message)) }, finally = { - message("plot_estimated_walltimes function execution completed.") + message("plotEstimatedWallTimes function execution completed.") }) } diff --git a/man/assign_job_queue.Rd b/man/assignJobQueue.Rd similarity index 68% rename from man/assign_job_queue.Rd rename to man/assignJobQueue.Rd index d2650fed..3663ce56 100644 --- a/man/assign_job_queue.Rd +++ b/man/assignJobQueue.Rd @@ -1,14 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{assign_job_queue} -\alias{assign_job_queue} +\name{assignJobQueue} +\alias{assignJobQueue} \title{Decision function to assign job queue} \usage{ -assign_job_queue(t_sec_estimate, t_cutoff = 21600) +assignJobQueue(t_sec_estimate, t_cutoff = 21600) } \arguments{ \item{t_sec_estimate}{estimated number of seconds a job will 
process -(from advanced_opts2est_walltime ())} +(from calculateEstimatedWallTimeFromOpts ())} \item{t_long}{threshold value that defines the lower bound for assigning a job to the "long queue"} @@ -17,9 +17,9 @@ job to the "long queue"} a string of "short" or "long" example: -advanced_opts2est_walltime (c("homology_search", +calculateEstimatedWallTimeFromOpts (c("homology_search", "domain_architecture"), 3) |> -assign_job_queue() +assignJobQueue() } \description{ Decision function to assign job queue diff --git a/man/advanced_opts2est_walltime.Rd b/man/calculateEstimatedWallTimeFromOpts.Rd similarity index 74% rename from man/advanced_opts2est_walltime.Rd rename to man/calculateEstimatedWallTimeFromOpts.Rd index 02ae9621..c09cf6a6 100644 --- a/man/advanced_opts2est_walltime.Rd +++ b/man/calculateEstimatedWallTimeFromOpts.Rd @@ -1,11 +1,11 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{advanced_opts2est_walltime} -\alias{advanced_opts2est_walltime} +\name{calculateEstimatedWallTimeFromOpts} +\alias{calculateEstimatedWallTimeFromOpts} \title{Given MolEvolvR advanced options and number of inputs, calculate the total estimated walltime for the job} \usage{ -advanced_opts2est_walltime( +calculateEstimatedWallTimeFromOpts( advanced_opts, n_inputs = 1L, n_hits = NULL, @@ -14,14 +14,14 @@ advanced_opts2est_walltime( } \arguments{ \item{advanced_opts}{character vector of MolEvolvR advanced options -(see make_opts2procs for the options)} +(see mapOption2Process for the options)} \item{n_inputs}{total number of input proteins} } \value{ total estimated number of seconds a job will process (walltime) -example: advanced_opts2est_walltime (c("homology_search", +example: calculateEstimatedWallTimeFromOpts (c("homology_search", "domain_architecture"), n_inputs = 3, n_hits = 50L) } diff --git a/man/get_proc_medians.Rd b/man/calculateProcessRuntime.Rd similarity index 76% rename from man/get_proc_medians.Rd rename to 
man/calculateProcessRuntime.Rd index b6db0b56..bb6dd1ed 100644 --- a/man/get_proc_medians.Rd +++ b/man/calculateProcessRuntime.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{get_proc_medians} -\alias{get_proc_medians} +\name{calculateProcessRuntime} +\alias{calculateProcessRuntime} \title{Scrape MolEvolvR logs and calculate median processes} \usage{ -get_proc_medians(dir_job_results) +calculateProcessRuntime(dir_job_results) } \arguments{ \item{dir_job_results}{\link{chr} path to MolEvolvR job_results @@ -21,12 +21,12 @@ examples: } dir_job_results <- "/data/scratch/janani/molevolvr_out" -list_proc_medians <- get_proc_medians(dir_job_results) +list_proc_medians <- calculateProcessRuntime(dir_job_results) \enumerate{ \item from outside container environment common_root <- "/data/molevolvr_transfer/molevolvr_dev" dir_job_results <- "/data/molevolvr_transfer/molevolvr_dev/job_results" -list_proc_medians <- get_proc_medians(dir_job_results) +list_proc_medians <- calculateProcessRuntime(dir_job_results) } } \description{ diff --git a/man/get_proc_weights.Rd b/man/getProcessRuntimeWeights.Rd similarity index 73% rename from man/get_proc_weights.Rd rename to man/getProcessRuntimeWeights.Rd index f48585cc..ff3c8e5d 100644 --- a/man/get_proc_weights.Rd +++ b/man/getProcessRuntimeWeights.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{get_proc_weights} -\alias{get_proc_weights} +\name{getProcessRuntimeWeights} +\alias{getProcessRuntimeWeights} \title{Quickly get the runtime weights for MolEvolvR backend processes} \usage{ -get_proc_weights(medians_yml_path = NULL) +getProcessRuntimeWeights(medians_yml_path = NULL) } \arguments{ \item{dir_job_results}{\link{chr} path to MolEvolvR job_results @@ -13,7 +13,7 @@ directory} \value{ \link{list} names: processes; values: median runtime (seconds) -example: write_proc_medians_yml() 
+example: writeProcessRuntime2YML() } \description{ Quickly get the runtime weights for MolEvolvR backend processes diff --git a/man/map_advanced_opts2procs.Rd b/man/mapAdvOption2Process.Rd similarity index 76% rename from man/map_advanced_opts2procs.Rd rename to man/mapAdvOption2Process.Rd index 631708b4..5bd9ee65 100644 --- a/man/map_advanced_opts2procs.Rd +++ b/man/mapAdvOption2Process.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{map_advanced_opts2procs} -\alias{map_advanced_opts2procs} +\name{mapAdvOption2Process} +\alias{mapAdvOption2Process} \title{Use MolEvolvR advanced options to get associated processes} \usage{ -map_advanced_opts2procs(advanced_opts) +mapAdvOption2Process(advanced_opts) } \arguments{ \item{advanced_opts}{character vector of MolEvolvR advanced options} @@ -15,7 +15,7 @@ the advanced options example: advanced_opts <- c("homology_search", "domain_architecture") -procs <- map_advanced_opts2procs(advanced_opts) +procs <- mapAdvOption2Process(advanced_opts) } \description{ Use MolEvolvR advanced options to get associated processes diff --git a/man/make_opts2procs.Rd b/man/mapOption2Process.Rd similarity index 75% rename from man/make_opts2procs.Rd rename to man/mapOption2Process.Rd index 07e208b2..ff6905c5 100644 --- a/man/make_opts2procs.Rd +++ b/man/mapOption2Process.Rd @@ -1,15 +1,15 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{make_opts2procs} -\alias{make_opts2procs} +\name{mapOption2Process} +\alias{mapOption2Process} \title{Construct list where names (MolEvolvR advanced options) point to processes} \usage{ -make_opts2procs() +mapOption2Process() } \value{ list where names (MolEvolvR advanced options) point to processes -example: list_opts2procs <- make_opts2procs +example: list_opts2procs <- mapOption2Process } \description{ Construct list where names (MolEvolvR advanced options) point to processes 
diff --git a/man/plot_estimated_walltimes.Rd b/man/plotEstimatedWallTimes.Rd similarity index 77% rename from man/plot_estimated_walltimes.Rd rename to man/plotEstimatedWallTimes.Rd index 884fed50..0d53cb32 100644 --- a/man/plot_estimated_walltimes.Rd +++ b/man/plotEstimatedWallTimes.Rd @@ -1,17 +1,17 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{plot_estimated_walltimes} -\alias{plot_estimated_walltimes} +\name{plotEstimatedWallTimes} +\alias{plotEstimatedWallTimes} \title{Plot the estimated runtimes for different advanced options and number of inputs} \usage{ -plot_estimated_walltimes() +plotEstimatedWallTimes() } \value{ line plot object example: -p <- plot_estimated_walltimes() +p <- plotEstimatedWallTimes() ggplot2::ggsave(filename = "/data/molevolvr_transfer/molevolvr_ dev/molevol_scripts/docs/estimate_walltimes.png", plot = p) } diff --git a/man/write_proc_medians_table.Rd b/man/writeProcessRuntime2TSV.Rd similarity index 77% rename from man/write_proc_medians_table.Rd rename to man/writeProcessRuntime2TSV.Rd index 2ae7a97b..03cbbd68 100644 --- a/man/write_proc_medians_table.Rd +++ b/man/writeProcessRuntime2TSV.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{write_proc_medians_table} -\alias{write_proc_medians_table} +\name{writeProcessRuntime2TSV} +\alias{writeProcessRuntime2TSV} \title{Write a table of 2 columns: 1) process and 2) median seconds} \usage{ -write_proc_medians_table(dir_job_results, filepath) +writeProcessRuntime2TSV(dir_job_results, filepath) } \arguments{ \item{dir_job_results}{\link{chr} path to MolEvolvR job_results} @@ -14,7 +14,7 @@ write_proc_medians_table(dir_job_results, filepath) \value{ \link{tbl_df} 2 columns: 1) process and 2) median seconds -example: write_proc_medians_table( +example: writeProcessRuntime2TSV( "/data/scratch/janani/molevolvr_out/", 
"/data/scratch/janani/molevolvr_out/log_tbl.tsv" ) diff --git a/man/write_proc_medians_yml.Rd b/man/writeProcessRuntime2YML.Rd similarity index 74% rename from man/write_proc_medians_yml.Rd rename to man/writeProcessRuntime2YML.Rd index 74757f1f..b43f39ee 100644 --- a/man/write_proc_medians_yml.Rd +++ b/man/writeProcessRuntime2YML.Rd @@ -1,11 +1,11 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign_job_queue.R -\name{write_proc_medians_yml} -\alias{write_proc_medians_yml} +\name{writeProcessRuntime2YML} +\alias{writeProcessRuntime2YML} \title{Compute median process runtimes, then write a YAML list of the processes and their median runtimes in seconds to the path specified by 'filepath'.} \usage{ -write_proc_medians_yml(dir_job_results, filepath = NULL) +writeProcessRuntime2YML(dir_job_results, filepath = NULL) } \arguments{ \item{dir_job_results}{\link{chr} path to MolEvolvR job_results directory} @@ -15,12 +15,12 @@ uses ./molevol_scripts/log_data/job_proc_weights.yml} } \description{ The default value of filepath is the value of the env var -MOLEVOLVR_PROC_WEIGHTS, which write_proc_medians_yml() also uses as its default +MOLEVOLVR_PROC_WEIGHTS, which writeProcessRuntime2YML() also uses as its default read location. 
} \examples{ \dontrun{ -write_proc_medians_yml( +writeProcessRuntime2YML( "/data/scratch/janani/molevolvr_out/", "/data/scratch/janani/molevolvr_out/log_tbl.yml" ) From 823af96d484a1ec075548ce181f52147cff54af5 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Thu, 10 Oct 2024 09:13:26 -0600 Subject: [PATCH 23/61] - remove old .Rd leftovers and update with new docs - let R-CMD sort NAMESPACE --- NAMESPACE | 1 - man/IPG2Lineage.Rd | 3 ++- man/acc2Lineage.Rd | 3 ++- man/acc2lin.Rd | 0 man/efetchIPG.Rd | 3 ++- man/efetch_ipg.Rd | 0 man/ipg2lin.Rd | 0 man/sink.reset.Rd | 0 man/sinkReset.Rd | 1 + 9 files changed, 7 insertions(+), 4 deletions(-) delete mode 100644 man/acc2lin.Rd delete mode 100644 man/efetch_ipg.Rd delete mode 100644 man/ipg2lin.Rd delete mode 100644 man/sink.reset.Rd diff --git a/NAMESPACE b/NAMESPACE index 50af36df..078f971b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -77,7 +77,6 @@ export(prepareColumnParams) export(prepareSingleColumnParams) export(proteinAcc2TaxID) export(proteinAcc2TaxID_old) -export(prot2tax_old) export(removeAsterisks) export(removeEmptyRows) export(removeTails) diff --git a/man/IPG2Lineage.Rd b/man/IPG2Lineage.Rd index e24ab617..f8434c7f 100644 --- a/man/IPG2Lineage.Rd +++ b/man/IPG2Lineage.Rd @@ -38,7 +38,8 @@ This file can be generated using the \link[MolEvolvR]{downloadAssemblySummary} f Describe return, in detail } \description{ -Takes the resulting file of an efetch run on the ipg database and +Takes the resulting file +of an efetch run on the ipg database and Takes the resulting file of an efetch run on the ipg database and append lineage, and taxid columns diff --git a/man/acc2Lineage.Rd b/man/acc2Lineage.Rd index a24bdc9a..836a677f 100644 --- a/man/acc2Lineage.Rd +++ b/man/acc2Lineage.Rd @@ -38,7 +38,8 @@ on the ipg database. If NULL, the file will not be written. 
Defaults to NULL} Describe return, in detail } \description{ -This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set +This function combines 'efetchIPG()' +and 'IPG2Lineage()' to map a set of protein accessions to their assembly (GCA_ID), tax ID, and lineage. Function to map protein accession numbers to lineage diff --git a/man/acc2lin.Rd b/man/acc2lin.Rd deleted file mode 100644 index e69de29b..00000000 diff --git a/man/efetchIPG.Rd b/man/efetchIPG.Rd index 6a5d85a4..5d2e8372 100644 --- a/man/efetchIPG.Rd +++ b/man/efetchIPG.Rd @@ -23,7 +23,8 @@ the ipg database} Describe return, in detail } \description{ -Perform efetch on the ipg database and write the results to out_path +Perform efetch on the ipg database +and write the results to out_path Perform efetch on the ipg database and write the results to out_path } diff --git a/man/efetch_ipg.Rd b/man/efetch_ipg.Rd deleted file mode 100644 index e69de29b..00000000 diff --git a/man/ipg2lin.Rd b/man/ipg2lin.Rd deleted file mode 100644 index e69de29b..00000000 diff --git a/man/sink.reset.Rd b/man/sink.reset.Rd deleted file mode 100644 index e69de29b..00000000 diff --git a/man/sinkReset.Rd b/man/sinkReset.Rd index 0285c0b2..e3fc7ce4 100644 --- a/man/sinkReset.Rd +++ b/man/sinkReset.Rd @@ -8,6 +8,7 @@ sinkReset() } \value{ No return, but run to close all outstanding \code{sink()}s +and handles any errors or warnings that occur during the process. 
} \description{ Sink Reset From b116442be77ea2dc267b638f4ecd604a090a9ede Mon Sep 17 00:00:00 2001 From: Awa Synthia Date: Fri, 11 Oct 2024 01:40:21 +0300 Subject: [PATCH 24/61] document functions Signed-off-by: Awa Synthia --- NAMESPACE | 1 + R/CHANGED-pre-msa-tree.R | 108 ++++++-- R/blastWrappers.R | 51 ++-- R/cleanup.R | 81 +++--- R/combine_analysis.R | 28 ++- R/combine_files.R | 24 +- R/create_lineage_lookup.R | 17 +- R/fa2domain.R | 21 +- R/ipr2viz.R | 121 ++++++--- R/lineage.R | 155 +++++++++--- R/msa.R | 20 +- R/networks_domarch.R | 39 +-- R/networks_gencontext.R | 36 ++- R/plotme.R | 43 ++-- R/plotting.R | 230 ++++++++++++------ R/pre-msa-tree.R | 114 ++++++--- R/reverse_operons.R | 38 ++- man/BinaryDomainNetwork.Rd | 24 +- man/GCA2Lineage.Rd | 15 +- man/GenContextNetwork.Rd | 11 +- man/IPG2Lineage.Rd | 16 ++ man/RepresentativeAccNums.Rd | 23 +- man/acc2FA.Rd | 39 +++ man/acc2Lineage.Rd | 15 +- man/acc2fa.Rd | 16 +- man/addLeaves2Alignment.Rd | 4 + man/addLineage.Rd | 32 ++- man/addName.Rd | 10 + man/addTaxID.Rd | 20 +- man/add_leaves.Rd | 4 + man/add_name.Rd | 9 +- man/alignFasta.Rd | 18 +- man/cleanDomainArchitecture.Rd | 27 +- man/cleanFAHeaders.Rd | 4 +- man/cleanGeneDescription.Rd | 5 +- man/cleanLineage.Rd | 9 +- man/cleanSpecies.Rd | 2 +- man/combine_files.Rd | 26 +- man/combine_full.Rd | 16 +- man/combine_ipr.Rd | 16 +- man/condenseRepeatedDomains.Rd | 2 +- man/convert2TitleCase.Rd | 8 + man/convertAlignment2FA.Rd | 5 + man/convert_aln2fa.Rd | 9 +- man/{countbycolumn.Rd => countByColumn.Rd} | 0 man/createWordCloud2Element.Rd | 13 +- man/createWordCloudElement.Rd | 13 +- man/create_lineage_lookup.Rd | 19 +- man/domain_network.Rd | 17 +- man/downloadAssemblySummary.Rd | 16 +- man/efetchIPG.Rd | 12 +- man/extractAccNum.Rd | 3 +- ...{filterbydomains.Rd => filterByDomains.Rd} | 0 ...terbyfrequency.Rd => filterByFrequency.Rd} | 0 man/{findparalogs.Rd => findParalogs.Rd} | 0 man/find_top_acc.Rd | 26 +- man/gc_undirected_network.Rd | 27 +- 
man/generateAllAlignments2FA.Rd | 19 +- man/generate_all_aln2fa.Rd | 18 +- man/generate_msa.Rd | 15 +- man/get_accnums_from_fasta_file.Rd | 19 +- man/ipr2viz.Rd | 45 +++- man/ipr2viz_web.Rd | 46 +++- man/mapAcc2Name.Rd | 15 +- man/map_acc2name.Rd | 15 +- man/msa_pdf.Rd | 8 +- man/plotLineageDA.Rd | 8 + man/plotLineageDomainRepeats.Rd | 11 +- man/plotLineageHeatmap.Rd | 5 + man/plotLineageNeighbors.Rd | 5 + man/plotLineageQuery.Rd | 20 +- man/plotLineageSunburst.Rd | 31 ++- man/plotStackedLineage.Rd | 39 ++- man/plotSunburst.Rd | 6 +- man/plotUpSet.Rd | 19 +- man/prepareColumnParams.Rd | 17 +- man/prepareSingleColumnParams.Rd | 18 +- man/proteinAcc2TaxID.Rd | 26 +- man/proteinAcc2TaxID_old.Rd | 20 +- man/removeAsterisks.Rd | 10 +- man/removeEmptyRows.Rd | 3 +- man/removeTails.Rd | 3 +- man/renameFA.Rd | 9 + man/rename_fasta.Rd | 9 + man/replaceQuestionMarks.Rd | 4 +- man/reveql.Rd | 19 +- man/reverse_operon.Rd | 21 +- man/runIPRScan.Rd | 24 +- man/run_deltablast.Rd | 29 ++- man/run_rpsblast.Rd | 27 +- man/selectLongestDuplicate.Rd | 9 +- man/shortenLineage.Rd | 24 +- ...rizebylineage.Rd => summarizeByLineage.Rd} | 0 man/theme_genes2.Rd | 13 + man/to_titlecase.Rd | 7 + ...s.Rd => totalGenContextOrDomArchCounts.Rd} | 0 man/validateCountDF.Rd | 10 +- man/wordcloud3.Rd | 54 +++- ...ords2wordcounts.Rd => words2WordCounts.Rd} | 0 man/write.MsaAAMultipleAlignment.Rd | 16 ++ 100 files changed, 1913 insertions(+), 461 deletions(-) create mode 100644 man/acc2FA.Rd rename man/{countbycolumn.Rd => countByColumn.Rd} (100%) rename man/{filterbydomains.Rd => filterByDomains.Rd} (100%) rename man/{filterbyfrequency.Rd => filterByFrequency.Rd} (100%) rename man/{findparalogs.Rd => findParalogs.Rd} (100%) rename man/{summarizebylineage.Rd => summarizeByLineage.Rd} (100%) rename man/{totalgencontextordomarchcounts.Rd => totalGenContextOrDomArchCounts.Rd} (100%) rename man/{words2wordcounts.Rd => words2WordCounts.Rd} (100%) diff --git a/NAMESPACE b/NAMESPACE index 078f971b..50943690 
100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -230,6 +230,7 @@ importFrom(purrr,map2) importFrom(purrr,map_chr) importFrom(purrr,pmap) importFrom(purrr,pmap_dfr) +importFrom(rMSA,kalign) importFrom(readr,cols) importFrom(readr,read_delim) importFrom(readr,read_file) diff --git a/R/CHANGED-pre-msa-tree.R b/R/CHANGED-pre-msa-tree.R index c4a97589..76c13859 100644 --- a/R/CHANGED-pre-msa-tree.R +++ b/R/CHANGED-pre-msa-tree.R @@ -40,10 +40,14 @@ api_key <- Sys.getenv("ENTREZ_API_KEY", unset = "YOUR_KEY_HERE") #' @param y Delimitter. Default is space (" "). #' @seealso chartr, toupper, and tolower. #' -#' @return +#' @return Character vector with the input strings converted to title case. +#' #' @export #' #' @examples +#' # Convert a single string to title case +#' convert2TitleCase("hello world") # Returns "Hello World" +#' convert2TitleCase <- function(x, y = " ") { s <- strsplit(x, y)[[1]] paste(toupper(substring(s, 1, 1)), substring(s, 2), @@ -76,7 +80,8 @@ convert2TitleCase <- function(x, y = " ") { #' @importFrom stringr str_sub #' @importFrom tidyr replace_na separate #' -#' @return +#' @return A data frame containing the enriched alignment data with lineage +#' information. #' #' @details The alignment file would need two columns: 1. accession + #' number and 2. alignment. The protein homolog accession to lineage mapping + @@ -203,6 +208,14 @@ addLeaves2Alignment <- function(aln_file = "", #' @export #' #' @examples +#' # Example usage of the addName function +#' data <- data.frame( +#' AccNum = c("ACC123", "ACC456"), +#' Species = c("Homo sapiens", "Mus musculus"), +#' Lineage = c("Eukaryota>Chordata", "Eukaryota>Chordata") +#' ) +#' enriched_data <- addName(data) +#' print(enriched_data) addName <- function(data, accnum_col = "AccNum", spec_col = "Species", lin_col = "Lineage", lin_sep = ">", out_col = "Name") { @@ -278,7 +291,9 @@ addName <- function(data, #' @note Please refer to the source code if you have alternate + #' file formats and/or column names. 
#' -#' @return +#' @return A character string representing the FASTA formatted sequences. +#' If `fa_outpath` is provided, the FASTA will also be saved to the specified +#' file. #' @export #' #' @examples @@ -323,18 +338,24 @@ convertAlignment2FA <- function(aln_file = "", #' Default renameFA() replacement function. Maps an accession number to its name #' #' @param line The line of a fasta file starting with '>' -#' @param acc2name Data Table containing a column of accession numbers and a name column +#' @param acc2name Data Table containing a column of accession numbers and a +#' name column #' @param acc_col Name of the column containing Accession numbers -#' @param name_col Name of the column containing the names that the accession numbers +#' @param name_col Name of the column containing the names that the accession +#' numbers #' are mapped to #' #' @importFrom dplyr filter pull #' @importFrom rlang sym #' -#' @return +#' @return A character string representing the updated FASTA line, where the +#' accession number is replaced with its corresponding name. #' @export #' #' @examples +#' \dontrun{ +#' mapAcc2Name(">P12345 some description", acc2name, "AccNum", "Name") +#' } mapAcc2Name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") { # change to be the name equivalent to an addNames column # Find the first ' ' @@ -360,10 +381,14 @@ mapAcc2Name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") { #' @importFrom purrr map #' @importFrom readr read_lines write_lines #' -#' @return +#' @return A character vector of the modified lines in the FASTA file. #' @export #' #' @examples +#' \dontrun{ +#' renameFA("path/to/input.fasta", +#' "path/to/output.fasta", mapAcc2Name, acc2name) +#' } renameFA <- function(fa_path, outpath, replacement_function = mapAcc2Name, ...) { lines <- read_lines(fa_path) @@ -389,20 +414,26 @@ renameFA <- function(fa_path, outpath, #' #' @param aln_path Character. Path to alignment files. 
#' Default is 'here("data/rawdata_aln/")' -#' @param fa_outpath Character. Path to file. Master protein file with AccNum & lineages. +#' @param fa_outpath Character. Path to file. Master protein file with AccNum & +#' lineages. #' Default is 'here("data/rawdata_tsv/all_semiclean.txt")' #' @param lin_file Character. Path to the written fasta file. #' Default is 'here("data/alns/")'. -#' @param reduced Boolean. If TRUE, the fasta file will contain only one sequence per lineage. +#' @param reduced Boolean. If TRUE, the fasta file will contain only one +#' sequence per lineage. #' Default is 'FALSE'. #' #' @importFrom purrr pmap #' @importFrom stringr str_replace_all #' -#' @return +#' @return NULL. The function saves the output FASTA files to the specified +#' directory. #' -#' @details The alignment files would need two columns separated by spaces: 1. AccNum and 2. alignment. The protein homolog file should have AccNum, Species, Lineages. -#' @note Please refer to the source code if you have alternate + file formats and/or column names. +#' @details The alignment files would need two columns separated by spaces: +#' 1. AccNum and 2. alignment. The protein homolog file should have AccNum, +#' Species, Lineages. +#' @note Please refer to the source code if you have alternate + file formats +#' and/or column names. #' #' @export #' @@ -449,24 +480,29 @@ generateAllAlignments2FA <- function(aln_path = here("data/rawdata_aln/"), #' @author Samuel Chen, Janani Ravi #' @keywords accnum, fasta #' -#' @param accessions Character vector containing protein accession numbers to generate fasta sequences for. +#' @param accessions Character vector containing protein accession numbers to +#' generate fasta sequences for. #' Function may not work for vectors of length > 10,000 #' @param outpath [str] Location where fasta file should be written to. -#' @param plan +#' @param plan Character string specifying the parallel processing strategy to +#' use with the `future` package. 
Default is "sequential". #' #' @importFrom Biostrings readAAStringSet #' @importFrom future future plan value #' @importFrom purrr map #' @importFrom rentrez entrez_fetch #' -#' @return +#' @return A logical value indicating whether the retrieval and conversion were +#' successful. Returns `TRUE` if successful and `FALSE` otherwise. #' @export #' #' @examples #' \dontrun{ -#' acc2FA(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), outpath = "my_proteins.fasta") +#' acc2FA(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), +#' outpath = "my_proteins.fasta") #' Entrez:accessions <- rep("ANY95992.1", 201) |> acc2FA(outpath = "entrez.fa") -#' EBI:accessions <- c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1") |> acc2FA(outpath = "ebi.fa") +#' EBI:accessions <- c("P12345", "Q9UHC1", +#' "O15530", "Q14624", "P0DTD1") |> acc2FA(outpath = "ebi.fa") #' } acc2FA <- function(accessions, outpath, plan = "sequential") { # validation @@ -539,7 +575,8 @@ acc2FA <- function(accessions, outpath, plan = "sequential") { return(result) } -#' Function to generate a vector of one Accession number per distinct observation from 'reduced' column +#' Function to generate a vector of one Accession number per distinct +#' observation from 'reduced' column #' #' @author Samuel Chen, Janani Ravi #' @@ -552,14 +589,20 @@ acc2FA <- function(accessions, outpath, plan = "sequential") { #' @importFrom dplyr filter pull #' @importFrom rlang sym #' -#' @return +#' @return A character vector containing one Accession number per distinct +#' observation from the specified reduced column. 
#' @export #' #' @examples +#' \dontrun{ +#' representative_accessions <- RepresentativeAccNums(prot_data, +#' reduced = "Lineage", accnum_col = "AccNum") +#' } RepresentativeAccNums <- function(prot_data, reduced = "Lineage", accnum_col = "AccNum") { - # Get Unique reduced column and then bind the AccNums back to get one AccNum per reduced column + # Get Unique reduced column and then bind the AccNums back to get one + # AccNum per reduced column reduced_sym <- sym(reduced) accnum_sym <- sym(accnum_col) @@ -590,8 +633,10 @@ RepresentativeAccNums <- function(prot_data, #' @author Samuel Chen, Janani Ravi #' #' @param fasta_file Path to the FASTA file to be aligned -#' @param tool Type of alignment tool to use. One of three options: "Muscle", "ClustalO", or "ClustalW" -#' @param outpath Path to write the resulting alignment to as a FASTA file. If NULL, no file is written +#' @param tool Type of alignment tool to use. One of three options: "Muscle", +#' "ClustalO", or "ClustalW" +#' @param outpath Path to write the resulting alignment to as a FASTA file. +#' If NULL, no file is written #' #' @importFrom Biostrings readAAStringSet #' @importFrom msa msaClustalOmega msaMuscle msaClustalW @@ -600,6 +645,10 @@ RepresentativeAccNums <- function(prot_data, #' @export #' #' @examples +#' \dontrun{ +#' aligned_sequences <- alignFasta("my_sequences.fasta", +#' tool = "Muscle", outpath = "aligned_output.fasta") +#' } alignFasta <- function(fasta_file, tool = "Muscle", outpath = NULL) { fasta <- readAAStringSet(fasta_file) @@ -628,10 +677,14 @@ alignFasta <- function(fasta_file, tool = "Muscle", outpath = NULL) { #' @importFrom Biostrings toString unmasked #' @importFrom readr write_file #' -#' @return +#' @return Character string representing the content of the written FASTA file. 
#' @export #' #' @examples +#' \dontrun{ +#' alignment <- msaMuscle("my_sequences.fasta") +#' write.MsaAAMultipleAlignment(alignment, "aligned_sequences.fasta") +#' } write.MsaAAMultipleAlignment <- function(alignment, outpath) { l <- length(rownames(alignment)) fasta <- "" @@ -647,14 +700,19 @@ write.MsaAAMultipleAlignment <- function(alignment, outpath) { #' Get accnums from fasta file #' -#' @param fasta_file +#' @param fasta_file Character. The path to the FASTA file from which +#' accession numbers will be extracted. #' #' @importFrom stringi stri_extract_all_regex #' -#' @return +#' @return A character vector containing the extracted accession numbers. #' @export #' #' @examples +#' \dontrun{ +#' accnums <- get_accnums_from_fasta_file("my_sequences.fasta") +#' print(accnums) +#' } get_accnums_from_fasta_file <- function(fasta_file) { txt <- read_file(fasta_file) accnums <- stringi::stri_extract_all_regex(fasta_file, "(?<=>)[\\w,.]+")[[1]] diff --git a/R/blastWrappers.R b/R/blastWrappers.R index 552b1ff6..2a0325ca 100755 --- a/R/blastWrappers.R +++ b/R/blastWrappers.R @@ -3,20 +3,28 @@ #' Run DELTABLAST to find homologs for proteins of interest #' #' @author Samuel Chen, Janani Ravi +#' @description +#' This function executes a Delta-BLAST search using the specified parameters +#' and database. It sets the BLAST database path, runs the Delta-BLAST command +#' with the given query, and outputs the results. #' -#' @param deltablast_path -#' @param db_search_path Path to the BLAST databases -#' @param db -#' @param query -#' @param evalue -#' @param out -#' @param num_alignments -#' @param num_threads +#' @param deltablast_path Path to the Delta-BLAST executable. +#' @param db_search_path Path to the BLAST databases. +#' @param db Name of the BLAST database to search against (default is "refseq"). +#' @param query Path to the input query file. +#' @param evalue E-value threshold for reporting matches (default is "1e-5"). 
+#' @param out Path to the output file where results will be saved. +#' @param num_alignments Number of alignments to report. +#' @param num_threads Number of threads to use for the search (default is 1). #' -#' @return +#' @return This function does not return a value; it outputs results to the +#' specified file. #' @export #' #' @examples +#' \dontrun{ +#' run_deltablast(deltablast_path, db_search_path, query, out, num_alignments) +#' } run_deltablast <- function(deltablast_path, db_search_path, db = "refseq", query, evalue = "1e-5", out, num_alignments, num_threads = 1) { @@ -42,18 +50,27 @@ run_deltablast <- function(deltablast_path, db_search_path, #' Run RPSBLAST to generate domain architectures for proteins of interest #' -#' @param rpsblast_path -#' @param db_search_path Path to the BLAST databases -#' @param db -#' @param query -#' @param evalue -#' @param out -#' @param num_threads +#' @description +#' This function executes an RPS-BLAST search to generate domain architectures +#' for specified proteins. It sets the BLAST database path, runs the RPS-BLAST +#' command with the provided query, and outputs the results. #' -#' @return +#' @param rpsblast_path Path to the RPS-BLAST executable. +#' @param db_search_path Path to the BLAST databases. +#' @param db Name of the BLAST database to search against (default is "refseq"). +#' @param query Path to the input query file. +#' @param evalue E-value threshold for reporting matches (default is "1e-5"). +#' @param out Path to the output file where results will be saved. +#' @param num_threads Number of threads to use for the search (default is 1). +#' +#' @return This function does not return a value; it outputs results to the +#' specified file. 
#' @export #' #' @examples +#' \dontrun{ +#' run_rpsblast(rpsblast_path, db_search_path, query, out) +#' } run_rpsblast <- function(rpsblast_path, db_search_path, db = "refseq", query, evalue = "1e-5", out, num_threads = 1) { diff --git a/R/cleanup.R b/R/cleanup.R index 4fe074ee..a8e79e33 100755 --- a/R/cleanup.R +++ b/R/cleanup.R @@ -46,7 +46,8 @@ cleanString <- function(string) { # get_sequences() function to extract accession numbers #' extractAccNum #' -#' @param string +#' @param string A string from which to extract the accession number. +#' The string may contain accession information delimited by `|` or spaces. #' #' @return Describe return, in detail #' @export @@ -103,7 +104,9 @@ ensureUniqAccNum <- function(accnums) { #' Parse accesion numbers from fasta and add a #' suffix of the ith occurence to handle duplicates #' -#' @param fasta +#' @param fasta An [XStringSet] object representing the sequences from a +#' FASTA file. The sequence names (headers) will be adjusted for uniqueness +#' and sanitized. #' #' @importFrom purrr map_chr #' @importFrom fs path_sanitize @@ -148,7 +151,8 @@ cleanFAHeaders <- function(fasta) { #' #' @importFrom dplyr as_tibble filter #' -#' @return Describe return, in detail +#' @return A tibble with rows removed where the specified column contains +#' `"-"`, `"NA"`, or an empty string. #' @export #' #' @examples @@ -183,7 +187,7 @@ removeEmptyRows <- function(prot, by_column = "DomArch") { #' @param by_column Column in which repeats are condensed to domain+domain -> domain(s). #' @param excluded_prots Vector of strings that condenseRepeatedDomains should not reduce to (s). Defaults to c() #' -#' @return Describe return, in detail +#' @return A data frame with condensed repeated domains in the specified column. 
#' @export #' #' @importFrom dplyr pull @@ -244,7 +248,9 @@ condenseRepeatedDomains <- function(prot, by_column = "DomArch", excluded_prots #' @param prot DataTable to operate on #' @param by_column Column to operate on #' -#' @return Describe return, in detail +#' @return The original data frame with the specified column updated. All +#' consecutive '?' characters will be replaced with 'X(s)', and individual '?' +#' characters will be replaced with 'X'. #' @export #' #' @importFrom dplyr pull @@ -273,19 +279,21 @@ replaceQuestionMarks <- function(prot, by_column = "GenContext") { } -#' Remove Astrk +#' Remove Asterisk #' #' @description #' Remove the asterisks from a column of data #' Used for removing * from GenContext columns #' -#' @param query_data -#' @param colname +#' @param query_data A data frame containing the data to be processed. +#' @param colname The name of the column from which asterisks should be removed. +#' Defaults to "GenContext". #' #' @importFrom purrr map #' @importFrom stringr str_remove_all #' -#' @return Describe return, in detail +#' @return The original data frame with asterisks removed from the specified +#' column. #' @export #' #' @examples @@ -315,7 +323,8 @@ removeAsterisks <- function(query_data, colname = "GenContext") { #' @param by_column Default column is 'DomArch'. Can also take 'ClustName', 'GenContext' as input. #' @param keep_domains Default is False Keeps tail entries that contain the query domains. #' -#' @return Describe return, in detail +#' @return The original data frame with singletons removed from the specified +#' column. #' @export #' #' @importFrom dplyr count filter group_by pull n summarize @@ -374,7 +383,7 @@ removeTails <- function(prot, by_column = "DomArch", #' #' @importFrom stringr coll str_replace_all #' -#' @return Describe return, in detail +#' @return The original data frame with Species cleaned. 
#' @export #' #' @examples @@ -504,25 +513,34 @@ cleanClusters <- function(prot, #' The original data frame is returned with the clean DomArchs column and the old domains in the DomArchs.old column. #' #' @param prot A data frame containing a 'DomArch' column -#' @param old -#' @param new +#' @param old The name of the original column containing domain architecture. +#' Defaults to "DomArch.orig". +#' @param new The name of the cleaned column to be created. Defaults to +#' "DomArch". #' @param domains_keep A data frame containing the domain names to be retained. -#' @param domains_rename A data frame containing the domain names to be replaced in a column 'old' and the +#' @param domains_rename A data frame containing the domain names to be replaced +#' in a column 'old' and the #' corresponding replacement values in a column 'new'. -#' @param condenseRepeatedDomains Boolean. If TRUE, repeated domains in 'DomArch' are condensed. Default is TRUE. -#' @param removeTails Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE. -#' @param removeEmptyRows Boolean. If TRUE, rows with empty/unnecessary values in 'DomArch' are removed. Default is FALSE. -#' @param domains_ignore A data frame containing the domain names to be removed in a column called 'domains' +#' @param condenseRepeatedDomains Boolean. If TRUE, repeated domains in +#' 'DomArch' are condensed. Default is TRUE. +#' @param removeTails Boolean. If TRUE, 'ClustName' will be filtered based on +#' domains to keep/remove. Default is FALSE. +#' @param removeEmptyRows Boolean. If TRUE, rows with empty/unnecessary values +#' in 'DomArch' are removed. Default is FALSE. 
+#' @param domains_ignore A data frame containing the domain names to be removed
+#' in a column called 'domains'
 #'
 #' @importFrom dplyr pull
 #' @importFrom stringr coll str_replace_all
 #'
-#' @return The original data frame is returned with the clean DomArchs column and the old domains in the DomArchs.old column.
+#' @return The original data frame is returned with the clean DomArchs column
+#' and the old domains in the DomArchs.old column.
 #' @export
 #'
 #' @examples
 #' \dontrun{
-#' cleanDomainArchitecture(prot, TRUE, FALSE, domains_keep, domains_rename, domains_ignore = NULL)
+#' cleanDomainArchitecture(prot, TRUE, FALSE,
+#' domains_keep, domains_rename, domains_ignore = NULL)
 #' }
 cleanDomainArchitecture <- function(prot, old = "DomArch.orig", new = "DomArch",
                                     domains_keep, domains_rename,
@@ -658,8 +676,9 @@ cleanGenomicContext <- function(prot, domains_rename = data.frame("old" = charac
 #' Cleanup GeneDesc
 #'
-#' @param prot
-#' @param column
+#' @param prot A data frame containing the gene descriptions.
+#' @param column The name of the column from which gene descriptions are pulled
+#' for cleanup.
 #'
 #' @return Return trailing period that occurs in GeneDesc column
 #' @export
@@ -677,13 +696,16 @@ cleanGeneDescription <- function(prot, column) {
 #' Pick Longer Duplicate
 #'
-#' @param prot
-#' @param column
+#' @param prot A data frame containing the data, with at least one column
+#' named 'AccNum' for identification of duplicates.
+#' @param column The name of the column from which the longest entry among
+#' duplicates will be selected.
 #'
 #' @importFrom dplyr arrange filter group_by pull n select summarize
 #' @importFrom rlang sym
 #'
-#' @return Describe return, in detail
+#' @return A data frame containing only the longest entries among duplicates
+#' based on the specified column.
#' @export #' #' @examples @@ -728,10 +750,13 @@ selectLongestDuplicate <- function(prot, column) { #' Cleanup Lineage #' -#' @param prot -#' @param lins_rename +#' @param prot A data frame containing a 'Lineage' column that needs to be +#' cleaned up. +#' @param lins_rename A data frame with two columns: 'old' containing terms +#' to be replaced and 'new' containing the corresponding replacement terms. #' -#' @return Describe return, in detail +#' @return The original data frame with the 'Lineage' column updated based on +#' the provided replacements. #' @export #' #' @examples diff --git a/R/combine_analysis.R b/R/combine_analysis.R index bb3b3ce2..2361c213 100755 --- a/R/combine_analysis.R +++ b/R/combine_analysis.R @@ -8,15 +8,23 @@ #' Combining full_analysis files #' -#' @param inpath -#' @param ret +#' @param inpath Character. The path to the directory containing the +#' `.full_analysis.tsv` files to be combined. +#' @param ret Logical. If TRUE, the function will return the combined data frame. +#' Default is FALSE, meaning it will only write the file and not return the data. #' #' @importFrom readr write_tsv #' -#' @return +#' @return If `ret` is TRUE, a data frame containing the combined data from all +#' input files. If `ret` is FALSE, the function writes the combined data to a +#' TSV file named `cln_combined.tsv` in the specified directory and returns NULL. +#' #' @export #' #' @examples +#' \dontrun{ +#' combined_data <- combine_full("path/to/full_analysis/files", ret = TRUE) +#' } combine_full <- function(inpath, ret = FALSE) { ## Combining full_analysis files full_combnd <- combine_files(inpath, @@ -35,15 +43,23 @@ combine_full <- function(inpath, ret = FALSE) { #' Combining clean ipr files #' -#' @param inpath -#' @param ret +#' @param inpath Character. The path to the directory containing the +#' `.iprscan_cln.tsv` files to be combined. +#' @param ret Logical. If TRUE, the function will return the combined data frame. 
+#' Default is FALSE, meaning it will only write the file and not return the data. #' #' @importFrom readr write_tsv #' -#' @return +#' @return If `ret` is TRUE, a data frame containing the combined data from all +#' input files. If `ret` is FALSE, the function writes the combined data to a +#' TSV file named `ipr_combined.tsv` in the specified directory and returns NULL. +#' #' @export #' #' @examples +#' \dontrun{ +#' combined_ipr_data <- combine_ipr("path/to/ipr/files", ret = TRUE) +#' } combine_ipr <- function(inpath, ret = FALSE) { ## Combining clean ipr files ipr_combnd <- combine_files(inpath, diff --git a/R/combine_files.R b/R/combine_files.R index 76c5fa09..088f2d7b 100755 --- a/R/combine_files.R +++ b/R/combine_files.R @@ -24,20 +24,30 @@ #' #' @author Janani Ravi #' -#' @param inpath String of 'master' path where the files reside (recursive=T) -#' @param pattern Character vector containing search pattern for files -#' @param delim -#' @param skip -#' @param col_names Takes logical T/F arguments OR column names vector; -#' usage similar to col_names parameter in `readr::read_delim` +#' @param inpath Character. The master directory path where the files reside. +#' The search is recursive (i.e., it will look in subdirectories as well). +#' @param pattern Character. A search pattern to identify files to be combined. +#' Default is "*full_analysis.tsv". +#' @param delim Character. The delimiter used in the input files. +#' Default is tab ("\t"). +#' @param skip Integer. The number of lines to skip at the beginning of each file. +#' Default is 0. +#' @param col_names Logical or character vector. If TRUE, the first row of each file +#' is treated as column names. Alternatively, a character vector can +#' be provided to specify custom column names. #' #' @importFrom purrr pmap_dfr #' @importFrom readr cols #' -#' @return +#' @return A data frame containing the combined contents of all matched files. 
+#' Each row will include a new column "ByFile" indicating the source file of the data.
+#'
 #' @export
 #'
 #' @examples
+#' \dontrun{
+#' combined_data <- combine_files(inpath = "../molevol_data/project_data/phage_defense/")
+#' }
 combine_files <- function(inpath = c("../molevol_data/project_data/phage_defense/"),
                           pattern = "*full_analysis.tsv",
                           delim = "\t", skip = 0,
diff --git a/R/create_lineage_lookup.R b/R/create_lineage_lookup.R
index e7374df3..7c581d2e 100644
--- a/R/create_lineage_lookup.R
+++ b/R/create_lineage_lookup.R
@@ -7,12 +7,12 @@
 #'
 #' @author Samuel Chen
 #'
-#' @param lineage_file Path to the rankedlineage.dmp file containing taxid's and their
-#' corresponding taxonomic rank. rankedlineage.dmp can be downloaded at
+#' @param lineage_file Path to the rankedlineage.dmp file containing taxid's
+#' and their corresponding taxonomic rank. rankedlineage.dmp can be downloaded at
 #' https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/new_taxdump/
 #' @param outfile File the resulting lineage lookup table should be written to
-#' @param taxonomic_rank The upperbound of taxonomic rank that the lineage includes. The lineaege will
-#' include superkingdom>...>taxonomic_rank.
+#' @param taxonomic_rank The upperbound of taxonomic rank that the lineage
+#' includes. The lineage will include superkingdom>...>taxonomic_rank.
 #' Choices include: "supperkingdom", "phylum", "class","order", "family",
 #' "genus", and "species"
 #'
@@ -22,10 +22,17 @@
 #' @importFrom stringr str_locate str_replace_all
 #' @importFrom tidyr unite
 #'
-#' @return
+#' @return A tibble containing the tax IDs and their respective lineages up to
+#' the specified taxonomic rank, saved as a tab-separated file.
+#' #' @export #' #' @examples +#' \dontrun{ +#' create_lineage_lookup(lineage_file = "data/rankedlineage.dmp", +#' outfile = "data/lineage_lookup.tsv", +#' taxonomic_rank = "family") +#' } create_lineage_lookup <- function(lineage_file = here("data/rankedlineage.dmp"), outfile, taxonomic_rank = "phylum") { shorten_NA <- function(Lineage) { diff --git a/R/fa2domain.R b/R/fa2domain.R index 6dc6f622..29803b85 100644 --- a/R/fa2domain.R +++ b/R/fa2domain.R @@ -5,16 +5,29 @@ # interproscan CLI will return a completely empty file (0Bytes) #' runIPRScan +#' +#' Run InterProScan on a given FASTA file and save the results to an +#' output file. #' -#' @param filepath_fasta -#' @param filepath_out -#' @param appl +#' @param filepath_fasta A string representing the path to the input FASTA file. +#' @param filepath_out A string representing the base path for the output file. +#' @param appl A character vector specifying the InterProScan applications to +#' use (e.g., "Pfam", "Gene3D"). Default is `c("Pfam", "Gene3D")`. #' #' @importFrom stringr str_glue #' -#' @return +#' @return A data frame containing the results from the InterProScan output +#' TSV file. #' #' @examples +#' \dontrun{ +#' results <- runIPRScan( +#' filepath_fasta = "path/to/your_fasta_file.fasta", +#' filepath_out = "path/to/output_file", +#' appl = c("Pfam", "Gene3D") +#' ) +#' print(results) +#' } runIPRScan <- function( filepath_fasta, filepath_out, # do not inlucde file extension since ipr handles this diff --git a/R/ipr2viz.R b/R/ipr2viz.R index 0d417be0..c4006e51 100644 --- a/R/ipr2viz.R +++ b/R/ipr2viz.R @@ -19,10 +19,17 @@ #' #' @importFrom ggplot2 element_blank element_line theme theme_grey #' -#' @return +#' @return A ggplot2 theme object. 
#' @export -#' #' @examples +#' library(ggplot2) +#' +#' # Create a sample plot using the custom theme +#' ggplot(mtcars, aes(x = wt, y = mpg)) + +#' geom_point() + +#' theme_genes2() + +#' labs(title = "Car Weight vs MPG") +#' theme_genes2 <- function() { ggplot2::theme_grey() + ggplot2::theme( panel.background = ggplot2::element_blank(), @@ -43,11 +50,16 @@ theme_genes2 <- function() { ################################## #' Group by lineage + DA then take top 20 #' -#' @param infile_full -#' @param DA_col -#' @param lin_col -#' @param n -#' @param query +#' @param infile_full A data frame containing the full dataset with lineage and +#' domain architecture information. +#' @param DA_col A string representing the name of the domain architecture +#' column. Default is "DomArch.Pfam". +#' @param lin_col A string representing the name of the lineage column. +#' Default is "Lineage_short". +#' @param n An integer specifying the number of top accession numbers to return. +#' Default is 20. +#' @param query A string for filtering a specific query name. If it is not +#' "All", only the data matching this query will be processed. #' #' @importFrom dplyr arrange filter group_by select summarise #' @importFrom shiny showNotification @@ -55,10 +67,16 @@ theme_genes2 <- function() { #' @importFrom rlang sym #' @importFrom rlang .data #' -#' @return +#' @return A vector of the top N accession numbers (`AccNum`) based on counts +#' grouped by lineage and domain architecture. 
#' @export #' #' @examples +#' \dontrun{ +#' top_accessions <- find_top_acc(infile_full = my_data, +#' DA_col = "DomArch.Pfam", lin_col = "Lineage_short", +#' n = 20, query = "specific_query_name") +#' } find_top_acc <- function(infile_full, DA_col = "DomArch.Pfam", lin_col = "Lineage_short", @@ -94,16 +112,26 @@ find_top_acc <- function(infile_full, ############################################# #' IPR2Viz #' -#' @param infile_ipr -#' @param infile_full -#' @param accessions -#' @param analysis -#' @param group_by -#' @param topn -#' @param name -#' @param text_size -#' @param query -#' +#' @param infile_ipr A path to the input IPR file (TSV format) containing +#' domain information. +#' @param infile_full A path to the full input file (TSV format) containing +#' lineage and accession information. +#' @param accessions A character vector of accession numbers to filter the +#' analysis. Default is an empty vector. +#' @param analysis A character vector specifying the types of analysis to +#' include (e.g., "Pfam", "Phobius", "TMHMM", "Gene3D"). Default is a +#' vector of these analyses. +#' @param group_by A string specifying how to group the visualization. +#' Default is "Analysis". Options include "Analysis" or "Query". +#' @param topn An integer specifying the number of top accessions to visualize. +#' Default is 20. +#' @param name A string representing the name to use for y-axis labels. +#' Default is "Name". +#' @param text_size An integer specifying the text size for the plot. +#' Default is 15. +#' @param query A string for filtering a specific query name. If it is not +#' "All", only the data matching this query will be processed. 
+#' #' @importFrom dplyr distinct filter select #' @importFrom gggenes geom_gene_arrow geom_subgene_arrow #' @importFrom ggplot2 aes aes_string as_labeller element_text facet_wrap ggplot guides margin scale_fill_manual theme theme_minimal unit ylab @@ -111,10 +139,22 @@ find_top_acc <- function(infile_full, #' @importFrom tidyr pivot_wider #' @importFrom stats as.formula #' -#' @return +#' @return A ggplot object representing the domain architecture visualization. #' @export #' #' @examples +#' \dontrun{ +#' plot <- ipr2viz(infile_ipr = "path/to/ipr_file.tsv", +#' infile_full = "path/to/full_file.tsv", +#' accessions = c("ACC123", "ACC456"), +#' analysis = c("Pfam", "TMHMM"), +#' group_by = "Analysis", +#' topn = 20, +#' name = "Gene Name", +#' text_size = 15, +#' query = "All") +#' print(plot) +#' } ipr2viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(), analysis = c("Pfam", "Phobius", "TMHMM", "Gene3D"), group_by = "Analysis", # "Analysis" @@ -250,15 +290,25 @@ ipr2viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(), #' IPR2Viz Web #' -#' @param infile_ipr -#' @param accessions -#' @param analysis -#' @param group_by -#' @param name -#' @param text_size -#' @param legend_name -#' @param cols -#' @param rows +#' @param infile_ipr A path to the input IPR file (TSV format) containing +#' domain information. +#' @param accessions A character vector of accession numbers to filter the +#' analysis. +#' @param analysis A character vector specifying the types of analysis to +#' include (e.g., "Pfam", "Phobius", "TMHMM", "Gene3D"). Default is a vector +#' of these analyses. +#' @param group_by A string specifying how to group the visualization. +#' Default is "Analysis". Options include "Analysis" or "Query". +#' @param name A string representing the name to use for y-axis labels. +#' Default is "Name". +#' @param text_size An integer specifying the text size for the plot. +#' Default is 15. 
+#' @param legend_name A string representing the column to use for legend labels.
+#' Default is "ShortName".
+#' @param cols An integer specifying the number of columns in the facet wrap.
+#' Default is 5.
+#' @param rows An integer specifying the number of rows in the legend.
+#' Default is 10.
 #'
 #' @importFrom dplyr arrange distinct filter select
 #' @importFrom gggenes geom_gene_arrow geom_subgene_arrow
 #' @importFrom ggplot2 aes aes_string as_labeller element_text facet_wrap ggplot guides margin scale_fill_manual theme theme_minimal unit ylab
 #' @importFrom readr read_tsv
 #' @importFrom tidyr pivot_wider
 #'
-#' @return
+#' @return A ggplot object representing the domain architecture visualization
+#' for web display.
 #' @export
 #'
 #' @examples
+#' \dontrun{
+#' plot <- ipr2viz_web(infile_ipr = "path/to/ipr_file.tsv",
+#' accessions = c("ACC123", "ACC456"),
+#' analysis = c("Pfam", "TMHMM"),
+#' group_by = "Analysis",
+#' name = "Gene Name",
+#' text_size = 15,
+#' legend_name = "ShortName",
+#' cols = 5,
+#' rows = 10)
+#' print(plot)
+#' }
 ipr2viz_web <- function(infile_ipr, accessions,
                         analysis = c("Pfam", "Phobius", "TMHMM", "Gene3D"),
diff --git a/R/lineage.R b/R/lineage.R
index d14246d7..ea1cd13a 100644
--- a/R/lineage.R
+++ b/R/lineage.R
@@ -11,17 +11,24 @@
 #'
 #' @author Samuel Chen, Janani Ravi
 #'
-#' @param outpath String of path where the assembly summary file should be written
-#' @param keep Character vector containing which columns should be retained and downloaded
+#' @param outpath String of path where the assembly summary file should be
+#' written
+#' @param keep Character vector containing which columns should be retained and
+#' downloaded
 #'
 #' @importFrom data.table fwrite setnames
 #' @importFrom dplyr bind_rows select
 #' @importFrom biomartr getKingdomAssemblySummary
 #'
-#' @return
+#' @return A tab-separated file containing the assembly summary. The function
+#' does not return any value but writes the output directly to the specified file.
#' @export #' #' @examples +#' \dontrun{ +#' downloadAssemblySummary(outpath = "assembly_summary.tsv", +#' keep = c("assembly_accession", "taxid", "organism_name")) +#' } downloadAssemblySummary <- function(outpath, keep = c( "assembly_accession", "taxid", @@ -78,15 +85,24 @@ downloadAssemblySummary <- function(outpath, #' @param lineagelookup_path String of the path to the lineage lookup file #' (taxid to lineage mapping). This file can be generated using the #' "create_lineage_lookup()" function -#' @param acc_col +#' @param acc_col Character. The name of the column in `prot_data` containing +#' accession numbers. Default is "AccNum". #' #' @importFrom dplyr pull #' @importFrom data.table fread setnames #' -#' @return +#' @return A dataframe containing the merged information of GCA_IDs, TaxIDs, +#' and their corresponding lineage up to the phylum level. The dataframe +#' will include information from the input `prot_data` and lineage data. +#' #' @export #' #' @examples +#' \dontrun{ +#' result <- GCA2Lineage(prot_data = my_prot_data, +#' assembly_path = "path/to/assembly_summary.txt", +#' lineagelookup_path = "path/to/lineage_lookup.tsv") +#' } GCA2Lineage <- function(prot_data, assembly_path = "/data/research/jravilab/common_data/assembly_summary_genbank.txt", lineagelookup_path = "/data/research/jravilab/common_data/lineage_lookup.tsv", @@ -135,20 +151,34 @@ GCA2Lineage <- function(prot_data, ################################### #' addLineage #' -#' @param df -#' @param acc_col -#' @param assembly_path -#' @param lineagelookup_path -#' @param ipgout_path -#' @param plan +#' @param df Dataframe containing accession numbers. The dataframe should +#' have a column specified by `acc_col` that contains these accession numbers. +#' @param acc_col Character. The name of the column in `df` containing +#' accession numbers. Default is "AccNum". +#' @param assembly_path String. 
The path to the assembly summary file generated +#' using the `downloadAssemblySummary()` function. +#' @param lineagelookup_path String. The path to the lineage lookup file (taxid +#' to lineage mapping) generated using the `create_lineage_lookup()` function. +#' @param ipgout_path String. Optional path to save intermediate output files. +#' Default is NULL. +#' @param plan Character. Specifies the execution plan for parallel processing. +#' Default is "multicore". #' #' @importFrom dplyr pull #' @importFrom rlang sym #' -#' @return +#' @return A dataframe that combines the original dataframe `df` with lineage +#' information retrieved based on the provided accession numbers. +#' #' @export #' #' @examples +#' \dontrun{ +#' enriched_df <- addLineage(df = my_data, +#' acc_col = "AccNum", +#' assembly_path = "path/to/assembly_summary.txt", +#' lineagelookup_path = "path/to/lineage_lookup.tsv") +#' } addLineage <- function(df, acc_col = "AccNum", assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "multicore") { acc_sym <- sym(acc_col) @@ -194,12 +224,23 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path, #' (taxid to lineage mapping). This file can be generated using the #' @param ipgout_path Path to write the results of the efetch run of the accessions #' on the ipg database. If NULL, the file will not be written. Defaults to NULL -#' @param plan +#' @param plan Character. Specifies the execution plan for parallel processing. +#' Default is "multicore". #' -#' @return +#' @return A dataframe containing lineage information mapped to the given protein +#' accessions. The dataframe includes relevant columns such as TaxID, GCA_ID, +#' Protein, Protein Name, Species, and Lineage. 
#' @export #' #' @examples +#' \dontrun{ +#' lineage_data <- acc2Lineage( +#' accessions = c("P12345", "Q67890"), +#' assembly_path = "path/to/assembly_summary.txt", +#' lineagelookup_path = "path/to/lineage_lookup.tsv", +#' ipgout_path = "path/to/output.txt" +#' ) +#' } acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "multicore") { tmp_ipg <- F @@ -235,16 +276,25 @@ acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, #' @param accessions Character vector containing the accession numbers to query on #' the ipg database #' @param out_path Path to write the efetch results to -#' @param plan +#' @param plan Character. Specifies the execution plan for parallel processing. +#' Default is "multicore". #' #' @importFrom future future plan #' @importFrom purrr map #' @importFrom rentrez entrez_fetch #' -#' @return +#' @return The function does not return a value but writes the efetch results +#' directly to the specified `out_path`. +#' #' @export #' #' @examples +#' \dontrun{ +#' efetchIPG( +#' accessions = c("P12345", "Q67890", "A12345"), +#' out_path = "path/to/efetch_results.xml" +#' ) +#' } efetchIPG <- function(accessions, out_path, plan = "multicore") { if (length(accessions) > 0) { partition <- function(v, groups) { @@ -305,18 +355,28 @@ efetchIPG <- function(accessions, out_path, plan = "multicore") { #' @param ipg_file Path to the file containing results of an efetch run on the #' ipg database. The protein accession in 'accessions' should be contained in this #' file -#' @param refseq_assembly_path -#' @param genbank_assembly_path +#' @param refseq_assembly_path String. Path to the RefSeq assembly summary file. +#' @param genbank_assembly_path String. Path to the GenBank assembly summary file. #' @param lineagelookup_path String of the path to the lineage lookup file #' (taxid to lineage mapping). 
This file can be generated using the #' "create_lineage_lookup()" function #' #' @importFrom data.table fread setnames #' -#' @return +#' @return A data table containing protein accessions along with their +#' corresponding TaxIDs and lineage information. #' @export #' #' @examples +#' \dontrun{ +#' lins <- IPG2Lineage( +#' accessions = c("P12345", "Q67890"), +#' ipg_file = "path/to/ipg_results.txt", +#' refseq_assembly_path = "path/to/refseq_assembly_summary.txt", +#' genbank_assembly_path = "path/to/genbank_assembly_summary.txt", +#' lineagelookup_path = "path/to/lineage_lookup.tsv" +#' ) +#' } IPG2Lineage <- function(accessions, ipg_file, refseq_assembly_path, genbank_assembly_path, lineagelookup_path) { @@ -383,16 +443,25 @@ IPG2Lineage <- function(accessions, ipg_file, ######################################### #' addTaxID #' -#' @param data -#' @param acc_col -#' @param version +#' @param data A data frame or data table containing protein accession numbers. +#' @param acc_col A string specifying the column name in `data` that contains +#' the accession numbers. Defaults to "AccNum". +#' @param version A logical indicating whether to remove the last two characters +#' from the accession numbers for TaxID retrieval. Defaults to TRUE. #' #' @importFrom data.table as.data.table #' -#' @return +#' @return A data table that includes the original data along with a new column +#' containing the corresponding TaxIDs. 
#' @export #' #' @examples +#' \dontrun{ +#' # Create a sample data table with accession numbers +#' sample_data <- data.table(AccNum = c("ABC123.1", "XYZ456.1", "LMN789.2")) +#' enriched_data <- addTaxID(sample_data, acc_col = "AccNum", version = TRUE) +#' print(enriched_data) +#' } addTaxID <- function(data, acc_col = "AccNum", version = T) { if (!is.data.table(data)) { data <- as.data.table(data) @@ -421,17 +490,30 @@ addTaxID <- function(data, acc_col = "AccNum", version = T) { ################################## #' proteinAcc2TaxID #' -#' @param accnums -#' @param suffix -#' @param out_path -#' @param return_dt +#' @param accnums A character vector of protein accession numbers to be mapped +#' to TaxIDs. +#' @param suffix A string suffix used to name the output file generated by the +#' script. +#' @param out_path A string specifying the directory where the output file will +#' be saved. +#' @param return_dt A logical indicating whether to return the result as a data +#' table. Defaults to FALSE. If TRUE, the output file is read into a data table +#' and returned. #' #' @importFrom data.table fread #' -#' @return +#' @return If `return_dt` is TRUE, a data table containing the mapping of protein +#' accession numbers to TaxIDs. If FALSE, the function returns NULL. 
#' @export #' #' @examples +#' \dontrun{ +#' # Example accession numbers +#' accessions <- c("ABC123", "XYZ456", "LMN789") +#' tax_data <- proteinAcc2TaxID(accessions, suffix = "example", +#' out_path = "/path/to/output", return_dt = TRUE) +#' print(tax_data) +#' } proteinAcc2TaxID <- function(accnums, suffix, out_path, return_dt = FALSE) { # Write accnums to a file acc_file <- tempfile() @@ -456,18 +538,25 @@ proteinAcc2TaxID <- function(accnums, suffix, out_path, return_dt = FALSE) { #' @description Perform elink to go from protein database to taxonomy database #' and write the resulting file of taxid and lineage to out_path #' -#' @param accessions Character vector containing the accession numbers to query on -#' the ipg database -#' @param out_path Path to write the efetch results to -#' @param plan +#' @param accessions A character vector containing the accession numbers to query +#' in the protein database. +#' @param out_path A string specifying the path where the results of the query +#' will be written. If set to NULL, a temporary directory will be used. +#' @param plan A character string that specifies the execution plan for parallel +#' processing. The default is "multicore". #' #' @importFrom future plan #' @importFrom purrr map #' -#' @return +#' @return This function does not return a value. It writes the results to the +#' specified output path. #' @export #' #' @examples +#' \dontrun{ +#' accessions <- c("ABC123", "XYZ456", "LMN789") +#' proteinAcc2TaxID_old(accessions, out_path = "/path/to/output") +#' } proteinAcc2TaxID_old <- function(accessions, out_path, plan = "multicore") { if (length(accessions) > 0) { partition <- function(v, groups) { diff --git a/R/msa.R b/R/msa.R index e56cc32c..20089dba 100644 --- a/R/msa.R +++ b/R/msa.R @@ -50,12 +50,15 @@ #' @importFrom msa msa msaPrettyPrint #' @importFrom stringr str_replace #' -#' @return +#' @return A PDF file containing the multiple sequence alignment. 
#' @export #' #' @examples #' \dontrun{ -#' msa_pdf() +#' msa_pdf(fasta_path = "path/to/your/file.fasta", +#' out_path = "path/to/output/alignment.pdf", +#' lowerbound = 10, +#' upperbound = 200) #' } msa_pdf <- function(fasta_path, out_path = NULL, lowerbound = NULL, upperbound = NULL) { @@ -187,15 +190,22 @@ msa_pdf <- function(fasta_path, out_path = NULL, ## https://github.com/mhahsler/rMSA #' Function to generate MSA using kalign #' -#' @param fa_file -#' @param outfile +#' @param fa_file Character. The path to the input FASTA file containing protein +#' sequences. +#' @param outfile Character. The path to the output file where the alignment +#' will be saved. #' #' @importFrom Biostrings readAAStringSet +#' @importFrom rMSA kalign #' -#' @return +#' @return A list containing the alignment object and the output file path. #' @export #' #' @examples +#' \dontrun{ +#' generate_msa(fa_file = "path/to/sequences.fasta", +#' outfile = "path/to/alignment.txt") +#' } generate_msa <- function(fa_file = "", outfile = "") { prot_aa <- readAAStringSet( path = fa_file, diff --git a/R/networks_domarch.R b/R/networks_domarch.R index fea0a195..65090fa4 100755 --- a/R/networks_domarch.R +++ b/R/networks_domarch.R @@ -24,13 +24,17 @@ #' A network of domains is returned based on shared domain architectures. #' #' @param prot A data frame that contains the column 'DomArch'. -#' @param column Name of column containing Domain architecture from which nodes and edges are generated. -#' @param domains_of_interest -#' @param cutoff Integer. Only use domains that occur at or above the cutoff for total counts if cutoff_type is "Total Count". -#' Only use domains that appear in cutoff or greater lineages if cutoff_type is Lineage. +#' @param column Name of column containing Domain architecture from which nodes +#' and edges are generated. +#' @param domains_of_interest Character vector specifying domains of interest. +#' @param cutoff Integer. 
Only use domains that occur at or above the cutoff for +#' total counts if cutoff_type is "Total Count". +#' Only use domains that appear in cutoff or greater lineages if cutoff_type is +#' Lineage. #' @param layout Character. Layout type to be used for the network. Options are: #' \itemize{\item "grid" \item "circle" \item "random" \item "auto"} -#' @param query_color +#' @param query_color Character. Color to represent the queried domain in the +#' network. #' #' @importFrom dplyr across add_row all_of distinct filter mutate pull select #' @importFrom igraph delete_vertices graph_from_edgelist vertex @@ -41,7 +45,7 @@ #' @importFrom tidyr pivot_wider #' @importFrom visNetwork visIgraph visIgraphLayout visNetwork visOptions #' -#' @return +#' @return A network visualization of domain architectures. #' @export #' #' @examples @@ -227,15 +231,20 @@ domain_network <- function(prot, column = "DomArch", domains_of_interest, cutoff #' #' #' @param prot A data frame that contains the column 'DomArch'. -#' @param column Name of column containing Domain architecture from which nodes and edges are generated. -#' @param domains_of_interest -#' @param cutoff Integer. Only use domains that occur at or above the cutoff for total counts if cutoff_type is "Total Count". -#' Only use domains that appear in cutoff or greater lineages if cutoff_type is Lineage. +#' @param column Name of column containing Domain architecture from which nodes +#' and edges are generated. +#' @param domains_of_interest Character vector specifying the domains of interest. +#' @param cutoff Integer. Only use domains that occur at or above the cutoff for +#' total counts if cutoff_type is "Total Count". +#' Only use domains that appear in cutoff or greater lineages if cutoff_type is +#' Lineage. #' @param layout Character. Layout type to be used for the network. 
Options are: #' \itemize{\item "grid" \item "circle" \item "random" \item "auto"} -#' @param query_color Color that the nodes of the domains in the domains_of_interest vector are colored -#' @param partner_color Color that the nodes that are not part of the domains_of_interest vector are colored -#' @param border_color +#' @param query_color Color that the nodes of the domains in the +#' domains_of_interest vector are colored +#' @param partner_color Color that the nodes that are not part of the +#' domains_of_interest vector are colored +#' @param border_color Color for the borders of the nodes. #' @param IsDirected Is the network directed? Set to false to eliminate arrows #' #' @importFrom dplyr distinct filter group_by mutate pull select summarize @@ -245,12 +254,12 @@ domain_network <- function(prot, column = "DomArch", domains_of_interest, cutoff #' @importFrom stringr str_replace_all str_split #' @importFrom visNetwork visEdges visGroups visIgraphLayout visLegend visNetwork visOptions #' -#' @return +#' @return A network visualization of domain architectures. #' @export #' #' @examples #' \dontrun{ -#' domain_network(pspa) +#' BinaryDomainNetwork(pspa) #' } BinaryDomainNetwork <- function(prot, column = "DomArch", domains_of_interest, cutoff = 70, layout = "nice", query_color = adjustcolor("yellow", alpha.f = .5), diff --git a/R/networks_gencontext.R b/R/networks_gencontext.R index e0dd63da..02733cdf 100755 --- a/R/networks_gencontext.R +++ b/R/networks_gencontext.R @@ -17,13 +17,19 @@ #' #' #' @param prot A data frame that contains the column 'DomArch'. -#' @param column Name of column containing Domain architecture from which nodes and edges are generated. -#' @param domains_of_interest -#' @param cutoff_type Character. Used to determine how data should be filtered. 
Either
-#' \itemize{\item "Lineage" to filter domains based off how many lineages the Domain architecture appears in
-#' \item "Total Count" to filter off the total amount of times a domain architecture occurs }
-#' @param cutoff Integer. Only use domains that occur at or above the cutoff for total counts if cutoff_type is "Total Count".
-#' Only use domains that appear in cutoff or greater lineages if cutoff_type is Lineage.
+#' @param column Name of column containing Domain architecture from which nodes
+#' and edges are generated.
+#' @param domains_of_interest Character vector specifying the domains of interest.
+#' @param cutoff_type Character. Used to determine how data should be filtered.
+#' Either
+#' \itemize{\item "Lineage" to filter domains based off how many lineages the
+#' Domain architecture appears in
+#' \item "Total Count" to filter off the total amount of times a
+#' domain architecture occurs }
+#' @param cutoff Integer. Only use domains that occur at or above the cutoff
+#' for total counts if cutoff_type is "Total Count".
+#' Only use domains that appear in cutoff or greater lineages if cutoff_type is
+#' Lineage.
 #' @param layout Character. Layout type to be used for the network. Options are:
 #' \itemize{\item "grid" \item "circle" \item "random" \item "auto"}
 #'
@@ -32,12 +38,14 @@
 #' @importFrom grDevices adjustcolor
 #' @importFrom igraph E graph_from_edgelist layout.auto layout.circle layout_on_grid layout_randomly plot.igraph V
 #' @importFrom stringr str_replace_all str_split
 #'
-#' @return
+#' @return A plot of the domain architecture network.
 #' @export
 #'
 #' @examples
 #' \dontrun{
-#' domain_network(pspa)
+#' gc_undirected_network(pspa, column = "GenContext",
+#' domains_of_interest = c("Domain1", "Domain2"),
+#' cutoff_type = "Total Count", cutoff = 10)
 #' }
 gc_undirected_network <- function(prot, column = "GenContext", domains_of_interest, cutoff_type = "Lineage", cutoff = 1, layout = "grid") { # by domain networks or all, as required.
@@ -127,8 +135,10 @@ gc_undirected_network <- function(prot, column = "GenContext", domains_of_intere #' #' @param prot A data frame that contains the column 'GenContext'. #' @param domains_of_interest Character vector of domains of interest. -#' @param column Name of column containing Genomic Context from which nodes and edges are generated. -#' @param cutoff Integer. Only use GenContexts that occur at or above the cutoff percentage for total count +#' @param column Name of column containing Genomic Context from which nodes and +#' edges are generated. +#' @param cutoff Integer. Only use GenContexts that occur at or above the cutoff +#' percentage for total count #' @param layout Character. Layout type to be used for the network. Options are: #' \itemize{\item "grid" \item "circle" \item "random" \item "auto" \item "nice"} #' @param directed Is the network directed? @@ -139,12 +149,12 @@ gc_undirected_network <- function(prot, column = "GenContext", domains_of_intere #' @importFrom stringr str_replace_all #' @importFrom visNetwork visIgraphLayout visLegend visNetwork visOptions #' -#' @return +#' @return A plot of the genomic context network. #' @export #' #' @examples #' \dontrun{ -#' gc_directed_network(pspa, column = "GenContex", cutoff = 55) +#' gc_directed_network(pspa, column = "GenContext", cutoff = 55) #' } GenContextNetwork <- function(prot, domains_of_interest, column = "GenContext", cutoff = 40, diff --git a/R/plotme.R b/R/plotme.R index 906e85ec..3527f170 100644 --- a/R/plotme.R +++ b/R/plotme.R @@ -44,10 +44,9 @@ plotSunburst <- function(count_data, fill_by_n = FALSE, sort_by_n = FALSE, maxde } -#' @param count_data -#' -#' @param fill_by_n -#' @param sort_by_n +#' @param count_data A data frame containing the data. +#' @param fill_by_n Logical indicating if fill color is based on counts. +#' @param sort_by_n Logical indicating if data should be sorted by counts. 
#' #' @importFrom plotly plot_ly #' @importFrom purrr exec @@ -68,18 +67,24 @@ plotTreemap <- function(count_data, fill_by_n = FALSE, sort_by_n = FALSE) { #' prepareColumnParams #' -#' @param count_data -#' @param fill_by_n -#' @param sort_by_n +#' @param count_data A data frame containing the data. +#' @param fill_by_n Logical indicating if fill color is based on counts. +#' @param sort_by_n Logical indicating if data should be sorted by counts. #' #' @importFrom assertthat assert_that #' @importFrom dplyr bind_rows mutate #' @importFrom purrr map #' -#' @return +#' @return A data frame of parameters for treemap visualization. #' @export #' #' @examples +#' \dontrun{ +#' count_data <- data.frame(Category = c("A", "B", "C"), +#' n = c(10, 20, 15)) +#' params <- prepareColumnParams(count_data, fill_by_n = TRUE, sort_by_n = FALSE) +#' print(params) +#' } prepareColumnParams <- function(count_data, fill_by_n, sort_by_n) { validateCountDF(count_data) assertthat::assert_that(is.logical(fill_by_n), @@ -116,17 +121,24 @@ prepareColumnParams <- function(count_data, fill_by_n, sort_by_n) { #' prepareSingleColumnParams #' -#' @param df -#' @param col_num -#' @param root +#' @param df A data frame containing the data to be processed. +#' @param col_num An integer representing the column number to process. +#' @param root A string representing the root node for the treemap. #' #' @importFrom dplyr c_across group_by mutate rowwise select summarise ungroup #' @importFrom stringr str_glue #' -#' @return +#' @return A data frame containing parameters for the specified column for +#' treemap visualization. 
#' @export
#'
#' @examples
+#' \dontrun{
+#' df <- data.frame(Category = c("A", "A", "B", "B", "C"),
+#' n = c(10, 20, 30, 40, 50))
+#' params <- prepareSingleColumnParams(df, col_num = 1, root = "Root")
+#' print(params)
+#' }
prepareSingleColumnParams <- function(df,
col_num,
root) {
@@ -158,15 +170,18 @@ prepareSingleColumnParams <- function(df,
}
#' validateCountDF
#'
-#' @param var
+#' @param var A data frame to validate as a count data frame.
#'
#' @importFrom assertthat assert_that has_name
#' @importFrom dplyr across mutate
#'
-#' @return
+#' @return Called for its validation side effect; errors if `var` is not a count data frame.
#' @export
#'
#' @examples
+#' \dontrun{
+#' validateCountDF(my_data)
+#' }
validateCountDF <- function(var) {
msg <- paste(substitute(var), "must be a count dataframe (output of dplyr::count)")
assertthat::assert_that(is.data.frame(var),
diff --git a/R/plotting.R b/R/plotting.R
index 7191eace..b9a2758a 100644
--- a/R/plotting.R
+++ b/R/plotting.R
@@ -18,20 +18,34 @@
# suppressPackageStartupMessages(library(d3r))
# suppressPackageStartupMessages(library(viridis))
-#' Shorten Lineage
+#' Shorten Lineage Names
#'
-#' @param data
-#' @param colname
-#' @param abr_len
+#' @description
+#' This function abbreviates lineage names by shortening the first part of the
+#' string (up to a given delimiter).
+#'
+#' @param data A data frame that contains a column with lineage names to be
+#' shortened.
+#' @param colname Character. The name of the column in the data frame containing
+#' the lineage strings to be shortened. Default is `"Lineage"`.
+#' @param abr_len Integer. The number of characters to retain after the first
+#' letter. If set to 1, only the first letter of each segment before the
+#' delimiter (`>`) is retained. Default is 1.
#'
#' @importFrom stringr str_locate
+#' @importFrom purrr pmap
+#'
+#' @return A modified data frame where the specified lineage column has been
+#' shortened. 
#'
-#' @return
#' @export
#'
#' @examples
#' \dontrun{
-#' shortenLineage()
+#' df <- data.frame(Lineage = c("Bacteria>Firmicutes>Clostridia",
+#' "Archaea>Euryarchaeota>Thermococci"))
+#' shortened_df <- shortenLineage(df, colname = "Lineage", abr_len = 1)
+#' print(shortened_df)
#' }
shortenLineage <- function(data, colname = "Lineage", abr_len = 1) {
abbrv <- function(x) {
@@ -65,23 +79,29 @@ shortenLineage <- function(data, colname = "Lineage", abr_len = 1) {
#'
#' @param query_data Data frame of protein homologs with the usual 11 columns +
#' additional word columns (0/1 format). Default is toast_rack.sub
-#' @param colname
+#' @param colname Column name from query_data: "DomArch.norep", "GenContext.norep",
+#' "DomArch.PFAM.norep" or "DomArch.LADB.norep". Default is "DomArch.norep".
#' @param cutoff Numeric. Cutoff for word frequency. Default is 90.
-#' @param RowsCutoff
-#' @param text.scale Allows scaling of axis title, tick lables, and numbers above the intersection size bars.
+#' @param RowsCutoff Boolean. If TRUE, applies a row cutoff to remove data rows
+#' based on a certain condition. Default is FALSE.
+#' @param text.scale Allows scaling of axis title, tick labels, and numbers
+#' above the intersection size bars.
#' text.scale can either take a universal scale in the form of an integer,
#' or a vector of specific scales in the format: c(intersection size title,
#' intersection size tick labels, set size title, set size tick labels, set names,
#' numbers above bars)
-#' @param point.size
-#' @param line.size
+#' @param point.size Numeric. Sets the size of points in the UpSet plot.
+#' Default is 2.2.
+#' @param line.size Numeric. Sets the line width in the UpSet plot.
+#' Default is 0.8.
#'
#' @importFrom dplyr across distinct filter if_else mutate pull select where
#' @importFrom rlang sym
#' @importFrom stringr str_detect str_replace_all str_split
#' @importFrom UpSetR upset
#'
-#' @return
+#' @return An UpSet plot object. 
The plot visualizes intersections of sets based +#' on the provided colname in query_data. #' @export #' #' @note Please refer to the source code if you have alternate file formats and/or @@ -230,8 +250,9 @@ plotUpSet <- function(query_data = "toast_rack.sub", #' Default is prot (variable w/ protein data). #' @param colname Column name from query_data: "DomArch.norep", "GenContext.norep", #' "DomArch.PFAM.norep" or "DomArch.LADB.norep". Default is "DomArch.norep". -#' @param cutoff -#' @param RowsCutoff +#' @param cutoff Numeric. Cutoff for word frequency. Default is 90. +#' @param RowsCutoff Boolean. If TRUE, applies a row cutoff to remove data rows +#' based on a certain condition. Default is FALSE. #' @param color Color for the heatmap. One of six options: "default", "magma", "inferno", #' "plasma", "viridis", or "cividis" #' @@ -243,7 +264,7 @@ plotUpSet <- function(query_data = "toast_rack.sub", #' @importFrom viridis scale_fill_viridis #' @importFrom rlang sym #' -#' @return +#' @return A LineageDA plot object. #' @export #' #' @details @@ -325,7 +346,7 @@ plotLineageDA <- function(query_data = "prot", #' Lineage Plot: Heatmap of Queries vs Lineages #' -#' @authors Janani Ravi, Samuel Chen +#' @author Janani Ravi, Samuel Chen #' @keywords Lineages, Domains, Domain Architectures, GenomicContexts #' @description #' Lineage plot for queries. Heatmap. @@ -333,10 +354,14 @@ plotLineageDA <- function(query_data = "prot", #' @param query_data Data frame of protein homologs with the usual 11 columns + #' additional word columns (0/1 format). #' Default is prot (variable w/ protein data). -#' @param queries Character Vector containing the queries that will be used for the categories -#' @param colname -#' @param cutoff -#' @param color +#' @param queries Character Vector containing the queries that will be used for +#' the categories. +#' @param colname Character. The column used for filtering based on the `queries`. +#' Default is "ClustName". 
+#' @param cutoff Numeric. The cutoff value for filtering rows based on their +#' total count. Rows with values below this cutoff are excluded. +#' @param color Character. Defines the color palette used for the heatmap. +#' Default is a red gradient. #' #' @importFrom dplyr arrange desc filter group_by select summarise union #' @importFrom ggplot2 aes aes_string element_rect element_text geom_tile ggplot scale_fill_gradient scale_x_discrete theme theme_minimal @@ -346,7 +371,9 @@ plotLineageDA <- function(query_data = "prot", #' @importFrom tidyr drop_na #' @importFrom viridis scale_fill_viridis #' -#' @return +#' @return A ggplot object representing a heatmap (tile plot) showing the +#' relationship between queries and lineages, with the intensity of color +#' representing the count of matching records. #' @export #' #' @note @@ -476,7 +503,9 @@ plotLineageQuery <- function(query_data = all, #' @importFrom stringr str_replace_all #' @importFrom tidyr gather #' -#' @return +#' @return A ggplot object representing a heatmap (tile plot) of lineage versus +#' the top neighboring domain architectures, with color intensity representing +#' the frequency of occurrences. #' @export #' #' @details @@ -554,15 +583,19 @@ plotLineageNeighbors <- function(query_data = "prot", query = "pspa", #' Lineage Domain Repeats Plot #' -#' @param query_data -#' @param colname +#' @param query_data Data frame containing protein homolog data, including +#' relevant domain architectures and lineages. +#' @param colname Character. The name of the column in query_data that contains +#' domain architectures or other structural information. 
#' #' @importFrom dplyr across mutate select where #' @importFrom ggplot2 aes element_text geom_tile ggplot scale_fill_gradient scale_x_discrete theme theme_minimal #' @importFrom stringr str_count str_replace_all #' @importFrom tidyr gather #' -#' @return +#' @return A ggplot object representing a heatmap (tile plot) of domain repeat +#' counts across different lineages, with color intensity representing the +#' occurrence of domains. #' @export #' #' @examples @@ -646,7 +679,9 @@ plotLineageDomainRepeats <- function(query_data, colname) { #' @importFrom purrr map #' @importFrom stringr str_locate str_locate_all #' -#' @return +#' @return A ggplot object representing a heatmap (tile plot) of domain repeat +#' counts across different lineages, with color intensity representing the +#' occurrence of domains. #' @export #' #' @examples @@ -791,25 +826,35 @@ plotLineageHeatmap <- function(prot, domains_of_interest, level = 3, label.size #' Stacked Lineage Plot #' -#' @param prot -#' @param column -#' @param cutoff -#' @param Lineage_col -#' @param xlabel -#' @param reduce_lineage -#' @param label.size -#' @param legend.position -#' @param legend.text.size -#' @param legend.cols -#' @param legend.size -#' @param coord_flip -#' @param legend +#' @param prot Data frame containing protein data including domain architecture +#' and lineage information. +#' @param column Character. The name of the column in prot representing domain +#' architectures (default is "DomArch"). +#' @param cutoff Numeric. A threshold value for filtering domain architectures +#' or protein counts. +#' @param Lineage_col Character. The name of the column representing lineage +#' data (default is "Lineage"). +#' @param xlabel Character. Label for the x-axis +#' (default is "Domain Architecture"). +#' @param reduce_lineage Logical. Whether to shorten lineage names +#' (default is TRUE). +#' @param label.size Numeric. The size of axis text labels (default is 8). 
+#' @param legend.position Numeric vector. Coordinates for placing the legend +#' (default is c(0.7, 0.4)). +#' @param legend.text.size Numeric. Size of the text in the legend +#' (default is 10). +#' @param legend.cols Numeric. Number of columns in the legend (default is 2). +#' @param legend.size Numeric. Size of the legend keys (default is 0.7). +#' @param coord_flip Logical. Whether to flip the coordinates of the plot +#' (default is TRUE). +#' @param legend Logical. Whether to display the legend (default is TRUE). #' #' @importFrom dplyr pull select #' @importFrom ggplot2 aes_string coord_flip element_blank element_line element_rect element_text geom_bar ggplot guides guide_legend scale_fill_manual xlab ylab theme theme_minimal #' @importFrom purrr map #' -#' @return +#' @return A ggplot object representing a stacked bar plot showing the +#' distribution of protein domain architectures across lineages. #' @export #' #' @examples @@ -937,31 +982,46 @@ plotStackedLineage <- function(prot, column = "DomArch", cutoff, Lineage_col = " #' plotWordCloud3 #' -#' @param data -#' @param size -#' @param minSize -#' @param gridSize -#' @param fontFamily -#' @param fontWeight -#' @param color -#' @param backgroundColor -#' @param minRotation -#' @param maxRotation -#' @param shuffle -#' @param rotateRatio -#' @param shape -#' @param ellipticity -#' @param widgetsize -#' @param figPath -#' @param hoverFunction +#' @param data Data frame or table containing words and their frequencies for +#' the word cloud. +#' @param size Numeric. Scaling factor for word sizes (default is 1). +#' @param minSize Numeric. Minimum font size for the smallest word +#' (default is 0). +#' @param gridSize Numeric. Size of the grid for placing words (default is 0). +#' @param fontFamily Character. Font family to use for the words +#' (default is "Segoe UI"). +#' @param fontWeight Character. Font weight for the words (default is "bold"). +#' @param color Character or vector. Color of the words. 
Use "random-dark" for +#' random dark colors (default) or specify a color. +#' @param backgroundColor Character. Background color of the word cloud +#' (default is "white"). +#' @param minRotation Numeric. Minimum rotation angle of words in radians +#' (default is -π/4). +#' @param maxRotation Numeric. Maximum rotation angle of words in radians +#' (default is π/4). +#' @param shuffle Logical. Whether to shuffle the words (default is TRUE). +#' @param rotateRatio Numeric. Proportion of words that are rotated +#' (default is 0.4). +#' @param shape Character. Shape of the word cloud ("circle" is default, but +#' you can use "cardioid", "star", "triangle", etc.). +#' @param ellipticity Numeric. Degree of ellipticity (default is 0.65). +#' @param widgetsize Numeric vector. Width and height of the widget +#' (default is NULL, which uses default size). +#' @param figPath Character. Path to an image file to use as a mask for the +#' word cloud (optional). +#' @param hoverFunction JS function. JavaScript function to run when hovering +#' over words (optional). #' #' @importFrom base64enc base64encode #' @importFrom htmlwidgets createWidget JS sizingPolicy #' -#' @return +#' @return An HTML widget object displaying a word cloud. #' @export #' #' @examples +#' \dontrun{ +#' wordcloud3(data = your_data, size = 1.5, color = "random-light") +#' } wordcloud3 <- function(data, size = 1, minSize = 0, gridSize = 0, fontFamily = "Segoe UI", fontWeight = "bold", color = "random-dark", backgroundColor = "white", minRotation = -pi / 4, maxRotation = pi / 4, shuffle = TRUE, @@ -1022,16 +1082,20 @@ wordcloud3 <- function(data, size = 1, minSize = 0, gridSize = 0, fontFamily = " #' #' @param query_data Data frame of protein homologs with the usual 11 columns + #' additional word columns (0/1 format). Default is "prot". -#' @param colname -#' @param cutoff -#' @param UsingRowsCutoff +#' @param colname Character. The name of the column in `query_data` to generate +#' the word cloud from. 
Default is "DomArch". +#' @param cutoff Numeric. The cutoff value for filtering elements based on their +#' frequency. Default is 70. +#' @param UsingRowsCutoff Logical. Whether to use a row-based cutoff instead of +#' a frequency cutoff. Default is FALSE. #' #' @importFrom dplyr filter pull #' @importFrom RColorBrewer brewer.pal #' @importFrom rlang sym #' @importFrom wordcloud wordcloud #' -#' @return +#' @return A word cloud plot showing the frequency of elements from the selected +#' column. #' @export #' #' @details @@ -1102,14 +1166,18 @@ createWordCloudElement <- function(query_data = "prot", #' #' @param query_data Data frame of protein homologs with the usual 11 columns + #' additional word columns (0/1 format). Default is "prot". -#' @param colname -#' @param cutoff -#' @param UsingRowsCutoff +#' @param colname Character. The name of the column in `query_data` to generate +#' the word cloud from. Default is "DomArch". +#' @param cutoff Numeric. The cutoff value for filtering elements based on their +#' frequency. Default is 70. +#' @param UsingRowsCutoff Logical. Whether to use a row-based cutoff instead of +#' a frequency cutoff. Default is FALSE. #' #' @importFrom dplyr filter pull #' @importFrom rlang sym #' -#' @return +#' @return A word cloud plot showing the frequency of elements from the selected +#' column. #' @export #' #' @details @@ -1172,16 +1240,23 @@ createWordCloud2Element <- function(query_data = "prot", #### Sunburst ##### #' Lineage Sunburst #' -#' @param prot Data frame containing a lineage column that the sunburst plot will be generated for -#' @param lineage_column String. Name of the lineage column within the data frame. Defaults to "Lineage" -#' @param type String, either "sunburst" or "sund2b". If type is "sunburst", a sunburst plot of the lineage +#' @param prot Data frame containing a lineage column that the sunburst plot +#' will be generated for +#' @param lineage_column String. 
Name of the lineage column within the
+#' data frame. Defaults to "Lineage"
+#' @param type String, either "sunburst" or "sund2b". If type is "sunburst",
+#' a sunburst plot of the lineage will be rendered.
#' @param levels Integer. Number of levels the sunburst will have.
-#' @param colors
-#' @param legendOrder String vector. The order of the legend. If legendOrder is NULL,
-#' @param showLegend Boolean. If TRUE, the legend will be enabled when the component first renders.
-#' @param maxLevels Integer, the maximum number of levels to display in the sunburst; 5 by default, NULL to disable
-#' then the legend will be in the descending order of the top level hierarchy.
-#' will be rendered. If the type is sund2b, a sund2b plot will be rendered.
+#' @param colors A vector of colors for the sunburst plot.
+#' If NULL, default colors are used.
+#' @param legendOrder String vector. The order of the legend. If legendOrder
+#' is NULL, the legend is in the descending order of the top level hierarchy.
+#' @param showLegend Boolean. If TRUE, the legend will be enabled when the
+#' component first renders.
+#' @param maxLevels Integer, the maximum number of levels to display in the
+#' sunburst; 5 by default, NULL to disable.
+#' If type is "sunburst", a sunburst plot will be rendered; if "sund2b",
+#' a sund2b plot will be rendered.
#'
#' @importFrom d3r d3_nest
#' @importFrom dplyr arrange desc group_by_at select summarise
@@ -1190,12 +1265,13 @@ createWordCloud2Element <- function(query_data = "prot",
#' @importFrom sunburstR sunburst sund2b
#' @importFrom tidyr drop_na separate
#'
-#' @return
+#' @return A sunburst or sund2b plot based on the input lineage data. 
#' @export #' #' @examples #' \dontrun{ -#' plotLineageSunburst() +#' plotLineageSunburst(prot, lineage_column = "Lineage", +#' type = "sunburst", levels = 3) #' } plotLineageSunburst <- function(prot, lineage_column = "Lineage", type = "sunburst", diff --git a/R/pre-msa-tree.R b/R/pre-msa-tree.R index 44979c3c..5904a522 100644 --- a/R/pre-msa-tree.R +++ b/R/pre-msa-tree.R @@ -45,10 +45,12 @@ api_key <- Sys.getenv("ENTREZ_API_KEY", unset = "YOUR_KEY_HERE") #' @param x Character vector. #' @param y Delimitter. Default is space (" "). #' -#' @return +#' @return A character vector in title case. #' @export #' #' @examples +#' to_titlecase("hello world") +#' to_titlecase("this is a test", "_") to_titlecase <- function(x, y = " ") { s <- strsplit(x, y)[[1]] paste(toupper(substring(s, 1, 1)), substring(s, 2), @@ -87,7 +89,8 @@ to_titlecase <- function(x, y = " ") { #' @importFrom stringr str_sub #' @importFrom tidyr replace_na separate #' -#' @return +#' @return A data frame containing the combined alignment and lineage +#' information. 
#' @export #' #' @note Please refer to the source code if you have alternate + @@ -188,8 +191,8 @@ add_leaves <- function(aln_file = "", #' #' @author Samuel Chen, Janani Ravi #' -#' @description This function adds a new 'Name' column that is comprised of components from -#' Kingdom, Phylum, Genus, and species, as well as the accession +#' @description This function adds a new 'Name' column that is comprised of +#' components from Kingdom, Phylum, Genus, and species, as well as the accession #' #' @param data Data to add name column to #' @param accnum_col Column containing accession numbers @@ -209,6 +212,9 @@ add_leaves <- function(aln_file = "", #' @export #' #' @examples +#' \dontrun{ +#' add_name(data_frame) +#' } add_name <- function(data, accnum_col = "AccNum", spec_col = "Species", lin_col = "Lineage", lin_sep = ">", out_col = "Name") { @@ -272,8 +278,8 @@ add_name <- function(data, #' Default is 'pspa.txt' #' @param fa_outpath Character. Path to the written fasta file. #' Default is 'NULL' -#' @param reduced Boolean. If TRUE, the fasta file will contain only one sequence per lineage. -#' Default is 'FALSE' +#' @param reduced Boolean. If TRUE, the fasta file will contain only one +#' sequence per lineage. Default is 'FALSE' #' #' @details The alignment file would need two columns: 1. accession + #' number and 2. alignment. The protein homolog accession to lineage mapping + @@ -283,7 +289,9 @@ add_name <- function(data, #' #' @importFrom readr write_file #' -#' @return +#' @return Character string containing the Fasta formatted sequences. +#' If `fa_outpath` is specified, the function also writes the sequences to the +#' Fasta file. #' @export #' #' @examples @@ -326,7 +334,7 @@ convert_aln2fa <- function(aln_file = "", #' Default rename_fasta() replacement function. 
Maps an accession number to its name #' -#' @param line he line of a fasta file starting with '>' +#' @param line The line of a fasta file starting with '>' #' @param acc2name Data Table containing a column of accession numbers and a name column #' @param acc_col Name of the column containing Accession numbers #' @param name_col Name of the column containing the names that the accession numbers @@ -336,10 +344,18 @@ convert_aln2fa <- function(aln_file = "", #' @importFrom stringr str_locate #' @importFrom rlang sym #' -#' @return +#' @return Character string. The modified line from the Fasta file header with +#' the name instead of the accession number. #' @export #' #' @examples +#' \dontrun{ +#' acc2name_table <- data.table(AccNum = c("ACC001", "ACC002"), +#' Name = c("Species A", "Species B")) +#' line <- ">ACC001 some additional info" +#' mapped_line <- map_acc2name(line, acc2name_table) +#' print(mapped_line) # Expected output: ">Species A" +#' } map_acc2name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") { # change to be the name equivalent to an add_names column # Find the first ' ' @@ -365,10 +381,14 @@ map_acc2name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") #' @importFrom purrr map #' @importFrom readr read_lines write_lines #' -#' @return +#' @return Character vector containing the modified lines of the Fasta file. #' @export #' #' @examples +#' \dontrun{ +#' rename_fasta("input.fasta", "output.fasta", +#' replacement_function = map_acc2name, acc2name = acc2name_table) +#' } rename_fasta <- function(fa_path, outpath, replacement_function = map_acc2name, ...) { lines <- read_lines(fa_path) @@ -397,18 +417,21 @@ rename_fasta <- function(fa_path, outpath, #' Default is 'here("data/rawdata_aln/")' #' @param fa_outpath Character. Path to the written fasta file. #' Default is 'here("data/alns/")'. -#' @param lin_file Character. Path to file. Master protein file with AccNum & lineages. 
-#' Default is 'here("data/rawdata_tsv/all_semiclean.txt")' -#' @param reduced Boolean. If TRUE, the fasta file will contain only one sequence per lineage. -#' Default is 'FALSE'. +#' @param lin_file Character. Path to file. Master protein file with AccNum & +#' lineages. Default is 'here("data/rawdata_tsv/all_semiclean.txt")' +#' @param reduced Boolean. If TRUE, the fasta file will contain only one +#' sequence per lineage. Default is 'FALSE'. #' -#' @details The alignment files would need two columns separated by spaces: 1. AccNum and 2. alignment. The protein homolog file should have AccNum, Species, Lineages. -#' @note Please refer to the source code if you have alternate + file formats and/or column names. +#' @details The alignment files would need two columns separated by spaces: 1. +#' AccNum and 2. alignment. The protein homolog file should have AccNum, +#' Species, Lineages. +#' @note Please refer to the source code if you have alternate + file +#' formats and/or column names. #' #' @importFrom purrr pmap #' @importFrom stringr str_replace_all #' -#' @return +#' @return A list of paths to the generated Fasta files. #' @export #' #' @examples @@ -456,24 +479,27 @@ generate_all_aln2fa <- function(aln_path = here("data/rawdata_aln/"), #' Resulting fasta file is written to the outpath. #' #' -#' @param accessions Character vector containing protein accession numbers to generate fasta sequences for. -#' Function may not work for vectors of length > 10,000 +#' @param accessions Character vector containing protein accession numbers to +#' generate fasta sequences for. Function may not work for vectors of +#' length > 10,000 #' @param outpath [str]. Location where fasta file should be written to. -#' @param plan +#' @param plan Character. The plan to use for processing. Default is "sequential". 
#' #' @importFrom Biostrings readAAStringSet #' @importFrom future future plan #' @importFrom purrr map #' @importFrom rentrez entrez_fetch #' -#' @return +#' @return A Fasta file is written to the specified `outpath`. #' @export #' #' @examples #' \dontrun{ -#' acc2fa(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), outpath = "my_proteins.fasta") +#' acc2fa(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), +#' outpath = "my_proteins.fasta") #' Entrez:accessions <- rep("ANY95992.1", 201) |> acc2fa(outpath = "entrez.fa") -#' EBI:accessions <- c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1") |> acc2fa(outpath = "ebi.fa") +#' EBI:accessions <- c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1") |> +#' acc2fa(outpath = "ebi.fa") #' } acc2fa <- function(accessions, outpath, plan = "sequential") { # validation @@ -562,14 +588,23 @@ acc2fa <- function(accessions, outpath, plan = "sequential") { #' @importFrom dplyr filter pull #' @importFrom rlang sym #' -#' @return +#' @return A character vector containing representative accession numbers, +#' one for each distinct observation in the specified 'reduced' column. #' @export #' #' @examples +#' \dontrun{ +#' # Example usage with a data frame called `protein_data` +#' representative_accessions <- RepresentativeAccNums(prot_data = protein_data, +#' reduced = "Lineage", +#' accnum_col = "AccNum") +#' print(representative_accessions) +#' } RepresentativeAccNums <- function(prot_data, reduced = "Lineage", accnum_col = "AccNum") { - # Get Unique reduced column and then bind the AccNums back to get one AccNum per reduced column + # Get Unique reduced column and then bind the AccNums back to get one + # AccNum per reduced column reduced_sym <- sym(reduced) accnum_sym <- sym(accnum_col) @@ -603,8 +638,10 @@ RepresentativeAccNums <- function(prot_data, #' @author Samuel Chen, Janani Ravi #' #' @param fasta_file Path to the FASTA file to be aligned -#' @param tool Type of alignment tool to use. 
One of three options: "Muscle", "ClustalO", or "ClustalW" -#' @param outpath Path to write the resulting alignment to as a FASTA file. If NULL, no file is written +#' @param tool Type of alignment tool to use. One of three options: "Muscle", +#' "ClustalO", or "ClustalW" +#' @param outpath Path to write the resulting alignment to as a FASTA file. If +#' NULL, no file is written #' #' @importFrom Biostrings readAAStringSet #' @importFrom msa msaMuscle msaClustalOmega msaClustalW @@ -613,6 +650,12 @@ RepresentativeAccNums <- function(prot_data, #' @export #' #' @examples +#' \dontrun{ +#' # Example usage +#' aligned_sequences <- alignFasta("path/to/sequences.fasta", +#' tool = "ClustalO", outpath = "path/to/aligned_sequences.fasta") +#' print(aligned_sequences) +#' } alignFasta <- function(fasta_file, tool = "Muscle", outpath = NULL) { fasta <- readAAStringSet(fasta_file) @@ -643,10 +686,15 @@ alignFasta <- function(fasta_file, tool = "Muscle", outpath = NULL) { #' @importFrom Biostrings unmasked #' @importFrom readr write_file #' -#' @return +#' @return Character string of the FASTA content that was written to the file. #' @export #' #' @examples +#' \dontrun{ +#' # Example usage +#' alignment <- alignFasta("path/to/sequences.fasta") +#' write.MsaAAMultipleAlignment(alignment, "path/to/aligned_sequences.fasta") +#' } write.MsaAAMultipleAlignment <- function(alignment, outpath) { l <- length(rownames(alignment)) fasta <- "" @@ -662,15 +710,21 @@ write.MsaAAMultipleAlignment <- function(alignment, outpath) { #' get_accnums_from_fasta_file #' -#' @param fasta_file +#' @param fasta_file Character. Path to the FASTA file from which +#' accession numbers will be extracted. #' #' @importFrom readr read_file #' @importFrom stringi stri_extract_all_regex #' -#' @return +#' @return A character vector containing the extracted accession numbers. 
#' @export #' #' @examples +#' \dontrun{ +#' # Example usage +#' accnums <- get_accnums_from_fasta_file("path/to/sequences.fasta") +#' print(accnums) +#' } get_accnums_from_fasta_file <- function(fasta_file) { txt <- read_file(fasta_file) accnums <- stringi::stri_extract_all_regex(fasta_file, "(?<=>)[\\w,.]+")[[1]] diff --git a/R/reverse_operons.R b/R/reverse_operons.R index e4bbd50e..b165ef72 100755 --- a/R/reverse_operons.R +++ b/R/reverse_operons.R @@ -3,14 +3,26 @@ # Modified by Janani Ravi and Samuel Chen -#' reveql +#' reveql: Reverse Equalities in Genomic Context #' -#' @param prot +#' @description +#' This function processes the genomic context strings (GenContext) and reverses +#' directional signs based on the presence of an equal sign ("="). +#' +#' @param prot [vector] A vector of genomic context strings to be processed. +#' +#' @return [vector] A vector of the same length as the input, where each genomic +#' element is annotated with either a forward ("->") or reverse ("<-") direction, +#' depending on its position relative to the "=" symbols. #' -#' @return #' @export #' #' @examples +#' # Example input: Genomic context with directional symbols and an asterisk +#' genomic_context <- c("A", "B", "*", "C", "D", "=", "E", "F") +#' reveql(genomic_context) +#' +#' # Output: "A->", "B->", "*", "<-C", "<-D", "=", "E->", "F->" reveql <- function(prot) { w <- prot # $GenContext.orig # was 'x' @@ -57,14 +69,28 @@ reveql <- function(prot) { ## The function to reverse operons -#' reverse_operon +#' reverse_operon: Reverse the Direction of Operons in Genomic Context +#' +#' @description +#' This function processes a genomic context data frame to reverse the direction +#' of operons based on specific patterns in the GenContext column. It handles +#' elements represented by ">" and "<" and restructures the genomic context by +#' flipping the direction of operons while preserving the relationships +#' indicated by "=". 
+#'
+#' @param prot [data.frame] A data frame containing at least a column named
+#' 'GenContext', which represents the genomic contexts that need to be reversed.
 #'
-#' @param prot
+#' @return [data.frame] The input data frame with the 'GenContext' column updated
+#' to reflect the reversed operons.
 #'
-#' @return
 #' @export
 #'
 #' @examples
+#' # Example genomic context data frame
+#' prot <- data.frame(GenContext = c("A>B", "CI"))
+#' reversed_prot <- reverse_operon(prot)
+#' print(reversed_prot)
 reverse_operon <- function(prot) {
     gencontext <- prot$GenContext

diff --git a/man/BinaryDomainNetwork.Rd b/man/BinaryDomainNetwork.Rd
index bb7e2353..5c35be0f 100644
--- a/man/BinaryDomainNetwork.Rd
+++ b/man/BinaryDomainNetwork.Rd
@@ -19,20 +19,32 @@ BinaryDomainNetwork(
 \arguments{
 \item{prot}{A data frame that contains the column 'DomArch'.}
 
-\item{column}{Name of column containing Domain architecture from which nodes and edges are generated.}
+\item{column}{Name of column containing Domain architecture from which nodes
+and edges are generated.}
 
-\item{cutoff}{Integer. Only use domains that occur at or above the cutoff for total counts if cutoff_type is "Total Count".
-Only use domains that appear in cutoff or greater lineages if cutoff_type is Lineage.}
+\item{domains_of_interest}{Character vector specifying the domains of interest.}
+
+\item{cutoff}{Integer. Only use domains that occur at or above the cutoff for
+total counts if cutoff_type is "Total Count".
+Only use domains that appear in cutoff or greater lineages if cutoff_type is
+Lineage.}
 
 \item{layout}{Character. Layout type to be used for the network. 
Options are: \itemize{\item "grid" \item "circle" \item "random" \item "auto"}} -\item{query_color}{Color that the nodes of the domains in the domains_of_interest vector are colored} +\item{query_color}{Color that the nodes of the domains in the +domains_of_interest vector are colored} + +\item{partner_color}{Color that the nodes that are not part of the +domains_of_interest vector are colored} -\item{partner_color}{Color that the nodes that are not part of the domains_of_interest vector are colored} +\item{border_color}{Color for the borders of the nodes.} \item{IsDirected}{Is the network directed? Set to false to eliminate arrows} } +\value{ +A network visualization of domain architectures. +} \description{ This function creates a domain network from the 'DomArch' column. @@ -42,6 +54,6 @@ A network of domains is returned based on shared domain architectures. } \examples{ \dontrun{ -domain_network(pspa) +BinaryDomainNetwork(pspa) } } diff --git a/man/GCA2Lineage.Rd b/man/GCA2Lineage.Rd index 9ec0ce56..796c2efb 100644 --- a/man/GCA2Lineage.Rd +++ b/man/GCA2Lineage.Rd @@ -21,7 +21,13 @@ This file can be generated using the "downloadAssemblySummary()" function} (taxid to lineage mapping). This file can be generated using the "create_lineage_lookup()" function} -\item{acc_col}{} +\item{acc_col}{Character. The name of the column in \code{prot_data} containing +accession numbers. Default is "AccNum".} +} +\value{ +A dataframe containing the merged information of GCA_IDs, TaxIDs, +and their corresponding lineage up to the phylum level. The dataframe +will include information from the input \code{prot_data} and lineage data. 
} \description{ Function to map GCA_ID to TaxID, and TaxID to Lineage @@ -29,6 +35,13 @@ Function to map GCA_ID to TaxID, and TaxID to Lineage \note{ Currently configured to have at most kingdom and phylum } +\examples{ +\dontrun{ +result <- GCA2Lineage(prot_data = my_prot_data, + assembly_path = "path/to/assembly_summary.txt", + lineagelookup_path = "path/to/lineage_lookup.tsv") +} +} \author{ Samuel Chen, Janani Ravi } diff --git a/man/GenContextNetwork.Rd b/man/GenContextNetwork.Rd index 2eeebbc5..08d4f476 100644 --- a/man/GenContextNetwork.Rd +++ b/man/GenContextNetwork.Rd @@ -18,15 +18,20 @@ GenContextNetwork( \item{domains_of_interest}{Character vector of domains of interest.} -\item{column}{Name of column containing Genomic Context from which nodes and edges are generated.} +\item{column}{Name of column containing Genomic Context from which nodes and +edges are generated.} -\item{cutoff}{Integer. Only use GenContexts that occur at or above the cutoff percentage for total count} +\item{cutoff}{Integer. Only use GenContexts that occur at or above the cutoff +percentage for total count} \item{layout}{Character. Layout type to be used for the network. Options are: \itemize{\item "grid" \item "circle" \item "random" \item "auto" \item "nice"}} \item{directed}{Is the network directed?} } +\value{ +A plot of the genomic context network. +} \description{ This function creates a Genomic Context network from the 'GenContext' column. @@ -34,6 +39,6 @@ A network of Genomic Context is returned. } \examples{ \dontrun{ -gc_directed_network(pspa, column = "GenContex", cutoff = 55) +gc_directed_network(pspa, column = "GenContext", cutoff = 55) } } diff --git a/man/IPG2Lineage.Rd b/man/IPG2Lineage.Rd index 282d5cbf..42b9b943 100644 --- a/man/IPG2Lineage.Rd +++ b/man/IPG2Lineage.Rd @@ -27,6 +27,10 @@ IPG2Lineage( ipg database. The protein accession in 'accessions' should be contained in this file} +\item{refseq_assembly_path}{String. 
Path to the RefSeq assembly summary file.} + +\item{genbank_assembly_path}{String. Path to the GenBank assembly summary file.} + \item{lineagelookup_path}{String of the path to the lineage lookup file (taxid to lineage mapping). This file can be generated using the "create_lineage_lookup()" function} @@ -37,6 +41,9 @@ This file can be generated using the \link[MolEvolvR]{downloadAssemblySummary} f \value{ A \code{data.table} with the lineage information for the provided protein accessions. + +A data table containing protein accessions along with their +corresponding TaxIDs and lineage information. } \description{ Takes the resulting file of an efetch run on the ipg database and @@ -49,6 +56,15 @@ append lineage, and taxid columns IPG2Lineage() } +\dontrun{ +lins <- IPG2Lineage( + accessions = c("P12345", "Q67890"), + ipg_file = "path/to/ipg_results.txt", + refseq_assembly_path = "path/to/refseq_assembly_summary.txt", + genbank_assembly_path = "path/to/genbank_assembly_summary.txt", + lineagelookup_path = "path/to/lineage_lookup.tsv" +) +} } \author{ Samuel Chen, Janani Ravi diff --git a/man/RepresentativeAccNums.Rd b/man/RepresentativeAccNums.Rd index f617cde4..49192f8e 100644 --- a/man/RepresentativeAccNums.Rd +++ b/man/RepresentativeAccNums.Rd @@ -2,7 +2,8 @@ % Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R \name{RepresentativeAccNums} \alias{RepresentativeAccNums} -\title{Function to generate a vector of one Accession number per distinct observation from 'reduced' column} +\title{Function to generate a vector of one Accession number per distinct +observation from 'reduced' column} \usage{ RepresentativeAccNums(prot_data, reduced = "Lineage", accnum_col = "AccNum") @@ -17,9 +18,29 @@ One accession number will be assigned for each of these observations} \item{accnum_col}{Column from prot_data that contains Accession Numbers} } +\value{ +A character vector containing one Accession number per distinct +observation from the specified reduced 
column. + +A character vector containing representative accession numbers, +one for each distinct observation in the specified 'reduced' column. +} \description{ Function to generate a vector of one Accession number per distinct observation from 'reduced' column } +\examples{ +\dontrun{ +representative_accessions <- RepresentativeAccNums(prot_data, +reduced = "Lineage", accnum_col = "AccNum") +} +\dontrun{ +# Example usage with a data frame called `protein_data` +representative_accessions <- RepresentativeAccNums(prot_data = protein_data, + reduced = "Lineage", + accnum_col = "AccNum") +print(representative_accessions) +} +} \author{ Samuel Chen, Janani Ravi } diff --git a/man/acc2FA.Rd b/man/acc2FA.Rd new file mode 100644 index 00000000..6c6ea43c --- /dev/null +++ b/man/acc2FA.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CHANGED-pre-msa-tree.R +\name{acc2FA} +\alias{acc2FA} +\title{acc2FA converts protein accession numbers to a fasta format.} +\usage{ +acc2FA(accessions, outpath, plan = "sequential") +} +\arguments{ +\item{accessions}{Character vector containing protein accession numbers to +generate fasta sequences for. +Function may not work for vectors of length > 10,000} + +\item{outpath}{\link{str} Location where fasta file should be written to.} + +\item{plan}{Character string specifying the parallel processing strategy to +use with the \code{future} package. Default is "sequential".} +} +\value{ +A logical value indicating whether the retrieval and conversion were +successful. Returns \code{TRUE} if successful and \code{FALSE} otherwise. +} +\description{ +Resulting fasta file is written to the outpath. 
+} +\examples{ +\dontrun{ +acc2FA(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), +outpath = "my_proteins.fasta") +Entrez:accessions <- rep("ANY95992.1", 201) |> acc2FA(outpath = "entrez.fa") +EBI:accessions <- c("P12345", "Q9UHC1", +"O15530", "Q14624", "P0DTD1") |> acc2FA(outpath = "ebi.fa") +} +} +\author{ +Samuel Chen, Janani Ravi +} +\keyword{accnum,} +\keyword{fasta} diff --git a/man/acc2Lineage.Rd b/man/acc2Lineage.Rd index a46b6f20..ce499592 100644 --- a/man/acc2Lineage.Rd +++ b/man/acc2Lineage.Rd @@ -32,11 +32,16 @@ This file can be generated using the "downloadAssemblySummary()" function} \item{ipgout_path}{Path to write the results of the efetch run of the accessions on the ipg database. If NULL, the file will not be written. Defaults to NULL} -\item{plan}{} +\item{plan}{Character. Specifies the execution plan for parallel processing. +Default is "multicore".} } \value{ A \code{data.table} that contains the lineage information, mapping protein accessions to their tax IDs and lineages. + +A dataframe containing lineage information mapped to the given protein +accessions. The dataframe includes relevant columns such as TaxID, GCA_ID, +Protein, Protein Name, Species, and Lineage. } \description{ This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set @@ -51,6 +56,14 @@ of protein accessions to their assembly (GCA_ID), tax ID, and lineage. \dontrun{ acc2Lineage() } +\dontrun{ +lineage_data <- acc2Lineage( + accessions = c("P12345", "Q67890"), + assembly_path = "path/to/assembly_summary.txt", + lineagelookup_path = "path/to/lineage_lookup.tsv", + ipgout_path = "path/to/output.txt" +) +} } \author{ Samuel Chen, Janani Ravi diff --git a/man/acc2fa.Rd b/man/acc2fa.Rd index 158b2d51..517ee3d6 100644 --- a/man/acc2fa.Rd +++ b/man/acc2fa.Rd @@ -7,12 +7,16 @@ acc2fa(accessions, outpath, plan = "sequential") } \arguments{ -\item{accessions}{Character vector containing protein accession numbers to generate fasta sequences for. 
-Function may not work for vectors of length > 10,000} +\item{accessions}{Character vector containing protein accession numbers to +generate fasta sequences for. Function may not work for vectors of +length > 10,000} \item{outpath}{\link{str}. Location where fasta file should be written to.} -\item{plan}{} +\item{plan}{Character. The plan to use for processing. Default is "sequential".} +} +\value{ +A Fasta file is written to the specified \code{outpath}. } \description{ acc2fa converts protein accession numbers to a fasta format. @@ -20,9 +24,11 @@ Resulting fasta file is written to the outpath. } \examples{ \dontrun{ -acc2fa(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), outpath = "my_proteins.fasta") +acc2fa(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), +outpath = "my_proteins.fasta") Entrez:accessions <- rep("ANY95992.1", 201) |> acc2fa(outpath = "entrez.fa") -EBI:accessions <- c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1") |> acc2fa(outpath = "ebi.fa") +EBI:accessions <- c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1") |> +acc2fa(outpath = "ebi.fa") } } \author{ diff --git a/man/addLeaves2Alignment.Rd b/man/addLeaves2Alignment.Rd index a758ebd5..d7055fbf 100644 --- a/man/addLeaves2Alignment.Rd +++ b/man/addLeaves2Alignment.Rd @@ -22,6 +22,10 @@ Default is 'pspa.txt'} \item{reduced}{Boolean. If TRUE, a reduced data frame will be generated with only one sequence per lineage. Default is FALSE.} } +\value{ +A data frame containing the enriched alignment data with lineage +information. +} \description{ Adding Leaves to an alignment file w/ accessions Genomic Contexts vs Domain Architectures. diff --git a/man/addLineage.Rd b/man/addLineage.Rd index ab02a5ab..e2363463 100644 --- a/man/addLineage.Rd +++ b/man/addLineage.Rd @@ -23,26 +23,30 @@ addLineage( ) } \arguments{ -\item{df}{A \code{data.frame} containing the input data. One column must contain -the accession numbers.} +\item{df}{Dataframe containing accession numbers. 
The dataframe should +have a column specified by \code{acc_col} that contains these accession numbers.} -\item{acc_col}{A string specifying the column name in \code{df} that holds the -accession numbers. Defaults to \code{"AccNum"}.} +\item{acc_col}{Character. The name of the column in \code{df} containing +accession numbers. Default is "AccNum".} -\item{assembly_path}{A string specifying the path to the \code{assembly_summary.txt} -file. This file contains metadata about assemblies.} +\item{assembly_path}{String. The path to the assembly summary file generated +using the \code{downloadAssemblySummary()} function.} -\item{lineagelookup_path}{A string specifying the path to the lineage lookup -file, which contains a mapping from tax IDs to their corresponding lineages.} +\item{lineagelookup_path}{String. The path to the lineage lookup file (taxid +to lineage mapping) generated using the \code{create_lineage_lookup()} function.} -\item{ipgout_path}{(Optional) A string specifying the path where IPG database -fetch results will be saved. If \code{NULL}, the results are not written to a file.} +\item{ipgout_path}{String. Optional path to save intermediate output files. +Default is NULL.} -\item{plan}{} +\item{plan}{Character. Specifies the execution plan for parallel processing. +Default is "multicore".} } \value{ A \code{data.frame} that combines the original \code{df} with the lineage information. + +A dataframe that combines the original dataframe \code{df} with lineage +information retrieved based on the provided accession numbers. 
} \description{ addLineage @@ -53,4 +57,10 @@ addLineage \dontrun{ addLineage() } +\dontrun{ +enriched_df <- addLineage(df = my_data, + acc_col = "AccNum", + assembly_path = "path/to/assembly_summary.txt", + lineagelookup_path = "path/to/lineage_lookup.tsv") +} } diff --git a/man/addName.Rd b/man/addName.Rd index e04f9849..5bf400b4 100644 --- a/man/addName.Rd +++ b/man/addName.Rd @@ -34,6 +34,16 @@ Original data with a 'Name' column This function adds a new 'Name' column that is comprised of components from Kingdom, Phylum, Genus, and species, as well as the accession } +\examples{ +# Example usage of the addName function +data <- data.frame( + AccNum = c("ACC123", "ACC456"), + Species = c("Homo sapiens", "Mus musculus"), + Lineage = c("Eukaryota>Chordata", "Eukaryota>Chordata") +) +enriched_data <- addName(data) +print(enriched_data) +} \author{ Samuel Chen, Janani Ravi } diff --git a/man/addTaxID.Rd b/man/addTaxID.Rd index d2fe139d..e960769b 100644 --- a/man/addTaxID.Rd +++ b/man/addTaxID.Rd @@ -7,8 +7,26 @@ addTaxID(data, acc_col = "AccNum", version = T) } \arguments{ -\item{version}{} +\item{data}{A data frame or data table containing protein accession numbers.} + +\item{acc_col}{A string specifying the column name in \code{data} that contains +the accession numbers. Defaults to "AccNum".} + +\item{version}{A logical indicating whether to remove the last two characters +from the accession numbers for TaxID retrieval. Defaults to TRUE.} +} +\value{ +A data table that includes the original data along with a new column +containing the corresponding TaxIDs. 
} \description{ addTaxID } +\examples{ +\dontrun{ +# Create a sample data table with accession numbers +sample_data <- data.table(AccNum = c("ABC123.1", "XYZ456.1", "LMN789.2")) +enriched_data <- addTaxID(sample_data, acc_col = "AccNum", version = TRUE) +print(enriched_data) +} +} diff --git a/man/add_leaves.Rd b/man/add_leaves.Rd index f1eeed10..5e462a2b 100644 --- a/man/add_leaves.Rd +++ b/man/add_leaves.Rd @@ -22,6 +22,10 @@ Default is 'pspa.txt'} \item{reduced}{Boolean. If TRUE, a reduced data frame will be generated with only one sequence per lineage. Default is FALSE.} } +\value{ +A data frame containing the combined alignment and lineage +information. +} \description{ Adding Leaves to an alignment file w/ accessions Genomic Contexts vs Domain Architectures. diff --git a/man/add_name.Rd b/man/add_name.Rd index f19139e1..db7b7339 100644 --- a/man/add_name.Rd +++ b/man/add_name.Rd @@ -31,8 +31,13 @@ Lineage, and AccNum info} Original data with a 'Name' column } \description{ -This function adds a new 'Name' column that is comprised of components from -Kingdom, Phylum, Genus, and species, as well as the accession +This function adds a new 'Name' column that is comprised of +components from Kingdom, Phylum, Genus, and species, as well as the accession +} +\examples{ +\dontrun{ +add_name(data_frame) +} } \author{ Samuel Chen, Janani Ravi diff --git a/man/alignFasta.Rd b/man/alignFasta.Rd index 21b020cf..54678d0a 100644 --- a/man/alignFasta.Rd +++ b/man/alignFasta.Rd @@ -11,9 +11,11 @@ alignFasta(fasta_file, tool = "Muscle", outpath = NULL) \arguments{ \item{fasta_file}{Path to the FASTA file to be aligned} -\item{tool}{Type of alignment tool to use. One of three options: "Muscle", "ClustalO", or "ClustalW"} +\item{tool}{Type of alignment tool to use. One of three options: "Muscle", +"ClustalO", or "ClustalW"} -\item{outpath}{Path to write the resulting alignment to as a FASTA file. 
If NULL, no file is written} +\item{outpath}{Path to write the resulting alignment to as a FASTA file. If +NULL, no file is written} } \value{ aligned fasta sequence as a MsaAAMultipleAlignment object @@ -23,6 +25,18 @@ aligned fasta sequence as a MsaAAMultipleAlignment object \description{ Perform a Multiple Sequence Alignment on a FASTA file. } +\examples{ +\dontrun{ +aligned_sequences <- alignFasta("my_sequences.fasta", +tool = "Muscle", outpath = "aligned_output.fasta") +} +\dontrun{ +# Example usage +aligned_sequences <- alignFasta("path/to/sequences.fasta", +tool = "ClustalO", outpath = "path/to/aligned_sequences.fasta") +print(aligned_sequences) +} +} \author{ Samuel Chen, Janani Ravi } diff --git a/man/cleanDomainArchitecture.Rd b/man/cleanDomainArchitecture.Rd index 887b5388..f12f1083 100644 --- a/man/cleanDomainArchitecture.Rd +++ b/man/cleanDomainArchitecture.Rd @@ -19,21 +19,33 @@ cleanDomainArchitecture( \arguments{ \item{prot}{A data frame containing a 'DomArch' column} +\item{old}{The name of the original column containing domain architecture. +Defaults to "DomArch.orig".} + +\item{new}{The name of the cleaned column to be created. Defaults to +"DomArch".} + \item{domains_keep}{A data frame containing the domain names to be retained.} -\item{domains_rename}{A data frame containing the domain names to be replaced in a column 'old' and the +\item{domains_rename}{A data frame containing the domain names to be replaced +in a column 'old' and the corresponding replacement values in a column 'new'.} -\item{condenseRepeatedDomains}{Boolean. If TRUE, repeated domains in 'DomArch' are condensed. Default is TRUE.} +\item{condenseRepeatedDomains}{Boolean. If TRUE, repeated domains in +'DomArch' are condensed. Default is TRUE.} -\item{removeTails}{Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.} +\item{removeTails}{Boolean. If TRUE, 'ClustName' will be filtered based on +domains to keep/remove. 
Default is FALSE.}
 
-\item{removeEmptyRows}{Boolean. If TRUE, rows with empty/unnecessary values in 'DomArch' are removed. Default is FALSE.}
+\item{removeEmptyRows}{Boolean. If TRUE, rows with empty/unnecessary values
+in 'DomArch' are removed. Default is FALSE.}
 
-\item{domains_ignore}{A data frame containing the domain names to be removed in a column called 'domains'}
+\item{domains_ignore}{A data frame containing the domain names to be removed
+in a column called 'domains'}
 }
 \value{
-The original data frame is returned with the clean DomArchs column and the old domains in the DomArchs.old column.
+The original data frame is returned with the clean DomArchs column
+and the old domains in the DomArchs.old column.
 }
 \description{
 Cleanup Domain Architectures
@@ -46,6 +58,7 @@ The original data frame is returned with the clean DomArchs column and the old d
 }
 \examples{
 \dontrun{
-cleanDomainArchitecture(prot, TRUE, FALSE, domains_keep, domains_rename, domains_ignore = NULL)
+cleanDomainArchitecture(prot, TRUE, FALSE,
+domains_keep, domains_rename, domains_ignore = NULL)
 }
 }
diff --git a/man/cleanFAHeaders.Rd b/man/cleanFAHeaders.Rd
index e9ad9b30..e93d0ca3 100644
--- a/man/cleanFAHeaders.Rd
+++ b/man/cleanFAHeaders.Rd
@@ -7,7 +7,9 @@
 cleanFAHeaders(fasta)
 }
 \arguments{
-\item{fasta}{}
+\item{fasta}{An \link{XStringSet} object representing the sequences from a
+FASTA file. 
The sequence names (headers) will be adjusted for uniqueness +and sanitized.} } \value{ \link{XStringSet} fasta with adjusted names (headers) diff --git a/man/cleanGeneDescription.Rd b/man/cleanGeneDescription.Rd index f98a25d4..3d106ae6 100644 --- a/man/cleanGeneDescription.Rd +++ b/man/cleanGeneDescription.Rd @@ -7,7 +7,10 @@ cleanGeneDescription(prot, column) } \arguments{ -\item{column}{} +\item{prot}{A data frame containing the gene descriptions.} + +\item{column}{The name of the column from which gene descriptions are pulled +for cleanup.} } \value{ Return trailing period that occurs in GeneDesc column diff --git a/man/cleanLineage.Rd b/man/cleanLineage.Rd index adcea312..071b37d2 100644 --- a/man/cleanLineage.Rd +++ b/man/cleanLineage.Rd @@ -7,10 +7,15 @@ cleanLineage(prot, lins_rename) } \arguments{ -\item{lins_rename}{} +\item{prot}{A data frame containing a 'Lineage' column that needs to be +cleaned up.} + +\item{lins_rename}{A data frame with two columns: 'old' containing terms +to be replaced and 'new' containing the corresponding replacement terms.} } \value{ -Describe return, in detail +The original data frame with the 'Lineage' column updated based on +the provided replacements. } \description{ Cleanup Lineage diff --git a/man/cleanSpecies.Rd b/man/cleanSpecies.Rd index 82b5444c..93fc2e05 100644 --- a/man/cleanSpecies.Rd +++ b/man/cleanSpecies.Rd @@ -13,7 +13,7 @@ cleanSpecies(prot, removeEmptyRows = FALSE) Default is false.} } \value{ -Describe return, in detail +The original data frame with Species cleaned. } \description{ Cleanup Species diff --git a/man/combine_files.Rd b/man/combine_files.Rd index 4126eb9e..432513d6 100644 --- a/man/combine_files.Rd +++ b/man/combine_files.Rd @@ -13,16 +13,34 @@ combine_files( ) } \arguments{ -\item{inpath}{String of 'master' path where the files reside (recursive=T)} +\item{inpath}{Character. The master directory path where the files reside. 
+The search is recursive (i.e., it will look in subdirectories as well).} -\item{pattern}{Character vector containing search pattern for files} +\item{pattern}{Character. A search pattern to identify files to be combined. +Default is "*full_analysis.tsv".} -\item{col_names}{Takes logical T/F arguments OR column names vector; -usage similar to col_names parameter in \code{readr::read_delim}} +\item{delim}{Character. The delimiter used in the input files. +Default is tab ("\t").} + +\item{skip}{Integer. The number of lines to skip at the beginning of each file. +Default is 0.} + +\item{col_names}{Logical or character vector. If TRUE, the first row of each file +is treated as column names. Alternatively, a character vector can +be provided to specify custom column names.} +} +\value{ +A data frame containing the combined contents of all matched files. +Each row will include a new column "ByFile" indicating the source file of the data. } \description{ Download the combined assembly summaries of genbank and refseq } +\examples{ +\dontrun{ +combined_data <- combine_files(inpath = "../molevol_data/project_data/phage_defense/") +} +} \author{ Janani Ravi } diff --git a/man/combine_full.Rd b/man/combine_full.Rd index f4e6597b..563a5450 100644 --- a/man/combine_full.Rd +++ b/man/combine_full.Rd @@ -7,8 +7,22 @@ combine_full(inpath, ret = FALSE) } \arguments{ -\item{ret}{} +\item{inpath}{Character. The path to the directory containing the +\code{.full_analysis.tsv} files to be combined.} + +\item{ret}{Logical. If TRUE, the function will return the combined data frame. +Default is FALSE, meaning it will only write the file and not return the data.} +} +\value{ +If \code{ret} is TRUE, a data frame containing the combined data from all +input files. If \code{ret} is FALSE, the function writes the combined data to a +TSV file named \code{cln_combined.tsv} in the specified directory and returns NULL. 
} \description{ Combining full_analysis files } +\examples{ +\dontrun{ +combined_data <- combine_full("path/to/full_analysis/files", ret = TRUE) +} +} diff --git a/man/combine_ipr.Rd b/man/combine_ipr.Rd index 52aa3057..ddb3e6af 100644 --- a/man/combine_ipr.Rd +++ b/man/combine_ipr.Rd @@ -7,8 +7,22 @@ combine_ipr(inpath, ret = FALSE) } \arguments{ -\item{ret}{} +\item{inpath}{Character. The path to the directory containing the +\code{.iprscan_cln.tsv} files to be combined.} + +\item{ret}{Logical. If TRUE, the function will return the combined data frame. +Default is FALSE, meaning it will only write the file and not return the data.} +} +\value{ +If \code{ret} is TRUE, a data frame containing the combined data from all +input files. If \code{ret} is FALSE, the function writes the combined data to a +TSV file named \code{ipr_combined.tsv} in the specified directory and returns NULL. } \description{ Combining clean ipr files } +\examples{ +\dontrun{ +combined_ipr_data <- combine_ipr("path/to/ipr/files", ret = TRUE) +} +} diff --git a/man/condenseRepeatedDomains.Rd b/man/condenseRepeatedDomains.Rd index 3b239129..ee51a544 100644 --- a/man/condenseRepeatedDomains.Rd +++ b/man/condenseRepeatedDomains.Rd @@ -14,7 +14,7 @@ condenseRepeatedDomains(prot, by_column = "DomArch", excluded_prots = c()) \item{excluded_prots}{Vector of strings that condenseRepeatedDomains should not reduce to (s). Defaults to c()} } \value{ -Describe return, in detail +A data frame with condensed repeated domains in the specified column. } \description{ Condense repeated domains diff --git a/man/convert2TitleCase.Rd b/man/convert2TitleCase.Rd index 84e7fa00..cd8634ef 100644 --- a/man/convert2TitleCase.Rd +++ b/man/convert2TitleCase.Rd @@ -13,8 +13,16 @@ convert2TitleCase(text, delimitter) \item{y}{Delimitter. Default is space (" ").} } +\value{ +Character vector with the input strings converted to title case. +} \description{ Translate string to Title Case w/ delimitter. 
+} +\examples{ +# Convert a single string to title case +convert2TitleCase("hello world") # Returns "Hello World" + } \seealso{ chartr, toupper, and tolower. diff --git a/man/convertAlignment2FA.Rd b/man/convertAlignment2FA.Rd index d6b4dc56..027267ad 100644 --- a/man/convertAlignment2FA.Rd +++ b/man/convertAlignment2FA.Rd @@ -26,6 +26,11 @@ Default is 'NULL'} \item{reduced}{Boolean. If TRUE, the fasta file will contain only one sequence per lineage. Default is 'FALSE'} } +\value{ +A character string representing the FASTA formatted sequences. +If \code{fa_outpath} is provided, the FASTA will also be saved to the specified +file. +} \description{ Adding Leaves to an alignment file w/ accessions Genomic Contexts vs Domain Architectures. diff --git a/man/convert_aln2fa.Rd b/man/convert_aln2fa.Rd index 8bebe31d..8ca9a3a0 100644 --- a/man/convert_aln2fa.Rd +++ b/man/convert_aln2fa.Rd @@ -23,8 +23,13 @@ Default is 'pspa.txt'} \item{fa_outpath}{Character. Path to the written fasta file. Default is 'NULL'} -\item{reduced}{Boolean. If TRUE, the fasta file will contain only one sequence per lineage. -Default is 'FALSE'} +\item{reduced}{Boolean. If TRUE, the fasta file will contain only one +sequence per lineage. Default is 'FALSE'} +} +\value{ +Character string containing the Fasta formatted sequences. +If \code{fa_outpath} is specified, the function also writes the sequences to the +Fasta file. } \description{ Adding Leaves to an alignment file w/ accessions diff --git a/man/countbycolumn.Rd b/man/countByColumn.Rd similarity index 100% rename from man/countbycolumn.Rd rename to man/countByColumn.Rd diff --git a/man/createWordCloud2Element.Rd b/man/createWordCloud2Element.Rd index a6279e2f..b1fd827f 100644 --- a/man/createWordCloud2Element.Rd +++ b/man/createWordCloud2Element.Rd @@ -15,7 +15,18 @@ createWordCloud2Element( \item{query_data}{Data frame of protein homologs with the usual 11 columns + additional word columns (0/1 format). 
Default is "prot".} -\item{UsingRowsCutoff}{} +\item{colname}{Character. The name of the column in \code{query_data} to generate +the word cloud from. Default is "DomArch".} + +\item{cutoff}{Numeric. The cutoff value for filtering elements based on their +frequency. Default is 70.} + +\item{UsingRowsCutoff}{Logical. Whether to use a row-based cutoff instead of +a frequency cutoff. Default is FALSE.} +} +\value{ +A word cloud plot showing the frequency of elements from the selected +column. } \description{ Wordclouds for the predominant domains (from DAs) and DAs (from GC) diff --git a/man/createWordCloudElement.Rd b/man/createWordCloudElement.Rd index 7f27ef41..42b32da0 100644 --- a/man/createWordCloudElement.Rd +++ b/man/createWordCloudElement.Rd @@ -15,7 +15,18 @@ createWordCloudElement( \item{query_data}{Data frame of protein homologs with the usual 11 columns + additional word columns (0/1 format). Default is "prot".} -\item{UsingRowsCutoff}{} +\item{colname}{Character. The name of the column in \code{query_data} to generate +the word cloud from. Default is "DomArch".} + +\item{cutoff}{Numeric. The cutoff value for filtering elements based on their +frequency. Default is 70.} + +\item{UsingRowsCutoff}{Logical. Whether to use a row-based cutoff instead of +a frequency cutoff. Default is FALSE.} +} +\value{ +A word cloud plot showing the frequency of elements from the selected +column. } \description{ Wordclouds for the predominant domains (from DAs) and DAs (from GC) diff --git a/man/create_lineage_lookup.Rd b/man/create_lineage_lookup.Rd index 51670f35..869db71a 100644 --- a/man/create_lineage_lookup.Rd +++ b/man/create_lineage_lookup.Rd @@ -11,20 +11,31 @@ create_lineage_lookup( ) } \arguments{ -\item{lineage_file}{Path to the rankedlineage.dmp file containing taxid's and their -corresponding taxonomic rank. rankedlineage.dmp can be downloaded at +\item{lineage_file}{Path to the rankedlineage.dmp file containing taxid's +and their corresponding taxonomic rank. 
rankedlineage.dmp can be downloaded at
 https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/new_taxdump/}
 
 \item{outfile}{File the resulting lineage lookup table should be written to}
 
-\item{taxonomic_rank}{The upperbound of taxonomic rank that the lineage includes. The lineaege will
-include superkingdom>...>taxonomic_rank.
+\item{taxonomic_rank}{The upperbound of taxonomic rank that the lineage
+includes. The lineage will include superkingdom>...>taxonomic_rank.
 Choices include: "supperkingdom", "phylum", "class","order", "family",
 "genus", and "species"}
 }
+\value{
+A tibble containing the tax IDs and their respective lineages up to
+the specified taxonomic rank, saved as a tab-separated file.
+}
 \description{
 Create a look up table that goes from TaxID, to Lineage
 }
+\examples{
+\dontrun{
+create_lineage_lookup(lineage_file = "data/rankedlineage.dmp",
+                      outfile = "data/lineage_lookup.tsv",
+                      taxonomic_rank = "family")
+}
+}
 \author{
 Samuel Chen
 }
diff --git a/man/domain_network.Rd b/man/domain_network.Rd
index 528e4924..0580b4d2 100644
--- a/man/domain_network.Rd
+++ b/man/domain_network.Rd
@@ -16,15 +16,24 @@
 domain_network(
 \arguments{
 \item{prot}{A data frame that contains the column 'DomArch'.}
 
-\item{column}{Name of column containing Domain architecture from which nodes and edges are generated.}
+\item{column}{Name of column containing Domain architecture from which nodes
+and edges are generated.}
 
-\item{cutoff}{Integer. Only use domains that occur at or above the cutoff for total counts if cutoff_type is "Total Count".
-Only use domains that appear in cutoff or greater lineages if cutoff_type is Lineage.}
+\item{domains_of_interest}{Character vector specifying domains of interest.}
+
+\item{cutoff}{Integer. Only use domains that occur at or above the cutoff for
+total counts if cutoff_type is "Total Count".
+Only use domains that appear in cutoff or greater lineages if cutoff_type is
+Lineage.}
 
 \item{layout}{Character. Layout type to be used for the network. 
Options are: \itemize{\item "grid" \item "circle" \item "random" \item "auto"}} -\item{query_color}{} +\item{query_color}{Character. Color to represent the queried domain in the +network.} +} +\value{ +A network visualization of domain architectures. } \description{ This function creates a domain network from the 'DomArch' column. diff --git a/man/downloadAssemblySummary.Rd b/man/downloadAssemblySummary.Rd index 636af878..bad2b603 100644 --- a/man/downloadAssemblySummary.Rd +++ b/man/downloadAssemblySummary.Rd @@ -10,13 +10,25 @@ downloadAssemblySummary( ) } \arguments{ -\item{outpath}{String of path where the assembly summary file should be written} +\item{outpath}{String of path where the assembly summary file should be +written} -\item{keep}{Character vector containing which columns should be retained and downloaded} +\item{keep}{Character vector containing which columns should be retained and +downloaded} +} +\value{ +A tab-separated file containing the assembly summary. The function +does not return any value but writes the output directly to the specified file. } \description{ Download the combined assembly summaries of genbank and refseq } +\examples{ +\dontrun{ +downloadAssemblySummary(outpath = "assembly_summary.tsv", + keep = c("assembly_accession", "taxid", "organism_name")) +} +} \author{ Samuel Chen, Janani Ravi } diff --git a/man/efetchIPG.Rd b/man/efetchIPG.Rd index 047e2652..e55c342a 100644 --- a/man/efetchIPG.Rd +++ b/man/efetchIPG.Rd @@ -14,13 +14,17 @@ the ipg database} \item{out_path}{Path to write the efetch results to} -\item{plan}{} +\item{plan}{Character. Specifies the execution plan for parallel processing. +Default is "multicore".} \item{accnums}{Character vector containing the accession numbers to query on the ipg database} } \value{ No return value. The function writes the fetched results to \code{out_path}. + +The function does not return a value but writes the efetch results +directly to the specified \code{out_path}.
} \description{ Perform efetch on the ipg database and write the results to out_path @@ -31,6 +35,12 @@ Perform efetch on the ipg database and write the results to out_path \dontrun{ efetchIPG() } +\dontrun{ +efetchIPG( + accessions = c("P12345", "Q67890", "A12345"), + out_path = "path/to/efetch_results.xml" +) +} } \author{ Samuel Chen, Janani Ravi diff --git a/man/extractAccNum.Rd b/man/extractAccNum.Rd index 15870f3f..caf9e5db 100644 --- a/man/extractAccNum.Rd +++ b/man/extractAccNum.Rd @@ -7,7 +7,8 @@ extractAccNum(string) } \arguments{ -\item{string}{} +\item{string}{A string from which to extract the accession number. +The string may contain accession information delimited by \code{|} or spaces.} } \value{ Describe return, in detail diff --git a/man/filterbydomains.Rd b/man/filterByDomains.Rd similarity index 100% rename from man/filterbydomains.Rd rename to man/filterByDomains.Rd diff --git a/man/filterbyfrequency.Rd b/man/filterByFrequency.Rd similarity index 100% rename from man/filterbyfrequency.Rd rename to man/filterByFrequency.Rd diff --git a/man/findparalogs.Rd b/man/findParalogs.Rd similarity index 100% rename from man/findparalogs.Rd rename to man/findParalogs.Rd diff --git a/man/find_top_acc.Rd b/man/find_top_acc.Rd index 780cde11..ffce1640 100644 --- a/man/find_top_acc.Rd +++ b/man/find_top_acc.Rd @@ -13,8 +13,32 @@ find_top_acc( ) } \arguments{ -\item{query}{} +\item{infile_full}{A data frame containing the full dataset with lineage and +domain architecture information.} + +\item{DA_col}{A string representing the name of the domain architecture +column. Default is "DomArch.Pfam".} + +\item{lin_col}{A string representing the name of the lineage column. +Default is "Lineage_short".} + +\item{n}{An integer specifying the number of top accession numbers to return. +Default is 20.} + +\item{query}{A string for filtering a specific query name. 
If it is not +"All", only the data matching this query will be processed.} +} +\value{ +A vector of the top N accession numbers (\code{AccNum}) based on counts +grouped by lineage and domain architecture. } \description{ Group by lineage + DA then take top 20 } +\examples{ +\dontrun{ +top_accessions <- find_top_acc(infile_full = my_data, +DA_col = "DomArch.Pfam", lin_col = "Lineage_short", +n = 20, query = "specific_query_name") +} +} diff --git a/man/gc_undirected_network.Rd b/man/gc_undirected_network.Rd index 28cf1abb..5dab8a70 100644 --- a/man/gc_undirected_network.Rd +++ b/man/gc_undirected_network.Rd @@ -16,18 +16,29 @@ gc_undirected_network( \arguments{ \item{prot}{A data frame that contains the column 'DomArch'.} -\item{column}{Name of column containing Domain architecture from which nodes and edges are generated.} +\item{column}{Name of column containing Domain architecture from which nodes +and edges are generated.} -\item{cutoff_type}{Character. Used to determine how data should be filtered. Either -\itemize{\item "Lineage" to filter domains based off how many lineages the Domain architecture appears in -\item "Total Count" to filter off the total amount of times a domain architecture occurs }} +\item{domains_of_interest}{Character vector specifying the domains of interest.} -\item{cutoff}{Integer. Only use domains that occur at or above the cutoff for total counts if cutoff_type is "Total Count". -Only use domains that appear in cutoff or greater lineages if cutoff_type is Lineage.} +\item{cutoff_type}{Character. Used to determine how data should be filtered. +Either +\itemize{\item "Lineage" to filter domains based off how many lineages the +Domain architecture appears in +\item "Total Count" to filter off the total amount of times a +domain architecture occurs }} + +\item{cutoff}{Integer. Only use domains that occur at or above the cutoff +for total counts if cutoff_type is "Total Count". 
+Only use domains that appear in cutoff or greater lineages if cutoff_type is +Lineage.} \item{layout}{Character. Layout type to be used for the network. Options are: \itemize{\item "grid" \item "circle" \item "random" \item "auto"}} } +\value{ +A plot of the domain architecture network. +} \description{ This function creates a domain network from the 'DomArch' column. @@ -35,6 +46,8 @@ A network of domains is returned based on shared domain architectures. } \examples{ \dontrun{ -domain_network(pspa) +domain_network(pspa, column = "DomArch", +domains_of_interest = c("Domain1", "Domain2"), +cutoff_type = "Total Count", cutoff = 10) } } diff --git a/man/generateAllAlignments2FA.Rd b/man/generateAllAlignments2FA.Rd index 3bf9938a..1100f241 100644 --- a/man/generateAllAlignments2FA.Rd +++ b/man/generateAllAlignments2FA.Rd @@ -15,23 +15,34 @@ generateAllAlignments2FA( \item{aln_path}{Character. Path to alignment files. Default is 'here("data/rawdata_aln/")'} -\item{fa_outpath}{Character. Path to file. Master protein file with AccNum & lineages. +\item{fa_outpath}{Character. Path to file. Master protein file with AccNum & +lineages. Default is 'here("data/rawdata_tsv/all_semiclean.txt")'} \item{lin_file}{Character. Path to the written fasta file. Default is 'here("data/alns/")'.} -\item{reduced}{Boolean. If TRUE, the fasta file will contain only one sequence per lineage. +\item{reduced}{Boolean. If TRUE, the fasta file will contain only one +sequence per lineage. Default is 'FALSE'.} } +\value{ +NULL. The function saves the output FASTA files to the specified +directory. +} \description{ Adding Leaves to all alignment files w/ accessions & DAs? } \details{ -The alignment files would need two columns separated by spaces: 1. AccNum and 2. alignment. The protein homolog file should have AccNum, Species, Lineages. +The alignment files would need two columns separated by spaces: +\enumerate{ +\item AccNum and 2. alignment. 
The protein homolog file should have AccNum, +Species, Lineages. +} } \note{ -Please refer to the source code if you have alternate + file formats and/or column names. +Please refer to the source code if you have alternate + file formats +and/or column names. } \examples{ \dontrun{ diff --git a/man/generate_all_aln2fa.Rd b/man/generate_all_aln2fa.Rd index ad6b7136..0a9b7e0f 100644 --- a/man/generate_all_aln2fa.Rd +++ b/man/generate_all_aln2fa.Rd @@ -18,20 +18,26 @@ Default is 'here("data/rawdata_aln/")'} \item{fa_outpath}{Character. Path to the written fasta file. Default is 'here("data/alns/")'.} -\item{lin_file}{Character. Path to file. Master protein file with AccNum & lineages. -Default is 'here("data/rawdata_tsv/all_semiclean.txt")'} +\item{lin_file}{Character. Path to file. Master protein file with AccNum & +lineages. Default is 'here("data/rawdata_tsv/all_semiclean.txt")'} -\item{reduced}{Boolean. If TRUE, the fasta file will contain only one sequence per lineage. -Default is 'FALSE'.} +\item{reduced}{Boolean. If TRUE, the fasta file will contain only one +sequence per lineage. Default is 'FALSE'.} +} +\value{ +A list of paths to the generated Fasta files. } \description{ Adding Leaves to all alignment files w/ accessions & DAs? } \details{ -The alignment files would need two columns separated by spaces: 1. AccNum and 2. alignment. The protein homolog file should have AccNum, Species, Lineages. +The alignment files would need two columns separated by spaces: 1. +AccNum and 2. alignment. The protein homolog file should have AccNum, +Species, Lineages. } \note{ -Please refer to the source code if you have alternate + file formats and/or column names. +Please refer to the source code if you have alternate + file +formats and/or column names. 
} \examples{ \dontrun{ diff --git a/man/generate_msa.Rd b/man/generate_msa.Rd index a68eb8b4..90f2ca91 100644 --- a/man/generate_msa.Rd +++ b/man/generate_msa.Rd @@ -7,8 +7,21 @@ generate_msa(fa_file = "", outfile = "") } \arguments{ -\item{outfile}{} +\item{fa_file}{Character. The path to the input FASTA file containing protein +sequences.} + +\item{outfile}{Character. The path to the output file where the alignment +will be saved.} +} +\value{ +A list containing the alignment object and the output file path. } \description{ Function to generate MSA using kalign } +\examples{ +\dontrun{ +generate_msa(fa_file = "path/to/sequences.fasta", +outfile = "path/to/alignment.txt") +} +} diff --git a/man/get_accnums_from_fasta_file.Rd b/man/get_accnums_from_fasta_file.Rd index 84c163cc..3a3c1784 100644 --- a/man/get_accnums_from_fasta_file.Rd +++ b/man/get_accnums_from_fasta_file.Rd @@ -9,10 +9,27 @@ get_accnums_from_fasta_file(fasta_file) get_accnums_from_fasta_file(fasta_file) } \arguments{ -\item{fasta_file}{} +\item{fasta_file}{Character. Path to the FASTA file from which +accession numbers will be extracted.} +} +\value{ +A character vector containing the extracted accession numbers. + +A character vector containing the extracted accession numbers. 
} \description{ Get accnums from fasta file get_accnums_from_fasta_file } +\examples{ +\dontrun{ +accnums <- get_accnums_from_fasta_file("my_sequences.fasta") +print(accnums) +} +\dontrun{ +# Example usage +accnums <- get_accnums_from_fasta_file("path/to/sequences.fasta") +print(accnums) +} +} diff --git a/man/ipr2viz.Rd b/man/ipr2viz.Rd index 79063497..728c188c 100644 --- a/man/ipr2viz.Rd +++ b/man/ipr2viz.Rd @@ -17,8 +17,51 @@ ipr2viz( ) } \arguments{ -\item{query}{} +\item{infile_ipr}{A path to the input IPR file (TSV format) containing +domain information.} + +\item{infile_full}{A path to the full input file (TSV format) containing +lineage and accession information.} + +\item{accessions}{A character vector of accession numbers to filter the +analysis. Default is an empty vector.} + +\item{analysis}{A character vector specifying the types of analysis to +include (e.g., "Pfam", "Phobius", "TMHMM", "Gene3D"). Default is a +vector of these analyses.} + +\item{group_by}{A string specifying how to group the visualization. +Default is "Analysis". Options include "Analysis" or "Query".} + +\item{topn}{An integer specifying the number of top accessions to visualize. +Default is 20.} + +\item{name}{A string representing the name to use for y-axis labels. +Default is "Name".} + +\item{text_size}{An integer specifying the text size for the plot. +Default is 15.} + +\item{query}{A string for filtering a specific query name. If it is not +"All", only the data matching this query will be processed.} +} +\value{ +A ggplot object representing the domain architecture visualization. 
} \description{ IPR2Viz } +\examples{ +\dontrun{ +plot <- ipr2viz(infile_ipr = "path/to/ipr_file.tsv", + infile_full = "path/to/full_file.tsv", + accessions = c("ACC123", "ACC456"), + analysis = c("Pfam", "TMHMM"), + group_by = "Analysis", + topn = 20, + name = "Gene Name", + text_size = 15, + query = "All") +print(plot) +} +} diff --git a/man/ipr2viz_web.Rd b/man/ipr2viz_web.Rd index 896445bd..defa5b2d 100644 --- a/man/ipr2viz_web.Rd +++ b/man/ipr2viz_web.Rd @@ -17,8 +17,52 @@ ipr2viz_web( ) } \arguments{ -\item{rows}{} +\item{infile_ipr}{A path to the input IPR file (TSV format) containing +domain information.} + +\item{accessions}{A character vector of accession numbers to filter the +analysis.} + +\item{analysis}{A character vector specifying the types of analysis to +include (e.g., "Pfam", "Phobius", "TMHMM", "Gene3D"). Default is a vector +of these analyses.} + +\item{group_by}{A string specifying how to group the visualization. +Default is "Analysis". Options include "Analysis" or "Query".} + +\item{name}{A string representing the name to use for y-axis labels. +Default is "Name".} + +\item{text_size}{An integer specifying the text size for the plot. +Default is 15.} + +\item{legend_name}{A string representing the column to use for legend labels. +Default is "ShortName".} + +\item{cols}{An integer specifying the number of columns in the facet wrap. +Default is 5.} + +\item{rows}{An integer specifying the number of rows in the legend. +Default is 10.} +} +\value{ +A ggplot object representing the domain architecture visualization +for web display. 
} \description{ IPR2Viz Web } +\examples{ +\dontrun{ +plot <- ipr2viz_web(infile_ipr = "path/to/ipr_file.tsv", + accessions = c("ACC123", "ACC456"), + analysis = c("Pfam", "TMHMM"), + group_by = "Analysis", + name = "Gene Name", + text_size = 15, + legend_name = "ShortName", + cols = 5, + rows = 10) +print(plot) +} +} diff --git a/man/mapAcc2Name.Rd b/man/mapAcc2Name.Rd index 0f5d447d..a59c8760 100644 --- a/man/mapAcc2Name.Rd +++ b/man/mapAcc2Name.Rd @@ -9,13 +9,24 @@ mapAcc2Name(line, acc2name, acc_col = "AccNum", name_col = "Name") \arguments{ \item{line}{The line of a fasta file starting with '>'} -\item{acc2name}{Data Table containing a column of accession numbers and a name column} +\item{acc2name}{Data Table containing a column of accession numbers and a +name column} \item{acc_col}{Name of the column containing Accession numbers} -\item{name_col}{Name of the column containing the names that the accession numbers +\item{name_col}{Name of the column containing the names that the accession +numbers are mapped to} } +\value{ +A character string representing the updated FASTA line, where the +accession number is replaced with its corresponding name. +} \description{ Default renameFA() replacement function. 
Maps an accession number to its name } +\examples{ +\dontrun{ +mapAcc2Name(">P12345 some description", acc2name, "AccNum", "Name") +} +} diff --git a/man/map_acc2name.Rd b/man/map_acc2name.Rd index fcdb3023..88377eea 100644 --- a/man/map_acc2name.Rd +++ b/man/map_acc2name.Rd @@ -7,7 +7,7 @@ map_acc2name(line, acc2name, acc_col = "AccNum", name_col = "Name") } \arguments{ -\item{line}{he line of a fasta file starting with '>'} +\item{line}{The line of a fasta file starting with '>'} \item{acc2name}{Data Table containing a column of accession numbers and a name column} @@ -16,6 +16,19 @@ map_acc2name(line, acc2name, acc_col = "AccNum", name_col = "Name") \item{name_col}{Name of the column containing the names that the accession numbers are mapped to} } +\value{ +Character string. The modified line from the Fasta file header with +the name instead of the accession number. +} \description{ Default rename_fasta() replacement function. Maps an accession number to its name } +\examples{ +\dontrun{ +acc2name_table <- data.table(AccNum = c("ACC001", "ACC002"), +Name = c("Species A", "Species B")) +line <- ">ACC001 some additional info" +mapped_line <- map_acc2name(line, acc2name_table) +print(mapped_line) # Expected output: ">Species A" +} +} diff --git a/man/msa_pdf.Rd b/man/msa_pdf.Rd index 4d5fed17..0f42eb9f 100644 --- a/man/msa_pdf.Rd +++ b/man/msa_pdf.Rd @@ -18,6 +18,9 @@ Default is NULL. If value is NULL, the entire multiple sequence alignment is pri \item{upperbound}{Numeric. The column that determines the ending location of the MSA. Default is NULL. If value is NULL, the entire multiple sequence alignment is printed.} } +\value{ +A PDF file containing the multiple sequence alignment. 
+} \description{ Generates a multiple sequence alignment from a fasta file @@ -26,6 +29,9 @@ a pdf } \examples{ \dontrun{ -msa_pdf() +msa_pdf(fasta_path = "path/to/your/file.fasta", + out_path = "path/to/output/alignment.pdf", + lowerbound = 10, + upperbound = 200) } } diff --git a/man/plotLineageDA.Rd b/man/plotLineageDA.Rd index 7e84bcfd..a752eb9b 100644 --- a/man/plotLineageDA.Rd +++ b/man/plotLineageDA.Rd @@ -20,9 +20,17 @@ Default is prot (variable w/ protein data).} \item{colname}{Column name from query_data: "DomArch.norep", "GenContext.norep", "DomArch.PFAM.norep" or "DomArch.LADB.norep". Default is "DomArch.norep".} +\item{cutoff}{Numeric. Cutoff for word frequency. Default is 90.} + +\item{RowsCutoff}{Boolean. If TRUE, applies a row cutoff to remove data rows +based on a certain condition. Default is FALSE.} + \item{color}{Color for the heatmap. One of six options: "default", "magma", "inferno", "plasma", "viridis", or "cividis"} } +\value{ +A LineageDA plot object. +} \description{ Lineage plot for Domains, Domain Architectures and Genomic Contexts. Heatmap. diff --git a/man/plotLineageDomainRepeats.Rd b/man/plotLineageDomainRepeats.Rd index 8ccfba41..45d31d68 100644 --- a/man/plotLineageDomainRepeats.Rd +++ b/man/plotLineageDomainRepeats.Rd @@ -7,7 +7,16 @@ plotLineageDomainRepeats(query_data, colname) } \arguments{ -\item{colname}{} +\item{query_data}{Data frame containing protein homolog data, including +relevant domain architectures and lineages.} + +\item{colname}{Character. The name of the column in query_data that contains +domain architectures or other structural information.} +} +\value{ +A ggplot object representing a heatmap (tile plot) of domain repeat +counts across different lineages, with color intensity representing the +occurrence of domains. 
} \description{ Lineage Domain Repeats Plot diff --git a/man/plotLineageHeatmap.Rd b/man/plotLineageHeatmap.Rd index 5449f8ec..e6870edb 100644 --- a/man/plotLineageHeatmap.Rd +++ b/man/plotLineageHeatmap.Rd @@ -15,6 +15,11 @@ plotLineageHeatmap(prot, domains_of_interest, level = 3, label.size = 8) \item{label.size}{Size of the text labels} } +\value{ +A ggplot object representing a heatmap (tile plot) of domain repeat +counts across different lineages, with color intensity representing the +occurrence of domains. +} \description{ Generate a lineage plot } diff --git a/man/plotLineageNeighbors.Rd b/man/plotLineageNeighbors.Rd index 85adf175..2c7ca448 100644 --- a/man/plotLineageNeighbors.Rd +++ b/man/plotLineageNeighbors.Rd @@ -18,6 +18,11 @@ additional word columns (0/1 format). Default is pspa_data.} \item{colname}{Column name from query_data. Default is "GenContext.norep".} } +\value{ +A ggplot object representing a heatmap (tile plot) of lineage versus +the top neighboring domain architectures, with color intensity representing +the frequency of occurrences. +} \description{ Lineage plot for top neighbors obtained from DAs of Genomic Contexts. diff --git a/man/plotLineageQuery.Rd b/man/plotLineageQuery.Rd index ad52a4d2..aa3793b7 100644 --- a/man/plotLineageQuery.Rd +++ b/man/plotLineageQuery.Rd @@ -17,9 +17,22 @@ plotLineageQuery( additional word columns (0/1 format). Default is prot (variable w/ protein data).} -\item{queries}{Character Vector containing the queries that will be used for the categories} +\item{queries}{Character Vector containing the queries that will be used for +the categories.} -\item{color}{} +\item{colname}{Character. The column used for filtering based on the \code{queries}. +Default is "ClustName".} + +\item{cutoff}{Numeric. The cutoff value for filtering rows based on their +total count. Rows with values below this cutoff are excluded.} + +\item{color}{Character. Defines the color palette used for the heatmap. 
+Default is a red gradient.} +} +\value{ +A ggplot object representing a heatmap (tile plot) showing the +relationship between queries and lineages, with the intensity of color +representing the count of matching records. } \description{ Lineage plot for queries. Heatmap. @@ -33,6 +46,9 @@ column names. plotLineageQuery(prot, c("PspA", "PspB", "PspC", "PspM", "PspN"), 95) } } +\author{ +Janani Ravi, Samuel Chen +} \keyword{Architectures,} \keyword{Domain} \keyword{Domains,} diff --git a/man/plotLineageSunburst.Rd b/man/plotLineageSunburst.Rd index 972bbe5d..3240d77d 100644 --- a/man/plotLineageSunburst.Rd +++ b/man/plotLineageSunburst.Rd @@ -16,27 +16,40 @@ plotLineageSunburst( ) } \arguments{ -\item{prot}{Data frame containing a lineage column that the sunburst plot will be generated for} +\item{prot}{Data frame containing a lineage column that the sunburst plot +will be generated for} -\item{lineage_column}{String. Name of the lineage column within the data frame. Defaults to "Lineage"} +\item{lineage_column}{String. Name of the lineage column within the +data frame. Defaults to "Lineage"} -\item{type}{String, either "sunburst" or "sund2b". If type is "sunburst", a sunburst plot of the lineage} +\item{type}{String, either "sunburst" or "sund2b". If type is "sunburst", +a sunburst plot of the lineage} \item{levels}{Integer. Number of levels the sunburst will have.} -\item{legendOrder}{String vector. The order of the legend. If legendOrder is NULL,} +\item{colors}{A vector of colors for the sunburst plot. +If NULL, default colors are used.} -\item{showLegend}{Boolean. If TRUE, the legend will be enabled when the component first renders.} +\item{legendOrder}{String vector. The order of the legend. If legendOrder +is NULL,} -\item{maxLevels}{Integer, the maximum number of levels to display in the sunburst; 5 by default, NULL to disable -then the legend will be in the descending order of the top level hierarchy. -will be rendered. 
If the type is sund2b, a sund2b plot will be rendered.} +\item{showLegend}{Boolean. If TRUE, the legend will be enabled when the +component first renders.} + +\item{maxLevels}{Integer, the maximum number of levels to display in the +sunburst; 5 by default, NULL to disable then the legend will be in the +descending order of the top level hierarchy. will be rendered. If the type is +sund2b, a sund2b plot will be rendered.} +} +\value{ +A sunburst or sund2b plot based on the input lineage data. } \description{ Lineage Sunburst } \examples{ \dontrun{ -plotLineageSunburst() +plotLineageSunburst(prot, lineage_column = "Lineage", +type = "sunburst", levels = 3) } } diff --git a/man/plotStackedLineage.Rd b/man/plotStackedLineage.Rd index 9d1cde6d..63ae9b66 100644 --- a/man/plotStackedLineage.Rd +++ b/man/plotStackedLineage.Rd @@ -21,7 +21,44 @@ plotStackedLineage( ) } \arguments{ -\item{legend}{} +\item{prot}{Data frame containing protein data including domain architecture +and lineage information.} + +\item{column}{Character. The name of the column in prot representing domain +architectures (default is "DomArch").} + +\item{cutoff}{Numeric. A threshold value for filtering domain architectures +or protein counts.} + +\item{Lineage_col}{Character. The name of the column representing lineage +data (default is "Lineage").} + +\item{xlabel}{Character. Label for the x-axis +(default is "Domain Architecture").} + +\item{reduce_lineage}{Logical. Whether to shorten lineage names +(default is TRUE).} + +\item{label.size}{Numeric. The size of axis text labels (default is 8).} + +\item{legend.position}{Numeric vector. Coordinates for placing the legend +(default is c(0.7, 0.4)).} + +\item{legend.text.size}{Numeric. Size of the text in the legend +(default is 10).} + +\item{legend.cols}{Numeric. Number of columns in the legend (default is 2).} + +\item{legend.size}{Numeric. Size of the legend keys (default is 0.7).} + +\item{coord_flip}{Logical. 
Whether to flip the coordinates of the plot +(default is TRUE).} + +\item{legend}{Logical. Whether to display the legend (default is TRUE).} +} +\value{ +A ggplot object representing a stacked bar plot showing the +distribution of protein domain architectures across lineages. } \description{ Stacked Lineage Plot diff --git a/man/plotSunburst.Rd b/man/plotSunburst.Rd index 5ee465a6..37da9df5 100644 --- a/man/plotSunburst.Rd +++ b/man/plotSunburst.Rd @@ -10,11 +10,11 @@ plotSunburst(count_data, fill_by_n = FALSE, sort_by_n = FALSE, maxdepth = 2) plotTreemap(count_data, fill_by_n = FALSE, sort_by_n = FALSE) } \arguments{ -\item{count_data}{} +\item{count_data}{A data frame containing the data.} -\item{fill_by_n}{If TRUE, uses a continuous scale to fill plot by group size} +\item{fill_by_n}{Logical indicating if fill color is based on counts.} -\item{sort_by_n}{} +\item{sort_by_n}{Logical indicating if data should be sorted by counts.} } \description{ These functions help you quickly create interactive hierarchical plots diff --git a/man/plotUpSet.Rd b/man/plotUpSet.Rd index 84169987..47dd12e1 100644 --- a/man/plotUpSet.Rd +++ b/man/plotUpSet.Rd @@ -18,15 +18,30 @@ plotUpSet( \item{query_data}{Data frame of protein homologs with the usual 11 columns + additional word columns (0/1 format). Default is toast_rack.sub} +\item{colname}{Column name from query_data: "DomArch.norep", "GenContext.norep", +"DomArch.PFAM.norep" or "DomArch.LADB.norep". Default is "DomArch.norep".} + \item{cutoff}{Numeric. Cutoff for word frequency. Default is 90.} -\item{text.scale}{Allows scaling of axis title, tick lables, and numbers above the intersection size bars. +\item{RowsCutoff}{Boolean. If TRUE, applies a row cutoff to remove data rows +based on a certain condition. Default is FALSE.} + +\item{text.scale}{Allows scaling of axis title, tick lables, and numbers +above the intersection size bars. 
text.scale can either take a universal scale in the form of an integer, or a vector of specific scales in the format: c(intersection size title, intersection size tick labels, set size title, set size tick labels, set names, numbers above bars)} -\item{line.size}{} +\item{point.size}{Numeric. Sets the size of points in the UpSet plot. +Default is 2.2.} + +\item{line.size}{Numeric. Sets the line width in the UpSet plot. +Default is 0.8.} +} +\value{ +An UpSet plot object. The plot visualizes intersections of sets based +on the provided colname in query_data. } \description{ UpSet plot for Domain Architectures vs Domains and diff --git a/man/prepareColumnParams.Rd b/man/prepareColumnParams.Rd index bb0b9a29..8a9f566b 100644 --- a/man/prepareColumnParams.Rd +++ b/man/prepareColumnParams.Rd @@ -7,8 +7,23 @@ prepareColumnParams(count_data, fill_by_n, sort_by_n) } \arguments{ -\item{sort_by_n}{} +\item{count_data}{A data frame containing the data.} + +\item{fill_by_n}{Logical indicating if fill color is based on counts.} + +\item{sort_by_n}{Logical indicating if data should be sorted by counts.} +} +\value{ +A data frame of parameters for treemap visualization. } \description{ prepareColumnParams } +\examples{ +\dontrun{ +count_data <- data.frame(Category = c("A", "B", "C"), + n = c(10, 20, 15)) +params <- prepareColumnParams(count_data, fill_by_n = TRUE, sort_by_n = FALSE) +print(params) +} +} diff --git a/man/prepareSingleColumnParams.Rd b/man/prepareSingleColumnParams.Rd index d823852b..0070497e 100644 --- a/man/prepareSingleColumnParams.Rd +++ b/man/prepareSingleColumnParams.Rd @@ -7,8 +7,24 @@ prepareSingleColumnParams(df, col_num, root) } \arguments{ -\item{root}{} +\item{df}{A data frame containing the data to be processed.} + +\item{col_num}{An integer representing the column number to process.} + +\item{root}{A string representing the root node for the treemap.} +} +\value{ +A data frame containing parameters for the specified column for +treemap visualization. 
} \description{ prepareSingleColumnParams } +\examples{ +\dontrun{ +df <- data.frame(Category = c("A", "A", "B", "B", "C"), + n = c(10, 20, 30, 40, 50)) +params <- prepareSingleColumnParams(df, col_num = 1, root = "Root") +print(params) +} +} diff --git a/man/proteinAcc2TaxID.Rd b/man/proteinAcc2TaxID.Rd index c0317bba..9be09d53 100644 --- a/man/proteinAcc2TaxID.Rd +++ b/man/proteinAcc2TaxID.Rd @@ -7,8 +7,32 @@ proteinAcc2TaxID(accnums, suffix, out_path, return_dt = FALSE) } \arguments{ -\item{return_dt}{} +\item{accnums}{A character vector of protein accession numbers to be mapped +to TaxIDs.} + +\item{suffix}{A string suffix used to name the output file generated by the +script.} + +\item{out_path}{A string specifying the directory where the output file will +be saved.} + +\item{return_dt}{A logical indicating whether to return the result as a data +table. Defaults to FALSE. If TRUE, the output file is read into a data table +and returned.} +} +\value{ +If \code{return_dt} is TRUE, a data table containing the mapping of protein +accession numbers to TaxIDs. If FALSE, the function returns NULL. 
} \description{ proteinAcc2TaxID } +\examples{ +\dontrun{ +# Example accession numbers +accessions <- c("ABC123", "XYZ456", "LMN789") +tax_data <- proteinAcc2TaxID(accessions, suffix = "example", +out_path = "/path/to/output", return_dt = TRUE) +print(tax_data) +} +} diff --git a/man/proteinAcc2TaxID_old.Rd b/man/proteinAcc2TaxID_old.Rd index 0c2a85ba..fb6cd5a0 100644 --- a/man/proteinAcc2TaxID_old.Rd +++ b/man/proteinAcc2TaxID_old.Rd @@ -7,17 +7,29 @@ proteinAcc2TaxID_old(accessions, out_path, plan = "multicore") } \arguments{ -\item{accessions}{Character vector containing the accession numbers to query on -the ipg database} +\item{accessions}{A character vector containing the accession numbers to query +in the protein database.} -\item{out_path}{Path to write the efetch results to} +\item{out_path}{A string specifying the path where the results of the query +will be written. If set to NULL, a temporary directory will be used.} -\item{plan}{} +\item{plan}{A character string that specifies the execution plan for parallel +processing. The default is "multicore".} +} +\value{ +This function does not return a value. It writes the results to the +specified output path. 
} \description{ Perform elink to go from protein database to taxonomy database and write the resulting file of taxid and lineage to out_path } +\examples{ +\dontrun{ +accessions <- c("ABC123", "XYZ456", "LMN789") +proteinAcc2TaxID_old(accessions, out_path = "/path/to/output") +} +} \author{ Samuel Chen, Janani Ravi } diff --git a/man/removeAsterisks.Rd b/man/removeAsterisks.Rd index 691a7adf..c62b7651 100644 --- a/man/removeAsterisks.Rd +++ b/man/removeAsterisks.Rd @@ -2,15 +2,19 @@ % Please edit documentation in R/cleanup.R \name{removeAsterisks} \alias{removeAsterisks} -\title{Remove Astrk} +\title{Remove Asterisk} \usage{ removeAsterisks(query_data, colname = "GenContext") } \arguments{ -\item{colname}{} +\item{query_data}{A data frame containing the data to be processed.} + +\item{colname}{The name of the column from which asterisks should be removed. +Defaults to "GenContext".} } \value{ -Describe return, in detail +The original data frame with asterisks removed from the specified +column. } \description{ Remove the asterisks from a column of data diff --git a/man/removeEmptyRows.Rd b/man/removeEmptyRows.Rd index 66551810..4e52cc99 100644 --- a/man/removeEmptyRows.Rd +++ b/man/removeEmptyRows.Rd @@ -13,7 +13,8 @@ removeEmptyRows(prot, by_column = "DomArch") Default column is 'DomArch'. Can also take the following as input, 'Species', 'GenContext', 'ClustName'.} } \value{ -Describe return, in detail +A tibble with rows removed where the specified column contains +\code{"-"}, \code{"NA"}, or an empty string. 
} \description{ Remove empty rows by column diff --git a/man/removeTails.Rd b/man/removeTails.Rd index 76d1e18a..0c63e89d 100644 --- a/man/removeTails.Rd +++ b/man/removeTails.Rd @@ -14,7 +14,8 @@ removeTails(prot, by_column = "DomArch", keep_domains = FALSE) \item{keep_domains}{Default is False Keeps tail entries that contain the query domains.} } \value{ -Describe return, in detail +The original data frame with singletons removed from the specified +column. } \description{ Remove tails/singletons diff --git a/man/renameFA.Rd b/man/renameFA.Rd index 7b6fd579..da7d339b 100644 --- a/man/renameFA.Rd +++ b/man/renameFA.Rd @@ -15,6 +15,15 @@ renameFA(fa_path, outpath, replacement_function = mapAcc2Name, ...) \item{...}{Additional arguments to pass to replacement_function} } +\value{ +A character vector of the modified lines in the FASTA file. +} \description{ Rename the labels of fasta files } +\examples{ +\dontrun{ +renameFA("path/to/input.fasta", +"path/to/output.fasta", mapAcc2Name, acc2name) +} +} diff --git a/man/rename_fasta.Rd b/man/rename_fasta.Rd index 6b4e5dd7..3089d530 100644 --- a/man/rename_fasta.Rd +++ b/man/rename_fasta.Rd @@ -15,6 +15,15 @@ rename_fasta(fa_path, outpath, replacement_function = map_acc2name, ...) \item{...}{Additional arguments to pass to replacement_function} } +\value{ +Character vector containing the modified lines of the Fasta file. +} \description{ Rename the labels of fasta files } +\examples{ +\dontrun{ +rename_fasta("input.fasta", "output.fasta", +replacement_function = map_acc2name, acc2name = acc2name_table) +} +} diff --git a/man/replaceQuestionMarks.Rd b/man/replaceQuestionMarks.Rd index 0949568f..8b16992a 100644 --- a/man/replaceQuestionMarks.Rd +++ b/man/replaceQuestionMarks.Rd @@ -12,7 +12,9 @@ replaceQuestionMarks(prot, by_column = "GenContext") \item{by_column}{Column to operate on} } \value{ -Describe return, in detail +The original data frame with the specified column updated. All +consecutive '?' 
characters will be replaced with 'X(s)', and individual '?'
+characters will be replaced with 'X'.
 }
 \description{
 Replace consecutive '?' separated by '->', '<-' or '||' with 'X(s)'
diff --git a/man/reveql.Rd b/man/reveql.Rd
index 9dc2bcb8..b16ed7be 100644
--- a/man/reveql.Rd
+++ b/man/reveql.Rd
@@ -2,13 +2,26 @@
 % Please edit documentation in R/reverse_operons.R
 \name{reveql}
 \alias{reveql}
-\title{reveql}
+\title{reveql: Reverse Equalities in Genomic Context}
 \usage{
 reveql(prot)
 }
 \arguments{
-\item{prot}{}
+\item{prot}{\link{vector} A vector of genomic context strings to be processed.}
+}
+\value{
+\link{vector} A vector of the same length as the input, where each genomic
+element is annotated with either a forward ("->") or reverse ("<-") direction,
+depending on its position relative to the "=" symbols.
 }
 \description{
-reveql
+This function processes the genomic context strings (GenContext) and reverses
+directional signs based on the presence of an equal sign ("=").
+}
+\examples{
+# Example input: Genomic context with directional symbols and an asterisk
+genomic_context <- c("A", "B", "*", "C", "D", "=", "E", "F")
+reveql(genomic_context)
+
+# Output: "A->", "B->", "*", "<-C", "<-D", "=", "E->", "F->"
 }
diff --git a/man/reverse_operon.Rd b/man/reverse_operon.Rd
index 270e2a62..1c27aecc 100644
--- a/man/reverse_operon.Rd
+++ b/man/reverse_operon.Rd
@@ -2,13 +2,28 @@
 % Please edit documentation in R/reverse_operons.R
 \name{reverse_operon}
 \alias{reverse_operon}
-\title{reverse_operon}
+\title{reverse_operon: Reverse the Direction of Operons in Genomic Context}
 \usage{
 reverse_operon(prot)
 }
 \arguments{
-\item{prot}{}
+\item{prot}{\link{data.frame} A data frame containing at least a column named
+'GenContext', which represents the genomic contexts that need to be reversed.}
+}
+\value{
+\link{data.frame} The input data frame with the 'GenContext' column updated
+to reflect the reversed operons.
} \description{ -reverse_operon +This function processes a genomic context data frame to reverse the direction +of operons based on specific patterns in the GenContext column. It handles +elements represented by ">" and "<" and restructures the genomic context by +flipping the direction of operons while preserving the relationships +indicated by "=". +} +\examples{ +# Example genomic context data frame +prot <- data.frame(GenContext = c("A>B", "CI")) +reversed_prot <- reverse_operon(prot) +print(reversed_prot) } diff --git a/man/runIPRScan.Rd b/man/runIPRScan.Rd index 678d8652..8431efb4 100644 --- a/man/runIPRScan.Rd +++ b/man/runIPRScan.Rd @@ -7,8 +7,28 @@ runIPRScan(filepath_fasta, filepath_out, appl = c("Pfam", "Gene3D")) } \arguments{ -\item{appl}{} +\item{filepath_fasta}{A string representing the path to the input FASTA file.} + +\item{filepath_out}{A string representing the base path for the output file.} + +\item{appl}{A character vector specifying the InterProScan applications to +use (e.g., "Pfam", "Gene3D"). Default is \code{c("Pfam", "Gene3D")}.} +} +\value{ +A data frame containing the results from the InterProScan output +TSV file. } \description{ -runIPRScan +Run InterProScan on a given FASTA file and save the results to an +output file. 
+} +\examples{ +\dontrun{ +results <- runIPRScan( + filepath_fasta = "path/to/your_fasta_file.fasta", + filepath_out = "path/to/output_file", + appl = c("Pfam", "Gene3D") +) +print(results) +} } diff --git a/man/run_deltablast.Rd b/man/run_deltablast.Rd index 3c934d77..2a9f01b0 100644 --- a/man/run_deltablast.Rd +++ b/man/run_deltablast.Rd @@ -16,12 +16,35 @@ run_deltablast( ) } \arguments{ -\item{db_search_path}{Path to the BLAST databases} +\item{deltablast_path}{Path to the Delta-BLAST executable.} -\item{num_threads}{} +\item{db_search_path}{Path to the BLAST databases.} + +\item{db}{Name of the BLAST database to search against (default is "refseq").} + +\item{query}{Path to the input query file.} + +\item{evalue}{E-value threshold for reporting matches (default is "1e-5").} + +\item{out}{Path to the output file where results will be saved.} + +\item{num_alignments}{Number of alignments to report.} + +\item{num_threads}{Number of threads to use for the search (default is 1).} +} +\value{ +This function does not return a value; it outputs results to the +specified file. } \description{ -Run DELTABLAST to find homologs for proteins of interest +This function executes a Delta-BLAST search using the specified parameters +and database. It sets the BLAST database path, runs the Delta-BLAST command +with the given query, and outputs the results. 
+} +\examples{ +\dontrun{ +run_deltablast(deltablast_path, db_search_path, query, out, num_alignments) +} } \author{ Samuel Chen, Janani Ravi diff --git a/man/run_rpsblast.Rd b/man/run_rpsblast.Rd index bc4474f1..4b638a72 100644 --- a/man/run_rpsblast.Rd +++ b/man/run_rpsblast.Rd @@ -15,10 +15,31 @@ run_rpsblast( ) } \arguments{ -\item{db_search_path}{Path to the BLAST databases} +\item{rpsblast_path}{Path to the RPS-BLAST executable.} -\item{num_threads}{} +\item{db_search_path}{Path to the BLAST databases.} + +\item{db}{Name of the BLAST database to search against (default is "refseq").} + +\item{query}{Path to the input query file.} + +\item{evalue}{E-value threshold for reporting matches (default is "1e-5").} + +\item{out}{Path to the output file where results will be saved.} + +\item{num_threads}{Number of threads to use for the search (default is 1).} +} +\value{ +This function does not return a value; it outputs results to the +specified file. } \description{ -Run RPSBLAST to generate domain architectures for proteins of interest +This function executes an RPS-BLAST search to generate domain architectures +for specified proteins. It sets the BLAST database path, runs the RPS-BLAST +command with the provided query, and outputs the results. +} +\examples{ +\dontrun{ +run_rpsblast(rpsblast_path, db_search_path, query, out) +} } diff --git a/man/selectLongestDuplicate.Rd b/man/selectLongestDuplicate.Rd index c177d289..bd535455 100644 --- a/man/selectLongestDuplicate.Rd +++ b/man/selectLongestDuplicate.Rd @@ -7,10 +7,15 @@ selectLongestDuplicate(prot, column) } \arguments{ -\item{column}{} +\item{prot}{A data frame containing the data, with at least one column +named 'AccNum' for identification of duplicates.} + +\item{column}{The name of the column from which the longest entry among +duplicates will be selected.} } \value{ -Describe return, in detail +A data frame containing only the longest entries among duplicates +based on the specified column. 
} \description{ Pick Longer Duplicate diff --git a/man/shortenLineage.Rd b/man/shortenLineage.Rd index f495fb32..00200f96 100644 --- a/man/shortenLineage.Rd +++ b/man/shortenLineage.Rd @@ -2,18 +2,34 @@ % Please edit documentation in R/plotting.R \name{shortenLineage} \alias{shortenLineage} -\title{Shorten Lineage} +\title{Shorten Lineage Names} \usage{ shortenLineage(data, colname = "Lineage", abr_len = 1) } \arguments{ -\item{abr_len}{} +\item{data}{A data frame that contains a column with lineage names to be +shortened.} + +\item{colname}{Character. The name of the column in the data frame containing +the lineage strings to be shortened. Default is \code{"Lineage"}.} + +\item{abr_len}{Integer. The number of characters to retain after the first +letter. If set to 1, only the first letter of each segment before the +delimiter (\code{>}) is retained. Default is 1.} +} +\value{ +A modified data frame where the specified lineage column has been +shortened. } \description{ -Shorten Lineage +This function abbreviates lineage names by shortening the first part of the +string (up to a given delimiter). } \examples{ \dontrun{ -shortenLineage() +df <- data.frame(Lineage = c("Bacteria>Firmicutes>Clostridia", +"Archaea>Euryarchaeota>Thermococci")) +shortened_df <- shortenLineage(df, colname = "Lineage", abr_len = 1) +print(shortened_df) } } diff --git a/man/summarizebylineage.Rd b/man/summarizeByLineage.Rd similarity index 100% rename from man/summarizebylineage.Rd rename to man/summarizeByLineage.Rd diff --git a/man/theme_genes2.Rd b/man/theme_genes2.Rd index 29f79673..d1420067 100644 --- a/man/theme_genes2.Rd +++ b/man/theme_genes2.Rd @@ -6,6 +6,19 @@ \usage{ theme_genes2() } +\value{ +A ggplot2 theme object. 
+}
 \description{
 Theme Genes2
 }
+\examples{
+library(ggplot2)
+
+# Create a sample plot using the custom theme
+ggplot(mtcars, aes(x = wt, y = mpg)) +
+    geom_point() +
+    theme_genes2() +
+    labs(title = "Car Weight vs MPG")
+
+}
diff --git a/man/to_titlecase.Rd b/man/to_titlecase.Rd
index 45139d3b..1b142875 100644
--- a/man/to_titlecase.Rd
+++ b/man/to_titlecase.Rd
@@ -13,10 +13,17 @@ to_titlecase(text, delimitter)
 
 \item{y}{Delimitter. Default is space (" ").}
 }
+\value{
+A character vector in title case.
+}
 \description{
 Translate string to Title Case w/ delimitter.
 Changing case to 'Title Case'
 }
+\examples{
+to_titlecase("hello world")
+to_titlecase("this is a test", "_")
+}
 \seealso{
 chartr, toupper, and tolower.
 }
diff --git a/man/totalgencontextordomarchcounts.Rd b/man/totalGenContextOrDomArchCounts.Rd
similarity index 100%
rename from man/totalgencontextordomarchcounts.Rd
rename to man/totalGenContextOrDomArchCounts.Rd
diff --git a/man/validateCountDF.Rd b/man/validateCountDF.Rd
index fc4aefa2..5943723e 100644
--- a/man/validateCountDF.Rd
+++ b/man/validateCountDF.Rd
@@ -7,8 +7,16 @@
 validateCountDF(var)
 }
 \arguments{
-\item{var}{}
+\item{var}{A data frame whose columns are to be converted.}
+}
+\value{
+A data frame with non-'n' columns converted to character type.
 }
 \description{
 validateCountDF
 }
+\examples{
+\dontrun{
+new_df <- validateCountDF(my_data)
+}
+}
diff --git a/man/wordcloud3.Rd b/man/wordcloud3.Rd
index cce07a82..1406ea0d 100644
--- a/man/wordcloud3.Rd
+++ b/man/wordcloud3.Rd
@@ -25,8 +25,60 @@ wordcloud3(
 )
 }
 \arguments{
-\item{hoverFunction}{}
+\item{data}{Data frame or table containing words and their frequencies for
+the word cloud.}
+
+\item{size}{Numeric. Scaling factor for word sizes (default is 1).}
+
+\item{minSize}{Numeric. Minimum font size for the smallest word
+(default is 0).}
+
+\item{gridSize}{Numeric. Size of the grid for placing words (default is 0).}
+
+\item{fontFamily}{Character. 
Font family to use for the words +(default is "Segoe UI").} + +\item{fontWeight}{Character. Font weight for the words (default is "bold").} + +\item{color}{Character or vector. Color of the words. Use "random-dark" for +random dark colors (default) or specify a color.} + +\item{backgroundColor}{Character. Background color of the word cloud +(default is "white").} + +\item{minRotation}{Numeric. Minimum rotation angle of words in radians +(default is -π/4).} + +\item{maxRotation}{Numeric. Maximum rotation angle of words in radians +(default is π/4).} + +\item{shuffle}{Logical. Whether to shuffle the words (default is TRUE).} + +\item{rotateRatio}{Numeric. Proportion of words that are rotated +(default is 0.4).} + +\item{shape}{Character. Shape of the word cloud ("circle" is default, but +you can use "cardioid", "star", "triangle", etc.).} + +\item{ellipticity}{Numeric. Degree of ellipticity (default is 0.65).} + +\item{widgetsize}{Numeric vector. Width and height of the widget +(default is NULL, which uses default size).} + +\item{figPath}{Character. Path to an image file to use as a mask for the +word cloud (optional).} + +\item{hoverFunction}{JS function. JavaScript function to run when hovering +over words (optional).} +} +\value{ +An HTML widget object displaying a word cloud. 
} \description{ plotWordCloud3 } +\examples{ +\dontrun{ +wordcloud3(data = your_data, size = 1.5, color = "random-light") +} +} diff --git a/man/words2wordcounts.Rd b/man/words2WordCounts.Rd similarity index 100% rename from man/words2wordcounts.Rd rename to man/words2WordCounts.Rd diff --git a/man/write.MsaAAMultipleAlignment.Rd b/man/write.MsaAAMultipleAlignment.Rd index 17a05f50..6d660b9e 100644 --- a/man/write.MsaAAMultipleAlignment.Rd +++ b/man/write.MsaAAMultipleAlignment.Rd @@ -13,6 +13,11 @@ write.MsaAAMultipleAlignment(alignment, outpath) \item{outpath}{Where the resulting FASTA file should be written to} } +\value{ +Character string representing the content of the written FASTA file. + +Character string of the FASTA content that was written to the file. +} \description{ MsaAAMultipleAlignment Objects are generated from calls to msaClustalOmega and msaMuscle from the 'msa' package @@ -21,6 +26,17 @@ Write MsaAAMultpleAlignment Objects as algined fasta sequence MsaAAMultipleAlignment Objects are generated from calls to msaClustalOmega and msaMuscle from the 'msa' package } +\examples{ +\dontrun{ +alignment <- msaMuscle("my_sequences.fasta") +write.MsaAAMultipleAlignment(alignment, "aligned_sequences.fasta") +} +\dontrun{ +# Example usage +alignment <- alignFasta("path/to/sequences.fasta") +write.MsaAAMultipleAlignment(alignment, "path/to/aligned_sequences.fasta") +} +} \author{ Samuel Chen, Janani Ravi } From 74b83ab58bbd3463217f211b861918f5daa2b6dd Mon Sep 17 00:00:00 2001 From: Awa Synthia Date: Fri, 11 Oct 2024 01:59:14 +0300 Subject: [PATCH 25/61] remove import Signed-off-by: Awa Synthia --- NAMESPACE | 1 - R/msa.R | 1 - 2 files changed, 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 50943690..078f971b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -230,7 +230,6 @@ importFrom(purrr,map2) importFrom(purrr,map_chr) importFrom(purrr,pmap) importFrom(purrr,pmap_dfr) -importFrom(rMSA,kalign) importFrom(readr,cols) importFrom(readr,read_delim) 
importFrom(readr,read_file) diff --git a/R/msa.R b/R/msa.R index 20089dba..7d0d9be5 100644 --- a/R/msa.R +++ b/R/msa.R @@ -196,7 +196,6 @@ msa_pdf <- function(fasta_path, out_path = NULL, #' will be saved. #' #' @importFrom Biostrings readAAStringSet -#' @importFrom rMSA kalign #' #' @return A list containing the alignment object and the output file path. #' @export From 2da3d1a1eadb1c3d6f140700444e15db46c341d2 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Fri, 11 Oct 2024 08:40:17 -0600 Subject: [PATCH 26/61] summarize.R adjustments - add back importFrom n_distinct() as it appears to be used by summarizeGenContext() - use function call as title -- may specify this in MolEvolvR style guide for consistency - adjust Rd grouping with MolEvolvR_summary @rdname tag for functions that had a clear summary element. This will hopefully avoid confusion with the rather ubiquitous dplyr::summarize - converted some code comments to placeholder descriptions --- NAMESPACE | 1 + R/summarize.R | 58 +++---- man/{summarize.Rd => MolEvolvR_summary.Rd} | 159 ++++---------------- man/countbycolumn.Rd | 26 +++- man/elements2Words.Rd | 25 ++- man/filterbydomains.Rd | 2 +- man/filterbyfrequency.Rd | 14 +- man/findparalogs.Rd | 2 +- man/summarizeDomArch.Rd | 22 --- man/summarizeDomArch_ByLineage.Rd | 22 --- man/summarizeGenContext.Rd | 22 --- man/summarizeGenContext_ByDomArchLineage.Rd | 22 --- man/summarizeGenContext_ByLineage.Rd | 22 --- man/summarizebylineage.Rd | 25 --- man/totalgencontextordomarchcounts.Rd | 42 ------ man/words2wordcounts.Rd | 13 +- 16 files changed, 122 insertions(+), 355 deletions(-) rename man/{summarize.Rd => MolEvolvR_summary.Rd} (52%) delete mode 100644 man/summarizeDomArch.Rd delete mode 100644 man/summarizeDomArch_ByLineage.Rd delete mode 100644 man/summarizeGenContext.Rd delete mode 100644 man/summarizeGenContext_ByDomArchLineage.Rd delete mode 100644 man/summarizeGenContext_ByLineage.Rd delete mode 100644 man/summarizebylineage.Rd delete mode 100644 
man/totalgencontextordomarchcounts.Rd diff --git a/NAMESPACE b/NAMESPACE index 2326fc1f..53332439 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -139,6 +139,7 @@ importFrom(dplyr,if_else) importFrom(dplyr,left_join) importFrom(dplyr,mutate) importFrom(dplyr,n) +importFrom(dplyr,n_distinct) importFrom(dplyr,pull) importFrom(dplyr,relocate) importFrom(dplyr,right_join) diff --git a/R/summarize.R b/R/summarize.R index 321a0488..2816f174 100644 --- a/R/summarize.R +++ b/R/summarize.R @@ -10,7 +10,7 @@ # suppressPackageStartupMessages(library(rlang)) # conflicted::conflict_prefer("filter", "dplyr") -#' Filter by Domains +#' filterByDomains #' #' @author Samuel Chen, Janani Ravi #' @description filterByDomains filters a data frame by identifying exact domain matches @@ -29,7 +29,6 @@ #' #' @return Filtered data frame #' @note There is no need to make the domains 'regex safe', that will be handled by this function -#' @name summarize #' @export #' #' @examples @@ -89,9 +88,11 @@ filterByDomains <- function(prot, column = "DomArch", doms_keep = c(), doms_remo ## COUNTS of DAs and GCs ## ## Before/after break up ## ########################### -## Function to obtain element counts (DA, GC) -#' Count By Column -#' + +#' countByColumn +#' @description +#' Function to obtain element counts (DA, GC) +#' #' @param prot A data frame containing the dataset to analyze, typically with #' multiple columns including the one specified by the `column` parameter. #' @param column A character string specifying the name of the column to analyze. @@ -111,7 +112,6 @@ filterByDomains <- function(prot, column = "DomArch", doms_keep = c(), doms_remo #' The tibble is filtered to only include elements that have a frequency #' greater than or equal to `min.freq` and does not include elements with `NA` #' values or those starting with a hyphen ("-"). 
-#' @name summarize #' @export #' #' @examples @@ -131,7 +131,7 @@ countByColumn <- function(prot = prot, column = "DomArch", min.freq = 1) { return(counts) } -#' Elements 2 Words +#' elements2Words #' #' @description #' Break string ELEMENTS into WORDS for domain architecture (DA) and genomic @@ -156,7 +156,6 @@ countByColumn <- function(prot = prot, column = "DomArch", min.freq = 1) { #' @return A single string where elements are delimited by spaces. The function #' performs necessary substitutions based on the `conversion_type` and cleans up #' extraneous characters like newlines, tabs, and multiple spaces. -#' @name summarize #' #' @examples #' \dontrun{ @@ -196,7 +195,7 @@ elements2Words <- function(prot, column = "DomArch", conversion_type = "da2doms" return(z3) } -#' Words 2 Word Counts +#' words2WordCounts #' #' @description #' Get word counts (wc) [DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)] @@ -215,7 +214,6 @@ elements2Words <- function(prot, column = "DomArch", conversion_type = "da2doms" #' \item{`freq`}{A column containing the frequency counts for each word.} #' } #' -#' @name summarize #' #' @examples #' \dontrun{ @@ -252,9 +250,11 @@ words2WordCounts <- function(string) { arrange(-freq) return(df_word_count) } -## Function to filter based on frequencies -#' Filter Frequency -#' + +#' filterByFrequency +#' @description +#' Function to filter based on frequencies +#' #' @param x A tibble (tbl_df) containing at least two columns: one for #' elements (e.g., `words`) and one for their frequency (e.g., `freq`). #' @param min.freq A numeric value specifying the minimum frequency threshold. @@ -263,7 +263,6 @@ words2WordCounts <- function(string) { #' #' @return A tibble with the same structure as `x`, but filtered to include #' only rows where the frequency is greater than or equal to `min.freq`. 
-#' @name summarize #' #' @export #' @@ -279,7 +278,14 @@ filterByFrequency <- function(x, min.freq) { ######################### ## SUMMARY FUNCTIONS #### ######################### -#' Summarize by Lineage +#' MolEvolvR Summary +#' @name MolEvolvR_summary +#' @description +#' A collection of summary functions for the MolEvolvR package. +#' +NULL + +#' summarizeByLineage #' #' @param prot A dataframe or tibble containing the data. #' @param column A string representing the column to be summarized @@ -295,7 +301,7 @@ filterByFrequency <- function(x, min.freq) { #' @return A tibble summarizing the counts of occurrences of elements in #' the `column`, grouped by the `by` column. The result includes the number #' of occurrences (`count`) and is arranged in descending order of count. -#' @name summarize +#' @rdname MolEvolvR_summary #' @export #' #' @examples @@ -341,7 +347,7 @@ summarizeByLineage <- function(prot = "prot", column = "DomArch", by = "Lineage" #' columns: `DomArch`, `Lineage`, and `count`, which indicates the frequency #' of each domain architecture for each lineage. The results are arranged in #' descending order of `count`. -#' @name summarize +#' @rdname MolEvolvR_summary #' #' @export #' @@ -357,7 +363,7 @@ summarizeDomArch_ByLineage <- function(x) { arrange(desc(count)) } -## Function to retrieve counts of how many lineages a DomArch appears in + #' summarizeDomArch #' #' @description @@ -375,7 +381,7 @@ summarizeDomArch_ByLineage <- function(x) { #' - `totallin`: The total number of unique lineages in which each `DomArch` #' appears. #' The results are arranged in descending order of `totallin` and `totalcount`. -#' @name summarize +#' @rdname MolEvolvR_summary #' @export #' #' @examples @@ -407,7 +413,7 @@ summarizeDomArch <- function(x) { #' `GenContext`, `DomArch`, and `Lineage`. #' #' The results are arranged in descending order of `count`. 
-#' @name summarize +#' @rdname MolEvolvR_summary #' @export #' #' @examples @@ -432,7 +438,7 @@ summarizeGenContext_ByDomArchLineage <- function(x) { #' @importFrom dplyr arrange desc filter group_by n summarise #' #' @return Describe return, in detail -#' @name summarize +#' @rdname MolEvolvR_summary #' @export #' #' @examples @@ -455,7 +461,7 @@ summarizeGenContext_ByLineage <- function(x) { #' @param x A dataframe or tibble containing the data. It must have columns #' named `GenContext`, `DomArch`, and `Lineage`. #' -#' @importFrom dplyr arrange desc filter group_by n summarise +#' @importFrom dplyr arrange desc filter group_by n n_distinct summarise #' #' @return A tibble summarizing each unique combination of `GenContext` and #' `Lineage`, along with the following columns: @@ -465,7 +471,7 @@ summarizeGenContext_ByLineage <- function(x) { #' `GenContext` and `Lineage`. #' #' The results are arranged in descending order of `count`. -#' @name summarize +#' @rdname MolEvolvR_summary #' @export #' #' @examples @@ -487,7 +493,7 @@ summarizeGenContext <- function(x) { ################## -#' Total Counts +#' totalGenContextOrDomArchCounts #' #' @description #' Creates a data frame with a totalcount column @@ -518,7 +524,7 @@ summarizeGenContext <- function(x) { #' - `IndividualCountPercent`: The percentage of each `totalcount` relative to #' the overall count. #' - `CumulativePercent`: The cumulative percentage of total counts. -#' @name summarize +#' @rdname MolEvolvR_summary #' @export #' #' @note Please refer to the source code if you have alternate file formats and/or @@ -670,7 +676,7 @@ totalGenContextOrDomArchCounts <- function(prot, column = "DomArch", lineage_col -#' Find Paralogs +#' findParalogs #' #' @description #' Creates a data frame of paralogs. 
diff --git a/man/summarize.Rd b/man/MolEvolvR_summary.Rd similarity index 52% rename from man/summarize.Rd rename to man/MolEvolvR_summary.Rd index f149f686..262c4719 100644 --- a/man/summarize.Rd +++ b/man/MolEvolvR_summary.Rd @@ -1,50 +1,29 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/summarize.R -\name{summarize} -\alias{summarize} -\alias{filter_by_doms} -\alias{count_bycol} -\alias{elements2words} -\alias{words2wc} -\alias{filter_freq} -\alias{summarize_bylin} -\alias{summ.DA.byLin} -\alias{summ.DA} -\alias{summ.GC.byDALin} -\alias{summ.GC.byLin} -\alias{summ.GC} -\alias{total_counts} -\title{Filter by Domains} +\name{MolEvolvR_summary} +\alias{MolEvolvR_summary} +\alias{summarizeByLineage} +\alias{summarizeDomArch_ByLineage} +\alias{summarizeDomArch} +\alias{summarizeGenContext_ByDomArchLineage} +\alias{summarizeGenContext_ByLineage} +\alias{summarizeGenContext} +\alias{totalGenContextOrDomArchCounts} +\title{MolEvolvR Summary} \usage{ -filter_by_doms( - prot, - column = "DomArch", - doms_keep = c(), - doms_remove = c(), - ignore.case = FALSE -) - -count_bycol(prot = prot, column = "DomArch", min.freq = 1) - -elements2words(prot, column = "DomArch", conversion_type = "da2doms") - -words2wc(string) +summarizeByLineage(prot = "prot", column = "DomArch", by = "Lineage", query) -filter_freq(x, min.freq) +summarizeDomArch_ByLineage(x) -summarize_bylin(prot = "prot", column = "DomArch", by = "Lineage", query) +summarizeDomArch(x) -summ.DA.byLin(x) +summarizeGenContext_ByDomArchLineage(x) -summ.DA(x) +summarizeGenContext_ByLineage(x) -summ.GC.byDALin(x) +summarizeGenContext(x) -summ.GC.byLin(x) - -summ.GC(x) - -total_counts( +totalGenContextOrDomArchCounts( prot, column = "DomArch", lineage_col = "Lineage", @@ -59,39 +38,15 @@ total_counts( \item{column}{Character. 
The column to summarize, default is "DomArch".} -\item{doms_keep}{Vector of domains that must be identified within column in order for -observation to be kept} - -\item{doms_remove}{Vector of domains that, if found within an observation, will be removed} - -\item{ignore.case}{Should the matching be non case sensitive} - -\item{min.freq}{A numeric value specifying the minimum frequency threshold. -Only elements with frequencies greater than or equal to this value will be -retained.} - -\item{conversion_type}{A character string specifying the type of conversion. -Two options are available: -\describe{ -\item{\code{da2doms}}{Convert domain architectures into individual domains by -replacing \code{+} symbols with spaces.} -\item{\code{gc2da}}{Convert genomic context into domain architectures by -replacing directional symbols (\verb{<-}, \verb{->}, and \code{|}) with spaces.} -}} - -\item{string}{A character string containing the elements (words) to count. -This would typically be a space-delimited string representing domain -architectures or genomic contexts.} - -\item{x}{A dataframe or tibble containing the data. It must have columns -named \code{GenContext}, \code{DomArch}, and \code{Lineage}.} - \item{by}{A string representing the grouping column (e.g., \code{Lineage}). Default is "Lineage".} \item{query}{A string specifying the query pattern for filtering the target column. Use "all" to skip filtering and include all rows.} +\item{x}{A dataframe or tibble containing the data. It must have columns +named \code{GenContext}, \code{DomArch}, and \code{Lineage}.} + \item{lineage_col}{Character. The name of the lineage column, default is "Lineage".} @@ -105,33 +60,6 @@ cutoff. 
Default is FALSE.} Default is 2.} } \value{ -Filtered data frame - -A tibble with two columns: -\describe{ -\item{\code{column}}{The unique elements from the specified column -(e.g., "DomArch").} -\item{\code{freq}}{The frequency of each element, i.e., the number of times -each element appears in the specified column.} -} -The tibble is filtered to only include elements that have a frequency -greater than or equal to \code{min.freq} and does not include elements with \code{NA} -values or those starting with a hyphen ("-"). - -A single string where elements are delimited by spaces. The function -performs necessary substitutions based on the \code{conversion_type} and cleans up -extraneous characters like newlines, tabs, and multiple spaces. - -A tibble (tbl_df) with two columns: -\describe{ -\item{\code{words}}{A column containing the individual words -(domains or domain architectures).} -\item{\code{freq}}{A column containing the frequency counts for each word.} -} - -A tibble with the same structure as \code{x}, but filtered to include -only rows where the frequency is greater than or equal to \code{min.freq}. - A tibble summarizing the counts of occurrences of elements in the \code{column}, grouped by the \code{by} column. The result includes the number of occurrences (\code{count}) and is arranged in descending order of count. @@ -187,13 +115,7 @@ the overall count. } } \description{ -filter_by_doms filters a data frame by identifying exact domain matches -and either keeping or removing rows with the identified domain - -Break string ELEMENTS into WORDS for domain architecture (DA) and genomic -context (GC) - -Get word counts (wc) \link{DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)} +A collection of summary functions for the MolEvolvR package. 
Function to summarize and retrieve counts by Domains & Domains+Lineage @@ -204,57 +126,32 @@ Creates a data frame with a totalcount column This function is designed to sum the counts column by either Genomic Context or Domain Architecture and creates a totalcount column from those sums. } \note{ -There is no need to make the domains 'regex safe', that will be handled by this function - Please refer to the source code if you have alternate file formats and/or column names. } \examples{ \dontrun{ -filter_by_doms() -} -\dontrun{ -count_bycol(prot = my_data, column = "DomArch", min.freq = 10) -} -\dontrun{ -tibble::tibble(DomArch = c("aaa+bbb", -"a+b", "b+c", "b-c")) |> elements2words() -} - -\dontrun{ -tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> - elements2words() |> - words2wc() -} - -\dontrun{ -filter_freq() -} -\dontrun{ library(tidyverse) tibble(DomArch = c("a+b", "a+b", "b+c", "a+b"), Lineage = c("l1", "l1", "l1", "l2")) |> - summarize_bylin(query = "all") + summarizeByLineage(query = "all") } \dontrun{ -summ.DA.byLin() +summarizeDomArch_ByLineage() } \dontrun{ -summ.DA() +summarizeDomArch() } \dontrun{ -summ.GC.byDALin +summarizeGenContext_ByDomArchLineage } \dontrun{ -summ.GC.byLin() +summarizeGenContext_ByLineage() } \dontrun{ -summ.GC() +summarizeGenContext() } \dontrun{ -total_counts(pspa - gc_lin_counts, 0, "GC") -} +totalGenContextOrDomArchCounts(pspa - gc_lin_counts, 0, "GC") } -\author{ -Samuel Chen, Janani Ravi } diff --git a/man/countbycolumn.Rd b/man/countbycolumn.Rd index 34fcc3e0..57ff9ac4 100644 --- a/man/countbycolumn.Rd +++ b/man/countbycolumn.Rd @@ -2,21 +2,37 @@ % Please edit documentation in R/summarize.R \name{countByColumn} \alias{countByColumn} -\title{Count By Column} +\title{countByColumn} \usage{ countByColumn(prot = prot, column = "DomArch", min.freq = 1) } \arguments{ -\item{min.freq}{} +\item{prot}{A data frame containing the dataset to analyze, typically with +multiple columns including the one specified by the 
\code{column} parameter.} + +\item{column}{A character string specifying the name of the column to analyze. +The default is "DomArch".} + +\item{min.freq}{An integer specifying the minimum frequency an element must +have to be included in the output. Default is 1.} } \value{ -Describe return, in detail +A tibble with two columns: +\describe{ +\item{\code{column}}{The unique elements from the specified column +(e.g., "DomArch").} +\item{\code{freq}}{The frequency of each element, i.e., the number of times +each element appears in the specified column.} +} +The tibble is filtered to only include elements that have a frequency +greater than or equal to \code{min.freq} and does not include elements with \code{NA} +values or those starting with a hyphen ("-"). } \description{ -Count By Column +Function to obtain element counts (DA, GC) } \examples{ \dontrun{ -countByColumn() +countByColumn(prot = my_data, column = "DomArch", min.freq = 10) } } diff --git a/man/elements2Words.Rd b/man/elements2Words.Rd index 1094d363..bfd3071b 100644 --- a/man/elements2Words.Rd +++ b/man/elements2Words.Rd @@ -2,20 +2,30 @@ % Please edit documentation in R/summarize.R \name{elements2Words} \alias{elements2Words} -\title{Elements 2 Words} +\title{elements2Words} \usage{ elements2Words(prot, column = "DomArch", conversion_type = "da2doms") } \arguments{ -\item{prot}{\link{dataframe}} +\item{prot}{A dataframe containing the dataset to analyze. The specified +\code{column} contains the string elements to be processed.} -\item{column}{\link{string} column name} +\item{column}{A character string specifying the name of the column to analyze. +Default is "DomArch".} -\item{conversion_type}{\link{string} type of conversion: 'da2doms': domain architectures to -domains. 'gc2da' genomic context to domain architectures} +\item{conversion_type}{A character string specifying the type of conversion. 
+Two options are available: +\describe{ +\item{\code{da2doms}}{Convert domain architectures into individual domains by +replacing \code{+} symbols with spaces.} +\item{\code{gc2da}}{Convert genomic context into domain architectures by +replacing directional symbols (\verb{<-}, \verb{->}, and \code{|}) with spaces.} +}} } \value{ -\link{string} with words delimited by spaces +A single string where elements are delimited by spaces. The function +performs necessary substitutions based on the \code{conversion_type} and cleans up +extraneous characters like newlines, tabs, and multiple spaces. } \description{ Break string ELEMENTS into WORDS for domain architecture (DA) and genomic @@ -23,7 +33,8 @@ context (GC) } \examples{ \dontrun{ -tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> elements2Words() +tibble::tibble(DomArch = c("aaa+bbb", +"a+b", "b+c", "b-c")) |> elements2Words() } } diff --git a/man/filterbydomains.Rd b/man/filterbydomains.Rd index 8c885363..afb3e5cb 100644 --- a/man/filterbydomains.Rd +++ b/man/filterbydomains.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/summarize.R \name{filterByDomains} \alias{filterByDomains} -\title{Filter by Domains} +\title{filterByDomains} \usage{ filterByDomains( prot, diff --git a/man/filterbyfrequency.Rd b/man/filterbyfrequency.Rd index d2c5f9cd..15d06d67 100644 --- a/man/filterbyfrequency.Rd +++ b/man/filterbyfrequency.Rd @@ -2,18 +2,24 @@ % Please edit documentation in R/summarize.R \name{filterByFrequency} \alias{filterByFrequency} -\title{Filter Frequency} +\title{filterByFrequency} \usage{ filterByFrequency(x, min.freq) } \arguments{ -\item{min.freq}{} +\item{x}{A tibble (tbl_df) containing at least two columns: one for +elements (e.g., \code{words}) and one for their frequency (e.g., \code{freq}).} + +\item{min.freq}{A numeric value specifying the minimum frequency threshold. 
+Only elements with frequencies greater than or equal to this value will be +retained.} } \value{ -Describe return, in detail +A tibble with the same structure as \code{x}, but filtered to include +only rows where the frequency is greater than or equal to \code{min.freq}. } \description{ -Filter Frequency +Function to filter based on frequencies } \examples{ \dontrun{ diff --git a/man/findparalogs.Rd b/man/findparalogs.Rd index 4b5edbcf..d92edf71 100644 --- a/man/findparalogs.Rd +++ b/man/findparalogs.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/summarize.R \name{findParalogs} \alias{findParalogs} -\title{Find Paralogs} +\title{findParalogs} \usage{ findParalogs(prot) } diff --git a/man/summarizeDomArch.Rd b/man/summarizeDomArch.Rd deleted file mode 100644 index 11db1afa..00000000 --- a/man/summarizeDomArch.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{summarizeDomArch} -\alias{summarizeDomArch} -\title{summarizeDomArch} -\usage{ -summarizeDomArch(x) -} -\arguments{ -\item{x}{} -} -\value{ -Describe return, in detail -} -\description{ -Function to retrieve counts of how many lineages a DomArch appears in -} -\examples{ -\dontrun{ -summarizeDomArch() -} -} diff --git a/man/summarizeDomArch_ByLineage.Rd b/man/summarizeDomArch_ByLineage.Rd deleted file mode 100644 index cf5fac22..00000000 --- a/man/summarizeDomArch_ByLineage.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{summarizeDomArch_ByLineage} -\alias{summarizeDomArch_ByLineage} -\title{summarizeDomArch_ByLineage} -\usage{ -summarizeDomArch_ByLineage(x) -} -\arguments{ -\item{x}{} -} -\value{ -Describe return, in detail -} -\description{ -Function to summarize and retrieve counts by Domains & Domains+Lineage -} -\examples{ -\dontrun{ -summarizeDomArch_ByLineage() -} -} diff --git a/man/summarizeGenContext.Rd b/man/summarizeGenContext.Rd 
deleted file mode 100644 index 5a40811b..00000000 --- a/man/summarizeGenContext.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{summarizeGenContext} -\alias{summarizeGenContext} -\title{summarizeGenContext} -\usage{ -summarizeGenContext(x) -} -\arguments{ -\item{x}{} -} -\value{ -Describe return, in detail -} -\description{ -summarizeGenContext -} -\examples{ -\dontrun{ -summarizeGenContext() -} -} diff --git a/man/summarizeGenContext_ByDomArchLineage.Rd b/man/summarizeGenContext_ByDomArchLineage.Rd deleted file mode 100644 index 59e0376e..00000000 --- a/man/summarizeGenContext_ByDomArchLineage.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{summarizeGenContext_ByDomArchLineage} -\alias{summarizeGenContext_ByDomArchLineage} -\title{summarizeGenContext_ByDomArchLineage} -\usage{ -summarizeGenContext_ByDomArchLineage(x) -} -\arguments{ -\item{x}{} -} -\value{ -Define return, in detail -} -\description{ -summarizeGenContext_ByDomArchLineage -} -\examples{ -\dontrun{ -summarizeGenContext_ByDomArchLineage -} -} diff --git a/man/summarizeGenContext_ByLineage.Rd b/man/summarizeGenContext_ByLineage.Rd deleted file mode 100644 index 932fe6a7..00000000 --- a/man/summarizeGenContext_ByLineage.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{summarizeGenContext_ByLineage} -\alias{summarizeGenContext_ByLineage} -\title{summarizeGenContext_ByLineage} -\usage{ -summarizeGenContext_ByLineage(x) -} -\arguments{ -\item{x}{} -} -\value{ -Describe return, in detail -} -\description{ -summarizeGenContext_ByLineage -} -\examples{ -\dontrun{ -summarizeGenContext_ByLineage() -} -} diff --git a/man/summarizebylineage.Rd b/man/summarizebylineage.Rd deleted file mode 100644 index 2e445913..00000000 --- a/man/summarizebylineage.Rd +++ 
/dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{summarizeByLineage} -\alias{summarizeByLineage} -\title{Summarize by Lineage} -\usage{ -summarizeByLineage(prot = "prot", column = "DomArch", by = "Lineage", query) -} -\arguments{ -\item{query}{} -} -\value{ -Describe return, in detail -} -\description{ -Summarize by Lineage -} -\examples{ -\dontrun{ -library(tidyverse) -tibble(DomArch = c("a+b", "a+b", "b+c", "a+b"), Lineage = c("l1", "l1", "l1", "l2")) |> - summarizeByLineage(query = "all") -} - -} diff --git a/man/totalgencontextordomarchcounts.Rd b/man/totalgencontextordomarchcounts.Rd deleted file mode 100644 index f457cb6a..00000000 --- a/man/totalgencontextordomarchcounts.Rd +++ /dev/null @@ -1,42 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{totalGenContextOrDomArchCounts} -\alias{totalGenContextOrDomArchCounts} -\title{Total Counts} -\usage{ -totalGenContextOrDomArchCounts( - prot, - column = "DomArch", - lineage_col = "Lineage", - cutoff = 90, - RowsCutoff = FALSE, - digits = 2 -) -} -\arguments{ -\item{prot}{A data frame that must contain columns: -\itemize{\item Either 'GenContext' or 'DomArch.norep' \item count}} - -\item{column}{Character. The column to summarize} - -\item{cutoff}{Numeric. Cutoff for total count. Counts below cutoff value will not be shown. Default is 0.} - -\item{digits}{} -} -\value{ -Define return, in detail -} -\description{ -Creates a data frame with a totalcount column - -This function is designed to sum the counts column by either Genomic Context or Domain Architecture and creates a totalcount column from those sums. -} -\note{ -Please refer to the source code if you have alternate file formats and/or -column names. 
-} -\examples{ -\dontrun{ -totalGenContextOrDomArchCounts(pspa - gc_lin_counts, 0, "GC") -} -} diff --git a/man/words2wordcounts.Rd b/man/words2wordcounts.Rd index 7f60f226..370dec7f 100644 --- a/man/words2wordcounts.Rd +++ b/man/words2wordcounts.Rd @@ -2,15 +2,22 @@ % Please edit documentation in R/summarize.R \name{words2WordCounts} \alias{words2WordCounts} -\title{Words 2 Word Counts} +\title{words2WordCounts} \usage{ words2WordCounts(string) } \arguments{ -\item{string}{} +\item{string}{A character string containing the elements (words) to count. +This would typically be a space-delimited string representing domain +architectures or genomic contexts.} } \value{ -\link{tbl_df} table with 2 columns: 1) words & 2) counts/frequency +A tibble (tbl_df) with two columns: +\describe{ +\item{\code{words}}{A column containing the individual words +(domains or domain architectures).} +\item{\code{freq}}{A column containing the frequency counts for each word.} +} } \description{ Get word counts (wc) \link{DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)} From 11b22113b52087c6a72e7df4b845d8f0323c367b Mon Sep 17 00:00:00 2001 From: David Mayer Date: Fri, 11 Oct 2024 09:02:19 -0600 Subject: [PATCH 27/61] minor phrasing adjustment --- .github/CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 9fcd6b7f..f9f8de97 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -32,7 +32,7 @@ See our guide on [how to create a great issue](https://code-review.tidyverse.org ``` usethis::create_from_github("JRaviLab/MolEvolvR", fork = TRUE) ``` -- Install Bioconductor dependencies: +- Install BiocManager from Bioconductor: ``` if (!require("BiocManager", quietly = TRUE)) From 851d8796c9d9f4d895fd92f5eacb8f1eab45eda9 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Fri, 11 Oct 2024 09:02:36 -0600 Subject: [PATCH 28/61] skip sending quarto files to Git --- .github/.gitignore | 2 ++ 1 file changed, 2 
insertions(+) diff --git a/.github/.gitignore b/.github/.gitignore index 2d19fc76..5c86aa40 100644 --- a/.github/.gitignore +++ b/.github/.gitignore @@ -1 +1,3 @@ *.html + +/.quarto/ From 2d00b6fa42b124acf8cd3cd63e60cec745d71a10 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Fri, 11 Oct 2024 13:46:03 -0600 Subject: [PATCH 29/61] modify .Rd names --- R/ipr2viz.R | 9 ++++---- man/countByColumn.Rd | 38 +++++++++++++++++++++++++++++++ man/filterByDomains.Rd | 44 ++++++++++++++++++++++++++++++++++++ man/filterByFrequency.Rd | 28 +++++++++++++++++++++++ man/findParalogs.Rd | 26 +++++++++++++++++++++ man/getTopAccByLinDomArch.Rd | 2 +- man/plotIPR2Viz.Rd | 4 ++-- man/plotIPR2VizWeb.Rd | 4 ++-- man/themeGenes2.Rd | 4 ++-- man/words2WordCounts.Rd | 32 ++++++++++++++++++++++++++ 10 files changed, 180 insertions(+), 11 deletions(-) create mode 100644 man/countByColumn.Rd create mode 100644 man/filterByDomains.Rd create mode 100644 man/filterByFrequency.Rd create mode 100644 man/findParalogs.Rd create mode 100644 man/words2WordCounts.Rd diff --git a/R/ipr2viz.R b/R/ipr2viz.R index dff6e67a..9b625d4e 100644 --- a/R/ipr2viz.R +++ b/R/ipr2viz.R @@ -15,7 +15,7 @@ ################################# ## themeGenes2 adapted from theme_genes (w/o strip.text()) ## https://github.com/wilkox/gggenes/blob/master/R/theme_genes.R -#' Theme Genes2 +#' themeGenes2 #' #' @importFrom ggplot2 element_blank element_line theme theme_grey #' @@ -41,7 +41,8 @@ themeGenes2 <- function() { ################################## ## Get Top N AccNum by Lin+DomArch ################################## -#' Group by lineage + DA then take top 20 +#' getTopAccByLinDomArch +#' @description Group by lineage + DA then take top 20 #' #' @param infile_full #' @param DA_col @@ -92,7 +93,7 @@ getTopAccByLinDomArch <- function(infile_full, ############################################# ## IPR + FULL files --> DomArch Visualization ############################################# -#' IPR2Viz +#' plotIPR2Viz #' #' @param 
infile_ipr #' @param infile_full @@ -248,7 +249,7 @@ plotIPR2Viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(), return(plot) } -#' IPR2Viz Web +#' plotIPR2VizWeb #' #' @param infile_ipr #' @param accessions diff --git a/man/countByColumn.Rd b/man/countByColumn.Rd new file mode 100644 index 00000000..57ff9ac4 --- /dev/null +++ b/man/countByColumn.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/summarize.R +\name{countByColumn} +\alias{countByColumn} +\title{countByColumn} +\usage{ +countByColumn(prot = prot, column = "DomArch", min.freq = 1) +} +\arguments{ +\item{prot}{A data frame containing the dataset to analyze, typically with +multiple columns including the one specified by the \code{column} parameter.} + +\item{column}{A character string specifying the name of the column to analyze. +The default is "DomArch".} + +\item{min.freq}{An integer specifying the minimum frequency an element must +have to be included in the output. Default is 1.} +} +\value{ +A tibble with two columns: +\describe{ +\item{\code{column}}{The unique elements from the specified column +(e.g., "DomArch").} +\item{\code{freq}}{The frequency of each element, i.e., the number of times +each element appears in the specified column.} +} +The tibble is filtered to only include elements that have a frequency +greater than or equal to \code{min.freq} and does not include elements with \code{NA} +values or those starting with a hyphen ("-"). 
+} +\description{ +Function to obtain element counts (DA, GC) +} +\examples{ +\dontrun{ +countByColumn(prot = my_data, column = "DomArch", min.freq = 10) +} +} diff --git a/man/filterByDomains.Rd b/man/filterByDomains.Rd new file mode 100644 index 00000000..afb3e5cb --- /dev/null +++ b/man/filterByDomains.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/summarize.R +\name{filterByDomains} +\alias{filterByDomains} +\title{filterByDomains} +\usage{ +filterByDomains( + prot, + column = "DomArch", + doms_keep = c(), + doms_remove = c(), + ignore.case = FALSE +) +} +\arguments{ +\item{prot}{Dataframe to filter} + +\item{column}{Column to search for domains in (DomArch column)} + +\item{doms_keep}{Vector of domains that must be identified within column in order for +observation to be kept} + +\item{doms_remove}{Vector of domains that, if found within an observation, will be removed} + +\item{ignore.case}{Should the matching be non case sensitive} +} +\value{ +Filtered data frame +} +\description{ +filterByDomains filters a data frame by identifying exact domain matches +and either keeping or removing rows with the identified domain +} +\note{ +There is no need to make the domains 'regex safe', that will be handled by this function +} +\examples{ +\dontrun{ +filterByDomains() +} +} +\author{ +Samuel Chen, Janani Ravi +} diff --git a/man/filterByFrequency.Rd b/man/filterByFrequency.Rd new file mode 100644 index 00000000..15d06d67 --- /dev/null +++ b/man/filterByFrequency.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/summarize.R +\name{filterByFrequency} +\alias{filterByFrequency} +\title{filterByFrequency} +\usage{ +filterByFrequency(x, min.freq) +} +\arguments{ +\item{x}{A tibble (tbl_df) containing at least two columns: one for +elements (e.g., \code{words}) and one for their frequency (e.g., \code{freq}).} + +\item{min.freq}{A numeric value specifying the minimum 
frequency threshold. +Only elements with frequencies greater than or equal to this value will be +retained.} +} +\value{ +A tibble with the same structure as \code{x}, but filtered to include +only rows where the frequency is greater than or equal to \code{min.freq}. +} +\description{ +Function to filter based on frequencies +} +\examples{ +\dontrun{ +filterByFrequency() +} +} diff --git a/man/findParalogs.Rd b/man/findParalogs.Rd new file mode 100644 index 00000000..d92edf71 --- /dev/null +++ b/man/findParalogs.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/summarize.R +\name{findParalogs} +\alias{findParalogs} +\title{findParalogs} +\usage{ +findParalogs(prot) +} +\arguments{ +\item{prot}{A data frame filtered by a Query, containing columns Species and Lineage} +} +\value{ +returns a dataframe containing paralogs and the counts. +} +\description{ +Creates a data frame of paralogs. +} +\note{ +Please refer to the source code if you have alternate file formats and/or +column names. 
+} +\examples{ +\dontrun{ +findParalogs(pspa) +} +} diff --git a/man/getTopAccByLinDomArch.Rd b/man/getTopAccByLinDomArch.Rd index a00da5c7..b8571350 100644 --- a/man/getTopAccByLinDomArch.Rd +++ b/man/getTopAccByLinDomArch.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/ipr2viz.R \name{getTopAccByLinDomArch} \alias{getTopAccByLinDomArch} -\title{Group by lineage + DA then take top 20} +\title{getTopAccByLinDomArch} \usage{ getTopAccByLinDomArch( infile_full, diff --git a/man/plotIPR2Viz.Rd b/man/plotIPR2Viz.Rd index 22297312..7ed420c9 100644 --- a/man/plotIPR2Viz.Rd +++ b/man/plotIPR2Viz.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/ipr2viz.R \name{plotIPR2Viz} \alias{plotIPR2Viz} -\title{IPR2Viz} +\title{plotIPR2Viz} \usage{ plotIPR2Viz( infile_ipr = NULL, @@ -20,5 +20,5 @@ plotIPR2Viz( \item{query}{} } \description{ -IPR2Viz +plotIPR2Viz } diff --git a/man/plotIPR2VizWeb.Rd b/man/plotIPR2VizWeb.Rd index 4b4394ad..3b94a5a7 100644 --- a/man/plotIPR2VizWeb.Rd +++ b/man/plotIPR2VizWeb.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/ipr2viz.R \name{plotIPR2VizWeb} \alias{plotIPR2VizWeb} -\title{IPR2Viz Web} +\title{plotIPR2VizWeb} \usage{ plotIPR2VizWeb( infile_ipr, @@ -20,5 +20,5 @@ plotIPR2VizWeb( \item{rows}{} } \description{ -IPR2Viz Web +plotIPR2VizWeb } diff --git a/man/themeGenes2.Rd b/man/themeGenes2.Rd index 1553e019..64ae9273 100644 --- a/man/themeGenes2.Rd +++ b/man/themeGenes2.Rd @@ -2,10 +2,10 @@ % Please edit documentation in R/ipr2viz.R \name{themeGenes2} \alias{themeGenes2} -\title{Theme Genes2} +\title{themeGenes2} \usage{ themeGenes2() } \description{ -Theme Genes2 +themeGenes2 } diff --git a/man/words2WordCounts.Rd b/man/words2WordCounts.Rd new file mode 100644 index 00000000..370dec7f --- /dev/null +++ b/man/words2WordCounts.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/summarize.R +\name{words2WordCounts} +\alias{words2WordCounts} +\title{words2WordCounts} +\usage{ 
+words2WordCounts(string) +} +\arguments{ +\item{string}{A character string containing the elements (words) to count. +This would typically be a space-delimited string representing domain +architectures or genomic contexts.} +} +\value{ +A tibble (tbl_df) with two columns: +\describe{ +\item{\code{words}}{A column containing the individual words +(domains or domain architectures).} +\item{\code{freq}}{A column containing the frequency counts for each word.} +} +} +\description{ +Get word counts (wc) \link{DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)} +} +\examples{ +\dontrun{ +tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> + elements2Words() |> + words2WordCounts() +} + +} From 56b39da61292ae0facc31c104e90927f2483413e Mon Sep 17 00:00:00 2001 From: David Mayer Date: Fri, 11 Oct 2024 13:54:22 -0600 Subject: [PATCH 30/61] let R manage NAMESPACE sort order --- NAMESPACE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 08f3aa92..dc5c95a4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -52,8 +52,8 @@ export(gc_undirected_network) export(generateAllAlignments2FA) export(generate_all_aln2fa) export(generate_msa) -export(getTopAccByLinDomArch) export(getAccNumFromFA) +export(getTopAccByLinDomArch) export(get_accnums_from_fasta_file) export(get_proc_medians) export(get_proc_weights) From a74fb69a54f6a6ca39005f0b4d8cbf4dc15ee91c Mon Sep 17 00:00:00 2001 From: David Mayer Date: Fri, 11 Oct 2024 18:41:02 -0600 Subject: [PATCH 31/61] maintain function name consistency with CHANGED-pre-msa-tree.R and pre-msa-tree.R while we determine where these functions should live. 
--- NAMESPACE | 1 - R/CHANGED-pre-msa-tree.R | 6 +++--- man/write.MsaAAMultipleAlignment.Rd | 20 -------------------- man/writeMSA_AA2FA.Rd | 7 ++++++- 4 files changed, 9 insertions(+), 25 deletions(-) delete mode 100644 man/write.MsaAAMultipleAlignment.Rd diff --git a/NAMESPACE b/NAMESPACE index dc5c95a4..7271b65f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -104,7 +104,6 @@ export(to_titlecase) export(totalGenContextOrDomArchCounts) export(validateCountDF) export(wordcloud3) -export(write.MsaAAMultipleAlignment) export(writeMSA_AA2FA) export(write_proc_medians_table) export(write_proc_medians_yml) diff --git a/R/CHANGED-pre-msa-tree.R b/R/CHANGED-pre-msa-tree.R index c4a97589..a755df8c 100644 --- a/R/CHANGED-pre-msa-tree.R +++ b/R/CHANGED-pre-msa-tree.R @@ -610,12 +610,12 @@ alignFasta <- function(fasta_file, tool = "Muscle", outpath = NULL) { ) if (typeof(outpath) == "character") { - write.MsaAAMultipleAlignment(aligned, outpath) + writeMSA_AA2FA(aligned, outpath) } return(aligned) } -#' Write MsaAAMultpleAlignment Objects as algined fasta sequence +#' writeMSA_AA2FA #' #' @description #' MsaAAMultipleAlignment Objects are generated from calls to msaClustalOmega @@ -632,7 +632,7 @@ alignFasta <- function(fasta_file, tool = "Muscle", outpath = NULL) { #' @export #' #' @examples -write.MsaAAMultipleAlignment <- function(alignment, outpath) { +writeMSA_AA2FA <- function(alignment, outpath) { l <- length(rownames(alignment)) fasta <- "" for (i in 1:l) diff --git a/man/write.MsaAAMultipleAlignment.Rd b/man/write.MsaAAMultipleAlignment.Rd deleted file mode 100644 index e26f26e7..00000000 --- a/man/write.MsaAAMultipleAlignment.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CHANGED-pre-msa-tree.R -\name{write.MsaAAMultipleAlignment} -\alias{write.MsaAAMultipleAlignment} -\title{Write MsaAAMultpleAlignment Objects as algined fasta sequence} -\usage{ -write.MsaAAMultipleAlignment(alignment, outpath) -} 
-\arguments{ -\item{alignment}{MsaAAMultipleAlignment object to be written as a fasta} - -\item{outpath}{Where the resulting FASTA file should be written to} -} -\description{ -MsaAAMultipleAlignment Objects are generated from calls to msaClustalOmega -and msaMuscle from the 'msa' package -} -\author{ -Samuel Chen, Janani Ravi -} diff --git a/man/writeMSA_AA2FA.Rd b/man/writeMSA_AA2FA.Rd index 068e5b63..a6798469 100644 --- a/man/writeMSA_AA2FA.Rd +++ b/man/writeMSA_AA2FA.Rd @@ -1,9 +1,11 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pre-msa-tree.R +% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R \name{writeMSA_AA2FA} \alias{writeMSA_AA2FA} \title{writeMSA_AA2FA} \usage{ +writeMSA_AA2FA(alignment, outpath) + writeMSA_AA2FA(alignment, outpath) } \arguments{ @@ -12,6 +14,9 @@ writeMSA_AA2FA(alignment, outpath) \item{outpath}{Where the resulting FASTA file should be written to} } \description{ +MsaAAMultipleAlignment Objects are generated from calls to msaClustalOmega +and msaMuscle from the 'msa' package + Write MsaAAMultpleAlignment Objects as aligned fasta sequence MsaAAMultipleAlignment Objects are generated from calls to msaClustalOmega and msaMuscle from the 'msa' package From 5fcd985a88ab270245a554a44adb557fa02acaed Mon Sep 17 00:00:00 2001 From: David Mayer Date: Fri, 11 Oct 2024 18:42:56 -0600 Subject: [PATCH 32/61] maintain function name consistency across .R files while other determinations are made - getAccNumFromFA() --- NAMESPACE | 1 - R/CHANGED-pre-msa-tree.R | 4 ++-- man/getAccNumFromFA.Rd | 6 +++++- man/get_accnums_from_fasta_file.Rd | 14 -------------- 4 files changed, 7 insertions(+), 18 deletions(-) delete mode 100644 man/get_accnums_from_fasta_file.Rd diff --git a/NAMESPACE b/NAMESPACE index 7271b65f..23b29248 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -54,7 +54,6 @@ export(generate_all_aln2fa) export(generate_msa) export(getAccNumFromFA) export(getTopAccByLinDomArch) 
-export(get_accnums_from_fasta_file) export(get_proc_medians) export(get_proc_weights) export(make_opts2procs) diff --git a/R/CHANGED-pre-msa-tree.R b/R/CHANGED-pre-msa-tree.R index a755df8c..767d51aa 100644 --- a/R/CHANGED-pre-msa-tree.R +++ b/R/CHANGED-pre-msa-tree.R @@ -645,7 +645,7 @@ writeMSA_AA2FA <- function(alignment, outpath) { return(fasta) } -#' Get accnums from fasta file +#' getAccNumFromFA #' #' @param fasta_file #' @@ -655,7 +655,7 @@ writeMSA_AA2FA <- function(alignment, outpath) { #' @export #' #' @examples -get_accnums_from_fasta_file <- function(fasta_file) { +getAccNumFromFA <- function(fasta_file) { txt <- read_file(fasta_file) accnums <- stringi::stri_extract_all_regex(fasta_file, "(?<=>)[\\w,.]+")[[1]] return(accnums) diff --git a/man/getAccNumFromFA.Rd b/man/getAccNumFromFA.Rd index f2409965..d3ab8177 100644 --- a/man/getAccNumFromFA.Rd +++ b/man/getAccNumFromFA.Rd @@ -1,14 +1,18 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pre-msa-tree.R +% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R \name{getAccNumFromFA} \alias{getAccNumFromFA} \title{getAccNumFromFA} \usage{ +getAccNumFromFA(fasta_file) + getAccNumFromFA(fasta_file) } \arguments{ \item{fasta_file}{} } \description{ +getAccNumFromFA + getAccNumFromFA } diff --git a/man/get_accnums_from_fasta_file.Rd b/man/get_accnums_from_fasta_file.Rd deleted file mode 100644 index f545d1a0..00000000 --- a/man/get_accnums_from_fasta_file.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CHANGED-pre-msa-tree.R -\name{get_accnums_from_fasta_file} -\alias{get_accnums_from_fasta_file} -\title{Get accnums from fasta file} -\usage{ -get_accnums_from_fasta_file(fasta_file) -} -\arguments{ -\item{fasta_file}{} -} -\description{ -Get accnums from fasta file -} From d544f7ef932be8b44f04d1fae85bf715d976260b Mon Sep 17 00:00:00 2001 From: David Mayer Date: Fri, 11 Oct 2024 18:54:49 -0600 
Subject: [PATCH 33/61] additional cross .R file consistency while other function placement decisions are made --- NAMESPACE | 7 ---- R/CHANGED-pre-msa-tree.R | 25 +++++++++++---- R/pre-msa-tree.R | 36 +++++++++++++-------- man/RepresentativeAccNums.Rd | 23 -------------- man/acc2fa.Rd | 3 ++ man/addLeaves2Alignment.Rd | 25 +++++++++++++-- man/addName.Rd | 18 +++++++++-- man/add_leaves.Rd | 50 ----------------------------- man/add_name.Rd | 39 ----------------------- man/alignFasta.Rd | 4 ++- man/convert2TitleCase.Rd | 9 +++++- man/convertAlignment2FA.Rd | 21 ++++++++++-- man/convert_aln2fa.Rd | 53 ------------------------------- man/createRepresentativeAccNum.Rd | 10 +++++- man/generateAllAlignments2FA.Rd | 35 ++++++++++++++++---- man/generate_all_aln2fa.Rd | 48 ---------------------------- man/mapAcc2Name.Rd | 10 ++++-- man/map_acc2name.Rd | 21 ------------ man/to_titlecase.Rd | 25 --------------- 19 files changed, 158 insertions(+), 304 deletions(-) delete mode 100644 man/RepresentativeAccNums.Rd delete mode 100644 man/add_leaves.Rd delete mode 100644 man/add_name.Rd delete mode 100644 man/convert_aln2fa.Rd delete mode 100644 man/generate_all_aln2fa.Rd delete mode 100644 man/map_acc2name.Rd delete mode 100644 man/to_titlecase.Rd diff --git a/NAMESPACE b/NAMESPACE index 23b29248..fe4c23d6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,7 +4,6 @@ export(BinaryDomainNetwork) export(GCA2Lineage) export(GenContextNetwork) export(IPG2Lineage) -export(RepresentativeAccNums) export(acc2FA) export(acc2Lineage) export(acc2fa) @@ -12,8 +11,6 @@ export(addLeaves2Alignment) export(addLineage) export(addName) export(addTaxID) -export(add_leaves) -export(add_name) export(advanced_opts2est_walltime) export(alignFasta) export(assign_job_queue) @@ -31,7 +28,6 @@ export(convert2TitleCase) export(convertAlignment2FA) export(convertAlignment2Trees) export(convertFA2Tree) -export(convert_aln2fa) export(countByColumn) export(createFA2Tree) export(createJobResultsURL) @@ -50,7 +46,6 @@ 
export(findParalogs) export(formatJobArgumentsHTML) export(gc_undirected_network) export(generateAllAlignments2FA) -export(generate_all_aln2fa) export(generate_msa) export(getAccNumFromFA) export(getTopAccByLinDomArch) @@ -58,7 +53,6 @@ export(get_proc_medians) export(get_proc_weights) export(make_opts2procs) export(mapAcc2Name) -export(map_acc2name) export(map_advanced_opts2procs) export(msa_pdf) export(plotIPR2Viz) @@ -99,7 +93,6 @@ export(summarizeGenContext) export(summarizeGenContext_ByDomArchLineage) export(summarizeGenContext_ByLineage) export(themeGenes2) -export(to_titlecase) export(totalGenContextOrDomArchCounts) export(validateCountDF) export(wordcloud3) diff --git a/R/CHANGED-pre-msa-tree.R b/R/CHANGED-pre-msa-tree.R index 767d51aa..2f6c8a62 100644 --- a/R/CHANGED-pre-msa-tree.R +++ b/R/CHANGED-pre-msa-tree.R @@ -54,7 +54,7 @@ convert2TitleCase <- function(x, y = " ") { ################################ ## Function to add leaves to an alignment file ## !! Add DA to leaves? -#' Adding Leaves to an alignment file w/ accessions +#' addLeaves2Alignment #' #' @author Janani Ravi #' @keywords alignment, accnum, leaves, lineage, species @@ -178,7 +178,7 @@ addLeaves2Alignment <- function(aln_file = "", } -#' Add Name +#' addName #' #' @author Samuel Chen, Janani Ravi #' @description This function adds a new 'Name' column that is comprised of components from @@ -252,7 +252,7 @@ addName <- function(data, ################################ ## Function to convert alignment 'aln' to fasta format for MSA + Tree -#' Adding Leaves to an alignment file w/ accessions +#' convertAlignment2FA #' #' @author Janani Ravi #' @keywords alignment, accnum, leaves, lineage, species @@ -320,6 +320,9 @@ convertAlignment2FA <- function(aln_file = "", return(fasta) } +#' mapAcc2Name +#' +#' @description #' Default renameFA() replacement function. 
Maps an accession number to its name #' #' @param line The line of a fasta file starting with '>' @@ -382,6 +385,9 @@ renameFA <- function(fa_path, outpath, ################################ ## generateAllAlignments2FA +#' generateAllAlignments2FA +#' +#' @description #' Adding Leaves to an alignment file w/ accessions #' #' @keywords alignment, accnum, leaves, lineage, species @@ -441,10 +447,11 @@ generateAllAlignments2FA <- function(aln_path = here("data/rawdata_aln/"), # accessions <- c("P12345","Q9UHC1","O15530","Q14624","P0DTD1") # accessions <- rep("ANY95992.1", 201) -#' acc2FA converts protein accession numbers to a fasta format. +#' acc2FA #' #' @description -#' Resulting fasta file is written to the outpath. +#' converts protein accession numbers to a fasta format. Resulting +#' fasta file is written to the outpath. #' #' @author Samuel Chen, Janani Ravi #' @keywords accnum, fasta @@ -539,6 +546,9 @@ acc2FA <- function(accessions, outpath, plan = "sequential") { return(result) } +#' createRepresentativeAccNum +#' +#' @description #' Function to generate a vector of one Accession number per distinct observation from 'reduced' column #' #' @author Samuel Chen, Janani Ravi @@ -556,7 +566,7 @@ acc2FA <- function(accessions, outpath, plan = "sequential") { #' @export #' #' @examples -RepresentativeAccNums <- function(prot_data, +createRepresentativeAccNum <- function(prot_data, reduced = "Lineage", accnum_col = "AccNum") { # Get Unique reduced column and then bind the AccNums back to get one AccNum per reduced column @@ -585,6 +595,9 @@ RepresentativeAccNums <- function(prot_data, return(accessions) } +#' alignFasta +#' +#' @description #' Perform a Multiple Sequence Alignment on a FASTA file. 
#' #' @author Samuel Chen, Janani Ravi diff --git a/R/pre-msa-tree.R b/R/pre-msa-tree.R index fed495f4..290a1644 100644 --- a/R/pre-msa-tree.R +++ b/R/pre-msa-tree.R @@ -49,7 +49,7 @@ api_key <- Sys.getenv("ENTREZ_API_KEY", unset = "YOUR_KEY_HERE") #' @export #' #' @examples -to_titlecase <- function(x, y = " ") { +convert2TitleCase <- function(x, y = " ") { s <- strsplit(x, y)[[1]] paste(toupper(substring(s, 1, 1)), substring(s, 2), sep = "", collapse = y @@ -59,7 +59,7 @@ to_titlecase <- function(x, y = " ") { ################################ ## Function to add leaves to an alignment file ## !! Add DA to leaves? -#' Adding Leaves to an alignment file w/ accessions +#' addLeaves2Alignment #' #' @author Janani Ravi #' @@ -95,9 +95,9 @@ to_titlecase <- function(x, y = " ") { #' #' @examples #' \dontrun{ -#' add_leaves("pspa_snf7.aln", "pspa.txt") +#' addLeaves2Alignment("pspa_snf7.aln", "pspa.txt") #' } -add_leaves <- function(aln_file = "", +addLeaves2Alignment <- function(aln_file = "", lin_file = "data/rawdata_tsv/all_semiclean.txt", # !! finally change to all_clean.txt!! 
# lin_file="data/rawdata_tsv/PspA.txt", reduced = FALSE) { @@ -184,7 +184,7 @@ add_leaves <- function(aln_file = "", } -#' Title +#' addName #' #' @author Samuel Chen, Janani Ravi #' @@ -209,7 +209,7 @@ add_leaves <- function(aln_file = "", #' @export #' #' @examples -add_name <- function(data, +addName <- function(data, accnum_col = "AccNum", spec_col = "Species", lin_col = "Lineage", lin_sep = ">", out_col = "Name") { cols <- c(accnum_col, "Kingdom", "Phylum", "Genus", "Spp") @@ -258,7 +258,7 @@ add_name <- function(data, ################################ ## Function to convert alignment 'aln' to fasta format for MSA + Tree -#' Adding Leaves to an alignment file w/ accessions +#' convertAlignment2FA #' #' @author Janani Ravi #' @@ -288,9 +288,9 @@ add_name <- function(data, #' #' @examples #' \dontrun{ -#' add_leaves("pspa_snf7.aln", "pspa.txt") +#' convertAlignment2FA("pspa_snf7.aln", "pspa.txt") #' } -convert_aln2fa <- function(aln_file = "", +convertAlignment2FA <- function(aln_file = "", lin_file = "data/rawdata_tsv/all_semiclean.txt", # !! finally change to all_clean.txt!! fa_outpath = "", reduced = FALSE) { @@ -324,6 +324,9 @@ convert_aln2fa <- function(aln_file = "", return(fasta) } +#' mapAcc2Name +#' +#' @description #' Default rename_fasta() replacement function. 
Maps an accession number to its name #' #' @param line he line of a fasta file starting with '>' @@ -340,7 +343,7 @@ convert_aln2fa <- function(aln_file = "", #' @export #' #' @examples -map_acc2name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") { +mapAcc2Name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") { # change to be the name equivalent to an add_names column # Find the first ' ' end_acc <- str_locate(line, " ")[[1]] @@ -386,7 +389,10 @@ rename_fasta <- function(fa_path, outpath, } ################################ -## generate_all_aln2fa +## generateAllAlignments2FA +#' generateAllAlignments2FA +#' +#' @description #' Adding Leaves to an alignment file w/ accessions #' #' @author Janani Ravi @@ -413,9 +419,9 @@ rename_fasta <- function(fa_path, outpath, #' #' @examples #' \dontrun{ -#' generate_all_aln2fa() +#' generateAllAlignments2FA() #' } -generate_all_aln2fa <- function(aln_path = here("data/rawdata_aln/"), +generateAllAlignments2FA <- function(aln_path = here("data/rawdata_aln/"), fa_outpath = here("data/alns/"), lin_file = here("data/rawdata_tsv/all_semiclean.txt"), reduced = F) { @@ -448,6 +454,10 @@ generate_all_aln2fa <- function(aln_path = here("data/rawdata_aln/"), # accessions <- rep("ANY95992.1", 201) #' acc2fa #' +#' @description +#' converts protein accession numbers to a fasta format. Resulting +#' fasta file is written to the outpath. 
+#' #' @author Samuel Chen, Janani Ravi #' @keywords accnum, fasta #' diff --git a/man/RepresentativeAccNums.Rd b/man/RepresentativeAccNums.Rd deleted file mode 100644 index 57d1f1ab..00000000 --- a/man/RepresentativeAccNums.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CHANGED-pre-msa-tree.R -\name{RepresentativeAccNums} -\alias{RepresentativeAccNums} -\title{Function to generate a vector of one Accession number per distinct observation from 'reduced' column} -\usage{ -RepresentativeAccNums(prot_data, reduced = "Lineage", accnum_col = "AccNum") -} -\arguments{ -\item{prot_data}{Data frame containing Accession Numbers} - -\item{reduced}{Column from prot_data from which distinct observations -will be generated from. -One accession number will be assigned for each of these observations} - -\item{accnum_col}{Column from prot_data that contains Accession Numbers} -} -\description{ -Function to generate a vector of one Accession number per distinct observation from 'reduced' column -} -\author{ -Samuel Chen, Janani Ravi -} diff --git a/man/acc2fa.Rd b/man/acc2fa.Rd index 158b2d51..3e7a756d 100644 --- a/man/acc2fa.Rd +++ b/man/acc2fa.Rd @@ -15,6 +15,9 @@ Function may not work for vectors of length > 10,000} \item{plan}{} } \description{ +converts protein accession numbers to a fasta format. Resulting +fasta file is written to the outpath. + acc2fa converts protein accession numbers to a fasta format. Resulting fasta file is written to the outpath. 
} diff --git a/man/addLeaves2Alignment.Rd b/man/addLeaves2Alignment.Rd index a758ebd5..d00e6df7 100644 --- a/man/addLeaves2Alignment.Rd +++ b/man/addLeaves2Alignment.Rd @@ -1,9 +1,15 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CHANGED-pre-msa-tree.R +% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R \name{addLeaves2Alignment} \alias{addLeaves2Alignment} -\title{Adding Leaves to an alignment file w/ accessions} +\title{addLeaves2Alignment} \usage{ +addLeaves2Alignment( + aln_file = "", + lin_file = "data/rawdata_tsv/all_semiclean.txt", + reduced = FALSE +) + addLeaves2Alignment( aln_file = "", lin_file = "data/rawdata_tsv/all_semiclean.txt", @@ -11,7 +17,7 @@ addLeaves2Alignment( ) } \arguments{ -\item{aln_file}{haracter. Path to file. Input tab-delimited file + +\item{aln_file}{Character. Path to file. Input tab-delimited file + alignment file accnum & alignment. Default is 'pspa_snf7.aln'} @@ -23,15 +29,25 @@ Default is 'pspa.txt'} only one sequence per lineage. Default is FALSE.} } \description{ +Adding Leaves to an alignment file w/ accessions +Genomic Contexts vs Domain Architectures. + Adding Leaves to an alignment file w/ accessions Genomic Contexts vs Domain Architectures. } \details{ +The alignment file would need two columns: 1. accession + +number and 2. alignment. The protein homolog accession to lineage mapping + +file should have + The alignment file would need two columns: 1. accession + number and 2. alignment. The protein homolog accession to lineage mapping + file should have } \note{ +Please refer to the source code if you have alternate + +file formats and/or column names. + Please refer to the source code if you have alternate + file formats and/or column names. } @@ -39,6 +55,9 @@ file formats and/or column names. 
\dontrun{ addLeaves2Alignment("pspa_snf7.aln", "pspa.txt") } +\dontrun{ +addLeaves2Alignment("pspa_snf7.aln", "pspa.txt") +} } \author{ Janani Ravi diff --git a/man/addName.Rd b/man/addName.Rd index e04f9849..6f171456 100644 --- a/man/addName.Rd +++ b/man/addName.Rd @@ -1,9 +1,18 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CHANGED-pre-msa-tree.R +% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R \name{addName} \alias{addName} -\title{Add Name} +\title{addName} \usage{ +addName( + data, + accnum_col = "AccNum", + spec_col = "Species", + lin_col = "Lineage", + lin_sep = ">", + out_col = "Name" +) + addName( data, accnum_col = "AccNum", @@ -28,9 +37,14 @@ addName( Lineage, and AccNum info} } \value{ +Original data with a 'Name' column + Original data with a 'Name' column } \description{ +This function adds a new 'Name' column that is comprised of components from +Kingdom, Phylum, Genus, and species, as well as the accession + This function adds a new 'Name' column that is comprised of components from Kingdom, Phylum, Genus, and species, as well as the accession } diff --git a/man/add_leaves.Rd b/man/add_leaves.Rd deleted file mode 100644 index f1eeed10..00000000 --- a/man/add_leaves.Rd +++ /dev/null @@ -1,50 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pre-msa-tree.R -\name{add_leaves} -\alias{add_leaves} -\title{Adding Leaves to an alignment file w/ accessions} -\usage{ -add_leaves( - aln_file = "", - lin_file = "data/rawdata_tsv/all_semiclean.txt", - reduced = FALSE -) -} -\arguments{ -\item{aln_file}{Character. Path to file. Input tab-delimited file + -alignment file accnum & alignment. -Default is 'pspa_snf7.aln'} - -\item{lin_file}{Character. Path to file. Protein file with accession + -number to lineage mapping. -Default is 'pspa.txt'} - -\item{reduced}{Boolean. If TRUE, a reduced data frame will be generated with -only one sequence per lineage. 
Default is FALSE.} -} -\description{ -Adding Leaves to an alignment file w/ accessions -Genomic Contexts vs Domain Architectures. -} -\details{ -The alignment file would need two columns: 1. accession + -number and 2. alignment. The protein homolog accession to lineage mapping + -file should have -} -\note{ -Please refer to the source code if you have alternate + -file formats and/or column names. -} -\examples{ -\dontrun{ -add_leaves("pspa_snf7.aln", "pspa.txt") -} -} -\author{ -Janani Ravi -} -\keyword{accnum,} -\keyword{alignment,} -\keyword{leaves,} -\keyword{lineage,} -\keyword{species} diff --git a/man/add_name.Rd b/man/add_name.Rd deleted file mode 100644 index f19139e1..00000000 --- a/man/add_name.Rd +++ /dev/null @@ -1,39 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pre-msa-tree.R -\name{add_name} -\alias{add_name} -\title{Title} -\usage{ -add_name( - data, - accnum_col = "AccNum", - spec_col = "Species", - lin_col = "Lineage", - lin_sep = ">", - out_col = "Name" -) -} -\arguments{ -\item{data}{Data to add name column to} - -\item{accnum_col}{Column containing accession numbers} - -\item{spec_col}{Column containing species} - -\item{lin_col}{Column containing lineage} - -\item{lin_sep}{Character separating lineage levels} - -\item{out_col}{Column that contains the new 'Name' derived from Species, -Lineage, and AccNum info} -} -\value{ -Original data with a 'Name' column -} -\description{ -This function adds a new 'Name' column that is comprised of components from -Kingdom, Phylum, Genus, and species, as well as the accession -} -\author{ -Samuel Chen, Janani Ravi -} diff --git a/man/alignFasta.Rd b/man/alignFasta.Rd index 21b020cf..02a3026b 100644 --- a/man/alignFasta.Rd +++ b/man/alignFasta.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R \name{alignFasta} \alias{alignFasta} -\title{Perform a Multiple Sequence Alignment on a FASTA file.} +\title{alignFasta} \usage{ 
alignFasta(fasta_file, tool = "Muscle", outpath = NULL) @@ -21,6 +21,8 @@ aligned fasta sequence as a MsaAAMultipleAlignment object aligned fasta sequence as a MsaAAMultipleAlignment object } \description{ +Perform a Multiple Sequence Alignment on a FASTA file. + Perform a Multiple Sequence Alignment on a FASTA file. } \author{ diff --git a/man/convert2TitleCase.Rd b/man/convert2TitleCase.Rd index 84e7fa00..72619285 100644 --- a/man/convert2TitleCase.Rd +++ b/man/convert2TitleCase.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CHANGED-pre-msa-tree.R +% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R \name{convert2TitleCase} \alias{convert2TitleCase} \alias{totitle,} @@ -7,6 +7,8 @@ \title{Changing case to 'Title Case'} \usage{ convert2TitleCase(text, delimitter) + +to_titlecase(text, delimitter) } \arguments{ \item{x}{Character vector.} @@ -15,8 +17,13 @@ convert2TitleCase(text, delimitter) } \description{ Translate string to Title Case w/ delimitter. + +Translate string to Title Case w/ delimitter. +Changing case to 'Title Case' } \seealso{ +chartr, toupper, and tolower. + chartr, toupper, and tolower. 
} \author{ diff --git a/man/convertAlignment2FA.Rd b/man/convertAlignment2FA.Rd index d6b4dc56..8e9ceb94 100644 --- a/man/convertAlignment2FA.Rd +++ b/man/convertAlignment2FA.Rd @@ -1,9 +1,16 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CHANGED-pre-msa-tree.R +% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R \name{convertAlignment2FA} \alias{convertAlignment2FA} -\title{Adding Leaves to an alignment file w/ accessions} +\title{convertAlignment2FA} \usage{ +convertAlignment2FA( + aln_file = "", + lin_file = "data/rawdata_tsv/all_semiclean.txt", + fa_outpath = "", + reduced = FALSE +) + convertAlignment2FA( aln_file = "", lin_file = "data/rawdata_tsv/all_semiclean.txt", @@ -31,11 +38,18 @@ Adding Leaves to an alignment file w/ accessions Genomic Contexts vs Domain Architectures. } \details{ +The alignment file would need two columns: 1. accession + +number and 2. alignment. The protein homolog accession to lineage mapping + +file should have + The alignment file would need two columns: 1. accession + number and 2. alignment. The protein homolog accession to lineage mapping + file should have } \note{ +Please refer to the source code if you have alternate + +file formats and/or column names. + Please refer to the source code if you have alternate + file formats and/or column names. } @@ -44,6 +58,9 @@ file formats and/or column names. 
addLeaves2Alignment("pspa_snf7.aln", "pspa.txt") } +\dontrun{ +convertAlignment2FA("pspa_snf7.aln", "pspa.txt") +} } \author{ Janani Ravi diff --git a/man/convert_aln2fa.Rd b/man/convert_aln2fa.Rd deleted file mode 100644 index 8bebe31d..00000000 --- a/man/convert_aln2fa.Rd +++ /dev/null @@ -1,53 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pre-msa-tree.R -\name{convert_aln2fa} -\alias{convert_aln2fa} -\title{Adding Leaves to an alignment file w/ accessions} -\usage{ -convert_aln2fa( - aln_file = "", - lin_file = "data/rawdata_tsv/all_semiclean.txt", - fa_outpath = "", - reduced = FALSE -) -} -\arguments{ -\item{aln_file}{Character. Path to file. Input tab-delimited file + -alignment file accnum & alignment. -Default is 'pspa_snf7.aln'} - -\item{lin_file}{Character. Path to file. Protein file with accession + -number to lineage mapping. -Default is 'pspa.txt'} - -\item{fa_outpath}{Character. Path to the written fasta file. -Default is 'NULL'} - -\item{reduced}{Boolean. If TRUE, the fasta file will contain only one sequence per lineage. -Default is 'FALSE'} -} -\description{ -Adding Leaves to an alignment file w/ accessions -} -\details{ -The alignment file would need two columns: 1. accession + -number and 2. alignment. The protein homolog accession to lineage mapping + -file should have -} -\note{ -Please refer to the source code if you have alternate + -file formats and/or column names. 
-} -\examples{ -\dontrun{ -add_leaves("pspa_snf7.aln", "pspa.txt") -} -} -\author{ -Janani Ravi -} -\keyword{accnum,} -\keyword{alignment,} -\keyword{leaves,} -\keyword{lineage,} -\keyword{species} diff --git a/man/createRepresentativeAccNum.Rd b/man/createRepresentativeAccNum.Rd index 3703fe1a..3bd20522 100644 --- a/man/createRepresentativeAccNum.Rd +++ b/man/createRepresentativeAccNum.Rd @@ -1,9 +1,15 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pre-msa-tree.R +% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R \name{createRepresentativeAccNum} \alias{createRepresentativeAccNum} \title{createRepresentativeAccNum} \usage{ +createRepresentativeAccNum( + prot_data, + reduced = "Lineage", + accnum_col = "AccNum" +) + createRepresentativeAccNum( prot_data, reduced = "Lineage", @@ -20,6 +26,8 @@ One accession number will be assigned for each of these observations} \item{accnum_col}{Column from prot_data that contains Accession Numbers} } \description{ +Function to generate a vector of one Accession number per distinct observation from 'reduced' column + Function to generate a vector of one Accession number per distinct observation from 'reduced' column } \author{ diff --git a/man/generateAllAlignments2FA.Rd b/man/generateAllAlignments2FA.Rd index 3bf9938a..8f9d8ffc 100644 --- a/man/generateAllAlignments2FA.Rd +++ b/man/generateAllAlignments2FA.Rd @@ -1,9 +1,16 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CHANGED-pre-msa-tree.R +% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R \name{generateAllAlignments2FA} \alias{generateAllAlignments2FA} -\title{Adding Leaves to an alignment file w/ accessions} +\title{generateAllAlignments2FA} \usage{ +generateAllAlignments2FA( + aln_path = here("data/rawdata_aln/"), + fa_outpath = here("data/alns/"), + lin_file = here("data/rawdata_tsv/all_semiclean.txt"), + reduced = F +) + generateAllAlignments2FA( aln_path = 
here("data/rawdata_aln/"), fa_outpath = here("data/alns/"), @@ -15,28 +22,44 @@ generateAllAlignments2FA( \item{aln_path}{Character. Path to alignment files. Default is 'here("data/rawdata_aln/")'} -\item{fa_outpath}{Character. Path to file. Master protein file with AccNum & lineages. -Default is 'here("data/rawdata_tsv/all_semiclean.txt")'} - -\item{lin_file}{Character. Path to the written fasta file. +\item{fa_outpath}{Character. Path to the written fasta file. Default is 'here("data/alns/")'.} +\item{lin_file}{Character. Path to file. Master protein file with AccNum & lineages. +Default is 'here("data/rawdata_tsv/all_semiclean.txt")'} + \item{reduced}{Boolean. If TRUE, the fasta file will contain only one sequence per lineage. Default is 'FALSE'.} } \description{ +Adding Leaves to an alignment file w/ accessions + +Adding Leaves to all alignment files w/ accessions & DAs? + +Adding Leaves to an alignment file w/ accessions + Adding Leaves to all alignment files w/ accessions & DAs? } \details{ +The alignment files would need two columns separated by spaces: 1. AccNum and 2. alignment. The protein homolog file should have AccNum, Species, Lineages. + The alignment files would need two columns separated by spaces: 1. AccNum and 2. alignment. The protein homolog file should have AccNum, Species, Lineages. } \note{ +Please refer to the source code if you have alternate + file formats and/or column names. + Please refer to the source code if you have alternate + file formats and/or column names. 
} \examples{ \dontrun{ generateAllAlignments2FA() } +\dontrun{ +generateAllAlignments2FA() +} +} +\author{ +Janani Ravi } \keyword{accnum,} \keyword{alignment,} diff --git a/man/generate_all_aln2fa.Rd b/man/generate_all_aln2fa.Rd deleted file mode 100644 index ad6b7136..00000000 --- a/man/generate_all_aln2fa.Rd +++ /dev/null @@ -1,48 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pre-msa-tree.R -\name{generate_all_aln2fa} -\alias{generate_all_aln2fa} -\title{Adding Leaves to an alignment file w/ accessions} -\usage{ -generate_all_aln2fa( - aln_path = here("data/rawdata_aln/"), - fa_outpath = here("data/alns/"), - lin_file = here("data/rawdata_tsv/all_semiclean.txt"), - reduced = F -) -} -\arguments{ -\item{aln_path}{Character. Path to alignment files. -Default is 'here("data/rawdata_aln/")'} - -\item{fa_outpath}{Character. Path to the written fasta file. -Default is 'here("data/alns/")'.} - -\item{lin_file}{Character. Path to file. Master protein file with AccNum & lineages. -Default is 'here("data/rawdata_tsv/all_semiclean.txt")'} - -\item{reduced}{Boolean. If TRUE, the fasta file will contain only one sequence per lineage. -Default is 'FALSE'.} -} -\description{ -Adding Leaves to all alignment files w/ accessions & DAs? -} -\details{ -The alignment files would need two columns separated by spaces: 1. AccNum and 2. alignment. The protein homolog file should have AccNum, Species, Lineages. -} -\note{ -Please refer to the source code if you have alternate + file formats and/or column names. 
-} -\examples{ -\dontrun{ -generate_all_aln2fa() -} -} -\author{ -Janani Ravi -} -\keyword{accnum,} -\keyword{alignment,} -\keyword{leaves,} -\keyword{lineage,} -\keyword{species} diff --git a/man/mapAcc2Name.Rd b/man/mapAcc2Name.Rd index 0f5d447d..39ecb065 100644 --- a/man/mapAcc2Name.Rd +++ b/man/mapAcc2Name.Rd @@ -1,13 +1,15 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CHANGED-pre-msa-tree.R +% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R \name{mapAcc2Name} \alias{mapAcc2Name} -\title{Default renameFA() replacement function. Maps an accession number to its name} +\title{mapAcc2Name} \usage{ +mapAcc2Name(line, acc2name, acc_col = "AccNum", name_col = "Name") + mapAcc2Name(line, acc2name, acc_col = "AccNum", name_col = "Name") } \arguments{ -\item{line}{The line of a fasta file starting with '>'} +\item{line}{The line of a fasta file starting with '>'} \item{acc2name}{Data Table containing a column of accession numbers and a name column} \item{acc_col}{Name of the column containing Accession numbers} \item{name_col}{Name of the column containing the names that the accession numbers are mapped to} } \description{ Default renameFA() replacement function. Maps an accession number to its name + +Default rename_fasta() replacement function. Maps an accession number to its name } diff --git a/man/map_acc2name.Rd b/man/map_acc2name.Rd deleted file mode 100644 index fcdb3023..00000000 --- a/man/map_acc2name.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pre-msa-tree.R -\name{map_acc2name} -\alias{map_acc2name} -\title{Default rename_fasta() replacement function. 
Maps an accession number to its name} -\usage{ -map_acc2name(line, acc2name, acc_col = "AccNum", name_col = "Name") -} -\arguments{ -\item{line}{he line of a fasta file starting with '>'} - -\item{acc2name}{Data Table containing a column of accession numbers and a name column} - -\item{acc_col}{Name of the column containing Accession numbers} - -\item{name_col}{Name of the column containing the names that the accession numbers -are mapped to} -} -\description{ -Default rename_fasta() replacement function. Maps an accession number to its name -} diff --git a/man/to_titlecase.Rd b/man/to_titlecase.Rd deleted file mode 100644 index 45139d3b..00000000 --- a/man/to_titlecase.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pre-msa-tree.R -\name{to_titlecase} -\alias{to_titlecase} -\alias{totitle,} -\alias{to_title} -\title{To Titlecase} -\usage{ -to_titlecase(text, delimitter) -} -\arguments{ -\item{x}{Character vector.} - -\item{y}{Delimitter. Default is space (" ").} -} -\description{ -Translate string to Title Case w/ delimitter. -Changing case to 'Title Case' -} -\seealso{ -chartr, toupper, and tolower. 
-} -\author{ -Andrie, Janani Ravi -} From 6500e367effd56d9db7bada9505ec42aa3bb8dfa Mon Sep 17 00:00:00 2001 From: teddyCodex Date: Fri, 11 Oct 2024 19:50:52 -0700 Subject: [PATCH 34/61] refactor function names in R/blastWrappers.R --- R/blastWrappers.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/blastWrappers.R b/R/blastWrappers.R index 552b1ff6..dc11f589 100755 --- a/R/blastWrappers.R +++ b/R/blastWrappers.R @@ -17,7 +17,7 @@ #' @export #' #' @examples -run_deltablast <- function(deltablast_path, db_search_path, +runDeltaBlast <- function(deltablast_path, db_search_path, db = "refseq", query, evalue = "1e-5", out, num_alignments, num_threads = 1) { start <- Sys.time() @@ -54,7 +54,7 @@ run_deltablast <- function(deltablast_path, db_search_path, #' @export #' #' @examples -run_rpsblast <- function(rpsblast_path, db_search_path, +runRPSBlast <- function(rpsblast_path, db_search_path, db = "refseq", query, evalue = "1e-5", out, num_threads = 1) { start <- Sys.time() From e45bb21f97ba1ecc1e7ee5fdaaa69349a6eca0e0 Mon Sep 17 00:00:00 2001 From: teddyCodex Date: Fri, 11 Oct 2024 19:52:32 -0700 Subject: [PATCH 35/61] update .rd files and NAMESPACE --- NAMESPACE | 4 +- man/countbycolumn.Rd | 22 ----------- man/filterbydomains.Rd | 44 --------------------- man/filterbyfrequency.Rd | 22 ----------- man/findparalogs.Rd | 26 ------------ man/{run_deltablast.Rd => runDeltaBlast.Rd} | 6 +-- man/{run_rpsblast.Rd => runRPSBlast.Rd} | 6 +-- man/summarizebylineage.Rd | 25 ------------ man/totalgencontextordomarchcounts.Rd | 42 -------------------- man/words2wordcounts.Rd | 25 ------------ 10 files changed, 8 insertions(+), 214 deletions(-) delete mode 100644 man/countbycolumn.Rd delete mode 100644 man/filterbydomains.Rd delete mode 100644 man/filterbyfrequency.Rd delete mode 100644 man/findparalogs.Rd rename man/{run_deltablast.Rd => runDeltaBlast.Rd} (88%) rename man/{run_rpsblast.Rd => runRPSBlast.Rd} (89%) delete mode 100644 man/summarizebylineage.Rd 
delete mode 100644 man/totalgencontextordomarchcounts.Rd delete mode 100644 man/words2wordcounts.Rd diff --git a/NAMESPACE b/NAMESPACE index 53332439..dbab97b3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -85,8 +85,8 @@ export(rename_fasta) export(replaceQuestionMarks) export(reveql) export(reverse_operon) -export(run_deltablast) -export(run_rpsblast) +export(runDeltaBlast) +export(runRPSBlast) export(selectLongestDuplicate) export(sendJobStatusEmail) export(shortenLineage) diff --git a/man/countbycolumn.Rd b/man/countbycolumn.Rd deleted file mode 100644 index 34fcc3e0..00000000 --- a/man/countbycolumn.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{countByColumn} -\alias{countByColumn} -\title{Count By Column} -\usage{ -countByColumn(prot = prot, column = "DomArch", min.freq = 1) -} -\arguments{ -\item{min.freq}{} -} -\value{ -Describe return, in detail -} -\description{ -Count By Column -} -\examples{ -\dontrun{ -countByColumn() -} -} diff --git a/man/filterbydomains.Rd b/man/filterbydomains.Rd deleted file mode 100644 index 8c885363..00000000 --- a/man/filterbydomains.Rd +++ /dev/null @@ -1,44 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{filterByDomains} -\alias{filterByDomains} -\title{Filter by Domains} -\usage{ -filterByDomains( - prot, - column = "DomArch", - doms_keep = c(), - doms_remove = c(), - ignore.case = FALSE -) -} -\arguments{ -\item{prot}{Dataframe to filter} - -\item{column}{Column to search for domains in (DomArch column)} - -\item{doms_keep}{Vector of domains that must be identified within column in order for -observation to be kept} - -\item{doms_remove}{Vector of domains that, if found within an observation, will be removed} - -\item{ignore.case}{Should the matching be non case sensitive} -} -\value{ -Filtered data frame -} -\description{ -filterByDomains filters a data frame by identifying exact 
domain matches -and either keeping or removing rows with the identified domain -} -\note{ -There is no need to make the domains 'regex safe', that will be handled by this function -} -\examples{ -\dontrun{ -filterByDomains() -} -} -\author{ -Samuel Chen, Janani Ravi -} diff --git a/man/filterbyfrequency.Rd b/man/filterbyfrequency.Rd deleted file mode 100644 index d2c5f9cd..00000000 --- a/man/filterbyfrequency.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{filterByFrequency} -\alias{filterByFrequency} -\title{Filter Frequency} -\usage{ -filterByFrequency(x, min.freq) -} -\arguments{ -\item{min.freq}{} -} -\value{ -Describe return, in detail -} -\description{ -Filter Frequency -} -\examples{ -\dontrun{ -filterByFrequency() -} -} diff --git a/man/findparalogs.Rd b/man/findparalogs.Rd deleted file mode 100644 index 4b5edbcf..00000000 --- a/man/findparalogs.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{findParalogs} -\alias{findParalogs} -\title{Find Paralogs} -\usage{ -findParalogs(prot) -} -\arguments{ -\item{prot}{A data frame filtered by a Query, containing columns Species and Lineage} -} -\value{ -returns a dataframe containing paralogs and the counts. -} -\description{ -Creates a data frame of paralogs. -} -\note{ -Please refer to the source code if you have alternate file formats and/or -column names. 
-} -\examples{ -\dontrun{ -findParalogs(pspa) -} -} diff --git a/man/run_deltablast.Rd b/man/runDeltaBlast.Rd similarity index 88% rename from man/run_deltablast.Rd rename to man/runDeltaBlast.Rd index 3c934d77..8a32b954 100644 --- a/man/run_deltablast.Rd +++ b/man/runDeltaBlast.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/blastWrappers.R -\name{run_deltablast} -\alias{run_deltablast} +\name{runDeltaBlast} +\alias{runDeltaBlast} \title{Run DELTABLAST to find homologs for proteins of interest} \usage{ -run_deltablast( +runDeltaBlast( deltablast_path, db_search_path, db = "refseq", diff --git a/man/run_rpsblast.Rd b/man/runRPSBlast.Rd similarity index 89% rename from man/run_rpsblast.Rd rename to man/runRPSBlast.Rd index bc4474f1..088254ea 100644 --- a/man/run_rpsblast.Rd +++ b/man/runRPSBlast.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/blastWrappers.R -\name{run_rpsblast} -\alias{run_rpsblast} +\name{runRPSBlast} +\alias{runRPSBlast} \title{Run RPSBLAST to generate domain architectures for proteins of interest} \usage{ -run_rpsblast( +runRPSBlast( rpsblast_path, db_search_path, db = "refseq", diff --git a/man/summarizebylineage.Rd b/man/summarizebylineage.Rd deleted file mode 100644 index 2e445913..00000000 --- a/man/summarizebylineage.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{summarizeByLineage} -\alias{summarizeByLineage} -\title{Summarize by Lineage} -\usage{ -summarizeByLineage(prot = "prot", column = "DomArch", by = "Lineage", query) -} -\arguments{ -\item{query}{} -} -\value{ -Describe return, in detail -} -\description{ -Summarize by Lineage -} -\examples{ -\dontrun{ -library(tidyverse) -tibble(DomArch = c("a+b", "a+b", "b+c", "a+b"), Lineage = c("l1", "l1", "l1", "l2")) |> - summarizeByLineage(query = "all") -} - -} diff --git 
a/man/totalgencontextordomarchcounts.Rd b/man/totalgencontextordomarchcounts.Rd deleted file mode 100644 index f457cb6a..00000000 --- a/man/totalgencontextordomarchcounts.Rd +++ /dev/null @@ -1,42 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{totalGenContextOrDomArchCounts} -\alias{totalGenContextOrDomArchCounts} -\title{Total Counts} -\usage{ -totalGenContextOrDomArchCounts( - prot, - column = "DomArch", - lineage_col = "Lineage", - cutoff = 90, - RowsCutoff = FALSE, - digits = 2 -) -} -\arguments{ -\item{prot}{A data frame that must contain columns: -\itemize{\item Either 'GenContext' or 'DomArch.norep' \item count}} - -\item{column}{Character. The column to summarize} - -\item{cutoff}{Numeric. Cutoff for total count. Counts below cutoff value will not be shown. Default is 0.} - -\item{digits}{} -} -\value{ -Define return, in detail -} -\description{ -Creates a data frame with a totalcount column - -This function is designed to sum the counts column by either Genomic Context or Domain Architecture and creates a totalcount column from those sums. -} -\note{ -Please refer to the source code if you have alternate file formats and/or -column names. 
-} -\examples{ -\dontrun{ -totalGenContextOrDomArchCounts(pspa - gc_lin_counts, 0, "GC") -} -} diff --git a/man/words2wordcounts.Rd b/man/words2wordcounts.Rd deleted file mode 100644 index 7f60f226..00000000 --- a/man/words2wordcounts.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{words2WordCounts} -\alias{words2WordCounts} -\title{Words 2 Word Counts} -\usage{ -words2WordCounts(string) -} -\arguments{ -\item{string}{} -} -\value{ -\link{tbl_df} table with 2 columns: 1) words & 2) counts/frequency -} -\description{ -Get word counts (wc) \link{DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)} -} -\examples{ -\dontrun{ -tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> - elements2Words() |> - words2WordCounts() -} - -} From e9460610fb054c1c3109cf728561efe2e6619104 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Sat, 12 Oct 2024 14:09:40 -0600 Subject: [PATCH 36/61] remove outdated .Rd --- man/GCA2lin.Rd | 0 man/acc2lin.Rd | 57 ----------------------------------------------- man/efetch_ipg.Rd | 0 man/ipg2lin.Rd | 0 man/sink.reset.Rd | 0 5 files changed, 57 deletions(-) delete mode 100644 man/GCA2lin.Rd delete mode 100644 man/acc2lin.Rd delete mode 100644 man/efetch_ipg.Rd delete mode 100644 man/ipg2lin.Rd delete mode 100644 man/sink.reset.Rd diff --git a/man/GCA2lin.Rd b/man/GCA2lin.Rd deleted file mode 100644 index e69de29b..00000000 diff --git a/man/acc2lin.Rd b/man/acc2lin.Rd deleted file mode 100644 index d3f2468b..00000000 --- a/man/acc2lin.Rd +++ /dev/null @@ -1,57 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/acc2lin.R, R/lineage.R -\name{acc2lin} -\alias{acc2lin} -\title{acc2lin} -\usage{ -acc2lin( - accessions, - assembly_path, - lineagelookup_path, - ipgout_path = NULL, - plan = "multicore" -) - -acc2lin( - accessions, - assembly_path, - lineagelookup_path, - ipgout_path = NULL, - plan = "multicore" -) -} -\arguments{ 
-\item{accessions}{Character vector of protein accessions} - -\item{assembly_path}{String of the path to the assembly_summary path -This file can be generated using the "DownloadAssemblySummary()" function} - -\item{lineagelookup_path}{String of the path to the lineage lookup file -(taxid to lineage mapping). This file can be generated using the} - -\item{ipgout_path}{Path to write the results of the efetch run of the accessions -on the ipg database. If NULL, the file will not be written. Defaults to NULL} - -\item{plan}{} -} -\value{ -Describe return, in detail -} -\description{ -This function combines 'efetch_ipg()' -and 'ipg2lin()' to map a set -of protein accessions to their assembly (GCA_ID), tax ID, and lineage. - -Function to map protein accession numbers to lineage - -This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set -of protein accessions to their assembly (GCA_ID), tax ID, and lineage. -} -\examples{ -\dontrun{ -acc2lin() -} -} -\author{ -Samuel Chen, Janani Ravi -} diff --git a/man/efetch_ipg.Rd b/man/efetch_ipg.Rd deleted file mode 100644 index e69de29b..00000000 diff --git a/man/ipg2lin.Rd b/man/ipg2lin.Rd deleted file mode 100644 index e69de29b..00000000 diff --git a/man/sink.reset.Rd b/man/sink.reset.Rd deleted file mode 100644 index e69de29b..00000000 From 9571333c44ac879d9b2b6bc1a38d454fdda69a39 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Sat, 12 Oct 2024 14:10:10 -0600 Subject: [PATCH 37/61] let R sort NAMESPACE --- NAMESPACE | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 60bec5b1..c448ff13 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,9 +11,7 @@ export(addLeaves2Alignment) export(addLineage) export(addName) export(addTaxID) -export(advanced_opts2est_walltime) export(alignFasta) -export(assert_count_df) export(assignJobQueue) export(calculateEstimatedWallTimeFromOpts) export(calculateProcessRuntime) @@ -35,9 +33,9 @@ export(countByColumn) export(createFA2Tree) 
export(createJobResultsURL) export(createJobStatusEmailMessage) +export(createLineageLookup) export(createRepresentativeAccNum) export(createWordCloud2Element) -export(createLineageLookup) export(createWordCloudElement) export(domain_network) export(downloadAssemblySummary) @@ -50,14 +48,14 @@ export(formatJobArgumentsHTML) export(gc_undirected_network) export(generateAllAlignments2FA) export(generate_msa) -export(getProcessRuntimeWeights) export(getAccNumFromFA) +export(getProcessRuntimeWeights) export(getTopAccByLinDomArch) export(mapAcc2Name) export(mapAdvOption2Process) export(mapOption2Process) -export(map_acc2name) export(msa_pdf) +export(plotEstimatedWallTimes) export(plotIPR2Viz) export(plotIPR2VizWeb) export(plotLineageDA) @@ -70,12 +68,10 @@ export(plotStackedLineage) export(plotSunburst) export(plotTreemap) export(plotUpSet) -export(plotEstimatedWallTimes) export(prepareColumnParams) export(prepareSingleColumnParams) export(proteinAcc2TaxID) export(proteinAcc2TaxID_old) -export(prot2tax_old) export(removeAsterisks) export(removeEmptyRows) export(removeTails) From 8c573693b92f2aa216b269e24244d2d63fe0d3a9 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Sat, 12 Oct 2024 14:10:26 -0600 Subject: [PATCH 38/61] regen new .Rd --- man/GCA2Lineage.Rd | 2 +- man/IPG2Lineage.Rd | 5 +++-- man/efetchIPG.Rd | 3 ++- man/sinkReset.Rd | 1 + 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/man/GCA2Lineage.Rd b/man/GCA2Lineage.Rd index 9ec0ce56..9a2a7a30 100644 --- a/man/GCA2Lineage.Rd +++ b/man/GCA2Lineage.Rd @@ -19,7 +19,7 @@ This file can be generated using the "downloadAssemblySummary()" function} \item{lineagelookup_path}{String of the path to the lineage lookup file (taxid to lineage mapping). 
This file can be generated using the -"create_lineage_lookup()" function} +"createLineageLookup()" function} \item{acc_col}{} } diff --git a/man/IPG2Lineage.Rd b/man/IPG2Lineage.Rd index 282d5cbf..118812ab 100644 --- a/man/IPG2Lineage.Rd +++ b/man/IPG2Lineage.Rd @@ -29,7 +29,7 @@ file} \item{lineagelookup_path}{String of the path to the lineage lookup file (taxid to lineage mapping). This file can be generated using the -"create_lineage_lookup()" function} +"createLineageLookup()" function} \item{assembly_path}{String of the path to the assembly_summary path This file can be generated using the \link[MolEvolvR]{downloadAssemblySummary} function} @@ -39,7 +39,8 @@ A \code{data.table} with the lineage information for the provided protein accessions. } \description{ -Takes the resulting file of an efetch run on the ipg database and +Takes the resulting file +of an efetch run on the ipg database and Takes the resulting file of an efetch run on the ipg database and append lineage, and taxid columns diff --git a/man/efetchIPG.Rd b/man/efetchIPG.Rd index 047e2652..db63024f 100644 --- a/man/efetchIPG.Rd +++ b/man/efetchIPG.Rd @@ -23,7 +23,8 @@ the ipg database} No return value. The function writes the fetched results to \code{out_path}. } \description{ -Perform efetch on the ipg database and write the results to out_path +Perform efetch on the ipg database +and write the results to out_path Perform efetch on the ipg database and write the results to out_path } diff --git a/man/sinkReset.Rd b/man/sinkReset.Rd index 0285c0b2..e3fc7ce4 100644 --- a/man/sinkReset.Rd +++ b/man/sinkReset.Rd @@ -8,6 +8,7 @@ sinkReset() } \value{ No return, but run to close all outstanding \code{sink()}s +and handles any errors or warnings that occur during the process. 
} \description{ Sink Reset From 2061d7a24b7a699bfeac72270817ae7225365ffa Mon Sep 17 00:00:00 2001 From: David Mayer Date: Sat, 12 Oct 2024 14:10:48 -0600 Subject: [PATCH 39/61] remove old tryCatch code (for now) --- R/acc2lin.R | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/R/acc2lin.R b/R/acc2lin.R index 42315ece..a0a95033 100644 --- a/R/acc2lin.R +++ b/R/acc2lin.R @@ -72,14 +72,6 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path, merged <- merge(df, lins, by.x = acc_col, by.y = "Protein", all.x = TRUE) return(merged) - }, error = function(e) { - print(paste("Error: ", e$message)) - }, warning = function(w) { - print(paste("Warning: ", w$message)) - }, finally = { - print("addLineages function execution completed.") - }) - } @@ -247,13 +239,6 @@ IPG2Lineage <- function(accessions, ipg_file, assembly_path, lineagelookup_path, lins <- lins[!is.na(Lineage)] %>% unique() return(lins) - }, error = function(e) { - print(paste("An error occurred: ", e$message)) - }, warning = function(w) { - print(paste("Warning: ", w$message)) - }, finally = { - print("ipg2lin function execution completed.") - }) } From 48b7fd697b6c6cac7826ae3f09d315025db1a438 Mon Sep 17 00:00:00 2001 From: Seyi Kuforiji Date: Sun, 13 Oct 2024 18:02:36 +0100 Subject: [PATCH 40/61] Update error handling to use rlang functions in acc2lin.R file - Replaced base R error handling with rlang functions: `abort()`, `warn()`, and `inform()`. - Improved clarity and consistency in error and warning messages. - Enhanced robustness with detailed context for errors and warnings. 
--- R/acc2lin.R | 209 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 141 insertions(+), 68 deletions(-) diff --git a/R/acc2lin.R b/R/acc2lin.R index 08cb7d76..bd5cc289 100644 --- a/R/acc2lin.R +++ b/R/acc2lin.R @@ -5,6 +5,7 @@ # suppressPackageStartupMessages(library(data.table)) # suppressPackageStartupMessages(library(tidyverse)) # suppressPackageStartupMessages(library(biomartr)) +suppressPackageStartupMessages(library(rlang)) # https://stackoverflow.com/questions/18730491/sink-does-not-release-file #' Sink Reset @@ -24,13 +25,18 @@ sinkReset <- function() { for (i in seq_len(sink.number())) { sink(NULL) } - print("All sinks closed") + inform("All sinks closed", class = "sink_reset_info") }, error = function(e) { - print(paste("Error: ", e$message)) + abort(paste("Error: ", e$message), class = "sink_reset_error") }, warning = function(w) { - print(paste("Warning: ", w$message)) + warn(paste("Warning: ", w$message), class = "sink_reset_warning") }, finally = { - print("resetSink function execution completed.") + # If any additional cleanup is needed, it can be done here + if (sink.number() > 0) { + # Additional cleanup if sinks are still open + inform("Some sinks remain open, ensure proper cleanup.", + class = "sink_cleanup_warning") + } }) } @@ -56,60 +62,64 @@ sinkReset <- function() { #' addLineage() #' } addLineage <- function(df, acc_col = "AccNum", assembly_path, - lineagelookup_path, ipgout_path = NULL, - plan = "sequential", ...) { + lineagelookup_path, ipgout_path = NULL, + plan = "sequential", ...) 
{ # check for validate inputs if (!is.data.frame(df)) { - stop("Input 'df' must be a data frame.") + abort("Input 'df' must be a data frame.", class = "input_error") } if (!acc_col %in% colnames(df)) { - stop(paste("Column", acc_col, "not found in data frame.")) + abort(paste("Column", acc_col, + "not found in data frame."), class = "column_error") } # Ensure paths are character strings if (!is.character(assembly_path) || !is.character(lineagelookup_path)) { - stop("Both 'assembly_path' and - 'lineagelookup_path' must be character strings.") + abort("Both 'assembly_path' and + 'lineagelookup_path' must be character strings.", + class = "path_type_error") } # Ensure paths exist if (!file.exists(assembly_path)) { - stop(paste("Assembly file not found at:", assembly_path)) + abort(paste("Assembly file not found at:", + assembly_path), class = "file_not_found_error") } if (!file.exists(lineagelookup_path)) { - stop(paste("Lineage lookup file not found at:", lineagelookup_path)) + abort(paste("Lineage lookup file not found at:", + lineagelookup_path), class = "file_not_found_error") } - tryCatch({ - # Attempt to add lineages - acc_col <- sym(acc_col) - accessions <- df %>% pull(acc_col) - lins <- acc2Lineage( - accessions, assembly_path, lineagelookup_path, ipgout_path, plan - ) - - # Drop a lot of the unimportant columns for now? 
- # will make merging much easier - lins <- lins[, c( - "Strand", "Start", "Stop", "Nucleotide Accession", "Source", - "Id", "Strain" - ) := NULL] - lins <- unique(lins) - - # dup <- lins %>% group_by(Protein) %>% - # summarize(count = n()) %>% filter(count > 1) %>% - # pull(Protein) - - merged <- merge(df, lins, by.x = acc_col, by.y = "Protein", all.x = TRUE) - return(merged) - }, error = function(e) { - print(paste("Error: ", e$message)) - }, warning = function(w) { - print(paste("Warning: ", w$message)) - }, finally = { - print("addLineages function execution completed.") - }) + tryCatch({ + # Attempt to add lineages + acc_col <- sym(acc_col) + accessions <- df %>% pull(acc_col) + lins <- acc2Lineage( + accessions, assembly_path, lineagelookup_path, ipgout_path, plan + ) + + # Drop a lot of the unimportant columns for now? + # will make merging much easier + lins <- lins[, c( + "Strand", "Start", "Stop", "Nucleotide Accession", "Source", + "Id", "Strain" + ) := NULL] + lins <- unique(lins) + + # dup <- lins %>% group_by(Protein) %>% + # summarize(count = n()) %>% filter(count > 1) %>% + # pull(Protein) + + merged <- merge(df, lins, by.x = acc_col, by.y = "Protein", all.x = TRUE) + return(merged) + }, error = function(e) { + abort(paste("Error during lineage addition:", e$message), + class = "lineage_addition_error") + }, warning = function(w) { + warn(paste("Warning during lineage addition:", w$message), + class = "lineage_addition_warning") + }) } @@ -140,11 +150,11 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path, #' acc2Lineage() #' } acc2Lineage <- function(accessions, assembly_path, - lineagelookup_path, ipgout_path = NULL, - plan = "sequential", ...) { + lineagelookup_path, ipgout_path = NULL, + plan = "sequential", ...) 
{ tmp_ipg <- F if (is.null(ipgout_path)) { - tmp_ipg <- T + tmp_ipg <- TRUE ipgout_path <- tempfile("ipg", fileext = ".txt") } @@ -154,18 +164,41 @@ acc2Lineage <- function(accessions, assembly_path, efetchIPG(accessions, out_path = ipgout_path, plan) # Attempt to process IPG to lineages - lins <- IPG2Lineage(accessions, ipgout_path, assembly_path, lineagelookup_path) + lins <- IPG2Lineage(accessions, ipgout_path, + assembly_path, lineagelookup_path) }, error = function(e) { - print(paste("An error occurred: ", e$message)) + abort( + message = paste("An error occurred during IPG fetching + or lineage processing:", e$message), + class = "lineage_processing_error", + # capturing the call stack + call = sys.call(), + # adding additional context + accessions = accessions, + assembly_path = assembly_path, + lineagelookup_path = lineagelookup_path, + ipgout_path = ipgout_path, + plan = plan + ) }, warning = function(w) { - print(paste("Warning: ", w$message)) + warn( + message = paste("Warning during IPG fetching + or lineage processing:", w$message), + class = "lineage_processing_warning", + call = sys.call(), # capturing the call stack + accessions = accessions, + assembly_path = assembly_path, + lineagelookup_path = lineagelookup_path, + ipgout_path = ipgout_path, + plan = plan + ) }, finally = { - print("acc2lin function execution completed.") + # Cleanup: delete temporary IPG file if it was created + if (tmp_ipg && file.exists(ipgout_path)) { + unlink(ipgout_path) + } }) - if (tmp_ipg) { - unlink(tempdir(), recursive = T) - } return(lins) } @@ -196,15 +229,18 @@ acc2Lineage <- function(accessions, assembly_path, efetchIPG <- function(accnums, out_path, plan = "sequential", ...) 
{ # Argument validation if (!is.character(accnums) || length(accnums) == 0) { - stop("Error: 'accnums' must be a non-empty character vector.") + abort("Error: 'accnums' must be a non-empty character vector.", + class = "validation_error") } if (!is.character(out_path) || nchar(out_path) == 0) { - stop("Error: 'out_path' must be a non-empty string.") + abort("Error: 'out_path' must be a non-empty string.", + class = "validation_error") } if (!is.function(plan)) { - stop("Error: 'plan' must be a valid plan function.") + abort("Error: 'plan' must be a valid plan function.", + class = "validation_error") } if (length(accnums) > 0) { partition <- function(in_data, groups) { @@ -249,11 +285,26 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) { }) sink(NULL) }, error = function(e) { - print(paste("An error occurred: ", e$message)) + abort( + message = paste("An error occurred: ", e$message), + class = "fetch_error", + call = sys.call(), + accnums = accnums, + out_path = out_path, + plan = plan + ) }, warning = function(w) { - print(paste("Warning: ", w$message)) + warn( + message = paste("Warning: ", w$message), + class = "fetch_warning", + call = sys.call(), + accnums = accnums, + out_path = out_path, + plan = plan + ) }, finally = { - print("efetch_ipg function execution completed.") + # Ensure the sink is closed in case of errors + if (sink.number() > 0) sink(NULL) }) } } @@ -289,31 +340,38 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) { #' IPG2Lineage() #' } #' -IPG2Lineage <- function(accessions, ipg_file, assembly_path, lineagelookup_path, ...) { +IPG2Lineage <- function(accessions, ipg_file, + assembly_path, lineagelookup_path, ...) 
{ # Argument validation for accessions if (!is.character(accessions) || length(accessions) == 0) { - stop("Input 'accessions' must be a non-empty character vector.") + abort("Input 'accessions' must be a non-empty + character vector.", class = "validation_error") } # check for validate inputs if (!is.character(ipg_file)) { - stop("Input 'ipg_file' must be a character string.") + abort("Input 'ipg_file' must be a + character string.", class = "validation_error") } + # Ensure paths are character strings if (!is.character(assembly_path) || !is.character(lineagelookup_path)) { - stop("Both 'assembly_path' and - 'lineagelookup_path' must be character strings.") + abort("Both 'assembly_path' and 'lineagelookup_path' + must be character strings.", class = "validation_error") } # Ensure paths exist if (!file.exists(assembly_path)) { - stop(paste("Assembly file not found at:", assembly_path)) + abort(paste("Assembly file not found at:", assembly_path), + class = "file_error") } if (!file.exists(lineagelookup_path)) { - stop(paste("Lineage lookup file not found at:", lineagelookup_path)) + abort(paste("Lineage lookup file not found at:", lineagelookup_path), + class = "file_error") } + # Process the IPG file try({ # Attempt to read the IPG file ipg_dt <- fread(ipg_file, sep = "\t", fill = T) @@ -332,12 +390,27 @@ IPG2Lineage <- function(accessions, ipg_file, assembly_path, lineagelookup_path, return(lins) }, error = function(e) { - print(paste("An error occurred: ", e$message)) + abort( + message = paste("An error occurred: ", e$message), + class = "processing_error", + call = sys.call(), + accessions = accessions, + ipg_file = ipg_file, + assembly_path = assembly_path, + lineagelookup_path = lineagelookup_path + ) }, warning = function(w) { - print(paste("Warning: ", w$message)) - }, finally = { - print("ipg2lin function execution completed.") + warn( + message = paste("Warning: ", w$message), + class = "processing_warning", + call = sys.call(), + accessions = accessions, + 
ipg_file = ipg_file, + assembly_path = assembly_path, + lineagelookup_path = lineagelookup_path + ) }) + } From 70f0de8c57d610eaad122e59d4bf1e96fc455963 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Sun, 13 Oct 2024 19:21:41 -0600 Subject: [PATCH 41/61] remove code not relevant to PR --- R/acc2lin.R | 50 +++--- R/assign_job_queue.R | 359 +++++++++++++------------------------------ R/blastWrappers.R | 105 +++---------- 3 files changed, 153 insertions(+), 361 deletions(-) diff --git a/R/acc2lin.R b/R/acc2lin.R index a0a95033..61aae87c 100644 --- a/R/acc2lin.R +++ b/R/acc2lin.R @@ -157,40 +157,34 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) { return(partitioned) } - tryCatch({ - # Set the future plan strategy - plan(strategy = plan, .skip = T) + # Set the future plan strategy + plan(strategy = plan, .skip = T) - min_groups <- length(accnums) / 200 - groups <- min(max(min_groups, 15), length(accnums)) - partitioned_acc <- partition(accnums, groups) - # Open the sink to the output path - sink(out_path) + min_groups <- length(accnums) / 200 + groups <- min(max(min_groups, 15), length(accnums)) + partitioned_acc <- partition(accnums, groups) - a <- future_map(1:length(partitioned_acc), function(x) { - # Avoid hitting the rate API limit - if (x %% 9 == 0) { - Sys.sleep(1) - } - cat( - entrez_fetch( - id = partitioned_acc[[x]], - db = "ipg", - rettype = "xml", - api_key = "YOUR_KEY_HERE" ## Can this be included in public package? - ) + # Open the sink to the output path + sink(out_path) + + a <- future_map(1:length(partitioned_acc), function(x) { + # Avoid hitting the rate API limit + if (x %% 9 == 0) { + Sys.sleep(1) + } + cat( + entrez_fetch( + id = partitioned_acc[[x]], + db = "ipg", + rettype = "xml", + api_key = "YOUR_KEY_HERE" ## Can this be included in public package? 
) - }) - sink(NULL) - }, error = function(e) { - print(paste("An error occurred: ", e$message)) - }, warning = function(w) { - print(paste("Warning: ", w$message)) - }, finally = { - print("efetch_ipg function execution completed.") + ) }) + sink(NULL) + } } diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R index 10df1e3a..4791b4a1 100644 --- a/R/assign_job_queue.R +++ b/R/assign_job_queue.R @@ -13,22 +13,13 @@ common_root <- Sys.getenv("COMMON_SRC_ROOT") #' example: list_opts2procs <- mapOption2Process #' @export mapOption2Process <- function() { - tryCatch({ - opts2processes <- list( - "homology_search" = c("dblast", "dblast_cleanup"), - "domain_architecture" = c("iprscan", "ipr2lineage", "ipr2da"), - # processes always present agnostic of advanced options - "always" = c("blast_clust", "clust2table") - ) - return(opts2processes) - }, error = function(e) { - message(paste("Encountered an error: ", e$message)) - }, warning = function(w) { - message(paste("Warning: ", w$message)) - }, finally = { - message("mapOption2Process function execution completed.") - }) - + opts2processes <- list( + "homology_search" = c("dblast", "dblast_cleanup"), + "domain_architecture" = c("iprscan", "ipr2lineage", "ipr2da"), + # processes always present agnostic of advanced options + "always" = c("blast_clust", "clust2table") + ) + return(opts2processes) } #' Use MolEvolvR advanced options to get associated processes @@ -43,26 +34,14 @@ mapOption2Process <- function() { #' procs <- mapAdvOption2Process(advanced_opts) #' @export mapAdvOption2Process <- function(advanced_opts) { - if (!is.character(advanced_opts)) { - stop("Argument must be a character vector!") - } - tryCatch({ - # append 'always' to add procs that always run - advanced_opts <- c(advanced_opts, "always") - opts2proc <- mapOption2Process() - # setup index for opts2proc based on advanced options - idx <- which(names(opts2proc) %in% advanced_opts) - # extract processes that will run - procs <- opts2proc[idx] |> 
unlist() - return(procs) - }, error = function(e) { - message(paste("Encountered an error: ", e$message)) - }, warning = function(w) { - message(paste("Warning: ", w$message)) - }, finally = { - message("mapOption2Process function execution completed.") - }) - + # append 'always' to add procs that always run + advanced_opts <- c(advanced_opts, "always") + opts2proc <- mapOption2Process() + # setup index for opts2proc based on advanced options + idx <- which(names(opts2proc) %in% advanced_opts) + # extract processes that will run + procs <- opts2proc[idx] |> unlist() + return(procs) } #' Scrape MolEvolvR logs and calculate median processes @@ -88,60 +67,41 @@ mapAdvOption2Process <- function(advanced_opts) { #' list_proc_medians <- calculateProcessRuntime(dir_job_results) #' @export calculateProcessRuntime <- function(dir_job_results) { - tryCatch({ - # Check if dir_job_results is a character string - if (!is.character(dir_job_results) || length(dir_job_results) != 1) { - stop("Input 'dir_job_results' must be a single character string.") - } + source(file.path(common_root, "molevol_scripts", "R", "metrics.R")) - # Check if dir_job_results exists - if (!dir.exists(dir_job_results)) { - stop(paste("The directory", dir_job_results, "does not exist.")) - } + # aggregate logs from + path_log_data <- file.path(common_root, + "molevol_scripts", "log_data", "prod_logs.rda") - source(file.path(common_root, "molevol_scripts", "R", "metrics.R")) - - # aggregate logs from - path_log_data <- file.path(common_root, - "molevol_scripts", "log_data", "prod_logs.rda") - - # ensure the folder exists to the location - if (!dir.exists(path_log_data)) { - dir.create(dirname(path_log_data), - recursive = TRUE, showWarnings = FALSE) - } - - # attempt to load pre-generated logdata - if (!file.exists(path_log_data)) { - logs <- aggregate_logs(dir_job_results, latest_date = Sys.Date() - 60) - save(logs, file = path_log_data) - } else { - load(path_log_data) # loads the logs object - } - 
df_log <- logs$df_log - procs <- c( - "dblast", "dblast_cleanup", "iprscan", - "ipr2lineage", "ipr2da", "blast_clust", - "clust2table" - ) - list_proc_medians <- df_log |> - dplyr::select(dplyr::all_of(procs)) |> - dplyr::summarise( - dplyr::across( - dplyr::everything(), - \(x) median(x, na.rm = TRUE) - ) - ) |> - as.list() - return(list_proc_medians) - }, error = function(e) { - message(paste("Encountered an error: ", e$message)) - }, warning = function(w) { - message(paste("Warning: ", w$message)) - }, finally = { - message("calculateProcessRuntime function execution completed.") - }) + # ensure the folder exists to the location + if (!dir.exists(path_log_data)) { + dir.create(dirname(path_log_data), + recursive = TRUE, showWarnings = FALSE) + } + # attempt to load pre-generated logdata + if (!file.exists(path_log_data)) { + logs <- aggregate_logs(dir_job_results, latest_date = Sys.Date() - 60) + save(logs, file = path_log_data) + } else { + load(path_log_data) # loads the logs object + } + df_log <- logs$df_log + procs <- c( + "dblast", "dblast_cleanup", "iprscan", + "ipr2lineage", "ipr2da", "blast_clust", + "clust2table" + ) + list_proc_medians <- df_log |> + dplyr::select(dplyr::all_of(procs)) |> + dplyr::summarise( + dplyr::across( + dplyr::everything(), + \(x) median(x, na.rm = TRUE) + ) + ) |> + as.list() + return(list_proc_medians) } #' Write a table of 2 columns: 1) process and 2) median seconds @@ -162,39 +122,18 @@ calculateProcessRuntime <- function(dir_job_results) { #' ) #' @export writeProcessRuntime2TSV <- function(dir_job_results, filepath) { - tryCatch({ - # Error handling for input arguments - if (!is.character(dir_job_results) || length(dir_job_results) != 1) { - stop("Input 'dir_job_results' must be a single character string.") - } - - if (!dir.exists(dir_job_results)) { - stop(paste("The directory", dir_job_results, "does not exist.")) - } - - if (!is.character(filepath) || length(filepath) != 1) { - stop("Input 'filepath' must be a single 
character string.") - } - df_proc_medians <- calculateProcessRuntime(dir_job_results) |> - tibble::as_tibble() |> - tidyr::pivot_longer( - dplyr::everything(), - names_to = "process", - values_to = "median_seconds" - ) |> - dplyr::arrange(dplyr::desc(median_seconds)) - - # Write the resulting tibble to a TSV file - readr::write_tsv(df_proc_medians, file = filepath) - return(df_proc_medians) - }, error = function(e) { - message(paste("Encountered an error: ", e$message)) - }, warning = function(w) { - message(paste("Warning: ", w$message)) - }, finally = { - message("writeProcessRuntime2TSV function execution completed.") - }) - + df_proc_medians <- calculateProcessRuntime(dir_job_results) |> + tibble::as_tibble() |> + tidyr::pivot_longer( + dplyr::everything(), + names_to = "process", + values_to = "median_seconds" + ) |> + dplyr::arrange(dplyr::desc(median_seconds)) + + # Write the resulting tibble to a TSV file + readr::write_tsv(df_proc_medians, file = filepath) + return(df_proc_medians) } #' Compute median process runtimes, then write a YAML list of the processes and @@ -219,36 +158,8 @@ writeProcessRuntime2TSV <- function(dir_job_results, filepath) { #' } #' @export writeProcessRuntime2YML <- function(dir_job_results, filepath = NULL) { - tryCatch({ - # Error handling for dir_job_results arguments - if (!is.character(dir_job_results) || length(dir_job_results) != 1) { - stop("Input 'dir_job_results' must be a single character string.") - } - - if (!dir.exists(dir_job_results)) { - stop(paste("The directory", dir_job_results, "does not exist.")) - } - if (is.null(filepath)) { - filepath <- file.path(common_root, - "molevol_scripts", - "log_data", - "job_proc_weights.yml") - } - if (!is.character(filepath) || length(filepath) != 1) { - stop("Input 'filepath' must be a single character string.") - } - - medians <- calculateProcessRuntime(dir_job_results) - yaml::write_yaml(medians, filepath) - }, error = function(e) { - message(paste("Encountered an error: "), 
e$message) - }, warning = function(w) { - message(paste("Warning: "), w$message) - }, finally = { - message("writeProcessRuntime2TSV function execution completed.") - } - ) - + medians <- calculateProcessRuntime(dir_job_results) + yaml::write_yaml(medians, filepath) } #' Quickly get the runtime weights for MolEvolvR backend processes @@ -322,81 +233,49 @@ calculateEstimatedWallTimeFromOpts <- function(advanced_opts, n_inputs = 1L, n_hits = NULL, verbose = FALSE) { - - tryCatch({ - # to calculate est walltime for a homology search job, the number of hits - # must be provided - validation_fail <- is.null(n_hits) && "homology_search" %in% advanced_opts - stopifnot(!validation_fail) - - # Validate advanced_opts - if (!is.character(advanced_opts)) { - stop("Argument 'advanced_opts' must be a character vector.") - } - - # Validate n_inputs - if (!is.numeric(n_inputs) || length(n_inputs) != 1 || n_inputs <= 0) { - stop("Argument 'n_inputs' must be a single positive numeric value.") - } - - # Validate n_hits if homology_search is in advanced_opts - if ("homology_search" %in% advanced_opts && - (is.null(n_hits)|| !is.numeric(n_hits) - || length(n_hits) != 1 || n_hits < 0)) { - stop("Argument 'n_hits' must be a single non-negative numeric value when - 'homology_search' is in 'advanced_opts'.") - } - - # Get process weights - proc_weights <- writeProcessRuntime2YML() - if (!is.list(proc_weights)) { - stop("Process weights could not be retrieved correctly.") - } - - # sort process weights by names and convert to vec - proc_weights <- proc_weights[order(names(proc_weights))] |> unlist() - all_procs <- names(proc_weights) |> sort() - # get processes from advanced options and sort by names - procs_from_opts <- mapAdvOption2Process(advanced_opts) - procs_from_opts <- sort(procs_from_opts) - # binary encode: yes proc will run (1); else 0 - binary_proc_vec <- dplyr::if_else(all_procs %in% procs_from_opts, 1L, 0L) - # dot product of weights and procs to run; scaled by the number of 
inputs - est_walltime <- (n_inputs * (binary_proc_vec %*% proc_weights)) |> - as.numeric() - # calculate the additional processes to run for the homologous hits - if ("homology_search" %in% advanced_opts) { - opts2procs <- mapOption2Process() - # exclude the homology search processes for the homologous hits - procs2exclude_for_homologs <- opts2procs[["homology_search"]] - procs_homologs <- procs_from_opts[!(procs_from_opts - %in% procs2exclude_for_homologs)] - binary_proc_vec_homolog <- dplyr::if_else(all_procs - %in% procs_homologs, 1L, 0L) - # add the estimated walltime for processes run on the homologous hits - est_walltime <- est_walltime + - (n_hits * (binary_proc_vec_homolog - %*% proc_weights) |> as.numeric()) - } - if (verbose) { - msg <- stringr::str_glue( - "warnings from calculateEstimatedWallTimeFromOpts ():\n", - "\tn_inputs={n_inputs}\n", - "\tn_hits={ifelse(is.null(n_hits), 'null', n_hits)}\n", - "\test_walltime={est_walltime}\n\n" - ) - cat(file = stderr(), msg) - } - return(est_walltime) - }, error = function(e) { - message(paste("Encountered an error: ", e$message)) - }, warning = function(w) { - message(paste("Warning: ", w$message)) - }, finally = { - message("calculateEstimatedWallTimeFromOpts - function execution completed.") - }) - + # to calculate est walltime for a homology search job, the number of hits + # must be provided + validation_fail <- is.null(n_hits) && "homology_search" %in% advanced_opts + stopifnot(!validation_fail) + + # Get process weights + proc_weights <- writeProcessRuntime2YML() + + # sort process weights by names and convert to vec + proc_weights <- proc_weights[order(names(proc_weights))] |> unlist() + all_procs <- names(proc_weights) |> sort() + # get processes from advanced options and sort by names + procs_from_opts <- mapAdvOption2Process(advanced_opts) + procs_from_opts <- sort(procs_from_opts) + # binary encode: yes proc will run (1); else 0 + binary_proc_vec <- dplyr::if_else(all_procs %in% procs_from_opts, 1L, 
0L) + # dot product of weights and procs to run; scaled by the number of inputs + est_walltime <- (n_inputs * (binary_proc_vec %*% proc_weights)) |> + as.numeric() + # calculate the additional processes to run for the homologous hits + if ("homology_search" %in% advanced_opts) { + opts2procs <- mapOption2Process() + # exclude the homology search processes for the homologous hits + procs2exclude_for_homologs <- opts2procs[["homology_search"]] + procs_homologs <- procs_from_opts[!(procs_from_opts + %in% procs2exclude_for_homologs)] + binary_proc_vec_homolog <- dplyr::if_else(all_procs + %in% procs_homologs, 1L, 0L) + # add the estimated walltime for processes run on the homologous hits + est_walltime <- est_walltime + + (n_hits * (binary_proc_vec_homolog + %*% proc_weights) |> as.numeric()) + } + if (verbose) { + msg <- stringr::str_glue( + "warnings from calculateEstimatedWallTimeFromOpts ():\n", + "\tn_inputs={n_inputs}\n", + "\tn_hits={ifelse(is.null(n_hits), 'null', n_hits)}\n", + "\test_walltime={est_walltime}\n\n" + ) + cat(file = stderr(), msg) + } + return(est_walltime) } @@ -418,25 +297,8 @@ assignJobQueue <- function( t_sec_estimate, t_cutoff = 21600 # 6 hours ) { - tryCatch({ - if (!is.numeric(t_sec_estimate) || length(t_sec_estimate) != 1) { - stop("Argument 't_sec_estimate' must be a single numeric value.") - } - - if (!is.numeric(t_cutoff) || length(t_cutoff) != 1 || t_cutoff < 0) { - stop("Argument 't_cutoff' must be a single non-negative numeric value.") - } - - queue <- ifelse(t_sec_estimate > t_cutoff, "long", "short") - return(queue) - }, error = function(e) { - message(paste("Encountered an error: ", e$message)) - }, warning = function(w) { - message(paste("Warning: ", w$message)) - }, finally = { - message("assignJobQueue function execution completed.") - }) - + queue <- ifelse(t_sec_estimate > t_cutoff, "long", "short") + return(queue) } #' Plot the estimated runtimes for different advanced options and number @@ -456,7 +318,6 @@ assignJobQueue 
<- function( #' dev/molevol_scripts/docs/estimate_walltimes.png", plot = p) #' @export plotEstimatedWallTimes <- function() { - tryCatch({ opts <- mapOption2Process() |> names() # get all possible submission permutations (powerset) get_powerset <- function(vec) { @@ -536,12 +397,4 @@ plotEstimatedWallTimes <- function() { y = "Estimated walltime (hours)" ) return(p) - }, error = function(e) { - message(paste("Encountered an error: ", e$message)) - }, warning = function(w) { - message(paste("Warning: ", w$message)) - }, finally = { - message("plotEstimatedWallTimes function execution completed.") - }) - } diff --git a/R/blastWrappers.R b/R/blastWrappers.R index 15484a1b..9b55f3ee 100755 --- a/R/blastWrappers.R +++ b/R/blastWrappers.R @@ -21,52 +21,24 @@ run_deltablast <- function(deltablast_path, db_search_path, db = "refseq", query, evalue = "1e-5", out, num_alignments, num_threads = 1) { - # Argument validation - if (!file.exists(deltablast_path)) { - stop("The DELTABLAST executable path is invalid: ", deltablast_path) - } - if (!dir.exists(db_search_path)) { - stop("The database search path is invalid: ", db_search_path) - } - if (!file.exists(query)) { - stop("The query file path is invalid: ", query) - } - if (!is.numeric(as.numeric(evalue)) || as.numeric(evalue) <= 0) { - stop("The evalue must be a positive number: ", evalue) - } - if (!is.numeric(num_alignments) || num_alignments <= 0) { - stop("The number of alignments must be a - positive integer: ", num_alignments) - } - if (!is.numeric(num_threads) || num_threads <= 0) { - stop("The number of threads must be a positive integer: ", num_threads) - } - start <- Sys.time() - tryCatch({ - system(paste0("export BLASTDB=/", db_search_path)) - system2( - command = deltablast_path, - args = c( - "-db", db, - "-query", query, - "-evalue", evalue, - "-out", out, - "-num_threads", num_threads, - "-num_alignments", num_alignments - # ,"-outfmt", outfmt - ) + system(paste0("export BLASTDB=/", db_search_path)) + + 
system2( + command = deltablast_path, + args = c( + "-db", db, + "-query", query, + "-evalue", evalue, + "-out", out, + "-num_threads", num_threads, + "-num_alignments", num_alignments + # ,"-outfmt", outfmt ) - print(Sys.time() - start) - }, error = function(e) { - message(paste("Error in run_deltablast: ", e)) - }, warning = function(w) { - message(paste("Warning in run_deltablast: ", w)) - }, finally = { - message("run_deltablast completed") - }) + ) + print(Sys.time() - start) } @@ -88,46 +60,19 @@ run_deltablast <- function(deltablast_path, db_search_path, run_rpsblast <- function(rpsblast_path, db_search_path, db = "refseq", query, evalue = "1e-5", out, num_threads = 1) { - # Argument validation - if (!file.exists(rpsblast_path)) { - stop("The RPSBLAST executable path is invalid: ", rpsblast_path) - } - if (!dir.exists(db_search_path)) { - stop("The database search path is invalid: ", db_search_path) - } - if (!file.exists(query)) { - stop("The query file path is invalid: ", query) - } - if (!is.numeric(as.numeric(evalue)) || as.numeric(evalue) <= 0) { - stop("The evalue must be a positive number: ", evalue) - } - if (!is.numeric(num_threads) || num_threads <= 0) { - stop("The number of threads must be a positive integer: ", num_threads) - } start <- Sys.time() + system(paste0("export BLASTDB=/", db_search_path)) - tryCatch({ - - system(paste0("export BLASTDB=/", db_search_path)) - - system2( - command = rpsblast_path, - args = c( - "-db", db, - "-query", query, - "-evalue", evalue, - "-out", out, - "-num_threads", num_threads - ) + system2( + command = rpsblast_path, + args = c( + "-db", db, + "-query", query, + "-evalue", evalue, + "-out", out, + "-num_threads", num_threads ) - print(Sys.time() - start) - }, error = function(e) { - message(paste("Error in run_rpsblast: ", e)) - }, warning = function(w) { - message(paste("Warning in run_rpsblast: ", w)) - }, finally = { - message("run_rpsblast completed") - }) - + ) + print(Sys.time() - start) } From 
392775de92dfc33b198b41a5a2843f5313dd2e0d Mon Sep 17 00:00:00 2001 From: David Mayer Date: Sun, 13 Oct 2024 19:43:58 -0600 Subject: [PATCH 42/61] adjust .Rd title tags for renamed functions --- R/assign_job_queue.R | 27 +++++++++++++++++++++++ R/create_lineage_lookup.R | 3 +++ man/assignJobQueue.Rd | 2 +- man/calculateEstimatedWallTimeFromOpts.Rd | 3 +-- man/calculateProcessRuntime.Rd | 2 +- man/createLineageLookup.Rd | 2 +- man/getProcessRuntimeWeights.Rd | 2 +- man/mapAdvOption2Process.Rd | 2 +- man/mapOption2Process.Rd | 2 +- man/plotEstimatedWallTimes.Rd | 6 +++-- man/writeProcessRuntime2TSV.Rd | 2 +- man/writeProcessRuntime2YML.Rd | 6 +++-- 12 files changed, 46 insertions(+), 13 deletions(-) diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R index 4791b4a1..20ba841f 100644 --- a/R/assign_job_queue.R +++ b/R/assign_job_queue.R @@ -6,6 +6,9 @@ # file.path(common_root, "molevol_scripts", "R", "assignJobQueue.R") common_root <- Sys.getenv("COMMON_SRC_ROOT") +#' mapOption2Process +#' +#' @description #' Construct list where names (MolEvolvR advanced options) point to processes #' #' @return list where names (MolEvolvR advanced options) point to processes @@ -22,6 +25,9 @@ mapOption2Process <- function() { return(opts2processes) } +#' mapAdvOption2Process +#' +#' @description #' Use MolEvolvR advanced options to get associated processes #' #' @param advanced_opts character vector of MolEvolvR advanced options @@ -44,6 +50,9 @@ mapAdvOption2Process <- function(advanced_opts) { return(procs) } +#' calculateProcessRuntime +#' +#' @description #' Scrape MolEvolvR logs and calculate median processes #' #' @param dir_job_results [chr] path to MolEvolvR job_results @@ -104,6 +113,9 @@ calculateProcessRuntime <- function(dir_job_results) { return(list_proc_medians) } +#' writeProcessRuntime2TSV +#' +#' @description #' Write a table of 2 columns: 1) process and 2) median seconds #' #' @param dir_job_results [chr] path to MolEvolvR job_results @@ -136,6 +148,9 @@ 
writeProcessRuntime2TSV <- function(dir_job_results, filepath) { return(df_proc_medians) } +#' writeProcessRuntime2YML +#' +#' @description #' Compute median process runtimes, then write a YAML list of the processes and #' their median runtimes in seconds to the path specified by 'filepath'. #' @@ -162,6 +177,9 @@ writeProcessRuntime2YML <- function(dir_job_results, filepath = NULL) { yaml::write_yaml(medians, filepath) } +#' getProcessRuntimeWeights +#' +#' @description #' Quickly get the runtime weights for MolEvolvR backend processes #' #' @param dir_job_results [chr] path to MolEvolvR job_results @@ -213,6 +231,9 @@ getProcessRuntimeWeights <- function(medians_yml_path = NULL) { return(proc_weights) } +#' calculateEstimatedWallTimeFromOpts +#' +#' @description #' Given MolEvolvR advanced options and number of inputs, #' calculate the total estimated walltime for the job #' @@ -279,6 +300,9 @@ calculateEstimatedWallTimeFromOpts <- function(advanced_opts, } +#' assignJobQueue +#' +#' @description #' Decision function to assign job queue #' #' @param t_sec_estimate estimated number of seconds a job will process @@ -301,6 +325,9 @@ assignJobQueue <- function( return(queue) } +#' plotEstimatedWallTimes +#' +#' @description #' Plot the estimated runtimes for different advanced options and number #' of inputs #' diff --git a/R/create_lineage_lookup.R b/R/create_lineage_lookup.R index 78e79048..2408c5e6 100644 --- a/R/create_lineage_lookup.R +++ b/R/create_lineage_lookup.R @@ -3,6 +3,9 @@ # library(biomartr) +#' createLineageLookup +#' +#' @description #' Create a look up table that goes from TaxID, to Lineage #' #' @author Samuel Chen diff --git a/man/assignJobQueue.Rd b/man/assignJobQueue.Rd index 3663ce56..de646a82 100644 --- a/man/assignJobQueue.Rd +++ b/man/assignJobQueue.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/assign_job_queue.R \name{assignJobQueue} \alias{assignJobQueue} -\title{Decision function to assign job queue} +\title{assignJobQueue} \usage{ 
assignJobQueue(t_sec_estimate, t_cutoff = 21600) } diff --git a/man/calculateEstimatedWallTimeFromOpts.Rd b/man/calculateEstimatedWallTimeFromOpts.Rd index c09cf6a6..d5361001 100644 --- a/man/calculateEstimatedWallTimeFromOpts.Rd +++ b/man/calculateEstimatedWallTimeFromOpts.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/assign_job_queue.R \name{calculateEstimatedWallTimeFromOpts} \alias{calculateEstimatedWallTimeFromOpts} -\title{Given MolEvolvR advanced options and number of inputs, -calculate the total estimated walltime for the job} +\title{calculateEstimatedWallTimeFromOpts} \usage{ calculateEstimatedWallTimeFromOpts( advanced_opts, diff --git a/man/calculateProcessRuntime.Rd b/man/calculateProcessRuntime.Rd index bb6dd1ed..579ea2b6 100644 --- a/man/calculateProcessRuntime.Rd +++ b/man/calculateProcessRuntime.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/assign_job_queue.R \name{calculateProcessRuntime} \alias{calculateProcessRuntime} -\title{Scrape MolEvolvR logs and calculate median processes} +\title{calculateProcessRuntime} \usage{ calculateProcessRuntime(dir_job_results) } diff --git a/man/createLineageLookup.Rd b/man/createLineageLookup.Rd index 5dbab978..132019ce 100644 --- a/man/createLineageLookup.Rd +++ b/man/createLineageLookup.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/create_lineage_lookup.R \name{createLineageLookup} \alias{createLineageLookup} -\title{Create a look up table that goes from TaxID, to Lineage} +\title{createLineageLookup} \usage{ createLineageLookup( lineage_file = here("data/rankedlineage.dmp"), diff --git a/man/getProcessRuntimeWeights.Rd b/man/getProcessRuntimeWeights.Rd index ff3c8e5d..de0e2ea6 100644 --- a/man/getProcessRuntimeWeights.Rd +++ b/man/getProcessRuntimeWeights.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/assign_job_queue.R \name{getProcessRuntimeWeights} \alias{getProcessRuntimeWeights} -\title{Quickly get the runtime weights for MolEvolvR backend processes} 
+\title{getProcessRuntimeWeights} \usage{ getProcessRuntimeWeights(medians_yml_path = NULL) } diff --git a/man/mapAdvOption2Process.Rd b/man/mapAdvOption2Process.Rd index 5bd9ee65..6a210a20 100644 --- a/man/mapAdvOption2Process.Rd +++ b/man/mapAdvOption2Process.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/assign_job_queue.R \name{mapAdvOption2Process} \alias{mapAdvOption2Process} -\title{Use MolEvolvR advanced options to get associated processes} +\title{mapAdvOption2Process} \usage{ mapAdvOption2Process(advanced_opts) } diff --git a/man/mapOption2Process.Rd b/man/mapOption2Process.Rd index ff6905c5..9645617b 100644 --- a/man/mapOption2Process.Rd +++ b/man/mapOption2Process.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/assign_job_queue.R \name{mapOption2Process} \alias{mapOption2Process} -\title{Construct list where names (MolEvolvR advanced options) point to processes} +\title{mapOption2Process} \usage{ mapOption2Process() } diff --git a/man/plotEstimatedWallTimes.Rd b/man/plotEstimatedWallTimes.Rd index 0d53cb32..36b0ecd5 100644 --- a/man/plotEstimatedWallTimes.Rd +++ b/man/plotEstimatedWallTimes.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/assign_job_queue.R \name{plotEstimatedWallTimes} \alias{plotEstimatedWallTimes} -\title{Plot the estimated runtimes for different advanced options and number -of inputs} +\title{plotEstimatedWallTimes} \usage{ plotEstimatedWallTimes() } @@ -16,5 +15,8 @@ ggplot2::ggsave(filename = "/data/molevolvr_transfer/molevolvr_ dev/molevol_scripts/docs/estimate_walltimes.png", plot = p) } \description{ +Plot the estimated runtimes for different advanced options and number +of inputs + this function was just for fun; very, very messy code } diff --git a/man/writeProcessRuntime2TSV.Rd b/man/writeProcessRuntime2TSV.Rd index 03cbbd68..0e045a5c 100644 --- a/man/writeProcessRuntime2TSV.Rd +++ b/man/writeProcessRuntime2TSV.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/assign_job_queue.R \name{writeProcessRuntime2TSV} 
\alias{writeProcessRuntime2TSV} -\title{Write a table of 2 columns: 1) process and 2) median seconds} +\title{writeProcessRuntime2TSV} \usage{ writeProcessRuntime2TSV(dir_job_results, filepath) } diff --git a/man/writeProcessRuntime2YML.Rd b/man/writeProcessRuntime2YML.Rd index b43f39ee..865f23f7 100644 --- a/man/writeProcessRuntime2YML.Rd +++ b/man/writeProcessRuntime2YML.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/assign_job_queue.R \name{writeProcessRuntime2YML} \alias{writeProcessRuntime2YML} -\title{Compute median process runtimes, then write a YAML list of the processes and -their median runtimes in seconds to the path specified by 'filepath'.} +\title{writeProcessRuntime2YML} \usage{ writeProcessRuntime2YML(dir_job_results, filepath = NULL) } @@ -14,6 +13,9 @@ writeProcessRuntime2YML(dir_job_results, filepath = NULL) uses ./molevol_scripts/log_data/job_proc_weights.yml} } \description{ +Compute median process runtimes, then write a YAML list of the processes and +their median runtimes in seconds to the path specified by 'filepath'. + The default value of filepath is the value of the env var MOLEVOLVR_PROC_WEIGHTS, which writeProcessRuntime2YML() also uses as its default read location. 
From 2057aa57a8101381adb9dffdd5a05e741843791e Mon Sep 17 00:00:00 2001 From: teddyCodex Date: Mon, 14 Oct 2024 19:00:08 +0100 Subject: [PATCH 43/61] refactor functions in multiple files --- R/msa.R | 10 +++++----- R/networks_domarch.R | 8 ++++---- R/networks_gencontext.R | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/R/msa.R b/R/msa.R index e56cc32c..4c48f323 100644 --- a/R/msa.R +++ b/R/msa.R @@ -24,7 +24,7 @@ ############# ## Sample Runs -# msa_pdf(fasta_path="data/alns/pspb.gismo.fa" )#, out_path="data/msapdf") +# createMSA_PDF(fasta_path="data/alns/pspb.gismo.fa" )#, out_path="data/msapdf") ######################################### ## Generates MSA PDF from a Fasta file ## @@ -34,7 +34,7 @@ #' @description #' Generates a multiple sequence alignment from a fasta file #' -#' msa_pdf is a function that reads a fasta file and generates a multiple sequence alignment as +#' createMSA_PDF is a function that reads a fasta file and generates a multiple sequence alignment as #' a pdf #' #' @@ -55,9 +55,9 @@ #' #' @examples #' \dontrun{ -#' msa_pdf() +#' createMSA_PDF() #' } -msa_pdf <- function(fasta_path, out_path = NULL, +createMSA_PDF <- function(fasta_path, out_path = NULL, lowerbound = NULL, upperbound = NULL) { ## SAMPLE ARGUMENTS to test run # fasta_path=here("../molevol_data/project_data/phage_defense/full_analysis_20210108/g3d.both_lin.gen.da_sub.fa") @@ -196,7 +196,7 @@ msa_pdf <- function(fasta_path, out_path = NULL, #' @export #' #' @examples -generate_msa <- function(fa_file = "", outfile = "") { +createMSA_Kalign <- function(fa_file = "", outfile = "") { prot_aa <- readAAStringSet( path = fa_file, format = "fasta" diff --git a/R/networks_domarch.R b/R/networks_domarch.R index fea0a195..9215aa93 100755 --- a/R/networks_domarch.R +++ b/R/networks_domarch.R @@ -46,9 +46,9 @@ #' #' @examples #' \dontrun{ -#' domain_network(pspa) +#' createDomainNetwork(pspa) #' } -domain_network <- function(prot, column = "DomArch", 
domains_of_interest, cutoff = 70, layout = "nice", query_color = adjustcolor("green", alpha.f = .5)) { +createDomainNetwork <- function(prot, column = "DomArch", domains_of_interest, cutoff = 70, layout = "nice", query_color = adjustcolor("green", alpha.f = .5)) { # by domain networks or all, as required. tryCatch( { @@ -250,9 +250,9 @@ domain_network <- function(prot, column = "DomArch", domains_of_interest, cutoff #' #' @examples #' \dontrun{ -#' domain_network(pspa) +#' createDomainNetwork(pspa) #' } -BinaryDomainNetwork <- function(prot, column = "DomArch", domains_of_interest, cutoff = 70, +createBinaryDomainNetwork <- function(prot, column = "DomArch", domains_of_interest, cutoff = 70, layout = "nice", query_color = adjustcolor("yellow", alpha.f = .5), partner_color = adjustcolor("skyblue", alpha.f = .5), border_color = adjustcolor("grey", alpha.f = .8), diff --git a/R/networks_gencontext.R b/R/networks_gencontext.R index e0dd63da..7df6c270 100755 --- a/R/networks_gencontext.R +++ b/R/networks_gencontext.R @@ -39,7 +39,7 @@ #' \dontrun{ #' domain_network(pspa) #' } -gc_undirected_network <- function(prot, column = "GenContext", domains_of_interest, cutoff_type = "Lineage", cutoff = 1, layout = "grid") { +createUndirectedGenomicContextNetwork <- function(prot, column = "GenContext", domains_of_interest, cutoff_type = "Lineage", cutoff = 1, layout = "grid") { # by domain networks or all, as required. 
# ye is either all of prot.list or centered on one domain @@ -146,7 +146,7 @@ gc_undirected_network <- function(prot, column = "GenContext", domains_of_intere #' \dontrun{ #' gc_directed_network(pspa, column = "GenContex", cutoff = 55) #' } -GenContextNetwork <- function(prot, domains_of_interest, column = "GenContext", +createGenomicContextNetwork <- function(prot, domains_of_interest, column = "GenContext", cutoff = 40, layout = "grid", directed = TRUE) { From b665a4dfd062f3a359cb907040cb8c384a23450c Mon Sep 17 00:00:00 2001 From: teddyCodex Date: Mon, 14 Oct 2024 19:03:29 +0100 Subject: [PATCH 44/61] Update NAMESPACE and .Rd files --- NAMESPACE | 12 ++++++------ ...ainNetwork.Rd => createBinaryDomainNetwork.Rd} | 8 ++++---- man/{domain_network.Rd => createDomainNetwork.Rd} | 8 ++++---- ...tNetwork.Rd => createGenomicContextNetwork.Rd} | 6 +++--- man/{generate_msa.Rd => createMSA_Kalign.Rd} | 6 +++--- man/{msa_pdf.Rd => createMSA_PDF.Rd} | 15 ++++++++++----- ...d => createUndirectedGenomicContextNetwork.Rd} | 6 +++--- 7 files changed, 33 insertions(+), 28 deletions(-) rename man/{BinaryDomainNetwork.Rd => createBinaryDomainNetwork.Rd} (92%) rename man/{domain_network.Rd => createDomainNetwork.Rd} (90%) rename man/{GenContextNetwork.Rd => createGenomicContextNetwork.Rd} (91%) rename man/{generate_msa.Rd => createMSA_Kalign.Rd} (70%) rename man/{msa_pdf.Rd => createMSA_PDF.Rd} (77%) rename man/{gc_undirected_network.Rd => createUndirectedGenomicContextNetwork.Rd} (90%) diff --git a/NAMESPACE b/NAMESPACE index fe4c23d6..4c05dc94 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,8 +1,6 @@ # Generated by roxygen2: do not edit by hand -export(BinaryDomainNetwork) export(GCA2Lineage) -export(GenContextNetwork) export(IPG2Lineage) export(acc2FA) export(acc2Lineage) @@ -29,14 +27,19 @@ export(convertAlignment2FA) export(convertAlignment2Trees) export(convertFA2Tree) export(countByColumn) +export(createBinaryDomainNetwork) +export(createDomainNetwork) export(createFA2Tree) 
+export(createGenomicContextNetwork) export(createJobResultsURL) export(createJobStatusEmailMessage) +export(createMSA_Kalign) +export(createMSA_PDF) export(createRepresentativeAccNum) +export(createUndirectedGenomicContextNetwork) export(createWordCloud2Element) export(createWordCloudElement) export(create_lineage_lookup) -export(domain_network) export(downloadAssemblySummary) export(efetchIPG) export(extractAccNum) @@ -44,9 +47,7 @@ export(filterByDomains) export(filterByFrequency) export(findParalogs) export(formatJobArgumentsHTML) -export(gc_undirected_network) export(generateAllAlignments2FA) -export(generate_msa) export(getAccNumFromFA) export(getTopAccByLinDomArch) export(get_proc_medians) @@ -54,7 +55,6 @@ export(get_proc_weights) export(make_opts2procs) export(mapAcc2Name) export(map_advanced_opts2procs) -export(msa_pdf) export(plotIPR2Viz) export(plotIPR2VizWeb) export(plotLineageDA) diff --git a/man/BinaryDomainNetwork.Rd b/man/createBinaryDomainNetwork.Rd similarity index 92% rename from man/BinaryDomainNetwork.Rd rename to man/createBinaryDomainNetwork.Rd index bb7e2353..4f0bdc5a 100644 --- a/man/BinaryDomainNetwork.Rd +++ b/man/createBinaryDomainNetwork.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/networks_domarch.R -\name{BinaryDomainNetwork} -\alias{BinaryDomainNetwork} +\name{createBinaryDomainNetwork} +\alias{createBinaryDomainNetwork} \title{Domain Network} \usage{ -BinaryDomainNetwork( +createBinaryDomainNetwork( prot, column = "DomArch", domains_of_interest, @@ -42,6 +42,6 @@ A network of domains is returned based on shared domain architectures. 
} \examples{ \dontrun{ -domain_network(pspa) +createDomainNetwork(pspa) } } diff --git a/man/domain_network.Rd b/man/createDomainNetwork.Rd similarity index 90% rename from man/domain_network.Rd rename to man/createDomainNetwork.Rd index 528e4924..1588af17 100644 --- a/man/domain_network.Rd +++ b/man/createDomainNetwork.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/networks_domarch.R -\name{domain_network} -\alias{domain_network} +\name{createDomainNetwork} +\alias{createDomainNetwork} \title{Domain Network} \usage{ -domain_network( +createDomainNetwork( prot, column = "DomArch", domains_of_interest, @@ -33,6 +33,6 @@ A network of domains is returned based on shared domain architectures. } \examples{ \dontrun{ -domain_network(pspa) +createDomainNetwork(pspa) } } diff --git a/man/GenContextNetwork.Rd b/man/createGenomicContextNetwork.Rd similarity index 91% rename from man/GenContextNetwork.Rd rename to man/createGenomicContextNetwork.Rd index 2eeebbc5..ac6deb84 100644 --- a/man/GenContextNetwork.Rd +++ b/man/createGenomicContextNetwork.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/networks_gencontext.R -\name{GenContextNetwork} -\alias{GenContextNetwork} +\name{createGenomicContextNetwork} +\alias{createGenomicContextNetwork} \title{Genomic Context Directed Network} \usage{ -GenContextNetwork( +createGenomicContextNetwork( prot, domains_of_interest, column = "GenContext", diff --git a/man/generate_msa.Rd b/man/createMSA_Kalign.Rd similarity index 70% rename from man/generate_msa.Rd rename to man/createMSA_Kalign.Rd index a68eb8b4..946f04ae 100644 --- a/man/generate_msa.Rd +++ b/man/createMSA_Kalign.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/msa.R -\name{generate_msa} -\alias{generate_msa} +\name{createMSA_Kalign} +\alias{createMSA_Kalign} \title{Function to generate MSA using kalign} \usage{ 
-generate_msa(fa_file = "", outfile = "") +createMSA_Kalign(fa_file = "", outfile = "") } \arguments{ \item{outfile}{} diff --git a/man/msa_pdf.Rd b/man/createMSA_PDF.Rd similarity index 77% rename from man/msa_pdf.Rd rename to man/createMSA_PDF.Rd index 4d5fed17..7cd7516a 100644 --- a/man/msa_pdf.Rd +++ b/man/createMSA_PDF.Rd @@ -1,10 +1,15 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/msa.R -\name{msa_pdf} -\alias{msa_pdf} +\name{createMSA_PDF} +\alias{createMSA_PDF} \title{Multiple Sequence Alignment} \usage{ -msa_pdf(fasta_path, out_path = NULL, lowerbound = NULL, upperbound = NULL) +createMSA_PDF( + fasta_path, + out_path = NULL, + lowerbound = NULL, + upperbound = NULL +) } \arguments{ \item{fasta_path}{Character. The path location of the fasta file to be read.} @@ -21,11 +26,11 @@ Default is NULL. If value is NULL, the entire multiple sequence alignment is pri \description{ Generates a multiple sequence alignment from a fasta file -msa_pdf is a function that reads a fasta file and generates a multiple sequence alignment as +createMSA_PDF is a function that reads a fasta file and generates a multiple sequence alignment as a pdf } \examples{ \dontrun{ -msa_pdf() +createMSA_PDF() } } diff --git a/man/gc_undirected_network.Rd b/man/createUndirectedGenomicContextNetwork.Rd similarity index 90% rename from man/gc_undirected_network.Rd rename to man/createUndirectedGenomicContextNetwork.Rd index 28cf1abb..d61c23df 100644 --- a/man/gc_undirected_network.Rd +++ b/man/createUndirectedGenomicContextNetwork.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/networks_gencontext.R -\name{gc_undirected_network} -\alias{gc_undirected_network} +\name{createUndirectedGenomicContextNetwork} +\alias{createUndirectedGenomicContextNetwork} \title{Domain Network} \usage{ -gc_undirected_network( +createUndirectedGenomicContextNetwork( prot, column = "GenContext", domains_of_interest, From 
6babffe95d2729857b921c9305f25dcbc0c0ed49 Mon Sep 17 00:00:00 2001 From: Seyi Kuforiji Date: Tue, 15 Oct 2024 11:57:15 +0100 Subject: [PATCH 45/61] Update error handling to use rlang functions in R/assign_job_queue.R file - Replaced base R error handling with rlang functions: `abort()`, `warn()`, and `inform()`. - Improved clarity and consistency in error and warning messages. - Enhanced robustness with detailed context for errors and warnings. --- R/assign_job_queue.R | 227 +++++++++++++++++++++++++++++++------------ 1 file changed, 166 insertions(+), 61 deletions(-) diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R index c531fb09..df4f97e7 100644 --- a/R/assign_job_queue.R +++ b/R/assign_job_queue.R @@ -1,3 +1,4 @@ +suppressPackageStartupMessages(library(rlang)) # for now, we're using an env var, COMMON_SRC_ROOT, to specify this folder since # the working directory is changed in many parts of the current molevolvr # pipeline. @@ -22,11 +23,9 @@ make_opts2procs <- function() { ) return(opts2processes) }, error = function(e) { - message(paste("Encountered an error: ", e$message)) + abort(paste("Error: ", e$message), class = "Opts_to_process_error") }, warning = function(w) { - message(paste("Warning: ", w$message)) - }, finally = { - message("make_opts2procs function execution completed.") + warn(paste("Warning: ", w$message), class = "Opts_to_process_warning") }) } @@ -44,7 +43,7 @@ make_opts2procs <- function() { #' @export map_advanced_opts2procs <- function(advanced_opts) { if (!is.character(advanced_opts)) { - stop("Argument must be a character vector!") + abort("Argument must be a character vector!", class = "validation_error") } tryCatch({ # append 'always' to add procs that always run @@ -56,11 +55,19 @@ map_advanced_opts2procs <- function(advanced_opts) { procs <- opts2proc[idx] |> unlist() return(procs) }, error = function(e) { - message(paste("Encountered an error: ", e$message)) + abort( + message = paste("Encountered an error: ", e$message), + 
class = "map_advanced_opts2procs_error", + call = sys.call(), + advanced_opts = advanced_opts + ) }, warning = function(w) { - message(paste("Warning: ", w$message)) - }, finally = { - message("make_opts2procs function execution completed.") + warn( + message = paste("Warning: ", w$message), + class = "map_advanced_opts2procs_warning", + call = sys.call(), + advanced_opts = advanced_opts + ) }) } @@ -91,12 +98,14 @@ get_proc_medians <- function(dir_job_results) { tryCatch({ # Check if dir_job_results is a character string if (!is.character(dir_job_results) || length(dir_job_results) != 1) { - stop("Input 'dir_job_results' must be a single character string.") + abort("Input 'dir_job_results' must be a single character string.", + class = "validation_error") } # Check if dir_job_results exists if (!dir.exists(dir_job_results)) { - stop(paste("The directory", dir_job_results, "does not exist.")) + abort(paste("The directory", dir_job_results, "does not exist."), + class = "file_error") } source(file.path(common_root, "molevol_scripts", "R", "metrics.R")) @@ -135,11 +144,10 @@ get_proc_medians <- function(dir_job_results) { as.list() return(list_proc_medians) }, error = function(e) { - message(paste("Encountered an error: ", e$message)) + abort(paste("Encountered an error: ", e$message), + class = "processing_error") }, warning = function(w) { - message(paste("Warning: ", w$message)) - }, finally = { - message("get_proc_medians function execution completed.") + warn(paste("Warning: ", w$message), class = "processing_warning") }) } @@ -165,15 +173,18 @@ write_proc_medians_table <- function(dir_job_results, filepath) { tryCatch({ # Error handling for input arguments if (!is.character(dir_job_results) || length(dir_job_results) != 1) { - stop("Input 'dir_job_results' must be a single character string.") + abort("Input 'dir_job_results' must be a single character string.", + class = "validation_error") } if (!dir.exists(dir_job_results)) { - stop(paste("The directory", 
dir_job_results, "does not exist.")) + abort(paste("The directory", dir_job_results, "does not exist."), + class = "file_error") } if (!is.character(filepath) || length(filepath) != 1) { - stop("Input 'filepath' must be a single character string.") + abort("Input 'filepath' must be a single character string.", + class = "validation_error") } df_proc_medians <- get_proc_medians(dir_job_results) |> tibble::as_tibble() |> @@ -188,11 +199,21 @@ write_proc_medians_table <- function(dir_job_results, filepath) { readr::write_tsv(df_proc_medians, file = filepath) return(df_proc_medians) }, error = function(e) { - message(paste("Encountered an error: ", e$message)) + abort( + message = paste("Encountered an error: ", e$message), + class = "processing_error", + call = sys.call(), + dir_job_results = dir_job_results, + filepath = filepath + ) }, warning = function(w) { - message(paste("Warning: ", w$message)) - }, finally = { - message("write_proc_medians_table function execution completed.") + warn( + message = paste("Warning: ", w$message), + class = "processing_warning", + call = sys.call(), + dir_job_results = dir_job_results, + filepath = filepath + ) }) } @@ -222,12 +243,21 @@ write_proc_medians_yml <- function(dir_job_results, filepath = NULL) { tryCatch({ # Error handling for dir_job_results arguments if (!is.character(dir_job_results) || length(dir_job_results) != 1) { - stop("Input 'dir_job_results' must be a single character string.") + abort( + message = "Input 'dir_job_results' must be a single character string.", + class = "validation_error", + dir_job_results = dir_job_results + ) } if (!dir.exists(dir_job_results)) { - stop(paste("The directory", dir_job_results, "does not exist.")) + abort( + message = paste("The directory", dir_job_results, "does not exist."), + class = "file_error", + dir_job_results = dir_job_results + ) } + if (is.null(filepath)) { filepath <- file.path(common_root, "molevol_scripts", @@ -235,20 +265,32 @@ write_proc_medians_yml <- 
function(dir_job_results, filepath = NULL) { "job_proc_weights.yml") } if (!is.character(filepath) || length(filepath) != 1) { - stop("Input 'filepath' must be a single character string.") + abort( + message = "Input 'filepath' must be a single character string.", + class = "validation_error", + filepath = filepath + ) } medians <- get_proc_medians(dir_job_results) yaml::write_yaml(medians, filepath) }, error = function(e) { - message(paste("Encountered an error: "), e$message) + abort( + message = paste("Encountered an error: ", e$message), + class = "processing_error", + call = sys.call(), + dir_job_results = dir_job_results, + filepath = filepath + ) }, warning = function(w) { - message(paste("Warning: "), w$message) - }, finally = { - message("write_proc_medians_table function execution completed.") - } - ) - + warn( + message = paste("Warning: ", w$message), + class = "processing_warning", + call = sys.call(), + dir_job_results = dir_job_results, + filepath = filepath + ) + }) } #' Quickly get the runtime weights for MolEvolvR backend processes @@ -275,13 +317,24 @@ get_proc_weights <- function(medians_yml_path = NULL) { # attempt to read the weights from the YAML file produced by # write_proc_medians_yml() if (stringr::str_trim(medians_yml_path) == "") { - stop( - stringr::str_glue("medians_yml_path is empty - ({medians_yml_path}), returning default weights") + abort( + message = stringr::str_glue("medians_yml_path is empty + ({medians_yml_path}), returning default weights"), + class = "input_error", + medians_yml_path = medians_yml_path ) } proc_weights <- yaml::read_yaml(medians_yml_path) + + if (!is.list(proc_weights) || length(proc_weights) == 0) { + abort( + message = "The loaded YAML file does not + contain valid process weights.", + class = "file_error", + medians_yml_path = medians_yml_path + ) + } }, # to avoid fatal errors in reading the proc weights yaml, # some median process runtimes have been hardcoded based on @@ -318,10 +371,9 @@ 
get_proc_weights <- function(medians_yml_path = NULL) { #' "domain_architecture"), #' n_inputs = 3, n_hits = 50L) #' @export -advanced_opts2est_walltime <- function(advanced_opts, - n_inputs = 1L, - n_hits = NULL, - verbose = FALSE) { +advanced_opts2est_walltime <- function(advanced_opts, n_inputs = 1L, + n_hits = NULL, + verbose = FALSE) { tryCatch({ # to calculate est walltime for a homology search job, the number of hits @@ -331,26 +383,42 @@ advanced_opts2est_walltime <- function(advanced_opts, # Validate advanced_opts if (!is.character(advanced_opts)) { - stop("Argument 'advanced_opts' must be a character vector.") + abort( + message = "Argument 'advanced_opts' must be a character vector.", + class = "validation_error", + advanced_opts = advanced_opts + ) } # Validate n_inputs if (!is.numeric(n_inputs) || length(n_inputs) != 1 || n_inputs <= 0) { - stop("Argument 'n_inputs' must be a single positive numeric value.") + abort( + message = "Argument 'n_inputs' + must be a single positive numeric value.", + class = "validation_error", + n_inputs = n_inputs + ) } # Validate n_hits if homology_search is in advanced_opts if ("homology_search" %in% advanced_opts && - (is.null(n_hits)|| !is.numeric(n_hits) - || length(n_hits) != 1 || n_hits < 0)) { - stop("Argument 'n_hits' must be a single non-negative numeric value when - 'homology_search' is in 'advanced_opts'.") + (is.null(n_hits) || !is.numeric(n_hits) || + length(n_hits) != 1 || n_hits < 0)) { + abort( + message = "Argument 'n_hits' must be a single non-negative numeric + value when 'homology_search' is in 'advanced_opts'.", + class = "validation_error", + n_hits = n_hits + ) } # Get process weights proc_weights <- write_proc_medians_yml() if (!is.list(proc_weights)) { - stop("Process weights could not be retrieved correctly.") + abort( + message = "Process weights could not be retrieved correctly.", + class = "processing_error" + ) } # sort process weights by names and convert to vec @@ -389,12 +457,23 @@ 
advanced_opts2est_walltime <- function(advanced_opts, } return(est_walltime) }, error = function(e) { - message(paste("Encountered an error: ", e$message)) + abort( + message = paste("Encountered an error: ", e$message), + class = "processing_error", + call = sys.call(), + advanced_opts = advanced_opts, + n_inputs = n_inputs, + n_hits = n_hits + ) }, warning = function(w) { - message(paste("Warning: ", w$message)) - }, finally = { - message("advanced_opts2est_walltime - function execution completed.") + warn( + message = paste("Warning: ", w$message), + class = "processing_warning", + call = sys.call(), + advanced_opts = advanced_opts, + n_inputs = n_inputs, + n_hits = n_hits + ) }) } @@ -419,22 +498,44 @@ assign_job_queue <- function( t_cutoff = 21600 # 6 hours ) { tryCatch({ + # Validate t_sec_estimate if (!is.numeric(t_sec_estimate) || length(t_sec_estimate) != 1) { - stop("Argument 't_sec_estimate' must be a single numeric value.") + abort( + message = "Argument 't_sec_estimate' must be a single numeric value.", + class = "validation_error", + t_sec_estimate = t_sec_estimate + ) } + # Validate t_cutoff if (!is.numeric(t_cutoff) || length(t_cutoff) != 1 || t_cutoff < 0) { - stop("Argument 't_cutoff' must be a single non-negative numeric value.") + abort( + message = "Argument 't_cutoff' must be a + single non-negative numeric value.", + class = "validation_error", + t_cutoff = t_cutoff + ) } + queue <- ifelse(t_sec_estimate > t_cutoff, "long", "short") return(queue) }, error = function(e) { - message(paste("Encountered an error: ", e$message)) + abort( + message = paste("Encountered an error: ", e$message), + class = "processing_error", + call = sys.call(), + t_sec_estimate = t_sec_estimate, + t_cutoff = t_cutoff + ) }, warning = function(w) { - message(paste("Warning: ", w$message)) - }, finally = { - message("assign_job_queue function execution completed.") + warn( + message = paste("Warning: ", w$message), + class = "processing_warning", + call = sys.call(), 
+ t_sec_estimate = t_sec_estimate, + t_cutoff = t_cutoff + ) }) } @@ -537,11 +638,15 @@ plot_estimated_walltimes <- function() { ) return(p) }, error = function(e) { - message(paste("Encountered an error: ", e$message)) + abort( + message = paste("Encountered an error:", e$message), + .internal = TRUE + ) }, warning = function(w) { - message(paste("Warning: ", w$message)) - }, finally = { - message("plot_estimated_walltimes function execution completed.") + warn( + message = paste("Warning:", w$message), + .internal = TRUE + ) }) } From 57a635671795984f5ace17076ef0029c6ff0336c Mon Sep 17 00:00:00 2001 From: Seyi Kuforiji Date: Sun, 20 Oct 2024 12:01:02 +0100 Subject: [PATCH 46/61] Enhance error handling and validation across functions - Added robust error handling in run_deltablast and run_rpsblast functions. - Updated Roxygen documentation to import rlang::abort, rlang::warn and rlang::inform for better error management. - Refactored code for clarity and consistency based on the suggestion from the last review. 
--- NAMESPACE | 3 + R/acc2lin.R | 105 ++++++++++++++++++----------------- R/assign_job_queue.R | 128 ++++++++++++++++++++++--------------------- R/blastWrappers.R | 84 +++++++++++++++++++++------- 4 files changed, 184 insertions(+), 136 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 078f971b..9449e14b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -240,8 +240,11 @@ importFrom(readr,write_lines) importFrom(readr,write_tsv) importFrom(rentrez,entrez_fetch) importFrom(rlang,.data) +importFrom(rlang,abort) importFrom(rlang,as_string) +importFrom(rlang,inform) importFrom(rlang,sym) +importFrom(rlang,warn) importFrom(sendmailR,mime_part) importFrom(sendmailR,sendmail) importFrom(seqinr,dist.alignment) diff --git a/R/acc2lin.R b/R/acc2lin.R index bd5cc289..c1f3b34e 100644 --- a/R/acc2lin.R +++ b/R/acc2lin.R @@ -5,11 +5,13 @@ # suppressPackageStartupMessages(library(data.table)) # suppressPackageStartupMessages(library(tidyverse)) # suppressPackageStartupMessages(library(biomartr)) -suppressPackageStartupMessages(library(rlang)) + # https://stackoverflow.com/questions/18730491/sink-does-not-release-file #' Sink Reset #' +#' @importFrom rlang warn abort inform +#' #' @return No return, but run to close all outstanding `sink()`s #' and handles any errors or warnings that occur during the process. 
#' @@ -25,17 +27,17 @@ sinkReset <- function() { for (i in seq_len(sink.number())) { sink(NULL) } - inform("All sinks closed", class = "sink_reset_info") + rlang::inform("All sinks closed", class = "sink_reset_info") }, error = function(e) { - abort(paste("Error: ", e$message), class = "sink_reset_error") + rlang::abort(paste("Error: ", e$message), class = "sink_reset_error") }, warning = function(w) { - warn(paste("Warning: ", w$message), class = "sink_reset_warning") + rlang::warn(paste("Warning: ", w$message), class = "sink_reset_warning") }, finally = { # If any additional cleanup is needed, it can be done here if (sink.number() > 0) { # Additional cleanup if sinks are still open - inform("Some sinks remain open, ensure proper cleanup.", - class = "sink_cleanup_warning") + rlang::inform("Some sinks remain open, ensure proper cleanup.", + class = "sink_cleanup_warning") } }) } @@ -52,7 +54,7 @@ sinkReset <- function() { #' #' @importFrom dplyr pull #' @importFrom magrittr %>% -#' @importFrom rlang sym +#' @importFrom rlang sym warn abort inform #' #' @return Describe return, in detail #' @export @@ -66,30 +68,30 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path, plan = "sequential", ...) 
{ # check for validate inputs if (!is.data.frame(df)) { - abort("Input 'df' must be a data frame.", class = "input_error") + rlang::abort("Input 'df' must be a data frame.", class = "input_error") } if (!acc_col %in% colnames(df)) { - abort(paste("Column", acc_col, - "not found in data frame."), class = "column_error") + rlang::abort(paste("Column", acc_col, + "not found in data frame."), class = "column_error") } # Ensure paths are character strings if (!is.character(assembly_path) || !is.character(lineagelookup_path)) { - abort("Both 'assembly_path' and - 'lineagelookup_path' must be character strings.", - class = "path_type_error") + rlang::abort("Both 'assembly_path' and + 'lineagelookup_path' must be character strings.", + class = "path_type_error") } # Ensure paths exist if (!file.exists(assembly_path)) { - abort(paste("Assembly file not found at:", - assembly_path), class = "file_not_found_error") + rlang::abort(paste("Assembly file not found at:", + assembly_path), class = "file_not_found_error") } if (!file.exists(lineagelookup_path)) { - abort(paste("Lineage lookup file not found at:", - lineagelookup_path), class = "file_not_found_error") + rlang::abort(paste("Lineage lookup file not found at:", + lineagelookup_path), class = "file_not_found_error") } tryCatch({ # Attempt to add lineages @@ -99,7 +101,7 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path, accessions, assembly_path, lineagelookup_path, ipgout_path, plan ) - # Drop a lot of the unimportant columns for now? + # Drop a lot of the unimportant columns for now? 
# will make merging much easier lins <- lins[, c( "Strand", "Start", "Stop", "Nucleotide Accession", "Source", @@ -107,18 +109,18 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path, ) := NULL] lins <- unique(lins) - # dup <- lins %>% group_by(Protein) %>% + # dup <- lins %>% group_by(Protein) %>% # summarize(count = n()) %>% filter(count > 1) %>% # pull(Protein) merged <- merge(df, lins, by.x = acc_col, by.y = "Protein", all.x = TRUE) return(merged) }, error = function(e) { - abort(paste("Error during lineage addition:", e$message), - class = "lineage_addition_error") + rlang::abort(paste("Error during lineage addition:", e$message), + class = "lineage_addition_error") }, warning = function(w) { - warn(paste("Warning during lineage addition:", w$message), - class = "lineage_addition_warning") + rlang::warn(paste("Warning during lineage addition:", w$message), + class = "lineage_addition_warning") }) } @@ -137,11 +139,13 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path, #' This file can be generated using the \link[MolEvolvR]{downloadAssemblySummary} function #' @param lineagelookup_path String of the path to the lineage lookup file #' (taxid to lineage mapping). This file can be generated using the -#' @param ipgout_path Path to write the results +#' @param ipgout_path Path to write the results #' of the efetch run of the accessions #' on the ipg database. If NULL, the file will not be written. Defaults to NULL #' @param plan #' +#' @importFrom rlang warn abort inform +#' #' @return Describe return, in detail #' @export #' @@ -149,8 +153,8 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path, #' \dontrun{ #' acc2Lineage() #' } -acc2Lineage <- function(accessions, assembly_path, - lineagelookup_path, ipgout_path = NULL, +acc2Lineage <- function(accessions, assembly_path, + lineagelookup_path, ipgout_path = NULL, plan = "sequential", ...) 
{ tmp_ipg <- F if (is.null(ipgout_path)) { @@ -167,12 +171,10 @@ acc2Lineage <- function(accessions, assembly_path, lins <- IPG2Lineage(accessions, ipgout_path, assembly_path, lineagelookup_path) }, error = function(e) { - abort( + rlang::abort( message = paste("An error occurred during IPG fetching or lineage processing:", e$message), class = "lineage_processing_error", - # capturing the call stack - call = sys.call(), # adding additional context accessions = accessions, assembly_path = assembly_path, @@ -181,11 +183,10 @@ acc2Lineage <- function(accessions, assembly_path, plan = plan ) }, warning = function(w) { - warn( + rlang::warn( message = paste("Warning during IPG fetching or lineage processing:", w$message), class = "lineage_processing_warning", - call = sys.call(), # capturing the call stack accessions = accessions, assembly_path = assembly_path, lineagelookup_path = lineagelookup_path, @@ -218,6 +219,7 @@ acc2Lineage <- function(accessions, assembly_path, #' @importFrom furrr future_map #' @importFrom future plan #' @importFrom rentrez entrez_fetch +#' @importFrom rlang warn abort inform #' #' @return Describe return, in detail #' @export @@ -229,18 +231,18 @@ acc2Lineage <- function(accessions, assembly_path, efetchIPG <- function(accnums, out_path, plan = "sequential", ...) 
{ # Argument validation if (!is.character(accnums) || length(accnums) == 0) { - abort("Error: 'accnums' must be a non-empty character vector.", - class = "validation_error") + rlang::abort("Error: 'accnums' must be a non-empty character vector.", + class = "validation_error") } if (!is.character(out_path) || nchar(out_path) == 0) { - abort("Error: 'out_path' must be a non-empty string.", - class = "validation_error") + rlang::abort("Error: 'out_path' must be a non-empty string.", + class = "validation_error") } if (!is.function(plan)) { - abort("Error: 'plan' must be a valid plan function.", - class = "validation_error") + rlang::abort("Error: 'plan' must be a valid plan function.", + class = "validation_error") } if (length(accnums) > 0) { partition <- function(in_data, groups) { @@ -285,19 +287,17 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) { }) sink(NULL) }, error = function(e) { - abort( + rlang::abort( message = paste("An error occurred: ", e$message), class = "fetch_error", - call = sys.call(), accnums = accnums, out_path = out_path, plan = plan ) }, warning = function(w) { - warn( + rlang::warn( message = paste("Warning: ", w$message), class = "fetch_warning", - call = sys.call(), accnums = accnums, out_path = out_path, plan = plan @@ -331,6 +331,7 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) { #' "create_lineage_lookup()" function #' #' @importFrom data.table fread +#' @importFrom rlang warn abort inform #' #' @return Describe return, in detail #' @export @@ -344,31 +345,31 @@ IPG2Lineage <- function(accessions, ipg_file, assembly_path, lineagelookup_path, ...) 
{ # Argument validation for accessions if (!is.character(accessions) || length(accessions) == 0) { - abort("Input 'accessions' must be a non-empty + rlang::abort("Input 'accessions' must be a non-empty character vector.", class = "validation_error") } # check for validate inputs if (!is.character(ipg_file)) { - abort("Input 'ipg_file' must be a + rlang::abort("Input 'ipg_file' must be a character string.", class = "validation_error") } # Ensure paths are character strings if (!is.character(assembly_path) || !is.character(lineagelookup_path)) { - abort("Both 'assembly_path' and 'lineagelookup_path' - must be character strings.", class = "validation_error") + rlang::abort("Both 'assembly_path' and 'lineagelookup_path' + must be character strings.", class = "validation_error") } # Ensure paths exist if (!file.exists(assembly_path)) { - abort(paste("Assembly file not found at:", assembly_path), - class = "file_error") + rlang::abort(paste("Assembly file not found at:", assembly_path), + class = "file_error") } if (!file.exists(lineagelookup_path)) { - abort(paste("Lineage lookup file not found at:", lineagelookup_path), - class = "file_error") + rlang::abort(paste("Lineage lookup file not found at:", lineagelookup_path), + class = "file_error") } # Process the IPG file @@ -390,20 +391,18 @@ IPG2Lineage <- function(accessions, ipg_file, return(lins) }, error = function(e) { - abort( + rlang::abort( message = paste("An error occurred: ", e$message), class = "processing_error", - call = sys.call(), accessions = accessions, ipg_file = ipg_file, assembly_path = assembly_path, lineagelookup_path = lineagelookup_path ) }, warning = function(w) { - warn( + rlang::warn( message = paste("Warning: ", w$message), class = "processing_warning", - call = sys.call(), accessions = accessions, ipg_file = ipg_file, assembly_path = assembly_path, diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R index df4f97e7..8b227979 100644 --- a/R/assign_job_queue.R +++ b/R/assign_job_queue.R 
@@ -1,4 +1,4 @@ -suppressPackageStartupMessages(library(rlang)) + # for now, we're using an env var, COMMON_SRC_ROOT, to specify this folder since # the working directory is changed in many parts of the current molevolvr # pipeline. @@ -9,6 +9,8 @@ common_root <- Sys.getenv("COMMON_SRC_ROOT") #' Construct list where names (MolEvolvR advanced options) point to processes #' +#' @importFrom rlang warn abort inform +#' #' @return list where names (MolEvolvR advanced options) point to processes #' #' example: list_opts2procs <- make_opts2procs @@ -23,9 +25,10 @@ make_opts2procs <- function() { ) return(opts2processes) }, error = function(e) { - abort(paste("Error: ", e$message), class = "Opts_to_process_error") + rlang::abort(paste("Error: ", e$message), class = "Opts_to_process_error") }, warning = function(w) { - warn(paste("Warning: ", w$message), class = "Opts_to_process_warning") + rlang::warn(paste("Warning: ", w$message), + class = "Opts_to_process_warning") }) } @@ -34,6 +37,8 @@ make_opts2procs <- function() { #' #' @param advanced_opts character vector of MolEvolvR advanced options #' +#' @importFrom rlang warn abort inform +#' #' @return character vector of process names that will execute given #' the advanced options #' @@ -43,7 +48,8 @@ make_opts2procs <- function() { #' @export map_advanced_opts2procs <- function(advanced_opts) { if (!is.character(advanced_opts)) { - abort("Argument must be a character vector!", class = "validation_error") + rlang::abort("Argument must be a character vector!", + class = "validation_error") } tryCatch({ # append 'always' to add procs that always run @@ -55,17 +61,15 @@ map_advanced_opts2procs <- function(advanced_opts) { procs <- opts2proc[idx] |> unlist() return(procs) }, error = function(e) { - abort( + rlang::abort( message = paste("Encountered an error: ", e$message), class = "map_advanced_opts2procs_error", - call = sys.call(), advanced_opts = advanced_opts ) }, warning = function(w) { - warn( + rlang::warn( message = 
paste("Warning: ", w$message), class = "map_advanced_opts2procs_warning", - call = sys.call(), advanced_opts = advanced_opts ) }) @@ -78,6 +82,7 @@ map_advanced_opts2procs <- function(advanced_opts) { #' directory #' #' @importFrom dplyr across everything select summarise +#' @importFrom rlang warn abort inform #' #' @return [list] names: processes; values: median runtime (seconds) #' @@ -98,14 +103,14 @@ get_proc_medians <- function(dir_job_results) { tryCatch({ # Check if dir_job_results is a character string if (!is.character(dir_job_results) || length(dir_job_results) != 1) { - abort("Input 'dir_job_results' must be a single character string.", - class = "validation_error") + rlang::abort("Input 'dir_job_results' must be a single character string.", + class = "validation_error") } # Check if dir_job_results exists if (!dir.exists(dir_job_results)) { - abort(paste("The directory", dir_job_results, "does not exist."), - class = "file_error") + rlang::abort(paste("The directory", dir_job_results, "does not exist."), + class = "file_error") } source(file.path(common_root, "molevol_scripts", "R", "metrics.R")) @@ -144,10 +149,10 @@ get_proc_medians <- function(dir_job_results) { as.list() return(list_proc_medians) }, error = function(e) { - abort(paste("Encountered an error: ", e$message), - class = "processing_error") + rlang::abort(paste("Encountered an error: ", e$message), + class = "processing_error") }, warning = function(w) { - warn(paste("Warning: ", w$message), class = "processing_warning") + rlang::warn(paste("Warning: ", w$message), class = "processing_warning") }) } @@ -161,6 +166,7 @@ get_proc_medians <- function(dir_job_results) { #' @importFrom tibble as_tibble #' @importFrom readr write_tsv #' @importFrom tidyr pivot_longer +#' @importFrom rlang warn abort inform #' #' @return [tbl_df] 2 columns: 1) process and 2) median seconds #' @@ -173,18 +179,18 @@ write_proc_medians_table <- function(dir_job_results, filepath) { tryCatch({ # Error handling for 
input arguments if (!is.character(dir_job_results) || length(dir_job_results) != 1) { - abort("Input 'dir_job_results' must be a single character string.", - class = "validation_error") + rlang::abort("Input 'dir_job_results' must be a single character string.", + class = "validation_error") } if (!dir.exists(dir_job_results)) { - abort(paste("The directory", dir_job_results, "does not exist."), - class = "file_error") + rlang::abort(paste("The directory", dir_job_results, "does not exist."), + class = "file_error") } if (!is.character(filepath) || length(filepath) != 1) { - abort("Input 'filepath' must be a single character string.", - class = "validation_error") + rlang::abort("Input 'filepath' must be a single character string.", + class = "validation_error") } df_proc_medians <- get_proc_medians(dir_job_results) |> tibble::as_tibble() |> @@ -199,18 +205,16 @@ write_proc_medians_table <- function(dir_job_results, filepath) { readr::write_tsv(df_proc_medians, file = filepath) return(df_proc_medians) }, error = function(e) { - abort( + rlang::abort( message = paste("Encountered an error: ", e$message), class = "processing_error", - call = sys.call(), dir_job_results = dir_job_results, filepath = filepath ) }, warning = function(w) { - warn( + rlang::warn( message = paste("Warning: ", w$message), class = "processing_warning", - call = sys.call(), dir_job_results = dir_job_results, filepath = filepath ) @@ -226,10 +230,11 @@ write_proc_medians_table <- function(dir_job_results, filepath) { #' read location. 
#' #' @param dir_job_results [chr] path to MolEvolvR job_results directory -#' @param filepath [chr] path to save YAML file; if NULL, +#' @param filepath [chr] path to save YAML file; if NULL, #' uses ./molevol_scripts/log_data/job_proc_weights.yml #' #' @importFrom yaml write_yaml +#' @importFrom rlang warn abort inform #' #' @examples #' \dontrun{ @@ -243,7 +248,7 @@ write_proc_medians_yml <- function(dir_job_results, filepath = NULL) { tryCatch({ # Error handling for dir_job_results arguments if (!is.character(dir_job_results) || length(dir_job_results) != 1) { - abort( + rlang::abort( message = "Input 'dir_job_results' must be a single character string.", class = "validation_error", dir_job_results = dir_job_results @@ -251,7 +256,7 @@ write_proc_medians_yml <- function(dir_job_results, filepath = NULL) { } if (!dir.exists(dir_job_results)) { - abort( + rlang::abort( message = paste("The directory", dir_job_results, "does not exist."), class = "file_error", dir_job_results = dir_job_results @@ -265,7 +270,7 @@ write_proc_medians_yml <- function(dir_job_results, filepath = NULL) { "job_proc_weights.yml") } if (!is.character(filepath) || length(filepath) != 1) { - abort( + rlang::abort( message = "Input 'filepath' must be a single character string.", class = "validation_error", filepath = filepath @@ -275,18 +280,16 @@ write_proc_medians_yml <- function(dir_job_results, filepath = NULL) { medians <- get_proc_medians(dir_job_results) yaml::write_yaml(medians, filepath) }, error = function(e) { - abort( + rlang::abort( message = paste("Encountered an error: ", e$message), class = "processing_error", - call = sys.call(), dir_job_results = dir_job_results, filepath = filepath ) }, warning = function(w) { - warn( + rlang::warn( message = paste("Warning: ", w$message), class = "processing_warning", - call = sys.call(), dir_job_results = dir_job_results, filepath = filepath ) @@ -300,6 +303,7 @@ write_proc_medians_yml <- function(dir_job_results, filepath = NULL) { #' 
#' @importFrom stringr str_glue str_trim #' @importFrom yaml read_yaml +#' @importFrom rlang warn abort inform #' #' @return [list] names: processes; values: median runtime (seconds) #' @@ -317,9 +321,9 @@ get_proc_weights <- function(medians_yml_path = NULL) { # attempt to read the weights from the YAML file produced by # write_proc_medians_yml() if (stringr::str_trim(medians_yml_path) == "") { - abort( - message = stringr::str_glue("medians_yml_path is empty - ({medians_yml_path}), returning default weights"), + rlang::abort( + message = stringr::str_glue("medians_yml_path is empty + ({medians_yml_path}), returning default weights"), class = "input_error", medians_yml_path = medians_yml_path ) @@ -328,7 +332,7 @@ get_proc_weights <- function(medians_yml_path = NULL) { proc_weights <- yaml::read_yaml(medians_yml_path) if (!is.list(proc_weights) || length(proc_weights) == 0) { - abort( + rlang::abort( message = "The loaded YAML file does not contain valid process weights.", class = "file_error", @@ -364,6 +368,7 @@ get_proc_weights <- function(medians_yml_path = NULL) { #' #' @importFrom dplyr if_else #' @importFrom stringr str_glue +#' @importFrom rlang warn abort inform #' #' @return total estimated number of seconds a job will process (walltime) #' @@ -383,7 +388,7 @@ advanced_opts2est_walltime <- function(advanced_opts, n_inputs = 1L, # Validate advanced_opts if (!is.character(advanced_opts)) { - abort( + rlang::abort( message = "Argument 'advanced_opts' must be a character vector.", class = "validation_error", advanced_opts = advanced_opts @@ -392,8 +397,8 @@ advanced_opts2est_walltime <- function(advanced_opts, n_inputs = 1L, # Validate n_inputs if (!is.numeric(n_inputs) || length(n_inputs) != 1 || n_inputs <= 0) { - abort( - message = "Argument 'n_inputs' + rlang::abort( + message = "Argument 'n_inputs' must be a single positive numeric value.", class = "validation_error", n_inputs = n_inputs @@ -404,8 +409,8 @@ advanced_opts2est_walltime <- 
function(advanced_opts, n_inputs = 1L, if ("homology_search" %in% advanced_opts && (is.null(n_hits) || !is.numeric(n_hits) || length(n_hits) != 1 || n_hits < 0)) { - abort( - message = "Argument 'n_hits' must be a single non-negative numeric + rlang::abort( + message = "Argument 'n_hits' must be a single non-negative numeric value when 'homology_search' is in 'advanced_opts'.", class = "validation_error", n_hits = n_hits @@ -415,7 +420,7 @@ advanced_opts2est_walltime <- function(advanced_opts, n_inputs = 1L, # Get process weights proc_weights <- write_proc_medians_yml() if (!is.list(proc_weights)) { - abort( + rlang::abort( message = "Process weights could not be retrieved correctly.", class = "processing_error" ) @@ -437,9 +442,9 @@ advanced_opts2est_walltime <- function(advanced_opts, n_inputs = 1L, opts2procs <- make_opts2procs() # exclude the homology search processes for the homologous hits procs2exclude_for_homologs <- opts2procs[["homology_search"]] - procs_homologs <- procs_from_opts[!(procs_from_opts + procs_homologs <- procs_from_opts[!(procs_from_opts %in% procs2exclude_for_homologs)] - binary_proc_vec_homolog <- dplyr::if_else(all_procs + binary_proc_vec_homolog <- dplyr::if_else(all_procs %in% procs_homologs, 1L, 0L) # add the estimated walltime for processes run on the homologous hits est_walltime <- est_walltime + @@ -457,19 +462,17 @@ advanced_opts2est_walltime <- function(advanced_opts, n_inputs = 1L, } return(est_walltime) }, error = function(e) { - abort( + rlang::abort( message = paste("Encountered an error: ", e$message), class = "processing_error", - call = sys.call(), advanced_opts = advanced_opts, n_inputs = n_inputs, n_hits = n_hits ) }, warning = function(w) { - warn( + rlang::warn( message = paste("Warning: ", w$message), class = "processing_warning", - call = sys.call(), advanced_opts = advanced_opts, n_inputs = n_inputs, n_hits = n_hits @@ -486,6 +489,8 @@ advanced_opts2est_walltime <- function(advanced_opts, n_inputs = 1L, #' @param 
t_long threshold value that defines the lower bound for assigning a #' job to the "long queue" #' +#' @importFrom rlang warn abort inform +#' #' @return a string of "short" or "long" #' #' example: @@ -500,7 +505,7 @@ assign_job_queue <- function( tryCatch({ # Validate t_sec_estimate if (!is.numeric(t_sec_estimate) || length(t_sec_estimate) != 1) { - abort( + rlang::abort( message = "Argument 't_sec_estimate' must be a single numeric value.", class = "validation_error", t_sec_estimate = t_sec_estimate @@ -509,8 +514,8 @@ assign_job_queue <- function( # Validate t_cutoff if (!is.numeric(t_cutoff) || length(t_cutoff) != 1 || t_cutoff < 0) { - abort( - message = "Argument 't_cutoff' must be a + rlang::abort( + message = "Argument 't_cutoff' must be a single non-negative numeric value.", class = "validation_error", t_cutoff = t_cutoff @@ -521,18 +526,16 @@ assign_job_queue <- function( queue <- ifelse(t_sec_estimate > t_cutoff, "long", "short") return(queue) }, error = function(e) { - abort( + rlang::abort( message = paste("Encountered an error: ", e$message), class = "processing_error", - call = sys.call(), t_sec_estimate = t_sec_estimate, t_cutoff = t_cutoff ) }, warning = function(w) { - warn( + rlang::warn( message = paste("Warning: ", w$message), class = "processing_warning", - call = sys.call(), t_sec_estimate = t_sec_estimate, t_cutoff = t_cutoff ) @@ -548,6 +551,7 @@ assign_job_queue <- function( #' @importFrom dplyr mutate select #' @importFrom ggplot2 aes geom_line ggplot labs #' @importFrom tibble as_tibble +#' @importFrom rlang warn abort inform #' #' @return line plot object #' @@ -581,8 +585,8 @@ plot_estimated_walltimes <- function() { n_hits <- if ("homology_search" %in% advanced_opts) { 100 } else { - NULL - } + NULL + } est_walltime <- advanced_opts2est_walltime ( advanced_opts, n_inputs = i, @@ -627,8 +631,8 @@ plot_estimated_walltimes <- function() { # sec to hrs df_walltimes <- df_walltimes |> dplyr::mutate(est_walltime = est_walltime / 3600) - p 
<- ggplot2::ggplot(df_walltimes, ggplot2::aes(x = n_inputs, - y = est_walltime, + p <- ggplot2::ggplot(df_walltimes, ggplot2::aes(x = n_inputs, + y = est_walltime, color = advanced_opts)) + ggplot2::geom_line() + ggplot2::labs( @@ -638,12 +642,12 @@ plot_estimated_walltimes <- function() { ) return(p) }, error = function(e) { - abort( + rlang::abort( message = paste("Encountered an error:", e$message), .internal = TRUE ) }, warning = function(w) { - warn( + rlang::warn( message = paste("Warning:", w$message), .internal = TRUE ) diff --git a/R/blastWrappers.R b/R/blastWrappers.R index 15484a1b..95643e24 100755 --- a/R/blastWrappers.R +++ b/R/blastWrappers.R @@ -13,6 +13,8 @@ #' @param num_alignments #' @param num_threads #' +#' @importFrom rlang warn abort inform +#' #' @return #' @export #' @@ -23,23 +25,25 @@ run_deltablast <- function(deltablast_path, db_search_path, # Argument validation if (!file.exists(deltablast_path)) { - stop("The DELTABLAST executable path is invalid: ", deltablast_path) + rlang::abort(paste("The DELTABLAST executable path is invalid:", + deltablast_path)) } if (!dir.exists(db_search_path)) { - stop("The database search path is invalid: ", db_search_path) + rlang::abort(paste("The database search path is invalid:", db_search_path)) } if (!file.exists(query)) { - stop("The query file path is invalid: ", query) + rlang::abort(paste("The query file path is invalid:", query)) } if (!is.numeric(as.numeric(evalue)) || as.numeric(evalue) <= 0) { - stop("The evalue must be a positive number: ", evalue) + rlang::abort(paste("The evalue must be a positive number:", evalue)) } if (!is.numeric(num_alignments) || num_alignments <= 0) { - stop("The number of alignments must be a - positive integer: ", num_alignments) + rlang::abort(paste("The number of alignments must be a positive integer:", + num_alignments)) } if (!is.numeric(num_threads) || num_threads <= 0) { - stop("The number of threads must be a positive integer: ", num_threads) + 
rlang::abort(paste("The number of threads must be a positive integer:", + num_threads)) } start <- Sys.time() @@ -61,13 +65,28 @@ run_deltablast <- function(deltablast_path, db_search_path, ) print(Sys.time() - start) }, error = function(e) { - message(paste("Error in run_deltablast: ", e)) + rlang::abort( + message = paste("Error in run_deltablast:", e$message), + class = "processing_error", + deltablast_path = deltablast_path, + db_search_path = db_search_path, + query = query, + out = out, + num_alignments = num_alignments, + num_threads = num_threads + ) }, warning = function(w) { - message(paste("Warning in run_deltablast: ", w)) - }, finally = { - message("run_deltablast completed") + rlang::warn( + message = paste("Warning in run_deltablast:", w$message), + class = "processing_warning", + deltablast_path = deltablast_path, + db_search_path = db_search_path, + query = query, + out = out, + num_alignments = num_alignments, + num_threads = num_threads + ) }) - } @@ -81,6 +100,8 @@ run_deltablast <- function(deltablast_path, db_search_path, #' @param out #' @param num_threads #' +#' @importFrom rlang warn abort inform +#' #' @return #' @export #' @@ -90,19 +111,26 @@ run_rpsblast <- function(rpsblast_path, db_search_path, out, num_threads = 1) { # Argument validation if (!file.exists(rpsblast_path)) { - stop("The RPSBLAST executable path is invalid: ", rpsblast_path) + rlang::abort(paste("The RPSBLAST executable path is invalid:", + rpsblast_path), + class = "file_error") } if (!dir.exists(db_search_path)) { - stop("The database search path is invalid: ", db_search_path) + rlang::abort(paste("The database search path is invalid:", db_search_path), + class = "file_error") } if (!file.exists(query)) { - stop("The query file path is invalid: ", query) + rlang::abort(paste("The query file path is invalid:", query), + class = "file_error") } if (!is.numeric(as.numeric(evalue)) || as.numeric(evalue) <= 0) { - stop("The evalue must be a positive number: ", evalue) + 
rlang::abort(paste("The evalue must be a positive number:", evalue), + class = "validation_error") } if (!is.numeric(num_threads) || num_threads <= 0) { - stop("The number of threads must be a positive integer: ", num_threads) + rlang::abort(paste("The number of threads must be a positive integer:", + num_threads), + class = "validation_error") } start <- Sys.time() @@ -123,11 +151,25 @@ run_rpsblast <- function(rpsblast_path, db_search_path, ) print(Sys.time() - start) }, error = function(e) { - message(paste("Error in run_rpsblast: ", e)) + rlang::abort( + message = paste("Error in run_rpsblast:", e$message), + class = "processing_error", + rpsblast_path = rpsblast_path, + db_search_path = db_search_path, + query = query, + out = out, + num_threads = num_threads + ) }, warning = function(w) { - message(paste("Warning in run_rpsblast: ", w)) - }, finally = { - message("run_rpsblast completed") + rlang::warn( + message = paste("Warning in run_rpsblast:", w$message), + class = "processing_warning", + rpsblast_path = rpsblast_path, + db_search_path = db_search_path, + query = query, + out = out, + num_threads = num_threads + ) }) } From df602dfd63cbab0d84dbcc8229e3da9c7646b9d5 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 22 Oct 2024 13:52:56 -0600 Subject: [PATCH 47/61] https://github.com/JRaviLab/MolEvolvR/pull/95/files#r1805272251 - re-implement dropped check - fix .Rd --- R/assign_job_queue.R | 5 ++++- man/writeProcessRuntime2YML.Rd | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R index 20ba841f..69609417 100644 --- a/R/assign_job_queue.R +++ b/R/assign_job_queue.R @@ -155,7 +155,7 @@ writeProcessRuntime2TSV <- function(dir_job_results, filepath) { #' their median runtimes in seconds to the path specified by 'filepath'. 
#' #' The default value of filepath is the value of the env var -#' MOLEVOLVR_PROC_WEIGHTS, which writeProcessRuntime2YML() also uses as its default +#' MOLEVOLVR_PROC_WEIGHTS, which getProcessRuntimeWeights() also uses as its default #' read location. #' #' @param dir_job_results [chr] path to MolEvolvR job_results directory @@ -173,6 +173,9 @@ writeProcessRuntime2TSV <- function(dir_job_results, filepath) { #' } #' @export writeProcessRuntime2YML <- function(dir_job_results, filepath = NULL) { + if (is.null(filepath)) { + filepath <- file.path(common_root, "molevol_scripts", "log_data", "job_proc_weights.yml") + } medians <- calculateProcessRuntime(dir_job_results) yaml::write_yaml(medians, filepath) } diff --git a/man/writeProcessRuntime2YML.Rd b/man/writeProcessRuntime2YML.Rd index 865f23f7..5e0a05a4 100644 --- a/man/writeProcessRuntime2YML.Rd +++ b/man/writeProcessRuntime2YML.Rd @@ -17,7 +17,7 @@ Compute median process runtimes, then write a YAML list of the processes and their median runtimes in seconds to the path specified by 'filepath'. The default value of filepath is the value of the env var -MOLEVOLVR_PROC_WEIGHTS, which writeProcessRuntime2YML() also uses as its default +MOLEVOLVR_PROC_WEIGHTS, which getProcessRuntimeWeights() also uses as its default read location. } \examples{ From 1a0b66358eac637736a18868ae27e4049aa22628 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 22 Oct 2024 14:43:47 -0600 Subject: [PATCH 48/61] https://github.com/JRaviLab/MolEvolvR/pull/95#discussion_r1805166466 - adjust roxygen skeleton readability --- R/acc2lin.R | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/R/acc2lin.R b/R/acc2lin.R index 61aae87c..7b6f570c 100644 --- a/R/acc2lin.R +++ b/R/acc2lin.R @@ -198,10 +198,8 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) 
{ #' of an efetch run on the ipg database and #' #' @param accessions Character vector of protein accessions -#' @param ipg_file Filepath to the file -#' containing results of an efetch run on the -#' ipg database. The protein accession in -#' 'accessions' should be contained in this +#' @param ipg_file Filepath to the file containing results of an efetch run on the +#' ipg database. The protein accession in 'accessions' should be contained in this #' file #' @param assembly_path String of the path to the assembly_summary path #' This file can be generated using the \link[MolEvolvR]{downloadAssemblySummary} function From 13e70c75a197c02c395cbef2d7b3c5b991ea7649 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 22 Oct 2024 15:02:39 -0600 Subject: [PATCH 49/61] formatting --- R/acc2lin.R | 8 ++------ man/efetchIPG.Rd | 3 +-- man/sinkReset.Rd | 1 - 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/R/acc2lin.R b/R/acc2lin.R index 7b6f570c..5f25afe2 100644 --- a/R/acc2lin.R +++ b/R/acc2lin.R @@ -10,7 +10,6 @@ #' Sink Reset #' #' @return No return, but run to close all outstanding `sink()`s -#' and handles any errors or warnings that occur during the process. #' #' @export #' @@ -87,8 +86,7 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path, #' This file can be generated using the \link[MolEvolvR]{downloadAssemblySummary} function #' @param lineagelookup_path String of the path to the lineage lookup file #' (taxid to lineage mapping). This file can be generated using the -#' @param ipgout_path Path to write the results -#' of the efetch run of the accessions +#' @param ipgout_path Path to write the results of the efetch run of the accessions #' on the ipg database. If NULL, the file will not be written. Defaults to NULL #' @param plan A string specifying the parallelization strategy for the future #' package, such as `"sequential"` or `"multisession"`. 
@@ -122,9 +120,7 @@ acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_pa #' #' @author Samuel Chen, Janani Ravi #' -#' @description Perform efetch on the ipg database -#' and write the results to out_path -#' +#' @description Perform efetch on the ipg database and write the results to out_path #' @param accnums Character vector containing the accession numbers to query on #' the ipg database #' @param out_path Path to write the efetch results to diff --git a/man/efetchIPG.Rd b/man/efetchIPG.Rd index db63024f..047e2652 100644 --- a/man/efetchIPG.Rd +++ b/man/efetchIPG.Rd @@ -23,8 +23,7 @@ the ipg database} No return value. The function writes the fetched results to \code{out_path}. } \description{ -Perform efetch on the ipg database -and write the results to out_path +Perform efetch on the ipg database and write the results to out_path Perform efetch on the ipg database and write the results to out_path } diff --git a/man/sinkReset.Rd b/man/sinkReset.Rd index e3fc7ce4..0285c0b2 100644 --- a/man/sinkReset.Rd +++ b/man/sinkReset.Rd @@ -8,7 +8,6 @@ sinkReset() } \value{ No return, but run to close all outstanding \code{sink()}s -and handles any errors or warnings that occur during the process. 
} \description{ Sink Reset From cdac9a3cc8a446596474fdc27892c2cc5fffbb3b Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 22 Oct 2024 15:27:32 -0600 Subject: [PATCH 50/61] let R sort NAMESPACE --- NAMESPACE | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 025f00cf..d91f16c9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -34,9 +34,9 @@ export(createFA2Tree) export(createGenomicContextNetwork) export(createJobResultsURL) export(createJobStatusEmailMessage) +export(createLineageLookup) export(createMSA_Kalign) export(createMSA_PDF) -export(createLineageLookup) export(createRepresentativeAccNum) export(createUndirectedGenomicContextNetwork) export(createWordCloud2Element) @@ -55,7 +55,6 @@ export(getTopAccByLinDomArch) export(mapAcc2Name) export(mapAdvOption2Process) export(mapOption2Process) -export(msa_pdf) export(plotEstimatedWallTimes) export(plotIPR2Viz) export(plotIPR2VizWeb) From 22504868261a7a56fa93c4889ac42a9becb66fff Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 22 Oct 2024 15:33:13 -0600 Subject: [PATCH 51/61] function/doc consistency --- R/networks_gencontext.R | 4 ++-- man/createUndirectedGenomicContextNetwork.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/networks_gencontext.R b/R/networks_gencontext.R index 7df6c270..ca1ef52d 100755 --- a/R/networks_gencontext.R +++ b/R/networks_gencontext.R @@ -8,7 +8,7 @@ ## GC Undirected Network ## ########################### -#' Domain Network +#' createUndirectedGenomicContextNetwork #' #' @description #' This function creates a domain network from the 'DomArch' column. @@ -37,7 +37,7 @@ #' #' @examples #' \dontrun{ -#' domain_network(pspa) +#' createUndirectedGenomicContextNetwork(pspa) #' } createUndirectedGenomicContextNetwork <- function(prot, column = "GenContext", domains_of_interest, cutoff_type = "Lineage", cutoff = 1, layout = "grid") { # by domain networks or all, as required. 
diff --git a/man/createUndirectedGenomicContextNetwork.Rd b/man/createUndirectedGenomicContextNetwork.Rd index d61c23df..b74da141 100644 --- a/man/createUndirectedGenomicContextNetwork.Rd +++ b/man/createUndirectedGenomicContextNetwork.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/networks_gencontext.R \name{createUndirectedGenomicContextNetwork} \alias{createUndirectedGenomicContextNetwork} -\title{Domain Network} +\title{createUndirectedGenomicContextNetwork} \usage{ createUndirectedGenomicContextNetwork( prot, @@ -35,6 +35,6 @@ A network of domains is returned based on shared domain architectures. } \examples{ \dontrun{ -domain_network(pspa) +createUndirectedGenomicContextNetwork(pspa) } } From 6632fe4cc4a26451c17831ef25a5a03fa182bb81 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 22 Oct 2024 16:01:06 -0600 Subject: [PATCH 52/61] replace rd --- man/acc2FA.Rd | 31 +++++++++++++++---------------- man/acc2fa.Rd | 38 -------------------------------------- 2 files changed, 15 insertions(+), 54 deletions(-) delete mode 100644 man/acc2fa.Rd diff --git a/man/acc2FA.Rd b/man/acc2FA.Rd index 6c6ea43c..517ee3d6 100644 --- a/man/acc2FA.Rd +++ b/man/acc2FA.Rd @@ -1,35 +1,34 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CHANGED-pre-msa-tree.R -\name{acc2FA} -\alias{acc2FA} -\title{acc2FA converts protein accession numbers to a fasta format.} +% Please edit documentation in R/pre-msa-tree.R +\name{acc2fa} +\alias{acc2fa} +\title{acc2fa} \usage{ -acc2FA(accessions, outpath, plan = "sequential") +acc2fa(accessions, outpath, plan = "sequential") } \arguments{ \item{accessions}{Character vector containing protein accession numbers to -generate fasta sequences for. -Function may not work for vectors of length > 10,000} +generate fasta sequences for. Function may not work for vectors of +length > 10,000} -\item{outpath}{\link{str} Location where fasta file should be written to.} +\item{outpath}{\link{str}. 
Location where fasta file should be written to.} -\item{plan}{Character string specifying the parallel processing strategy to -use with the \code{future} package. Default is "sequential".} +\item{plan}{Character. The plan to use for processing. Default is "sequential".} } \value{ -A logical value indicating whether the retrieval and conversion were -successful. Returns \code{TRUE} if successful and \code{FALSE} otherwise. +A Fasta file is written to the specified \code{outpath}. } \description{ +acc2fa converts protein accession numbers to a fasta format. Resulting fasta file is written to the outpath. } \examples{ \dontrun{ -acc2FA(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), +acc2fa(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), outpath = "my_proteins.fasta") -Entrez:accessions <- rep("ANY95992.1", 201) |> acc2FA(outpath = "entrez.fa") -EBI:accessions <- c("P12345", "Q9UHC1", -"O15530", "Q14624", "P0DTD1") |> acc2FA(outpath = "ebi.fa") +Entrez:accessions <- rep("ANY95992.1", 201) |> acc2fa(outpath = "entrez.fa") +EBI:accessions <- c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1") |> +acc2fa(outpath = "ebi.fa") } } \author{ diff --git a/man/acc2fa.Rd b/man/acc2fa.Rd deleted file mode 100644 index 517ee3d6..00000000 --- a/man/acc2fa.Rd +++ /dev/null @@ -1,38 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pre-msa-tree.R -\name{acc2fa} -\alias{acc2fa} -\title{acc2fa} -\usage{ -acc2fa(accessions, outpath, plan = "sequential") -} -\arguments{ -\item{accessions}{Character vector containing protein accession numbers to -generate fasta sequences for. Function may not work for vectors of -length > 10,000} - -\item{outpath}{\link{str}. Location where fasta file should be written to.} - -\item{plan}{Character. The plan to use for processing. Default is "sequential".} -} -\value{ -A Fasta file is written to the specified \code{outpath}. 
-} -\description{ -acc2fa converts protein accession numbers to a fasta format. -Resulting fasta file is written to the outpath. -} -\examples{ -\dontrun{ -acc2fa(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), -outpath = "my_proteins.fasta") -Entrez:accessions <- rep("ANY95992.1", 201) |> acc2fa(outpath = "entrez.fa") -EBI:accessions <- c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1") |> -acc2fa(outpath = "ebi.fa") -} -} -\author{ -Samuel Chen, Janani Ravi -} -\keyword{accnum,} -\keyword{fasta} From 5bedeee27a7fbd20eb17847bc1e4833d09f9d439 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Thu, 24 Oct 2024 13:45:02 -0600 Subject: [PATCH 53/61] update .Rd --- man/generateAllAlignments2FA.Rd | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/man/generateAllAlignments2FA.Rd b/man/generateAllAlignments2FA.Rd index 5babd22d..421d8cf7 100644 --- a/man/generateAllAlignments2FA.Rd +++ b/man/generateAllAlignments2FA.Rd @@ -22,18 +22,21 @@ generateAllAlignments2FA( \item{aln_path}{Character. Path to alignment files. Default is 'here("data/rawdata_aln/")'} -\item{fa_outpath}{Character. Path to the written fasta file. -Default is 'here("data/alns/")'.} - -\item{lin_file}{Character. Path to file. Master protein file with AccNum & +\item{fa_outpath}{Character. Path to file. Master protein file with AccNum & lineages. Default is 'here("data/rawdata_tsv/all_semiclean.txt")'} +\item{lin_file}{Character. Path to the written fasta file. +Default is 'here("data/alns/")'.} + \item{reduced}{Boolean. If TRUE, the fasta file will contain only one sequence per lineage. Default is 'FALSE'.} } \value{ +NULL. The function saves the output FASTA files to the specified +directory. + NULL. The function saves the output FASTA files to the specified directory. } @@ -47,6 +50,12 @@ Adding Leaves to an alignment file w/ accessions Adding Leaves to all alignment files w/ accessions & DAs? 
} \details{ +The alignment files would need two columns separated by spaces: +\enumerate{ +\item AccNum and 2. alignment. The protein homolog file should have AccNum, +Species, Lineages. +} + The alignment files would need two columns separated by spaces: \enumerate{ \item AccNum and 2. alignment. The protein homolog file should have AccNum, @@ -54,6 +63,9 @@ Species, Lineages. } } \note{ +Please refer to the source code if you have alternate + file formats +and/or column names. + Please refer to the source code if you have alternate + file formats and/or column names. } @@ -64,12 +76,6 @@ generateAllAlignments2FA() \dontrun{ generateAllAlignments2FA() } -\dontrun{ -generateAllAlignments2FA() -} -} -\author{ -Janani Ravi } \keyword{accnum,} \keyword{alignment,} From cb76c69eba5586c255834a370bc7ffa035700b8c Mon Sep 17 00:00:00 2001 From: Awa Synthia Date: Sat, 26 Oct 2024 12:40:34 +0300 Subject: [PATCH 54/61] change 'print(results)' to 'results' for brevity and to avoid potential issues Signed-off-by: Awa Synthia --- R/CHANGED-pre-msa-tree.R | 77 +++++++++-------- R/blastWrappers.R | 17 ++-- R/fa2domain.R | 10 +-- R/ipr2viz.R | 98 ++++++++++----------- R/lineage.R | 78 ++++++++--------- R/plotme.R | 6 +- R/plotting.R | 138 +++++++++++++++--------------- R/pre-msa-tree.R | 74 ++++++++-------- R/reverse_operons.R | 16 ++-- man/acc2FA.Rd | 8 +- man/addName.Rd | 2 +- man/addTaxID.Rd | 2 +- man/alignFasta.Rd | 6 +- man/convert2TitleCase.Rd | 4 +- man/createRepresentativeAccNum.Rd | 9 +- man/downloadAssemblySummary.Rd | 2 +- man/getAccNumFromFA.Rd | 5 +- man/getTopAccByLinDomArch.Rd | 4 +- man/mapAcc2Name.Rd | 4 +- man/plotIPR2Viz.Rd | 18 ++-- man/plotIPR2VizWeb.Rd | 18 ++-- man/plotLineageSunburst.Rd | 2 +- man/prepareColumnParams.Rd | 2 +- man/prepareSingleColumnParams.Rd | 2 +- man/proteinAcc2TaxID.Rd | 4 +- man/renameFA.Rd | 2 +- man/rename_fasta.Rd | 2 +- man/reverseOperonSeq.Rd | 2 +- man/runDeltaBlast.Rd | 11 ++- man/runIPRScan.Rd | 2 +- man/shortenLineage.Rd | 4 
+- man/writeMSA_AA2FA.Rd | 3 + 32 files changed, 329 insertions(+), 303 deletions(-) diff --git a/R/CHANGED-pre-msa-tree.R b/R/CHANGED-pre-msa-tree.R index 40bd672e..48d1abf9 100644 --- a/R/CHANGED-pre-msa-tree.R +++ b/R/CHANGED-pre-msa-tree.R @@ -47,7 +47,7 @@ api_key <- Sys.getenv("ENTREZ_API_KEY", unset = "YOUR_KEY_HERE") #' @examples #' # Convert a single string to title case #' convert2TitleCase("hello world") # Returns "Hello World" -#' +#' convert2TitleCase <- function(x, y = " ") { s <- strsplit(x, y)[[1]] paste(toupper(substring(s, 1, 1)), substring(s, 2), @@ -80,7 +80,7 @@ convert2TitleCase <- function(x, y = " ") { #' @importFrom stringr str_sub #' @importFrom tidyr replace_na separate #' -#' @return A data frame containing the enriched alignment data with lineage +#' @return A data frame containing the enriched alignment data with lineage #' information. #' #' @details The alignment file would need two columns: 1. accession + @@ -215,7 +215,7 @@ addLeaves2Alignment <- function(aln_file = "", #' Lineage = c("Eukaryota>Chordata", "Eukaryota>Chordata") #' ) #' enriched_data <- addName(data) -#' print(enriched_data) +#' enriched_data addName <- function(data, accnum_col = "AccNum", spec_col = "Species", lin_col = "Lineage", lin_sep = ">", out_col = "Name") { @@ -292,7 +292,7 @@ addName <- function(data, #' file formats and/or column names. #' #' @return A character string representing the FASTA formatted sequences. -#' If `fa_outpath` is provided, the FASTA will also be saved to the specified +#' If `fa_outpath` is provided, the FASTA will also be saved to the specified #' file. #' @export #' @@ -336,22 +336,22 @@ convertAlignment2FA <- function(aln_file = "", } #' mapAcc2Name -#' +#' #' @description #' Default renameFA() replacement function. 
Maps an accession number to its name #' #' @param line The line of a fasta file starting with '>' -#' @param acc2name Data Table containing a column of accession numbers and a +#' @param acc2name Data Table containing a column of accession numbers and a #' name column #' @param acc_col Name of the column containing Accession numbers -#' @param name_col Name of the column containing the names that the accession +#' @param name_col Name of the column containing the names that the accession #' numbers #' are mapped to #' #' @importFrom dplyr filter pull #' @importFrom rlang sym #' -#' @return A character string representing the updated FASTA line, where the +#' @return A character string representing the updated FASTA line, where the #' accession number is replaced with its corresponding name. #' @export #' @@ -389,7 +389,7 @@ mapAcc2Name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") { #' #' @examples #' \dontrun{ -#' renameFA("path/to/input.fasta", +#' renameFA("path/to/input.fasta", #' "path/to/output.fasta", mapAcc2Name, acc2name) #' } renameFA <- function(fa_path, outpath, @@ -411,8 +411,8 @@ renameFA <- function(fa_path, outpath, ################################ ## generateAllAlignments2FA #' generateAllAlignments2FA -#' -#' @description +#' +#' @description #' Adding Leaves to an alignment file w/ accessions #' #' @keywords alignment, accnum, leaves, lineage, species @@ -420,25 +420,25 @@ renameFA <- function(fa_path, outpath, #' #' @param aln_path Character. Path to alignment files. #' Default is 'here("data/rawdata_aln/")' -#' @param fa_outpath Character. Path to file. Master protein file with AccNum & +#' @param fa_outpath Character. Path to file. Master protein file with AccNum & #' lineages. #' Default is 'here("data/rawdata_tsv/all_semiclean.txt")' #' @param lin_file Character. Path to the written fasta file. #' Default is 'here("data/alns/")'. -#' @param reduced Boolean. 
If TRUE, the fasta file will contain only one +#' @param reduced Boolean. If TRUE, the fasta file will contain only one #' sequence per lineage. #' Default is 'FALSE'. #' #' @importFrom purrr pmap #' @importFrom stringr str_replace_all #' -#' @return NULL. The function saves the output FASTA files to the specified +#' @return NULL. The function saves the output FASTA files to the specified #' directory. #' -#' @details The alignment files would need two columns separated by spaces: -#' 1. AccNum and 2. alignment. The protein homolog file should have AccNum, +#' @details The alignment files would need two columns separated by spaces: +#' 1. AccNum and 2. alignment. The protein homolog file should have AccNum, #' Species, Lineages. -#' @note Please refer to the source code if you have alternate + file formats +#' @note Please refer to the source code if you have alternate + file formats #' and/or column names. #' #' @export @@ -478,20 +478,20 @@ generateAllAlignments2FA <- function(aln_path = here("data/rawdata_aln/"), # accessions <- c("P12345","Q9UHC1","O15530","Q14624","P0DTD1") # accessions <- rep("ANY95992.1", 201) -#' acc2FA +#' acc2FA #' #' @description -#' converts protein accession numbers to a fasta format. Resulting +#' converts protein accession numbers to a fasta format. Resulting #' fasta file is written to the outpath. #' #' @author Samuel Chen, Janani Ravi #' @keywords accnum, fasta #' -#' @param accessions Character vector containing protein accession numbers to +#' @param accessions Character vector containing protein accession numbers to #' generate fasta sequences for. #' Function may not work for vectors of length > 10,000 #' @param outpath [str] Location where fasta file should be written to. -#' @param plan Character string specifying the parallel processing strategy to +#' @param plan Character string specifying the parallel processing strategy to #' use with the `future` package. Default is "sequential". 
#' #' @importFrom Biostrings readAAStringSet @@ -499,16 +499,16 @@ generateAllAlignments2FA <- function(aln_path = here("data/rawdata_aln/"), #' @importFrom purrr map #' @importFrom rentrez entrez_fetch #' -#' @return A logical value indicating whether the retrieval and conversion were +#' @return A logical value indicating whether the retrieval and conversion were #' successful. Returns `TRUE` if successful and `FALSE` otherwise. #' @export #' #' @examples #' \dontrun{ -#' acc2FA(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), +#' acc2FA(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), #' outpath = "my_proteins.fasta") #' Entrez:accessions <- rep("ANY95992.1", 201) |> acc2FA(outpath = "entrez.fa") -#' EBI:accessions <- c("P12345", "Q9UHC1", +#' EBI:accessions <- c("P12345", "Q9UHC1", #' "O15530", "Q14624", "P0DTD1") |> acc2FA(outpath = "ebi.fa") #' } acc2FA <- function(accessions, outpath, plan = "sequential") { @@ -583,9 +583,9 @@ acc2FA <- function(accessions, outpath, plan = "sequential") { } #' createRepresentativeAccNum -#' +#' #' @description -#' Function to generate a vector of one Accession number per distinct +#' Function to generate a vector of one Accession number per distinct #' observation from 'reduced' column #' #' @author Samuel Chen, Janani Ravi @@ -599,15 +599,18 @@ acc2FA <- function(accessions, outpath, plan = "sequential") { #' @importFrom dplyr filter pull #' @importFrom rlang sym #' -#' @return A character vector containing one Accession number per distinct +#' @return A character vector containing one Accession number per distinct #' observation from the specified reduced column. 
#' @export #' #' @examples +#' \dontrun{ +#' createRepresentativeAccNum(prot) +#' } createRepresentativeAccNum <- function(prot_data, reduced = "Lineage", accnum_col = "AccNum") { - # Get Unique reduced column and then bind the AccNums back to get one + # Get Unique reduced column and then bind the AccNums back to get one # AccNum per reduced column reduced_sym <- sym(reduced) accnum_sym <- sym(accnum_col) @@ -635,16 +638,16 @@ createRepresentativeAccNum <- function(prot_data, } #' alignFasta -#' +#' #' @description #' Perform a Multiple Sequence Alignment on a FASTA file. #' #' @author Samuel Chen, Janani Ravi #' #' @param fasta_file Path to the FASTA file to be aligned -#' @param tool Type of alignment tool to use. One of three options: "Muscle", +#' @param tool Type of alignment tool to use. One of three options: "Muscle", #' "ClustalO", or "ClustalW" -#' @param outpath Path to write the resulting alignment to as a FASTA file. +#' @param outpath Path to write the resulting alignment to as a FASTA file. #' If NULL, no file is written #' #' @importFrom Biostrings readAAStringSet @@ -655,7 +658,7 @@ createRepresentativeAccNum <- function(prot_data, #' #' @examples #' \dontrun{ -#' aligned_sequences <- alignFasta("my_sequences.fasta", +#' aligned_sequences <- alignFasta("my_sequences.fasta", #' tool = "Muscle", outpath = "aligned_output.fasta") #' } alignFasta <- function(fasta_file, tool = "Muscle", outpath = NULL) { @@ -690,7 +693,10 @@ alignFasta <- function(fasta_file, tool = "Muscle", outpath = NULL) { #' @export #' #' @examples -writeMSA_AA2FA <- function(alignment, outpath) { +#' \dontrun{ +#' writeMSA_AA2FA("my_sequences.fasta", outpath = "aligned_output.fasta") +#' } +writeMSA_AA2FA <- function(writeMSA_AA2FA, outpath) { l <- length(rownames(alignment)) fasta <- "" for (i in 1:l) @@ -705,7 +711,7 @@ writeMSA_AA2FA <- function(alignment, outpath) { #' getAccNumFromFA #' -#' @param fasta_file Character. 
The path to the FASTA file from which +#' @param fasta_file Character. The path to the FASTA file from which #' accession numbers will be extracted. #' #' @importFrom stringi stri_extract_all_regex @@ -714,6 +720,9 @@ writeMSA_AA2FA <- function(alignment, outpath) { #' @export #' #' @examples +#' \dontrun{ +#' getAccNumFromFA("my_sequences.fasta") +#' } getAccNumFromFA <- function(fasta_file) { txt <- read_file(fasta_file) accnums <- stringi::stri_extract_all_regex(fasta_file, "(?<=>)[\\w,.]+")[[1]] diff --git a/R/blastWrappers.R b/R/blastWrappers.R index d89f9b95..3c9c4192 100755 --- a/R/blastWrappers.R +++ b/R/blastWrappers.R @@ -4,8 +4,8 @@ #' #' @author Samuel Chen, Janani Ravi #' @description -#' This function executes a Delta-BLAST search using the specified parameters -#' and database. It sets the BLAST database path, runs the Delta-BLAST command +#' This function executes a Delta-BLAST search using the specified parameters +#' and database. It sets the BLAST database path, runs the Delta-BLAST command #' with the given query, and outputs the results. #' #' @param deltablast_path Path to the Delta-BLAST executable. @@ -17,12 +17,15 @@ #' @param num_alignments Number of alignments to report. #' @param num_threads Number of threads to use for the search (default is 1). #' -#' @return This function does not return a value; it outputs results to the +#' @return This function does not return a value; it outputs results to the #' specified file. 
#' @export
#'
#' @examples
-runDeltaBlast <- function(deltablast_path, db_search_path,
+#' \dontrun{
+#' runDeltaBlast(deltablast_path, db_search_path)
+#' }
+runDeltaBlast <- function(deltablast_path, db_search_path,
    db = "refseq", query, evalue = "1e-5",
    out, num_alignments, num_threads = 1) {
    start <- Sys.time()
@@ -49,8 +52,8 @@ runDeltaBlast <- function(deltablast_path, db_search_path,

#' Run RPSBLAST to generate domain architectures for proteins of interest
#'
#' @description
-#' This function executes an RPS-BLAST search to generate domain architectures 
-#' for specified proteins. It sets the BLAST database path, runs the RPS-BLAST 
+#' This function executes an RPS-BLAST search to generate domain architectures
+#' for specified proteins. It sets the BLAST database path, runs the RPS-BLAST
#' command with the provided query, and outputs the results.
#'
#' @param rpsblast_path Path to the RPS-BLAST executable.
@@ -61,7 +64,7 @@ runDeltaBlast <- function(deltablast_path, db_search_path,
#' @param out Path to the output file where results will be saved.
#' @param num_threads Number of threads to use for the search (default is 1).
#'
-#' @return This function does not return a value; it outputs results to the 
+#' @return This function does not return a value; it outputs results to the
#' specified file.
#' @export
#'
diff --git a/R/fa2domain.R b/R/fa2domain.R
index 29803b85..f53322ca 100644
--- a/R/fa2domain.R
+++ b/R/fa2domain.R
@@ -5,18 +5,18 @@
# interproscan CLI will return a completely empty file (0Bytes)

#' runIPRScan
-#' 
+#'
-#' Run InterProScan on a given FASTA file and save the results to an 
+#' Run InterProScan on a given FASTA file and save the results to an
#' output file.
#'
#' @param filepath_fasta A string representing the path to the input FASTA file.
#' @param filepath_out A string representing the base path for the output file. 
-#' @param appl A character vector specifying the InterProScan applications to +#' @param appl A character vector specifying the InterProScan applications to #' use (e.g., "Pfam", "Gene3D"). Default is `c("Pfam", "Gene3D")`. #' #' @importFrom stringr str_glue #' -#' @return A data frame containing the results from the InterProScan output +#' @return A data frame containing the results from the InterProScan output #' TSV file. #' #' @examples @@ -26,7 +26,7 @@ #' filepath_out = "path/to/output_file", #' appl = c("Pfam", "Gene3D") #' ) -#' print(results) +#' results #' } runIPRScan <- function( filepath_fasta, diff --git a/R/ipr2viz.R b/R/ipr2viz.R index c976276d..e582ab09 100644 --- a/R/ipr2viz.R +++ b/R/ipr2viz.R @@ -23,7 +23,7 @@ #' @export #' @examples #' library(ggplot2) -#' +#' #' # Create a sample plot using the custom theme #' ggplot(mtcars, aes(x = wt, y = mpg)) + #' geom_point() + @@ -51,15 +51,15 @@ themeGenes2 <- function() { #' getTopAccByLinDomArch #' @description Group by lineage + DA then take top 20 #' -#' @param infile_full A data frame containing the full dataset with lineage and +#' @param infile_full A data frame containing the full dataset with lineage and #' domain architecture information. -#' @param DA_col A string representing the name of the domain architecture +#' @param DA_col A string representing the name of the domain architecture #' column. Default is "DomArch.Pfam". -#' @param lin_col A string representing the name of the lineage column. +#' @param lin_col A string representing the name of the lineage column. #' Default is "Lineage_short". -#' @param n An integer specifying the number of top accession numbers to return. +#' @param n An integer specifying the number of top accession numbers to return. #' Default is 20. -#' @param query A string for filtering a specific query name. If it is not +#' @param query A string for filtering a specific query name. If it is not #' "All", only the data matching this query will be processed. 
#' #' @importFrom dplyr arrange filter group_by select summarise @@ -68,14 +68,14 @@ themeGenes2 <- function() { #' @importFrom rlang sym #' @importFrom rlang .data #' -#' @return A vector of the top N accession numbers (`AccNum`) based on counts +#' @return A vector of the top N accession numbers (`AccNum`) based on counts #' grouped by lineage and domain architecture. #' @export #' #' @examples #' \dontrun{ -#' top_accessions <- getTopAccByLinDomArch(infile_full = my_data, -#' DA_col = "DomArch.Pfam", lin_col = "Lineage_short", +#' top_accessions <- getTopAccByLinDomArch(infile_full = my_data, +#' DA_col = "DomArch.Pfam", lin_col = "Lineage_short", #' n = 20, query = "specific_query_name") #' } getTopAccByLinDomArch <- function(infile_full, @@ -113,26 +113,26 @@ getTopAccByLinDomArch <- function(infile_full, ############################################# #' plotIPR2Viz #' -#' @param infile_ipr A path to the input IPR file (TSV format) containing +#' @param infile_ipr A path to the input IPR file (TSV format) containing #' domain information. -#' @param infile_full A path to the full input file (TSV format) containing +#' @param infile_full A path to the full input file (TSV format) containing #' lineage and accession information. -#' @param accessions A character vector of accession numbers to filter the +#' @param accessions A character vector of accession numbers to filter the #' analysis. Default is an empty vector. -#' @param analysis A character vector specifying the types of analysis to -#' include (e.g., "Pfam", "Phobius", "TMHMM", "Gene3D"). Default is a +#' @param analysis A character vector specifying the types of analysis to +#' include (e.g., "Pfam", "Phobius", "TMHMM", "Gene3D"). Default is a #' vector of these analyses. -#' @param group_by A string specifying how to group the visualization. +#' @param group_by A string specifying how to group the visualization. #' Default is "Analysis". Options include "Analysis" or "Query". 
-#' @param topn An integer specifying the number of top accessions to visualize. +#' @param topn An integer specifying the number of top accessions to visualize. #' Default is 20. -#' @param name A string representing the name to use for y-axis labels. +#' @param name A string representing the name to use for y-axis labels. #' Default is "Name". -#' @param text_size An integer specifying the text size for the plot. +#' @param text_size An integer specifying the text size for the plot. #' Default is 15. -#' @param query A string for filtering a specific query name. If it is not +#' @param query A string for filtering a specific query name. If it is not #' "All", only the data matching this query will be processed. -#' +#' #' @importFrom dplyr distinct filter select #' @importFrom gggenes geom_gene_arrow geom_subgene_arrow #' @importFrom ggplot2 aes aes_string as_labeller element_text facet_wrap ggplot guides margin scale_fill_manual theme theme_minimal unit ylab @@ -145,16 +145,16 @@ getTopAccByLinDomArch <- function(infile_full, #' #' @examples #' \dontrun{ -#' plot <- plotIPR2Viz(infile_ipr = "path/to/ipr_file.tsv", -#' infile_full = "path/to/full_file.tsv", -#' accessions = c("ACC123", "ACC456"), -#' analysis = c("Pfam", "TMHMM"), -#' group_by = "Analysis", -#' topn = 20, -#' name = "Gene Name", -#' text_size = 15, +#' plot <- plotIPR2Viz(infile_ipr = "path/to/ipr_file.tsv", +#' infile_full = "path/to/full_file.tsv", +#' accessions = c("ACC123", "ACC456"), +#' analysis = c("Pfam", "TMHMM"), +#' group_by = "Analysis", +#' topn = 20, +#' name = "Gene Name", +#' text_size = 15, #' query = "All") -#' print(plot) +#' plot #' } plotIPR2Viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(), analysis = c("Pfam", "Phobius", "TMHMM", "Gene3D"), @@ -291,24 +291,24 @@ plotIPR2Viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(), #' plotIPR2VizWeb #' -#' @param infile_ipr A path to the input IPR file (TSV format) containing +#' @param 
infile_ipr A path to the input IPR file (TSV format) containing #' domain information. -#' @param accessions A character vector of accession numbers to filter the +#' @param accessions A character vector of accession numbers to filter the #' analysis. -#' @param analysis A character vector specifying the types of analysis to -#' include (e.g., "Pfam", "Phobius", "TMHMM", "Gene3D"). Default is a vector +#' @param analysis A character vector specifying the types of analysis to +#' include (e.g., "Pfam", "Phobius", "TMHMM", "Gene3D"). Default is a vector #' of these analyses. -#' @param group_by A string specifying how to group the visualization. +#' @param group_by A string specifying how to group the visualization. #' Default is "Analysis". Options include "Analysis" or "Query". -#' @param name A string representing the name to use for y-axis labels. +#' @param name A string representing the name to use for y-axis labels. #' Default is "Name". -#' @param text_size An integer specifying the text size for the plot. +#' @param text_size An integer specifying the text size for the plot. #' Default is 15. -#' @param legend_name A string representing the column to use for legend labels. +#' @param legend_name A string representing the column to use for legend labels. #' Default is "ShortName". -#' @param cols An integer specifying the number of columns in the facet wrap. +#' @param cols An integer specifying the number of columns in the facet wrap. #' Default is 5. -#' @param rows An integer specifying the number of rows in the legend. +#' @param rows An integer specifying the number of rows in the legend. #' Default is 10. 
#' #' @importFrom dplyr arrange distinct filter select @@ -317,22 +317,22 @@ plotIPR2Viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(), #' @importFrom readr read_tsv #' @importFrom tidyr pivot_wider #' -#' @return A ggplot object representing the domain architecture visualization +#' @return A ggplot object representing the domain architecture visualization #' for web display. #' @export #' #' @examples #' \dontrun{ -#' plot <- plotIPR2VizWeb(infile_ipr = "path/to/ipr_file.tsv", -#' accessions = c("ACC123", "ACC456"), -#' analysis = c("Pfam", "TMHMM"), -#' group_by = "Analysis", -#' name = "Gene Name", -#' text_size = 15, -#' legend_name = "ShortName", -#' cols = 5, +#' plot <- plotIPR2VizWeb(infile_ipr = "path/to/ipr_file.tsv", +#' accessions = c("ACC123", "ACC456"), +#' analysis = c("Pfam", "TMHMM"), +#' group_by = "Analysis", +#' name = "Gene Name", +#' text_size = 15, +#' legend_name = "ShortName", +#' cols = 5, #' rows = 10) -#' print(plot) +#' plot #' } plotIPR2VizWeb <- function(infile_ipr, accessions, diff --git a/R/lineage.R b/R/lineage.R index 73fa008a..46249c91 100644 --- a/R/lineage.R +++ b/R/lineage.R @@ -11,22 +11,22 @@ #' #' @author Samuel Chen, Janani Ravi #' -#' @param outpath String of path where the assembly summary file should be +#' @param outpath String of path where the assembly summary file should be #' written -#' @param keep Character vector containing which columns should be retained and +#' @param keep Character vector containing which columns should be retained and #' downloaded #' #' @importFrom data.table fwrite setnames #' @importFrom dplyr bind_rows select #' @importFrom biomartr getKingdomAssemblySummary #' -#' @return A tab-separated file containing the assembly summary. The function +#' @return A tab-separated file containing the assembly summary. The function #' does notreturn any value but writes the output directly to the specified file. 
#' @export #' #' @examples #' \dontrun{ -#' downloadAssemblySummary(outpath = "assembly_summary.tsv", +#' downloadAssemblySummary(outpath = "assembly_summary.tsv", #' keep = c("assembly_accession", "taxid", "organism_name")) #' } downloadAssemblySummary <- function(outpath, @@ -85,16 +85,16 @@ downloadAssemblySummary <- function(outpath, #' @param lineagelookup_path String of the path to the lineage lookup file #' (taxid to lineage mapping). This file can be generated using the #' "createLineageLookup()" function -#' @param acc_col Character. The name of the column in `prot_data` containing +#' @param acc_col Character. The name of the column in `prot_data` containing #' accession numbers. Default is "AccNum". #' #' @importFrom dplyr pull #' @importFrom data.table fread setnames #' -#' @return A dataframe containing the merged information of GCA_IDs, TaxIDs, -#' and their corresponding lineage up to the phylum level. The dataframe +#' @return A dataframe containing the merged information of GCA_IDs, TaxIDs, +#' and their corresponding lineage up to the phylum level. The dataframe #' will include information from the input `prot_data` and lineage data. -#' +#' #' @export #' #' @examples @@ -151,25 +151,25 @@ GCA2Lineage <- function(prot_data, ################################### #' addLineage #' -#' @param df Dataframe containing accession numbers. The dataframe should +#' @param df Dataframe containing accession numbers. The dataframe should #' have a column specified by `acc_col` that contains these accession numbers. -#' @param acc_col Character. The name of the column in `df` containing +#' @param acc_col Character. The name of the column in `df` containing #' accession numbers. Default is "AccNum". -#' @param assembly_path String. The path to the assembly summary file generated +#' @param assembly_path String. The path to the assembly summary file generated #' using the `downloadAssemblySummary()` function. -#' @param lineagelookup_path String. 
The path to the lineage lookup file (taxid +#' @param lineagelookup_path String. The path to the lineage lookup file (taxid #' to lineage mapping) generated using the `create_lineage_lookup()` function. -#' @param ipgout_path String. Optional path to save intermediate output files. +#' @param ipgout_path String. Optional path to save intermediate output files. #' Default is NULL. -#' @param plan Character. Specifies the execution plan for parallel processing. +#' @param plan Character. Specifies the execution plan for parallel processing. #' Default is "multicore". #' #' @importFrom dplyr pull #' @importFrom rlang sym #' -#' @return A dataframe that combines the original dataframe `df` with lineage +#' @return A dataframe that combines the original dataframe `df` with lineage #' information retrieved based on the provided accession numbers. -#' +#' #' @export #' #' @examples @@ -224,11 +224,11 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path, #' (taxid to lineage mapping). This file can be generated using the #' @param ipgout_path Path to write the results of the efetch run of the accessions #' on the ipg database. If NULL, the file will not be written. Defaults to NULL -#' @param plan Character. Specifies the execution plan for parallel processing. +#' @param plan Character. Specifies the execution plan for parallel processing. #' Default is "multicore". #' -#' @return A dataframe containing lineage information mapped to the given protein -#' accessions. The dataframe includes relevant columns such as TaxID, GCA_ID, +#' @return A dataframe containing lineage information mapped to the given protein +#' accessions. The dataframe includes relevant columns such as TaxID, GCA_ID, #' Protein, Protein Name, Species, and Lineage. 
#' @export #' @@ -276,16 +276,16 @@ acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, #' @param accessions Character vector containing the accession numbers to query on #' the ipg database #' @param out_path Path to write the efetch results to -#' @param plan Character. Specifies the execution plan for parallel processing. +#' @param plan Character. Specifies the execution plan for parallel processing. #' Default is "multicore". #' #' @importFrom future future plan #' @importFrom purrr map #' @importFrom rentrez entrez_fetch #' -#' @return The function does not return a value but writes the efetch results +#' @return The function does not return a value but writes the efetch results #' directly to the specified `out_path`. -#' +#' #' @export #' #' @examples @@ -363,7 +363,7 @@ efetchIPG <- function(accessions, out_path, plan = "multicore") { #' #' @importFrom data.table fread setnames #' -#' @return A data table containing protein accessions along with their +#' @return A data table containing protein accessions along with their #' corresponding TaxIDs and lineage information. #' @export #' @@ -444,14 +444,14 @@ IPG2Lineage <- function(accessions, ipg_file, #' addTaxID #' #' @param data A data frame or data table containing protein accession numbers. -#' @param acc_col A string specifying the column name in `data` that contains +#' @param acc_col A string specifying the column name in `data` that contains #' the accession numbers. Defaults to "AccNum". -#' @param version A logical indicating whether to remove the last two characters +#' @param version A logical indicating whether to remove the last two characters #' from the accession numbers for TaxID retrieval. Defaults to TRUE. #' #' @importFrom data.table as.data.table #' -#' @return A data table that includes the original data along with a new column +#' @return A data table that includes the original data along with a new column #' containing the corresponding TaxIDs. 
#' @export #' @@ -460,7 +460,7 @@ IPG2Lineage <- function(accessions, ipg_file, #' # Create a sample data table with accession numbers #' sample_data <- data.table(AccNum = c("ABC123.1", "XYZ456.1", "LMN789.2")) #' enriched_data <- addTaxID(sample_data, acc_col = "AccNum", version = TRUE) -#' print(enriched_data) +#' enriched_data #' } addTaxID <- function(data, acc_col = "AccNum", version = T) { if (!is.data.table(data)) { @@ -490,19 +490,19 @@ addTaxID <- function(data, acc_col = "AccNum", version = T) { ################################## #' proteinAcc2TaxID #' -#' @param accnums A character vector of protein accession numbers to be mapped +#' @param accnums A character vector of protein accession numbers to be mapped #' to TaxIDs. -#' @param suffix A string suffix used to name the output file generated by the +#' @param suffix A string suffix used to name the output file generated by the #' script. -#' @param out_path A string specifying the directory where the output file will +#' @param out_path A string specifying the directory where the output file will #' be saved. -#' @param return_dt A logical indicating whether to return the result as a data -#' table. Defaults to FALSE. If TRUE, the output file is read into a data table +#' @param return_dt A logical indicating whether to return the result as a data +#' table. Defaults to FALSE. If TRUE, the output file is read into a data table #' and returned. #' #' @importFrom data.table fread #' -#' @return If `return_dt` is TRUE, a data table containing the mapping of protein +#' @return If `return_dt` is TRUE, a data table containing the mapping of protein #' accession numbers to TaxIDs. If FALSE, the function returns NULL. 
#' @export #' @@ -510,9 +510,9 @@ addTaxID <- function(data, acc_col = "AccNum", version = T) { #' \dontrun{ #' # Example accession numbers #' accessions <- c("ABC123", "XYZ456", "LMN789") -#' tax_data <- proteinAcc2TaxID(accessions, suffix = "example", +#' tax_data <- proteinAcc2TaxID(accessions, suffix = "example", #' out_path = "/path/to/output", return_dt = TRUE) -#' print(tax_data) +#' tax_data #' } proteinAcc2TaxID <- function(accnums, suffix, out_path, return_dt = FALSE) { # Write accnums to a file @@ -538,17 +538,17 @@ proteinAcc2TaxID <- function(accnums, suffix, out_path, return_dt = FALSE) { #' @description Perform elink to go from protein database to taxonomy database #' and write the resulting file of taxid and lineage to out_path #' -#' @param accessions A character vector containing the accession numbers to query +#' @param accessions A character vector containing the accession numbers to query #' in the protein database. -#' @param out_path A string specifying the path where the results of the query +#' @param out_path A string specifying the path where the results of the query #' will be written. If set to NULL, a temporary directory will be used. -#' @param plan A character string that specifies the execution plan for parallel +#' @param plan A character string that specifies the execution plan for parallel #' processing. The default is "multicore". #' #' @importFrom future plan #' @importFrom purrr map #' -#' @return This function does not return a value. It writes the results to the +#' @return This function does not return a value. It writes the results to the #' specified output path. 
#' @export #' diff --git a/R/plotme.R b/R/plotme.R index 3527f170..3cfd54f8 100644 --- a/R/plotme.R +++ b/R/plotme.R @@ -83,7 +83,7 @@ plotTreemap <- function(count_data, fill_by_n = FALSE, sort_by_n = FALSE) { #' count_data <- data.frame(Category = c("A", "B", "C"), #' n = c(10, 20, 15)) #' params <- prepareColumnParams(count_data, fill_by_n = TRUE, sort_by_n = FALSE) -#' print(params) +#' params #' } prepareColumnParams <- function(count_data, fill_by_n, sort_by_n) { validateCountDF(count_data) @@ -128,7 +128,7 @@ prepareColumnParams <- function(count_data, fill_by_n, sort_by_n) { #' @importFrom dplyr c_across group_by mutate rowwise select summarise ungroup #' @importFrom stringr str_glue #' -#' @return A data frame containing parameters for the specified column for +#' @return A data frame containing parameters for the specified column for #' treemap visualization. #' @export #' @@ -137,7 +137,7 @@ prepareColumnParams <- function(count_data, fill_by_n, sort_by_n) { #' df <- data.frame(Category = c("A", "A", "B", "B", "C"), #' n = c(10, 20, 30, 40, 50)) #' params <- prepareSingleColumnParams(df, col_num = 1, root = "Root") -#' print(params) +#' params #' } prepareSingleColumnParams <- function(df, col_num, diff --git a/R/plotting.R b/R/plotting.R index b9a2758a..102ab6af 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -21,31 +21,31 @@ #' Shorten Lineage Names #' #' @description -#' This function abbreviates lineage names by shortening the first part of the -#' string (up to a given delimiter). +#' This function abbreviates lineage names by shortening the first part of the +#' string (up to a given delimiter). #' -#' @param data A data frame that contains a column with lineage names to be +#' @param data A data frame that contains a column with lineage names to be #' shortened. -#' @param colname Character. The name of the column in the data frame containing +#' @param colname Character. 
The name of the column in the data frame containing #' the lineage strings to be shortened. Default is `"Lineage"`. -#' @param abr_len Integer. The number of characters to retain after the first -#' letter. If set to 1, only the first letter of each segment before the +#' @param abr_len Integer. The number of characters to retain after the first +#' letter. If set to 1, only the first letter of each segment before the #' delimiter (`>`) is retained. Default is 1. #' #' @importFrom stringr str_locate #' @importFrom purrr pmap #' -#' @return A modified data frame where the specified lineage column has been +#' @return A modified data frame where the specified lineage column has been #' shortened. #' #' @export #' #' @examples #' \dontrun{ -#' df <- data.frame(Lineage = c("Bacteria>Firmicutes>Clostridia", +#' df <- data.frame(Lineage = c("Bacteria>Firmicutes>Clostridia", #' "Archaea>Euryarchaeota>Thermococci")) #' shortened_df <- shortenLineage(df, colname = "Lineage", abr_len = 1) -#' print(shortened_df) +#' shortened_df #' } shortenLineage <- function(data, colname = "Lineage", abr_len = 1) { abbrv <- function(x) { @@ -82,17 +82,17 @@ shortenLineage <- function(data, colname = "Lineage", abr_len = 1) { #' @param colname Column name from query_data: "DomArch.norep", "GenContext.norep", #' "DomArch.PFAM.norep" or "DomArch.LADB.norep". Default is "DomArch.norep". #' @param cutoff Numeric. Cutoff for word frequency. Default is 90. -#' @param RowsCutoff Boolean. If TRUE, applies a row cutoff to remove data rows +#' @param RowsCutoff Boolean. If TRUE, applies a row cutoff to remove data rows #' based on a certain condition. Default is FALSE. -#' @param text.scale Allows scaling of axis title, tick lables, and numbers +#' @param text.scale Allows scaling of axis title, tick lables, and numbers #' above the intersection size bars. 
#' text.scale can either take a universal scale in the form of an integer, #' or a vector of specific scales in the format: c(intersection size title, #' intersection size tick labels, set size title, set size tick labels, set names, #' numbers above bars) -#' @param point.size Numeric. Sets the size of points in the UpSet plot. +#' @param point.size Numeric. Sets the size of points in the UpSet plot. #' Default is 2.2. -#' @param line.size Numeric. Sets the line width in the UpSet plot. +#' @param line.size Numeric. Sets the line width in the UpSet plot. #' Default is 0.8. #' #' @importFrom dplyr across distinct filter if_else mutate pull select where @@ -100,7 +100,7 @@ shortenLineage <- function(data, colname = "Lineage", abr_len = 1) { #' @importFrom stringr str_detect str_replace_all str_split #' @importFrom UpSetR upset #' -#' @return An UpSet plot object. The plot visualizes intersections of sets based +#' @return An UpSet plot object. The plot visualizes intersections of sets based #' on the provided colname in query_data. #' @export #' @@ -251,7 +251,7 @@ plotUpSet <- function(query_data = "toast_rack.sub", #' @param colname Column name from query_data: "DomArch.norep", "GenContext.norep", #' "DomArch.PFAM.norep" or "DomArch.LADB.norep". Default is "DomArch.norep". #' @param cutoff Numeric. Cutoff for word frequency. Default is 90. -#' @param RowsCutoff Boolean. If TRUE, applies a row cutoff to remove data rows +#' @param RowsCutoff Boolean. If TRUE, applies a row cutoff to remove data rows #' based on a certain condition. Default is FALSE. #' @param color Color for the heatmap. One of six options: "default", "magma", "inferno", #' "plasma", "viridis", or "cividis" @@ -354,13 +354,13 @@ plotLineageDA <- function(query_data = "prot", #' @param query_data Data frame of protein homologs with the usual 11 columns + #' additional word columns (0/1 format). #' Default is prot (variable w/ protein data). 
-#' @param queries Character Vector containing the queries that will be used for +#' @param queries Character Vector containing the queries that will be used for #' the categories. -#' @param colname Character. The column used for filtering based on the `queries`. +#' @param colname Character. The column used for filtering based on the `queries`. #' Default is "ClustName". -#' @param cutoff Numeric. The cutoff value for filtering rows based on their +#' @param cutoff Numeric. The cutoff value for filtering rows based on their #' total count. Rows with values below this cutoff are excluded. -#' @param color Character. Defines the color palette used for the heatmap. +#' @param color Character. Defines the color palette used for the heatmap. #' Default is a red gradient. #' #' @importFrom dplyr arrange desc filter group_by select summarise union @@ -371,8 +371,8 @@ plotLineageDA <- function(query_data = "prot", #' @importFrom tidyr drop_na #' @importFrom viridis scale_fill_viridis #' -#' @return A ggplot object representing a heatmap (tile plot) showing the -#' relationship between queries and lineages, with the intensity of color +#' @return A ggplot object representing a heatmap (tile plot) showing the +#' relationship between queries and lineages, with the intensity of color #' representing the count of matching records. #' @export #' @@ -503,8 +503,8 @@ plotLineageQuery <- function(query_data = all, #' @importFrom stringr str_replace_all #' @importFrom tidyr gather #' -#' @return A ggplot object representing a heatmap (tile plot) of lineage versus -#' the top neighboring domain architectures, with color intensity representing +#' @return A ggplot object representing a heatmap (tile plot) of lineage versus +#' the top neighboring domain architectures, with color intensity representing #' the frequency of occurrences. 
#' @export #' @@ -583,9 +583,9 @@ plotLineageNeighbors <- function(query_data = "prot", query = "pspa", #' Lineage Domain Repeats Plot #' -#' @param query_data Data frame containing protein homolog data, including +#' @param query_data Data frame containing protein homolog data, including #' relevant domain architectures and lineages. -#' @param colname Character. The name of the column in query_data that contains +#' @param colname Character. The name of the column in query_data that contains #' domain architectures or other structural information. #' #' @importFrom dplyr across mutate select where @@ -593,8 +593,8 @@ plotLineageNeighbors <- function(query_data = "prot", query = "pspa", #' @importFrom stringr str_count str_replace_all #' @importFrom tidyr gather #' -#' @return A ggplot object representing a heatmap (tile plot) of domain repeat -#' counts across different lineages, with color intensity representing the +#' @return A ggplot object representing a heatmap (tile plot) of domain repeat +#' counts across different lineages, with color intensity representing the #' occurrence of domains. #' @export #' @@ -679,8 +679,8 @@ plotLineageDomainRepeats <- function(query_data, colname) { #' @importFrom purrr map #' @importFrom stringr str_locate str_locate_all #' -#' @return A ggplot object representing a heatmap (tile plot) of domain repeat -#' counts across different lineages, with color intensity representing the +#' @return A ggplot object representing a heatmap (tile plot) of domain repeat +#' counts across different lineages, with color intensity representing the #' occurrence of domains. #' @export #' @@ -826,26 +826,26 @@ plotLineageHeatmap <- function(prot, domains_of_interest, level = 3, label.size #' Stacked Lineage Plot #' -#' @param prot Data frame containing protein data including domain architecture +#' @param prot Data frame containing protein data including domain architecture #' and lineage information. -#' @param column Character. 
The name of the column in prot representing domain +#' @param column Character. The name of the column in prot representing domain #' architectures (default is "DomArch"). -#' @param cutoff Numeric. A threshold value for filtering domain architectures +#' @param cutoff Numeric. A threshold value for filtering domain architectures #' or protein counts. -#' @param Lineage_col Character. The name of the column representing lineage +#' @param Lineage_col Character. The name of the column representing lineage #' data (default is "Lineage"). -#' @param xlabel Character. Label for the x-axis +#' @param xlabel Character. Label for the x-axis #' (default is "Domain Architecture"). -#' @param reduce_lineage Logical. Whether to shorten lineage names +#' @param reduce_lineage Logical. Whether to shorten lineage names #' (default is TRUE). #' @param label.size Numeric. The size of axis text labels (default is 8). -#' @param legend.position Numeric vector. Coordinates for placing the legend +#' @param legend.position Numeric vector. Coordinates for placing the legend #' (default is c(0.7, 0.4)). -#' @param legend.text.size Numeric. Size of the text in the legend +#' @param legend.text.size Numeric. Size of the text in the legend #' (default is 10). #' @param legend.cols Numeric. Number of columns in the legend (default is 2). #' @param legend.size Numeric. Size of the legend keys (default is 0.7). -#' @param coord_flip Logical. Whether to flip the coordinates of the plot +#' @param coord_flip Logical. Whether to flip the coordinates of the plot #' (default is TRUE). #' @param legend Logical. Whether to display the legend (default is TRUE). 
#' @@ -853,7 +853,7 @@ plotLineageHeatmap <- function(prot, domains_of_interest, level = 3, label.size #' @importFrom ggplot2 aes_string coord_flip element_blank element_line element_rect element_text geom_bar ggplot guides guide_legend scale_fill_manual xlab ylab theme theme_minimal #' @importFrom purrr map #' -#' @return A ggplot object representing a stacked bar plot showing the +#' @return A ggplot object representing a stacked bar plot showing the #' distribution of protein domain architectures across lineages. #' @export #' @@ -982,34 +982,34 @@ plotStackedLineage <- function(prot, column = "DomArch", cutoff, Lineage_col = " #' plotWordCloud3 #' -#' @param data Data frame or table containing words and their frequencies for +#' @param data Data frame or table containing words and their frequencies for #' the word cloud. #' @param size Numeric. Scaling factor for word sizes (default is 1). -#' @param minSize Numeric. Minimum font size for the smallest word +#' @param minSize Numeric. Minimum font size for the smallest word #' (default is 0). #' @param gridSize Numeric. Size of the grid for placing words (default is 0). -#' @param fontFamily Character. Font family to use for the words +#' @param fontFamily Character. Font family to use for the words #' (default is "Segoe UI"). #' @param fontWeight Character. Font weight for the words (default is "bold"). -#' @param color Character or vector. Color of the words. Use "random-dark" for +#' @param color Character or vector. Color of the words. Use "random-dark" for #' random dark colors (default) or specify a color. -#' @param backgroundColor Character. Background color of the word cloud +#' @param backgroundColor Character. Background color of the word cloud #' (default is "white"). -#' @param minRotation Numeric. Minimum rotation angle of words in radians +#' @param minRotation Numeric. Minimum rotation angle of words in radians #' (default is -π/4). -#' @param maxRotation Numeric. 
Maximum rotation angle of words in radians +#' @param maxRotation Numeric. Maximum rotation angle of words in radians #' (default is π/4). #' @param shuffle Logical. Whether to shuffle the words (default is TRUE). -#' @param rotateRatio Numeric. Proportion of words that are rotated +#' @param rotateRatio Numeric. Proportion of words that are rotated #' (default is 0.4). -#' @param shape Character. Shape of the word cloud ("circle" is default, but +#' @param shape Character. Shape of the word cloud ("circle" is default, but #' you can use "cardioid", "star", "triangle", etc.). #' @param ellipticity Numeric. Degree of ellipticity (default is 0.65). -#' @param widgetsize Numeric vector. Width and height of the widget +#' @param widgetsize Numeric vector. Width and height of the widget #' (default is NULL, which uses default size). -#' @param figPath Character. Path to an image file to use as a mask for the +#' @param figPath Character. Path to an image file to use as a mask for the #' word cloud (optional). -#' @param hoverFunction JS function. JavaScript function to run when hovering +#' @param hoverFunction JS function. JavaScript function to run when hovering #' over words (optional). #' #' @importFrom base64enc base64encode @@ -1082,11 +1082,11 @@ wordcloud3 <- function(data, size = 1, minSize = 0, gridSize = 0, fontFamily = " #' #' @param query_data Data frame of protein homologs with the usual 11 columns + #' additional word columns (0/1 format). Default is "prot". -#' @param colname Character. The name of the column in `query_data` to generate +#' @param colname Character. The name of the column in `query_data` to generate #' the word cloud from. Default is "DomArch". -#' @param cutoff Numeric. The cutoff value for filtering elements based on their +#' @param cutoff Numeric. The cutoff value for filtering elements based on their #' frequency. Default is 70. -#' @param UsingRowsCutoff Logical. 
Whether to use a row-based cutoff instead of +#' @param UsingRowsCutoff Logical. Whether to use a row-based cutoff instead of #' a frequency cutoff. Default is FALSE. #' #' @importFrom dplyr filter pull @@ -1094,7 +1094,7 @@ wordcloud3 <- function(data, size = 1, minSize = 0, gridSize = 0, fontFamily = " #' @importFrom rlang sym #' @importFrom wordcloud wordcloud #' -#' @return A word cloud plot showing the frequency of elements from the selected +#' @return A word cloud plot showing the frequency of elements from the selected #' column. #' @export #' @@ -1166,17 +1166,17 @@ createWordCloudElement <- function(query_data = "prot", #' #' @param query_data Data frame of protein homologs with the usual 11 columns + #' additional word columns (0/1 format). Default is "prot". -#' @param colname Character. The name of the column in `query_data` to generate +#' @param colname Character. The name of the column in `query_data` to generate #' the word cloud from. Default is "DomArch". -#' @param cutoff Numeric. The cutoff value for filtering elements based on their +#' @param cutoff Numeric. The cutoff value for filtering elements based on their #' frequency. Default is 70. -#' @param UsingRowsCutoff Logical. Whether to use a row-based cutoff instead of +#' @param UsingRowsCutoff Logical. Whether to use a row-based cutoff instead of #' a frequency cutoff. Default is FALSE. #' #' @importFrom dplyr filter pull #' @importFrom rlang sym #' -#' @return A word cloud plot showing the frequency of elements from the selected +#' @return A word cloud plot showing the frequency of elements from the selected #' column. #' @export #' @@ -1240,22 +1240,22 @@ createWordCloud2Element <- function(query_data = "prot", #### Sunburst ##### #' Lineage Sunburst #' -#' @param prot Data frame containing a lineage column that the sunburst plot +#' @param prot Data frame containing a lineage column that the sunburst plot #' will be generated for -#' @param lineage_column String. 
Name of the lineage column within the +#' data frame. Defaults to "Lineage" -#' @param type String, either "sunburst" or "sund2b". If type is "sunburst", +#' a sunburst plot of the lineage #' @param levels Integer. Number of levels the sunburst will have. -#' @param colors A vector of colors for the sunburst plot. +#' If NULL, default colors are used. -#' @param legendOrder String vector. The order of the legend. If legendOrder +#' is NULL, -#' @param showLegend Boolean. If TRUE, the legend will be enabled when the +#' component first renders. -#' @param maxLevels Integer, the maximum number of levels to display in the -#' sunburst; 5 by default, NULL to disable then the legend will be in the -#' descending order of the top level hierarchy. will be rendered. If the type is +#' @param maxLevels Integer, the maximum number of levels to display in the +#' sunburst; 5 by default, NULL to disable. If type is "sunburst", a sunburst +#' plot of the lineage will be rendered. If the type is #' sund2b, a sund2b plot will be rendered. 
#' #' @importFrom d3r d3_nest @@ -1270,7 +1270,7 @@ createWordCloud2Element <- function(query_data = "prot", #' #' @examples #' \dontrun{ -#' plotLineageSunburst(prot, lineage_column = "Lineage", +#' plotLineageSunburst(prot, lineage_column = "Lineage", #' type = "sunburst", levels = 3) #' } plotLineageSunburst <- function(prot, lineage_column = "Lineage", diff --git a/R/pre-msa-tree.R b/R/pre-msa-tree.R index 2f9c7832..e2a8a39c 100644 --- a/R/pre-msa-tree.R +++ b/R/pre-msa-tree.R @@ -49,8 +49,8 @@ api_key <- Sys.getenv("ENTREZ_API_KEY", unset = "YOUR_KEY_HERE") #' @export #' #' @examples -#' convert2TitleCase("hello world") -#' convert2TitleCase("this is a test", "_") +#' convert2TitleCase("hello world") +#' convert2TitleCase("this is a test", "_") convert2TitleCase <- function(x, y = " ") { s <- strsplit(x, y)[[1]] paste(toupper(substring(s, 1, 1)), substring(s, 2), @@ -89,7 +89,7 @@ convert2TitleCase <- function(x, y = " ") { #' @importFrom stringr str_sub #' @importFrom tidyr replace_na separate #' -#' @return A data frame containing the combined alignment and lineage +#' @return A data frame containing the combined alignment and lineage #' information. #' @export #' @@ -191,7 +191,7 @@ addLeaves2Alignment <- function(aln_file = "", #' #' @author Samuel Chen, Janani Ravi #' -#' @description This function adds a new 'Name' column that is comprised of +#' @description This function adds a new 'Name' column that is comprised of #' components from Kingdom, Phylum, Genus, and species, as well as the accession #' #' @param data Data to add name column to @@ -278,7 +278,7 @@ addName <- function(data, #' Default is 'pspa.txt' #' @param fa_outpath Character. Path to the written fasta file. #' Default is 'NULL' -#' @param reduced Boolean. If TRUE, the fasta file will contain only one +#' @param reduced Boolean. If TRUE, the fasta file will contain only one #' sequence per lineage. Default is 'FALSE' #' #' @details The alignment file would need two columns: 1. 
accession + @@ -289,8 +289,8 @@ addName <- function(data, #' #' @importFrom readr write_file #' -#' @return Character string containing the Fasta formatted sequences. -#' If `fa_outpath` is specified, the function also writes the sequences to the +#' @return Character string containing the Fasta formatted sequences. +#' If `fa_outpath` is specified, the function also writes the sequences to the #' Fasta file. #' @export #' @@ -333,7 +333,7 @@ convertAlignment2FA <- function(aln_file = "", } #' mapAcc2Name -#' +#' #' @description #' Default rename_fasta() replacement function. Maps an accession number to its name #' @@ -347,17 +347,17 @@ convertAlignment2FA <- function(aln_file = "", #' @importFrom stringr str_locate #' @importFrom rlang sym #' -#' @return Character string. The modified line from the Fasta file header with +#' @return Character string. The modified line from the Fasta file header with #' the name instead of the accession number. #' @export #' #' @examples #' \dontrun{ -#' acc2name_table <- data.table(AccNum = c("ACC001", "ACC002"), +#' acc2name_table <- data.table(AccNum = c("ACC001", "ACC002"), #' Name = c("Species A", "Species B")) #' line <- ">ACC001 some additional info" #' mapped_line <- mapAcc2Name(line, acc2name_table) -#' print(mapped_line) # Expected output: ">Species A" +#' mapped_line # Expected output: ">Species A" #' } mapAcc2Name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") { # change to be the name equivalent to an add_names column @@ -389,7 +389,7 @@ mapAcc2Name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") { #' #' @examples #' \dontrun{ -#' rename_fasta("input.fasta", "output.fasta", +#' rename_fasta("input.fasta", "output.fasta", #' replacement_function = map_acc2name, acc2name = acc2name_table) #' } rename_fasta <- function(fa_path, outpath, @@ -411,8 +411,8 @@ rename_fasta <- function(fa_path, outpath, ################################ ## generateAllAlignments2FA #' 
generateAllAlignments2FA -#' -#' @description +#' +#' @description #' Adding Leaves to an alignment file w/ accessions #' #' @keywords alignment, accnum, leaves, lineage, species @@ -420,25 +420,25 @@ rename_fasta <- function(fa_path, outpath, #' #' @param aln_path Character. Path to alignment files. #' Default is 'here("data/rawdata_aln/")' -#' @param fa_outpath Character. Path to file. Master protein file with AccNum & +#' @param fa_outpath Character. Path to file. Master protein file with AccNum & #' lineages. #' Default is 'here("data/rawdata_tsv/all_semiclean.txt")' #' @param lin_file Character. Path to the written fasta file. #' Default is 'here("data/alns/")'. -#' @param reduced Boolean. If TRUE, the fasta file will contain only one +#' @param reduced Boolean. If TRUE, the fasta file will contain only one #' sequence per lineage. #' Default is 'FALSE'. #' #' @importFrom purrr pmap #' @importFrom stringr str_replace_all #' -#' @return NULL. The function saves the output FASTA files to the specified +#' @return NULL. The function saves the output FASTA files to the specified #' directory. #' -#' @details The alignment files would need two columns separated by spaces: -#' 1. AccNum and 2. alignment. The protein homolog file should have AccNum, +#' @details The alignment files would need two columns separated by spaces: +#' 1. AccNum and 2. alignment. The protein homolog file should have AccNum, #' Species, Lineages. -#' @note Please refer to the source code if you have alternate + file formats +#' @note Please refer to the source code if you have alternate + file formats #' and/or column names. #' #' @export @@ -481,9 +481,9 @@ generateAllAlignments2FA <- function(aln_path = here("data/rawdata_aln/"), #' acc2FA #' #' @description -#' converts protein accession numbers to a fasta format. Resulting +#' converts protein accession numbers to a fasta format. Resulting #' fasta file is written to the outpath. 
-#' +#' #' @author Samuel Chen, Janani Ravi #' @keywords accnum, fasta #' @@ -492,8 +492,8 @@ generateAllAlignments2FA <- function(aln_path = here("data/rawdata_aln/"), #' Resulting fasta file is written to the outpath. #' #' -#' @param accessions Character vector containing protein accession numbers to -#' generate fasta sequences for. Function may not work for vectors of +#' @param accessions Character vector containing protein accession numbers to +#' generate fasta sequences for. Function may not work for vectors of #' length > 10,000 #' @param outpath [str]. Location where fasta file should be written to. #' @param plan Character. The plan to use for processing. Default is "sequential". @@ -508,10 +508,10 @@ generateAllAlignments2FA <- function(aln_path = here("data/rawdata_aln/"), #' #' @examples #' \dontrun{ -#' acc2FA(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), +#' acc2FA(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), #' outpath = "my_proteins.fasta") #' Entrez:accessions <- rep("ANY95992.1", 201) |> acc2FA(outpath = "entrez.fa") -#' EBI:accessions <- c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1") |> +#' EBI:accessions <- c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1") |> #' acc2FA(outpath = "ebi.fa") #' } acc2FA <- function(accessions, outpath, plan = "sequential") { @@ -601,22 +601,22 @@ acc2FA <- function(accessions, outpath, plan = "sequential") { #' @importFrom dplyr filter pull #' @importFrom rlang sym #' -#' @return A character vector containing representative accession numbers, +#' @return A character vector containing representative accession numbers, #' one for each distinct observation in the specified 'reduced' column. 
#' @export #' #' @examples #' \dontrun{ #' # Example usage with a data frame called `protein_data` -#' createRepresentativeAccNum <- RepresentativeAccNums(prot_data = protein_data, -#' reduced = "Lineage", +#' createRepresentativeAccNum <- RepresentativeAccNums(prot_data = protein_data, +#' reduced = "Lineage", #' accnum_col = "AccNum") -#' print(representative_accessions) +#' representative_accessions #' } createRepresentativeAccNum <- function(prot_data, reduced = "Lineage", accnum_col = "AccNum") { - # Get Unique reduced column and then bind the AccNums back to get one + # Get Unique reduced column and then bind the AccNums back to get one # AccNum per reduced column reduced_sym <- sym(reduced) accnum_sym <- sym(accnum_col) @@ -651,9 +651,9 @@ createRepresentativeAccNum <- function(prot_data, #' @author Samuel Chen, Janani Ravi #' #' @param fasta_file Path to the FASTA file to be aligned -#' @param tool Type of alignment tool to use. One of three options: "Muscle", +#' @param tool Type of alignment tool to use. One of three options: "Muscle", #' "ClustalO", or "ClustalW" -#' @param outpath Path to write the resulting alignment to as a FASTA file. If +#' @param outpath Path to write the resulting alignment to as a FASTA file. If #' NULL, no file is written #' #' @importFrom Biostrings readAAStringSet @@ -665,9 +665,9 @@ createRepresentativeAccNum <- function(prot_data, #' @examples #' \dontrun{ #' # Example usage -#' aligned_sequences <- alignFasta("path/to/sequences.fasta", +#' aligned_sequences <- alignFasta("path/to/sequences.fasta", #' tool = "ClustalO", outpath = "path/to/aligned_sequences.fasta") -#' print(aligned_sequences) +#' aligned_sequences #' } alignFasta <- function(fasta_file, tool = "Muscle", outpath = NULL) { fasta <- readAAStringSet(fasta_file) @@ -723,7 +723,7 @@ writeMSA_AA2FA <- function(alignment, outpath) { #' getAccNumFromFA #' -#' @param fasta_file Character. Path to the FASTA file from which +#' @param fasta_file Character. 
Path to the FASTA file from which #' accession numbers will be extracted. #' #' @importFrom readr read_file @@ -736,7 +736,7 @@ writeMSA_AA2FA <- function(alignment, outpath) { #' \dontrun{ #' # Example usage #' accnums <- getAccNumFromFA("path/to/sequences.fasta") -#' print(accnums) +#' accnums #' } getAccNumFromFA <- function(fasta_file) { txt <- read_file(fasta_file) diff --git a/R/reverse_operons.R b/R/reverse_operons.R index 5e1cb423..9094598b 100755 --- a/R/reverse_operons.R +++ b/R/reverse_operons.R @@ -7,12 +7,12 @@ #' #' @description #' This function processes the genomic context strings (GenContext) and reverses -#' directional signs based on the presence of an equal sign ("="). +#' directional signs based on the presence of an equal sign ("="). #' #' @param prot [vector] A vector of genomic context strings to be processed. #' -#' @return [vector] A vector of the same length as the input, where each genomic -#' element is annotated with either a forward ("->") or reverse ("<-") direction, +#' @return [vector] A vector of the same length as the input, where each genomic +#' element is annotated with either a forward ("->") or reverse ("<-") direction, #' depending on its position relative to the "=" symbols. #' #' @export @@ -73,12 +73,12 @@ straightenOperonSeq <- function(prot) { #' #' @description #' This function processes a genomic context data frame to reverse the direction -#' of operons based on specific patterns in the GenContext column. It handles -#' elements represented by ">" and "<" and restructures the genomic context by -#' flipping the direction of operons while preserving the relationships +#' of operons based on specific patterns in the GenContext column. It handles +#' elements represented by ">" and "<" and restructures the genomic context by +#' flipping the direction of operons while preserving the relationships #' indicated by "=". 
#' -#' @param prot [data.frame] A data frame containing at least a column named +#' @param prot [data.frame] A data frame containing at least a column named #' 'GenContext', which represents the genomic contexts that need to be reversed. #' #' @return [data.frame] The input data frame with the 'GenContext' column updated t @@ -90,7 +90,7 @@ straightenOperonSeq <- function(prot) { #' # Example genomic context data frame #' prot <- data.frame(GenContext = c("A>B", "CI")) #' reversed_prot <- reverseOperonSeq(prot) -#' print(reversed_prot) +#' reversed_prot reverseOperonSeq <- function(prot) { gencontext <- prot$GenContext diff --git a/man/acc2FA.Rd b/man/acc2FA.Rd index c878403b..ae7101d7 100644 --- a/man/acc2FA.Rd +++ b/man/acc2FA.Rd @@ -35,17 +35,17 @@ Resulting fasta file is written to the outpath. } \examples{ \dontrun{ -acc2FA(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), +acc2FA(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), outpath = "my_proteins.fasta") Entrez:accessions <- rep("ANY95992.1", 201) |> acc2FA(outpath = "entrez.fa") -EBI:accessions <- c("P12345", "Q9UHC1", +EBI:accessions <- c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1") |> acc2FA(outpath = "ebi.fa") } \dontrun{ -acc2FA(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), +acc2FA(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), outpath = "my_proteins.fasta") Entrez:accessions <- rep("ANY95992.1", 201) |> acc2FA(outpath = "entrez.fa") -EBI:accessions <- c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1") |> +EBI:accessions <- c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1") |> acc2FA(outpath = "ebi.fa") } } diff --git a/man/addName.Rd b/man/addName.Rd index e4a745c5..b681f349 100644 --- a/man/addName.Rd +++ b/man/addName.Rd @@ -56,7 +56,7 @@ data <- data.frame( Lineage = c("Eukaryota>Chordata", "Eukaryota>Chordata") ) enriched_data <- addName(data) -print(enriched_data) +enriched_data \dontrun{ addName(data_frame) } diff --git a/man/addTaxID.Rd 
b/man/addTaxID.Rd index e960769b..9e68321c 100644 --- a/man/addTaxID.Rd +++ b/man/addTaxID.Rd @@ -27,6 +27,6 @@ addTaxID # Create a sample data table with accession numbers sample_data <- data.table(AccNum = c("ABC123.1", "XYZ456.1", "LMN789.2")) enriched_data <- addTaxID(sample_data, acc_col = "AccNum", version = TRUE) -print(enriched_data) +enriched_data } } diff --git a/man/alignFasta.Rd b/man/alignFasta.Rd index e9bd22d7..61e880ab 100644 --- a/man/alignFasta.Rd +++ b/man/alignFasta.Rd @@ -29,14 +29,14 @@ Perform a Multiple Sequence Alignment on a FASTA file. } \examples{ \dontrun{ -aligned_sequences <- alignFasta("my_sequences.fasta", +aligned_sequences <- alignFasta("my_sequences.fasta", tool = "Muscle", outpath = "aligned_output.fasta") } \dontrun{ # Example usage -aligned_sequences <- alignFasta("path/to/sequences.fasta", +aligned_sequences <- alignFasta("path/to/sequences.fasta", tool = "ClustalO", outpath = "path/to/aligned_sequences.fasta") -print(aligned_sequences) +aligned_sequences } } \author{ diff --git a/man/convert2TitleCase.Rd b/man/convert2TitleCase.Rd index a4078141..4769efea 100644 --- a/man/convert2TitleCase.Rd +++ b/man/convert2TitleCase.Rd @@ -30,8 +30,8 @@ Changing case to 'Title Case' # Convert a single string to title case convert2TitleCase("hello world") # Returns "Hello World" -convert2TitleCase("hello world") -convert2TitleCase("this is a test", "_") +convert2TitleCase("hello world") +convert2TitleCase("this is a test", "_") } \seealso{ chartr, toupper, and tolower. 
diff --git a/man/createRepresentativeAccNum.Rd b/man/createRepresentativeAccNum.Rd index 639a36d4..53902940 100644 --- a/man/createRepresentativeAccNum.Rd +++ b/man/createRepresentativeAccNum.Rd @@ -40,11 +40,14 @@ Function to generate a vector of one Accession number per distinct observation f } \examples{ \dontrun{ +createRepresentativeAccNum(prot) +} +\dontrun{ # Example usage with a data frame called `protein_data` -createRepresentativeAccNum <- RepresentativeAccNums(prot_data = protein_data, - reduced = "Lineage", +representative_accessions <- createRepresentativeAccNum(prot_data = protein_data, + reduced = "Lineage", accnum_col = "AccNum") -print(representative_accessions) +representative_accessions } } \author{ diff --git a/man/downloadAssemblySummary.Rd b/man/downloadAssemblySummary.Rd index bad2b603..e67aba70 100644 --- a/man/downloadAssemblySummary.Rd +++ b/man/downloadAssemblySummary.Rd @@ -25,7 +25,7 @@ Download the combined assembly summaries of genbank and refseq } \examples{ \dontrun{ -downloadAssemblySummary(outpath = "assembly_summary.tsv", +downloadAssemblySummary(outpath = "assembly_summary.tsv", keep = c("assembly_accession", "taxid", "organism_name")) } } diff --git a/man/getAccNumFromFA.Rd b/man/getAccNumFromFA.Rd index d2d9216a..4c6179a1 100644 --- a/man/getAccNumFromFA.Rd +++ b/man/getAccNumFromFA.Rd @@ -24,8 +24,11 @@ getAccNumFromFA } \examples{ \dontrun{ +getAccNumFromFA("my_sequences.fasta") +} +\dontrun{ # Example usage accnums <- getAccNumFromFA("path/to/sequences.fasta") -print(accnums) +accnums } } diff --git a/man/getTopAccByLinDomArch.Rd b/man/getTopAccByLinDomArch.Rd index c76931f1..0eeb0610 100644 --- a/man/getTopAccByLinDomArch.Rd +++ b/man/getTopAccByLinDomArch.Rd @@ -37,8 +37,8 @@ Group by lineage + DA then take top 20 } \examples{ \dontrun{ -top_accessions <- getTopAccByLinDomArch(infile_full = my_data, -DA_col = 
"DomArch.Pfam", lin_col = "Lineage_short", n = 20, query = "specific_query_name") } } diff --git a/man/mapAcc2Name.Rd b/man/mapAcc2Name.Rd index 7ef04955..3213201a 100644 --- a/man/mapAcc2Name.Rd +++ b/man/mapAcc2Name.Rd @@ -35,10 +35,10 @@ Default rename_fasta() replacement function. Maps an accession number to its nam mapAcc2Name(">P12345 some description", acc2name, "AccNum", "Name") } \dontrun{ -acc2name_table <- data.table(AccNum = c("ACC001", "ACC002"), +acc2name_table <- data.table(AccNum = c("ACC001", "ACC002"), Name = c("Species A", "Species B")) line <- ">ACC001 some additional info" mapped_line <- mapAcc2Name(line, acc2name_table) -print(mapped_line) # Expected output: ">Species A" +mapped_line # Expected output: ">Species A" } } diff --git a/man/plotIPR2Viz.Rd b/man/plotIPR2Viz.Rd index 8d06eae1..13ac06c1 100644 --- a/man/plotIPR2Viz.Rd +++ b/man/plotIPR2Viz.Rd @@ -53,15 +53,15 @@ plotIPR2Viz } \examples{ \dontrun{ -plot <- plotIPR2Viz(infile_ipr = "path/to/ipr_file.tsv", - infile_full = "path/to/full_file.tsv", - accessions = c("ACC123", "ACC456"), - analysis = c("Pfam", "TMHMM"), - group_by = "Analysis", - topn = 20, - name = "Gene Name", - text_size = 15, +plot <- plotIPR2Viz(infile_ipr = "path/to/ipr_file.tsv", + infile_full = "path/to/full_file.tsv", + accessions = c("ACC123", "ACC456"), + analysis = c("Pfam", "TMHMM"), + group_by = "Analysis", + topn = 20, + name = "Gene Name", + text_size = 15, query = "All") -print(plot) +plot } } diff --git a/man/plotIPR2VizWeb.Rd b/man/plotIPR2VizWeb.Rd index 9de7413f..e56d917e 100644 --- a/man/plotIPR2VizWeb.Rd +++ b/man/plotIPR2VizWeb.Rd @@ -54,15 +54,15 @@ plotIPR2VizWeb } \examples{ \dontrun{ -plot <- plotIPR2VizWeb(infile_ipr = "path/to/ipr_file.tsv", - accessions = c("ACC123", "ACC456"), - analysis = c("Pfam", "TMHMM"), - group_by = "Analysis", - name = "Gene Name", - text_size = 15, - legend_name = "ShortName", - cols = 5, +plot <- plotIPR2VizWeb(infile_ipr = "path/to/ipr_file.tsv", + accessions = 
c("ACC123", "ACC456"), + analysis = c("Pfam", "TMHMM"), + group_by = "Analysis", + name = "Gene Name", + text_size = 15, + legend_name = "ShortName", + cols = 5, rows = 10) -print(plot) +plot } } diff --git a/man/plotLineageSunburst.Rd b/man/plotLineageSunburst.Rd index 3240d77d..363e8c27 100644 --- a/man/plotLineageSunburst.Rd +++ b/man/plotLineageSunburst.Rd @@ -49,7 +49,7 @@ Lineage Sunburst } \examples{ \dontrun{ -plotLineageSunburst(prot, lineage_column = "Lineage", +plotLineageSunburst(prot, lineage_column = "Lineage", type = "sunburst", levels = 3) } } diff --git a/man/prepareColumnParams.Rd b/man/prepareColumnParams.Rd index 8a9f566b..f685624e 100644 --- a/man/prepareColumnParams.Rd +++ b/man/prepareColumnParams.Rd @@ -24,6 +24,6 @@ prepareColumnParams count_data <- data.frame(Category = c("A", "B", "C"), n = c(10, 20, 15)) params <- prepareColumnParams(count_data, fill_by_n = TRUE, sort_by_n = FALSE) -print(params) +params } } diff --git a/man/prepareSingleColumnParams.Rd b/man/prepareSingleColumnParams.Rd index 0070497e..0261f9c1 100644 --- a/man/prepareSingleColumnParams.Rd +++ b/man/prepareSingleColumnParams.Rd @@ -25,6 +25,6 @@ prepareSingleColumnParams df <- data.frame(Category = c("A", "A", "B", "B", "C"), n = c(10, 20, 30, 40, 50)) params <- prepareSingleColumnParams(df, col_num = 1, root = "Root") -print(params) +params } } diff --git a/man/proteinAcc2TaxID.Rd b/man/proteinAcc2TaxID.Rd index 9be09d53..1ccafe4f 100644 --- a/man/proteinAcc2TaxID.Rd +++ b/man/proteinAcc2TaxID.Rd @@ -31,8 +31,8 @@ proteinAcc2TaxID \dontrun{ # Example accession numbers accessions <- c("ABC123", "XYZ456", "LMN789") -tax_data <- proteinAcc2TaxID(accessions, suffix = "example", +tax_data <- proteinAcc2TaxID(accessions, suffix = "example", out_path = "/path/to/output", return_dt = TRUE) -print(tax_data) +tax_data } } diff --git a/man/renameFA.Rd b/man/renameFA.Rd index da7d339b..18eca8b9 100644 --- a/man/renameFA.Rd +++ b/man/renameFA.Rd @@ -23,7 +23,7 @@ Rename the labels 
of fasta files } \examples{ \dontrun{ -renameFA("path/to/input.fasta", +renameFA("path/to/input.fasta", "path/to/output.fasta", mapAcc2Name, acc2name) } } diff --git a/man/rename_fasta.Rd b/man/rename_fasta.Rd index 3089d530..35658437 100644 --- a/man/rename_fasta.Rd +++ b/man/rename_fasta.Rd @@ -23,7 +23,7 @@ Rename the labels of fasta files } \examples{ \dontrun{ -rename_fasta("input.fasta", "output.fasta", +rename_fasta("input.fasta", "output.fasta", replacement_function = map_acc2name, acc2name = acc2name_table) } } diff --git a/man/reverseOperonSeq.Rd b/man/reverseOperonSeq.Rd index 3709bbe1..03e68a94 100644 --- a/man/reverseOperonSeq.Rd +++ b/man/reverseOperonSeq.Rd @@ -25,5 +25,5 @@ indicated by "=". # Example genomic context data frame prot <- data.frame(GenContext = c("A>B", "CI")) reversed_prot <- reverseOperonSeq(prot) -print(reversed_prot) +reversed_prot } diff --git a/man/runDeltaBlast.Rd b/man/runDeltaBlast.Rd index fc9cd09e..c3384d12 100644 --- a/man/runDeltaBlast.Rd +++ b/man/runDeltaBlast.Rd @@ -5,7 +5,7 @@ \title{Run DELTABLAST to find homologs for proteins of interest} \usage{ runDeltaBlast( - deltablast_path, + runDeltaBlast, db_search_path, db = "refseq", query, @@ -16,8 +16,6 @@ runDeltaBlast( ) } \arguments{ -\item{deltablast_path}{Path to the Delta-BLAST executable.} - \item{db_search_path}{Path to the BLAST databases.} \item{db}{Name of the BLAST database to search against (default is "refseq").} @@ -31,6 +29,8 @@ runDeltaBlast( \item{num_alignments}{Number of alignments to report.} \item{num_threads}{Number of threads to use for the search (default is 1).} + +\item{deltablast_path}{Path to the Delta-BLAST executable.} } \value{ This function does not return a value; it outputs results to the @@ -41,6 +41,11 @@ This function executes a Delta-BLAST search using the specified parameters and database. It sets the BLAST database path, runs the Delta-BLAST command with the given query, and outputs the results. 
} +\examples{ +\dontrun{ +runDeltaBlast(runDeltaBlast, db_search_path) +} +} \author{ Samuel Chen, Janani Ravi } diff --git a/man/runIPRScan.Rd b/man/runIPRScan.Rd index 8431efb4..f675314d 100644 --- a/man/runIPRScan.Rd +++ b/man/runIPRScan.Rd @@ -29,6 +29,6 @@ results <- runIPRScan( filepath_out = "path/to/output_file", appl = c("Pfam", "Gene3D") ) -print(results) +results } } diff --git a/man/shortenLineage.Rd b/man/shortenLineage.Rd index 00200f96..161d0260 100644 --- a/man/shortenLineage.Rd +++ b/man/shortenLineage.Rd @@ -27,9 +27,9 @@ string (up to a given delimiter). } \examples{ \dontrun{ -df <- data.frame(Lineage = c("Bacteria>Firmicutes>Clostridia", +df <- data.frame(Lineage = c("Bacteria>Firmicutes>Clostridia", "Archaea>Euryarchaeota>Thermococci")) shortened_df <- shortenLineage(df, colname = "Lineage", abr_len = 1) -print(shortened_df) +shortened_df } } diff --git a/man/writeMSA_AA2FA.Rd b/man/writeMSA_AA2FA.Rd index c9551102..d0d5d305 100644 --- a/man/writeMSA_AA2FA.Rd +++ b/man/writeMSA_AA2FA.Rd @@ -28,6 +28,9 @@ and msaMuscle from the 'msa' package } \examples{ \dontrun{ +writeMSA_AA2FA("my_sequences.fasta", outpath = "aligned_output.fasta") +} +\dontrun{ # Example usage alignment <- alignFasta("path/to/sequences.fasta") writeMSA_AA2FA(alignment, "path/to/aligned_sequences.fasta") From ecdd69e27f422c4c0e13dc9ce4ef9818ccdfb828 Mon Sep 17 00:00:00 2001 From: teddyCodex Date: Sun, 27 Oct 2024 16:34:58 +0100 Subject: [PATCH 55/61] added boundary guard and error handling to .LevelReduction --- R/plotting.R | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/R/plotting.R b/R/plotting.R index 5d949cd5..853b377f 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -23,22 +23,12 @@ ######################## #' #' -.LevelReduction <- function(lin, level) { - if (level == 1) { - gt_loc <- str_locate(lin, ">")[[1]] - if (is.na(gt_loc)) { - # No '>' in lineage - return(lin) - } else { - lin <- substring(lin, first = 0, last = 
(gt_loc - 1)) - return(lin) - } - } - # Out of bounds guard - gt_loc <- str_locate_all(lin, ">")[[1]] - l <- length(gt_loc) / 2 - if (level > l) { - # Not enough '>' in lineage +.LevelReduction <- function(lin, level) { + gt_loc <- str_locate_all(lin, ">")[[1]] + available_levels <- length(gt_loc) / 2 # Since `str_locate_all` returns a matrix + + # Guard against out-of-bounds level requests + if (level > available_levels || level < 1) { return(lin) } else { gt_loc <- gt_loc[level, ][1] %>% as.numeric() @@ -47,6 +37,8 @@ } } + + .GetKingdom <- function(lin) { gt_loc <- str_locate(lin, ">")[, "start"] if (is.na(gt_loc)) { @@ -1359,4 +1351,4 @@ plotLineageSunburst <- function(prot, lineage_column = "Lineage", # # theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5), # # axis.text.y=element_text(angle=90,hjust=1,vjust=0.5)) # -# } +# } \ No newline at end of file From 64d16bec64a607c9cfb427b01acad378dd064ab0 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 29 Oct 2024 09:20:49 -0600 Subject: [PATCH 56/61] Rd consistency --- man/shortenLineage.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/shortenLineage.Rd b/man/shortenLineage.Rd index 161d0260..7390b254 100644 --- a/man/shortenLineage.Rd +++ b/man/shortenLineage.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/plotting.R \name{shortenLineage} \alias{shortenLineage} -\title{Shorten Lineage Names} +\title{shortenLineage} \usage{ shortenLineage(data, colname = "Lineage", abr_len = 1) } From 32418b39ba9b40550ef425c771d4c857741a8446 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 29 Oct 2024 09:21:11 -0600 Subject: [PATCH 57/61] disable example code for reverseOperonSeq() - see https://github.com/JRaviLab/MolEvolvR/issues/118 --- R/reverse_operons.R | 5 ++++- man/reverseOperonSeq.Rd | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/R/reverse_operons.R b/R/reverse_operons.R index 9094598b..f250e8c0 100755 --- a/R/reverse_operons.R +++ b/R/reverse_operons.R 
@@ -87,10 +87,13 @@ straightenOperonSeq <- function(prot) { #' @export #' #' @examples +#' \dontrun{ #' # Example genomic context data frame -#' prot <- data.frame(GenContext = c("A>B", "CI")) +#' ## Rework example data, does not pass R-CMD Check +#' prot <- data.frame(GenContext = c("A>B", "CI")) #' reversed_prot <- reverseOperonSeq(prot) #' reversed_prot +#' } reverseOperonSeq <- function(prot) { gencontext <- prot$GenContext diff --git a/man/reverseOperonSeq.Rd b/man/reverseOperonSeq.Rd index 03e68a94..812d0e89 100644 --- a/man/reverseOperonSeq.Rd +++ b/man/reverseOperonSeq.Rd @@ -22,8 +22,11 @@ flipping the direction of operons while preserving the relationships indicated by "=". } \examples{ +\dontrun{ # Example genomic context data frame -prot <- data.frame(GenContext = c("A>B", "CI")) +## Rework example data, does not pass R-CMD Check +prot <- data.frame(GenContext = c("A>B", "CI")) reversed_prot <- reverseOperonSeq(prot) reversed_prot } +} From f4b50f4c387f7a797e630eb2b00c06d473ee75da Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 29 Oct 2024 20:39:13 -0600 Subject: [PATCH 58/61] fix error introduced by merge --- R/assign_job_queue.R | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R index 6f3dde17..e0c22ec6 100644 --- a/R/assign_job_queue.R +++ b/R/assign_job_queue.R @@ -127,33 +127,6 @@ calculateProcessRuntime <- function(dir_job_results) { dir.create(dirname(path_log_data), recursive = TRUE, showWarnings = FALSE) } - - # attempt to load pre-generated logdata - if (!file.exists(path_log_data)) { - logs <- aggregate_logs(dir_job_results, latest_date = Sys.Date() - 60) - save(logs, file = path_log_data) - } else { - load(path_log_data) # loads the logs object - } - df_log <- logs$df_log - procs <- c( - "dblast", "dblast_cleanup", "iprscan", - "ipr2lineage", "ipr2da", "blast_clust", - "clust2table" - ) - list_proc_medians <- df_log |> - 
dplyr::select(dplyr::all_of(procs)) |> - dplyr::summarise( - dplyr::across( - dplyr::everything(), - \(x) median(x, na.rm = TRUE) - ) - ) |> - as.list() - return(list_proc_medians) -} - - # attempt to load pre-generated logdata if (!file.exists(path_log_data)) { logs <- aggregate_logs(dir_job_results, latest_date = Sys.Date() - 60) @@ -600,6 +573,7 @@ assignJobQueue <- function( #' dev/molevol_scripts/docs/estimate_walltimes.png", plot = p) #' @export plotEstimatedWallTimes <- function() { + tryCatch({ opts <- mapOption2Process() |> names() # get all possible submission permutations (powerset) get_powerset <- function(vec) { From dd86b3ce04e68345297ee2f0f095f2999ff286f1 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 29 Oct 2024 20:45:29 -0600 Subject: [PATCH 59/61] fix .Rd --- R/assign_job_queue.R | 18 ++++++++++++++++++ man/acc2Lineage.Rd | 3 +-- man/efetchIPG.Rd | 3 +-- man/sinkReset.Rd | 1 - 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R index e0c22ec6..52af46bf 100644 --- a/R/assign_job_queue.R +++ b/R/assign_job_queue.R @@ -36,6 +36,9 @@ mapOption2Process <- function() { } +#' mapAdvOption2Process +#' +#' @description #' Use MolEvolvR advanced options to get associated processes #' #' @param advanced_opts character vector of MolEvolvR advanced options @@ -79,6 +82,9 @@ mapAdvOption2Process <- function(advanced_opts) { } +#' calculateProcessRuntime +#' +#' @description #' Scrape MolEvolvR logs and calculate median processes #' #' @param dir_job_results [chr] path to MolEvolvR job_results @@ -227,6 +233,9 @@ writeProcessRuntime2TSV <- function(dir_job_results, filepath) { } +#' writeProcessRuntime2YML +#' +#' @description #' Compute median process runtimes, then write a YAML list of the processes and #' their median runtimes in seconds to the path specified by 'filepath'. 
#' @@ -304,6 +313,9 @@ writeProcessRuntime2YML <- function(dir_job_results, filepath = NULL) { }) } +#' getProcessRuntimeWeights +#' +#' @description #' Quickly get the runtime weights for MolEvolvR backend processes #' #' @param dir_job_results [chr] path to MolEvolvR job_results @@ -494,6 +506,9 @@ calculateEstimatedWallTimeFromOpts <- function(advanced_opts, } +#' assignJobQueue +#' +#' @description #' Decision function to assign job queue #' #' @param t_sec_estimate estimated number of seconds a job will process @@ -555,6 +570,9 @@ assignJobQueue <- function( } +#' plotEstimatedWallTimes +#' +#' @description #' Plot the estimated runtimes for different advanced options and number #' of inputs #' diff --git a/man/acc2Lineage.Rd b/man/acc2Lineage.Rd index fd4eeceb..ce499592 100644 --- a/man/acc2Lineage.Rd +++ b/man/acc2Lineage.Rd @@ -44,8 +44,7 @@ accessions. The dataframe includes relevant columns such as TaxID, GCA_ID, Protein, Protein Name, Species, and Lineage. } \description{ -This function combines 'efetchIPG()' -and 'IPG2Lineage()' to map a set +This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set of protein accessions to their assembly (GCA_ID), tax ID, and lineage. Function to map protein accession numbers to lineage diff --git a/man/efetchIPG.Rd b/man/efetchIPG.Rd index eb5ca678..e55c342a 100644 --- a/man/efetchIPG.Rd +++ b/man/efetchIPG.Rd @@ -27,8 +27,7 @@ The function does not return a value but writes the efetch results directly to the specified \code{out_path}. 
} \description{ -Perform efetch on the ipg database -and write the results to out_path +Perform efetch on the ipg database and write the results to out_path Perform efetch on the ipg database and write the results to out_path } diff --git a/man/sinkReset.Rd b/man/sinkReset.Rd index e3fc7ce4..0285c0b2 100644 --- a/man/sinkReset.Rd +++ b/man/sinkReset.Rd @@ -8,7 +8,6 @@ sinkReset() } \value{ No return, but run to close all outstanding \code{sink()}s -and handles any errors or warnings that occur during the process. } \description{ Sink Reset From f6c8188c9eb27df33bbb74a5c1b0febff549153a Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 29 Oct 2024 21:17:23 -0600 Subject: [PATCH 60/61] swap rlang::abort() for base::stop() - allows for additional metadata to be added to error - pkg consistency, abort is used elsewhere print -> message --- R/tree.R | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/R/tree.R b/R/tree.R index 82eb11db..ddbf9d61 100755 --- a/R/tree.R +++ b/R/tree.R @@ -43,6 +43,8 @@ #' be saved. Default is the path to "data/alns/pspa_snf7.tre". #' @param fasttree_path Path to the FastTree executable, which is used to #' generate the phylogenetic tree. Default is "src/FastTree". +#' +#' @importFrom rlang abort #' #' @return No return value. The function generates a tree file (.tre) from the #' input FASTA file. 
@@ -63,19 +65,19 @@ convertFA2Tree <- function(fa_path = here("data/alns/pspa_snf7.fa"), # Check if the FASTA file exists if (!file.exists(fa_path)) { - stop(paste("Error: The FASTA file does not exist at:", fa_path)) + abort(paste("Error: The FASTA file does not exist at:", fa_path)) } # Check if the FastTree executable exists if (!file.exists(fasttree_path)) { - stop(paste("Error: The FastTree executable does not exist at:", + abort(paste("Error: The FastTree executable does not exist at:", fasttree_path)) } # Check if the output directory exists tre_dir <- dirname(tre_path) if (!dir.exists(tre_dir)) { - stop(paste("Error: The output directory does not exist:", tre_dir)) + abort(paste("Error: The output directory does not exist:", tre_dir)) } # Check if the output file already exists @@ -84,7 +86,7 @@ convertFA2Tree <- function(fa_path = here("data/alns/pspa_snf7.fa"), tre_path, "\n") } - print(fa_path) + message(fa_path) system2( command = fasttree_path, args = paste(c(fa_path, ">", tre_path), @@ -125,13 +127,13 @@ convertAlignment2Trees <- function(aln_path = here("data/alns/")) { # Check if the alignment directory exists if (!dir.exists(aln_path)) { - stop(paste("Error: The alignment directory does not exist:", aln_path)) + abort(paste("Error: The alignment directory does not exist:", aln_path)) } # finding all fasta alignment files fa_filenames <- list.files(path = aln_path, pattern = "*.fa") # Check if any FASTA files were found if (length(fa_filenames) == 0) { - stop("Error: No FASTA files found in the specified directory.") + abort("Error: No FASTA files found in the specified directory.") } fa_paths <- paste0(aln_path, fa_filenames) @@ -194,13 +196,13 @@ createFA2Tree <- function(fa_file = "data/alns/pspa_snf7.fa", # Check if the FASTA file exists if (!file.exists(fa_file)) { - stop(paste("Error: The FASTA file does not exist at:", fa_file)) + abort(paste("Error: The FASTA file does not exist at:", fa_file)) } # Check if the output directory exists 
out_dir <- dirname(out_file) if (!dir.exists(out_dir)) { - stop(paste("Error: The output directory does not exist:", out_dir)) + abort(paste("Error: The output directory does not exist:", out_dir)) } # Check if the output file already exists @@ -233,7 +235,7 @@ createFA2Tree <- function(fa_file = "data/alns/pspa_snf7.fa", ## Model Testing & Distance Matrices ## Comparison of different nucleotide or amino acid substitution models mt <- modelTest(prot10, model = "all") - print(mt) + message(mt) # estimate a distance matrix using a Jules-Cantor Model dna_dist <- dist.ml(prot10, model = "JC69") @@ -254,7 +256,7 @@ createFA2Tree <- function(fa_file = "data/alns/pspa_snf7.fa", ## Maximum likelihood and Bootstrapping # ml estimation w/ distance matrix fit <- pml(prot_NJ, prot10) - print(fit) + message(fit) fitJC <- optim.pml(fit, model = "JC", rearrangement = "stochastic") logLik(fitJC) bs <- bootstrap.pml(fitJC, @@ -267,7 +269,7 @@ createFA2Tree <- function(fa_file = "data/alns/pspa_snf7.fa", prot10_dm <- dist.ml(prot10) prot10_NJ <- NJ(prot10_dm) fit2 <- pml(prot10_NJ, data = prot10) - print(fit2) + message(fit2) fitJC2 <- optim.pml(fit2, model = "JC", rearrangement = "stochastic") logLik(fitJC2) bs_subset <- bootstrap.pml(fitJC2, From 01ac8b233f75df76d12320dbbfc0fe09441b1910 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 29 Oct 2024 21:34:12 -0600 Subject: [PATCH 61/61] use rlang::abort() --- R/summarize.R | 54 +++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/R/summarize.R b/R/summarize.R index 504da767..e76a86da 100644 --- a/R/summarize.R +++ b/R/summarize.R @@ -25,7 +25,7 @@ #' #' @importFrom dplyr filter #' @importFrom stringr str_replace_all -#' @importFrom rlang sym +#' @importFrom rlang abort sym #' #' @return Filtered data frame #' @note There is no need to make the domains 'regex safe', that will be handled by this function @@ -44,12 +44,12 @@ filterByDomains <- function(prot, column = 
"DomArch", doms_keep = c(), doms_remo # Check if prot is a data frame if (!is.data.frame(prot)) { - stop("Error: 'prot' must be a data frame.") + abort("Error: 'prot' must be a data frame.") } # Check if the specified column exists in the data frame if (!column %in% names(prot)) { - stop(paste("Error: The specified column '", column, "' does not exist + abort(paste("Error: The specified column '", column, "' does not exist in the data frame.", sep = "")) } @@ -139,19 +139,19 @@ countByColumn <- function(prot = prot, column = "DomArch", min.freq = 1) { # Check if 'prot' is a data frame if (!is.data.frame(prot)) { - stop("Error: 'prot' must be a data frame.") + abort("Error: 'prot' must be a data frame.") } # Check if the specified column exists in the data frame if (!column %in% names(prot)) { - stop(paste("Error: The specified column '", column, "' does not exist in + abort(paste("Error: The specified column '", column, "' does not exist in the data frame.", sep = "")) } # Check if min.freq is a positive integer if (!is.numeric(min.freq) || length(min.freq) != 1 || min.freq < 1 || floor(min.freq) != min.freq) { - stop("Error: 'min.freq' must be a positive integer.") + abort("Error: 'min.freq' must be a positive integer.") } counts <- prot %>% select(column) %>% @@ -200,19 +200,19 @@ countByColumn <- function(prot = prot, column = "DomArch", min.freq = 1) { elements2Words <- function(prot, column = "DomArch", conversion_type = "da2doms") { # Check if 'prot' is a data frame if (!is.data.frame(prot)) { - stop("Error: 'prot' must be a data frame.") + abort("Error: 'prot' must be a data frame.") } # Check if the specified column exists in the data frame if (!column %in% names(prot)) { - stop(paste("Error: The specified column '", column, "' does not exist in + abort(paste("Error: The specified column '", column, "' does not exist in the data frame.", sep = "")) } # Check for valid conversion_type values valid_types <- c("da2doms", "doms2da") if (!conversion_type %in% 
valid_types) { - stop(paste("Error: Invalid 'conversion_type'. Must be one of:", + abort(paste("Error: Invalid 'conversion_type'. Must be one of:", paste(valid_types, collapse = ", "))) } @@ -277,7 +277,7 @@ elements2Words <- function(prot, column = "DomArch", conversion_type = "da2doms" words2WordCounts <- function(string) { # Check if 'string' is a character vector of length 1 if (!is.character(string) || length(string) != 1) { - stop("Error: 'string' must be a single character vector.") + abort("Error: 'string' must be a single character vector.") } df_word_count <- string %>% @@ -331,18 +331,18 @@ filterByFrequency <- function(x, min.freq) { # Check if 'x' is a data frame if (!is.data.frame(x)) { - stop("Error: 'x' must be a data frame.") + abort("Error: 'x' must be a data frame.") } # Check if 'min.freq' is a positive integer if (!is.numeric(min.freq) || length(min.freq) != 1 || min.freq < 1 || floor(min.freq) != min.freq) { - stop("Error: 'min.freq' must be a positive integer.") + abort("Error: 'min.freq' must be a positive integer.") } # Check if the 'freq' column exists in the data frame if (!"freq" %in% names(x)) { - stop("Error: The data frame must contain a 'freq' column.") + abort("Error: The data frame must contain a 'freq' column.") } x %>% filter(freq >= min.freq) @@ -388,18 +388,18 @@ summarizeByLineage <- function(prot = "prot", column = "DomArch", by = "Lineage" query) { # Check if 'prot' is a data frame if (!is.data.frame(prot)) { - stop("Error: 'prot' must be a data frame.") + abort("Error: 'prot' must be a data frame.") } # Check if the specified column exists in the data frame if (!column %in% names(prot)) { - stop(paste("Error: The specified column '", column, "' does not exist in + abort(paste("Error: The specified column '", column, "' does not exist in the data frame.", sep = "")) } # Check if the 'by' column exists in the data frame if (!by %in% names(prot)) { - stop(paste("Error: The specified 'by' column '", by, "' does not exist + 
abort(paste("Error: The specified 'by' column '", by, "' does not exist n the data frame.", sep = "")) } @@ -448,7 +448,7 @@ summarizeByLineage <- function(prot = "prot", column = "DomArch", by = "Lineage" summarizeDomArch_ByLineage <- function(x) { # Check if 'x' is a data frame if (!is.data.frame(x)) { - stop("Error: 'x' must be a data frame.") + abort("Error: 'x' must be a data frame.") } # Check if required columns exist in the data frame @@ -456,7 +456,7 @@ summarizeDomArch_ByLineage <- function(x) { missing_columns <- setdiff(required_columns, names(x)) if (length(missing_columns) > 0) { - stop(paste("Error: The following required columns are + abort(paste("Error: The following required columns are missing:", paste(missing_columns, collapse = ", "))) } x %>% @@ -494,7 +494,7 @@ summarizeDomArch_ByLineage <- function(x) { summarizeDomArch <- function(x) { # Check if 'x' is a data frame if (!is.data.frame(x)) { - stop("Error: 'x' must be a data frame.") + abort("Error: 'x' must be a data frame.") } x %>% group_by(DomArch) %>% @@ -530,7 +530,7 @@ summarizeDomArch <- function(x) { summarizeGenContext_ByDomArchLineage <- function(x) { # Check if 'x' is a data frame if (!is.data.frame(x)) { - stop("Error: 'x' must be a data frame.") + abort("Error: 'x' must be a data frame.") } x %>% filter(!grepl("^-$", GenContext)) %>% @@ -559,7 +559,7 @@ summarizeGenContext_ByDomArchLineage <- function(x) { summarizeGenContext_ByLineage <- function(x) { # Check if 'x' is a data frame if (!is.data.frame(x)) { - stop("Error: 'x' must be a data frame.") + abort("Error: 'x' must be a data frame.") } x %>% filter(!grepl("^-$", GenContext)) %>% @@ -596,7 +596,7 @@ summarizeGenContext_ByLineage <- function(x) { summarizeGenContext <- function(x) { # Check if 'x' is a data frame if (!is.data.frame(x)) { - stop("Error: 'x' must be a data frame.") + abort("Error: 'x' must be a data frame.") } x %>% group_by(GenContext) %>% @@ -659,7 +659,7 @@ totalGenContextOrDomArchCounts <- 
function(prot, column = "DomArch", lineage_col ) { # Check if 'prot' is a data frame if (!is.data.frame(prot)) { - stop("Error: 'prot' must be a data frame.") + abort("Error: 'prot' must be a data frame.") } # Check if the specified columns exist in the data frame @@ -667,19 +667,19 @@ totalGenContextOrDomArchCounts <- function(prot, column = "DomArch", lineage_col missing_columns <- setdiff(required_columns, names(prot)) if (length(missing_columns) > 0) { - stop(paste("Error: The following required columns are missing:", + abort(paste("Error: The following required columns are missing:", paste(missing_columns, collapse = ", "))) } # Check that cutoff is a numeric value between 0 and 100 if (!is.numeric(cutoff) || length(cutoff) != 1 || cutoff < 0 || cutoff > 100) { - stop("Error: 'cutoff' must be a numeric value between 0 and 100.") + abort("Error: 'cutoff' must be a numeric value between 0 and 100.") } # Check that digits is a non-negative integer if (!is.numeric(digits) || length(digits) != 1 || digits < 0 || floor(digits) != digits) { - stop("Error: 'digits' must be a non-negative integer.") + abort("Error: 'digits' must be a non-negative integer.") } column <- sym(column) @@ -843,7 +843,7 @@ totalGenContextOrDomArchCounts <- function(prot, column = "DomArch", lineage_col findParalogs <- function(prot) { # Check if 'prot' is a data frame if (!is.data.frame(prot)) { - stop("Error: 'prot' must be a data frame.") + abort("Error: 'prot' must be a data frame.") } # Remove eukaryotes