diff --git a/NAMESPACE b/NAMESPACE index bb0fd1d..217758b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -57,6 +57,7 @@ export(pull_biological_samples) export(pull_catch) export(pull_gemm) export(pull_haul) +export(pull_spp) export(wh_plot_proportion) import(chron) import(cowplot) diff --git a/R/PullBio.fn.R b/R/PullBio.fn.R index a12cad1..72a4932 100644 --- a/R/PullBio.fn.R +++ b/R/PullBio.fn.R @@ -9,8 +9,11 @@ #' @param SurveyName survey to pull the data for the options are: #' Triennial, AFSC.Slope, NWFSC.Combo, NWFSC.Slope, NWFSC.Shelf, NWFSC.Hypoxia, #' NWFSC.Santa.Barb.Basin, NWFSC.Shelf.Rockfish (NWFSC.Hook.Line but both are not working), NWFSC.Video#' -#' @param SaveFile option to save the file to the directory -#' @param Dir directory where the file should be saved +#' @param SaveFile Deprecated with {nwfscSurvey} 2.3. Output will be saved automatically +#' if the Dir input is specified. +#' @param Dir The directory where you want the output file to be saved. +#' The name of the file within `Dir` will start with Catch_ and end with .rdata. +#' Default NULL which will not save an output file. 
#' @template verbose #' #' @author Chantel Wetzel based on code by John Wallace @@ -23,265 +26,54 @@ #' @examples #' \dontrun{ #' # SurveyName is only arg that has to be specified -#' bio_dat <- PullBio.fn(SurveyName = "NWFSC.Combo") +#' bio_dat <- PullBio.fn(SurveyName = "NWFSC.Combo") #' #' # Example with specified common name -#' bio_dat <- PullBio.fn(Name = "vermilion rockfish", -#' SurveyName = "NWFSC.Combo") +#' bio_dat <- PullBio.fn(Name = "vermilion rockfish", +#' SurveyName = "NWFSC.Combo") #' #' # Example with specified scientific name -#' bio_dat <- PullBio.fn(SciName = "Eopsetta jordani", -#' SurveyName = "NWFSC.Combo") +#' bio_dat <- PullBio.fn(SciName = "Eopsetta jordani", +#' SurveyName = "NWFSC.Combo") #' #' # Example with multiple names -#' bio_dat <- PullBio.fn(SciName = c("Sebastes aurora","Eopsetta jordani"), -#' SurveyName = "NWFSC.Combo") -# bio_dat <- PullBio.fn(Name = c("Sunset rockfish", "vermilion rockfish", -# "vermilion and sunset rockfish"), SurveyName = "NWFSC.Combo") +#' bio_dat <- PullBio.fn(SciName = c("Sebastes aurora","Eopsetta jordani"), +#' SurveyName = "NWFSC.Combo") +# bio_dat <- PullBio.fn(Name = c("Sunset rockfish", "vermilion rockfish", +# "vermilion and sunset rockfish"), SurveyName = "NWFSC.Combo") #' } #' -PullBio.fn <- function(Name = NULL, SciName = NULL, YearRange = c(1980, 5000), SurveyName = NULL, SaveFile = FALSE, Dir = NULL, verbose = TRUE) { - # increase the timeout period to avoid errors when pulling data - options(timeout = 4000000) - if (SurveyName %in% c("NWFSC.Shelf.Rockfish", "NWFSC.Hook.Line")) { - stop("The bio pull currently does not work for hook & line data. Pull directly from the warehouse https://www.webapp.nwfsc.noaa.gov/data") - } - - - if (SaveFile) { - if (is.null(Dir)) { - stop("The Dir input needs to be specified in order to save output file.") - } - if (!file.exists(Dir)) { - stop( - "The Dir argument leads to a location", - ",\ni.e., ", Dir, ", that doesn't exist." 
- ) - } - } - - if (is.null(Name)) { - var.name <- "scientific_name" - Species <- SciName - new.name <- "Scientific_name" - outName <- Name - } - if (is.null(SciName)) { - var.name <- "common_name" - Species <- Name - new.name <- "Common_name" - outName <- SciName - outName <- "All" - } - if (is.null(SciName) & is.null(Name)) { - var.name <- c("scientific_name", "common_name") - Species <- "pull all" - new.name <- c("Scientific_name", "Common_name") - } # stop("Need to specifiy Name or SciName to pull data!")} - - surveys <- createMatrix() - - if (!SurveyName %in% surveys[, 1]) { - stop(cat("The SurveyName does not match one of the available options:", surveys[, 1])) - } - - for (i in 1:dim(surveys)[1]) { - if (SurveyName == surveys[i, 1]) { - project <- surveys[i, 2] - projectShort <- surveys[i, 1] - } - } - - if (length(YearRange) == 1) { - YearRange <- c(YearRange, YearRange) - } - - - if (projectShort != "NWFSC.Hook.Line") { - Vars <- c( - "project", "trawl_id", var.name, "year", "vessel", "pass", - "tow", "datetime_utc_iso", "depth_m", "weight_kg", "ageing_laboratory_dim$laboratory", - "length_cm", "width_cm", "sex", "age_years", "otosag_id", "latitude_dd", "longitude_dd", - "standard_survey_age_indicator", - "standard_survey_length_or_width_indicator", - "standard_survey_weight_indicator", - "operation_dim$legacy_performance_code" - ) - - Vars.short <- c( - "project", "trawl_id", var.name, "year", "vessel", "pass", - "tow", "datetime_utc_iso", "depth_m", "weight_kg", "ageing_lab", "otosag_id", - "length_cm", "width_cm", "sex", "age_years", "latitude_dd", "longitude_dd" - ) - } else { - Vars <- Vars.short <- c(var.name, "age_years", "drop_latitude_dim$latitude_in_degrees", ) - } - - # symbols here are generally: %22 = ", %2C = ",", %20 = " " - species_str <- paste0("%22",stringr::str_replace_all(Species[1]," ","%20"),"%22") - if(length(Species) > 1) { - for(i in 2:length(Species)) { - species_str <- paste0(species_str, "%2C", 
paste0("%22",stringr::str_replace_all(Species[i]," ","%20"),"%22")) - } - } - UrlText <- paste0( - "https://www.webapps.nwfsc.noaa.gov/data/api/v1/source/trawl.individual_fact/selection.json?filters=project=", paste(strsplit(project, " ")[[1]], collapse = "%20"), ",", - "station_invalid=0,", - "performance=Satisfactory,", - "depth_ftm>=30,depth_ftm<=700,", - "field_identified_taxonomy_dim$", var.name, "|=[", species_str, "]", - ",year>=", YearRange[1], ",year<=", YearRange[2], - "&variables=", paste0(Vars, collapse = ",") +PullBio.fn <- function( + Name = NULL, + SciName = NULL, + YearRange = c(1980, 5000), + SurveyName = NULL, + SaveFile = lifecycle::deprecated(), + Dir = NULL, + verbose = TRUE) { + + lifecycle::deprecate_soft( + when = "2.3", + what = "nwfscSurvey::PullBio.fn()", + details = "Please switch to pull_bio()." ) - if (Species[1] == "pull all") { - UrlText <- paste0( - "https://www.webapps.nwfsc.noaa.gov/data/api/v1/source/trawl.individual_fact/selection.json?filters=project=", paste(strsplit(project, " ")[[1]], collapse = "%20"), ",", - "station_invalid=0,", - "performance=Satisfactory,", "depth_ftm>=30,depth_ftm<=700,", - "year>=", YearRange[1], ",year<=", YearRange[2], - "&variables=", paste0(Vars, collapse = ",") - ) - } - - DataPull <- NULL - if (verbose) { - message("Pulling biological data. 
This can take up to ~ 30 seconds (or more).") - } - DataPull <- try(get_json(url = UrlText)) - - if (is.data.frame(DataPull)) { - if (SurveyName == "NWFSC.Combo") { - # Filter out non-standard samples - keep <- DataPull[, "standard_survey_length_or_width_indicator"] %in% c("NA", "Standard Survey Length or Width") - DataPull <- DataPull[keep, ] - remove <- DataPull[, "standard_survey_age_indicator"] == "Not Standard Survey Age" - if (sum(remove) != 0) { - DataPull[remove, "age_years"] <- NA - } - remove <- DataPull[, "standard_survey_weight_indicator"] == "Not Standard Survey Weight" - if (sum(remove) != 0) { - DataPull[remove, "weight_kg"] <- NA - } - } - - if (SurveyName == "Triennial") { - # Remove water hauls - fix <- is.na(DataPull[, "operation_dim$legacy_performance_code"]) - if (sum(fix) > 0) { - DataPull[fix, "operation_dim$legacy_performance_code"] <- -999 - } - keep <- DataPull[, "operation_dim$legacy_performance_code"] != 8 - DataPull <- DataPull[keep, ] - } - - find <- colnames(DataPull) == "ageing_laboratory_dim$laboratory" - colnames(DataPull)[find] <- "ageing_lab" - # Remove the extra columns now that they are not needed - DataPull <- DataPull[, Vars.short] - } - - - if (SurveyName %in% c("Triennial", "AFSC.Slope")) { - UrlText <- paste0( - "https://www.webapps.nwfsc.noaa.gov/data/api/v1/source/trawl.triennial_length_fact/selection.json?filters=project=", - paste(strsplit(project, " ")[[1]], collapse = "%20"), ",", - "station_invalid=0,", - "performance=Satisfactory,", - "field_identified_taxonomy_dim$", var.name, "=", paste(strsplit(Species, " ")[[1]], collapse = "%20"), - ",year>=", YearRange[1], ",year<=", YearRange[2], - "&variables=", paste0(Vars, collapse = ",") - ) - - LenPull <- try(get_json(url = UrlText)) - - # Remove water hauls - if (is.data.frame(LenPull)) { - fix <- is.na(LenPull[, "operation_dim$legacy_performance_code"]) - if (sum(fix) > 0) { - LenPull[fix, "operation_dim$legacy_performance_code"] <- -999 - } - keep <- LenPull[, 
"operation_dim$legacy_performance_code"] != 8 - LenPull <- LenPull[keep, ] - - colnames(LenPull)[2] <- "Date" - LenPull$Weight <- NA - LenPull$Age <- NA - Len <- dplyr::rename(LenPull, - Trawl_id = trawl_id, Year = year, Vessel = vessel, Project = project, - Pass = pass, Tow = tow, Depth_m = depth_m, Length_cm = length_cm, - Width_cm = width_cm, Sex = sex, Latitude_dd = latitude_dd, Longitude_dd = longitude_dd - ) - names(Len)[which(names(Len) == "scientific_name")] <- "Scientific_name" - names(Len)[which(names(Len) == "common_name")] <- "Common_name" - - Len$Date <- chron::chron(format(as.POSIXlt(Len$Date, format = "%Y-%m-%dT%H:%M:%S"), "%Y-%m-%d"), format = "y-m-d", out.format = "YYYY-m-d") - Len$Trawl_id <- as.character(Len$Trawl_id) - Len$Project <- projectShort - Len$Depth_m <- as.numeric(as.character(Len$Depth_m)) - Len$Length_cm <- as.numeric(as.character(Len$Length_cm)) - Len$Age <- as.numeric(as.character(Len$Age)) - } - } - - if (!is.data.frame(DataPull) & !SurveyName %in% c("Triennial", "AFSC.Slope")) { - stop(cat("\nNo data returned by the warehouse for the filters given. 
- Make sure the year range is correct for the project selected and the input name is correct, - otherwise there may be no data for this species from this project.\n")) - } - - - Data <- NULL - if (length(DataPull) > 0) { - Data <- dplyr::rename(DataPull, - Trawl_id = trawl_id, Year = year, Vessel = vessel, Project = project, Pass = pass, - Tow = tow, Date = datetime_utc_iso, Depth_m = depth_m, Weight = weight_kg, - Length_cm = length_cm, Width_cm = width_cm, Sex = sex, Age = age_years, Oto_id = otosag_id, - Ageing_Lab = ageing_lab, - Latitude_dd = latitude_dd, Longitude_dd = longitude_dd + if (lifecycle::is_present(SaveFile)) { + lifecycle::deprecate_warn( + when = "2.3", + what = "nwfscSurvey::PullBio.fn(SaveFile =)" ) - - names(Data)[which(names(Data) == "scientific_name")] <- "Scientific_name" - names(Data)[which(names(Data) == "common_name")] <- "Common_name" - Data$Date <- chron::chron(format(as.POSIXlt(Data$Date, format = "%Y-%m-%dT%H:%M:%S"), "%Y-%m-%d"), format = "y-m-d", out.format = "YYYY-m-d") - Data$Trawl_id <- as.character(Data$Trawl_id) - Data$Project <- projectShort - Data$Depth_m <- as.numeric(as.character(Data$Depth_m)) - Data$Length_cm <- as.numeric(as.character(Data$Length_cm)) - Data$Age <- as.numeric(as.character(Data$Age)) - } - - Ages <- NULL - if (SurveyName %in% c("Triennial", "AFSC.Slope")) { - if (!is.null(Data) & sum(is.na(Data$Age)) != length(Data$Age)) { - Ages <- Data - } - - Data <- list() - if (is.data.frame(LenPull)) { - Data$Lengths <- Len - } else { - Data$Lengths <- "no_lengths_available" - } - if (!is.null(Ages)) { - Data$Ages <- Ages - } else { - Data$Ages <- "no_ages_available" - } - if (verbose) { - message("Triennial & AFSC Slope data returned as a list: Data$Lengths and Data$Ages\n") - } } - if (SaveFile) { - time <- Sys.time() - time <- substring(time, 1, 10) - # save(Data, file = paste0(Dir, "/Bio_", outName, "_", SurveyName, "_", time, ".rda")) - save(Data, file = file.path(Dir, paste("Bio_", outName, "_", SurveyName, 
"_", time, ".rda", sep = ""))) - if (verbose) { - message(paste("Biological data file saved to following location:", Dir)) - } - } + Data <- pull_bio( + common_name = Name, + sci_name = SciName, + years = YearRange, + survey = SurveyName, + dir = Dir, + convert = TRUE, + verbose = verbose + ) return(Data) } diff --git a/R/PullCatch.fn.R b/R/PullCatch.fn.R index d53b962..fd96572 100644 --- a/R/PullCatch.fn.R +++ b/R/PullCatch.fn.R @@ -52,14 +52,14 @@ #' allowing for a vector of survey names and #' `NWFSC.Shelf.Rockfish` and `NWFSC.Hook.Line` are not supported. #' The default of `NULL` is a placeholder that must be replaced with an entry. -#' @param SaveFile A logical value specifying whether or not the the data should -#' be saved to a file in `Dir`. Must change from the default of `FALSE` to save a file. -#' @param Dir If `SaveFile = TRUE`, then one must specify the directory where you want -#' the resulting file to be saved. The directory where the file should be saved. -#' The name of the file within `Dir` will start with Catch_ and end with .rda. +#' @param SaveFile Deprecated with {nwfscSurvey} 2.3. Output will be saved automatically +#' if the Dir input is specified. +#' @param Dir The directory where you want the output file to be saved. +#' The name of the file within `Dir` will start with Catch_ and end with .rdata. +#' Default NULL which will not save an output file. 
#' @template verbose #' -#' @author Chantel Wetzel based on code by John Wallace +#' @author Chantel Wetzel (maintainer) based on code by John Wallace #' @export #' #' @import chron @@ -88,244 +88,36 @@ # SurveyName = "NWFSC.Combo") #' } #' -PullCatch.fn <- function(Name = NULL, SciName = NULL, YearRange = c(1980, 5000), SurveyName = NULL, SaveFile = FALSE, Dir = NULL, verbose = TRUE) { - if (SurveyName %in% c("NWFSC.Shelf.Rockfish", "NWFSC.Hook.Line")) { - stop("The catch pull currently does not work for hook & line data.", - "\nPull directly from the warehouse https://www.webapp.nwfsc.noaa.gov/data") - } - - if (SaveFile) { - if (is.null(Dir)) { - stop("The Dir input needs to be specified in order to save output file.") - } - if (!file.exists(Dir)) { - stop( - "The Dir argument leads to a location", - ",\ni.e., ", Dir, ", that doesn't exist." - ) - } - } - - if (is.null(Name)) { - var.name <- "scientific_name" - Species <- SciName - new.name <- "Scientific_name" - outName <- Name - } - if (is.null(SciName)) { - var.name <- "common_name" - Species <- Name - new.name <- "Common_name" - outName <- SciName - } - if (is.null(SciName) & is.null(Name)) { - var.name <- "common_name" - Species <- "pull all" - new.name <- "Common_name" - } # stop("Need to specifiy Name or SciName to pull data!")} - - # Survey options available in the data warehouse - surveys <- createMatrix() - - # Check the input survey name against available options - if (!SurveyName %in% surveys[, 1]) { - stop( - "The SurveyName argument does not match one of the available options:\n", - paste(surveys[, 1], collapse = "\n") - ) - } - - # Find the long project name to extract data from the warehouse - for (i in 1:dim(surveys)[1]) { - if (SurveyName == surveys[i, 1]) { - project <- surveys[i, 2] - projectShort <- surveys[i, 1] - } - } - - if (length(YearRange) == 1) { - YearRange <- c(YearRange, YearRange) - } - - - # Pull data for the specific species for the following variables - Vars <- c( - var.name, 
"year", "subsample_count", "subsample_wt_kg", "project", "cpue_kg_per_ha_der", - "total_catch_numbers", "total_catch_wt_kg", "vessel", "tow", "operation_dim$legacy_performance_code", - "statistical_partition_dim$statistical_partition_type" - ) - - Vars.short <- c( - var.name, "year", "subsample_count", "subsample_wt_kg", "project", "cpue_kg_per_ha_der", - "total_catch_numbers", "total_catch_wt_kg", "vessel", "tow" - ) - - # symbols here are generally: %22 = ", %2C = ",", %20 = " " - species_str <- paste0("%22",stringr::str_replace_all(Species[1]," ","%20"),"%22") - if(length(Species) > 1) { - for(i in 2:length(Species)) { - species_str <- paste0(species_str, "%2C", paste0("%22",stringr::str_replace_all(Species[i]," ","%20"),"%22")) - } - } - - UrlText <- paste0( - "https://www.webapps.nwfsc.noaa.gov/data/api/v1/source/trawl.catch_fact/selection.json?filters=project=", paste(strsplit(project, " ")[[1]], collapse = "%20"), ",", - "station_invalid=0,", - "performance=Satisfactory,", "depth_ftm>=30,depth_ftm<=700,", - "field_identified_taxonomy_dim$", var.name, "|=[", species_str,"]", - ",date_dim$year>=", YearRange[1], ",date_dim$year<=", YearRange[2], - "&variables=", paste0(Vars, collapse = ",") +PullCatch.fn <- function( + Name = NULL, + SciName = NULL, + YearRange = c(1980, 5000), + SurveyName = NULL, + SaveFile = lifecycle::deprecated(), + Dir = NULL, + verbose = TRUE) { + + lifecycle::deprecate_soft( + when = "2.3", + what = "nwfscSurvey::PullCatch.fn()", + details = "Please switch to pull_catch()." 
) - if (Species[1] == "pull all") { - UrlText <- paste0( - "https://www.webapps.nwfsc.noaa.gov/data/api/v1/source/trawl.catch_fact/selection.json?filters=project=", paste(strsplit(project, " ")[[1]], collapse = "%20"), ",", - "station_invalid=0,", - "performance=Satisfactory,", "depth_ftm>=30,depth_ftm<=700,", - "date_dim$year>=", YearRange[1], ",date_dim$year<=", YearRange[2], - "&variables=", paste0(Vars, collapse = ",") + if (lifecycle::is_present(SaveFile)) { + lifecycle::deprecate_warn( + when = "2.3", + what = "nwfscSurvey::PullCatch.fn(SaveFile =)" ) } - if (verbose) { - message("Pulling catch data. This can take up to ~ 30 seconds (or more).") - } - # Pull data from the warehouse - DataPull <- try(get_json(url = UrlText)) - - # Remove water hauls - fix <- is.na(DataPull[, "operation_dim$legacy_performance_code"]) - if (sum(fix) > 0) { - DataPull[fix, "operation_dim$legacy_performance_code"] <- -999 - } - # Whether values are NA or "NA" varies based on the presence of "Life Stage" samples - if (sum(is.na(DataPull[, "statistical_partition_dim$statistical_partition_type"])) != dim(DataPull)[1]) { - keep <- DataPull[, "statistical_partition_dim$statistical_partition_type"] == "NA" - DataPull <- DataPull[keep, ] - } - - keep <- DataPull[, "operation_dim$legacy_performance_code"] != 8 - DataPull <- DataPull[keep, ] - DataPull <- DataPull[, Vars.short] - - Data <- dplyr::rename(DataPull, - Year = year, Subsample_count = subsample_count, - Subsample_wt_kg = subsample_wt_kg, Project = project, - CPUE_kg_per_ha = cpue_kg_per_ha_der, Subsample_count = subsample_count, - Subsample_wt_kg = subsample_wt_kg, Vessel = vessel, Tow = tow - ) - - names(Data)[which(names(Data) == "scientific_name")] <- "Scientific_name" - names(Data)[which(names(Data) == "common_name")] <- "Common_name" - - # Pull all tow data (includes tows where the species was not observed) - Vars <- c("project", "year", "vessel", "pass", "tow", "datetime_utc_iso", "depth_m", "longitude_dd", "latitude_dd", 
"area_swept_ha_der", "trawl_id", "operation_dim$legacy_performance_code") - Vars.short <- c("project", "year", "vessel", "pass", "tow", "datetime_utc_iso", "depth_m", "longitude_dd", "latitude_dd", "area_swept_ha_der", "trawl_id") - - UrlText <- paste0( - "https://www.webapps.nwfsc.noaa.gov/data/api/v1/source/trawl.operation_haul_fact/selection.json?filters=project=", paste(strsplit(project, " ")[[1]], collapse = "%20"), ",", - "station_invalid=0,", - "performance=Satisfactory,", - "depth_ftm>=30,depth_ftm<=700,", - "date_dim$year>=", YearRange[1], ",date_dim$year<=", YearRange[2], - "&variables=", paste0(Vars, collapse = ",") - ) - All.Tows <- try(get_json(url = UrlText)) - - # Remove water hauls - fix <- is.na(All.Tows[, "operation_dim$legacy_performance_code"]) - if (sum(fix) > 0) { - All.Tows[fix, "operation_dim$legacy_performance_code"] <- -999 - } - keep <- All.Tows[, "operation_dim$legacy_performance_code"] != 8 - All.Tows <- All.Tows[keep, ] - All.Tows <- All.Tows[, Vars.short] - - All.Tows <- dplyr::rename(All.Tows, - Project = project, Trawl_id = trawl_id, Year = year, - Pass = pass, Vessel = vessel, Tow = tow, Date = datetime_utc_iso, - Depth_m = depth_m, Longitude_dd = longitude_dd, Latitude_dd = latitude_dd, - Area_Swept_ha = area_swept_ha_der - ) - - All.Tows <- All.Tows[ - !duplicated(paste(All.Tows$Year, All.Tows$Pass, All.Tows$Vessel, All.Tows$Tow)), - c("Project", "Trawl_id", "Year", "Pass", "Vessel", "Tow", "Date", "Depth_m", "Longitude_dd", "Latitude_dd", "Area_Swept_ha") - ] - - # Link each data set together based on trawl_id - if ("Common_name" %in% names(Data)) { - grid <- expand.grid( - "Trawl_id" = unique(All.Tows$Trawl_id), "Common_name" = unique(Data$Common_name), - stringsAsFactors = FALSE - ) - } else { - grid <- expand.grid( - "Trawl_id" = unique(All.Tows$Trawl_id), "Scientific_name" = unique(Data$Scientific_name), - stringsAsFactors = FALSE - ) - } - - Out <- dplyr::left_join( - grid, - All.Tows, - by = intersect(colnames(grid), 
colnames(All.Tows)), - multiple = "all" - ) - Out <- dplyr::left_join( - Out, - Data, - by = intersect(colnames(Out), colnames(Data)), - multiple = "all" - ) - - # Fill in zeros where needed - Out$total_catch_wt_kg[is.na(Out$total_catch_wt_kg)] <- 0 - - Out$CPUE_kg_per_ha[is.na(Out$CPUE_kg_per_ha)] <- 0 - - Out$Subsample_count[is.na(Out$Subsample_count)] <- 0 - - Out$Subsample_wt_kg[is.na(Out$Subsample_wt_kg)] <- 0 - - Out$total_catch_numbers[is.na(Out$total_catch_numbers)] <- 0 - - # Need to check what this is doing - noArea <- which(is.na(Out$Area_Swept_ha)) - if (length(noArea) > 0) { - if (verbose) { - print(cat("\nThere are", length(noArea), "records with no area swept calculation. These record will be filled with the mean swept area across all tows.\n")) - print(Out[noArea, c("Trawl_id", "Year", "Area_Swept_ha", "CPUE_kg_per_ha", "total_catch_numbers")]) - } - Out[noArea, "Area_Swept_ha"] <- mean(Out$Area_Swept_ha, trim = 0.05, na.rm = TRUE) - } - - # Scientific Name is missing after the matching when Total_sp_wt_kg is zero - # if (!is.null(Name)) { - # Out$Common_name <- Species - # } - # if (!is.null(SciName)) { - # Out$Scientific_name <- Species - # } - - Out$Date <- chron::chron(format(as.POSIXlt(Out$Date, format = "%Y-%m-%dT%H:%M:%S"), "%Y-%m-%d"), format = "y-m-d", out.format = "YYYY-m-d") - - Out$Project <- projectShort - - Out$Trawl_id <- as.character(Out$Trawl_id) - - # Convert the CPUE into km2 - Out$cpue_kg_km2 <- Out$CPUE_kg_per_ha * 100 - - if (SaveFile) { - time <- Sys.time() - time <- substring(time, 1, 10) - # save(Out, file = paste0(Dir, "/Catch_", outName, "_", SurveyName, "_", time, ".rda")) - save(Out, file = file.path(Dir, paste("Catch_", outName, "_", SurveyName, "_", time, ".rda", sep = ""))) - if (verbose) { - message(paste("Catch data file saved to following location:", Dir)) - } - } + Out <- pull_catch( + common_name = Name, + sci_name = SciName, + years = YearRange, + survey = SurveyName, + dir = Dir, + convert = TRUE, + verbose = 
verbose) return(Out) } diff --git a/R/PullHaul.fn.R b/R/PullHaul.fn.R index 6ff684a..5298290 100644 --- a/R/PullHaul.fn.R +++ b/R/PullHaul.fn.R @@ -6,7 +6,8 @@ #' @param SurveyName survey to pull the data for the options are: #' Triennial, AFSC.Slope, NWFSC.Combo, NWFSC.Slope, NWFSC.Shelf, NWFSC.Hypoxia, #' NWFSC.Santa.Barb.Basin, NWFSC.Shelf.Rockfish (NWFSC.Hook.Line but both are not working), NWFSC.Video#' -#' @param SaveFile option to save the file to the directory +#' @param SaveFile Deprecated with {nwfscSurvey} 2.3. Output will be saved automatically +#' if the Dir input is specified. #' @param Dir directory where the file should be saved #' @template verbose #' @@ -22,97 +23,31 @@ #' haul_dat <- PullHaul.fn() #' } #' -PullHaul.fn <- function(YearRange = c(1980, 5000), SurveyName = NULL, SaveFile = FALSE, Dir = NULL, verbose = TRUE) { - # increase the timeout period to avoid errors when pulling data - options(timeout = 4000000) - - if (SaveFile) { - if (is.null(Dir)) { - stop("The Dir input needs to be specified in order to save output file.") - } - if (!file.exists(Dir)) { - stop( - "The Dir argument leads to a location", - ",\ni.e., ", Dir, ", that doesn't exist." 
- ) - } - } - - surveys <- createMatrix() - - if (is.null(SurveyName)) { - SurveyName <- surveys[, 1] - } - - if (length(SurveyName) == 1) { - if (!SurveyName %in% surveys[, 1]) { - stop(cat("The SurveyName does not match one of the available options:", surveys[, 1])) - } - } else { - if (length(which(SurveyName %in% surveys[, 1])) != length(SurveyName)) { - stop(cat("One or more of the SurveyName fields does not match one of the available options:", surveys[, 1])) - } - } - - project <- "" - for (i in 1:dim(surveys)[1]) { - if (length(which(SurveyName %in% surveys[i, 1]) > 0)) { - project <- c(project, surveys[i, 2]) - } - } - project <- project[which(project %in% c("", "NA") == FALSE)] - - if (length(YearRange) == 1) { - YearRange <- c(YearRange, YearRange) - } - - - Vars <- c( - "area_swept_ha_der", "date_dim.year", "date_yyyymmdd", - "depth_hi_prec_m", "door_width_m_der", "fluorescence_at_surface_mg_per_m3_der", - "gear_end_latitude_dd", "gear_end_longitude_dd", "gear_start_latitude_dd", - "gear_start_longitude_dd", "invertebrate_weight_kg", "latitude_dd", "leg", - "longitude_dd", "net_height_m_der", "net_width_m_der", "nonspecific_organics_weight_kg", - "o2_at_gear_ml_per_l_der", "pass", "performance", "project", "salinity_at_gear_psu_der", - "sampling_end_hhmmss", "sampling_start_hhmmss", - "target_station_design_dim.stn_invalid_for_trawl_date_whid", - "temperature_at_gear_c_der", "temperature_at_surface_c_der", - "trawl_id", "turbidity_ntu_der", "vertebrate_weight_kg", "vessel", - "vessel_end_latitude_dd", "vessel_end_longitude_dd", - "vessel_start_latitude_dd", "vessel_start_longitude_dd" +PullHaul.fn <- function( + YearRange = c(1980, 5000), + SurveyName = NULL, + SaveFile = lifecycle::deprecated(), + Dir = NULL, + verbose = TRUE) { + + lifecycle::deprecate_soft( + when = "2.3", + what = "nwfscSurvey::PullHaul.fn()", + details = "Please switch to pull_haul()." 
) - project_str <- NA - for (i in 1:length(project)) { - project_str[i] <- paste(strsplit(project, " ")[[i]], collapse = "%20") + if (lifecycle::is_present(SaveFile)) { + lifecycle::deprecate_warn( + when = "2.3", + what = "nwfscSurvey::PullHaul.fn(SaveFile =)" + ) } - # Note: this string grabs data from all projects. Projects filtered below - UrlText <- paste0( - "https://www.webapps.nwfsc.noaa.gov/data/api/v1/source/trawl.operation_haul_fact/selection.json?filters=", - "station_invalid=0,", - "performance=Satisfactory,", - "year>=", YearRange[1], ",year<=", YearRange[2], - "&variables=", paste0(Vars, collapse = ",") - ) - - DataPull <- NULL - if (verbose) { - message("Pulling haul data. This can take up to ~ 30 seconds.") - } - Data <- try(get_json(url = UrlText)) - - # filter projects - Data <- Data[which(Data$project %in% project == TRUE), ] - - if (SaveFile) { - time <- Sys.time() - time <- substring(time, 1, 10) - save(Data, file = file.path(Dir, paste("Haul_", SurveyName, "_", time, ".rda", sep = ""))) - if (verbose) { - message(paste("Haul data file saved to following location:", Dir)) - } - } + Data <- pull_haul( + years = YearRange, + survey = SurveyName, + dir = Dir, + verbose = verbose) return(Data) } diff --git a/R/get_json.R b/R/get_json.R index b04b637..7c860e0 100644 --- a/R/get_json.R +++ b/R/get_json.R @@ -3,7 +3,7 @@ #' Get information stored on the web in .json format using a URL. The content #' is first pulled from the web as text with UTF-8 encoding. Then the text #' is passed to [jsonlite::fromJSON()]. This workflow ensures that the URL -#' is not mistaken for a file name rather than web content. +#' is not mistaken for a file name rather than web content. #' #' @param url A string containing a valid URL to pull the data from the data #' warehouse. @@ -12,14 +12,21 @@ #' @export #' @return A data frame. #' @seealso See all the `pull_*` functions for examples where this function is -#' used, e.g., [pull_catch()]. 
+#' used, e.g., [pull_catch()]. #' get_json <- function(url) { - out <- httr::GET(url) %>% - httr::content(as = "text", encoding = "UTF-8") %>% + + out <- httr::GET(url) |> + httr::content(as = "text", encoding = "UTF-8") |> jsonlite::fromJSON() + if (!(is.data.frame(out) && NROW(out) > 0)) { - stop(glue::glue("No data returned by the warehouse using {url}")) + stop(glue::glue( + "\n No data returned by the warehouse for the filters given. + \n Make sure the year range is correct (cannot include -Inf or Inf) for the project selected and the input name is correct, + \n otherwise there may be no data for this species from this project.\n + URL: {url}") + ) } return(out) } diff --git a/R/pull_bio.R b/R/pull_bio.R index 9722487..5eb359e 100644 --- a/R/pull_bio.R +++ b/R/pull_bio.R @@ -7,11 +7,11 @@ #' @template sci_name #' @template years #' @template survey -#' @template dir +#' @template dir #' @template convert -#' @template verbose +#' @template verbose #' -#' @author Chantel Wetzel +#' @author Chantel Wetzel #' @export #' #' @import chron @@ -38,11 +38,11 @@ # "vermilion and sunset rockfish"), SurveyName = "NWFSC.Combo") #' } #' -pull_bio <- function(common_name = NULL, - sci_name = NULL, - years = c(1980, 2050), - survey = NULL, - dir = NULL, +pull_bio <- function(common_name = NULL, + sci_name = NULL, + years = c(1970, 2050), + survey, + dir = NULL, convert = TRUE, verbose = TRUE) { @@ -50,16 +50,21 @@ pull_bio <- function(common_name = NULL, if (survey %in% c("NWFSC.Shelf.Rockfish", "NWFSC.Hook.Line")) { stop("The catch pull currently does not work for NWFSC Hook & Line Survey data.", "\nA subset of the data is available on the data warehouse https://www.webapp.nwfsc.noaa.gov/data", - "\nContact John Harms (john.harms@noaa.gov) for the full data set.") + "\nContact John Harms (john.harms@noaa.gov) for the full data set.") + } + + if(length(c(common_name, sci_name)) != max(c(length(common_name), length(sci_name)))){ + stop("Can not pull data using both the 
common_name or sci_name together. + \n Please retry using only one." ) } - check_dir(dir = dir, verbose = verbose) + + check_dir(dir = dir, verbose = verbose) if (is.null(common_name)) { var_name <- "scientific_name" species <- sci_name - } - if (is.null(sci_name)) { + } else { var_name <- "common_name" species <- common_name } @@ -89,23 +94,18 @@ pull_bio <- function(common_name = NULL, ) # symbols here are generally: %22 = ", %2C = ",", %20 = " " - species_str <- paste0("%22",stringr::str_replace_all(species[1]," ","%20"),"%22") - if(length(species) > 1) { - for(i in 2:length(species)) { - species_str <- paste0(species_str, "%2C", paste0("%22",stringr::str_replace_all(species[i]," ","%20"),"%22")) - } - } + species_str <- convert_to_hex_string(species) add_species <- paste0("field_identified_taxonomy_dim$", var_name, "|=[", species_str,"]") - - if (species[1] == "pull all") { + + if (any(species == "pull all")) { add_species <- "" } - url_text <- get_url(data_table = "trawl.individual_fact", - project_long = project_long, - add_species = add_species, - years = years, - vars_long = vars_long) + url_text <- get_url(data_table = "trawl.individual_fact", + project_long = project_long, + add_species = add_species, + years = years, + vars_long = vars_long) if (verbose) { message("Pulling biological data. This can take up to ~ 30 seconds (or more).") @@ -122,18 +122,18 @@ pull_bio <- function(common_name = NULL, # Some early entries are NA for standard sample indicators. These should be retained. 
standard_lengths <- bio_pull[, "standard_survey_length_or_width_indicator"] %in% c(NA, "NA", "Standard Survey Length or Width") bio_pull <- bio_pull[standard_lengths, ] - + # Remove non-standard ages nonstandard_age <- which(bio_pull[, "standard_survey_age_indicator"] == "Not Standard Survey Age") if (length(nonstandard_age) > 0) { - bio_pull[nonstandard_age, "age_years"] <- NA + bio_pull[nonstandard_age, "age_years"] <- NA } # Remove non-standard weights nonstandard_wgt <- which(bio_pull[, "standard_survey_weight_indicator"] == "Not Standard Survey Weight") if (length(nonstandard_wgt) > 0) { - bio_pull[nonstandard_wgt, "weight_kg"] <- NA - } + bio_pull[nonstandard_wgt, "weight_kg"] <- NA + } # Remove water hauls water_hauls <- is.na(bio_pull[, "operation_dim$legacy_performance_code"]) @@ -151,10 +151,10 @@ pull_bio <- function(common_name = NULL, if (survey %in% c("Triennial", "AFSC.Slope")) { - url_text <- get_url(data_table = "trawl.triennial_length_fact", - project_long = project_long, - add_species = add_species, - years = years, + url_text <- get_url(data_table = "trawl.triennial_length_fact", + project_long = project_long, + add_species = add_species, + years = years, vars_long = vars_long) len_pull <- try(get_json(url = url_text)) @@ -167,15 +167,15 @@ pull_bio <- function(common_name = NULL, } good_tows <- len_pull[, "operation_dim$legacy_performance_code"] != 8 len_pull <- len_pull[good_tows, ] - + len_pull$weight_kg <- NA - len_pull$date_formatted <- chron::chron(format(as.POSIXlt(len_pull$datetime_utc_iso, format = "%Y-%m-%dT%H:%M:%S"), "%Y-%m-%d"), format = "y-m-d", out.format = "YYYY-m-d") + len_pull$date <- chron::chron(format(as.POSIXlt(len_pull$datetime_utc_iso, format = "%Y-%m-%dT%H:%M:%S"), "%Y-%m-%d"), format = "y-m-d", out.format = "YYYY-m-d") len_pull$trawl_id <- as.character(len_pull$trawl_id) } } if (nrow(bio_pull) > 0) { - bio_pull$date_formatted <- chron::chron(format(as.POSIXlt(bio_pull$datetime_utc_iso, format = "%Y-%m-%dT%H:%M:%S"), 
"%Y-%m-%d"), format = "y-m-d", out.format = "YYYY-m-d") + bio_pull$date <- chron::chron(format(as.POSIXlt(bio_pull$datetime_utc_iso, format = "%Y-%m-%dT%H:%M:%S"), "%Y-%m-%d"), format = "y-m-d", out.format = "YYYY-m-d") bio_pull$trawl_id <- as.character(bio_pull$trawl_id) bio <- bio_pull @@ -184,18 +184,20 @@ pull_bio <- function(common_name = NULL, if (survey %in% c("Triennial", "AFSC.Slope")) { if (!is.null(bio_pull) & sum(is.na(bio_pull$age_years)) != length(bio_pull$age_years)) { age_data <- bio_pull + } else { + age_data <- NULL } bio <- list() if (is.data.frame(len_pull)) { - bio$length_data <- len_pull + bio$Lengths <- len_pull } else { - bio$length_data <- "no_lengths_available" + bio$Lengths <- "no_lengths_available" } if (!is.null(age_data)) { - bio$age_data <- age_data + bio$Ages <- age_data } else { - bio$age_data <- "no_ages_available" + bio$Ages <- "no_ages_available" } if (verbose) { message("Triennial & AFSC Slope data returned as a list: bio_data$length_data and bio_data$age_data\n") @@ -203,7 +205,6 @@ pull_bio <- function(common_name = NULL, } if(convert) { - bio$data <- bio$date_formatted bio$age <- bio$age_years bio$weight <- bio$weight_kg firstup <- function(x) { @@ -211,8 +212,14 @@ pull_bio <- function(common_name = NULL, x } if(survey %in% c("Triennial", "AFSC.Slope")){ + bio[[1]][, "weight"] <- bio[[1]][, "weight_kg"] colnames(bio[[1]]) <- firstup(colnames(bio[[1]])) - colnames(bio[[2]]) <- firstup(colnames(bio[[2]])) + + if(!is.null(nrow(bio[[2]]))){ + bio[[2]][, "age"] <- bio[[2]][, "age_years"] + bio[[2]][, "weight"] <- bio[[2]][, "weight_kg"] + colnames(bio[[2]]) <- firstup(colnames(bio[[2]])) + } } else { colnames(bio) <- firstup(colnames(bio)) } diff --git a/R/pull_biological_samples.R b/R/pull_biological_samples.R index 9fd64aa..4936cba 100644 --- a/R/pull_biological_samples.R +++ b/R/pull_biological_samples.R @@ -1,18 +1,18 @@ #' Pull biological sample information from the NWFSC data warehouse for biological -#' collections taken 
at sea. Generally these are samples that require lab processing. -#' Generally, these types of biological sample are maturity, stomach, fin clips, and -#' tissue samples. This function returns collection information for these samples -#' include the sample numbers which allows the lab analysis to be linked back to +#' collections taken at sea. Generally these are samples that require lab processing. +#' Generally, these types of biological sample are maturity, stomach, fin clips, and +#' tissue samples. This function returns collection information for these samples +#' include the sample numbers which allows the lab analysis to be linked back to #' the sampled fish. #' The website is: https://www.webapps.nwfsc.noaa.gov/data. #' #' @template common_name #' @template sci_name -#' @template years -#' @template survey -#' @template dir -#' @template verbose +#' @template years +#' @template survey +#' @template dir +#' @template verbose #' #' @return Returns a data frame of special biological samples with sample number #' @author Chantel Wetzel @@ -21,17 +21,22 @@ #' @import glue #' #' -pull_biological_samples <- function(common_name = NULL, +pull_biological_samples <- function(common_name = NULL, sci_name = NULL, - years= c(1980, 2050), - survey = "NWFSC.Combo", - dir = NULL, + years= c(1980, 2050), + survey = "NWFSC.Combo", + dir = NULL, verbose = TRUE) { # increase the timeout period to avoid errors when pulling data options(timeout = 4000000) - check_dir(dir = dir, verbose = verbose) + if(length(c(common_name, sci_name)) != max(c(length(common_name), length(sci_name)))){ + stop("Can not pull data using both the common_name or sci_name together. + \n Please retry using only one." 
) + } + + check_dir(dir = dir, verbose = verbose) project_long <- check_survey(survey = survey) @@ -42,8 +47,7 @@ pull_biological_samples <- function(common_name = NULL, if (is.null(common_name)) { var_name <- "scientific_name" species <- sci_name - } - if (is.null(sci_name)) { + } else { var_name <- "common_name" species <- common_name } @@ -53,63 +57,58 @@ pull_biological_samples <- function(common_name = NULL, } # symbols here are generally: %22 = ", %2C = ",", %20 = " " - species_str <- paste0("%22",stringr::str_replace_all(species[1]," ","%20"),"%22") - if(length(species) > 1) { - for(i in 2:length(species)) { - species_str <- paste0(species_str, "%2C", paste0("%22",stringr::str_replace_all(species[i]," ","%20"),"%22")) - } - } + species_str <- convert_to_hex_string(species) add_species <- paste0("field_identified_taxonomy_dim$", var_name, "|=[", species_str,"]") - - if (species[1] == "pull all") { + + if (any(species == "pull all")) { add_species <- "" } vars_str <- c( - "common_name", "scientific_name", - "age_years", + "common_name", "scientific_name", + "age_years", #"best_available_taxonomy_observation_detail_dim$method_description", "best_available_taxonomy_observation_detail_whid", - "date_yyyymmdd", - "depth_m", + "date_yyyymmdd", + "depth_m", "individual_tracking_id", "lab_maturity_detail_dim", - "latitude_dd", - "left_pectoral_fin_id", - "leg", - "length_cm", + "latitude_dd", + "left_pectoral_fin_id", + "leg", + "length_cm", "length_type", - "longitude_dd", + "longitude_dd", "max_depth_m", - "min_depth_m", - "most_recent_age_update", + "min_depth_m", + "most_recent_age_update", "most_recent_maturity_update_date_whid", - "most_recent_taxon_update", - "otosag_id", + "most_recent_taxon_update", + "otosag_id", "ovary_id", - "ovary_proportion_atresia", - "partition", - "pass", - "performance", - "program", - "project", - "reason_stn_invalid", - "sex", + "ovary_proportion_atresia", + "partition", + "pass", + "performance", + "program", + "project", + 
"reason_stn_invalid", + "sex", "species_category", "species_subcategory", - "stomach_id", - "taxon_rank", - "taxon_source", - "tissue_id", + "stomach_id", + "taxon_rank", + "taxon_source", + "tissue_id", "tow", - "trawl_id", - "vessel", - "vessel_id", - "weight_kg", - "width_cm", + "trawl_id", + "vessel", + "vessel_id", + "weight_kg", + "width_cm", "width_type", - "year", - "year_stn_invalid", + "year", + "year_stn_invalid", "lab_maturity_detail_dim$biologically_mature_certain_indicator", "lab_maturity_detail_dim$biologically_mature_indicator" ) @@ -120,9 +119,9 @@ pull_biological_samples <- function(common_name = NULL, "/selection.json?filters=", paste0("project=",paste(strsplit(project_long, " ")[[1]], collapse = "%20")), ",", add_species, - ",year>", years[1], ",year<", years[2], + ",year>", years[1], ",year<", years[2], #",ovary_id>0&", - "&variables=", + "&variables=", glue::glue_collapse(vars_str, sep = ",") ) @@ -130,17 +129,17 @@ pull_biological_samples <- function(common_name = NULL, message("Pulling maturity, stomach, fin clip, and tissue sample data.") } bio_samples <- try(get_json(url = url_text)) - + keep <- which(bio_samples$ovary_id > 0 | bio_samples$stomach_id > 0 | bio_samples$tissue_id > 0 | bio_samples$left_pectoral_fin_id > 0) bio_samples <- bio_samples[keep, ] rename_columns <- which( - colnames(bio_samples) %in% + colnames(bio_samples) %in% c("lab_maturity_detail_dim$biologically_mature_certain_indicator", "lab_maturity_detail_dim$biologically_mature_indicator")) - colnames(bio_samples)[rename_columns] <- + colnames(bio_samples)[rename_columns] <- c("biologically_mature_certain_indicator", "biologically_mature_indicator") diff --git a/R/pull_catch.R b/R/pull_catch.R index 7830379..80bfc25 100644 --- a/R/pull_catch.R +++ b/R/pull_catch.R @@ -6,7 +6,7 @@ #' by leaving both `common_name = NULL` and `sci_name = NULL`. 
#' #' @details -#' The data available in the warehouse are cleaned pior to being downloaded +#' The data available in the warehouse are cleaned prior to being downloaded #' with the intent that they provide the best available information for use #' in an index-standardization procedure. The removed samples may be of use #' to others with a less-restrictive goal than producing an index of abundance. @@ -24,9 +24,9 @@ #' @template sci_name #' @template years #' @template survey -#' @template dir +#' @template dir #' @template convert -#' @template verbose +#' @template verbose #' #' @author Chantel Wetzel #' @export @@ -54,23 +54,28 @@ #' "vermilion and sunset rockfish"), survey = "NWFSC.Combo") #' #' catch_data <- pull_catch(sci_name = c("Sebastes miniatus", -#' "Sebastes sp. (crocotulus)", +#' "Sebastes sp. (crocotulus)", #' "Sebastes sp. (miniatus / crocotulus)"), #' survey = "NWFSC.Combo") #' } #' -pull_catch <- function(common_name = NULL, - sci_name = NULL, - years = c(1980, 2050), - survey = NULL, - dir = NULL, +pull_catch <- function(common_name = NULL, + sci_name = NULL, + years = c(1970, 2050), + survey, + dir = NULL, convert = TRUE, verbose = TRUE) { if (survey %in% c("NWFSC.Shelf.Rockfish", "NWFSC.Hook.Line")) { stop("The catch pull currently does not work for NWFSC Hook & Line Survey data.", "\nA subset of the data is available on the data warehouse https://www.webapp.nwfsc.noaa.gov/data", - "\nContact John Harms (john.harms@noaa.gov) for the full data set.") + "\nContact John Harms (john.harms@noaa.gov) for the full data set.") + } + + if(length(c(common_name, sci_name)) != max(c(length(common_name), length(sci_name)))){ + stop("Can not pull data using both the common_name or sci_name together. + \n Please retry using only one." 
) } check_dir(dir = dir, verbose = verbose) @@ -78,15 +83,14 @@ pull_catch <- function(common_name = NULL, if (is.null(common_name)) { var_name <- "scientific_name" species <- sci_name - } - if (is.null(sci_name)) { + } else { var_name <- "common_name" species <- common_name } if (is.null(sci_name) & is.null(common_name)) { var_name <- "common_name" species <- "pull all" - } + } # Survey options available in the data warehouse project_long <- check_survey(survey = survey) @@ -97,17 +101,17 @@ pull_catch <- function(common_name = NULL, # Pull data for the specific species for the following variables # Can only pull the nested fields (legacy performance and statistical partition) if - # the main table fields are specified. Could pull separate and then join which + # the main table fields are specified. Could pull separate and then join which # would allow us to eliminate vars_long form the main pull perf_codes <- c( - "operation_dim$legacy_performance_code", + "operation_dim$legacy_performance_code", "statistical_partition_dim$statistical_partition_type" ) vars_long <- c( - "common_name", "scientific_name", "project", "year", "vessel", "tow", - "total_catch_numbers", "total_catch_wt_kg", + "common_name", "scientific_name", "project", "year", "vessel", "tow", + "total_catch_numbers", "total_catch_wt_kg", "subsample_count", "subsample_wt_kg", "cpue_kg_per_ha_der", perf_codes ) @@ -116,20 +120,10 @@ pull_catch <- function(common_name = NULL, vars_short <- vars_long[!vars_long %in% perf_codes] # symbols here are generally: %22 = ", %2C = ",", %20 = " " - species_str <- paste0( - "%22",stringr::str_replace_all(species[1]," ","%20"),"%22" - ) - - if(length(species) > 1) { - for(i in 2:length(species)) { - species_str <- paste0( - species_str, "%2C", paste0( - "%22",stringr::str_replace_all(species[i]," ","%20"),"%22")) - } - } + species_str <- convert_to_hex_string(species) add_species <- paste0("field_identified_taxonomy_dim$", var_name, "|=[", species_str,"]") - - if 
(species[1] == "pull all") { + + if (any(species == "pull all")) { add_species <- "" } @@ -145,10 +139,8 @@ pull_catch <- function(common_name = NULL, # Pull data from positive tows for selected species positive_tows <- try(get_json(url = url_text)) - if (!is.data.frame(positive_tows)) { - stop(cat("\nNo data returned by the warehouse for the filters given. - \n Make sure the year range is correct for the project selected and the input name is correct, - \n otherwise there may be no data for this species from this project.\n")) + if(!is.data.frame(positive_tows)){ + stop() } # Remove water hauls @@ -169,9 +161,14 @@ pull_catch <- function(common_name = NULL, positive_tows <- positive_tows[good_tows, ] positive_tows <- positive_tows[, vars_short] + if(sum(is.na(positive_tows[, "common_name"])) > 0) { + replace <- which(is.na(positive_tows[, "common_name"])) + positive_tows[replace, "common_name"] <- positive_tows[replace, "scientific_name"] + } + # Pull all tow data including tows where the species was not observed - vars_long <- c("project", "year", "vessel", "pass", "tow", "datetime_utc_iso", - "depth_m", "longitude_dd", "latitude_dd", "area_swept_ha_der", + vars_long <- c("project", "year", "vessel", "pass", "tow", "datetime_utc_iso", + "depth_m", "longitude_dd", "latitude_dd", "area_swept_ha_der", "trawl_id", "operation_dim$legacy_performance_code") vars_short <- vars_long[vars_long != "operation_dim$legacy_performance_code"] @@ -181,7 +178,7 @@ pull_catch <- function(common_name = NULL, years = years, vars_long = vars_long) - all_tows <- try(get_json(url = url_text)) + all_tows <- try(get_json(url = url_text)) # Remove water hauls water_hauls <- is.na(all_tows[, "operation_dim$legacy_performance_code"]) @@ -194,39 +191,52 @@ pull_catch <- function(common_name = NULL, all_tows <- all_tows[ !duplicated(paste(all_tows$year, all_tows$pass, all_tows$vessel, all_tows$tow)), - c("project", "trawl_id", "year", "pass", "vessel", "tow", "datetime_utc_iso", "depth_m", - 
"longitude_dd", "latitude_dd", "area_swept_ha_der" - ) + #c("project", "trawl_id", "year", "pass", "vessel", "tow", "datetime_utc_iso", "depth_m", + # "longitude_dd", "latitude_dd", "area_swept_ha_der" + #) ] - # Link each data set together based on trawl_id - if (species == "pull all"){ - grid <- expand.grid( - "trawl_id" = unique(all_tows$trawl_id), - "common_name" = unique(positive_tows$common_name), - stringsAsFactors = FALSE - ) - } else { - grid <- expand.grid( - "trawl_id" = unique(all_tows$trawl_id), - "common_name" = unique(positive_tows$common_name), - "scientific_name" = unique(positive_tows$scientific_name), - stringsAsFactors = FALSE - ) - } - - catch_data <- dplyr::left_join( - grid, - all_tows, - by = intersect(colnames(grid), colnames(all_tows)), - multiple = "all" + positive_tows_grouped <- dplyr::group_by( + .data = positive_tows, + common_name, scientific_name ) - catch <- dplyr::left_join( - catch_data, - positive_tows, - by = intersect(colnames(catch_data), colnames(positive_tows)), - multiple = "all" + # Split positive_tows into 1 data frame for each combination of common_name + # and scientific_name and store in a named list for purrr::map() + positive_tows_split <- dplyr::group_split(positive_tows_grouped) + group_names <- dplyr::group_keys(positive_tows_grouped) + names(positive_tows_split) <- tidyr::unite(group_names, col = "groups") |> + dplyr::pull(groups) + + # For each data frame in the large list, find the tows that are not present + # in positive_tows and join them into a single data frame + # Give them the appropriate common and scientific names using .id then split + # the concatenated column out into the two original columns + names_intersect <- intersect(colnames(all_tows), colnames(positive_tows)) + zero_tows <- purrr::map_df( + .x = positive_tows_split, + .f = \(y) dplyr::anti_join(x = all_tows, y = y, by = names_intersect), + .id = "groups" + ) |> + tidyr::separate_wider_delim( + cols = "groups", + delim = "_", + names = 
colnames(group_names) + ) + + # Join the positive tows with the tow information + positive_tows_with_tow_info <- dplyr::left_join( + x = positive_tows, + y = all_tows, + by = intersect(colnames(all_tows), colnames(positive_tows)) ) + # Join the augmented positive tow information with the zero tows + # arrange by common_name and tow_id + catch <- dplyr::full_join( + x = positive_tows_with_tow_info, + y = zero_tows, + by = c(colnames(group_names), colnames(all_tows)) + ) |> + dplyr::arrange(common_name, trawl_id) # Need to check what this is doing no_area <- which(is.na(catch$area_swept_ha_der)) @@ -240,20 +250,20 @@ pull_catch <- function(common_name = NULL, catch[no_area, "area_swept_ha_der"] <- mean(catch$area_swept_ha_der, trim = 0.05, na.rm = TRUE) } - # Fill in zeros where needed + # Fill in zeros where needed catch[is.na(catch)] <- 0 - catch$date_formatted <- chron::chron( - format(as.POSIXlt(catch$datetime_utc_iso, format = "%Y-%m-%dT%H:%M:%S"), "%Y-%m-%d"), + catch$date <- chron::chron( + format(as.POSIXlt(catch$datetime_utc_iso, format = "%Y-%m-%dT%H:%M:%S"), "%Y-%m-%d"), format = "y-m-d", out.format = "YYYY-m-d") catch$trawl_id <- as.character(catch$trawl_id) # kg / km2 <- (100 hectare / 1 *km2) * (kg / hectare) catch$cpue_kg_km2 <- catch$cpue_kg_per_ha_der * 100 + colnames(catch)[which(colnames(catch) == "area_swept_ha_der")] <- "area_swept_ha" if(convert) { - catch$Area_Swept_ha <- catch$area_swept_ha_der - catch$date <- catch$date_formatted + firstup <- function(x) { substr(x, 1, 1) <- toupper(substr(x, 1, 1)) x diff --git a/R/pull_haul.R b/R/pull_haul.R index 8534cb6..4c29222 100644 --- a/R/pull_haul.R +++ b/R/pull_haul.R @@ -2,10 +2,10 @@ #' The website is: https://www.webapps.nwfsc.noaa.gov/data. #' This function can be used to pull haul data and associated covariates. 
#' -#' @template years -#' @template survey -#' @template dir -#' @template verbose +#' @template years +#' @template survey +#' @template dir +#' @template verbose #' #' @return Returns a data frame of haul characteristics for satisfactory hauls #' @author Eric Ward, Chantel Wetzel @@ -19,15 +19,15 @@ #' haul_data <- pull_haul() #' } #' -pull_haul <- function(years= c(1980, 2050), - survey = NULL, - dir = NULL, +pull_haul <- function(years= c(1970, 2050), + survey, + dir = NULL, verbose = TRUE) { # increase the timeout period to avoid errors when pulling data options(timeout = 4000000) - check_dir(dir = dir, verbose = verbose) + check_dir(dir = dir, verbose = verbose) project_long <- check_survey(survey = survey) @@ -50,10 +50,10 @@ pull_haul <- function(years= c(1980, 2050), "vessel_start_latitude_dd", "vessel_start_longitude_dd" ) - url_text <- get_url(data_table = "trawl.operation_haul_fact", - years = years, + url_text <- get_url(data_table = "trawl.operation_haul_fact", + years = years, project_long = project_long, - vars_long = var_str) + vars_long = var_str) if (verbose) { @@ -61,10 +61,12 @@ pull_haul <- function(years= c(1980, 2050), } haul_data <- try(get_json(url = url_text)) - haul_data$date_formatted <- - chron::chron(format(as.POSIXlt(haul_data$datetime_utc_iso, format = "%Y-%m-%dT%H:%M:%S"), "%Y-%m-%d"), + haul_data$date_formatted <- + chron::chron(format(as.POSIXlt(haul_data$datetime_utc_iso, format = "%Y-%m-%dT%H:%M:%S"), "%Y-%m-%d"), format = "y-m-d", out.format = "YYYY-m-d") + haul_data$trawl_id <- as.character(haul_data$trawl_id) + save_rdata( x = haul_data, dir = dir, diff --git a/R/pull_spp.R b/R/pull_spp.R new file mode 100644 index 0000000..a303a28 --- /dev/null +++ b/R/pull_spp.R @@ -0,0 +1,22 @@ +#' Pull species names from the warehouse +#' +#' Pull common name and scientific name information from the +#' data warehouse. 
+#' The website is https://www.webapps.nwfsc.noaa.gov/data +#' +#' @author Kelli Faye Johnson +#' @export +#' +#' @examples +#' \dontrun{ +#' spp <- pull_spp() +#' } +#' +pull_spp <- function() { + # Get the data from saved .rda file + PullSpp <- NULL + newenv <- new.env(hash = TRUE, parent = parent.frame()) + utils::data(species, package = "nwfscSurvey", envir = newenv) + species <- get("species", envir = newenv) + return(species) +} diff --git a/R/utilities.R b/R/utilities.R new file mode 100644 index 0000000..3d5beed --- /dev/null +++ b/R/utilities.R @@ -0,0 +1,33 @@ +#' Utility function used throughout the package +#' +#' @details +#' Function that converts a string to a hex string +#' for common name or scientific name when pulling +#' data. This function is used within the pull_* +#' functions that retrive species specific data +#' +#' @param x A string of either common_name or +#' sci_name +#' @author Kelli Johnson +#' +#' @examples +#' \dontrun{ +#' common_name <- c("lingcod", "sablefish", "Pacific cod") +#' convert_to_hex_string(common_name) +#' } +#' +convert_to_hex_string <- function(x) { + hex_comma <- toupper(paste0("%", charToRaw(","))) + hex_quote <- paste0("%", charToRaw('"')) + hex_space <- paste0("%", charToRaw(" ")) + stopifnot(inherits(x, "character")) + + # Convert spaces to %20 + x_no_spaces <- gsub(pattern = " ", replacement = hex_space, x) + + # Wrap each string in quotes with %22 and + # separate strings with %2C, which is a comma + out <- paste0(hex_quote, x_no_spaces, hex_quote, collapse = hex_comma) + + return(out) +} diff --git a/data/species.rda b/data/species.rda new file mode 100644 index 0000000..ea53f84 Binary files /dev/null and b/data/species.rda differ diff --git a/man-roxygen/survey.R b/man-roxygen/survey.R index c8ea461..2f4c0ec 100644 --- a/man-roxygen/survey.R +++ b/man-roxygen/survey.R @@ -11,23 +11,23 @@ #' * NWFSC.Hook.Line (not yet working), #' * NWFSC.Video, #' * Triennial.Canada -#' +#' #' The National Marine Fishery 
Service Alaska Fisheries Science Center (AFSC) #' Triennial survey was conducted between 1977 - 2004 occurring every 3rd year. -#' The initial year, 1977, survey is not traditionally used in calculating -#' indices of abundance. The Triennial survey sampled areas within the Canadian -#' EEZ on the West Coast of Vancouver Island in 1980 - 2001 but these data are +#' The initial year, 1977, survey is not traditionally used in calculating +#' indices of abundance. The Triennial survey sampled areas within the Canadian +#' EEZ on the West Coast of Vancouver Island in 1980 - 2001 but these data are #' associated with a different survey name "Triennial.Canada". -#' The AFSC Slope Survey (AFSC.Slope) along the west coast of the U.S. began in 1984 and occurred -#' annually from 1988-2001, with the exception of 1994 and 1998, when surveys were not conducted. -#' Prior to 1997, only a limited portion of the coast was covered in each year. -#' U.S. West Coast groundfish stock assessments only use the four years of consistent +#' The AFSC Slope Survey (AFSC.Slope) along the west coast of the U.S. began in 1984 and occurred +#' annually from 1988-2001, with the exception of 1994 and 1998, when surveys were not conducted. +#' Prior to 1997, only a limited portion of the coast was covered in each year. +#' U.S. West Coast groundfish stock assessments only use the four years of consistent #' and complete survey coverage (1997, 1999-2001). The Northwest Fisheries Science -#' Center (NWFSC) Slope survey (NWFSC.Slope) was conducted between 1998 - 2001. -#' The NWFSC West Coast Groundfish Bottom Trawl survey (NWFSC.Combo) is conducted +#' Center (NWFSC) Slope survey (NWFSC.Slope) was conducted between 1998 - 2001. +#' The NWFSC West Coast Groundfish Bottom Trawl survey (NWFSC.Combo) is conducted #' annualy starting in 2003 (excluding 2020) and samples both the U.S. west coast -#' shelf and slope between 55 - 1,280 meters. +#' shelf and slope between 55 - 1,280 meters. 
#' Data can only be pulled from one survey at a time, though we are working on #' allowing for a vector of survey names. #' Currently, `NWFSC.Shelf.Rockfish` and `NWFSC.Hook.Line` are not supported. -#' The default of `NULL` is a placeholder that must be replaced with an entry. + diff --git a/man-roxygen/years.R b/man-roxygen/years.R index ea4fe00..2a7540f 100644 --- a/man-roxygen/years.R +++ b/man-roxygen/years.R @@ -1,2 +1,3 @@ #' @param years An integer vector of length two with the -#' range of years to pull data for. +#' range of years to pull data for (e.g., c(2003, 2024)). +#' Vector can not contain -Inf or Inf. diff --git a/man/PullBio.fn.Rd b/man/PullBio.fn.Rd index 95c5654..16797de 100644 --- a/man/PullBio.fn.Rd +++ b/man/PullBio.fn.Rd @@ -12,7 +12,7 @@ PullBio.fn( SciName = NULL, YearRange = c(1980, 5000), SurveyName = NULL, - SaveFile = FALSE, + SaveFile = lifecycle::deprecated(), Dir = NULL, verbose = TRUE ) @@ -28,9 +28,12 @@ PullBio.fn( Triennial, AFSC.Slope, NWFSC.Combo, NWFSC.Slope, NWFSC.Shelf, NWFSC.Hypoxia, NWFSC.Santa.Barb.Basin, NWFSC.Shelf.Rockfish (NWFSC.Hook.Line but both are not working), NWFSC.Video#'} -\item{SaveFile}{option to save the file to the directory} +\item{SaveFile}{Deprecated with {nwfscSurvey} 2.3. Output will be save automatically +if the Dir input is specified.} -\item{Dir}{directory where the file should be saved} +\item{Dir}{The directory where you want the output file to be saved. +The name of the file within \code{Dir} will start with Catch_ and end with .rdata. +Default NULL which will not save an output file.} \item{verbose}{A logical that specifies if you want to print messages and warnings to the console. 
The default is \code{TRUE}.} @@ -44,19 +47,19 @@ In order to pull all species leave Name = NULL and SciName = NULL \examples{ \dontrun{ # SurveyName is only arg that has to be specified -bio_dat <- PullBio.fn(SurveyName = "NWFSC.Combo") + bio_dat <- PullBio.fn(SurveyName = "NWFSC.Combo") # Example with specified common name -bio_dat <- PullBio.fn(Name = "vermilion rockfish", -SurveyName = "NWFSC.Combo") + bio_dat <- PullBio.fn(Name = "vermilion rockfish", + SurveyName = "NWFSC.Combo") # Example with specified scientific name -bio_dat <- PullBio.fn(SciName = "Eopsetta jordani", -SurveyName = "NWFSC.Combo") + bio_dat <- PullBio.fn(SciName = "Eopsetta jordani", + SurveyName = "NWFSC.Combo") # Example with multiple names -bio_dat <- PullBio.fn(SciName = c("Sebastes aurora","Eopsetta jordani"), -SurveyName = "NWFSC.Combo") + bio_dat <- PullBio.fn(SciName = c("Sebastes aurora","Eopsetta jordani"), + SurveyName = "NWFSC.Combo") } } diff --git a/man/PullCatch.fn.Rd b/man/PullCatch.fn.Rd index 822ecbd..05cf462 100644 --- a/man/PullCatch.fn.Rd +++ b/man/PullCatch.fn.Rd @@ -9,7 +9,7 @@ PullCatch.fn( SciName = NULL, YearRange = c(1980, 5000), SurveyName = NULL, - SaveFile = FALSE, + SaveFile = lifecycle::deprecated(), Dir = NULL, verbose = TRUE ) @@ -53,12 +53,12 @@ allowing for a vector of survey names and \code{NWFSC.Shelf.Rockfish} and \code{NWFSC.Hook.Line} are not supported. The default of \code{NULL} is a placeholder that must be replaced with an entry.} -\item{SaveFile}{A logical value specifying whether or not the the data should -be saved to a file in \code{Dir}. Must change from the default of \code{FALSE} to save a file.} +\item{SaveFile}{Deprecated with {nwfscSurvey} 2.3. Output will be save automatically +if the Dir input is specified.} -\item{Dir}{If \code{SaveFile = TRUE}, then one must specify the directory where you want -the resulting file to be saved. The directory where the file should be saved. 
-The name of the file within \code{Dir} will start with Catch_ and end with .rda.} +\item{Dir}{The directory where you want the output file to be saved. +The name of the file within \code{Dir} will start with Catch_ and end with .rdata. +Default NULL which will not save an output file.} \item{verbose}{A logical that specifies if you want to print messages and warnings to the console. The default is \code{TRUE}.} @@ -105,5 +105,5 @@ catch_dat <- PullBio.fn(Name = c("vermilion rockfish", } \author{ -Chantel Wetzel based on code by John Wallace +Chantel Wetzel (maintainer) based on code by John Wallace } diff --git a/man/PullHaul.fn.Rd b/man/PullHaul.fn.Rd index 496dc8d..6937165 100644 --- a/man/PullHaul.fn.Rd +++ b/man/PullHaul.fn.Rd @@ -9,7 +9,7 @@ This function can be used to pull haul data and associated covariates} PullHaul.fn( YearRange = c(1980, 5000), SurveyName = NULL, - SaveFile = FALSE, + SaveFile = lifecycle::deprecated(), Dir = NULL, verbose = TRUE ) @@ -21,7 +21,8 @@ PullHaul.fn( Triennial, AFSC.Slope, NWFSC.Combo, NWFSC.Slope, NWFSC.Shelf, NWFSC.Hypoxia, NWFSC.Santa.Barb.Basin, NWFSC.Shelf.Rockfish (NWFSC.Hook.Line but both are not working), NWFSC.Video#'} -\item{SaveFile}{option to save the file to the directory} +\item{SaveFile}{Deprecated with {nwfscSurvey} 2.3. Output will be save automatically +if the Dir input is specified.} \item{Dir}{directory where the file should be saved} diff --git a/man/check_survey.Rd b/man/check_survey.Rd index 6f7b0d2..0f91caa 100644 --- a/man/check_survey.Rd +++ b/man/check_survey.Rd @@ -40,8 +40,7 @@ annualy starting in 2003 (excluding 2020) and samples both the U.S. west coast shelf and slope between 55 - 1,280 meters. Data can only be pulled from one survey at a time, though we are working on allowing for a vector of survey names. -Currently, \code{NWFSC.Shelf.Rockfish} and \code{NWFSC.Hook.Line} are not supported. 
-The default of \code{NULL} is a placeholder that must be replaced with an entry.} +Currently, \code{NWFSC.Shelf.Rockfish} and \code{NWFSC.Hook.Line} are not supported.} } \description{ Check and create survey string diff --git a/man/convert_to_hex_string.Rd b/man/convert_to_hex_string.Rd new file mode 100644 index 0000000..9530aba --- /dev/null +++ b/man/convert_to_hex_string.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utilities.R +\name{convert_to_hex_string} +\alias{convert_to_hex_string} +\title{Utility function used throughout the package} +\usage{ +convert_to_hex_string(x) +} +\arguments{ +\item{x}{A string of either common_name or +sci_name} +} +\description{ +Utility function used throughout the package +} +\details{ +Function that converts a string to a hex string +for common name or scientific name when pulling +data. This function is used within the pull_* +functions that retrive species specific data +} +\examples{ +\dontrun{ +common_name <- c("lingcod", "sablefish", "Pacific cod") +convert_to_hex_string(common_name) +} + +} +\author{ +Kelli Johnson +} diff --git a/man/get_url.Rd b/man/get_url.Rd index 32269bb..8d1309e 100644 --- a/man/get_url.Rd +++ b/man/get_url.Rd @@ -16,7 +16,8 @@ trawl.catch_fact, trawl.operation_haul_fact} functions.} \item{years}{An integer vector of length two with the -range of years to pull data for.} +range of years to pull data for (e.g., c(2003, 2024)). 
+Vector can not contain -Inf or Inf.} \item{vars_long}{string of fields to pull from the data warehouse} } diff --git a/man/pull_bio.Rd b/man/pull_bio.Rd index 352dbaa..4d99df5 100644 --- a/man/pull_bio.Rd +++ b/man/pull_bio.Rd @@ -10,8 +10,8 @@ In order to pull all species leave common_name or sci_name as NULL} pull_bio( common_name = NULL, sci_name = NULL, - years = c(1980, 2050), - survey = NULL, + years = c(1970, 2050), + survey, dir = NULL, convert = TRUE, verbose = TRUE @@ -33,7 +33,8 @@ e.g., vermilion rockfish (see the example below). Use the \code{common_name} argument if you know the common name.} \item{years}{An integer vector of length two with the -range of years to pull data for.} +range of years to pull data for (e.g., c(2003, 2024)). +Vector can not contain -Inf or Inf.} \item{survey}{A character entry from one of the following options that specifies which survey to pull the data for. The input options are: @@ -68,8 +69,7 @@ annualy starting in 2003 (excluding 2020) and samples both the U.S. west coast shelf and slope between 55 - 1,280 meters. Data can only be pulled from one survey at a time, though we are working on allowing for a vector of survey names. -Currently, \code{NWFSC.Shelf.Rockfish} and \code{NWFSC.Hook.Line} are not supported. -The default of \code{NULL} is a placeholder that must be replaced with an entry.} +Currently, \code{NWFSC.Shelf.Rockfish} and \code{NWFSC.Hook.Line} are not supported.} \item{dir}{directory where ouptut will be saved. The directory where the file should be saved. If dir = NULL no output will be saved.} diff --git a/man/pull_biological_samples.Rd b/man/pull_biological_samples.Rd index 4206054..7900d80 100644 --- a/man/pull_biological_samples.Rd +++ b/man/pull_biological_samples.Rd @@ -35,7 +35,8 @@ e.g., vermilion rockfish (see the example below). 
Use the \code{common_name} argument if you know the common name.} \item{years}{An integer vector of length two with the -range of years to pull data for.} +range of years to pull data for (e.g., c(2003, 2024)). +Vector can not contain -Inf or Inf.} \item{survey}{A character entry from one of the following options that specifies which survey to pull the data for. The input options are: @@ -70,8 +71,7 @@ annualy starting in 2003 (excluding 2020) and samples both the U.S. west coast shelf and slope between 55 - 1,280 meters. Data can only be pulled from one survey at a time, though we are working on allowing for a vector of survey names. -Currently, \code{NWFSC.Shelf.Rockfish} and \code{NWFSC.Hook.Line} are not supported. -The default of \code{NULL} is a placeholder that must be replaced with an entry.} +Currently, \code{NWFSC.Shelf.Rockfish} and \code{NWFSC.Hook.Line} are not supported.} \item{dir}{directory where ouptut will be saved. The directory where the file should be saved. If dir = NULL no output will be saved.} diff --git a/man/pull_catch.Rd b/man/pull_catch.Rd index 89a2a72..8b95eda 100644 --- a/man/pull_catch.Rd +++ b/man/pull_catch.Rd @@ -7,8 +7,8 @@ pull_catch( common_name = NULL, sci_name = NULL, - years = c(1980, 2050), - survey = NULL, + years = c(1970, 2050), + survey, dir = NULL, convert = TRUE, verbose = TRUE @@ -30,7 +30,8 @@ e.g., vermilion rockfish (see the example below). Use the \code{common_name} argument if you know the common name.} \item{years}{An integer vector of length two with the -range of years to pull data for.} +range of years to pull data for (e.g., c(2003, 2024)). +Vector can not contain -Inf or Inf.} \item{survey}{A character entry from one of the following options that specifies which survey to pull the data for. The input options are: @@ -65,8 +66,7 @@ annualy starting in 2003 (excluding 2020) and samples both the U.S. west coast shelf and slope between 55 - 1,280 meters. 
Data can only be pulled from one survey at a time, though we are working on allowing for a vector of survey names. -Currently, \code{NWFSC.Shelf.Rockfish} and \code{NWFSC.Hook.Line} are not supported. -The default of \code{NULL} is a placeholder that must be replaced with an entry.} +Currently, \code{NWFSC.Shelf.Rockfish} and \code{NWFSC.Hook.Line} are not supported.} \item{dir}{directory where ouptut will be saved. The directory where the file should be saved. If dir = NULL no output will be saved.} @@ -84,7 +84,7 @@ for a single species or all observed species, where the latter is specified by leaving both \code{common_name = NULL} and \code{sci_name = NULL}. } \details{ -The data available in the warehouse are cleaned pior to being downloaded +The data available in the warehouse are cleaned prior to being downloaded with the intent that they provide the best available information for use in an index-standardization procedure. The removed samples may be of use to others with a less-restrictive goal than producing an index of abundance. @@ -116,7 +116,7 @@ catch_data <- pull_catch(common_name = c("vermilion rockfish", "vermilion and sunset rockfish"), survey = "NWFSC.Combo") catch_data <- pull_catch(sci_name = c("Sebastes miniatus", - "Sebastes sp. (crocotulus)", + "Sebastes sp. (crocotulus)", "Sebastes sp. (miniatus / crocotulus)"), survey = "NWFSC.Combo") } diff --git a/man/pull_gemm.Rd b/man/pull_gemm.Rd index f187cf0..13612eb 100644 --- a/man/pull_gemm.Rd +++ b/man/pull_gemm.Rd @@ -23,7 +23,8 @@ e.g., vermilion rockfish (see the example below). Use the \code{sci_name} argument if you know the latin name.} \item{years}{An integer vector of length two with the -range of years to pull data for.} +range of years to pull data for (e.g., c(2003, 2024)). +Vector can not contain -Inf or Inf.} \item{dir}{directory where ouptut will be saved. The directory where the file should be saved. 
If dir = NULL no output will be saved.} diff --git a/man/pull_haul.Rd b/man/pull_haul.Rd index f773048..741d098 100644 --- a/man/pull_haul.Rd +++ b/man/pull_haul.Rd @@ -6,11 +6,12 @@ The website is: https://www.webapps.nwfsc.noaa.gov/data. This function can be used to pull haul data and associated covariates.} \usage{ -pull_haul(years = c(1980, 2050), survey = NULL, dir = NULL, verbose = TRUE) +pull_haul(years = c(1970, 2050), survey, dir = NULL, verbose = TRUE) } \arguments{ \item{years}{An integer vector of length two with the -range of years to pull data for.} +range of years to pull data for (e.g., c(2003, 2024)). +Vector can not contain -Inf or Inf.} \item{survey}{A character entry from one of the following options that specifies which survey to pull the data for. The input options are: @@ -45,8 +46,7 @@ annualy starting in 2003 (excluding 2020) and samples both the U.S. west coast shelf and slope between 55 - 1,280 meters. Data can only be pulled from one survey at a time, though we are working on allowing for a vector of survey names. -Currently, \code{NWFSC.Shelf.Rockfish} and \code{NWFSC.Hook.Line} are not supported. -The default of \code{NULL} is a placeholder that must be replaced with an entry.} +Currently, \code{NWFSC.Shelf.Rockfish} and \code{NWFSC.Hook.Line} are not supported.} \item{dir}{directory where ouptut will be saved. The directory where the file should be saved. If dir = NULL no output will be saved.} diff --git a/man/pull_spp.Rd b/man/pull_spp.Rd new file mode 100644 index 0000000..ff7df7d --- /dev/null +++ b/man/pull_spp.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pull_spp.R +\name{pull_spp} +\alias{pull_spp} +\title{Pull species names from the warehouse} +\usage{ +pull_spp() +} +\description{ +Pull common name and scientific name information from the +data warehouse. 
+The website is https://www.webapps.nwfsc.noaa.gov/data +} +\examples{ +\dontrun{ +spp <- pull_spp() +} + +} +\author{ +Kelli Faye Johnson +} diff --git a/tests/testthat/test-codify_sex.R b/tests/testthat/test-codify_sex.R index ba95c06..4a2d7b0 100644 --- a/tests/testthat/test-codify_sex.R +++ b/tests/testthat/test-codify_sex.R @@ -2,11 +2,10 @@ test_that("AFSC Slope pull biological table of Pacific ocean perch sexes", { skip_on_cran() - dat <- PullBio.fn( - Name = "Pacific ocean perch", - SciName = NULL, YearRange = c(1910, 2020), - SurveyName = "AFSC.Slope", SaveFile = FALSE, - Dir = NULL, verbose = TRUE + dat <- pull_bio( + common_name = "Pacific ocean perch", + years = c(1910, 2020), + survey = "AFSC.Slope" ) originaltable <- table(dat[["Lengths"]][["Sex"]]) testthat::expect_equal( diff --git a/tests/testthat/test-data.R b/tests/testthat/test-data.R index 222e1e0..7868526 100644 --- a/tests/testthat/test-data.R +++ b/tests/testthat/test-data.R @@ -4,21 +4,6 @@ if (interactive()) options(mc.cores = parallel::detectCores()) # devtools::test() set.seed(1) -test_that("PullCatch", { - skip_on_cran() - - set.seed(123) - dat <- PullCatch.fn( - Name = "lingcod", - SciName = NULL, YearRange = c(2003, 2018), - SurveyName = "NWFSC.Combo", SaveFile = FALSE, - Dir = NULL, verbose = TRUE - ) - expect_is(dat, "data.frame") - expect_equal(nrow(dat), 10351) - expect_equal(length(which(dat$cpue_kg_km2 == 0)), 6887) -}) - test_that("pull_catch", { skip_on_cran() @@ -33,20 +18,6 @@ test_that("pull_catch", { expect_equal(nrow(dat), 10351) }) -test_that("PullCatch-multispecies", { - skip_on_cran() - - set.seed(123) - dat <- PullCatch.fn( - SciName = NULL, YearRange = 2017, - SurveyName = "NWFSC.Combo", SaveFile = FALSE, - Dir = NULL, verbose = TRUE - ) - expect_is(dat, "data.frame") - expect_equal(nrow(dat), 350126) - expect_equal(length(which(dat$cpue_kg_km2 == 0)), 330971) -}) - test_that("pull_catch-multispecies", { skip_on_cran() @@ -57,7 +28,33 @@ 
test_that("pull_catch-multispecies", { verbose = TRUE ) expect_is(dat, "data.frame") - expect_equal(nrow(dat), 350126) + expect_equal(nrow(dat), 392705) + expect_equal(length(which(dat$cpue_kg_km2 == 0)), 373550) + + dat_lingcod <- pull_catch( + common_name = "lingcod", + years = c(2017), + survey = "NWFSC.Combo", + verbose = TRUE + ) + dat_lingcod_sablefish <- pull_catch( + common_name = c("lingcod", "sablefish"), + years = c(2017), + survey = "NWFSC.Combo", + verbose = TRUE + ) + expect_equal( + NROW(dplyr::filter(dat, Common_name == "lingcod")), + NROW(dat_lingcod), + label = "entries of all species filtered for lingcod", + expected.label = "entries of lingcod" + ) + expect_equal( + NROW(dplyr::filter(dat_lingcod_sablefish, Common_name == "lingcod")), + NROW(dat_lingcod), + label = "entries of 2 species filtered for lingcod", + expected.label = "entries of lingcod" + ) }) test_that("PullHaul", { @@ -66,25 +63,11 @@ test_that("PullHaul", { set.seed(123) dat <- PullHaul.fn( YearRange = c(2003, 2018), - SurveyName = "NWFSC.Combo", SaveFile = FALSE, - Dir = NULL, verbose = TRUE - ) - expect_is(dat, "data.frame") - expect_equal(nrow(dat), 10361) -}) - -test_that("PullBio", { - skip_on_cran() - - set.seed(123) - dat <- PullBio.fn( - Name = "lingcod", - SciName = NULL, YearRange = c(2016, 2017), - SurveyName = "NWFSC.Combo", SaveFile = FALSE, + SurveyName = "NWFSC.Combo", Dir = NULL, verbose = TRUE ) expect_is(dat, "data.frame") - expect_equal(nrow(dat), 3363) + expect_equal(nrow(dat), 10351) }) test_that("pull_bio", {