diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 6602e653..98822609 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -13,6 +13,8 @@ permissions: read-all jobs: test-coverage: runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - uses: actions/checkout@v4 @@ -42,7 +44,7 @@ jobs: file: ./cobertura.xml plugin: noop disable_search: true - token: 8e3843c1-7a05-4c43-89ac-ddc8f0edff7d + token: ${{ secrets.CODECOV_TOKEN }} - name: Show testthat output if: always() diff --git a/DESCRIPTION b/DESCRIPTION index 9699fbce..5063cff4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: OmopSketch Title: Characterise Tables of an OMOP Common Data Model Instance -Version: 0.1.1 +Version: 0.1.2 Authors@R: c( person( "Marta", "Alcalde-Herraiz", @@ -15,10 +15,6 @@ Authors@R: c( "Elin", "Rowlands", email = "elin.rowlands@ndorms.ox.ac.uk", role = c("aut"), comment = c(ORCID = "0009-0005-5166-0417") ), - person( - "Cecilia", "Campanile", email = "cecilia.campanile@ndorms.ox.ac.uk", - role = c("aut"), comment = c(ORCID = "0009-0007-6629-4661") - ), person( "Edward", "Burn", email = "edward.burn@ndorms.ox.ac.uk", role = c("aut"), comment = c(ORCID = "0000-0002-9286-1128") @@ -34,45 +30,27 @@ Description: Summarises key information in data mapped to the Observational to obtain feasibility counts and trends. License: Apache License (>= 2) Encoding: UTF-8 -Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.2 -Suggests: - CodelistGenerator, - DBI, - duckdb, - flextable, - gt, - here, - knitr, - odbc, - remotes, - rmarkdown, - RPostgres, - testthat (>= 3.0.0), - withr, - omock (>= 0.3.0), - covr +Suggests: CodelistGenerator, DBI, here, knitr, odbc, remotes, + rmarkdown, RPostgres, testthat (>= 3.0.0), withr Config/testthat/edition: 3 Config/testthat/parallel: true -Imports: - CDMConnector (>= 1.3.0), - cli, - clock, - CohortConstructor (>= 0.3.1), - dplyr, - ggplot2, - omopgenerics (>= 0.3.1), - PatientProfiles (>= 1.2.1), - purrr, - rlang, - stringr, - tibble, - tidyr, - visOmopResults (>= 0.4.0) -Depends: - R (>= 2.10) +Imports: CDMConnector (>= 1.3.0), cli, clock, gt, flextable, + CohortCharacteristics (>= 0.3.0), CohortConstructor (>= 0.3.1), + dplyr, ggplot2, omock (>= 0.3.0), omopgenerics (>= 0.3.1), + PatientProfiles (>= 1.2.0), purrr, rlang, stringr, tibble, + tidyr, visOmopResults (>= 0.4.0), duckdb +Depends: R (>= 2.10) URL: https://OHDSI.github.io/OmopSketch/ BugReports: https://github.com/OHDSI/OmopSketch/issues VignetteBuilder: knitr -Remotes: - darwin-eu-dev/omopgenerics +NeedsCompilation: no +Packaged: 2024-11-12 21:14:36 UTC; martaa +Author: Marta Alcalde-Herraiz [aut, cre] + (), + Kim Lopez-Guell [aut] (), + Elin Rowlands [aut] (), + Edward Burn [aut] (), + MartĂ­ CatalĂ  [aut] () +Repository: CRAN +Date/Publication: 2024-11-12 21:40:07 UTC diff --git a/NAMESPACE b/NAMESPACE index 45674048..072eb118 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,20 +9,18 @@ export(plotInObservation) export(plotObservationPeriod) export(plotRecordCount) export(settings) -export(summariseAllConceptCounts) export(summariseClinicalRecords) export(summariseConceptCounts) export(summariseInObservation) -export(summariseMissingData) export(summariseObservationPeriod) export(summariseOmopSnapshot) +export(summarisePopulationCharacteristics) export(summariseRecordCount) export(suppress) -export(tableAllConceptCounts) export(tableClinicalRecords) -export(tableMissingData) export(tableObservationPeriod) export(tableOmopSnapshot) +export(tablePopulationCharacteristics) importFrom(dplyr,"%>%") importFrom(omopgenerics,bind) importFrom(omopgenerics,exportSummarisedResult) diff --git a/R/checks.R b/R/checks.R index 83402e6b..299513b9 100644 --- a/R/checks.R +++ b/R/checks.R @@ -1,32 +1,12 @@ #' @noRd -checkInterval <- function(interval, call = parent.frame()){ - omopgenerics::assertCharacter(interval, length = 1, na = FALSE, null = FALSE, call = call) +checkUnit <- function(unit, call = parent.frame()){ + omopgenerics::assertCharacter(unit, length = 1, na = FALSE, null = FALSE, call = call) - if(!interval %in% c("year","month")){ - cli::cli_abort("Interval argument {interval} is not valid. Valid options are either `year` or `month`.", call = call) + if(!unit %in% c("year","month")){ + cli::cli_abort("Unit argument {unit} is not valid. Valid options are either `year` or `month`.", call = call) } } -validateIntervals <- function(interval, call = parent.frame()){ - - omopgenerics::assertCharacter(interval, length = 1, na = FALSE, null = FALSE, call = call) - - if(!interval %in% c("overall","years","months","quarters")){ - cli::cli_abort("Interval argument {interval} is not valid. Valid options are either `overall`, `years`, `quarters` or `months`.", call = call) - } - - unitInterval <- dplyr::case_when( - interval == "overall" ~ NA, - interval == "quarters" ~ 4, - interval == "months" ~ 1, - interval == "years" ~ 1 - ) - - if(interval == "quarters"){quarters <- "month"}else{interval <- gsub("s$","",interval)} - - return(list("interval" = interval, "unitInterval" = unitInterval)) -} - #' @noRd checkCategory <- function(category, overlap = FALSE, type = "numeric", call = parent.frame()) { omopgenerics::assertList( diff --git a/R/mockOmopSketch.R b/R/mockOmopSketch.R index d746c8bc..840a8738 100644 --- a/R/mockOmopSketch.R +++ b/R/mockOmopSketch.R @@ -14,7 +14,10 @@ #' @return A mock cdm_reference object. #' @export #' @examples +#' \donttest{ +#' library(OmopSketch) #' mockOmopSketch(numberIndividuals = 100) +#' } mockOmopSketch <- function(con = NULL, writeSchema = NULL, numberIndividuals = 100, diff --git a/R/plotConceptCounts.R b/R/plotConceptCounts.R index ae0b2b06..045d1d78 100644 --- a/R/plotConceptCounts.R +++ b/R/plotConceptCounts.R @@ -9,7 +9,7 @@ #' @export #' @examples #' \donttest{ -#' library(dplyr) +#' library(dplyr, warn.conflicts = FALSE) #' #' cdm <- mockOmopSketch() #' @@ -22,8 +22,8 @@ #' ) #' #' result |> -#' filter(variable_name == "Number subjects") |> -#' plotConceptCounts(facet = "codelist_name", colour = "standard_concept_name") +#' filter(estimate_name == "person_count", variable_name == "overall") |> +#' plotConceptCounts(facet = "codelist_name", colour = "codelist_name") #' #' PatientProfiles::mockDisconnect(cdm) #' } @@ -36,55 +36,29 @@ plotConceptCounts <- function(result, # subset to results of interest result <- result |> visOmopResults::filterSettings(.data$result_type == "summarise_concept_counts") - if (nrow(result) == 0) { cli::cli_abort(c("!" = "No records found with result_type == summarise_concept_counts")) } # check only one estimate is contained - variable <- unique(result$variable_name) - if (length(variable) > 1) { + estimate <- unique(result$estimate_name) + if (length(estimate) > 1) { cli::cli_abort(c( - "!" = "Subset to the variable of interest, there are results from: {variable}.", - "i" = "result |> dplyr::filter(variable_name == '{variable[1]}')" + "!" = "Subset to the estimate of interest, there are results from: {estimate}.", + "i" = "result |> dplyr::filter(estimate_name == '{estimate[1]}')" )) } - result1 <- result |> visOmopResults::splitAdditional() - # Detect if there are several time intervals - if("time_interval" %in% colnames(result1)){ - # Line plot where each concept is a different line - p <- result1 |> - dplyr::filter(.data$time_interval != "overall") |> - visOmopResults::uniteAdditional(cols = c("time_interval", "standard_concept_name", "standard_concept_id", "source_concept_name", "source_concept_id", "domain_id")) |> - visOmopResults::scatterPlot(x = "time_interval", - y = "count", - line = TRUE, - point = TRUE, - ribbon = TRUE, - group = "standard_concept_name", - facet = facet, - colour = colour) - }else{ - if("standard_concept_name" %in% colnames(result1)){ - p <- result |> - visOmopResults::barPlot(x = "standard_concept_name", - y = "count", - facet = facet, - colour = colour) - }else{ - p <- result |> - visOmopResults::barPlot(x = "codelist_name", - y = "count", - facet = facet, - colour = colour) - } - p <- p + - ggplot2::labs( - x = "Concept name" - ) - } - - p + + order <- c("overall", sort(unique(result$variable_name[result$variable_name != "overall"]))) + result |> + dplyr::mutate(variable_name = factor(.data$variable_name, + levels = order)) |> + visOmopResults::barPlot(x = "variable_name", + y = estimate, + facet = facet, + colour = colour) + + ggplot2::labs( + x = "Concept name" + ) + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust = 1)) } diff --git a/R/plotInObservation.R b/R/plotInObservation.R index 7910f207..b4cd6ecf 100644 --- a/R/plotInObservation.R +++ b/R/plotInObservation.R @@ -10,7 +10,7 @@ #' @export #' @examples #' \donttest{ -#' library(dplyr) +#' library(dplyr, warn.conflicts = FALSE) #' #' cdm <- mockOmopSketch() #' @@ -52,35 +52,27 @@ plotInObservation <- function(result, } # warn - warnFacetColour(result, list(facet = asCharacterFacet(facet), colour = colour, "additional_level")) + warnFacetColour(result, list(facet = asCharacterFacet(facet), colour = colour, "variable_level")) # plot - if(length(unique(result$additional_level)) > 1 ){ - result |> - dplyr::mutate(additional_level = as.character(gsub("-01$","",as.Date(gsub(" to.*","",.data$additional_level))))) |> - dplyr::filter(.data$estimate_name == "count") |> - visOmopResults::scatterPlot( - x = "time_interval", - y = "count", - line = TRUE, - point = TRUE, - ribbon = FALSE, - ymin = NULL, - ymax = NULL, - facet = facet, - colour = colour, - group = c("cdm_name", "omop_table", visOmopResults::strataColumns(result)) - ) + - ggplot2::labs( - y = variable, - x = "Date" - ) - }else{ - result |> - dplyr::filter(.data$estimate_name == "count") |> - visOmopResults::barPlot(x = "variable_name", - y = "count", - facet = facet, - colour = colour) - } + result |> + dplyr::mutate(variable_level = as.Date(stringr::str_extract( + .data$variable_level, "^[^ to]+"))) |> + dplyr::filter(.data$estimate_name == "count") |> + visOmopResults::scatterPlot( + x = "variable_level", + y = "count", + line = TRUE, + point = TRUE, + ribbon = FALSE, + ymin = NULL, + ymax = NULL, + facet = facet, + colour = colour, + group = c("cdm_name", "omop_table", visOmopResults::strataColumns(result)) + ) + + ggplot2::labs( + y = variable, + x = "Date" + ) } diff --git a/R/plotObservationPeriod.R b/R/plotObservationPeriod.R index 88b711fa..0011ccd8 100644 --- a/R/plotObservationPeriod.R +++ b/R/plotObservationPeriod.R @@ -57,9 +57,9 @@ plotObservationPeriod <- function(result, validateFacet(facet, result) - optFacetColour <- c("cdm_name", "observation_period_ordinal", - visOmopResults::strataColumns(result)) - omopgenerics::assertChoice(facet, optFacetColour, null = TRUE) + optFacetColour <- visOmopResults::tidyColumns(result) + optFacetColour <- optFacetColour[optFacetColour %in% visOmopResults::tidyColumns(result)] + omopgenerics::assertChoice(facet, optFacetColour, null = TRUE, call = call) # this is due to bug in visOmopResults to remove in next release # https://github.com/darwin-eu/visOmopResults/issues/246 @@ -68,7 +68,8 @@ plotObservationPeriod <- function(result, if(length(visOmopResults::groupColumns(result)) == 0){ result <- result |> - dplyr::mutate(group_name = "observation_period_ordinal") + dplyr::mutate(group_name = "observation_period_ordinal", + group_level = "Overall") } if (plotType == "barplot") { @@ -85,7 +86,7 @@ plotObservationPeriod <- function(result, x = "observation_period_ordinal", facet = facet, colour = colour) - } else if (plotType == "densityplot") { + } else { p <- visOmopResults::scatterPlot( result = result, x = "density_x", @@ -95,8 +96,7 @@ plotObservationPeriod <- function(result, ribbon = FALSE, facet = facet, colour = colour, - group = optFacetColour - ) + + group = optFacetColour) + ggplot2::xlab(stringr::str_to_sentence(unique(result$variable_name))) + ggplot2::ylab("Density") } diff --git a/R/plotRecordCount.R b/R/plotRecordCount.R index 221cb4bd..874d6ab7 100644 --- a/R/plotRecordCount.R +++ b/R/plotRecordCount.R @@ -37,34 +37,25 @@ plotRecordCount <- function(result, cli::cli_abort(c("!" = "No records found with result_type == summarise_record_count")) } - # Detect if there are several time intervals - if(length(unique(result$additional_level)) > 1 ){ - # Line plot where each concept is a different line - p <- result |> - dplyr::filter(.data$additional_level != "overall") |> - dplyr::filter(.data$estimate_name == "count") |> - visOmopResults::scatterPlot(x = "time_interval", - y = "count", - line = TRUE, - point = TRUE, - ribbon = FALSE, - facet = facet, - colour = colour, - group = c("cdm_name", "omop_table", visOmopResults::strataColumns(result))) + - ggplot2::labs( - y = "Number records", - x = "Date" - ) - }else{ - p <- result |> - visOmopResults::barPlot(x = "variable_name", - y = "count", - facet = facet, - colour = colour) + - ggplot2::labs( - y = "Count", - x = "" - ) - } - p + # plot + result |> + dplyr::mutate(variable_level = as.Date(stringr::str_extract( + .data$variable_level, "^[^ to]+"))) |> + dplyr::filter(.data$estimate_name == "count") |> + visOmopResults::scatterPlot( + x = "variable_level", + y = "count", + line = TRUE, + point = TRUE, + ribbon = FALSE, + ymin = NULL, + ymax = NULL, + facet = facet, + colour = colour, + group = c("cdm_name", "omop_table", visOmopResults::strataColumns(result)) + ) + + ggplot2::labs( + y = "Incident records", + x = "Date" + ) } diff --git a/R/summariseAllConceptCounts.R b/R/summariseAllConceptCounts.R deleted file mode 100644 index e7df70e9..00000000 --- a/R/summariseAllConceptCounts.R +++ /dev/null @@ -1,187 +0,0 @@ - -my_getStrataList <- function(sex = FALSE, ageGroup = NULL, year = FALSE){ - - strata <- as.character() - - if(!is.null(ageGroup)){ - strata <- append(strata, "age_group") - } - - if(sex){ - strata <- append(strata, "sex") - } - if(year){ - strata <- append(strata, "year") - } - return(strata) -} - - -checkFeasibility <- function(omopTable, tableName, conceptId){ - - if (omopgenerics::isTableEmpty(omopTable)){ - cli::cli_warn(paste0(tableName, " omop table is empty.")) - return(NULL) - } - - if (is.na(conceptId)){ - cli::cli_warn(paste0(tableName, " omop table doesn't contain standard concepts.")) - return(NULL) - } - - y <- omopTable |> - dplyr::filter(!is.na(.data[[conceptId]])) - - if (omopgenerics::isTableEmpty(y)){ - cli::cli_warn(paste0(tableName, " omop table doesn't contain standard concepts.")) - return(NULL) - } - return(TRUE) -} - -#' Summarise concept use in patient-level data -#' -#' @param cdm A cdm object -#' @param omopTableName A character vector of the names of the tables to -#' summarise in the cdm object. -#' @param countBy Either "record" for record-level counts or "person" for -#' person-level counts -#' @param year TRUE or FALSE. If TRUE code use will be summarised by year. -#' @param sex TRUE or FALSE. If TRUE code use will be summarised by sex. -#' @param ageGroup A list of ageGroup vectors of length two. Code use will be -#' thus summarised by age groups. -#' @return A summarised_result object with results overall and, if specified, by -#' strata. -#' @export -summariseAllConceptCounts <- function(cdm, - omopTableName, - countBy = "record", - year = FALSE, - sex = FALSE, - ageGroup = NULL){ - - omopgenerics::validateCdmArgument(cdm) - checkCountBy(countBy) - omopgenerics::assertLogical(year, length = 1) - omopgenerics::assertLogical(sex, length = 1) - omopgenerics::assertChoice(omopTableName,choices = omopgenerics::omopTables(), unique = TRUE) - - ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]] - - strata <- my_getStrataList(sex = sex, year = year, ageGroup = ageGroup) - - stratification <- omopgenerics::combineStrata(strata) - - result_tables <- purrr::map(omopTableName, function(table){ - - - - - omopTable <- cdm[[table]] |> - dplyr::ungroup() - - - conceptId <- standardConcept(omopgenerics::tableName(omopTable)) - - if (is.null(checkFeasibility(omopTable, table, conceptId))){ - return(NULL) - } - - - indexDate <- startDate(omopgenerics::tableName(omopTable)) - - x <- omopTable |> - dplyr::filter(!is.na(.data[[conceptId]])) |> - dplyr::left_join( - cdm$concept |> dplyr::select("concept_id", "concept_name"), - by = stats::setNames("concept_id", conceptId)) |> - PatientProfiles::addDemographicsQuery(age = FALSE, - ageGroup = ageGroup, - sex = sex, - indexDate = indexDate, priorObservation = FALSE, futureObservation = FALSE) - if (year){ - x <- x|> dplyr::mutate(year = as.character(clock::get_year(.data[[indexDate]]))) - } - - level <- c(conceptId, "concept_name") - - groupings <- c(list(level), purrr::map(stratification, ~ c(level, .x))) - - result <- list() - if ("record" %in% countBy){ - - stratified_result <- x |> - dplyr::group_by(dplyr::across(dplyr::all_of(c(level,strata)))) |> - dplyr::summarise("estimate_value" = as.integer(dplyr::n()), .groups = "drop")|> - dplyr::collect() - - - grouped_results <- purrr::map(groupings, \(g) { - stratified_result |> - dplyr::group_by(dplyr::across(dplyr::all_of(g))) |> - dplyr::summarise("estimate_value" = as.integer(sum(.data$estimate_value, na.rm = TRUE)), .groups = "drop") - - }) - - result_record <- purrr::reduce(grouped_results, dplyr::bind_rows)|> - dplyr::mutate(dplyr::across(dplyr::all_of(strata), ~ dplyr::coalesce(., "overall")))|> - dplyr::mutate("estimate_name" = "record_count") - result<-dplyr::bind_rows(result,result_record) - } - - if ("person" %in% countBy){ - - grouped_results <- purrr::map(groupings, \(g) { - x |> - dplyr::group_by(dplyr::across(dplyr::all_of(g))) |> - dplyr::summarise("estimate_value" = as.integer(dplyr::n()), .groups = "drop")|> - dplyr::collect() - }) - - result_person <- purrr::reduce(grouped_results, dplyr::bind_rows) |> - dplyr::mutate(dplyr::across(dplyr::all_of(strata), ~ dplyr::coalesce(., "overall"))) |> - dplyr::mutate("estimate_name" = "person_count") - result<-dplyr::bind_rows(result,result_person) - } - result<- result |> - dplyr::mutate("omop_table" = table, - "variable_level" = as.character(.data[[conceptId]])) |> - - dplyr::select(-dplyr::all_of(conceptId)) - return(result) - }) - if (rlang::is_empty(purrr::compact(result_tables))){ - return(omopgenerics::emptySummarisedResult()) - } - - sr <-purrr::compact(result_tables) |> - purrr::reduce(dplyr::union)|> - dplyr::mutate( - result_id = 1L, - cdm_name = omopgenerics::cdmName(cdm) - ) |> - visOmopResults::uniteGroup(cols = "omop_table") |> - visOmopResults::uniteStrata(cols = strata) |> - visOmopResults::uniteAdditional() |> - dplyr::mutate( - "estimate_value" = as.character(.data$estimate_value), - "estimate_type" = "integer" - ) |> - dplyr::rename("variable_name" = "concept_name") - # |> - # dplyr::select(!c()) - - - settings <- dplyr::tibble( - result_id = unique(sr$result_id), - package_name = "omopSketch", - package_version = as.character(utils::packageVersion("OmopSketch")), - result_type = "summarise_all_concept_counts" - ) - sr <- sr |> - omopgenerics::newSummarisedResult(settings = settings) - - return(sr) - -} - diff --git a/R/summariseClinicalRecords.R b/R/summariseClinicalRecords.R index 423f666e..b6f67e05 100644 --- a/R/summariseClinicalRecords.R +++ b/R/summariseClinicalRecords.R @@ -55,16 +55,15 @@ summariseClinicalRecords <- function(cdm, ageGroup = NULL) { # Initial checks ---- omopgenerics::validateCdmArgument(cdm) - opts <- omopgenerics::omopTables() - opts <- opts[opts %in% names(cdm)] - omopgenerics::assertChoice(omopTableName, choices = opts) + omopTableName |> + omopgenerics::assertChoice(choices = omopgenerics::omopTables()) estimates <- PatientProfiles::availableEstimates( variableType = "numeric", fullQuantiles = TRUE) |> dplyr::pull("estimate_name") omopgenerics::assertChoice(recordsPerPerson, choices = estimates, null = TRUE) + recordsPerPerson <- unique(recordsPerPerson) - if (is.null(recordsPerPerson)) recordsPerPerson <- character() omopgenerics::assertLogical(inObservation, length = 1) omopgenerics::assertLogical(standardConcept, length = 1) @@ -72,50 +71,43 @@ summariseClinicalRecords <- function(cdm, omopgenerics::assertLogical(domainId, length = 1) omopgenerics::assertLogical(typeConcept, length = 1) omopgenerics::assertLogical(sex, length = 1) - ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, multipleAgeGroup = FALSE)[[1]] - - result <- purrr::map(omopTableName, \(x) { - if(omopgenerics::isTableEmpty(cdm[[x]])) { - cli::cli_warn(paste0(x, " omop table is empty. Returning an empty summarised omop table.")) - return(omopgenerics::emptySummarisedResult()) - } - summariseClinicalRecord( - x, - cdm = cdm, - recordsPerPerson = recordsPerPerson, - inObservation = inObservation, - standardConcept = standardConcept, - sourceVocabulary = sourceVocabulary, - domainId = domainId, - typeConcept = typeConcept, - sex = sex, - ageGroup = ageGroup - ) - }) |> - omopgenerics::bind() + ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]] + + result <- purrr::map(omopTableName, + function(x) { + if(omopgenerics::isTableEmpty(cdm[[x]])) { + cli::cli_warn(paste0(x, " omop table is empty. Returning an empty summarised omop table.")) + return(omopgenerics::emptySummarisedResult()) + } + summariseClinicalRecord(x, + cdm = cdm, + recordsPerPerson = recordsPerPerson, + inObservation = inObservation, + standardConcept = standardConcept, + sourceVocabulary = sourceVocabulary, + domainId = domainId, + typeConcept = typeConcept, + sex = sex, + ageGroup = ageGroup) + } + ) |> + dplyr::bind_rows() return(result) } #' @noRd -summariseClinicalRecord <- function(omopTableName, - cdm, - recordsPerPerson, - inObservation, - standardConcept, - sourceVocabulary, - domainId, - typeConcept, - sex, - ageGroup, - call = parent.frame(3)) { +summariseClinicalRecord <- function(omopTableName, cdm, recordsPerPerson, + inObservation, standardConcept, + sourceVocabulary, domainId, typeConcept, + sex, ageGroup, call = parent.frame(3)) { tablePrefix <- omopgenerics::tmpPrefix() # Initial checks omopgenerics::assertClass(cdm[[omopTableName]], "omop_table", call = call) - date <- startDate(omopTableName) + date <- startDate(omopgenerics::tableName(cdm[[omopTableName]])) omopTable <- cdm[[omopTableName]] |> dplyr::ungroup() @@ -123,21 +115,21 @@ summariseClinicalRecord <- function(omopTableName, omopTable <- filterPersonId(omopTable) |> addStrataToOmopTable(date, ageGroup, sex) - if ("observation_period" == omopTableName) { - if (standardConcept) { - if (!missing(standardConcept)) { + if ("observation_period" == omopTableName) { + if(standardConcept){ + if(!missing(standardConcept)){ cli::cli_inform("standardConcept turned to FALSE for observation_period OMOP table", call = call) } standardConcept <- FALSE } - if (sourceVocabulary) { - if (!missing(sourceVocabulary)) { + if(sourceVocabulary){ + if(!missing(sourceVocabulary)){ cli::cli_inform("sourceVocabulary turned to FALSE for observation_period OMOP table", call = call) } sourceVocabulary <- FALSE } - if (domainId) { - if (!missing(domainId)) { + if(domainId){ + if(!missing(domainId)){ cli::cli_inform("domainId turned to FALSE for observation_period OMOP table", call = call) } domainId <- FALSE @@ -145,46 +137,57 @@ summariseClinicalRecord <- function(omopTableName, } strata <- getStrataList(sex, ageGroup) - strata <- c(list(character()), strata) + + peopleStrata <- suppressWarnings(addStrataToPeopleInObservation(cdm, ageGroup, sex, tablePrefix)) + + people <- getNumberPeopleInCdm(cdm, strata, peopleStrata) + result <- omopgenerics::emptySummarisedResult() # Counts summary ---- - cli::cli_inform(c("i" = "Summarising {.pkg {omopTableName}} counts and records per person")) - result <- summariseRecordsPerPerson( - omopTable, date, sex, ageGroup, recordsPerPerson) + cli::cli_inform(c("i" = "Summarising table counts")) + result <- result |> + addCounts(strata, omopTable) |> + addSubjectsPercentage(omopTable, people, strata) + + # Records per person summary ---- + if(!is.null(recordsPerPerson)){ + cli::cli_inform(c("i" = "Summarising records per person")) + result <- result |> + addRecordsPerPerson(omopTable, recordsPerPerson, cdm, peopleStrata, strata) + } + + denominator <- result |> + dplyr::filter(.data$variable_name == "number records") |> + dplyr::collect("strata_name", "strata_level", "estimate_value") # Summary concepts ---- if (inObservation | standardConcept | sourceVocabulary | domainId | typeConcept) { - denominator <- result |> - dplyr::filter(.data$variable_name == "number records") |> - dplyr::select("strata_name", "strata_level", "estimate_value") - variables <- columnsVariables( inObservation, standardConcept, sourceVocabulary, domainId, typeConcept ) - cli::cli_inform(c("i" = "Summarising {.pkg {omopTableName}}: {.var {variables}}.")) + cli::cli_inform(c("i" = "Summarising {variables} information")) result <- result |> dplyr::bind_rows( omopTable |> - addVariables(variables) |> - dplyr::group_by(dplyr::across(dplyr::everything())) |> - dplyr::summarise(n = as.integer(dplyr::n()), .groups = "drop") |> + addVariables(variables, strata) |> + dplyr::group_by(dplyr::across(dplyr::all_of(variables)), .data$age_group, .data$sex) |> + dplyr::tally() |> dplyr::collect() |> - summaryData(denominator, strata, cdm) + dplyr::mutate("n" = as.integer(.data$n)) |> + summaryData(variables, cdm, denominator, result) ) } # Format output as a summarised result result <- result |> + tidyr::fill("result_id", "cdm_name", "group_name", "group_level", + "additional_name", "additional_level", .direction = "downup") |> dplyr::mutate( - "result_id" = 1L, - "cdm_name" = omopgenerics::cdmName(cdm), "group_name" = "omop_table", - "group_level" = omopTableName, - "additional_name" = "overall", - "additional_level" = "overall" + "group_level" = omopgenerics::tableName(omopTable) ) |> omopgenerics::newSummarisedResult(settings = dplyr::tibble( "result_id" = 1L, @@ -200,129 +203,33 @@ summariseClinicalRecord <- function(omopTableName, # Functions ----- getStrataList <- function(sex, ageGroup){ - omopgenerics::combineStrata(c("age_group"[!is.null(ageGroup)], "sex"[sex])) -} - -summariseRecordsPerPerson <- function(omopTable, date, sex, ageGroup, recordsPerPerson) { - # get strata - strataCols <- c("sex"[sex], "age_group"[!is.null(ageGroup)]) - - cdm <- omopgenerics::cdmReference(omopTable) - tablePrefix <- omopgenerics::tmpPrefix() - nm <- omopgenerics::uniqueTableName(tablePrefix) - - # denominator - demographics <- CohortConstructor::demographicsCohort( - cdm = cdm, name = nm, ageRange = ageGroup - ) |> - suppressMessages() - set <- omopgenerics::settings(demographics) - if (sex) demographics <- PatientProfiles::addSexQuery(demographics) - if (is.null(ageGroup)) { - set <- set |> dplyr::select("cohort_definition_id") - } else { - set <- set |> - dplyr::left_join( - dplyr::tibble( - age_group = names(ageGroup), - age_range = purrr::map_chr(ageGroup, \(x) paste0(x[1], "_", x[2])) - ), - by = "age_range" - ) |> - dplyr::mutate(age_group = dplyr::coalesce(.data$age_group, .data$age_range)) |> - dplyr::select("cohort_definition_id", "age_group") - } - - # records per person - x <- demographics |> - dplyr::select(dplyr::any_of(c( - "cohort_definition_id", "person_id" = "subject_id", "sex" - ))) |> - dplyr::distinct() |> - dplyr::collect() |> - dplyr::left_join(set, by = "cohort_definition_id") |> - dplyr::select(!"cohort_definition_id") |> - dplyr::left_join( - omopTable |> - dplyr::group_by(dplyr::across(dplyr::all_of(c("person_id", strataCols)))) |> - dplyr::summarise(n = as.integer(dplyr::n()), .groups = "drop") |> - dplyr::collect(), - by = c("person_id", strataCols) - ) |> - dplyr::mutate(n = dplyr::coalesce(.data$n, 0L)) - - omopgenerics::dropTable(cdm = cdm, name = dplyr::starts_with(tablePrefix)) - - result <- list() - result[["overall"]] <- summariseCounts(x, character(), recordsPerPerson) + strata <- as.character() - if (!is.null(ageGroup)) { - result[["age_group"]] <- x |> - summariseCounts(c("age_group"), recordsPerPerson) + if(!is.null(ageGroup)){ + strata <- append(strata, "age_group") } - if (sex) { - result[["sex"]] <- x |> - summariseCounts(c("sex"), recordsPerPerson) + if(sex){ + strata <- append(strata, "sex") } - if (!is.null(ageGroup) & sex) { - result[["age_group_sex"]] <- x |> - summariseCounts(c("age_group", "sex"), recordsPerPerson) - } - - result <- result |> - dplyr::bind_rows() |> - dplyr::mutate( - variable_name = dplyr::if_else( - .data$variable_name == "n", - dplyr::if_else(.data$estimate_name == "sum", "number records", "records_per_person"), - .data$variable_name - ), - estimate_name = dplyr::if_else( - .data$variable_name == "number records", "count", .data$estimate_name - ) - ) - - return(result) -} -summariseCounts <- function(x, strata, recordsPerPerson) { - x |> - dplyr::group_by(dplyr::across(dplyr::all_of(c("person_id", strata)))) |> - dplyr::summarise(n = sum(.data$n), .groups = "drop") |> - dplyr::mutate(number_subjects = dplyr::if_else(.data$n == 0, 0L, 1L)) |> - dplyr::select(!"person_id") |> - PatientProfiles::summariseResult( - group = character(), - includeOverallGroup = FALSE, - strata = strata, - includeOverallStrata = FALSE, - counts = FALSE, - variables = list("number_subjects", "n"), - estimates = list(c("count", "percentage"), c(recordsPerPerson, "sum")) - ) |> - suppressMessages() + strata <- omopgenerics::combineStrata(levels = strata) + return(strata) } -getNumberPeopleInCdm <- function(cdm, ageGroup, sex, strata) { - tablePrefix <- omopgenerics::tmpPrefix() +getNumberPeopleInCdm <- function(cdm, strata, peopleStrata){ - x <- cdm |> - addStrataToPeopleInObservation(ageGroup, sex, tablePrefix) |> + peopleStrata |> + dplyr::select(-c("observation_period_start_date","observation_period_end_date")) |> + dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") |> dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 - PatientProfiles::summariseResult( - strata = strata, - includeOverallStrata = TRUE, - counts = TRUE, - estimates = character() - ) |> + PatientProfiles::summariseResult(strata = strata, + includeOverallStrata = TRUE, + counts = TRUE, + estimates = c("")) |> suppressMessages() |> dplyr::filter(.data$variable_name != "number records") - - omopgenerics::dropSourceTable(cdm = cdm, name = dplyr::starts_with(tablePrefix)) - - return(x) } addCounts <- function(result, strata, omopTable){ @@ -333,7 +240,7 @@ addCounts <- function(result, strata, omopTable){ rbind( omopTable |> dplyr::select("person_id", dplyr::any_of(c("age_group","sex"))) |> - # dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 + dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 PatientProfiles::summariseResult(strata = strata, includeOverallStrata = TRUE, counts = TRUE, @@ -384,7 +291,7 @@ addRecordsPerPerson <- function(result, omopTable, recordsPerPerson, cdm, people .data$records_per_person )) |> dplyr::distinct() |> - # dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 + dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 PatientProfiles::summariseResult( strata = strata, includeOverallStrata = TRUE, @@ -395,7 +302,7 @@ addRecordsPerPerson <- function(result, omopTable, recordsPerPerson, cdm, people ) } -addVariables <- function(x, variables) { +addVariables <- function(x, variables, strata) { name <- omopgenerics::tableName(x) @@ -413,7 +320,7 @@ addVariables <- function(x, variables) { cdm <- omopgenerics::cdmReference(x) x <- x |> - dplyr::select(dplyr::all_of(newNames), dplyr::any_of(c("age_group", "sex"))) + dplyr::select(dplyr::all_of(newNames), "age_group", "sex") # Domain and standard ---- if (any(c("domain_id", "standard") %in% variables)) { @@ -474,9 +381,27 @@ addVariables <- function(x, variables) { } x <- x |> - dplyr::select(dplyr::all_of(variables), dplyr::any_of(c("age_group", "sex"))) |> + dplyr::select(dplyr::all_of(variables), "age_group", "sex") |> dplyr::mutate(dplyr::across(dplyr::everything(), ~as.character(.))) + # Create overall groups - This chunk will need efficiency improvement + if(length(strata) == 3){ + x <- x |> + dplyr::union_all( + x |> + dplyr::mutate(age_group = "overall") + ) |> + dplyr::union_all( + x |> + dplyr::mutate(sex = "overall") + ) |> + dplyr::union_all( + x |> + dplyr::mutate(sex = "overall") |> + dplyr::mutate(age_group = "overall") + ) + + } return(x) } @@ -486,43 +411,38 @@ columnsVariables <- function(inObservation, standardConcept, sourceVocabulary, d )] } -summaryData <- function(x, denominator, strata, cdm) { - - cols <- colnames(x) - +summaryData <- function(x, variables, cdm, denominator, result) { results <- list() # in observation ---- - if ("in_observation" %in% cols) { + if ("in_observation" %in% variables) { results[["obs"]] <- x |> dplyr::mutate("in_observation" = dplyr::if_else( - .data$in_observation == "1", "Yes", "No" + !is.na(.data$in_observation), "Yes", "No" )) |> - formatResults("In observation", "in_observation", denominator, strata) + formatResults("In observation", "in_observation", denominator, result) } # standard ----- - if ("standard" %in% cols) { + if ("standard" %in% variables) { results[["standard"]] <- x |> - formatResults("Standard concept", "standard", denominator, strata) + formatResults("Standard concept", "standard", denominator, result) } # source ---- - if ("source" %in% cols) { - results[["source"]] <- x |> - formatResults("Source vocabulary", "source", denominator, strata) + if ("source" %in% variables) { + results[["source"]] <- x |> formatResults("Source vocabulary", "source", denominator, result) } # domain ---- - if ("domain_id" %in% cols) { - results[["domain"]] <- x |> - formatResults("Domain", "domain_id", denominator, strata) + if ("domain_id" %in% variables) { + results[["domain"]] <- x |> formatResults("Domain", "domain_id", denominator, result) } # type ---- - if ("type" %in% cols) { + if ("type" %in% variables) { xx <- x |> - formatResults("Type concept id", "type", denominator, strata) |> + formatResults("Type concept id", "type", denominator, result) |> dplyr::left_join( conceptTypes |> dplyr::select( @@ -559,56 +479,56 @@ summaryData <- function(x, denominator, strata, cdm) { paste0(.data$new_variable_level, " (", .data$variable_level, ")") )) } - results[["type"]] <- xx |> - dplyr::select(-"new_variable_level") + results[["type"]] <- xx |> dplyr::select(-"new_variable_level") } - results <- dplyr::bind_rows(results) + results <- results |> + dplyr::bind_rows() return(results) } -formatResults <- function(x, variableName, variableLevel, denominator, strata) { +formatResults <- function(x, variableName, variableLevel, denominator, result) { denominator <- denominator |> dplyr::select("strata_name", "strata_level", "denominator" = "estimate_value") |> visOmopResults::splitStrata() - strataCols <- unique(unlist(strata)) + if(!"age_group" %in% colnames(denominator)){ + denominator <- denominator |> + dplyr::mutate("age_group" = "overall") + } - result <- list() - for (strat in strata) { - res <- x |> - dplyr::group_by(dplyr::across(dplyr::all_of(c(variableLevel, strat)))) |> - dplyr::summarise("count" = sum(.data$n), .groups = "drop") - for (col in strataCols) { - if (!col %in% colnames(res)) { - res <- res |> dplyr::mutate(!!col := "overall") - } - } - result[[paste0(strat, collapse = "_")]] <- res |> - dplyr::inner_join(denominator, by = strataCols) |> - dplyr::mutate("percentage" = 100 * .data$count / as.numeric(.data$denominator)) |> - dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) |> - tidyr::pivot_longer( - cols = c("count", "percentage"), - names_to = "estimate_name", - values_to = "estimate_value" - ) |> - dplyr::mutate( - "variable_name" = .env$variableName, - "variable_level" = as.character(.data[[variableLevel]]), - "estimate_type" = dplyr::if_else( - .data$estimate_name == "count", "integer", "percentage" - ) - ) |> - visOmopResults::uniteStrata(cols = strataCols) |> - dplyr::select( - "strata_name", "strata_level", "variable_name", "variable_level", - "estimate_name", "estimate_type", "estimate_value" - ) |> - dplyr::ungroup() + if(!"sex" %in% colnames(denominator)){ + denominator <- denominator |> + dplyr::mutate("sex" = "overall") } - dplyr::bind_rows(result) + x |> + dplyr::group_by(dplyr::across(dplyr::all_of(c(variableLevel,"age_group","sex")))) |> + dplyr::summarise("count" = sum(.data$n), .groups = "drop") |> + dplyr::inner_join( + denominator, + by = c("age_group","sex") + ) |> + dplyr::mutate("percentage" = 100 * .data$count / as.numeric(.data$denominator)) |> + dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) |> + tidyr::pivot_longer( + cols = c("count", "percentage"), + names_to = "estimate_name", + values_to = "estimate_value" + ) |> + dplyr::mutate( + "variable_name" = .env$variableName, + "variable_level" = as.character(.data[[variableLevel]]), + "estimate_type" = dplyr::if_else( + .data$estimate_name == "count", "integer", "percentage" + ) + ) |> + visOmopResults::uniteStrata(cols = c("age_group","sex")) |> + dplyr::select( + "strata_name", "strata_level", "variable_name", "variable_level", + "estimate_name", "estimate_type", "estimate_value" + ) |> + dplyr::ungroup() } diff --git a/R/summariseConceptCounts.R b/R/summariseConceptCounts.R index e6ea43a8..1e8dc898 100644 --- a/R/summariseConceptCounts.R +++ b/R/summariseConceptCounts.R @@ -1,12 +1,12 @@ -#' Summarise concept counts in patient-level data. Only concepts recorded during observation period are counted. +#' Summarise code use in patient-level data #' #' @param cdm A cdm object #' @param conceptId List of concept IDs to summarise. #' @param countBy Either "record" for record-level counts or "person" for #' person-level counts #' @param concept TRUE or FALSE. If TRUE code use will be summarised by concept. -#' @param interval Time interval to stratify by. It can either be "years", "quarters", "months" or "overall". +#' @param year TRUE or FALSE. If TRUE code use will be summarised by year. #' @param sex TRUE or FALSE. If TRUE code use will be summarised by sex. #' @param ageGroup A list of ageGroup vectors of length two. Code use will be #' thus summarised by age groups. @@ -15,7 +15,6 @@ #' @export #' @examples #' \donttest{ -#' library(OmopSketch) #' #' cdm <- mockOmopSketch() #' @@ -26,28 +25,22 @@ #' results #' #' PatientProfiles::mockDisconnect(cdm) -#' #' } summariseConceptCounts <- function(cdm, conceptId, countBy = c("record", "person"), concept = TRUE, - interval = "overall", + year = FALSE, sex = FALSE, ageGroup = NULL){ omopgenerics::validateCdmArgument(cdm) omopgenerics::assertList(conceptId, named = TRUE) checkCountBy(countBy) - omopgenerics::assertChoice(countBy, choices = c("record", "person")) - countBy <- gsub("persons","subjects",paste0("number ",countBy,"s")) - x <- validateIntervals(interval) - interval <- x$interval - unitInterval <- x$unitInterval - omopgenerics::assertNumeric(unitInterval, length = 1, min = 1, na = TRUE) - omopgenerics::assertLogical(concept, length = 1) - omopgenerics::assertLogical(sex, length = 1) - ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]] + + if(!is.null(conceptId) && length(names(conceptId)) != length(conceptId)){ + cli::cli_abort("conceptId must be a named list") + } # Get all concepts in concept table if conceptId is NULL # if(is.null(conceptId)) { @@ -59,78 +52,88 @@ summariseConceptCounts <- function(cdm, # tibble::deframe() # } - codeUse <- list() - cli::cli_progress_bar("Getting use of codes", total = length(conceptId)) - for(i in 1:length(conceptId)) { - cli::cli_alert_info("Getting concept counts of {names(conceptId)[i]}") - codeUse[[i]] <- getCodeUse(conceptId[i], - cdm = cdm, - countBy = countBy, - concept = concept, - interval = interval, - unitInterval = unitInterval, - sex = sex, - ageGroup = ageGroup) - Sys.sleep(i/length(conceptId)) - cli::cli_progress_update() + getAllCodeUse <- function() { + codeUse <- list() + cli::cli_progress_bar("Getting use of codes", total = length(conceptId)) + for(i in 1:length(conceptId)) { + cli::cli_alert_info("Getting use of codes from {names(conceptId)[i]}") + codeUse[[i]] <- getCodeUse(conceptId[i], + cdm = cdm, + cohortTable = NULL, + cohortId = NULL, + timing = "any", + countBy = countBy, + concept = concept, + year = year, + sex = sex, + ageGroup = ageGroup) + Sys.sleep(i/length(conceptId)) + cli::cli_progress_update() + } + codeUse <- codeUse |> + dplyr::bind_rows() + cli::cli_progress_done() + return(codeUse) } - codeUse <- codeUse |> - dplyr::bind_rows() - cli::cli_progress_done() + codeUse <- getAllCodeUse() if(nrow(codeUse) > 0) { codeUse <- codeUse %>% dplyr::mutate( result_id = as.integer(1), cdm_name = omopgenerics::cdmName(cdm) + ) %>% + omopgenerics::newSummarisedResult( + settings = dplyr::tibble( + result_id = as.integer(1), + result_type = "summarise_concept_counts", + package_name = "OmopSketch", + package_version = as.character(utils::packageVersion("OmopSketch")) + ) ) } else { codeUse <- omopgenerics::emptySummarisedResult() } - codeUse <- codeUse %>% - omopgenerics::newSummarisedResult( - settings = dplyr::tibble( - result_id = 1L, - result_type = "summarise_concept_counts", - package_name = "OmopSketch", - package_version = as.character(utils::packageVersion("OmopSketch")) - ) - ) return(codeUse) } getCodeUse <- function(x, cdm, + cohortTable, + cohortId, + timing, countBy, concept, - interval, - unitInterval, + year, sex, ageGroup, - call = parent.frame()){ - - tablePrefix <- omopgenerics::tmpPrefix() + call = parent.frame()) { + omopgenerics::assertCharacter(timing, len = 1) + omopgenerics::assertChoice(timing, choices = c("any", "entry")) + omopgenerics::assertChoice(countBy, choices = c("record", "person")) omopgenerics::assertNumeric(x[[1]], integerish = TRUE) omopgenerics::assertList(x) + omopgenerics::assertLogical(concept) + omopgenerics::assertLogical(year) + omopgenerics::assertLogical(sex) + ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]] - # Create code list table - tableCodelist <- paste0(tablePrefix,"codelist") + tableCodelist <- paste0(omopgenerics::uniqueTableName(), + omopgenerics::uniqueId()) cdm <- omopgenerics::insertTable(cdm = cdm, name = tableCodelist, table = dplyr::tibble(concept_id = x[[1]]), overwrite = TRUE, temporary = FALSE) - cdm[[tableCodelist]] <- cdm[[tableCodelist]] %>% dplyr::left_join( cdm[["concept"]] %>% dplyr::select("concept_id", "domain_id"), - by = "concept_id" - ) + by = "concept_id") - # Create domains table - tableDomainsData <- paste0(tablePrefix,"domains_data") + tableDomainsData <- paste0(omopgenerics::uniqueTableName(), + omopgenerics::uniqueId()) cdm <- omopgenerics::insertTable(cdm = cdm, name = tableDomainsData, table = tables, @@ -145,102 +148,133 @@ getCodeUse <- function(x, temporary = FALSE, overwrite = TRUE) - # Create records table - intermediateTable <- paste0(tablePrefix,"intermediate_table") + CDMConnector::dropTable(cdm = cdm, name = tableDomainsData) + cdm[[tableDomainsData]] <- NULL + + intermediateTable <- paste0(omopgenerics::uniqueTableName(), + omopgenerics::uniqueId()) records <- getRelevantRecords(cdm = cdm, tableCodelist = tableCodelist, - intermediateTable = intermediateTable, - tablePrefix = tablePrefix) - if(is.null(records)){ - cc <- dplyr::tibble() + cohortTable = cohortTable, + cohortId = cohortId, + timing = timing, + intermediateTable = intermediateTable) + + if(!is.null(records) && + (records %>% utils::head(1) %>% dplyr::tally() %>% dplyr::pull("n") > 0)) { + if(sex == TRUE | !is.null(ageGroup)){ + records <- records %>% + PatientProfiles::addDemographicsQuery(age = !is.null(ageGroup), + ageGroup = ageGroup, + sex = sex, + priorObservation = FALSE, + futureObservation = FALSE, + indexDate = "date") |> + dplyr::compute(overwrite = TRUE, + name = omopgenerics::tableName(records), + temporary = FALSE) + } + + byAgeGroup <- !is.null(ageGroup) + codeCounts <- getSummaryCounts(records = records, + cdm = cdm, + countBy = countBy, + concept = concept, + year = year, + sex = sex, + byAgeGroup = byAgeGroup) + + if (is.null(cohortTable)) { + cohortName <- NA + } else { + cohortName <- omopgenerics::settings(cdm[[cohortTable]]) %>% + dplyr::filter(.data$cohort_definition_id == cohortId) %>% + dplyr::pull("cohort_name") + } + + codeCounts <- codeCounts %>% + dplyr::mutate( + "codelist_name" := !!names(x), + "cohort_name" = .env$cohortName, + "estimate_type" = "integer", + "variable_name" = dplyr::if_else(is.na(.data$standard_concept_name), "overall", .data$standard_concept_name), + "variable_level" = as.character(.data$standard_concept_id) + ) %>% + visOmopResults::uniteGroup(cols = c("cohort_name", "codelist_name")) %>% + visOmopResults::uniteAdditional( + cols = c("source_concept_name", "source_concept_id", "domain_id") + ) %>% + dplyr::select( + "group_name", "group_level", "strata_name", "strata_level", + "variable_name", "variable_level", "estimate_name", "estimate_type", + "estimate_value", "additional_name", "additional_level" + ) + } else { + codeCounts <- dplyr::tibble() cli::cli_inform(c( "i" = "No records found in the cdm for the concepts provided." )) - return(omopgenerics::emptySummarisedResult()) } - records <- addStrataToOmopTable(records, "date", ageGroup, sex) - strata <- getStrataList(sex, ageGroup) - - if(interval != "overall"){ - intervalTibble <- getIntervalTibble(omopTable = records, - start_date_name = "date", - end_date_name = "date", - interval = interval, - unitInterval = unitInterval) - - cdm <- cdm |> omopgenerics::insertTable(name = paste0(tablePrefix,"interval"), table = intervalTibble) - - records <- splitIncidenceBetweenIntervals(cdm, records, "date", tablePrefix) - - strata <- omopgenerics::combineStrata(c(unique(unlist(getStrataList(sex,ageGroup))), "interval_group")) - } - - if(!"number subjects" %in% c(countBy)){records <- records |> dplyr::select(-"person_id")} - if(concept){ - group <- list("standard_concept_id") - }else{ - group <- list() - records <- records |> - dplyr::mutate("standard_concept_name" = !!names(x)) - } - - cc <- records |> - PatientProfiles::summariseResult(strata = strata, - variable = "standard_concept_name", - group = group, - includeOverallGroup = TRUE, - includeOverallStrata = TRUE, - counts = TRUE, - estimates = as.character()) |> - suppressMessages() |> - dplyr::filter(.data$variable_name %in% .env$countBy) |> - dplyr::mutate("variable_name" = stringr::str_to_sentence(.data$variable_name)) |> - dplyr::mutate(standard_concept_id = .data$group_level) |> - dplyr::mutate(group_name = "codelist_name") |> - dplyr::mutate(group_level = names(x)) |> - dplyr::mutate(cdm_name = omopgenerics::cdmName(cdm)) |> - dplyr::select(-c("additional_name", "additional_level")) |> - dplyr::left_join( - getConceptsInfo(records), - by = "standard_concept_id" - ) |> - dplyr::select(-"standard_concept_id") - - if(interval != "overall"){ - cc <- cc |> - visOmopResults::splitStrata() |> - dplyr::mutate("additional_level" = dplyr::if_else(.data$interval_group == "overall", .data$additional_level, paste0(.data$interval_group, " &&& ", .data$additional_level))) |> - dplyr::mutate("additional_name" = dplyr::if_else(.data$interval_group == "overall", .data$additional_name, paste0("time_interval &&& ", .data$additional_name))) |> - dplyr::mutate("additional_level" = gsub(" &&& overall$", "", .data$additional_level)) |> - dplyr::mutate("additional_name" = gsub(" &&& overall$", "", .data$additional_name)) |> - visOmopResults::uniteStrata(unique(unlist(strata))[unique(unlist(strata)) != "interval_group"]) |> - dplyr::select(-"interval_group") - } - CDMConnector::dropTable(cdm = cdm, name = dplyr::starts_with(tablePrefix)) + CDMConnector::dropTable(cdm = cdm, + name = tableCodelist) + cdm[[tableCodelist]] <- NULL + CDMConnector::dropTable( + cdm = cdm, + name = dplyr::starts_with(intermediateTable) + ) - return(cc) + return(codeCounts) } getRelevantRecords <- function(cdm, tableCodelist, - intermediateTable, - tablePrefix){ + cohortTable, + cohortId, + timing, + intermediateTable){ codes <- cdm[[tableCodelist]] |> dplyr::collect() tableName <- purrr::discard(unique(codes$table_name), is.na) standardConceptIdName <- purrr::discard(unique(codes$standard_concept), is.na) - sourceConceptIdName <- purrr::discard(unique(codes$source_concept), is.na) + sourceConceptIdName <- purrr::discard(unique(codes$source_concept), is.na) dateName <- purrr::discard(unique(codes$start_date), is.na) + if(!is.null(cohortTable)){ + if(is.null(cohortId)){ + cohortSubjects <- cdm[[cohortTable]] %>% + dplyr::select("subject_id", "cohort_start_date") %>% + dplyr::rename("person_id" = "subject_id") %>% + dplyr::distinct() + } else { + cohortSubjects <- cdm[[cohortTable]] %>% + dplyr::filter(.data$cohort_definition_id %in% cohortId) %>% + dplyr::select("subject_id", "cohort_start_date") %>% + dplyr::rename("person_id" = "subject_id") %>% + dplyr::distinct() + } + } + if(length(tableName)>0){ codeRecords <- cdm[[tableName[[1]]]] + if(!is.null(cohortTable)){ + # keep only records of those in the cohorts of interest + codeRecords <- codeRecords %>% + dplyr::inner_join(cohortSubjects, + by = "person_id") + if(timing == "entry"){ + codeRecords <- codeRecords %>% + dplyr::filter(.data$cohort_start_date == !!dplyr::sym(dateName[[1]])) + } + } - if(is.null(codeRecords)){return(NULL)} - - tableCodes <- paste0(tablePrefix, "table_codes") + if(is.null(codeRecords)){ + return(NULL) + } + tableCodes <- paste0(omopgenerics::uniqueTableName(), + omopgenerics::uniqueId()) cdm <- omopgenerics::insertTable(cdm = cdm, name = tableCodes, table = codes %>% @@ -251,15 +285,15 @@ getRelevantRecords <- function(cdm, codeRecords <- codeRecords %>% dplyr::mutate(date = !!dplyr::sym(dateName[[1]])) %>% + dplyr::mutate(year = clock::get_year(date)) %>% dplyr::select(dplyr::all_of(c("person_id", standardConceptIdName[[1]], sourceConceptIdName[[1]], - "date"))) %>% + "date", "year"))) %>% dplyr::rename("standard_concept_id" = .env$standardConceptIdName[[1]], "source_concept_id" = .env$sourceConceptIdName[[1]]) %>% dplyr::inner_join(cdm[[tableCodes]], by = c("standard_concept_id"="concept_id")) %>% - filterInObservation(indexDate = "date") |> dplyr::compute( name = paste0(intermediateTable,"_grr"), temporary = FALSE, @@ -278,7 +312,16 @@ getRelevantRecords <- function(cdm, if(length(tableName) > 1) { for(i in 1:(length(tableName)-1)) { workingRecords <- cdm[[tableName[[i+1]]]] - + if(!is.null(cohortTable)){ + # keep only records of those in the cohorts of interest + workingRecords <- workingRecords %>% + dplyr::inner_join(cohortSubjects, + by = "person_id") + if(timing == "entry"){ + workingRecords <- workingRecords %>% + dplyr::filter(.data$cohort_start_date == !!dplyr::sym(dateName[[i+1]])) + } + } workingRecords <- workingRecords %>% dplyr::mutate(date = !!dplyr::sym(dateName[[i+1]])) %>% dplyr::mutate(year = clock::get_year(date)) %>% @@ -330,18 +373,181 @@ getRelevantRecords <- function(cdm, return(codeRecords) } -getConceptsInfo <- function(records){ - records |> - dplyr::select("standard_concept_name", "standard_concept_id", "source_concept_name", "source_concept_id", "domain_id") |> - dplyr::distinct() |> - dplyr::collect() |> - dplyr::mutate("additional_name" = "standard_concept_name &&& standard_concept_id &&& source_concept_name &&& source_concept_id &&& domain_id") |> - dplyr::mutate("additional_level" = paste0(.data$standard_concept_name, " &&& ",.data$standard_concept_id, " &&& ", .data$source_concept_name, " &&& ", .data$source_concept_id, " &&& ", .data$domain_id)) |> - dplyr::select("standard_concept_id","additional_name", "additional_level") |> - dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) |> - dplyr::add_row( - "standard_concept_id" = "overall", - "additional_name" = "overall", - "additional_level" = "overall" +getSummaryCounts <- function(records, + cdm, + countBy, + concept, + year, + sex, + byAgeGroup) { + + if ("record" %in% countBy) { + recordSummary <- records %>% + dplyr::tally(name = "estimate_value") %>% + dplyr::mutate(estimate_value = as.character(.data$estimate_value)) %>% + dplyr::collect() + if(isTRUE(concept)) { + recordSummary <- dplyr::bind_rows( + recordSummary, + records %>% + dplyr::group_by( + .data$standard_concept_id, .data$standard_concept_name, + .data$source_concept_id, .data$source_concept_name, .data$domain_id + ) %>% + dplyr::tally(name = "estimate_value") %>% + dplyr::mutate(estimate_value = as.character(.data$estimate_value)) %>% + dplyr::collect() + ) + } + recordSummary <- recordSummary %>% + dplyr::mutate( + strata_name = "overall", + strata_level = "overall", + estimate_name = "record_count" + ) + } else { + recordSummary <- dplyr::tibble() + } + + if ("person" %in% countBy) { + personSummary <- records %>% + dplyr::select("person_id") %>% + dplyr::distinct() %>% + dplyr::tally(name = "estimate_value") %>% + dplyr::mutate(estimate_value = as.character(.data$estimate_value)) %>% + dplyr::collect() + + if (isTRUE(concept)) { + personSummary <- dplyr::bind_rows( + personSummary, + records %>% + dplyr::select( + "person_id", "standard_concept_id", "standard_concept_name", + "source_concept_id", "source_concept_name", "domain_id" + ) %>% + dplyr::distinct() %>% + dplyr::group_by( + .data$standard_concept_id, .data$standard_concept_name, + .data$source_concept_id, .data$source_concept_name, .data$domain_id + ) %>% + dplyr::tally(name = "estimate_value") %>% + dplyr::mutate(estimate_value = as.character(.data$estimate_value)) %>% + dplyr::collect() + ) + } + personSummary <- personSummary %>% + dplyr::mutate( + strata_name = "overall", + strata_level = "overall", + estimate_name = "person_count") + } else { + personSummary <- dplyr::tibble() + } + + if ("record" %in% countBy & year == TRUE) { + recordSummary <- dplyr::bind_rows( + recordSummary, + getGroupedRecordCount(records = records, cdm = cdm, groupBy = "year") + ) + } + if ("person" %in% countBy & year == TRUE) { + personSummary <- dplyr::bind_rows( + personSummary, + getGroupedPersonCount(records = records, cdm = cdm, groupBy = "year") + ) + } + if ("record" %in% countBy & sex == TRUE) { + recordSummary <- dplyr::bind_rows( + recordSummary, + getGroupedRecordCount(records = records, cdm = cdm, groupBy = "sex") + ) + } + if ("person" %in% countBy & sex == TRUE) { + personSummary <- dplyr::bind_rows( + personSummary, + getGroupedPersonCount(records = records, cdm = cdm, groupBy = "sex") + ) + } + if ("record" %in% countBy & byAgeGroup == TRUE) { + recordSummary <- dplyr::bind_rows( + recordSummary, + getGroupedRecordCount(records = records, cdm = cdm, groupBy = "age_group") + ) + } + if ("person" %in% countBy & byAgeGroup == TRUE) { + personSummary <- dplyr::bind_rows( + personSummary, + getGroupedPersonCount(records = records, cdm = cdm, groupBy = "age_group") ) + } + if ("record" %in% countBy && byAgeGroup == TRUE && sex == TRUE) { + recordSummary <- dplyr::bind_rows( + recordSummary, + getGroupedRecordCount(records = records, cdm = cdm, groupBy = c("age_group", "sex")) + ) + } + if ("person" %in% countBy && byAgeGroup == TRUE && sex == TRUE) { + personSummary <- dplyr::bind_rows( + personSummary, + getGroupedPersonCount(records = records, cdm = cdm, groupBy = c("age_group", "sex")) + ) + } + summary <- dplyr::bind_rows(recordSummary, personSummary) + return(summary) +} + +getGroupedRecordCount <- function(records, + cdm, + groupBy){ + + groupedCounts <- dplyr::bind_rows( + records %>% + dplyr::group_by(dplyr::pick(.env$groupBy)) %>% + dplyr::tally(name = "estimate_value") %>% + dplyr::mutate(estimate_value = as.character(.data$estimate_value)) %>% + dplyr::collect(), + records %>% + dplyr::group_by(dplyr::pick(.env$groupBy, + "standard_concept_id", "standard_concept_name", + "source_concept_id", "source_concept_name", + "domain_id")) %>% + dplyr::tally(name = "estimate_value") %>% + dplyr::mutate(estimate_value = as.character(.data$estimate_value)) %>% + dplyr::collect() + ) %>% + visOmopResults::uniteStrata(cols = groupBy) %>% + dplyr::mutate(estimate_name = "record_count") + + return(groupedCounts) +} + +getGroupedPersonCount <- function(records, + cdm, + groupBy){ + + groupedCounts <- dplyr::bind_rows( + records %>% + dplyr::select(dplyr::all_of(c("person_id", .env$groupBy))) %>% + dplyr::distinct() %>% + dplyr::group_by(dplyr::pick(.env$groupBy)) %>% + dplyr::tally(name = "estimate_value") %>% + dplyr::mutate(estimate_value = as.character(.data$estimate_value)) %>% + dplyr::collect(), + records %>% + dplyr::select(dplyr::all_of(c( + "person_id", "standard_concept_id", "standard_concept_name", + "source_concept_id", "source_concept_name", "domain_id", .env$groupBy + ))) %>% + dplyr::distinct() %>% + dplyr::group_by(dplyr::pick( + .env$groupBy, "standard_concept_id", "standard_concept_name", + "source_concept_id", "source_concept_name", "domain_id" + )) %>% + dplyr::tally(name = "estimate_value") %>% + dplyr::mutate(estimate_value = as.character(.data$estimate_value)) %>% + dplyr::collect()) %>% + visOmopResults::uniteStrata(cols = groupBy) %>% + dplyr::mutate(estimate_name = "person_count") + + return(groupedCounts) } diff --git a/R/summariseInObservation.R b/R/summariseInObservation.R index bc0e2a52..4345bc63 100644 --- a/R/summariseInObservation.R +++ b/R/summariseInObservation.R @@ -3,7 +3,9 @@ #' #' @param observationPeriod An observation_period omop table. It must be part of #' a cdm_reference object. -#' @param interval Time interval to stratify by. It can either be "years", "quarters", "months" or "overall". +#' @param unit Whether to stratify by "year" or by "month". +#' @param unitInterval Number of years or months to include within the time +#' interval. #' @param output Output format. It can be either the number of records #' ("records") that are in observation in the specific interval of time, the #' number of person-days ("person-days"), or both c("records","person-days"). @@ -20,7 +22,8 @@ #' #' result <- summariseInObservation( #' cdm$observation_period, -#' interval = "months", +#' unit = "month", +#' unitInterval = 6, #' output = c("person-days","records"), #' ageGroup = list("<=60" = c(0,60), ">60" = c(61, Inf)), #' sex = TRUE @@ -30,10 +33,10 @@ #' glimpse() #' #' PatientProfiles::mockDisconnect(cdm) -#' #' } summariseInObservation <- function(observationPeriod, - interval = "overall", + unit = "year", + unitInterval = 1, output = "records", ageGroup = NULL, sex = FALSE){ @@ -49,13 +52,11 @@ summariseInObservation <- function(observationPeriod, return(omopgenerics::emptySummarisedResult()) } + checkUnit(unit) + omopgenerics::assertNumeric(unitInterval, length = 1, min = 1) checkOutput(output) ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]] omopgenerics::assertLogical(sex, length = 1) - original_interval <- interval - x <- validateIntervals(interval) - interval <- x$interval - unitInterval <- x$unitInterval if(length(output) > 1){output <- "all"} if(missing(ageGroup) | is.null(ageGroup)){ageGroup <- list("overall" = c(0,Inf))}else{ageGroup <- append(ageGroup, list("overall" = c(0, Inf)))} @@ -63,33 +64,30 @@ summariseInObservation <- function(observationPeriod, # Create initial variables ---- cdm <- omopgenerics::cdmReference(observationPeriod) observationPeriod <- addStrataToPeopleInObservation(cdm, ageGroup, sex, tablePrefix) - strata <- getStrataList(sex, ageGroup) - - # Calculate denominator ---- - denominator <- cdm |> getDenominator(output) + # Observation period ---- name <- "observation_period" start_date_name <- startDate(name) end_date_name <- endDate(name) - # Observation period ---- - if(interval != "overall"){ - timeInterval <- getIntervalTibbleForObservation(observationPeriod, start_date_name, end_date_name, interval, unitInterval) + interval <- getIntervalTibbleForObservation(observationPeriod, start_date_name, end_date_name, unit, unitInterval) - # Insert interval table to the cdm ---- - cdm <- cdm |> - omopgenerics::insertTable(name = paste0(tablePrefix,"interval"), table = timeInterval) - } + # Insert interval table to the cdm ---- + cdm <- cdm |> + omopgenerics::insertTable(name = paste0(tablePrefix,"interval"), table = interval) + + # Calculate denominator ---- + denominator <- cdm |> getDenominator(output) # Count records ---- result <- observationPeriod |> - countRecords(cdm, start_date_name, end_date_name, interval, output, tablePrefix) + countRecords(cdm, start_date_name, end_date_name, unit, output, tablePrefix) # Add category sex overall result <- addSexOverall(result, sex) # Create summarisedResult - result <- createSummarisedResultObservationPeriod(result, observationPeriod, name, denominator, original_interval) + result <- createSummarisedResultObservationPeriod(result, observationPeriod, name, denominator, unit, unitInterval) CDMConnector::dropTable(cdm, name = dplyr::starts_with(tablePrefix)) return(result) @@ -136,23 +134,23 @@ getDenominator <- function(cdm, output){ } } -getIntervalTibbleForObservation <- function(omopTable, start_date_name, end_date_name, interval, unitInterval){ +getIntervalTibbleForObservation <- function(omopTable, start_date_name, end_date_name, unit, unitInterval){ startDate <- getOmopTableStartDate(omopTable, start_date_name) endDate <- getOmopTableEndDate(omopTable, end_date_name) tibble::tibble( - "group" = seq.Date(startDate, endDate, .env$interval) + "group" = seq.Date(startDate, endDate, .env$unit) ) |> dplyr::rowwise() |> dplyr::mutate("interval" = max(which( - .data$group >= seq.Date(from = startDate, to = endDate, by = paste(.env$unitInterval, .env$interval)) + .data$group >= seq.Date(from = startDate, to = endDate, by = paste(.env$unitInterval, .env$unit)) ), na.rm = TRUE)) |> dplyr::ungroup() |> dplyr::group_by(.data$interval) |> dplyr::mutate( "interval_start_date" = min(.data$group), - "interval_end_date" = dplyr::if_else(.env$interval == "year", + "interval_end_date" = dplyr::if_else(.env$unit == "year", clock::add_years(min(.data$group),.env$unitInterval)-1, clock::add_months(min(.data$group),.env$unitInterval)-1) ) |> @@ -168,99 +166,81 @@ getIntervalTibbleForObservation <- function(omopTable, start_date_name, end_date dplyr::distinct() } -countRecords <- function(observationPeriod, cdm, start_date_name, end_date_name, interval, output, tablePrefix){ +countRecords <- function(observationPeriod, cdm, start_date_name, end_date_name, unit, output, tablePrefix){ if(output == "person-days" | output == "all"){ - if(interval != "overall"){ - x <- cdm[[paste0(tablePrefix, "interval")]] |> - dplyr::rename("additional_level" = "interval_group") |> - dplyr::cross_join( - observationPeriod |> - dplyr::select("start_date" = "observation_period_start_date", - "end_date" = "observation_period_end_date", - "age_group", "sex","person_id") - ) |> - dplyr::filter((.data$start_date < .data$interval_start_date & .data$end_date >= .data$interval_start_date) | - (.data$start_date >= .data$interval_start_date & .data$start_date <= .data$interval_end_date)) %>% - dplyr::mutate(start_date = pmax(.data$interval_start_date, .data$start_date, na.rm = TRUE)) |> - dplyr::mutate(end_date = pmin(.data$interval_end_date, .data$end_date, na.rm = TRUE)) |> - dplyr::compute(temporary = FALSE, name = tablePrefix) - }else{ - x <- observationPeriod |> - dplyr::rename("start_date" = "observation_period_start_date", - "end_date" = "observation_period_end_date") |> - dplyr::mutate("additional_level" = "overall", - "additional_name" = "overall") - } + x <- cdm[[paste0(tablePrefix, "interval")]] |> + dplyr::cross_join( + observationPeriod |> + dplyr::select("start_date" = "observation_period_start_date", + "end_date" = "observation_period_end_date", + "age_group", "sex","person_id") + ) |> + dplyr::filter((.data$start_date < .data$interval_start_date & .data$end_date >= .data$interval_start_date) | + (.data$start_date >= .data$interval_start_date & .data$start_date <= .data$interval_end_date)) %>% + dplyr::mutate(start_date = pmax(.data$interval_start_date, .data$start_date, na.rm = TRUE)) |> + dplyr::mutate(end_date = pmin(.data$interval_end_date, .data$end_date, na.rm = TRUE)) |> + dplyr::compute(temporary = FALSE, name = tablePrefix) personDays <- x %>% dplyr::mutate(estimate_value = !!CDMConnector::datediff("start_date","end_date", interval = "day")+1) |> - dplyr::group_by(dplyr::across(dplyr::any_of(c("additional_level", "sex", "age_group")))) |> + dplyr::group_by(.data$interval_group, .data$sex, .data$age_group) |> dplyr::summarise(estimate_value = sum(.data$estimate_value, na.rm = TRUE), .groups = "drop") |> - dplyr::mutate("variable_name" = "Number person-days", - "additional_name" = "time_interval") |> + dplyr::mutate(variable_name = "Number person-days") |> dplyr::collect() }else{ - personDays <- createEmptyIntervalTable(interval) + personDays <- createEmptyIntervalTable() } if(output == "records" | output == "all"){ - - if(interval != "overall"){ - x <- observationPeriod |> - dplyr::mutate("start_date" = as.Date(paste0(clock::get_year(.data[[start_date_name]]),"/",clock::get_month(.data[[start_date_name]]),"/01"))) |> - dplyr::mutate("end_date" = as.Date(paste0(clock::get_year(.data[[end_date_name]]),"/",clock::get_month(.data[[end_date_name]]),"/01"))) |> - dplyr::group_by(.data$start_date, .data$end_date, .data$age_group, .data$sex) |> - dplyr::summarise(estimate_value = dplyr::n(), .groups = "drop") |> - dplyr::compute(temporary = FALSE, name = tablePrefix) - - records <- cdm[[paste0(tablePrefix, "interval")]] |> - dplyr::rename("additional_level" = "interval_group") |> - dplyr::cross_join(x) |> - dplyr::filter((.data$start_date < .data$interval_start_date & .data$end_date >= .data$interval_start_date) | - (.data$start_date >= .data$interval_start_date & .data$start_date <= .data$interval_end_date)) |> - dplyr::group_by(.data$additional_level, .data$age_group, .data$sex) |> - dplyr::summarise(estimate_value = sum(.data$estimate_value, na.rm = TRUE), .groups = "drop") |> - dplyr::mutate("variable_name" = "Number records in observation", - "additional_name" = "time_interval") |> - dplyr::collect() - }else{ - records <- observationPeriod |> - dplyr::group_by(.data$age_group, .data$sex) |> - dplyr::summarise(estimate_value = dplyr::n(), .groups = "drop") |> - dplyr::mutate("variable_name" = "Number records in observation", - "additional_level" = "overall", - "additional_name" = "overall") |> - dplyr::collect() - } + x <- observationPeriod |> + dplyr::mutate("start_date" = as.Date(paste0(clock::get_year(.data[[start_date_name]]),"/",clock::get_month(.data[[start_date_name]]),"/01"))) |> + dplyr::mutate("end_date" = as.Date(paste0(clock::get_year(.data[[end_date_name]]),"/",clock::get_month(.data[[end_date_name]]),"/01"))) |> + dplyr::group_by(.data$start_date, .data$end_date, .data$age_group, .data$sex) |> + dplyr::summarise(estimate_value = dplyr::n(), .groups = "drop") |> + dplyr::compute(temporary = FALSE, name = tablePrefix) + + records <- cdm[[paste0(tablePrefix, "interval")]] |> + dplyr::cross_join(x) |> + dplyr::filter((.data$start_date < .data$interval_start_date & .data$end_date >= .data$interval_start_date) | + (.data$start_date >= .data$interval_start_date & .data$start_date <= .data$interval_end_date)) |> + dplyr::group_by(.data$interval_group, .data$age_group, .data$sex) |> + dplyr::summarise(estimate_value = sum(.data$estimate_value, na.rm = TRUE), .groups = "drop") |> + dplyr::mutate(variable_name = "Number records in observation") |> + dplyr::collect() }else{ - records <- createEmptyIntervalTable(interval) + records <- createEmptyIntervalTable() } x <- personDays |> rbind(records) |> - dplyr::arrange(dplyr::across(dplyr::any_of("additional_level"))) + dplyr::arrange(.data$interval_group) |> + dplyr::rename("time_interval" = "interval_group") + + omopgenerics::dropTable(cdm = cdm, name = c(dplyr::starts_with(tablePrefix))) return(x) } -createSummarisedResultObservationPeriod <- function(result, observationPeriod, name, denominator, original_interval){ +createSummarisedResultObservationPeriod <- function(result, observationPeriod, name, denominator, unit, unitInterval){ result <- result |> dplyr::mutate("estimate_value" = as.character(.data$estimate_value)) |> + dplyr::rename("variable_level" = "time_interval") |> visOmopResults::uniteStrata(cols = c("sex", "age_group")) |> dplyr::mutate( "result_id" = as.integer(1), "cdm_name" = omopgenerics::cdmName(omopgenerics::cdmReference(observationPeriod)), "group_name" = "omop_table", "group_level" = name, - "variable_level" = as.character(NA), "estimate_name" = "count", - "estimate_type" = "integer" + "estimate_type" = "integer", + "additional_name" = "overall", + "additional_level" = "overall" ) result <- result |> rbind(result) |> - dplyr::group_by(.data$additional_level, .data$strata_level, .data$variable_name) |> + dplyr::group_by(.data$variable_level, .data$strata_level, .data$variable_name) |> dplyr::mutate(estimate_type = dplyr::if_else(dplyr::row_number() == 2, "percentage", .data$estimate_type)) |> dplyr::inner_join(denominator, by = "variable_name") |> dplyr::mutate(estimate_value = dplyr::if_else(.data$estimate_type == "percentage", as.character(as.numeric(.data$estimate_value)/denominator*100), .data$estimate_value)) |> @@ -271,62 +251,62 @@ createSummarisedResultObservationPeriod <- function(result, observationPeriod, n "result_type" = "summarise_in_observation", "package_name" = "OmopSketch", "package_version" = as.character(utils::packageVersion("OmopSketch")), - "interval" = .env$original_interval + "unit" = .env$unit, + "unitInterval" = .env$unitInterval )) return(result) } -addStrataToPeopleInObservation <- function(cdm, ageGroup, sex, tablePrefix) { - demographics <- cdm |> - CohortConstructor::demographicsCohort( - name = paste0(tablePrefix, "demographics_table"), - sex = NULL, - ageRange = ageGroup, - minPriorObservation = NULL - ) |> - suppressMessages() - - if (sex) { - demographics <- demographics |> - PatientProfiles::addSexQuery() - } else { - demographics <- demographics |> - dplyr::mutate("sex" = "overall") - } +addStrataToPeopleInObservation <- function(cdm, ageGroup, sex, tablePrefix){ + demographics_table <- suppressWarnings(suppressMessages( + cdm |> + CohortConstructor::demographicsCohort(name = paste0(tablePrefix, "demographics_table"), + sex = NULL, + ageRange = ageGroup, + minPriorObservation = NULL) + )) + + if(is.null(ageGroup)){ + demographics <- demographics_table |> + dplyr::rename("observation_period_start_date" = "cohort_start_date", + "observation_period_end_date" = "cohort_end_date", + "person_id" = "subject_id") |> + dplyr::select(-c("cohort_definition_id")) |> + dplyr::mutate("age_group" = "overall") |> + dplyr::compute(temporary = FALSE, name = paste0(tablePrefix, "demographics")) + }else{ + age_tibble <- dplyr::tibble( + "age_range" = gsub(",","_",gsub("\\)","",gsub("c\\(","",gsub(" ","",ageGroup)))), + "age_group" = names(ageGroup) + ) + + settings <- demographics_table |> + CDMConnector::settings() |> + dplyr::inner_join(age_tibble, by = "age_range") |> + dplyr::select("cohort_definition_id","age_group") - if (!is.null(ageGroup)) { - set <- omopgenerics::settings(demographics) |> - dplyr::select("cohort_definition_id", dplyr::any_of("age_range")) - set <- set |> - dplyr::left_join( - dplyr::tibble( - "age_range" = purrr::map_chr(ageGroup, \(x) paste0(x[1], "_", x[2])), - "age_group" = names(ageGroup) - ), - by = "age_range" + cdm <- cdm |> + omopgenerics::insertTable(name = paste0(tablePrefix, "settings"), table = settings) + + demographics <- demographics_table |> + dplyr::inner_join(cdm[[paste0(tablePrefix,"settings")]], by = "cohort_definition_id") |> + dplyr::rename("observation_period_start_date" = "cohort_start_date", + "observation_period_end_date" = "cohort_end_date", + "person_id" = "subject_id") |> + dplyr::select(-c("cohort_definition_id")) |> + dplyr::inner_join( + cdm[["person"]] |> dplyr::select("person_id"), by = "person_id" ) |> - dplyr::mutate("age_group" = dplyr::if_else( - is.na(.data$age_group), .data$age_range, .data$age_group - )) |> - dplyr::select(!"age_range") - nm <- paste0(tablePrefix, "_settings") - cdm <- omopgenerics::insertTable(cdm = cdm, name = nm, table = set) - demographics <- demographics |> - dplyr::left_join(cdm[[nm]], by = "cohort_definition_id") - } else { - demographics <- demographics |> - dplyr::mutate("age_group" = "overall") + dplyr::compute(name = paste0(tablePrefix, "demographics"), temporary = FALSE) } - nm <- paste0(tablePrefix, "_demographics") - demographics <- demographics |> - dplyr::select( - "observation_period_start_date" = "cohort_start_date", - "observation_period_end_date" = "cohort_end_date", - "person_id" = "subject_id", "age_group", "sex" - ) |> - dplyr::compute(name = nm, temporary = FALSE) + + if(sex){ + demographics <- demographics |> PatientProfiles::addSexQuery() + }else{ + demographics <- demographics |> dplyr::mutate(sex = "overall") + } return(demographics) } @@ -335,30 +315,19 @@ addSexOverall <- function(result, sex){ if(sex){ result <- result |> rbind( result |> - dplyr::group_by(.data$age_group, .data$additional_level, .data$variable_name) |> + dplyr::group_by(.data$age_group, .data$time_interval, .data$variable_name) |> dplyr::summarise(estimate_value = sum(.data$estimate_value, na.rm = TRUE), .groups = "drop") |> - dplyr::mutate(sex = "overall", - additional_name = dplyr::if_else(.data$additional_level == "overall", "overall", "time_interval")) + dplyr::mutate(sex = "overall") ) } return(result) } -createEmptyIntervalTable <- function(interval){ - if(interval == "overall"){ - tibble::tibble( - "sex" = as.character(), - "age_group" = as.character(), - "estimate_value" = as.double() - ) - - }else{ - tibble::tibble( - "interval_group" = as.character(), - "sex" = as.character(), - "age_group" = as.character(), - "estimate_value" = as.double() - ) - } - +createEmptyIntervalTable <- function(){ + tibble::tibble( + "interval_group" = as.character(), + "sex" = as.character(), + "age_group" = as.character(), + "estimate_value" = as.double() + ) } diff --git a/R/summariseMissingData.R b/R/summariseMissingData.R deleted file mode 100644 index 6b9dc0dd..00000000 --- a/R/summariseMissingData.R +++ /dev/null @@ -1,156 +0,0 @@ -#' Summarise missing data in omop tables -#' -#' @param cdm A cdm object -#' @param omopTableName A character vector of the names of the tables to -#' summarise in the cdm object. -#' @param col A character vector of column names to check for missing values. -#' If `NULL`, all columns in the specified tables are checked. Default is `NULL`. -#' @param sex TRUE or FALSE. If TRUE code use will be summarised by sex. -#' @param year TRUE or FALSE. If TRUE code use will be summarised by year. -#' @param ageGroup A list of ageGroup vectors of length two. Code use will be -#' thus summarised by age groups. -#' @return A summarised_result object with results overall and, if specified, by -#' strata. -#' @export -summariseMissingData <- function(cdm, - omopTableName, - col = NULL, - sex = FALSE, - year = FALSE, - ageGroup = NULL){ - - - omopgenerics::validateCdmArgument(cdm) - - omopgenerics::assertLogical(sex, length = 1) - omopgenerics::assertChoice(omopTableName,choices = omopgenerics::omopTables(), unique = TRUE) - - - ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]] - - strata <- my_getStrataList(sex = sex, ageGroup = ageGroup, year = year) - stratification <- c(list(character()),omopgenerics::combineStrata(strata)) - - result_tables <- purrr::map(omopTableName, function(table) { - - if (omopgenerics::isTableEmpty(cdm[[table]])){ - cli::cli_warn(paste0(table, " omop table is empty.")) - return(NULL) - } - - omopTable <- cdm[[table]] - col_table <- intersect(col, colnames(omopTable)) - if (is.null(col_table) | rlang::is_empty(col_table)){ - col_table<-colnames(omopTable) - } - - indexDate <- startDate(omopgenerics::tableName(omopTable)) - x <- omopTable |> PatientProfiles::addDemographicsQuery(age = FALSE, ageGroup = ageGroup, sex = sex, indexDate = indexDate) - if (year){ - x <- x|> dplyr::mutate(year = as.character(clock::get_year(.data[[indexDate]]))) - } - - result_columns <- purrr::map(col_table, function(c) { - - stratified_result <- x |> - dplyr::group_by(dplyr::across(dplyr::all_of(strata))) |> - dplyr::summarise( - na_count = sum(as.integer(is.na(.data[[c]])), na.rm = TRUE), - total_count = dplyr::n(), - .groups = "drop" - ) |> - dplyr::collect() - - # Group results for each level of stratification - grouped_results <- purrr::map(stratification, function(g) { - stratified_result |> - dplyr::group_by(dplyr::across(dplyr::all_of(g))) |> - dplyr::summarise( - na_count = sum(.data$na_count, na.rm = TRUE), - total_count = sum(.data$total_count, na.rm = TRUE), - colName = c, - .groups = "drop" - ) |> - dplyr::mutate(na_percentage = dplyr::if_else(.data$total_count > 0, (.data$na_count / .data$total_count) * 100, 0)) - }) - - return(purrr::reduce(grouped_results, dplyr::bind_rows)) - - }) - - res <- purrr::reduce(result_columns, dplyr::union)|> - dplyr::mutate(omop_table = table) - - warningDataRequire(cdm = cdm, res = res, table = table) - - return(res) - }) - if (rlang::is_empty(purrr::compact(result_tables))){ - return(omopgenerics::emptySummarisedResult()) - } - - - result <- purrr::compact(result_tables) |> - purrr::reduce(dplyr::union)|> - dplyr::mutate(dplyr::across(dplyr::all_of(strata), ~ dplyr::coalesce(., "overall")))|> - dplyr::mutate( - na_count = as.double(.data$na_count), # Cast na_count to double - na_percentage = as.double(.data$na_percentage) - )|> - tidyr::pivot_longer( - cols = c(.data$na_count, .data$na_percentage), - names_to = "estimate_name", - values_to = "estimate_value" - ) - - - sr <- result |> - dplyr::mutate( - result_id = 1L, - cdm_name = omopgenerics::cdmName(cdm), - ) |> - visOmopResults::uniteGroup(cols = "omop_table") |> - visOmopResults::uniteStrata(cols = strata) |> - visOmopResults::uniteAdditional() |> - dplyr::mutate( - "estimate_value" = as.character(.data$estimate_value), - "estimate_type" = "integer", - "variable_level" = NA_character_ - ) |> - dplyr::rename("variable_name" = "colName") |> - dplyr::select(!c(.data$total_count)) - - settings <- dplyr::tibble( - result_id = unique(sr$result_id), - package_name = "omopSketch", - package_version = as.character(utils::packageVersion("OmopSketch")), - result_type = "summarise_missing_data" - ) - sr <- sr |> - omopgenerics::newSummarisedResult(settings = settings) - - - return(sr) - -} - -warningDataRequire <- function(cdm, table, res){ -required_cols <- omopgenerics::omopTableFields(CDMConnector::cdmVersion(cdm))|> - dplyr::filter(.data$cdm_table_name==table)|> - dplyr::filter(.data$is_required==TRUE)|> - dplyr::pull(.data$cdm_field_name) -warning_columns <- res |> - dplyr::filter(.data$colName %in% required_cols)|> - dplyr::filter(.data$na_count>0)|> - dplyr::distinct(.data$colName)|> - dplyr::pull() - -if (length(warning_columns) > 0) { - cli::cli_warn(c( - "These columns contain missing values, which are not permitted:", - "{.val {warning_columns}}" - )) -} -} - - diff --git a/R/summariseObservationPeriod.R b/R/summariseObservationPeriod.R index 98b54773..4aae2672 100644 --- a/R/summariseObservationPeriod.R +++ b/R/summariseObservationPeriod.R @@ -1,6 +1,5 @@ #' Summarise the observation period table getting some overall statistics in a #' summarised_result object. -#' #' @param observationPeriod observation_period omop table. #' @param estimates Estimates to summarise the variables of interest ( #' `records per person`, `duration in days` and @@ -8,11 +7,8 @@ #' @param ageGroup A list of age groups to stratify results by. #' @param sex Boolean variable. Whether to stratify by sex (TRUE) or not #' (FALSE). -#' #' @return A summarised_result object with the summarised data. -#' #' @export -#' #' @examples #' \donttest{ #' library(dplyr, warn.conflicts = FALSE) @@ -32,7 +28,7 @@ summariseObservationPeriod <- function(observationPeriod, "median", "q75", "q95", "max", "density"), ageGroup = NULL, - sex = FALSE) { + sex = FALSE){ # input checks omopgenerics::assertClass(observationPeriod, class = "omop_table") omopgenerics::assertTrue(omopgenerics::tableName(observationPeriod) == "observation_period") @@ -50,7 +46,7 @@ summariseObservationPeriod <- function(observationPeriod, if (omopgenerics::isTableEmpty(observationPeriod)) { obsSr <- observationPeriod |> - # dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 + dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 PatientProfiles::summariseResult( variables = NULL, estimates = NULL, counts = TRUE) } else { @@ -74,7 +70,7 @@ summariseObservationPeriod <- function(observationPeriod, dplyr::collect() obsSr <- obs |> - # dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 + dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 PatientProfiles::summariseResult( strata = strataId, variables = c("duration", "next_obs"), @@ -86,7 +82,7 @@ summariseObservationPeriod <- function(observationPeriod, dplyr::group_by(.data$person_id, dplyr::across(dplyr::any_of(c("sex","age_group")))) |> dplyr::tally(name = "n") |> dplyr::ungroup() |> - # dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 + dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 PatientProfiles::summariseResult( variables = c("n"), estimates = estimates, @@ -123,8 +119,13 @@ summariseObservationPeriod <- function(observationPeriod, } addOrdinalLevels <- function(x) { - strata_cols <- visOmopResults::strataColumns(x) - strata_cols <- strata_cols[strata_cols != "id"] + strata_cols <- x |> + dplyr::select("strata_name") |> + dplyr::filter(!grepl("&&&", x$strata_name), + .data$strata_name != "overall", + .data$strata_name != "id") |> + dplyr::distinct() |> + dplyr::pull("strata_name") x <- x |> visOmopResults::splitStrata() @@ -144,7 +145,7 @@ addOrdinalLevels <- function(x) { x <- x |> dplyr::mutate("group_level" = .env$val) |> dplyr::select(-c("id")) |> - dplyr::mutate("group_name" = "observation_period_ordinal") |> + dplyr::mutate("group_name" = dplyr::if_else(.data$group_level == "overall", "overall", "observation_period_ordinal")) |> visOmopResults::uniteStrata(cols = strata_cols) return(x) diff --git a/R/summariseOmopSnapshot.R b/R/summariseOmopSnapshot.R index 7e2fa575..8342e856 100644 --- a/R/summariseOmopSnapshot.R +++ b/R/summariseOmopSnapshot.R @@ -6,9 +6,12 @@ #' @return A summarised_result object. #' @export #' @examples +#' \donttest{ +#' library(OmopSketch) #' cdm <- mockOmopSketch(numberIndividuals = 10) #' #' summariseOmopSnapshot(cdm) +#' } summariseOmopSnapshot <- function(cdm) { omopgenerics::validateCdmArgument(cdm) @@ -19,7 +22,7 @@ summariseOmopSnapshot <- function(cdm) { internalTibble() |> omopgenerics::newSummarisedResult(settings = dplyr::tibble( result_id = unique(summaryTable$result_id), - package_name = "OmopSketch", + package_name = "omopSketch", package_version = as.character(utils::packageVersion("OmopSketch")), result_type = "summarise_omop_snapshot" )) diff --git a/R/summarisePopulationCharacteristics.R b/R/summarisePopulationCharacteristics.R new file mode 100644 index 00000000..aa0e6dbf --- /dev/null +++ b/R/summarisePopulationCharacteristics.R @@ -0,0 +1,82 @@ + +#' Summarise the characteristics of the base population of a cdm_reference +#' object. +#' +#' @param cdm A cdm_reference object. +#' @param studyPeriod Dates to trim the observation period. If NA, +#' min(observation_period_start_date) and/or max(observation_period_end_date) +#' are used. +#' @param sex Whether to stratify the results by sex. +#' @param ageGroup List of age groups to stratify by at index date. +#' @return A summarised_result object. +#' @export +#' @examples +#' \donttest{ +#' cdm <- mockOmopSketch() +#' +#' summarisedPopulation <- summarisePopulationCharacteristics( +#' cdm = cdm, +#' studyPeriod = c("2010-01-01", NA), +#' sex = TRUE, +#' ageGroup = NULL +#' ) +#' +#' summarisedPopulation |> print() +#' +#' PatientProfiles::mockDisconnect(cdm = cdm) +#' } +summarisePopulationCharacteristics <- function(cdm, + studyPeriod = c(NA, NA), + sex = FALSE, + ageGroup = NULL) { + # check inputs + omopgenerics::validateCdmArgument(cdm) + studyPeriod <- validateStudyPeriod(cdm, studyPeriod) + omopgenerics::assertLogical(sex, length = 1) + ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]] + + cohort <- CohortConstructor::demographicsCohort(cdm = cdm, + name = omopgenerics::uniqueTableName()) |> + CohortConstructor::trimToDateRange(dateRange = studyPeriod) |> + PatientProfiles::addAgeQuery(indexDate = "cohort_end_date", + ageName = "age_at_end") + + cohort <- cohort |> + PatientProfiles::addDemographicsQuery(ageGroup = ageGroup, sex = sex, + priorObservation = F, + futureObservation = F, + age = F) + if(!is.null(ageGroup)) { + cohort <- cohort |> + dplyr::rename("age_group_at_start" = "age_group") + } + + strata <- switch( + paste(is.null(ageGroup), sex), + "TRUE TRUE" = list("sex"), + "TRUE FALSE" = list(), + "FALSE TRUE" = list("age_group_at_start", "sex", c("age_group_at_start", "sex")), + "FALSE FALSE" = list("age_group_at_start") + ) + + summarisedCohort <- cohort |> + CohortCharacteristics::summariseCharacteristics(strata = strata, + otherVariables = "age_at_end") |> + dplyr::mutate(variable_name = dplyr::if_else(.data$variable_name == "Age", "Age at start", .data$variable_name)) |> + dplyr::mutate(variable_name = factor(.data$variable_name, + levels = c("Number records", "Number subjects", "Cohort start date", "Cohort end date", + "Age at start", "Age at end", "Sex", "Prior observation", "Future observation", + "Days in cohort"))) |> + dplyr::arrange(.data$variable_name) |> + omopgenerics::newSummarisedResult( + settings = dplyr::tibble( + "result_id" = 1L, + "package_name" = "OmopSketch", + "package_version" = as.character(utils::packageVersion( + "OmopSketch" + )), + "result_type" = "summarise_population_characteristics" + )) + + return(summarisedCohort) +} diff --git a/R/summariseRecordCount.R b/R/summariseRecordCount.R index c5cb0f2f..8c3db977 100644 --- a/R/summariseRecordCount.R +++ b/R/summariseRecordCount.R @@ -4,7 +4,9 @@ #' #' @param cdm A cdm_reference object. #' @param omopTableName A character vector of omop tables from the cdm. -#' @param interval Time interval to stratify by. It can either be "years", "quarters", "months" or "overall". +#' @param unit Time unit it can either be "year" or "month". +#' @param unitInterval Number of years or months to include within the same +#' interval. #' @param ageGroup A list of age groups to stratify results by. #' @param sex Whether to stratify by sex (TRUE) or not (FALSE). #' @return A summarised_result object. @@ -18,7 +20,8 @@ #' summarisedResult <- summariseRecordCount( #' cdm = cdm, #' omopTableName = c("condition_occurrence", "drug_exposure"), -#' interval = "years", +#' unit = "year", +#' unitInterval = 10, #' ageGroup = list("<=20" = c(0,20), ">20" = c(21, Inf)), #' sex = TRUE #' ) @@ -30,17 +33,16 @@ #' } summariseRecordCount <- function(cdm, omopTableName, - interval = "overall", + unit = "year", + unitInterval = 1, ageGroup = NULL, sex = FALSE) { # Initial checks ---- omopgenerics::validateCdmArgument(cdm) omopgenerics::assertCharacter(omopTableName) - original_interval <- interval - x <- validateIntervals(interval) - interval <- x$interval - unitInterval <- x$unitInterval + checkUnit(unit) + omopgenerics::assertNumeric(unitInterval, length = 1, min = 1) ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]] omopgenerics::assertLogical(sex, length = 1) @@ -53,9 +55,8 @@ summariseRecordCount <- function(cdm, } summariseRecordCountInternal(x, cdm = cdm, - interval = interval, + unit = unit, unitInterval = unitInterval, - original_interval, ageGroup = ageGroup, sex = sex) } @@ -66,10 +67,9 @@ summariseRecordCount <- function(cdm, } #' @noRd -summariseRecordCountInternal <- function(omopTableName, cdm, interval, unitInterval, - original_interval, ageGroup, sex) { +summariseRecordCountInternal <- function(omopTableName, cdm, unit, unitInterval, + ageGroup, sex) { - prefix <- omopgenerics::tmpPrefix() omopTable <- cdm[[omopTableName]] |> dplyr::ungroup() # Create initial variables ---- @@ -78,39 +78,37 @@ summariseRecordCountInternal <- function(omopTableName, cdm, interval, unitInter result <- omopgenerics::emptySummarisedResult() date <- startDate(omopTableName) - strata <- getStrataList(sex, ageGroup) + # Create strata variable ---- + strata <- c("age_group","sex") # Incidence counts ---- omopTable <- omopTable |> dplyr::select(dplyr::all_of(date), "person_id") - result <- addStrataToOmopTable(omopTable, date, ageGroup, sex) + omopTable <- addStrataToOmopTable(omopTable, date, ageGroup, sex) if(omopTableName != "observation_period") { - result <- result |> + omopTable <- omopTable |> filterInObservation(indexDate = date) } - if(interval != "overall"){ - # interval sequence ---- - timeInterval <- getIntervalTibble(omopTable = omopTable, - start_date_name = date, - end_date_name = date, - interval = interval, - unitInterval = unitInterval) - - # Insert interval table to the cdm ---- - cdm <- cdm |> omopgenerics::insertTable(name = paste0(prefix, "interval"), table = timeInterval) + # interval sequence ---- + interval <- getIntervalTibble(omopTable = omopTable, + start_date_name = date, + end_date_name = date, + unit = unit, + unitInterval = unitInterval) - # Obtain record counts for each interval ---- - result <- splitIncidenceBetweenIntervals(cdm, result, date, prefix) + # Insert interval table to the cdm ---- + cdm <- cdm |> + omopgenerics::insertTable(name = "interval", table = interval) - strata <- omopgenerics::combineStrata(c(unique(unlist(strata)), "interval_group")) - } + # Obtain record counts for each interval ---- + result <- splitIncidenceBetweenIntervals(cdm, omopTable, date, strata) # Create summarised result ---- - result <- createSummarisedResultRecordCount(result, strata, omopTable, omopTableName, original_interval) - omopgenerics::dropTable(cdm = cdm, name = dplyr::starts_with(prefix)) + result <- createSummarisedResultRecordCount(result, sex, ageGroup, omopTable, omopTableName, unit, unitInterval) + omopgenerics::dropTable(cdm = cdm, name = "interval") return(result) } @@ -134,19 +132,20 @@ filterPersonId <- function(omopTable){ return(omopTable) } -addStrataToOmopTable <- function(omopTable, date, ageGroup, sex) { - omopTable |> - PatientProfiles::addDemographicsQuery( - indexDate = date, - age = FALSE, - ageGroup = ageGroup, - missingAgeGroupValue = "unknown", - sex = sex, - missingSexValue = "unknown", - priorObservation = FALSE, - futureObservation = FALSE, - dateOfBirth = FALSE - ) +addStrataToOmopTable <- function(omopTable, date, ageGroup, sex){ + suppressWarnings(omopTable |> + dplyr::mutate(sex = "overall") |> + dplyr::mutate(age_group = "overall") |> + PatientProfiles::addDemographicsQuery(indexDate = date, + age = FALSE, + ageGroup = ageGroup, + missingAgeGroupValue = "unknown", + sex = sex, + missingSexValue = "unknown", + priorObservation = FALSE, + futureObservation = FALSE, + dateOfBirth = FALSE)) + } filterInObservation <- function(x, indexDate){ @@ -186,7 +185,7 @@ getOmopTableEndDate <- function(omopTable, date){ dplyr::pull("end_date") } -getIntervalTibble <- function(omopTable, start_date_name, end_date_name, interval, unitInterval){ +getIntervalTibble <- function(omopTable, start_date_name, end_date_name, unit, unitInterval){ startDate <- getOmopTableStartDate(omopTable, start_date_name) endDate <- getOmopTableEndDate(omopTable, end_date_name) @@ -195,14 +194,14 @@ getIntervalTibble <- function(omopTable, start_date_name, end_date_name, interva ) |> dplyr::rowwise() |> dplyr::mutate("interval" = max(which( - .data$group >= seq.Date(from = startDate, to = endDate, by = paste(.env$unitInterval, .env$interval)) + .data$group >= seq.Date(from = startDate, to = endDate, by = paste(.env$unitInterval, .env$unit)) ), na.rm = TRUE)) |> dplyr::ungroup() |> dplyr::group_by(.data$interval) |> dplyr::mutate( "interval_start_date" = min(.data$group), - "interval_end_date" = dplyr::if_else(.env$interval == "year", + "interval_end_date" = dplyr::if_else(.env$unit == "year", clock::add_years(min(.data$group),.env$unitInterval)-1, clock::add_months(min(.data$group),.env$unitInterval)-1) ) |> @@ -219,8 +218,8 @@ getIntervalTibble <- function(omopTable, start_date_name, end_date_name, interva dplyr::distinct() } -splitIncidenceBetweenIntervals <- function(cdm, omopTable, date, prefix){ - cdm[[paste0(prefix, "interval")]] |> +splitIncidenceBetweenIntervals <- function(cdm, omopTable, date, strata){ + cdm$interval |> dplyr::inner_join( omopTable |> dplyr::rename("incidence_date" = dplyr::all_of(.env$date)) |> @@ -229,45 +228,35 @@ splitIncidenceBetweenIntervals <- function(cdm, omopTable, date, prefix){ ) |> dplyr::select(-c("my")) |> dplyr::relocate("person_id") |> - dplyr::select(-c("interval_start_date", "interval_end_date", "incidence_date")) + dplyr::select(-c("interval_start_date", "interval_end_date", "incidence_date", "person_id")) } -createSummarisedResultRecordCount <- function(result, strata, omopTable, omopTableName, original_interval){ +createSummarisedResultRecordCount <- function(result, sex, ageGroup, omopTable, omopTableName, unit, unitInterval){ - result <- result |> - dplyr::mutate(n = 1) |> - dplyr::select(-"person_id") |> + result |> + dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 PatientProfiles::summariseResult( - variables = "n", - strata = strata, + strata = getStrataList(sex, ageGroup), includeOverallStrata = TRUE, - estimates = as.character(), - counts = TRUE, + estimates = "count", + counts = FALSE ) |> - suppressMessages() |> - dplyr::mutate("variable_name" = stringr::str_to_sentence(.data$variable_name)) |> + dplyr::filter(!.data$variable_name %in% c("sex", "age_group")) |> + dplyr::mutate("variable_name" = "incidence_records") |> dplyr::mutate( + "result_id" = as.integer(1), + "cdm_name" = omopgenerics::cdmName(omopgenerics::cdmReference(omopTable)), "group_name" = "omop_table", - "group_level" = omopTableName - ) - - if(original_interval != "overall"){ - result <- result |> - visOmopResults::splitStrata() |> - dplyr::mutate(additional_level = .data$interval_group) |> - dplyr::mutate(additional_name = dplyr::if_else(.data$additional_level == "overall", "overall", "time_interval")) |> - visOmopResults::uniteStrata(unique(unlist(strata))[unique(unlist(strata)) != "interval_group"]) |> - dplyr::select(-"interval_group") - } - - result |> - omopgenerics::newSummarisedResult( - settings = dplyr::tibble( - "result_id" = 1L, - "result_type" = "summarise_record_count", - "package_name" = "OmopSketch", - "package_version" = as.character(utils::packageVersion("OmopSketch")), - "interval" = .env$original_interval - ) - ) + "group_level" = omopTableName, + "additional_name" = "time_interval", + "additional_level" = gsub(" to.*","",.data$variable_level) + ) |> + omopgenerics::newSummarisedResult(settings = dplyr::tibble( + "result_id" = 1L, + "result_type" = "summarise_record_count", + "package_name" = "OmopSketch", + "package_version" = as.character(utils::packageVersion("OmopSketch")), + "unit" = .env$unit, + "unitInterval" = .env$unitInterval + )) } diff --git a/R/tableAllConceptCounts.R b/R/tableAllConceptCounts.R deleted file mode 100644 index 57e7e1af..00000000 --- a/R/tableAllConceptCounts.R +++ /dev/null @@ -1,45 +0,0 @@ -#' Create a visual table from a summariseAllConceptCounts() result. -#' @param result A summarised_result object. -#' @param type Type of formatting output table, either "gt" or "flextable". -#' @return A gt or flextable object with the summarised data. -#' @export -#' -#' -tableAllConceptCounts <- function(result, - type = "gt") { - # initial checks - omopgenerics::validateResultArgument(result) - omopgenerics::assertChoice(type, choicesTables()) - - # subset to result_type of interest - result <- result |> - visOmopResults::filterSettings( - .data$result_type == "summarise_all_concept_counts") - - # check if it is empty - if (nrow(result) == 0) { - warnEmpty("summarise_all_concept_counts") - return(emptyTable(type)) - } - - estimate_names <- result |> - dplyr::distinct(.data$estimate_name) |> - dplyr::pull() - estimateName <- c() - if ("record_count" %in% estimate_names) { - estimateName <- c(estimateName, "N records" = "") - } - if ("person_count" %in% estimate_names) { - estimateName <- c(estimateName, "N persons" = "") - } - - result |> - formatColumn(c("variable_name", "variable_level")) |> - visOmopResults::visOmopTable( - type = type, - estimateName = estimateName, - header = c("cdm_name"), - rename = c("Database name" = "cdm_name"), - groupColumn = c("omop_table", visOmopResults::strataColumns(result)) - ) -} diff --git a/R/tableMissingData.R b/R/tableMissingData.R deleted file mode 100644 index 60dfd276..00000000 --- a/R/tableMissingData.R +++ /dev/null @@ -1,36 +0,0 @@ -#' Create a visual table from a summariseMissingData() result. -#' @param result A summarised_result object. -#' @param type Type of formatting output table, either "gt" or "flextable". -#' @return A gt or flextable object with the summarised data. -#' @export -#' -#' -tableMissingData <- function(result, - type = "gt") { - # initial checks - omopgenerics::validateResultArgument(result) - omopgenerics::assertChoice(type, choicesTables()) - - # subset to result_type of interest - result <- result |> - visOmopResults::filterSettings( - .data$result_type == "summarise_missing_data") - - # check if it is empty - if (nrow(result) == 0) { - warnEmpty("summarise_missing_data") - return(emptyTable(type)) - } - - result |> - formatColumn(c("variable_name", "variable_level")) |> - visOmopResults::visOmopTable( - type = type, - estimateName = c( - "N (%)" = " (%)", - "N" = ""), - header = c("cdm_name"), - rename = c("Database name" = "cdm_name"), - groupColumn = c("omop_table", visOmopResults::strataColumns(result)) - ) -} diff --git a/R/tableOmopSnapshot.R b/R/tableOmopSnapshot.R index 60166009..1ccb8741 100644 --- a/R/tableOmopSnapshot.R +++ b/R/tableOmopSnapshot.R @@ -5,6 +5,8 @@ #' @return A gt or flextable object with the summarised data. #' @export #' @examples +#' \donttest{ +#' library(OmopSketch) #' cdm <- mockOmopSketch(numberIndividuals = 10) #' #' result <- summariseOmopSnapshot(cdm) @@ -13,6 +15,7 @@ #' tableOmopSnapshot() #' #' PatientProfiles::mockDisconnect(cdm) +#' } tableOmopSnapshot <- function(result, type = "gt") { # initial checks diff --git a/R/tablePopulationCharacteristics.R b/R/tablePopulationCharacteristics.R new file mode 100644 index 00000000..886d4de3 --- /dev/null +++ b/R/tablePopulationCharacteristics.R @@ -0,0 +1,57 @@ + +#' Create a visual table from a summarise_population_characteristics result. +#' +#' @param result Output from summarisePopulationCharacteristics(). +#' @param type Type of formatting output table, either "gt" or "flextable". +#' @return A gt or flextable object with the summarised data. +#' @export +#' @examples +#' \donttest{ +#' cdm <- mockOmopSketch() +#' +#' summarisedPopulation <- summarisePopulationCharacteristics( +#' cdm = cdm, +#' studyPeriod = c("2010-01-01", NA), +#' sex = TRUE, +#' ageGroup = list("<=60" = c(0, 60), ">60" = c(61, Inf)) +#' ) +#' +#' summarisedPopulation |> +#' suppress(minCellCount = 5) |> +#' tablePopulationCharacteristics() +#' +#' PatientProfiles::mockDisconnect(cdm = cdm) +#'} +tablePopulationCharacteristics <- function(result, + type = "gt") { + # Initial checks ---- + omopgenerics::validateResultArgument(result) + omopgenerics::assertChoice(type, choicesTables()) + + # subset to result_type of interest + result <- result |> + visOmopResults::filterSettings( + .data$result_type == "summarise_population_characteristics") + + # check if it is empty + if (nrow(result) == 0) { + warnEmpty("summarise_population_characteristics") + return(emptyTable(type)) + } + + # Function + result <- result |> + visOmopResults::visOmopTable( + hide = c("cohort_name"), + estimateName = c( + "N%" = " ()", + "N" = "", + "Median [Q25 - Q75]" = " [ - ]", + "Mean (SD)" = " ()", + "Range" = " to "), + rename = c("Database name" = "cdm_name"), + header = c("cdm_name"), + groupColumn = visOmopResults::strataColumns(result)) + + return(result) +} diff --git a/R/utilities.R b/R/utilities.R index 007e8582..aa23797f 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1,23 +1,18 @@ startDate <- function(name) { tables$start_date[tables$table_name == name] } - endDate <- function(name) { tables$end_date[tables$table_name == name] } - standardConcept <- function(name) { tables$standard_concept[tables$table_name == name] } - sourceConcept <- function(name) { tables$source_concept[tables$table_name == name] } - typeConcept <- function(name) { tables$type_concept[tables$table_name == name] } - tableId <- function(name) { tables$id[tables$table_name == name] } @@ -26,7 +21,7 @@ warnFacetColour <- function(result, cols) { colsToWarn <- result |> dplyr::select( "cdm_name", "group_name", "group_level", "strata_name", "strata_level", - "variable_name", "variable_level" + "variable_name", "variable_level", "additional_name", "additional_level" ) |> dplyr::distinct() |> visOmopResults::splitAll() |> @@ -41,14 +36,12 @@ warnFacetColour <- function(result, cols) { } invisible(NULL) } - collapseStr <- function(x, sep) { x <- x[x != ""] if (length(x) == 1) return(x) len <- length(x) paste0(paste0(x[-len], collapse = ", "), " ", sep, " ", x[len]) } - asCharacterFacet <- function(facet) { if (rlang::is_formula(facet)) { facet <- as.character(facet) diff --git a/README.Rmd b/README.Rmd deleted file mode 100644 index 8a692bee..00000000 --- a/README.Rmd +++ /dev/null @@ -1,111 +0,0 @@ ---- -output: github_document ---- - - - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>", - fig.path = "man/figures/README-", - out.width = "100%" -) -``` - -# OmopSketch OmopSketch website - - -[![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) -[![R-CMD-check](https://github.com/OHDSI/OmopSketch/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/OHDSI/OmopSketch/actions/workflows/R-CMD-check.yaml) -[![CRAN status](https://www.r-pkg.org/badges/version/OmopSketch)](https://CRAN.R-project.org/package=OmopSketch) -[![Codecov test coverage](https://codecov.io/gh/OHDSI/OmopSketch/branch/main/graph/badge.svg)](https://app.codecov.io/gh/OHDSI/OmopSketch?branch=main) - - -### WARNING: this package is under-development and has only been tested using mock data - -The goal of OmopSketch is to characterise and visualise an OMOP CDM instance to asses if it meets the necessary criteria to answer a specific clinical question and conduct a certain study. - -## Installation - -You can install the development version of OmopSketch from [GitHub](https://github.com/) with: - -``` r -# install.packages("remotes") -remotes::install_github("OHDSI/OmopSketch") -``` - -## Example - -Let's start by creating a cdm object using the Eunomia mock dataset: - -```{r, message=TRUE, warning=FALSE} -library(duckdb) -library(CDMConnector) -library(dplyr, warn.conflicts = FALSE) -library(OmopSketch) -con <- dbConnect(duckdb(), eunomia_dir()) -cdm <- cdmFromCon(con = con, cdmSchema = "main", writeSchema = "main") -cdm -``` -### Snapshot -We first create a snapshot of our database. This will allow us to track when the analysis has been conducted and capture details about the CDM version or the data release. -```{r} -summariseOmopSnapshot(cdm) |> - tableOmopSnapshot(type = "flextable") -``` - - -### Characterise the clinical tables -Once we have collected the snapshot information, we can start characteristing the clinical tables of the CDM. By using `summariseClinicalRecords()` and `tableClinicalRecords()`, we can easily visualise the main characteristics of specific clinical tables. - -```{r} -summariseClinicalRecords(cdm, c("condition_occurrence", "drug_exposure")) |> - tableClinicalRecords(type = "flextable") -``` - -We can also explore trends in the clinical table records over time. - -```{r} -summariseRecordCount(cdm, c("condition_occurrence", "drug_exposure")) |> - plotRecordCount(facet = "omop_table") -``` -### Characterise the observation period -After visualising the main characteristics of our clinical tables, we can explore the observation period details. OmopSketch provides several functions to have an overwied of the dataset study period. - -Using `summariseInObservation()` and `plotInObservation()`, we can gather information on the number of records per year. - -```{r} -summariseInObservation(cdm$observation_period, output = "records") |> - plotInObservation() -``` -You can also visualise and explore the characteristics of the observation period per each individual in the database using `summariseObservationPeriod()`. -```{r} -summariseObservationPeriod(cdm$observation_period) |> - tableObservationPeriod(type = "flextable") -``` - -Or if visualisation is prefered, you can easily build a histogram to explore how many participants have more than one observation period. -```{r} -summariseObservationPeriod(cdm$observation_period) |> - plotObservationPeriod() -``` - -### Characterise the concepts -OmopSketch also provides functions to explore some of (or all) the concepts in the dataset. -```{r} -acetaminophen <- c(1125315, 1127433, 1127078) - -summariseConceptCounts(cdm, conceptId = list("acetaminophen" = acetaminophen)) |> - filter(estimate_name == "record_count") |> - plotConceptCounts() -``` - -### Characterise the population -Finally, OmopSketch can also help us to characterise the population at the start and end of the observation period. -```{r} -summarisePopulationCharacteristics(cdm) |> - tablePopulationCharacteristics(type = "flextable") -``` -As seen, OmopSketch offers multiple functionalities to provide a general overview of a database. Additionally, it includes more tools and arguments that allow for deeper exploration, helping to assess the database's suitability for specific research studies. For further information, please refer to the vignettes. - diff --git a/_pkgdown.yml b/_pkgdown.yml index 6f67a4b6..44574c60 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -15,8 +15,6 @@ reference: - tableClinicalRecords - summariseRecordCount - plotRecordCount - - summariseMissingData - - tableMissingData - subtitle: Observation Periods desc: Summarise and plot the observation period table in the OMOP Common Data Model - contents: @@ -30,8 +28,8 @@ reference: - contents: - summariseConceptCounts - plotConceptCounts - - summariseAllConceptCounts - - tableAllConceptCounts + - summarisePopulationCharacteristics + - tablePopulationCharacteristics - subtitle: Mock Database desc: Create a mock database to test the OmopSketch package - contents: diff --git a/cran-comments.md b/cran-comments.md deleted file mode 100644 index 12324b7b..00000000 --- a/cran-comments.md +++ /dev/null @@ -1,8 +0,0 @@ -## R CMD check results - -0 errors | 0 warnings | 1 note - -* This is a new release. -* We do not cite any reference. -* OMOP is the name of the common data model that we use and it is described in -the description diff --git a/man/OmopSketch-package.Rd b/man/OmopSketch-package.Rd index 8d1c0d01..0bc315f4 100644 --- a/man/OmopSketch-package.Rd +++ b/man/OmopSketch-package.Rd @@ -25,7 +25,6 @@ Authors: \itemize{ \item Kim Lopez-Guell \email{kim.lopez@spc.ox.ac.uk} (\href{https://orcid.org/0000-0002-8462-8668}{ORCID}) \item Elin Rowlands \email{elin.rowlands@ndorms.ox.ac.uk} (\href{https://orcid.org/0009-0005-5166-0417}{ORCID}) - \item Cecilia Campanile \email{cecilia.campanile@ndorms.ox.ac.uk} (\href{https://orcid.org/0009-0007-6629-4661}{ORCID}) \item Edward Burn \email{edward.burn@ndorms.ox.ac.uk} (\href{https://orcid.org/0000-0002-9286-1128}{ORCID}) \item MartĂ­ CatalĂ  \email{marti.catalasabate@ndorms.ox.ac.uk} (\href{https://orcid.org/0000-0003-3308-9905}{ORCID}) } diff --git a/man/mockOmopSketch.Rd b/man/mockOmopSketch.Rd index 11424ae4..2433e7a1 100644 --- a/man/mockOmopSketch.Rd +++ b/man/mockOmopSketch.Rd @@ -34,5 +34,8 @@ A mock cdm_reference object. Creates a mock database to test OmopSketch package. } \examples{ +\donttest{ +library(OmopSketch) mockOmopSketch(numberIndividuals = 100) } +} diff --git a/man/plotConceptCounts.Rd b/man/plotConceptCounts.Rd index 2492a3d5..aabae5d1 100644 --- a/man/plotConceptCounts.Rd +++ b/man/plotConceptCounts.Rd @@ -10,10 +10,10 @@ plotConceptCounts(result, facet = NULL, colour = NULL) \item{result}{A summarised_result object (output of summariseConceptCounts).} \item{facet}{Columns to face by. Formula format can be provided. See possible -columns to face by with: \code{visOmopResults::tidyColumns()}.} +columns to face by with: `visOmopResults::tidyColumns()`.} \item{colour}{Columns to colour by. See possible columns to colour by with: -\code{visOmopResults::tidyColumns()}.} +`visOmopResults::tidyColumns()`.} } \value{ A ggplot2 object showing the concept counts. @@ -23,7 +23,7 @@ Plot the concept counts of a summariseConceptCounts output. } \examples{ \donttest{ -library(dplyr) +library(dplyr, warn.conflicts = FALSE) cdm <- mockOmopSketch() @@ -36,8 +36,8 @@ result <- cdm |> ) result |> - filter(variable_name == "Number subjects") |> - plotConceptCounts(facet = "codelist_name", colour = "standard_concept_name") + filter(estimate_name == "person_count", variable_name == "overall") |> + plotConceptCounts(facet = "codelist_name", colour = "codelist_name") PatientProfiles::mockDisconnect(cdm) } diff --git a/man/plotInObservation.Rd b/man/plotInObservation.Rd index f0d322ae..c5a6941b 100644 --- a/man/plotInObservation.Rd +++ b/man/plotInObservation.Rd @@ -10,10 +10,10 @@ plotInObservation(result, facet = NULL, colour = NULL) \item{result}{A summarised_result object (output of summariseInObservation).} \item{facet}{Columns to face by. Formula format can be provided. See possible -columns to face by with: \code{visOmopResults::tidyColumns()}.} +columns to face by with: `visOmopResults::tidyColumns()`.} \item{colour}{Columns to colour by. See possible columns to colour by with: -\code{visOmopResults::tidyColumns()}.} +`visOmopResults::tidyColumns()`.} } \value{ A ggplot showing the table counts @@ -23,7 +23,7 @@ Create a ggplot2 plot from the output of summariseInObservation(). } \examples{ \donttest{ -library(dplyr) +library(dplyr, warn.conflicts = FALSE) cdm <- mockOmopSketch() diff --git a/man/plotObservationPeriod.Rd b/man/plotObservationPeriod.Rd index 543f468a..86a701a4 100644 --- a/man/plotObservationPeriod.Rd +++ b/man/plotObservationPeriod.Rd @@ -22,10 +22,10 @@ plotObservationPeriod( "densityplot".} \item{facet}{Columns to colour by. See possible columns to colour by with: -\code{visOmopResults::tidyColumns()}.} +`visOmopResults::tidyColumns()`.} \item{colour}{Columns to colour by. See possible columns to colour by with: -\code{visOmopResults::tidyColumns()}.} +`visOmopResults::tidyColumns()`.} } \value{ A ggplot2 object. diff --git a/man/plotRecordCount.Rd b/man/plotRecordCount.Rd index b539d1cb..b80afbda 100644 --- a/man/plotRecordCount.Rd +++ b/man/plotRecordCount.Rd @@ -10,10 +10,10 @@ plotRecordCount(result, facet = NULL, colour = NULL) \item{result}{Output from summariseRecordCount().} \item{facet}{Columns to face by. Formula format can be provided. See possible -columns to face by with: \code{visOmopResults::tidyColumns()}.} +columns to face by with: `visOmopResults::tidyColumns()`.} \item{colour}{Columns to colour by. See possible columns to colour by with: -\code{visOmopResults::tidyColumns()}.} +`visOmopResults::tidyColumns()`.} } \value{ A ggplot showing the table counts diff --git a/man/summariseAllConceptCounts.Rd b/man/summariseAllConceptCounts.Rd deleted file mode 100644 index 6437a86f..00000000 --- a/man/summariseAllConceptCounts.Rd +++ /dev/null @@ -1,38 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summariseAllConceptCounts.R -\name{summariseAllConceptCounts} -\alias{summariseAllConceptCounts} -\title{Summarise concept use in patient-level data} -\usage{ -summariseAllConceptCounts( - cdm, - omopTableName, - countBy = "record", - year = FALSE, - sex = FALSE, - ageGroup = NULL -) -} -\arguments{ -\item{cdm}{A cdm object} - -\item{omopTableName}{A character vector of the names of the tables to -summarise in the cdm object.} - -\item{countBy}{Either "record" for record-level counts or "person" for -person-level counts} - -\item{year}{TRUE or FALSE. If TRUE code use will be summarised by year.} - -\item{sex}{TRUE or FALSE. If TRUE code use will be summarised by sex.} - -\item{ageGroup}{A list of ageGroup vectors of length two. Code use will be -thus summarised by age groups.} -} -\value{ -A summarised_result object with results overall and, if specified, by -strata. -} -\description{ -Summarise concept use in patient-level data -} diff --git a/man/summariseConceptCounts.Rd b/man/summariseConceptCounts.Rd index 4e826319..3132736d 100644 --- a/man/summariseConceptCounts.Rd +++ b/man/summariseConceptCounts.Rd @@ -2,14 +2,14 @@ % Please edit documentation in R/summariseConceptCounts.R \name{summariseConceptCounts} \alias{summariseConceptCounts} -\title{Summarise concept counts in patient-level data. Only concepts recorded during observation period are counted.} +\title{Summarise code use in patient-level data} \usage{ summariseConceptCounts( cdm, conceptId, countBy = c("record", "person"), concept = TRUE, - interval = "overall", + year = FALSE, sex = FALSE, ageGroup = NULL ) @@ -24,7 +24,7 @@ person-level counts} \item{concept}{TRUE or FALSE. If TRUE code use will be summarised by concept.} -\item{interval}{Time interval to stratify by. It can either be "years", "quarters", "months" or "overall".} +\item{year}{TRUE or FALSE. If TRUE code use will be summarised by year.} \item{sex}{TRUE or FALSE. If TRUE code use will be summarised by sex.} @@ -36,11 +36,10 @@ A summarised_result object with results overall and, if specified, by strata. } \description{ -Summarise concept counts in patient-level data. Only concepts recorded during observation period are counted. +Summarise code use in patient-level data } \examples{ \donttest{ -library(OmopSketch) cdm <- mockOmopSketch() @@ -51,6 +50,5 @@ results <- summariseConceptCounts(cdm, conceptId = cs) results PatientProfiles::mockDisconnect(cdm) - } } diff --git a/man/summariseInObservation.Rd b/man/summariseInObservation.Rd index 59c4e224..db3b8c2d 100644 --- a/man/summariseInObservation.Rd +++ b/man/summariseInObservation.Rd @@ -7,7 +7,8 @@ time.} \usage{ summariseInObservation( observationPeriod, - interval = "overall", + unit = "year", + unitInterval = 1, output = "records", ageGroup = NULL, sex = FALSE @@ -17,7 +18,10 @@ summariseInObservation( \item{observationPeriod}{An observation_period omop table. It must be part of a cdm_reference object.} -\item{interval}{Time interval to stratify by. It can either be "years", "quarters", "months" or "overall".} +\item{unit}{Whether to stratify by "year" or by "month".} + +\item{unitInterval}{Number of years or months to include within the time +interval.} \item{output}{Output format. It can be either the number of records ("records") that are in observation in the specific interval of time, the @@ -43,7 +47,8 @@ cdm <- mockOmopSketch() result <- summariseInObservation( cdm$observation_period, - interval = "months", + unit = "month", + unitInterval = 6, output = c("person-days","records"), ageGroup = list("<=60" = c(0,60), ">60" = c(61, Inf)), sex = TRUE @@ -53,6 +58,5 @@ result |> glimpse() PatientProfiles::mockDisconnect(cdm) - } } diff --git a/man/summariseMissingData.Rd b/man/summariseMissingData.Rd deleted file mode 100644 index b5aeb9a1..00000000 --- a/man/summariseMissingData.Rd +++ /dev/null @@ -1,38 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summariseMissingData.R -\name{summariseMissingData} -\alias{summariseMissingData} -\title{Summarise missing data in omop tables} -\usage{ -summariseMissingData( - cdm, - omopTableName, - col = NULL, - sex = FALSE, - year = FALSE, - ageGroup = NULL -) -} -\arguments{ -\item{cdm}{A cdm object} - -\item{omopTableName}{A character vector of the names of the tables to -summarise in the cdm object.} - -\item{col}{A character vector of column names to check for missing values. -If \code{NULL}, all columns in the specified tables are checked. Default is \code{NULL}.} - -\item{sex}{TRUE or FALSE. If TRUE code use will be summarised by sex.} - -\item{year}{TRUE or FALSE. If TRUE code use will be summarised by year.} - -\item{ageGroup}{A list of ageGroup vectors of length two. Code use will be -thus summarised by age groups.} -} -\value{ -A summarised_result object with results overall and, if specified, by -strata. -} -\description{ -Summarise missing data in omop tables -} diff --git a/man/summariseObservationPeriod.Rd b/man/summariseObservationPeriod.Rd index 14ad95c3..26763dc4 100644 --- a/man/summariseObservationPeriod.Rd +++ b/man/summariseObservationPeriod.Rd @@ -17,8 +17,8 @@ summariseObservationPeriod( \item{observationPeriod}{observation_period omop table.} \item{estimates}{Estimates to summarise the variables of interest ( -\verb{records per person}, \verb{duration in days} and -\verb{days to next observation period}).} +`records per person`, `duration in days` and +`days to next observation period`).} \item{ageGroup}{A list of age groups to stratify results by.} diff --git a/man/summariseOmopSnapshot.Rd b/man/summariseOmopSnapshot.Rd index 19d6da7b..75552d34 100644 --- a/man/summariseOmopSnapshot.Rd +++ b/man/summariseOmopSnapshot.Rd @@ -18,7 +18,10 @@ Summarise a cdm_reference object creating a snapshot with the metadata of the cdm_reference object. } \examples{ +\donttest{ +library(OmopSketch) cdm <- mockOmopSketch(numberIndividuals = 10) summariseOmopSnapshot(cdm) } +} diff --git a/man/summarisePopulationCharacteristics.Rd b/man/summarisePopulationCharacteristics.Rd new file mode 100644 index 00000000..528473f3 --- /dev/null +++ b/man/summarisePopulationCharacteristics.Rd @@ -0,0 +1,48 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/summarisePopulationCharacteristics.R +\name{summarisePopulationCharacteristics} +\alias{summarisePopulationCharacteristics} +\title{Summarise the characteristics of the base population of a cdm_reference +object.} +\usage{ +summarisePopulationCharacteristics( + cdm, + studyPeriod = c(NA, NA), + sex = FALSE, + ageGroup = NULL +) +} +\arguments{ +\item{cdm}{A cdm_reference object.} + +\item{studyPeriod}{Dates to trim the observation period. If NA, +min(observation_period_start_date) and/or max(observation_period_end_date) +are used.} + +\item{sex}{Whether to stratify the results by sex.} + +\item{ageGroup}{List of age groups to stratify by at index date.} +} +\value{ +A summarised_result object. +} +\description{ +Summarise the characteristics of the base population of a cdm_reference +object. +} +\examples{ +\donttest{ +cdm <- mockOmopSketch() + +summarisedPopulation <- summarisePopulationCharacteristics( + cdm = cdm, + studyPeriod = c("2010-01-01", NA), + sex = TRUE, + ageGroup = NULL +) + +summarisedPopulation |> print() + +PatientProfiles::mockDisconnect(cdm = cdm) +} +} diff --git a/man/summariseRecordCount.Rd b/man/summariseRecordCount.Rd index 07d4c507..b502d76c 100644 --- a/man/summariseRecordCount.Rd +++ b/man/summariseRecordCount.Rd @@ -8,7 +8,8 @@ records that fall within the observation period are considered.} summariseRecordCount( cdm, omopTableName, - interval = "overall", + unit = "year", + unitInterval = 1, ageGroup = NULL, sex = FALSE ) @@ -18,7 +19,10 @@ summariseRecordCount( \item{omopTableName}{A character vector of omop tables from the cdm.} -\item{interval}{Time interval to stratify by. It can either be "years", "quarters", "months" or "overall".} +\item{unit}{Time unit it can either be "year" or "month".} + +\item{unitInterval}{Number of years or months to include within the same +interval.} \item{ageGroup}{A list of age groups to stratify results by.} @@ -40,7 +44,8 @@ cdm <- mockOmopSketch() summarisedResult <- summariseRecordCount( cdm = cdm, omopTableName = c("condition_occurrence", "drug_exposure"), - interval = "years", + unit = "year", + unitInterval = 10, ageGroup = list("<=20" = c(0,20), ">20" = c(21, Inf)), sex = TRUE ) diff --git a/man/tableAllConceptCounts.Rd b/man/tableAllConceptCounts.Rd deleted file mode 100644 index a3ce8187..00000000 --- a/man/tableAllConceptCounts.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tableAllConceptCounts.R -\name{tableAllConceptCounts} -\alias{tableAllConceptCounts} -\title{Create a visual table from a summariseAllConceptCounts() result.} -\usage{ -tableAllConceptCounts(result, type = "gt") -} -\arguments{ -\item{result}{A summarised_result object.} - -\item{type}{Type of formatting output table, either "gt" or "flextable".} -} -\value{ -A gt or flextable object with the summarised data. -} -\description{ -Create a visual table from a summariseAllConceptCounts() result. -} diff --git a/man/tableMissingData.Rd b/man/tableMissingData.Rd deleted file mode 100644 index 59ac5077..00000000 --- a/man/tableMissingData.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tableMissingData.R -\name{tableMissingData} -\alias{tableMissingData} -\title{Create a visual table from a summariseMissingData() result.} -\usage{ -tableMissingData(result, type = "gt") -} -\arguments{ -\item{result}{A summarised_result object.} - -\item{type}{Type of formatting output table, either "gt" or "flextable".} -} -\value{ -A gt or flextable object with the summarised data. -} -\description{ -Create a visual table from a summariseMissingData() result. -} diff --git a/man/tableOmopSnapshot.Rd b/man/tableOmopSnapshot.Rd index 2cb78528..486d82e4 100644 --- a/man/tableOmopSnapshot.Rd +++ b/man/tableOmopSnapshot.Rd @@ -18,6 +18,8 @@ A gt or flextable object with the summarised data. Create a visual table from a summarise_omop_snapshot result. } \examples{ +\donttest{ +library(OmopSketch) cdm <- mockOmopSketch(numberIndividuals = 10) result <- summariseOmopSnapshot(cdm) @@ -27,3 +29,4 @@ result |> PatientProfiles::mockDisconnect(cdm) } +} diff --git a/man/tablePopulationCharacteristics.Rd b/man/tablePopulationCharacteristics.Rd new file mode 100644 index 00000000..c395f68b --- /dev/null +++ b/man/tablePopulationCharacteristics.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tablePopulationCharacteristics.R +\name{tablePopulationCharacteristics} +\alias{tablePopulationCharacteristics} +\title{Create a visual table from a summarise_population_characteristics result.} +\usage{ +tablePopulationCharacteristics(result, type = "gt") +} +\arguments{ +\item{result}{Output from summarisePopulationCharacteristics().} + +\item{type}{Type of formatting output table, either "gt" or "flextable".} +} +\value{ +A gt or flextable object with the summarised data. +} +\description{ +Create a visual table from a summarise_population_characteristics result. +} +\examples{ +\donttest{ +cdm <- mockOmopSketch() + +summarisedPopulation <- summarisePopulationCharacteristics( + cdm = cdm, + studyPeriod = c("2010-01-01", NA), + sex = TRUE, + ageGroup = list("<=60" = c(0, 60), ">60" = c(61, Inf)) +) + +summarisedPopulation |> + suppress(minCellCount = 5) |> + tablePopulationCharacteristics() + +PatientProfiles::mockDisconnect(cdm = cdm) +} +} diff --git a/tests/testthat/test-plotInObservation.R b/tests/testthat/test-plotInObservation.R index 69a06126..b937eeeb 100644 --- a/tests/testthat/test-plotInObservation.R +++ b/tests/testthat/test-plotInObservation.R @@ -4,17 +4,17 @@ test_that("plotInObservation works", { cdm <- cdmEunomia() # summariseInObservationPlot plot ---- - x <- summariseInObservation(cdm$observation_period, interval = "years") + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 8) expect_no_error(inherits(plotInObservation(x), "ggplot")) x <- x |> dplyr::filter(result_id == -1) expect_error(plotInObservation(x)) - expect_error(plotInObservation(summariseInObservation(cdm$observation_period, interval = "years", output = c("person-days", "records"), ageGroup = NULL, sex = FALSE))) + expect_error(plotInObservation(summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 1, output = c("person-days", "records"), ageGroup = NULL, sex = FALSE))) - x <- summariseInObservation(cdm$observation_period, interval = "years", output = "person-days", ageGroup = NULL, sex = FALSE) + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 1, output = "person-days", ageGroup = NULL, sex = FALSE) expect_true(inherits(plotInObservation(x), "ggplot")) - x <- summariseInObservation(cdm$observation_period, interval = "years", output = "records", ageGroup = NULL, sex = FALSE) + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 1, output = "records", ageGroup = NULL, sex = FALSE) expect_true(inherits(plotInObservation(x), "ggplot")) result <- cdm$observation_period |> diff --git a/tests/testthat/test-summariseAllConceptCounts.R b/tests/testthat/test-summariseAllConceptCounts.R deleted file mode 100644 index f90bbc62..00000000 --- a/tests/testthat/test-summariseAllConceptCounts.R +++ /dev/null @@ -1,59 +0,0 @@ -test_that("summariseAllConceptCount works", { - skip_on_cran() - - cdm <- cdmEunomia() - - expect_true(inherits(summariseAllConceptCounts(cdm, "drug_exposure"), "summarised_result")) - expect_warning(summariseAllConceptCounts(cdm, "observation_period")) - expect_no_error(x <- summariseAllConceptCounts(cdm, "visit_occurrence")) - expect_no_error(summariseAllConceptCounts(cdm, "condition_occurrence", countBy = c("record", "person"))) - expect_no_error(summariseAllConceptCounts(cdm, "drug_exposure")) - expect_no_error(summariseAllConceptCounts(cdm, "procedure_occurrence", countBy = "person")) - expect_warning(summariseAllConceptCounts(cdm, "device_exposure")) - expect_no_error(y <- summariseAllConceptCounts(cdm, "measurement")) - expect_no_error(summariseAllConceptCounts(cdm, "observation", year = TRUE)) - expect_warning(summariseAllConceptCounts(cdm, "death")) - - expect_no_error(all <- summariseAllConceptCounts(cdm, c("visit_occurrence", "measurement"))) - expect_equal(all, dplyr::bind_rows(x, y)) - expect_equal(summariseAllConceptCounts(cdm, "procedure_occurrence", countBy = "record"), summariseAllConceptCounts(cdm, "procedure_occurrence")) - - expect_error(summariseAllConceptCounts(cdm, omopTableName = "")) - expect_error(summariseAllConceptCounts(cdm, omopTableName = "visit_occurrence", countBy = "dd")) - - expect_true(summariseAllConceptCounts(cdm, "procedure_occurrence", sex = TRUE, ageGroup = list(c(0, 50), c(51, Inf))) |> - dplyr::distinct(.data$strata_level) |> - dplyr::tally() |> - dplyr::pull() == 9) - - expect_true(summariseAllConceptCounts(cdm, "procedure_occurrence", ageGroup = list(c(0, 50))) |> - dplyr::distinct(.data$strata_level) |> - dplyr::tally() |> - dplyr::pull() == 3) - - s <- summariseAllConceptCounts(cdm, "procedure_occurrence") - z <- summariseAllConceptCounts(cdm, "procedure_occurrence", sex = TRUE, year = TRUE, ageGroup = list(c(0, 50), c(51, Inf))) - - x <- z |> - dplyr::filter(strata_level == "overall") |> - dplyr::select(variable_level, estimate_value) - s <- s |> - dplyr::select(variable_level, estimate_value) - expect_equal(x, s) - - x <- z |> - dplyr::filter(strata_name == "age_group") |> - dplyr::group_by(variable_level) |> - dplyr::summarise(estimate_value = sum(as.numeric(estimate_value), na.rm = TRUE), .groups = "drop") |> - dplyr::mutate(estimate_value = as.character(estimate_value)) - - p <- s |> - dplyr::select(variable_level, estimate_value) - - expect_true(all.equal( - as.data.frame(x) |> dplyr::arrange(variable_level), - as.data.frame(p) |> dplyr::arrange(variable_level), - check.attributes = FALSE - )) - -}) diff --git a/tests/testthat/test-summariseClinicalRecords.R b/tests/testthat/test-summariseClinicalRecords.R index f2b76f4c..82b65cf3 100644 --- a/tests/testthat/test-summariseClinicalRecords.R +++ b/tests/testthat/test-summariseClinicalRecords.R @@ -193,17 +193,14 @@ test_that("summariseClinicalRecords() sex and ageGroup argument work", { cdm <- CDMConnector::copyCdmTo( con = connection(), cdm = cdm, schema = schema()) - result <- summariseClinicalRecords( - cdm = cdm, - omopTableName = "observation_period", - inObservation = FALSE, - standardConcept = FALSE, - sourceVocabulary = FALSE, - domainId = FALSE, - typeConcept = FALSE, - sex = TRUE, - ageGroup = list("old" = c(10, Inf), "young" = c(0, 9)) - ) + result <- summariseClinicalRecords(cdm, "observation_period", + inObservation = FALSE, + standardConcept = FALSE, + sourceVocabulary = FALSE, + domainId = FALSE, + typeConcept = FALSE, + sex = TRUE, + ageGroup = list("old" = c(10, Inf), "young" = c(0, 9))) # Check num records records <- result |> diff --git a/tests/testthat/test-summariseConceptCounts.R b/tests/testthat/test-summariseConceptCounts.R index 6e63e46e..950d19c9 100644 --- a/tests/testthat/test-summariseConceptCounts.R +++ b/tests/testthat/test-summariseConceptCounts.R @@ -9,9 +9,7 @@ test_that("summarise code use - eunomia", { startNames <- CDMConnector::listSourceTables(cdm) results <- summariseConceptCounts(cdm = cdm, conceptId = cs, - interval = "years", - countBy = c("record", "person"), - concept = TRUE, + year = TRUE, sex = TRUE, ageGroup = list(c(0,17), c(18,65), @@ -23,17 +21,16 @@ test_that("summarise code use - eunomia", { checkResultType(results, "summarise_concept_counts") # min cell counts: - expect_equal( - omopgenerics::suppress(results, 5) |> - visOmopResults::splitAdditional() |> + expect_true( + all(is.na( + omopgenerics::suppress(results) |> dplyr::filter( - strata_level == "overall", - variable_name == "Number records", - standard_concept_id == "overall", - time_interval == "1909-01-01 to 1909-12-31", - group_level == "acetiminophen") |> - dplyr::pull("estimate_value"), - as.character(NA) + variable_name == "overall", + strata_level == "1909", + group_level == "acetiminophen" + ) |> + dplyr::pull("estimate_value") + )) ) # check is a summarised result @@ -43,17 +40,16 @@ test_that("summarise code use - eunomia", { # overall record count expect_true(results %>% - dplyr::filter(group_name == "codelist_name", - strata_name == "overall", - strata_level == "overall", - additional_level == "overall", - group_level == "acetiminophen", - variable_name == "Number records") %>% + dplyr::filter(group_name == "codelist_name" & + strata_name == "overall" & + strata_level == "overall" & + group_level == "acetiminophen" & + estimate_name == "record_count", + variable_name == "overall") %>% dplyr::pull("estimate_value") |> as.numeric() == cdm$drug_exposure %>% - dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") |> - dplyr::filter(drug_concept_id %in% c(acetiminophen)) %>% + dplyr::filter(drug_concept_id %in% acetiminophen) %>% dplyr::tally() %>% dplyr::pull("n")) @@ -63,8 +59,8 @@ test_that("summarise code use - eunomia", { strata_name == "overall" & strata_level == "overall" & group_level == "acetiminophen" & - variable_name == "Number subjects", - additional_name == "overall") %>% + estimate_name == "person_count", + variable_name == "overall") %>% dplyr::pull("estimate_value") |> as.numeric() == cdm$drug_exposure %>% @@ -77,13 +73,12 @@ test_that("summarise code use - eunomia", { # by year # overall record count expect_true(results %>% - visOmopResults::splitAdditional() |> dplyr::filter(group_name == "codelist_name" & - strata_name == "overall" & - time_interval == "2008-01-01 to 2008-12-31" & + strata_name == "year" & + strata_level == "2008" & group_level == "acetiminophen" & - variable_name == "Number records", - standard_concept_name == "overall") %>% + estimate_name == "record_count", + variable_name == "overall") %>% dplyr::pull("estimate_value") |> as.numeric() == cdm$drug_exposure %>% @@ -94,13 +89,12 @@ test_that("summarise code use - eunomia", { # overall person count expect_true(results %>% - visOmopResults::splitAdditional() |> dplyr::filter(group_name == "codelist_name" & - strata_name == "overall" & - time_interval == "2008-01-01 to 2008-12-31" & + strata_name == "year" & + strata_level == "2008" & group_level == "acetiminophen" & - variable_name == "Number subjects", - standard_concept_name == "overall") %>% + estimate_name == "person_count", + variable_name == "overall") %>% dplyr::pull("estimate_value") |> as.numeric() == cdm$drug_exposure %>% @@ -118,8 +112,8 @@ test_that("summarise code use - eunomia", { strata_name == "sex" & strata_level == "Male" & group_level == "acetiminophen" & - variable_name == "Number records" & - additional_name == "overall") %>% + estimate_name == "record_count", + variable_name == "overall") %>% dplyr::pull("estimate_value") |> as.numeric() == cdm$drug_exposure %>% @@ -134,8 +128,8 @@ test_that("summarise code use - eunomia", { strata_name == "age_group &&& sex" & strata_level == "18 to 65 &&& Male" & group_level == "acetiminophen" & - variable_name == "Number records", - additional_name == "overall") %>% + estimate_name == "record_count", + variable_name == "overall") %>% dplyr::pull("estimate_value") |> as.numeric() == cdm$drug_exposure %>% @@ -154,8 +148,8 @@ test_that("summarise code use - eunomia", { strata_name == "age_group &&& sex" & strata_level == "18 to 65 &&& Male" & group_level == "acetiminophen" & - variable_name == "Number subjects", - additional_name == "overall") %>% + estimate_name == "person_count", + variable_name == "overall") %>% dplyr::pull("estimate_value") |> as.numeric() == cdm$drug_exposure %>% @@ -170,140 +164,92 @@ test_that("summarise code use - eunomia", { dplyr::tally() %>% dplyr::pull("n")) - results1 <- summariseConceptCounts(cdm = cdm, - conceptId = cs, - interval = "years", - concept = FALSE, - sex = TRUE, - ageGroup = list(c(0,17), - c(18,65), - c(66, 100))) - - expect_equal( - results1 |> - visOmopResults::splitAdditional() |> - dplyr::filter(variable_name == "Number records") |> - dplyr::arrange(dplyr::across(dplyr::everything())), - results |> - visOmopResults::splitAdditional() |> - dplyr::filter(variable_name == "Number records", standard_concept_name == "overall") |> - dplyr::select(-c(starts_with("standard_"), starts_with("source_"), "domain_id")) |> - dplyr::arrange(dplyr::across(dplyr::everything())) - ) - expect_true(results1 |> - visOmopResults::splitAdditional() |> - dplyr::filter(variable_name == "Number subjects", - group_level == "acetiminophen", - time_interval == "1909-01-01 to 1909-12-31", - strata_level == "0 to 17") |> - dplyr::pull("estimate_value") |> - as.numeric() == - cdm$drug_exposure %>% - dplyr::filter(drug_concept_id %in% acetiminophen) %>% - PatientProfiles::addAge(indexDate = "drug_exposure_start_date") %>% - PatientProfiles::addSex() %>% - dplyr::filter(age >= "0", age <= "17", clock::get_year(drug_exposure_start_date) == 1909) |> - dplyr::select("person_id") %>% - dplyr::distinct() %>% - dplyr::tally() %>% - dplyr::pull("n")) - expect_true(results1$group_level |> unique() |> length() == 2) - results <- summariseConceptCounts(list("acetiminophen" = acetiminophen), cdm = cdm, countBy = "person", - interval = "years", + year = FALSE, sex = FALSE, ageGroup = NULL) expect_true(nrow(results %>% - dplyr::filter(variable_name == "Number subjects")) > 0) + dplyr::filter(estimate_name == "person_count")) > 0) expect_true(nrow(results %>% - dplyr::filter(variable_name == "Number records")) == 0) - + dplyr::filter(estimate_name == "record_count")) == 0) results <- summariseConceptCounts(list("acetiminophen" = acetiminophen), cdm = cdm, countBy = "record", - interval = "years", + year = FALSE, sex = FALSE, ageGroup = NULL) expect_true(nrow(results %>% - dplyr::filter(variable_name == "Number subjects")) == 0) + dplyr::filter(estimate_name == "person_count")) == 0) expect_true(nrow(results %>% - dplyr::filter(variable_name == "Number records")) > 0) + dplyr::filter(estimate_name == "record_count")) > 0) # domains covered # condition expect_true(nrow(summariseConceptCounts(list(cs= c(4112343)), cdm = cdm, - interval = "years", + year = FALSE, sex = FALSE, ageGroup = NULL))>1) # visit expect_true(nrow(summariseConceptCounts(list(cs= c(9201)), cdm = cdm, - interval = "years", + year = FALSE, sex = FALSE, ageGroup = NULL))>1) # drug expect_true(nrow(summariseConceptCounts(list(cs= c(40213160)), cdm = cdm, - interval = "years", + year = FALSE, sex = FALSE, ageGroup = NULL))>1) # measurement expect_true(nrow(summariseConceptCounts(list(cs= c(3006322)), cdm = cdm, - interval = "years", + year = FALSE, sex = FALSE, ageGroup = NULL))>1) # procedure and condition expect_true(nrow(summariseConceptCounts(list(cs= c(4107731,4112343)), cdm = cdm, - interval = "years", + year = FALSE, sex = FALSE, ageGroup = NULL))>1) # no records expect_message(results <- summariseConceptCounts(list(cs= c(999999)), cdm = cdm, - interval = "years", + year = FALSE, sex = FALSE, ageGroup = NULL)) expect_true(nrow(results) == 0) # conceptId NULL (but reduce the computational time by filtering concepts first) - # cdm$concept <- cdm$concept |> - # dplyr::filter(grepl("k", concept_name)) - # - # skip("conceptId = NULL not supported yet") - # results <- summariseConceptCounts(cdm = cdm, - # year = FALSE, - # sex = FALSE, - # ageGroup = NULL) - # - # results_concepts <- results |> - # dplyr::select(variable_name) |> - # dplyr::distinct() |> - # dplyr::pull() - # concepts <- cdm$concept |> - # dplyr::select(concept_name) |> - # dplyr::distinct() |> - # dplyr::pull() - # - # expect_true(all(results_concepts %in% c("overall",concepts))) + cdm$concept <- cdm$concept |> + dplyr::filter(grepl("k", concept_name)) - # check attributes + skip("conceptId = NULL not supported yet") results <- summariseConceptCounts(cdm = cdm, - conceptId = cs, - interval = "years", - sex = TRUE, - ageGroup = list(c(0,17), - c(18,65), - c(66, 100))) + year = FALSE, + sex = FALSE, + ageGroup = NULL) + + results_concepts <- results |> + dplyr::select(variable_name) |> + dplyr::distinct() |> + dplyr::pull() + concepts <- cdm$concept |> + dplyr::select(concept_name) |> + dplyr::distinct() |> + dplyr::pull() + + expect_true(all(results_concepts %in% c("overall",concepts))) + # check attributes expect_true(omopgenerics::settings(results)$package_name == "OmopSketch") expect_true(omopgenerics::settings(results)$result_type == "summarise_concept_counts") expect_true(omopgenerics::settings(results)$package_version == packageVersion("OmopSketch")) @@ -311,42 +257,42 @@ test_that("summarise code use - eunomia", { # expected errors# expected errors expect_error(summariseConceptCounts("not a concept", cdm = cdm, - interval = "years", + year = FALSE, sex = FALSE, ageGroup = NULL)) expect_error(summariseConceptCounts("123", cdm = cdm, - interval = "years", + year = FALSE, sex = FALSE, ageGroup = NULL)) expect_error(summariseConceptCounts(list("123"), # not named cdm = cdm, - interval = "years", + year = FALSE, sex = FALSE, ageGroup = NULL)) expect_error(summariseConceptCounts(list(a = 123), cdm = "not a cdm", - interval = "years", + year = FALSE, sex = FALSE, ageGroup = NULL)) expect_error(summariseConceptCounts(list(a = 123), cdm = cdm, - interval = "Maybe", + year = "Maybe", sex = FALSE, ageGroup = NULL)) expect_error(summariseConceptCounts(list(a = 123), cdm = cdm, - interval = "years", + year = FALSE, sex = "Maybe", ageGroup = NULL)) expect_error(summariseConceptCounts(list(a = 123), cdm = cdm, - interval = "years", + year = FALSE, sex = FALSE, ageGroup = list(c(18,17)))) expect_error(summariseConceptCounts(list(a = 123), cdm = cdm, - interval = "years", + year = FALSE, sex = FALSE, ageGroup = list(c(0,17), c(15,20)))) @@ -356,6 +302,7 @@ test_that("summarise code use - eunomia", { test_that("summarise code use - mock data", { skip_on_cran() + person <- tibble::tibble( person_id = c(1L,2L), gender_concept_id = c(8532L,8507L), @@ -390,11 +337,11 @@ test_that("summarise code use - mock data", { concept_name = c("Musculoskeletal disorder", "Arthritis", "Osteoarthritis of hip", "Arthritis"), domain_id = c("Condition"), standard_concept = c("S","S","S",NA), - vocabulary_id = c("SNOMED", "SNOMED", "SNOMED", "ICD10"), concept_class_id = c("Clinical Finding", "Clinical Finding", "Clinical Finding", "ICD Code"), concept_code = c("1234"), valid_start_date = c(as.Date(NA)), - valid_end_date = c(as.Date(NA)) + valid_end_date = c(as.Date(NA)), + vocabulary_id = as.character(NA) ) cdm <- omopgenerics::cdmFromTables( @@ -418,88 +365,63 @@ test_that("summarise code use - mock data", { result <- summariseConceptCounts(cdm, conceptId) # Arthritis (codes 3 and 17), one record of 17 per ind and one record of 3 ind 1 - expect_equal(result |> - visOmopResults::splitAdditional() |> - dplyr::filter(standard_concept_name == "Arthritis", - strata_level == "overall") |> - dplyr::arrange(standard_concept_id, variable_name) |> - dplyr::pull(estimate_value), - c("2", "1", "1", "1")) + expect_true(all(result |> + dplyr::filter(variable_name == "Arthritis") |> + dplyr::arrange(variable_level, estimate_name) |> + dplyr::pull(estimate_value) == c("1", "2", "1", "1"))) # Osteoarthritis (code 5), two records ind 2, one record ind 1 - expect_equal(result |> - visOmopResults::splitAdditional() |> - dplyr::filter(standard_concept_name == "Osteoarthritis of hip", - strata_level == "overall") |> - dplyr::arrange(standard_concept_id, variable_name) |> - dplyr::pull(estimate_value), - c("3","2")) + expect_true(all(result |> + dplyr::filter(variable_name == "Osteoarthritis of hip") |> + dplyr::arrange(variable_level, estimate_name) |> + dplyr::pull(estimate_value) == c(2,3))) # Musculoskeletal disorder (code 1), one record each ind - expect_equal(result |> - visOmopResults::splitAdditional() |> - dplyr::filter(standard_concept_name == "Musculoskeletal disorder", - strata_level == "overall") |> - dplyr::arrange(standard_concept_id, variable_name) |> - dplyr::pull(estimate_value), - c("2","2")) + expect_true(all(result |> + dplyr::filter(variable_name == "Musculoskeletal disorder") |> + dplyr::arrange(variable_level, estimate_name) |> + dplyr::pull(estimate_value) == c(2,2))) result <- summariseConceptCounts(cdm, conceptId, ageGroup = list(c(0,2), c(3,150)), sex = TRUE) # Individuals belong to the same age group but to different sex groups # Arthritis (codes 3 and 17), one record of each per ind - expect_equal(result |> - visOmopResults::splitAdditional() |> - dplyr::filter(standard_concept_name == "Arthritis" & strata_level == "Male") |> - dplyr::arrange(standard_concept_id, variable_name) |> - dplyr::pull(estimate_value), - c("2","1")) - expect_equal(result |> - visOmopResults::splitAdditional() |> - dplyr::filter(standard_concept_name =="Arthritis" & strata_level == "3 to 150 &&& Male") |> - dplyr::arrange(standard_concept_id, variable_name) |> - dplyr::pull(estimate_value), - c("2","1")) - expect_equal(result |> - visOmopResults::splitAdditional() |> - dplyr::filter(standard_concept_name == "Arthritis" & strata_level == "3 to 150") |> - dplyr::arrange(standard_concept_id, variable_name) |> - dplyr::pull(estimate_value), - c("2","1","1","1")) + expect_true(all(result |> + dplyr::filter(variable_name == "Arthritis" & strata_level == "Male") |> + dplyr::arrange(variable_level, estimate_name) |> + dplyr::pull(estimate_value) == c(1,2))) + expect_true(all(result |> + dplyr::filter(variable_name == "Arthritis" & strata_level == "3 to 150 &&& Male") |> + dplyr::arrange(variable_level, estimate_name) |> + dplyr::pull(estimate_value) == c(1,2))) + expect_true(all(result |> + dplyr::filter(variable_name == "Arthritis" & strata_level == "3 to 150") |> + dplyr::arrange(variable_level, estimate_name) |> + dplyr::pull(estimate_value) == c(1,2,1,1))) # Osteoarthritis of hip (code 5), two records ind 2 and one ind 1 - expect_equal(result |> - visOmopResults::splitAdditional() |> - dplyr::filter(standard_concept_name == "Osteoarthritis of hip" & strata_level == "Female") |> - dplyr::tally() |> - dplyr::pull(), - 2) + expect_true(all(result |> + dplyr::filter(variable_name == "Osteoarthritis of hip" & strata_level == "Female") |> + dplyr::tally() |> + dplyr::pull() == 2)) # Musculoskeletal disorder (code 1), one record each ind - expect_equal(result |> - visOmopResults::splitAdditional() |> - dplyr::filter(standard_concept_name == "Musculoskeletal disorder" & strata_level == "3 to 150 &&& Female") |> - dplyr::arrange(standard_concept_id, variable_name) |> - dplyr::pull(estimate_value), - c("1","1")) - expect_equal(result |> - visOmopResults::splitAdditional() |> - dplyr::filter(standard_concept_name == "Musculoskeletal disorder" & strata_level == "3 to 150 &&& Male") |> - dplyr::arrange(standard_concept_id, variable_name) |> - dplyr::pull(estimate_value), - c("1","1")) - expect_equal(result |> - visOmopResults::splitAdditional() |> - dplyr::filter(standard_concept_name == "Musculoskeletal disorder" & strata_level == "3 to 150") |> - dplyr::arrange(standard_concept_id, variable_name) |> - dplyr::pull(estimate_value), - c("2","2")) - expect_equal(result |> - visOmopResults::splitAdditional() |> - dplyr::filter(standard_concept_name == "Musculoskeletal disorder" & strata_level == "overall") |> - dplyr::arrange(standard_concept_id, variable_name) |> - dplyr::pull(estimate_value), - c("2","2")) + expect_true(all(result |> + dplyr::filter(variable_name == "Musculoskeletal disorder" & strata_level == "3 to 150 &&& Female") |> + dplyr::arrange(variable_level, estimate_name) |> + dplyr::pull(estimate_value) == c(1,1))) + expect_true(all(result |> + dplyr::filter(variable_name == "Musculoskeletal disorder" & strata_level == "3 to 150 &&& Male") |> + dplyr::arrange(variable_level, estimate_name) |> + dplyr::pull(estimate_value) == c(1,1))) + expect_true(all(result |> + dplyr::filter(variable_name == "Musculoskeletal disorder" & strata_level == "3 to 150") |> + dplyr::arrange(variable_level, estimate_name) |> + dplyr::pull(estimate_value) == c(2,2))) + expect_true(all(result |> + dplyr::filter(variable_name == "Musculoskeletal disorder" & strata_level == "overall") |> + dplyr::arrange(variable_level, estimate_name) |> + dplyr::pull(estimate_value) == c(2,2))) PatientProfiles::mockDisconnect(cdm) }) @@ -512,7 +434,7 @@ test_that("plot concept counts works", { # summariseInObservationPlot plot ---- x <- summariseConceptCounts(cdm, conceptId = list(codes = c(40213160))) expect_error(plotConceptCounts(x)) - x <- x |> dplyr::filter(variable_name == "Number records") + x <- x |> dplyr::filter(estimate_name == "record_count") expect_no_error(plotConceptCounts(x)) expect_true(inherits(plotConceptCounts(x), "ggplot")) @@ -520,7 +442,7 @@ test_that("plot concept counts works", { conceptId = list("polio" = c(40213160), "acetaminophen" = c(1125315, 1127433, 40229134, 40231925, 40162522, 19133768, 1127078))) expect_error(plotConceptCounts(x)) - x <- x |> dplyr::filter(variable_name == "Number records") + x <- x |> dplyr::filter(estimate_name == "record_count") expect_no_error(plotConceptCounts(x)) expect_message(plotConceptCounts(x)) expect_no_error(plotConceptCounts(x, facet = "codelist_name")) @@ -529,5 +451,6 @@ test_that("plot concept counts works", { x <- x |> dplyr::filter(result_id == -1) expect_error(plotInObservation(x)) + PatientProfiles::mockDisconnect(cdm = cdm) }) diff --git a/tests/testthat/test-summariseInObservation.R b/tests/testthat/test-summariseInObservation.R index d2c7d105..024597b4 100644 --- a/tests/testthat/test-summariseInObservation.R +++ b/tests/testthat/test-summariseInObservation.R @@ -5,14 +5,14 @@ test_that("check summariseInObservation works", { # Check all tables work ---- expect_true(inherits(summariseInObservation(cdm$observation_period),"summarised_result")) - expect_true(inherits(summariseInObservation(cdm$observation_period, interval = "months"),"summarised_result")) - expect_true(inherits(summariseInObservation(cdm$observation_period, interval = "years"),"summarised_result")) + expect_true(inherits(summariseInObservation(cdm$observation_period, unit = "month", unitInterval = 10),"summarised_result")) + expect_true(inherits(summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 10),"summarised_result")) expect_error(summariseInObservation(cdm$death)) # Check inputs ---- - x <- summariseInObservation(cdm$observation_period, interval = "years") |> - dplyr::filter(additional_level == "1909-01-01 to 1909-12-31", estimate_name == "count") |> + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 1) |> + dplyr::filter(variable_level == "1909-01-01 to 1909-12-31", estimate_name == "count") |> dplyr::pull("estimate_value") |> as.numeric() y <- cdm$observation_period %>% @@ -24,13 +24,8 @@ test_that("check summariseInObservation works", { dplyr::pull("n") |> as.numeric() expect_equal(x,y) - x <- summariseInObservation(cdm$observation_period, interval = "years") - expect_equal(x |> dplyr::filter(additional_level != "overall") |> dplyr::pull("additional_name") |> unique(), "time_interval") - x <- summariseInObservation(cdm$observation_period, interval = "overall") - expect_equal(x |> dplyr::filter(additional_level == "overall") |> dplyr::pull("additional_name") |> unique(), "overall") - - x <- summariseInObservation(cdm$observation_period, interval = "years") |> - dplyr::filter(additional_level == c("1936-01-01 to 1936-12-31"), estimate_name == "count") |> + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 2) |> + dplyr::filter(variable_level == c("1936-01-01 to 1937-12-31"), estimate_name == "count") |> dplyr::pull("estimate_value") |> as.numeric() y <- cdm$observation_period %>% @@ -38,13 +33,13 @@ test_that("check summariseInObservation works", { dplyr::mutate(start = !!CDMConnector::datepart("observation_period_start_date", "year")) %>% dplyr::mutate(end = !!CDMConnector::datepart("observation_period_end_date", "year")) %>% dplyr::filter((.data$start < 1936 & .data$end >= 1936) | - (.data$start >= 1936 & .data$start <= 1936)) |> + (.data$start >= 1936 & .data$start <= 1937)) |> dplyr::tally() |> dplyr::pull("n") |> as.numeric() expect_equal(x,y) - x <- summariseInObservation(cdm$observation_period, interval = "years") |> - dplyr::filter(additional_level == c("1998-01-01 to 1998-12-31"), estimate_name == "count") |> + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 10) |> + dplyr::filter(variable_level == c("1998-01-01 to 2007-12-31"), estimate_name == "count") |> dplyr::pull("estimate_value") |> as.numeric() y <- cdm$observation_period %>% @@ -52,14 +47,14 @@ test_that("check summariseInObservation works", { dplyr::mutate(start = !!CDMConnector::datepart("observation_period_start_date", "year")) %>% dplyr::mutate(end = !!CDMConnector::datepart("observation_period_end_date", "year")) %>% dplyr::filter((.data$start < 1998 & .data$end >= 1998) | - (.data$start >= 1998 & .data$start <= 1998)) |> + (.data$start >= 1998 & .data$start <= 2007)) |> dplyr::tally() |> dplyr::pull("n") |> as.numeric() expect_equal(x,y) # Check inputs ---- - x <- summariseInObservation(cdm$observation_period, interval = "months") |> - dplyr::filter(additional_level == "1942-03-01 to 1942-03-31", estimate_name == "count") |> + x <- summariseInObservation(cdm$observation_period, unit = "month", unitInterval = 1) |> + dplyr::filter(variable_level == "1942-03-01 to 1942-03-31", estimate_name == "count") |> dplyr::pull("estimate_value") |> as.numeric() y <- cdm$observation_period %>% @@ -71,26 +66,26 @@ test_that("check summariseInObservation works", { expect_equal(x,y) - x <- summariseInObservation(cdm$observation_period, interval = "months") |> - dplyr::filter(additional_level == "2015-09-01 to 2015-09-30", estimate_name == "count") |> + x <- summariseInObservation(cdm$observation_period, unit = "month", unitInterval = 2) |> + dplyr::filter(variable_level == "2015-09-01 to 2015-10-31", estimate_name == "count") |> dplyr::pull("estimate_value") |> as.numeric() y <- cdm$observation_period %>% dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") %>% dplyr::filter( (observation_period_start_date < as.Date("2015-09-01") & observation_period_end_date >= as.Date("2015-09-01")) | - (observation_period_start_date >= as.Date("2015-09-01") & observation_period_start_date <= as.Date("2015-09-30")) + (observation_period_start_date >= as.Date("2015-09-01") & observation_period_start_date <= as.Date("2015-10-31")) ) |> dplyr::tally() |> dplyr::pull("n") |> as.numeric() expect_equal(x,y) - x <- summariseInObservation(cdm$observation_period, interval = "months") |> - dplyr::filter(additional_level == "1982-03-01 to 1982-03-31", estimate_name == "count") |> + x <- summariseInObservation(cdm$observation_period, unit = "month", unitInterval = 10) |> + dplyr::filter(variable_level == "1982-03-01 to 1982-12-31", estimate_name == "count") |> dplyr::pull("estimate_value") |> as.numeric() y <- cdm$observation_period %>% dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") %>% dplyr::filter(observation_period_start_date < as.Date("1982-03-01") & observation_period_end_date >= as.Date("1982-03-01") | - (observation_period_start_date >= as.Date("1982-03-01") & observation_period_start_date <= as.Date("1982-03-31"))) |> + (observation_period_start_date >= as.Date("1982-03-01") & observation_period_start_date <= as.Date("1982-12-31"))) |> dplyr::tally() |> dplyr::pull("n") |> as.numeric() expect_equal(x,y) @@ -103,50 +98,45 @@ test_that("check sex argument works", { cdm <- cdmEunomia() # Check overall - x <- summariseInObservation(cdm$observation_period, interval = "years", sex = TRUE) - expect_equal(x |> dplyr::filter(additional_level != "overall") |> dplyr::pull("additional_name") |> unique(), "time_interval") - x <- summariseInObservation(cdm$observation_period, interval = "overall") - expect_equal(x |> dplyr::filter(additional_level == "overall") |> dplyr::pull("additional_name") |> unique(), "overall") - - x <- summariseInObservation(cdm$observation_period, interval = "years", sex = TRUE) |> - dplyr::filter(strata_level %in% c("Male","Female"), additional_level == "1908-01-01 to 1908-12-31", estimate_name == "count") |> + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 8, sex = TRUE) |> + dplyr::filter(strata_level %in% c("Male","Female"), variable_level == "1908-01-01 to 1915-12-31", estimate_name == "count") |> dplyr::pull(estimate_value) |> as.numeric() |> sum() - y <- summariseInObservation(cdm$observation_period, interval = "years", sex = TRUE) |> - dplyr::filter(strata_level %in% c("overall"), additional_level == "1908-01-01 to 1908-12-31", estimate_name == "count") |> + y <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 8, sex = TRUE) |> + dplyr::filter(strata_level %in% c("overall"), variable_level == "1908-01-01 to 1915-12-31", estimate_name == "count") |> dplyr::pull(estimate_value) |> as.numeric() expect_equal(x,y) y <- cdm$observation_period |> dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") %>% dplyr::filter(observation_period_start_date < as.Date("1908-01-01") & observation_period_end_date >= as.Date("1908-01-01") | - (observation_period_start_date >= as.Date("1908-01-01") & observation_period_start_date <= as.Date("1908-12-31"))) |> + (observation_period_start_date >= as.Date("1908-01-01") & observation_period_start_date <= as.Date("1915-12-31"))) |> dplyr::tally() |> dplyr::pull() |> as.numeric() expect_equal(x,y) # Check a random group - x <- summariseInObservation(cdm$observation_period, interval = "years", sex = TRUE) |> - dplyr::filter(strata_level == "Male", additional_level == "1915-01-01 to 1915-12-31", estimate_name == "count") |> + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 8, sex = TRUE) |> + dplyr::filter(strata_level == "Male", variable_level == "1908-01-01 to 1915-12-31", estimate_name == "count") |> dplyr::pull(estimate_value) |> as.numeric() y <- cdm$observation_period |> dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") %>% PatientProfiles::addSexQuery() |> dplyr::filter(sex == "Male") |> - dplyr::filter(observation_period_start_date < as.Date("1915-01-01") & observation_period_end_date >= as.Date("1915-01-01") | - (observation_period_start_date >= as.Date("1915-01-01") & observation_period_start_date <= as.Date("1915-12-31"))) |> + dplyr::filter(observation_period_start_date < as.Date("1908-01-01") & observation_period_end_date >= as.Date("1908-01-01") | + (observation_period_start_date >= as.Date("1908-01-01") & observation_period_start_date <= as.Date("1915-12-31"))) |> dplyr::tally() |> dplyr::pull() |> as.numeric() expect_equal(x,y) - x <- summariseInObservation(cdm$observation_period, interval = "years", sex = TRUE) |> - dplyr::filter(strata_level == "Male", additional_level == "1915-01-01 to 1915-12-31", estimate_name == "percentage") |> + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 8, sex = TRUE) |> + dplyr::filter(strata_level == "Male", variable_level == "1908-01-01 to 1915-12-31", estimate_name == "percentage") |> dplyr::pull(estimate_value) |> as.numeric() y <- (cdm$observation_period |> dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") %>% PatientProfiles::addSexQuery() |> dplyr::filter(sex == "Male") |> - dplyr::filter(observation_period_start_date < as.Date("1915-01-01") & observation_period_end_date >= as.Date("1915-01-01") | - (observation_period_start_date >= as.Date("1915-01-01") & observation_period_start_date <= as.Date("1915-12-31"))) |> + dplyr::filter(observation_period_start_date < as.Date("1908-01-01") & observation_period_end_date >= as.Date("1908-01-01") | + (observation_period_start_date >= as.Date("1908-01-01") & observation_period_start_date <= as.Date("1915-12-31"))) |> dplyr::tally() |> dplyr::pull())/(cdm[["person"]] |> dplyr::tally() |> dplyr::pull() |> as.numeric())*100 expect_equal(x,y) @@ -159,16 +149,16 @@ test_that("check ageGroup argument works", { # Load mock database ---- cdm <- cdmEunomia() - expect_no_error(summariseInObservation(cdm$observation_period, ageGroup = list(c(0,20), c(21, Inf)))) + expect_no_error(summariseClinicalRecords(cdm, "condition_occurrence", ageGroup = list(c(0,20), c(21, Inf)))) - x <- summariseInObservation(cdm$observation_period, interval = "years", ageGroup = list("<=20" = c(0,20), ">20" = c(21,Inf))) |> - dplyr::filter(additional_level == "1928-01-01 to 1928-12-31", estimate_name == "count", strata_level == "<=20") |> + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 10, ageGroup = list("<=20" = c(0,20), ">20" = c(21,Inf))) |> + dplyr::filter(variable_level == "1928-01-01 to 1937-12-31", estimate_name == "count", strata_level == "<=20") |> dplyr::pull(estimate_value) |> as.numeric() y <- cdm$observation_period |> dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") %>% dplyr::filter(observation_period_start_date < as.Date("1928-01-01") & observation_period_end_date >= as.Date("1928-01-01") | - (observation_period_start_date >= as.Date("1928-01-01") & observation_period_start_date <= as.Date("1928-12-31"))) |> - dplyr::mutate("start" = as.Date("1928-01-01"), "end" = as.Date("1928-12-31")) |> + (observation_period_start_date >= as.Date("1928-01-01") & observation_period_start_date <= as.Date("1937-12-31"))) |> + dplyr::mutate("start" = as.Date("1928-01-01"), "end" = as.Date("1937-12-31")) |> PatientProfiles::addAgeQuery(indexDate = "start", ageName = "age_start") %>% dplyr::mutate(age_end = age_start+10) |> dplyr::filter((age_end <= 20 & age_end >= 0) | (age_start >= 0 & age_start <= 20)) |> @@ -176,15 +166,15 @@ test_that("check ageGroup argument works", { dplyr::pull() |> as.numeric() expect_equal(x,y) - x <- summariseInObservation(cdm$observation_period, interval = "years", sex = TRUE) |> - dplyr::filter(strata_level == "Male", additional_level == "1918-01-01 to 1918-12-31", estimate_name == "percentage") |> + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 8, sex = TRUE) |> + dplyr::filter(strata_level == "Male", variable_level == "1908-01-01 to 1915-12-31", estimate_name == "percentage") |> dplyr::pull(estimate_value) |> as.numeric() y <- (cdm$observation_period |> dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") %>% PatientProfiles::addSexQuery() |> dplyr::filter(sex == "Male") |> - dplyr::filter(observation_period_start_date < as.Date("1918-01-01") & observation_period_end_date >= as.Date("1918-01-01") | - (observation_period_start_date >= as.Date("1918-01-01") & observation_period_start_date <= as.Date("1918-12-31"))) |> + dplyr::filter(observation_period_start_date < as.Date("1908-01-01") & observation_period_end_date >= as.Date("1908-01-01") | + (observation_period_start_date >= as.Date("1908-01-01") & observation_period_start_date <= as.Date("1915-12-31"))) |> dplyr::tally() |> dplyr::pull())/(cdm[["person"]] |> dplyr::tally() |> dplyr::pull() |> as.numeric())*100 expect_equal(x,y) @@ -198,14 +188,14 @@ test_that("check output argument works", { cdm <- cdmEunomia() # check value - x <- summariseInObservation(cdm$observation_period, interval = "years", output = c("records","person-days"), ageGroup = NULL, sex = FALSE) |> - dplyr::filter(variable_name == "Number person-days", additional_level == "1970-01-01 to 1970-12-31", estimate_type == "integer") |> + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 7, output = c("records","person-days"), ageGroup = NULL, sex = FALSE) |> + dplyr::filter(variable_name == "Number person-days", variable_level == "1964-01-01 to 1970-12-31", estimate_type == "integer") |> dplyr::pull("estimate_value") |> as.numeric() y <- cdm$observation_period |> dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") %>% - dplyr::filter(observation_period_start_date < as.Date("1970-01-01") & observation_period_end_date >= as.Date("1970-01-01") | - (observation_period_start_date >= as.Date("1970-01-01") & observation_period_start_date <= as.Date("1970-12-31"))) |> - dplyr::mutate("start_date" = as.Date("1970-01-01"), "end_date" = as.Date("1970-12-31")) %>% + dplyr::filter(observation_period_start_date < as.Date("1964-01-01") & observation_period_end_date >= as.Date("1964-01-01") | + (observation_period_start_date >= as.Date("1964-01-01") & observation_period_start_date <= as.Date("1970-12-31"))) |> + dplyr::mutate("start_date" = as.Date("1964-01-01"), "end_date" = as.Date("1970-12-31")) %>% dplyr::mutate("start_date" = pmax(start_date, observation_period_start_date, na.rm = TRUE), "end_date" = pmin(end_date, observation_period_end_date, na.rm = TRUE)) %>% dplyr::mutate(days = !!CDMConnector::datediff("start_date","end_date", interval = "day")+1) |> @@ -217,14 +207,14 @@ test_that("check output argument works", { dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") %>% dplyr::mutate(days = !!CDMConnector::datediff("observation_period_start_date","observation_period_end_date", interval = "day")+1) |> dplyr::summarise(n = sum(days, na.rm = TRUE)) |> dplyr::pull("n") - x <- summariseInObservation(cdm$observation_period, interval = "years", output = c("records","person-days"), ageGroup = NULL, sex = FALSE) |> - dplyr::filter(variable_name == "Number person-days", additional_level == "1964-01-01 to 1964-12-31", estimate_type == "percentage") |> + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 7, output = c("records","person-days"), ageGroup = NULL, sex = FALSE) |> + dplyr::filter(variable_name == "Number person-days", variable_level == "1964-01-01 to 1970-12-31", estimate_type == "percentage") |> dplyr::pull("estimate_value") |> as.numeric() y <- cdm$observation_period |> dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") %>% dplyr::filter(observation_period_start_date < as.Date("1964-01-01") & observation_period_end_date >= as.Date("1964-01-01") | - (observation_period_start_date >= as.Date("1964-01-01") & observation_period_start_date <= as.Date("1964-12-31"))) |> - dplyr::mutate("start_date" = as.Date("1964-01-01"), "end_date" = as.Date("1964-12-31")) %>% + (observation_period_start_date >= as.Date("1964-01-01") & observation_period_start_date <= as.Date("1970-12-31"))) |> + dplyr::mutate("start_date" = as.Date("1964-01-01"), "end_date" = as.Date("1970-12-31")) %>% dplyr::mutate("start_date" = pmax(start_date, observation_period_start_date, na.rm = TRUE), "end_date" = pmin(end_date, observation_period_end_date, na.rm = TRUE)) %>% dplyr::mutate(days = !!CDMConnector::datediff("start_date","end_date", interval = "day")+1) |> @@ -232,20 +222,20 @@ test_that("check output argument works", { expect_equal(x,y) # Check sex stratified - x <- summariseInObservation(cdm$observation_period, interval = "years", output = "person-days", sex = TRUE) |> - dplyr::filter(variable_name == "Number person-days", additional_level == "1964-01-01 to 1964-12-31", estimate_type == "integer") |> + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 7, output = "person-days", sex = TRUE) |> + dplyr::filter(variable_name == "Number person-days", variable_level == "1964-01-01 to 1970-12-31", estimate_type == "integer") |> dplyr::filter(strata_level == "overall") |> dplyr::pull("estimate_value") |> as.numeric() - y <- summariseInObservation(cdm$observation_period, interval = "years", output = "person-days", sex = TRUE) |> - dplyr::filter(variable_name == "Number person-days", additional_level == "1964-01-01 to 1964-12-31", estimate_type == "integer") |> + y <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 7, output = "person-days", sex = TRUE) |> + dplyr::filter(variable_name == "Number person-days", variable_level == "1964-01-01 to 1970-12-31", estimate_type == "integer") |> dplyr::filter(strata_level != "overall") |> dplyr::pull("estimate_value") |> as.numeric() |> sum() expect_equal(x,y) # Check age stratified - x <- summariseInObservation(cdm$observation_period, interval = "years", output = "person-days", ageGroup = list("<=20" = c(0,20), ">20" = c(21,Inf))) |> - dplyr::filter(variable_name == "Number person-days", additional_level == "2000-01-01 to 2000-12-31", estimate_type == "integer") |> + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 7, output = "person-days", ageGroup = list("<=20" = c(0,20), ">20" = c(21,Inf))) |> + dplyr::filter(variable_name == "Number person-days", variable_level == "1964-01-01 to 1970-12-31", estimate_type == "integer") |> dplyr::filter(strata_level == "overall") |> dplyr::pull("estimate_value") |> as.numeric() - y <- summariseInObservation(cdm$observation_period, interval = "years", output = "person-days", sex = TRUE) |> - dplyr::filter(variable_name == "Number person-days", additional_level == "2000-01-01 to 2000-12-31", estimate_type == "integer") |> + y <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 7, output = "person-days", sex = TRUE) |> + dplyr::filter(variable_name == "Number person-days", variable_level == "1964-01-01 to 1970-12-31", estimate_type == "integer") |> dplyr::filter(strata_level != "overall") |> dplyr::pull("estimate_value") |> as.numeric() |> sum() expect_equal(x,y) diff --git a/tests/testthat/test-summariseMissingData.R b/tests/testthat/test-summariseMissingData.R deleted file mode 100644 index da24dbd9..00000000 --- a/tests/testthat/test-summariseMissingData.R +++ /dev/null @@ -1,54 +0,0 @@ -test_that("summariseMissingData() works", { - skip_on_cran() - # Load mock database ---- - cdm <- cdmEunomia() - - # Check all tables work ---- - expect_true(inherits(summariseMissingData(cdm, "drug_exposure"),"summarised_result")) - expect_no_error(y<-summariseMissingData(cdm, "observation_period")) - expect_no_error(x<-summariseMissingData(cdm, "visit_occurrence")) - expect_no_error(summariseMissingData(cdm, "condition_occurrence")) - expect_no_error(summariseMissingData(cdm, "drug_exposure")) - - expect_no_error(summariseMissingData(cdm, "procedure_occurrence", year = TRUE)) - expect_warning(summariseMissingData(cdm, "device_exposure")) - expect_no_error(z<-summariseMissingData(cdm, "measurement")) - expect_no_error(s<-summariseMissingData(cdm, "observation")) - - expect_warning(summariseMissingData(cdm, "death")) - - - expect_no_error(all <- summariseMissingData(cdm, c("observation_period", "visit_occurrence", "measurement"))) - expect_equal(all, dplyr::bind_rows(y, x, z)) - expect_equal(summariseMissingData(cdm, "observation"), summariseMissingData(cdm, "observation", col = colnames(cdm[['observation']]))) - x<-summariseMissingData(cdm, "procedure_occurrence", col = "procedure_date") - - expect_equal(summariseMissingData(cdm, c("procedure_occurrence","observation" ), col = "procedure_date"), dplyr::bind_rows(x,s)) - y<-summariseMissingData(cdm, "observation",col = "observation_date") - expect_equal(summariseMissingData(cdm, c("procedure_occurrence","observation" ), col = c("procedure_date", "observation_date")), dplyr::bind_rows(x,y)) - - # Check inputs ---- - expect_true(summariseMissingData(cdm, "procedure_occurrence", col="person_id")|> - dplyr::select(estimate_value)|> - dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> - dplyr::summarise(sum = sum(estimate_value)) |> - dplyr::pull() == 0) - - expect_true(summariseMissingData(cdm, "procedure_occurrence", col="person_id", sex = TRUE, ageGroup = list(c(0,50), c(51,Inf)))|> - dplyr::distinct(.data$strata_level)|> - dplyr::tally()|> - dplyr::pull()==9) - - expect_true(summariseMissingData(cdm, "procedure_occurrence", col="person_id", ageGroup = list(c(0,50)))|> - dplyr::distinct(.data$strata_level)|> - dplyr::tally()|> - dplyr::pull()==3) - - cdm$procedure_occurrence <- cdm$procedure_occurrence |> - dplyr::mutate(procedure_concept_id = NA_integer_) |> - dplyr::compute(name = "procedure_occurrence", temporary = FALSE) - - expect_warning(summariseMissingData(cdm, "procedure_occurrence", col="procedure_concept_id", ageGroup = list(c(0,50)))) - - PatientProfiles::mockDisconnect(cdm = cdm) -}) diff --git a/tests/testthat/test-summariseObservationPeriod.R b/tests/testthat/test-summariseObservationPeriod.R index 2d8b53c1..b8a0a0ad 100644 --- a/tests/testthat/test-summariseObservationPeriod.R +++ b/tests/testthat/test-summariseObservationPeriod.R @@ -1,5 +1,6 @@ test_that("check summariseObservationPeriod works", { skip_on_cran() + # helper function removeSettings <- function(x) { attr(x, "settings") <- NULL @@ -333,6 +334,7 @@ test_that("check it works with mockOmopSketch", { test_that("check summariseObservationPeriod strata works", { skip_on_cran() + # helper function removeSettings <- function(x) { attr(x, "settings") <- NULL diff --git a/tests/testthat/test-summarisePopulationCharacteristics.R b/tests/testthat/test-summarisePopulationCharacteristics.R new file mode 100644 index 00000000..b2154beb --- /dev/null +++ b/tests/testthat/test-summarisePopulationCharacteristics.R @@ -0,0 +1,229 @@ +test_that("summarisePopulationCharacteristics() works", { + skip_on_cran() + # Load mock database ---- + cdm <- cdmEunomia() + + # Check that works ---- + expect_no_error(summarisedPopulation <- summarisePopulationCharacteristics(cdm = cdm)) + + expect_true(inherits(summarisedPopulation,"summarised_result")) + expect_true(all(summarisedPopulation |> + dplyr::select("strata_name") |> + dplyr::distinct() |> + dplyr::pull() == + c("overall"))) + expect_true(all(summarisedPopulation |> + dplyr::filter(variable_name == "Number records") |> + dplyr::select("estimate_value") |> + dplyr::pull() == + 2694)) + expect_true(all(summarisedPopulation |> + dplyr::filter(variable_name == "Cohort start date" & estimate_name == "min") |> + dplyr::select("estimate_value") |> + dplyr::pull() == + "1908-09-22")) + expect_true(summarisedPopulation |> + dplyr::filter(variable_name == "Age at end") |> + dplyr::tally() |> + dplyr::pull() != + 0) + + expect_no_error(summarisedPopulationEqual <- summarisePopulationCharacteristics( + cdm = cdm, + studyPeriod = NULL) + ) + expect_equal(summarisedPopulation, summarisedPopulationEqual) + + # Add date range + expect_no_error(summarisedPopulation <- summarisePopulationCharacteristics( + cdm = cdm, + studyPeriod = c("1900-01-01", "2010-01-01")) + ) + expect_true(inherits(summarisedPopulation,"summarised_result")) + expect_true(all(summarisedPopulation |> + dplyr::select("strata_name") |> + dplyr::distinct() |> + dplyr::pull() == + c("overall"))) + expect_true(all(summarisedPopulation |> + dplyr::filter(variable_name == "Number records") |> + dplyr::select("estimate_value") |> + dplyr::pull() == + 2694)) + expect_true(all(summarisedPopulation |> + dplyr::filter(variable_name == "Cohort end date" & estimate_name == "max") |> + dplyr::select("estimate_value") |> + dplyr::pull() == + "2010-01-01")) + + # Add sex and age group strata + expect_no_error(summarisedPopulation <- summarisePopulationCharacteristics( + cdm = cdm, + studyPeriod = c("1950-01-01", NA), + sex = TRUE, + ageGroup = list(c(0,20),c(21,150))) + ) + expect_true(inherits(summarisedPopulation,"summarised_result")) + expect_true(all(summarisedPopulation |> + dplyr::select("strata_name") |> + dplyr::distinct() |> + dplyr::pull() |> + sort() == + c("age_group_at_start", "age_group_at_start &&& sex", "overall", "sex"))) + expect_true(all(summarisedPopulation |> + dplyr::filter(variable_name == "Number records") |> + dplyr::select("estimate_value") |> + dplyr::pull() |> + sort() == + c(101,1250,1271,1321,1372,172,2521,2693,71))) + expect_true(summarisedPopulation |> + dplyr::filter(variable_name == "Age at end" & strata_level == "0 to 20" & estimate_name == "min") |> + dplyr::pull("estimate_value") < + summarisedPopulation |> + dplyr::filter(variable_name == "Age at end" & strata_level == "21 to 150" & estimate_name == "min") |> + dplyr::pull("estimate_value")) + + # Only sex + expect_no_error(summarisedPopulation <- summarisePopulationCharacteristics( + cdm = cdm, + sex = TRUE + )) + expect_true(inherits(summarisedPopulation,"summarised_result")) + expect_true(all(summarisedPopulation |> + dplyr::select("strata_name") |> + dplyr::distinct() |> + dplyr::pull() |> + sort() == + c("overall", "sex"))) + + # Only age group + expect_no_error(summarisedPopulation <- summarisePopulationCharacteristics( + cdm = cdm, + ageGroup = list(c(0,1), c(2,Inf)) + )) + expect_true(inherits(summarisedPopulation,"summarised_result")) + expect_true(all(summarisedPopulation |> + dplyr::select("strata_name") |> + dplyr::distinct() |> + dplyr::pull() |> + sort() == + c("age_group_at_start", "overall"))) + + # Check result type + checkResultType(summarisedPopulation, "summarise_population_characteristics") + + PatientProfiles::mockDisconnect(cdm = cdm) +}) + +test_that("summarisePopulationCharacteristics() strata works", { + skip_on_cran() + # Load mock database ---- + cdm <- omock::mockCdmReference() |> + omock::mockPerson(seed = 1L) |> + omock::mockObservationPeriod(seed = 1L) |> + copyCdm() + + # Add sex and age group strata + expect_no_error(summarisedPopulation <- summarisePopulationCharacteristics( + cdm = cdm, + sex = TRUE, + ageGroup = list(c(0,20),c(21,150))) + ) + expect_true(inherits(summarisedPopulation,"summarised_result")) + expect_true(all(summarisedPopulation |> + dplyr::select("strata_name") |> + dplyr::distinct() |> + dplyr::pull() |> + sort() == + c("age_group_at_start", "age_group_at_start &&& sex", "overall", "sex"))) + + expect_true(all(summarisedPopulation |> + dplyr::filter(variable_name == "Number records") |> + dplyr::arrange(strata_name, strata_level) |> + dplyr::select("estimate_value") |> + dplyr::pull() == + c(4,6,1,3,4,2,10,5,5))) + + PatientProfiles::mockDisconnect(cdm = cdm) +}) + +test_that("summarisePopulationCharacteristics() expected errors", { + skip_on_cran() + # Load mock database ---- + cdm <- cdmEunomia() + + expect_error(summarisePopulationCharacteristics("cdm")) + expect_error(summarisePopulationCharacteristics(cdm, studyPeriod = c("2000-01-01", "1990-01-01"))) + expect_error(summarisePopulationCharacteristics(cdm, studyPeriod = c(NA, "1990-51-01"))) + expect_error(summarisePopulationCharacteristics(cdm, studyPeriod = c("1990-01-01"))) + expect_error(summarisePopulationCharacteristics(cdm, studyPeriod = c("01/31/1990", "2000-01-01"))) + expect_error(summarisePopulationCharacteristics(cdm, studyPeriod = NULL, sex = "Female")) + expect_error(summarisePopulationCharacteristics(cdm, studyPeriod = NULL, ageGroup = c(0,20,40))) + + PatientProfiles::mockDisconnect(cdm = cdm) +}) + +test_that("tablePopulationCharacteristics() works", { + skip_on_cran() + # Load mock database ---- + cdm <- cdmEunomia() + + # Check that works ---- + x <- summarisePopulationCharacteristics(cdm) + expect_no_error(y <- tablePopulationCharacteristics(x)) + expect_true(inherits(y,"gt_tbl")) + + x <- x |> dplyr::filter(.data$result_id == -1) + expect_warning(tablePopulationCharacteristics(x)) + expect_warning(inherits(tablePopulationCharacteristics(x),"gt_tbl")) + + PatientProfiles::mockDisconnect(cdm = cdm) +}) + +test_that("summarisePopulationCharacteristics() works with mockOmopSKetch", { + skip_on_cran() + cdm <- mockOmopSketch(numberIndividuals = 2, seed = 1) + expect_no_error(summarisedPopulation <- summarisePopulationCharacteristics( + cdm = cdm) + ) + expect_true(inherits(summarisedPopulation,"summarised_result")) + expect_true(all(summarisedPopulation |> + dplyr::select("strata_name") |> + dplyr::distinct() |> + dplyr::pull() == + c("overall"))) + expect_true(all(summarisedPopulation |> + dplyr::filter(variable_name == "Number records") |> + dplyr::select("estimate_value") |> + dplyr::pull() == + 2)) + expect_true(all(summarisedPopulation |> + dplyr::filter(variable_name == "Cohort start date" & estimate_name == "min") |> + dplyr::select("estimate_value") |> + dplyr::pull() == + "1999-04-05")) + expect_true(summarisedPopulation |> + dplyr::filter(variable_name == "Age at end", estimate_name == "median") |> + dplyr::pull("estimate_value") == + as.character(mean(c(40,16)))) + expect_true(all(summarisedPopulation |> + dplyr::filter(variable_name == "Cohort end date" & estimate_name == "max") |> + dplyr::select("estimate_value") |> + dplyr::pull() == + "2013-06-29")) + expect_true(all(summarisedPopulation |> + dplyr::filter(variable_name == "Sex", estimate_name == "percentage") |> + dplyr::select("estimate_value") |> + dplyr::pull() == + c(50,50))) + expect_true(all(summarisedPopulation |> + dplyr::filter(variable_name == "Age at start", estimate_name %in% c("min","max")) |> + dplyr::pull("estimate_value") |> + sort() == + cdm$observation_period |> + PatientProfiles::addAge(indexDate = "observation_period_start_date") |> + dplyr::pull("age") |> + sort())) + PatientProfiles::mockDisconnect(cdm = cdm) + +}) diff --git a/tests/testthat/test-summariseRecordCount.R b/tests/testthat/test-summariseRecordCount.R index 2c0e7ebd..e36579e7 100644 --- a/tests/testthat/test-summariseRecordCount.R +++ b/tests/testthat/test-summariseRecordCount.R @@ -4,28 +4,28 @@ test_that("summariseRecordCount() works", { cdm <- cdmEunomia() # Check inputs ---- - expect_warning(inherits(summariseRecordCount(cdm, "observation_period", interval = "months"),"summarised_result")) - expect_warning(inherits(summariseRecordCount(cdm, "observation_period"),"summarised_result")) + expect_warning(inherits(summariseRecordCount(cdm, "observation_period", unit = "month"),"summarised_result")) + expect_warning(inherits(summariseRecordCount(cdm, "observation_period", unitInterval = 5),"summarised_result")) expect_warning(summariseRecordCount(cdm, "observation_period")) expect_no_error(summariseRecordCount(cdm, "visit_occurrence")) + expect_no_error(co <- summariseRecordCount(cdm, "condition_occurrence")) expect_no_error(summariseRecordCount(cdm, "drug_exposure")) expect_no_error(summariseRecordCount(cdm, "procedure_occurrence")) + expect_warning(de <- summariseRecordCount(cdm, "device_exposure")) expect_no_error(summariseRecordCount(cdm, "measurement")) + expect_no_error(o <- summariseRecordCount(cdm, "observation")) expect_warning(summariseRecordCount(cdm, "death")) - expect_no_error(co <- summariseRecordCount(cdm, "condition_occurrence")) - expect_warning(de <- summariseRecordCount(cdm, "device_exposure")) - expect_no_error(o <- summariseRecordCount(cdm, "observation")) - expect_no_error(all <- summariseRecordCount(cdm, c("condition_occurrence", "device_exposure","observation"))) expect_equal(all, dplyr::bind_rows(co,de,o)) + # Check inputs ---- expect_true( - (summariseRecordCount(cdm, "observation_period", interval = "years") |> - dplyr::filter(additional_level == "1963-01-01 to 1963-12-31") |> + (summariseRecordCount(cdm, "observation_period") |> + dplyr::filter(variable_level == "1963-01-01 to 1963-12-31") |> dplyr::pull("estimate_value") |> as.numeric()) == (cdm$observation_period |> @@ -38,8 +38,8 @@ test_that("summariseRecordCount() works", { ) expect_true( - summariseRecordCount(cdm, "condition_occurrence", interval = "months") |> - dplyr::filter(additional_level == "1961-02-01 to 1961-02-28") |> + summariseRecordCount(cdm, "condition_occurrence", unit = "month") |> + dplyr::filter(variable_level == "1961-02-01 to 1961-02-28") |> dplyr::pull("estimate_value") |> as.numeric() == (cdm$condition_occurrence |> @@ -53,9 +53,8 @@ test_that("summariseRecordCount() works", { ) expect_true( - (summariseRecordCount(cdm, "condition_occurrence", interval = "months") |> - dplyr::filter(additional_level %in% c("1984-01-01 to 1984-01-31", "1984-02-01 to 1984-02-29", "1984-03-01 to 1984-03-31")) |> - dplyr::summarise("estimate_value" = sum(as.numeric(estimate_value), na.rm = TRUE)) |> + (summariseRecordCount(cdm, "condition_occurrence", unit = "month", unitInterval = 3) |> + dplyr::filter(variable_level %in% c("1984-01-01 to 1984-03-31")) |> dplyr::pull("estimate_value") |> as.numeric()) == (cdm$condition_occurrence |> @@ -69,11 +68,8 @@ test_that("summariseRecordCount() works", { ) expect_true( - (summariseRecordCount(cdm, "drug_exposure", interval = "years") |> - dplyr::filter(additional_level %in% c("1981-01-01 to 1981-12-31", "1982-01-01 to 1982-12-31", "1983-01-01 to 1983-12-31", - "1984-01-01 to 1984-12-31", "1985-01-01 to 1985-12-31", "1986-01-01 to 1986-12-31", - "1987-01-01 to 1987-12-31", "1988-01-01 to 1988-12-31")) |> - dplyr::summarise("estimate_value" = sum(as.numeric(.data$estimate_value), na.rm = TRUE)) |> + (summariseRecordCount(cdm, "drug_exposure", unitInterval = 8) |> + dplyr::filter(variable_level == "1981-01-01 to 1988-12-31") |> dplyr::pull("estimate_value") |> as.numeric()) == (cdm$drug_exposure |> @@ -86,7 +82,7 @@ test_that("summariseRecordCount() works", { ) # Check result type - result <- summariseRecordCount(cdm, "observation_period", interval = "months") + result <- summariseRecordCount(cdm, "observation_period", unit = "month") checkResultType(result, "summarise_record_count") PatientProfiles::mockDisconnect(cdm = cdm) @@ -97,12 +93,12 @@ test_that("plotRecordCount() works", { # Load mock database ---- cdm <- cdmEunomia() - p <- summariseRecordCount(cdm, "drug_exposure", interval = "years") |> + p <- summariseRecordCount(cdm, "drug_exposure", unitInterval = 8) |> plotRecordCount() expect_true(inherits(p,"ggplot")) - p2 <- summariseRecordCount(cdm, c("condition_occurrence","drug_exposure"), interval = "years") |> + p2 <- summariseRecordCount(cdm, c("condition_occurrence","drug_exposure"), unitInterval = 8) |> plotRecordCount(facet = "omop_table") expect_true(inherits(p2,"ggplot")) @@ -123,16 +119,16 @@ test_that("summariseRecordCount() ageGroup argument works", { expect_no_error(t <- summariseRecordCount(cdm, "condition_occurrence", ageGroup = list(">=65" = c(65, Inf), "<65" = c(0,64)))) x <- t |> - dplyr::select("strata_level", "additional_level", "estimate_value") |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> dplyr::filter(strata_level != "overall") |> - dplyr::group_by(additional_level) |> + dplyr::group_by(variable_level) |> dplyr::summarise(estimate_value = sum(as.numeric(estimate_value))) |> - dplyr::arrange(additional_level) |> + dplyr::arrange(variable_level) |> dplyr::pull("estimate_value") y <- t |> - dplyr::select("strata_level", "additional_level", "estimate_value") |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> dplyr::filter(strata_level == "overall") |> - dplyr::arrange(additional_level) |> + dplyr::arrange(variable_level) |> dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> dplyr::pull("estimate_value") expect_equal(x,y) @@ -140,25 +136,25 @@ test_that("summariseRecordCount() ageGroup argument works", { expect_no_error(t <- summariseRecordCount(cdm, "condition_occurrence", ageGroup = list("<=20" = c(0,20), "21 to 40" = c(21,40), "41 to 60" = c(41,60), ">60" = c(61, Inf)))) x <- t |> - dplyr::select("strata_level", "additional_level", "estimate_value") |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> dplyr::filter(strata_level != "overall") |> - dplyr::group_by(additional_level) |> + dplyr::group_by(variable_level) |> dplyr::summarise(estimate_value = sum(as.numeric(estimate_value))) |> - dplyr::arrange(additional_level) |> + dplyr::arrange(variable_level) |> dplyr::pull("estimate_value") y <- t |> - dplyr::select("strata_level", "additional_level", "estimate_value") |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> dplyr::filter(strata_level == "overall") |> - dplyr::arrange(additional_level) |> + dplyr::arrange(variable_level) |> dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> dplyr::pull("estimate_value") expect_equal(x,y) - expect_no_error(t <- summariseRecordCount(cdm, "condition_occurrence", interval = "years", - ageGroup = list("<=20" = c(0,20), "21 to 40" = c(21,40), "41 to 60" = c(41,60), ">60" = c(61, Inf)))) - x <- t |> - dplyr::select("strata_level", "additional_level", "estimate_value") |> - dplyr::filter(strata_level == "<=20" & additional_level == "1920-01-01 to 1920-12-31") |> + expect_no_error(t <- summariseRecordCount(cdm, "condition_occurrence", + ageGroup = list("<=20" = c(0,20), "21 to 40" = c(21,40), "41 to 60" = c(41,60), ">60" = c(61, Inf)))) + x <- t |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> + dplyr::filter(strata_level == "<=20" & variable_level == "1920-01-01 to 1920-12-31") |> dplyr::summarise(n = sum(as.numeric(estimate_value))) |> dplyr::pull("n") y <- cdm$condition_occurrence |> @@ -171,13 +167,14 @@ test_that("summariseRecordCount() ageGroup argument works", { expect_equal(x,y) - expect_no_error(t <- summariseRecordCount(cdm, "condition_occurrence", interval = "years", + expect_no_error(t <- summariseRecordCount(cdm, "condition_occurrence", ageGroup = list("<=20" = c(0,20), "21 to 40" = c(21,40), "41 to 60" = c(41,60)))) x <- t |> - dplyr::select("strata_level", "additional_level", "estimate_value") |> - dplyr::filter(strata_level == "<=20" & additional_level == "1920-01-01 to 1920-12-31") |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> + dplyr::filter(strata_level == "<=20" & variable_level == "1920-01-01 to 1920-12-31") |> dplyr::summarise(n = sum(as.numeric(estimate_value))) |> dplyr::pull("n") |> as.numeric() + x y <- cdm$condition_occurrence |> dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") |> PatientProfiles::addAgeQuery(indexDate = "condition_start_date", ageGroup = list("<=20" = c(0,20))) |> @@ -198,40 +195,40 @@ test_that("summariseRecordCount() sex argument works", { # Check that works ---- expect_no_error(t <- summariseRecordCount(cdm, "condition_occurrence", sex = TRUE)) x <- t |> - dplyr::select("strata_level", "additional_level", "estimate_value") |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> dplyr::filter(strata_level != "overall") |> - dplyr::group_by(additional_level) |> + dplyr::group_by(variable_level) |> dplyr::summarise(estimate_value = sum(as.numeric(estimate_value))) |> - dplyr::arrange(additional_level) |> + dplyr::arrange(variable_level) |> dplyr::pull("estimate_value") y <- t |> - dplyr::select("strata_level", "additional_level", "estimate_value") |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> dplyr::filter(strata_level == "overall") |> - dplyr::arrange(additional_level) |> + dplyr::arrange(variable_level) |> dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> dplyr::pull("estimate_value") expect_equal(x,y) expect_warning(t <- summariseRecordCount(cdm, "observation_period", sex = TRUE)) x <- t |> - dplyr::select("strata_level", "additional_level", "estimate_value") |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> dplyr::filter(strata_level != "overall") |> - dplyr::group_by(additional_level) |> + dplyr::group_by(variable_level) |> dplyr::summarise(estimate_value = sum(as.numeric(estimate_value))) |> - dplyr::arrange(additional_level) |> + dplyr::arrange(variable_level) |> dplyr::pull("estimate_value") y <- t |> - dplyr::select("strata_level", "additional_level", "estimate_value") |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> dplyr::filter(strata_level == "overall") |> - dplyr::arrange(additional_level) |> + dplyr::arrange(variable_level) |> dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> dplyr::pull("estimate_value") expect_equal(x,y) - expect_no_error(t <- summariseRecordCount(cdm, "condition_occurrence", sex = TRUE, interval = "years")) + expect_no_error(t <- summariseRecordCount(cdm, "condition_occurrence", sex = TRUE)) x <- t |> - dplyr::select("strata_level", "additional_level", "estimate_value") |> - dplyr::filter(strata_level == "Male", additional_level == "1937-01-01 to 1937-12-31") |> dplyr::pull(estimate_value) + dplyr::select("strata_level", "variable_level", "estimate_value") |> + dplyr::filter(strata_level == "Male", variable_level == "1937-01-01 to 1937-12-31") |> dplyr::pull(estimate_value) y <- cdm$condition_occurrence |> dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") |> @@ -256,11 +253,11 @@ test_that("summariseRecordCount() works with mockOmopSketch", { dplyr::group_by(year, age_group, sex) |> dplyr::summarise(n = n()) - expect_no_error(co <- summariseRecordCount(cdm, "condition_occurrence", interval = "years", sex = TRUE, ageGroup = list(c(0,20),c(21,150)))) + expect_no_error(co <- summariseRecordCount(cdm, "condition_occurrence", sex = TRUE, ageGroup = list(c(0,20),c(21,150)))) expect_true(co |> dplyr::filter(grepl("Male",strata_level)) |> dplyr::tally() |> dplyr::pull() == 0) - expect_true(all(co |> dplyr::filter(grepl("&&&",strata_level), additional_level != "overall") |> + expect_true(all(co |> dplyr::filter(grepl("&&&",strata_level)) |> dplyr::pull("estimate_value") |> sort() == conditionpp |> dplyr::pull("n") |> as.character() |> sort())) diff --git a/vignettes/A-summarise_clinical_tables_records.Rmd b/vignettes/A-summarise_clinical_tables_records.Rmd index 3c2a685a..710fdde4 100644 --- a/vignettes/A-summarise_clinical_tables_records.Rmd +++ b/vignettes/A-summarise_clinical_tables_records.Rmd @@ -143,17 +143,17 @@ summarisedResult |> OmopSketch can also help you to summarise the trend of the records of an OMOP table. See the example below, where we use `summariseRecordCount()` to count the number of records within each year, and then, we use `plotRecordCount()` to create a ggplot with the trend. ```{r, warning=FALSE} -summarisedResult <- summariseRecordCount(cdm, "drug_exposure", interval = "years") +summarisedResult <- summariseRecordCount(cdm, "drug_exposure", unit = "year", unitInterval = 1) summarisedResult |> print() summarisedResult |> plotRecordCount() ``` -Note that you can adjust the time interval period using the `interval` argument, which can be set to either "years" or "months". See the example below, where it shows the number of records every 18 months: +Note that you can adjust the time interval period using the `unit` argument, which can be set to either "year" or "month", and the `unitInterval` argument, which must be an integer specifying the number of years or months which to count the records. See the example below, where it shows the number of records every 18 months: ```{r, warning=FALSE} -summariseRecordCount(cdm, "drug_exposure", interval = "months") |> +summariseRecordCount(cdm, "drug_exposure", unit = "month", unitInterval = 18) |> plotRecordCount() ``` @@ -161,7 +161,8 @@ We can further stratify our counts by sex (setting argument `sex = TRUE`) or by ```{r, warning=FALSE} summariseRecordCount(cdm, "drug_exposure", - interval = "months", + unit = "month", + unitInterval = 18, sex = TRUE, ageGroup = list("<30" = c(0,29), ">=30" = c(30,Inf))) |> @@ -172,7 +173,8 @@ By default, `plotRecordCount()` does not apply faceting or colour to any variabl ```{r, warning=FALSE} summariseRecordCount(cdm, "drug_exposure", - interval = "months", + unit = "month", + unitInterval = 18, sex = TRUE, ageGroup = list("0-29" = c(0,29), "30-Inf" = c(30,Inf))) |> @@ -183,7 +185,8 @@ Then, we can simply specify this by using the `facet` and `colour` arguments fro ```{r, warning=FALSE} summariseRecordCount(cdm, "drug_exposure", - interval = "months", + unit = "month", + unitInterval = 18, sex = TRUE, ageGroup = list("0-29" = c(0,29), "30-Inf" = c(30,Inf))) |> diff --git a/vignettes/B-summarise_concept_counts.Rmd b/vignettes/B-summarise_concept_counts.Rmd index fa5ae3e2..81d66a51 100644 --- a/vignettes/B-summarise_concept_counts.Rmd +++ b/vignettes/B-summarise_concept_counts.Rmd @@ -39,7 +39,6 @@ library(CDMConnector) library(DBI) library(duckdb) library(OmopSketch) -library(CodelistGenerator) # Connect to Eunomia database con <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir()) @@ -52,25 +51,12 @@ cdm # Summarise concept counts -First, let's generate a list of codes for the concept `dementia` using [CodelistGenerator](https://darwin-eu.github.io/CodelistGenerator/index.html) package. +First, let's generate a list of codes for the concept `acetaminophen` and `sinusitis. ```{r, warning=FALSE} -acetaminophen <- getCandidateCodes( - cdm = cdm, - keywords = "acetaminophen", - domains = "Drug", - includeDescendants = TRUE -) |> - dplyr::pull("concept_id") - -sinusitis <- getCandidateCodes( - cdm = cdm, - keywords = "sinusitis", - domains = "Condition", - includeDescendants = TRUE -) |> - dplyr::pull("concept_id") +acetaminophen <- c(1125315,1127078, 1127433, 19133768, 40229134, 40231925, 40162522) +sinusitis <- c(4294548, 40481087, 4283893, 257012) ``` Now we want to explore the occurrence of these concepts within the database. For that, we can use `summariseConceptCounts()` from OmopSketch: @@ -109,14 +95,7 @@ summariseConceptCounts(cdm, One can further stratify by year, sex or age group using the `year`, `sex`, and `ageGroup` arguments. ``` {r, warning=FALSE} -summariseConceptCounts(cdm, - conceptId = list("acetaminophen" = acetaminophen, - "sinusitis" = sinusitis), - countBy = "person", - interval = "years", - sex = TRUE, - ageGroup = list("<=50" = c(0,50), ">50" = c(51,Inf))) |> - select(group_level, strata_level, variable_name, estimate_name) |> glimpse() +summariseConceptCounts(cdm, conceptId = list("acetaminophen" = acetaminophen, "sinusitis" = sinusitis), countBy = "person", year = TRUE, sex = TRUE, ageGroup = list("<=50" = c(0,50), ">50" = c(51,Inf))) |> select(group_level, strata_level, variable_name, estimate_name) |> glimpse() ``` ## Visualise the results @@ -135,7 +114,7 @@ Notice that either person counts or record counts can be plotted. If both have b summariseConceptCounts(cdm, conceptId = list("sinusitis" = sinusitis), countBy = c("person","record")) |> - filter(variable_name == "Number subjects") |> + filter(estimate_name == "person_count") |> plotConceptCounts() ``` @@ -153,6 +132,6 @@ summariseConceptCounts(cdm, conceptId = list("sinusitis" = sinusitis), countBy = c("person"), sex = TRUE, - ageGroup = list("<=50" = c(0,50), ">50" = c(51, Inf))) |> + ageGroup = list("<=50" = c(0,50), ">50" = c(51, Inf)))|> plotConceptCounts(facet = "sex", colour = "age_group") ```