diff --git a/DESCRIPTION b/DESCRIPTION index 0e8d051..6ec44a2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: daa.analytics Title: Utilities for Compiling and Analyzing PEPFAR Data Alignment Activity Data -Version: 0.2.0 +Version: 0.3.0 Date: 2021-06-17 Authors@R: person(given = "Chris", diff --git a/NAMESPACE b/NAMESPACE index 72b1203..faa4158 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,6 +19,7 @@ export(get_pvls_emr_table) export(get_upload_timestamps) export(weighted_concordance) export(weighted_discordance) +export(weighting_levels) importFrom(magrittr,"%<>%") importFrom(magrittr,"%>%") importFrom(rlang,.data) diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..e171986 --- /dev/null +++ b/NEWS.md @@ -0,0 +1,23 @@ +# daa.analytics v0.3.0 + +## Breaking changes +* `adorn_daa_data` no longer exports columns `count_of_matched_sites`, + `pepfar_sum_at_matched_sites`, `weighting`, `weighted_discordance`, + or `weighted_concordance`. + - `count_of_matched_sites` and `pepfar_sum_at_matched_sites` will no + longer be supported. + - `weighting`, `weighted_discordance`, and `weighted_concordance` will be + replaced by weights and metrics calculated at each level of the organisation + hierarchy going forward and will be calculated using the `weighting_levels` + function. + +## Experimental features +* `weighting_levels` is a new function that calculates weightings as well as + concordance and discordance metrics for DAA indicators at levels 3 through 5 + of the organisation hierarchy. + +## Minor improvements and fixes +* Adds UIDs for each organisation hierarchy level to `ou_hierarchy` dataset +* Documentation updates +* Adds `NEWS.md` file + diff --git a/R/combine-data.R b/R/combine-data.R index 91f6619..fbf9226 100644 --- a/R/combine-data.R +++ b/R/combine-data.R @@ -1,6 +1,4 @@ #' @export -#' @importFrom magrittr %>% %<>% -#' @importFrom rlang .data #' @title Combine DAA datasets together. 
#' #' @description @@ -29,15 +27,27 @@ combine_data <- function(daa_indicator_data, keep = FALSE) ou_hierarchy %<>% - dplyr::select(-.data$organisationunitid) %>% + dplyr::select(.data$facilityuid, .data$namelevel3, .data$namelevel4, + .data$namelevel5, .data$namelevel6, .data$namelevel7) %>% unique() df <- daa_indicator_data %>% + # Joins DAA Indicator data to OU hierarchy metadata dplyr::left_join(ou_hierarchy, by = c("facilityuid")) %>% + + # Joins PVLS and EMR datasets dplyr::left_join(pvls_emr, by = c("facilityuid", "period", "indicator")) %>% + + # Joins site attribute data dplyr::left_join(attribute_data %>% dplyr::filter(!is.na(.data$moh_id)), by = c("facilityuid")) %>% - dplyr::select(-.data$name, -.data$organisationunitid) + + # Selects and orders columns for export + dplyr::select(.data$facilityuid, dplyr::starts_with("namelevel"), + .data$indicator, .data$period, .data$moh, .data$pepfar, + .data$reported_by, dplyr::starts_with("level"), + dplyr::everything(), -.data$name, -.data$organisationunitid) + return(df) } diff --git a/R/daa.analytics-package.R b/R/daa.analytics-package.R new file mode 100644 index 0000000..5d3051d --- /dev/null +++ b/R/daa.analytics-package.R @@ -0,0 +1,9 @@ +#' @keywords internal +"_PACKAGE" + +## usethis namespace: start +#' @importFrom magrittr %<>% +#' @importFrom magrittr %>% +#' @importFrom rlang .data +## usethis namespace: end +NULL diff --git a/R/data.R b/R/data.R index 2edba17..daace3e 100644 --- a/R/data.R +++ b/R/data.R @@ -136,6 +136,19 @@ #' unit in DATIM.} #' \item{facilityuid}{The alphanumeric UID used to identify the facility in #' DATIM.} +#' \item{namelevel3uid}{The UID of the parent organisation unit at hierarchy +#' level 3 to the given facility.} +#' \item{namelevel4uid}{The UID of the parent organisation unit at hierarchy +#' level 4 to the given facility.} +#' \item{namelevel5uid}{The UID of the parent organisation unit at hierarchy +#' level 5 to the given facility.} +#' \item{namelevel6uid}{For countries with their 
facility level at hierarchy +#' level 6, this will represent the UID of the facility. For countries with +#' their facility level at hierarchy level 7, this will represent the parent +#' organisation unit at hierarchy level 6 to the given facility.} +#' \item{namelevel7uid}{For countries with their facility level at hierarchy +#' level 7, this will represent the UID of the facility. For countries with +#' their facility level at hierarchy level 6, this value will be 'NA'.} #' \item{namelevel3}{The name of the parent organisation unit at hierarchy #' level 3 to the given facility.} #' \item{namelevel4}{The name of the parent organisation unit at hierarchy @@ -194,24 +207,6 @@ #' \item{reported_by}{A text value indicating whether results were reported #' by just the MOH, just PEPFAR, or both entities at the given site for the #' given indicator during the reporting period.} -#' \item{count_of_matched_sites}{The number of facilities in a country for the -#' particular indicator and reporting period for which results were reported -#' by both the MOH and PEPFAR.} -#' \item{pepfar_sum_at_matched_sites}{The total results reported by PEPFAR -#' at all facilities in a country for the particular indicator and reporting -#' period.} -#' \item{weighting}{The PEPFAR results at the particular facility divided by -#' the total results reported by PEPFAR at all facilities for the given -#' indicator and reporting period. This figure provides the weighting value -#' for concordance and discordance metrics.} -#' \item{weighted_discordance}{The weighted discordance between the PEPFAR -#' and MOH reported results at the particular facility. Can be summed across -#' facilities grouped by country, indicator, and period to calculate the -#' weighted average discordance.} -#' \item{weighted_concordance}{The weighted concordance between the PEPFAR -#' and MOH reported results at the particular facility. 
Can be summed across -#' facilities grouped by country, indicator, and period to calculate the -#' weighted average concordance.} #' } #' @source \url{http://www.datim.org/} "daa_indicator_data" diff --git a/R/get-attributes.R b/R/get-attributes.R index e9250dc..c3fcc49 100644 --- a/R/get-attributes.R +++ b/R/get-attributes.R @@ -1,6 +1,4 @@ #' @export -#' @importFrom magrittr %>% %<>% -#' @importFrom rlang .data #' @title Fetch MOH ID and attributes from DATIM #' #' @description diff --git a/R/get-daa-data.R b/R/get-daa-data.R index a03018f..cb18763 100644 --- a/R/get-daa-data.R +++ b/R/get-daa-data.R @@ -1,6 +1,4 @@ #' @export -#' @importFrom magrittr %>% %<>% -#' @importFrom rlang .data #' @title Get DAA Indicator Data #' #' @description @@ -67,7 +65,6 @@ get_daa_data <- function(ou_uid, d2_session) { } #' @export -#' @importFrom magrittr %>% %<>% #' @title Adorn DAA Indicator Data #' #' @description @@ -128,42 +125,139 @@ adorn_daa_data <- function(df) { ifelse(!is.na(.data$pepfar), "PEPFAR", "Neither"))) %>% - # Groups rows by indicator and calculates indicator-specific summaries - dplyr::group_by(.data$Data, .data$period) %>% - dplyr::mutate(count_of_matched_sites = - sum(ifelse(.data$reported_by == "Both", 1, 0))) %>% - dplyr::mutate(pepfar_sum_at_matched_sites = - sum(ifelse(.data$reported_by == "Both", - .data$pepfar, 0))) %>% + # Reorganizes table for export + dplyr::select(facilityuid = .data$`Organisation unit`, + indicator = .data$`Data`, + .data$period, + .data$moh, + .data$pepfar, + .data$reported_by) + + return(df) +} + + +#' Adorn DAA Indicator Data with Weighted Metrics for All Levels +#' +#' @param daa_indicator_data Dataframe containing DAA indicator data. +#' @param ou_hierarchy Dataframe containing the Organisational hierarchy. +#' +#' @return A dataframe of DAA Indicator data with weightings and weighted +#' discordance and concordance calculated for levels 3 through 5. 
+#' @export +#' +weighting_levels <- function(daa_indicator_data = NULL, ou_hierarchy = NULL) { + ou_hierarchy %<>% + dplyr::select(-.data$organisationunitid, -paste0("namelevel", 3:7)) %>% + unique() + + df <- daa_indicator_data %>% + # Joins DAA Indicator data to OU hierarchy metadata + dplyr::left_join(ou_hierarchy, by = c("facilityuid")) %>% + + # Calculates Level 3 weighted concordance and discordance + dplyr::group_by(.data$indicator, + .data$period, + .data$namelevel3uid) %>% + dplyr::mutate(level3_weighting = + ifelse(.data$reported_by == "Both", + .data$pepfar / sum( + ifelse(.data$reported_by == "Both", + .data$pepfar, 0)), + NA)) %>% + dplyr::rowwise() %>% + dplyr::mutate(level3_discordance = + daa.analytics::weighted_discordance( + moh = .data$moh, + pepfar = .data$pepfar, + weighting = .data$level3_weighting), + level3_concordance = + daa.analytics::weighted_concordance( + moh = .data$moh, + pepfar = .data$pepfar, + weighting = .data$level3_weighting) + ) %>% dplyr::ungroup() %>% - # Calculates weighting variables - dplyr::mutate(weighting = + # Calculates Level 4 weighted concordance and discordance + dplyr::group_by(.data$indicator, + .data$period, + .data$namelevel4uid) %>% + dplyr::mutate(level4_weighting = ifelse(.data$reported_by == "Both", - .data$pepfar / .data$pepfar_sum_at_matched_sites, + .data$pepfar / sum( + ifelse(.data$reported_by == "Both", + .data$pepfar, 0)), NA)) %>% dplyr::rowwise() %>% - dplyr::mutate(weighted_discordance = - daa.analytics::weighted_discordance(.data$moh, - .data$pepfar, - .data$weighting)) %>% - dplyr::mutate(weighted_concordance = - daa.analytics::weighted_concordance(.data$moh, - .data$pepfar, - .data$weighting)) %>% + dplyr::mutate(level4_discordance = + daa.analytics::weighted_discordance( + moh = .data$moh, + pepfar = .data$pepfar, + weighting = .data$level4_weighting), + level4_concordance = + daa.analytics::weighted_concordance( + moh = .data$moh, + pepfar = .data$pepfar, + weighting = 
.data$level4_weighting) + )%>% dplyr::ungroup() %>% - # Reorganizes table for export - dplyr::select(facilityuid = .data$`Organisation unit`, - indicator = .data$`Data`, - .data$period, - .data$moh, .data$pepfar, .data$reported_by, - .data$count_of_matched_sites, - .data$pepfar_sum_at_matched_sites, .data$weighting, - .data$weighted_discordance, .data$weighted_concordance) + # Calculates Level 5 weighted concordance and discordance + dplyr::group_by(.data$indicator, + .data$period, + .data$namelevel5uid) %>% + dplyr::mutate(level5_weighting = + ifelse(.data$reported_by == "Both", + .data$pepfar / sum( + ifelse(.data$reported_by == "Both", + .data$pepfar, 0)), + NA)) %>% + dplyr::rowwise() %>% + dplyr::mutate(level5_discordance = + daa.analytics::weighted_discordance( + moh = .data$moh, + pepfar = .data$pepfar, + weighting = .data$level5_weighting), + level5_concordance = + daa.analytics::weighted_concordance( + moh = .data$moh, + pepfar = .data$pepfar, + weighting = .data$level5_weighting) + ) %>% + dplyr::ungroup() %>% + + # # Calculates Level 6 weighted concordance and discordance + # dplyr::group_by(.data$indicator, + # .data$period, + # .data$namelevel6uid) %>% + # dplyr::mutate(level6_weighting = + # ifelse(.data$reported_by == "Both", + # .data$pepfar / sum( + # ifelse(.data$reported_by == "Both", + # .data$pepfar, 0)), + # NA)) %>% + # dplyr::rowwise() %>% + # dplyr::mutate(level6_discordance = ifelse(is.na(namelevel7), NA_real_, + # daa.analytics::weighted_discordance( + # moh = .data$moh, + # pepfar = .data$pepfar, + # weighting = .data$level6_weighting)), + # level6_concordance = ifelse(is.na(namelevel7), NA_real_, + # daa.analytics::weighted_concordance( + # moh = .data$moh, + # pepfar = .data$pepfar, + # weighting = .data$level6_weighting)) + # ) %>% + # dplyr::ungroup() %>% + + # Selects rows for export + dplyr::select(-dplyr::starts_with("namelevel")) + return(df) } + # Helper functions ------------------------------------------ #' @title Get 
Indicator Name #' diff --git a/R/get-geoalign-data.R b/R/get-geoalign-data.R index bdbda70..8fec0d1 100644 --- a/R/get-geoalign-data.R +++ b/R/get-geoalign-data.R @@ -1,6 +1,4 @@ #' @export -#' @importFrom magrittr %>% %<>% -#' @importFrom rlang .data #' @title Fetch List of Participating Countries from GeoAlign #' #' @description @@ -34,8 +32,6 @@ get_daa_countries <- function(geo_session) { } #' @export -#' @importFrom magrittr %>% %<>% -#' @importFrom rlang .data #' @title Fetch Indicator Mapping and Data Availability from GeoAlign #' #' @description @@ -104,8 +100,6 @@ get_data_availability <- function(geo_session = geo_session) { } #' @export -#' @importFrom magrittr %>% %<>% -#' @importFrom rlang .data #' @title Fetch Import Timestamps from GeoAlign #' #' @description diff --git a/R/get-pvls-emr-data.R b/R/get-pvls-emr-data.R index a99c16f..cc7b94d 100644 --- a/R/get-pvls-emr-data.R +++ b/R/get-pvls-emr-data.R @@ -1,6 +1,5 @@ # TODO redocument parameters on all of these functions #' @export -#' @importFrom magrittr %>% %<>% #' @title Fetch Raw PVLS and EMR Indicator Data. 
#' #' @description @@ -37,8 +36,6 @@ get_pvls_emr_table <- function(s3, } #' @export -#' @importFrom magrittr %>% %<>% -#' @importFrom rlang .data #' @title Fetches Data Element Metadata #' #' @description @@ -77,8 +74,6 @@ get_de_metadata <- function(s3, } #' @export -#' @importFrom magrittr %>% %<>% -#' @importFrom rlang .data #' @title Fetch Category Option Combo Metadata #' #' @description @@ -117,8 +112,6 @@ get_coc_metadata <- function(s3, } #' @export -#' @importFrom magrittr %>% %<>% -#' @importFrom rlang .data #' @title Fetches Organsation Unit Metadata #' #' @description @@ -158,8 +151,6 @@ get_ou_metadata <- function(s3, } #' @export -#' @importFrom magrittr %>% %<>% -#' @importFrom rlang .data #' @title Fetches Period Metadata #' #' @description @@ -198,8 +189,6 @@ get_pe_metadata <- function(s3, } #' @export -#' @importFrom magrittr %>% %<>% -#' @importFrom rlang .data #' @title Generates Organisation Unit Hierarchy #' #' @description @@ -240,15 +229,13 @@ create_hierarchy <- function(ou_metadata) { by = c("namelevel7uid" = "uid"), keep = FALSE) %>% dplyr::select(.data$organisationunitid, .data$facilityuid, - paste0("namelevel", 3:7)) + dplyr::everything()) return(ou_hierarchy) } #' @export -#' @importFrom magrittr %>% %<>% -#' @importFrom rlang .data #' @title Adorn PVLS and EMR indicator data with metadata. #' #' @description diff --git a/R/metrics.R b/R/metrics.R index 30c2f5f..722a667 100644 --- a/R/metrics.R +++ b/R/metrics.R @@ -1,5 +1,4 @@ #' @export -#' @importFrom magrittr %>% %<>% #' @title Calculate Weighted Concordance. #' #' @description @@ -23,7 +22,6 @@ weighted_concordance <- function(moh, pepfar, weighting) { } #' @export -#' @importFrom magrittr %>% %<>% #' @title Calculate Weighted Discordance. 
#' #' @description diff --git a/R/utils.R b/R/utils.R index 2988570..a0a6fa3 100644 --- a/R/utils.R +++ b/R/utils.R @@ -61,8 +61,6 @@ fetch_s3_files <- function(s3, aws_s3_bucket, key, } #' @export -#' @importFrom magrittr %>% %<>% -#' @importFrom rlang .data #' @title Get Organisation Unit Name from UID. #' #' @description diff --git a/data-raw/daa_indicator_data.R b/data-raw/daa_indicator_data.R index 608fd19..cf1db39 100644 --- a/data-raw/daa_indicator_data.R +++ b/data-raw/daa_indicator_data.R @@ -5,12 +5,15 @@ # datimutils::loginToDATIM("~/.secrets/datim.json") # d2_session <- d2_default_session +if(!exists("ou_hierarchy")){ load("data/ou_hierarchy.Rda") } + daa_indicator_data <- daa.analytics::daa_countries$country_uid %>% {.[!. %in% "YM6xn5QxNpY"]} %>% lapply(., function(x){ print(daa.analytics::get_ou_name(x)) daa.analytics::get_daa_data(x, d2_session = d2_session) %>% - daa.analytics::adorn_daa_data(.) + daa.analytics::adorn_daa_data() %>% + daa.analytics::weighting_levels(ou_hierarchy = ou_hierarchy) }) %>% - dplyr::bind_rows(.) 
+ dplyr::bind_rows() usethis::use_data(daa_indicator_data, overwrite = TRUE) diff --git a/data-raw/update_all.R b/data-raw/update_all.R index d43ad8b..952b223 100644 --- a/data-raw/update_all.R +++ b/data-raw/update_all.R @@ -4,16 +4,6 @@ library(magrittr) source("data-raw/daa_indicators.R") -datimutils::loginToDATIM("~/.secrets/datim.json") -d2_session <- d2_default_session -source("data-raw/attribute_data.R") -source("data-raw/daa_indicator_data.R") - -datimutils::loginToDATIM("~/.secrets/geoalign.json") -geo_session <- d2_default_session -source("data-raw/daa_countries.R") -source("data-raw/data_availability.R") - s3 <- paws::s3() aws_s3_bucket <- Sys.getenv("AWS_S3_BUCKET") source("data-raw/coc_metadata.R") @@ -23,4 +13,16 @@ source("data-raw/ou_metadata.R") source("data-raw/ou_hierarchy.R") source("data-raw/pvls_emr.R") +datim_secret <- Sys.getenv("DATIM_SECRET") +datimutils::loginToDATIM(datim_secret) +d2_session <- d2_default_session +source("data-raw/attribute_data.R") +source("data-raw/daa_indicator_data.R") + +geo_secret <- Sys.getenv("GEO_SECRET") +datimutils::loginToDATIM(geo_secret) +geo_session <- d2_default_session +source("data-raw/daa_countries.R") +source("data-raw/data_availability.R") + source("data-raw/combined_data.R") diff --git a/man/daa.analytics-package.Rd b/man/daa.analytics-package.Rd new file mode 100644 index 0000000..c40749d --- /dev/null +++ b/man/daa.analytics-package.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/daa.analytics-package.R +\docType{package} +\name{daa.analytics-package} +\alias{daa.analytics} +\alias{daa.analytics-package} +\title{daa.analytics: Utilities for Compiling and Analyzing PEPFAR Data Alignment Activity Data} +\description{ +General functions for pulling, joining, analyzing, and visualizing DAA data. 
+} +\seealso{ +Useful links: +\itemize{ + \item \url{https://github.com/cnemarich/daa-analytics} + \item Report bugs at \url{https://github.com/cnemarich/daa-analytics} +} + +} +\author{ +\strong{Maintainer}: Chris Nemarich \email{cnemarich@guidehousefederal.com} (\href{https://orcid.org/YOUR-ORCID-ID}{ORCID}) + +} +\keyword{internal} diff --git a/man/daa_indicator_data.Rd b/man/daa_indicator_data.Rd index 74baef5..b095050 100644 --- a/man/daa_indicator_data.Rd +++ b/man/daa_indicator_data.Rd @@ -18,24 +18,6 @@ the associated reporting period.} \item{reported_by}{A text value indicating whether results were reported by just the MOH, just PEPFAR, or both entities at the given site for the given indicator during the reporting period.} -\item{count_of_matched_sites}{The number of facilities in a country for the -particular indicator and reporting period for which results were reported -by both the MOH and PEPFAR.} -\item{pepfar_sum_at_matched_sites}{The total results reported by PEPFAR -at all facilities in a country for the particular indicator and reporting -period.} -\item{weighting}{The PEPFAR results at the particular facility divided by -the total results reported by PEPFAR at all facilities for the given -indicator and reporting period. This figure provides the weighting value -for concordance and discordance metrics.} -\item{weighted_discordance}{The weighted discordance between the PEPFAR -and MOH reported results at the particular facility. Can be summed across -facilities grouped by country, indicator, and period to calculate the -weighted average discordance.} -\item{weighted_concordance}{The weighted concordance between the PEPFAR -and MOH reported results at the particular facility. 
Can be summed across -facilities grouped by country, indicator, and period to calculate the -weighted average concordance.} } } \source{ diff --git a/man/ou_hierarchy.Rd b/man/ou_hierarchy.Rd index b85da0f..6c61175 100644 --- a/man/ou_hierarchy.Rd +++ b/man/ou_hierarchy.Rd @@ -11,6 +11,19 @@ A data frame with 7 variables: unit in DATIM.} \item{facilityuid}{The alphanumeric UID used to identify the facility in DATIM.} +\item{namelevel3uid}{The UID of the parent organisation unit at hierarchy +level 3 to the given facility.} +\item{namelevel4uid}{The UID of the parent organisation unit at hierarchy +level 4 to the given facility.} +\item{namelevel5uid}{The UID of the parent organisation unit at hierarchy +level 5 to the given facility.} +\item{namelevel6uid}{For countries with their facility level at hierarchy +level 6, this will represent the UID of the facility. For countries with +their facility level at hierarchy level 7, this will represent the parent +organisation unit at hierarchy level 6 to the given facility.} +\item{namelevel7uid}{For countries with their facility level at hierarchy +level 7, this will represent the UID of the facility. 
For countries with +their facility level at hierarchy level 6, this value will be 'NA'.} \item{namelevel3}{The name of the parent organisation unit at hierarchy level 3 to the given facility.} \item{namelevel4}{The name of the parent organisation unit at hierarchy diff --git a/man/weighting_levels.Rd b/man/weighting_levels.Rd new file mode 100644 index 0000000..bdc2182 --- /dev/null +++ b/man/weighting_levels.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get-daa-data.R +\name{weighting_levels} +\alias{weighting_levels} +\title{Adorn DAA Indicator Data with Weighted Metrics for All Levels} +\usage{ +weighting_levels(daa_indicator_data = NULL, ou_hierarchy = NULL) +} +\arguments{ +\item{daa_indicator_data}{Dataframe containing DAA indicator data.} + +\item{ou_hierarchy}{Dataframe containing the Organisational hierarchy.} +} +\value{ +A dataframe of DAA Indicator data with weightings and weighted +discordance and concordance calculated for levels 3 through 5. +} +\description{ +Adorn DAA Indicator Data with Weighted Metrics for All Levels +}