From 6a181eb016d83d84c095b98806426f3afa6494b3 Mon Sep 17 00:00:00 2001 From: Christian Onyango Date: Mon, 15 May 2023 14:41:57 +0300 Subject: [PATCH] Worked on new logic for import history to incorporate changes to how we get the keys from the dataStore; also isolated the key for 2022 from the rest to have two independent functions --- R/get-import-history.R | 69 +++++++++++++++++++++++++++------------ data-raw/global_summary.R | 4 +-- renv.lock | 6 ++-- 3 files changed, 54 insertions(+), 25 deletions(-) diff --git a/R/get-import-history.R b/R/get-import-history.R index 2e73b6a..d49c04c 100644 --- a/R/get-import-history.R +++ b/R/get-import-history.R @@ -14,43 +14,72 @@ get_import_history <- function(geo_session = dynGet("d2_default_session", inherits = TRUE)) { - end_point <- "dataStore/MOH_country_indicators" + namespace <- "MOH_country_indicators" + # Fetches data from the server - ls <- datimutils::getMetadata(end_point = "dataStore/MOH_country_indicators", + ls <- datimutils::getDataStoreNamespaceKeys(namespace = "MOH_country_indicators", d2_session = geo_session) - args <- ls[!ls %in% c("config", "2021", "CS_2021")] + args <- ls[!ls %in% c("config", "2021", "CS_2021", "2022")] if (is.null(df)) { return(NULL) } + #separate code to handle 2022 data format + df_2022 <- tryCatch({ + x <- 2022 - # Loops through all available years to pull data availability from GeoAlign - df <- args |> - lapply(function(x) { - tryCatch({ - args2 <- list(end_point = paste0(end_point, "/", x), - d2_session = geo_session) - df2 <- purrr::exec(datimutils::getMetadata, !!!args2) + args2 <- list(namespace = paste0(namespace, "/", x), + d2_session = geo_session) + df2 <- purrr::exec(datimutils::getDataStoreNamespaceKeys, !!!args2) + + if (length(df2$DAA) > 0) { + df2 <- as.data.frame(do.call(rbind, lapply(df2$DAA, as.data.frame))) + rownames(df2) <- c(1:nrow(df2)) + colnames(df2) <- colnames(df2) |> lapply(function(i) { + return(gsub('indicatorMapping.', '', i)) + }) + df2 <- df2 |> + 
dplyr::select(-code) |> + dplyr::rename("CountryCode" = "countryCode", + "CountryName" = "countryName", + "TX_NEW_hasMappingData" = "TX_NEW", + "HTS_TST_hasMappingData" = "HTS_TST", + "TB_PREV_hasMappingData" = "TB_PREV", + "TX_CURR_hasMappingData" = "TX_CURR", + "PMTCT_ART_hasMappingData" = "PMTCT_ART", + "PMTCT_STAT_hasMappingData" = "PMTCT_STAT", + "TX_PVLSDEN_hasMappingData" = "TX_PVLS_DEN", + "TX_PVLSNUM_hasMappingData" = "TX_PVLS_NUM") + df2 <- df2 |> + dplyr::mutate(period = as.character(x)) + } - if(x %in% c(2022)){ - df2<-as.data.frame(do.call(rbind, lapply(df2$DAA, as.data.frame))) - rownames(df2) <- c(1:nrow(df2)) - colnames(df2) <- colnames(df2) |> lapply(function(i){ return (gsub('indicatorMapping.', '', i))}) - df2 <- df2 |> dplyr::select(-code) |> - dplyr::rename("CountryCode" = "countryCode", "CountryName" = "countryName", "TX_NEW_hasMappingData" = "TX_NEW", "HTS_TST_hasMappingData" = "HTS_TST", "TB_PREV_hasMappingData" = "TB_PREV", "TX_CURR_hasMappingData" = "TX_CURR", "PMTCT_ART_hasMappingData" = "PMTCT_ART", "PMTCT_STAT_hasMappingData" = "PMTCT_STAT", "TX_PVLSDEN_hasMappingData" = "TX_PVLS_DEN", "TX_PVLSNUM_hasMappingData" = "TX_PVLS_NUM") - } + df2 + }, error = function(e) { + NA + }) + #working code for rest of years + df_rest_of_years <- args |> + lapply(function(x) { + tryCatch({ + args2 <- list(namespace = paste0(namespace, "/", x), + d2_session = geo_session) + df2 <- purrr::exec(datimutils::getDataStoreNamespaceKeys, !!!args2) df2 <- df2 |> + purrr::map_dfr(as.data.frame) |> dplyr::mutate(period = x) + return(df2) }, error = function(e) { return(NA) }) - }) |> - remove_missing_dfs() |> - dplyr::bind_rows() |> + }) |> remove_missing_dfs() + + #then bind both and proceed + df <- dplyr::bind_rows(df_2022, df_rest_of_years) |> dplyr::mutate(period = stringr::str_sub(period, start = -4, end = -1)) |> tidyr::pivot_longer(-c(period, CountryName, diff --git a/data-raw/global_summary.R b/data-raw/global_summary.R index 5ca699e..91a4fd3 100644 --- 
a/data-raw/global_summary.R +++ b/data-raw/global_summary.R @@ -18,7 +18,7 @@ summary_data <- daa.analytics::global_summary(combined_data) |> dplyr::mutate(DataOrMapping = ifelse((is.na(MOH_Results_Total) | MOH_Results_Total == "None") & CourseOrFine == "Coarse" & period < 2022, "Mapping Coarse", ifelse((is.na(MOH_Results_Total) | MOH_Results_Total == "None") & CourseOrFine == "Fine" & period < 2022, "Mapping Fine", ifelse((is.na(CourseOrFine) | CourseOrFine == "None" | CourseOrFine == "NA") & period < 2022, "No Mapping", - ifelse(!is.na(MOH_Results_Total) & CourseOrFine == "Fine" & period < 2022, "Data Fine", - ifelse(!is.na(MOH_Results_Total) & CourseOrFine == "Coarse" & period < 2022, "Data Coarse", DataOrMapping)))))) + ifelse(!is.na(MOH_Results_Total) & !is.na(PEPFAR_Results_Total) & CourseOrFine == "Fine" & period < 2022, "Data Fine", + ifelse(!is.na(MOH_Results_Total) & !is.na(PEPFAR_Results_Total) & CourseOrFine == "Coarse" & period < 2022, "Data Coarse", DataOrMapping)))))) write.csv(summary_data, paste0(output_folder, "global_summary.csv")) diff --git a/renv.lock b/renv.lock index 23010aa..0dae73d 100644 --- a/renv.lock +++ b/renv.lock @@ -227,9 +227,9 @@ "RemoteHost": "api.github.com", "RemoteUsername": "pepfar-datim", "RemoteRepo": "datimutils", - "RemoteRef": "master", - "RemoteSha": "3346faec3d1c0a6d180a1b60cab4176c2b27fa95", - "Hash": "b38f9407699a3f75b74a0b3349d8684a", + "RemoteRef": "datastore-updates", + "RemoteSha": "c792266c0dea99013b64455a6f8f3bf0cccaad70", + "Hash": "b5bc320b7332d08c080a1be49a07b99c", "Requirements": [ "R6", "httr",