From c529994b479f2014e64d9dc4d4c21f4032c48ca1 Mon Sep 17 00:00:00 2001 From: chainsawriot Date: Tue, 30 Apr 2024 15:55:56 +0200 Subject: [PATCH] Refactor `remote_to_local()` fix #403 (#404) * Refactor the parsing of Google urls ref #403 * Refactor * Finalize * Correct --- R/convert_google_url.R | 11 ---- R/remote_to_local.R | 113 +++++++++++++++++------------------ R/utils.R | 1 - tests/testthat/test_remote.R | 21 ++++--- 4 files changed, 68 insertions(+), 78 deletions(-) delete mode 100644 R/convert_google_url.R diff --git a/R/convert_google_url.R b/R/convert_google_url.R deleted file mode 100644 index bb45ef77..00000000 --- a/R/convert_google_url.R +++ /dev/null @@ -1,11 +0,0 @@ -convert_google_url <- function(url, export_as = "csv") { - ## convert a google sheets url to google csv export URL - ## extract the doc-id and append /export?format = csv to it. (default) - google_key <- regmatches(url, regexpr("[[:alnum:]_-]{30,}", url)) - if (grepl('gid=[[:digit:]]+', url)) { - gidpart <- paste0(regmatches(url, regexpr("gid=[[:digit:]]+", url))) - } else { - gidpart <- "gid=0" - } - return(paste0('https://docs.google.com/spreadsheets/d/', google_key, '/export?', gidpart, '&format=', export_as)) -} diff --git a/R/remote_to_local.R b/R/remote_to_local.R index 3b145a94..de0a76ea 100644 --- a/R/remote_to_local.R +++ b/R/remote_to_local.R @@ -1,69 +1,68 @@ remote_to_local <- function(file, format) { - if (missing(format)) { - # handle google sheets urls - if (grepl("docs\\.google\\.com/spreadsheets", file)) { - file <- convert_google_url(file, export_as = "csv") + if (grepl("docs\\.google\\.com/spreadsheets", file)) { + if (missing(format) || (!missing(format) && !format %in% c("csv", "tsv", "xlsx", "ods"))) { format <- "csv" - } else { - # try to extract format from URL - format <- try(get_info(file)$format, silent = TRUE) - if (inherits(format, "try-error")) { - format <- "TMP" - } } + file <- .convert_google_url(file, export_as = format) + } + if (missing(format)) { + ## try to extract format from URL, see below + format <- .get_ext_temp(file) } else { - # handle google sheets urls - if (grepl("docs\\.google\\.com/spreadsheets", file)) { - format <- .standardize_format(format) - if (format %in% c("csv", "tsv", "xlsx", "ods")) { - file <- convert_google_url(file, export_as = format) - format <- format - } else { - file <- convert_google_url(file, export_as = "csv") - format <- "csv" - } - } else { - format <- .standardize_format(format) - } + format <- .standardize_format(format) } # save file locally temp_file <- tempfile(fileext = paste0(".", format)) u <- curl::curl_fetch_memory(file) writeBin(object = u$content, con = temp_file) - - if (format == "TMP") { - # try to extract format from curl's final URL - format <- try(get_info(u$url)$format, silent = TRUE) - if (inherits(format, "try-error")) { - # try to extract format from headers - h1 <- curl::parse_headers(u$headers) - # check `Content-Disposition` header - if (any(grepl("^Content-Disposition", h1))) { - h <- h1[grep("filename", h1)] - if (length(h)) { - f <- regmatches(h, regexpr("(?<=\")(.*)(?