-
Notifications
You must be signed in to change notification settings - Fork 76
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
* Refactor the parsing of Google urls ref #403 * Refactor * Finalize * Correct
- Loading branch information
1 parent
c054e80
commit c529994
Showing
4 changed files
with
68 additions
and
78 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,69 +1,68 @@ | ||
remote_to_local <- function(file, format) { | ||
if (missing(format)) { | ||
# handle google sheets urls | ||
if (grepl("docs\\.google\\.com/spreadsheets", file)) { | ||
file <- convert_google_url(file, export_as = "csv") | ||
if (grepl("docs\\.google\\.com/spreadsheets", file)) { | ||
if (missing(format) || (!missing(format) && !format %in% c("csv", "tsv", "xlsx", "ods"))) { | ||
format <- "csv" | ||
} else { | ||
# try to extract format from URL | ||
format <- try(get_info(file)$format, silent = TRUE) | ||
if (inherits(format, "try-error")) { | ||
format <- "TMP" | ||
} | ||
} | ||
file <- .convert_google_url(file, export_as = format) | ||
} | ||
if (missing(format)) { | ||
## try to extract format from URL, see below | ||
format <- .get_ext_temp(file) | ||
} else { | ||
# handle google sheets urls | ||
if (grepl("docs\\.google\\.com/spreadsheets", file)) { | ||
format <- .standardize_format(format) | ||
if (format %in% c("csv", "tsv", "xlsx", "ods")) { | ||
file <- convert_google_url(file, export_as = format) | ||
format <- format | ||
} else { | ||
file <- convert_google_url(file, export_as = "csv") | ||
format <- "csv" | ||
} | ||
} else { | ||
format <- .standardize_format(format) | ||
} | ||
format <- .standardize_format(format) | ||
} | ||
# save file locally | ||
temp_file <- tempfile(fileext = paste0(".", format)) | ||
u <- curl::curl_fetch_memory(file) | ||
writeBin(object = u$content, con = temp_file) | ||
|
||
if (format == "TMP") { | ||
# try to extract format from curl's final URL | ||
format <- try(get_info(u$url)$format, silent = TRUE) | ||
if (inherits(format, "try-error")) { | ||
# try to extract format from headers | ||
h1 <- curl::parse_headers(u$headers) | ||
# check `Content-Disposition` header | ||
if (any(grepl("^Content-Disposition", h1))) { | ||
h <- h1[grep("filename", h1)] | ||
if (length(h)) { | ||
f <- regmatches(h, regexpr("(?<=\")(.*)(?<!\")", h, perl = TRUE)) | ||
if (!length(f)) { | ||
f <- regmatches(h, regexpr("(?<=filename=)(.*)", h, perl = TRUE)) | ||
} | ||
f <- paste0(dirname(temp_file), "/", f) | ||
file.copy(from = temp_file, to = f) | ||
unlink(temp_file) | ||
temp_file <- f | ||
} | ||
} else { | ||
stop("Unrecognized file format. Try specifying with the format argument.") | ||
} | ||
# check `Content-Type` header | ||
# if (any(grepl("^Content-Type", h1))) { | ||
# h <- h1[grep("^Content-Type", h1)] | ||
# ## PARSE MIME TYPE | ||
# } | ||
} else { | ||
f <- sub("TMP$", format, temp_file) | ||
file.copy(from = temp_file, to = f) | ||
unlink(temp_file) | ||
temp_file <- f | ||
if (format != "TMP") { ## the happiest path | ||
return(temp_file) | ||
} | ||
## fomart = "TMP": try to extract format from curl's final URL | ||
format <- .get_ext_temp(u$url) | ||
if (format != "TMP") { ## contain a file extension, also happy | ||
renamed_file <- sub("TMP$", format, temp_file) | ||
file.copy(from = temp_file, to = renamed_file) | ||
unlink(temp_file) | ||
return(renamed_file) | ||
} | ||
## try to extract format from headers: read #403 about whether this code is doing anything | ||
h1 <- curl::parse_headers(u$headers) | ||
## check `Content-Disposition` header | ||
if (!any(grepl("^Content-Disposition", h1))) { | ||
stop("Unrecognized file format. Try specifying with the format argument.") | ||
} | ||
h <- h1[grep("filename", h1)] | ||
if (length(h)) { | ||
f <- regmatches(h, regexpr("(?<=\")(.*)(?<!\")", h, perl = TRUE)) | ||
if (!length(f)) { | ||
f <- regmatches(h, regexpr("(?<=filename=)(.*)", h, perl = TRUE)) | ||
} | ||
f <- paste0(dirname(temp_file), "/", f) | ||
file.copy(from = temp_file, to = f) | ||
unlink(temp_file) | ||
return(f) | ||
} | ||
} | ||
|
||
.convert_google_url <- function(url, export_as = "csv") { | ||
## convert a google sheets url to google csv export URL | ||
## extract the doc-id and append /export?format = csv to it. (default) | ||
google_key <- regmatches(url, regexpr("[[:alnum:]_-]{30,}", url)) | ||
if (grepl("gid=[[:digit:]]+", url)) { | ||
gidpart <- paste0(regmatches(url, regexpr("gid=[[:digit:]]+", url))) | ||
} else { | ||
gidpart <- "gid=0" | ||
} | ||
return(paste0("https://docs.google.com/spreadsheets/d/", google_key, "/export?", gidpart, "&format=", export_as)) | ||
} | ||
|
||
.get_ext_temp <- function(file, temp_format = "TMP") { | ||
## This is a version of get_ext for internal usage | ||
## When file can't be queried, return `temp_format` instead of error | ||
format <- try(get_info(file)$format, silent = TRUE) | ||
if (inherits(format, "try-error")) { | ||
return(temp_format) | ||
} | ||
return(temp_file) | ||
return(format) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters