Skip to content

Commit

Permalink
Fix #350 fix #351 (#355)
Browse files Browse the repository at this point in the history
This corrects many things:

- correct single.json to make it work for all functions
- make relevant functions using the single source of truth
- create get_info() and replace all get_ext() and get_type()
- There is now only one concept: format. No more fmt, type, ext.


* Implement get_info

* fix data

* Implement get_info and cleaning up import and export

* Reduce export's cyclomatic complexity

* zap get_ext

* Single source

* Rebuild README

* Update NEWS.md [no ci]

* add tests for #350 [no ci]
  • Loading branch information
chainsawriot authored Sep 8, 2023
1 parent 81c0423 commit 44eb387
Show file tree
Hide file tree
Showing 17 changed files with 254 additions and 293 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ export(export_list)
export(factorize)
export(gather_attrs)
export(get_ext)
export(get_info)
export(import)
export(import_list)
export(install_formats)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
* Add support for `qs` #275 h/t David Schoch
* Use `arrow` to import / export `feather` #340
* `export_list` can write multiple data frames to a single archive file (e.g. zip, tar) or a directory #346 h/t David Schoch
* `get_info` is added #350
* Bug fixes
- ... is correctly passed for exporting ODS and feather #318
- POTENTIALLY BREAKING: JSON are exported in UTF-8 by default; solved encoding issues on
Expand All @@ -20,6 +21,7 @@
- remove all @importFrom #325 h/t David Schoch
- rearrange "Package Philosophy" as a Vignette #320
- Create a single source of truth about all import and export functions #313
- Clarify all concepts: now there is only `format` #351
* New authors
- David Schoch @schochastics

Expand Down
20 changes: 8 additions & 12 deletions R/compression.R
Original file line number Diff line number Diff line change
@@ -1,18 +1,14 @@
find_compress <- function(f) {
if (grepl("zip$", f)) {
file <- sub("\\.zip$", "", f)
compress <- "zip"
} else if (grepl("tar\\.gz$", f)) {
file <- sub("\\.tar\\.gz$", "", f)
compress <- "tar"
} else if (grepl("tar$", f)) {
file <- sub("\\.tar$", "", f)
compress <- "tar"
} else {
file <- f
compress <- NA_character_
return(list(file = sub("\\.zip$", "", f), compress = "zip"))
}
if (grepl("tar\\.gz$", f)) {
return(list(file = sub("\\.tar\\.gz$", "", f), compress = "tar"))
}
if (grepl("tar$", f)) {
return(list(file = sub("\\.tar$", "", f), compress = "tar"))
}
return(list(file = file, compress = compress))
return(list(file = f, compress = NA_character_))
}

compress_out <- function(cfile, filename, type = c("zip", "tar", "gzip", "bzip2", "xz")) {
Expand Down
36 changes: 18 additions & 18 deletions R/export.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,41 +82,41 @@ export <- function(x, file, format, ...) {
.check_file(file, single_only = TRUE)
if (missing(file) && missing(format)) {
stop("Must specify 'file' and/or 'format'")
} else if (!missing(file) && !missing(format)) {
fmt <- tolower(format)
}
if (!missing(file) && !missing(format)) {
format <- tolower(format)
cfile <- file
f <- find_compress(file)
file <- f$file
compress <- f$compress
} else if (!missing(file) && missing(format)) {
}
if (!missing(file) && missing(format)) {
cfile <- file
f <- find_compress(file)
file <- f$file
compress <- f$compress
fmt <- get_ext(file)
} else if (!missing(format)) {
fmt <- get_type(format)
file <- paste0(as.character(substitute(x)), ".", fmt)
format <- get_info(file)$input ## this line is slight confusing
}
if (!missing(format) && missing(file)) {
format <- .standardize_format(format)
file <- paste0(as.character(substitute(x)), ".", format)
compress <- NA_character_
}
fmt <- get_type(fmt)
format <- .standardize_format(format)
outfile <- file

data_name <- as.character(substitute(x))
if (!is.data.frame(x) & !is.matrix(x)) {
if (!fmt %in% c("xlsx", "html", "rdata", "rds", "json")) {
stop("'x' is not a data.frame or matrix")
}
} else if (is.matrix(x)) {
if (is.matrix(x)) {
x <- as.data.frame(x)
}
if (!is.data.frame(x) && !format %in% c("xlsx", "html", "rdata", "rds", "json", "qs")) {
stop("'x' is not a data.frame or matrix", call. = FALSE)
}
.create_directory_if_not_exists(file = file) ## fix 347
if (fmt %in% c("gz", "gzip")) {
fmt <- tools::file_ext(tools::file_path_sans_ext(file, compression = FALSE))
if (format %in% c("gz", "gzip")) {
format <- get_info(tools::file_path_sans_ext(file, compression = FALSE))$format
file <- gzfile(file, "w")
on.exit(close(file))
}
class(file) <- c(paste0("rio_", fmt), class(file))
class(file) <- c(paste0("rio_", format), class(file))
.export(file = file, x = x, ...)
if (!is.na(compress)) {
cfile <- compress_out(cfile = cfile, filename = file, type = compress)
Expand Down
58 changes: 21 additions & 37 deletions R/extensions.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,31 +18,19 @@

## @rdname extensions
.import.default <- function(file, ...) {
x <- gettext("%s format not supported. Consider using the '%s()' function")
xA <- gettext("Import support for the %s format is exported by the %s package. Run 'library(%s)' then try again.")
fmt <- tools::file_ext(file)
out <- switch(fmt,
bean = sprintf(xA, fmt, "ledger", "ledger"),
beancount = sprintf(xA, fmt, "ledger", "ledger"),
bib = sprintf(x, fmt, "bib2df::bib2df"),
bmp = sprintf(x, fmt, "bmp::read.bmp"),
doc = sprintf(x, fmt, "docxtractr::docx_extract_all_tbls"),
docx = sprintf(x, fmt, "docxtractr::docx_extract_all_tbls"),
gexf = sprintf(x, fmt, "rgexf::read.gexf"),
gnumeric = sprintf(x, fmt, "gnumeric::read.gnumeric.sheet"),
hledger = sprintf(xA, fmt, "ledger", "ledger"),
jpeg = sprintf(x, fmt, "jpeg::readJPEG"),
jpg = sprintf(x, fmt, "jpeg::readJPEG"),
ledger = sprintf(xA, fmt, "ledger", "ledger"),
npy = sprintf(x, fmt, "RcppCNPy::npyLoad"),
pdf = sprintf(x, fmt, "tabulizer::extract_tables"),
png = sprintf(x, fmt, "png::readPNG"),
sdmx = sprintf(x, fmt, "sdmx::readSDMX"),
sss = sprintf(x, fmt, "sss::read.sss"),
tiff = sprintf(x, fmt, "tiff::readTIFF"),
gettext("Format not supported")
)
stop(out, call. = FALSE)
fileinfo <- get_info(file)
if (is.na(fileinfo$type) || is.na(fileinfo$import_function) || fileinfo$import_function == "") {
stop("Format not supported", call. = FALSE)
}
if (fileinfo$type == "known") {
stop(sprintf(gettext("%s format not supported. Consider using the '%s()' function"),
fileinfo$format, fileinfo$import_function), call. = FALSE)
}
if (fileinfo$type == "enhance") {
pkg <- stringi::stri_extract_first(fileinfo$import_function, regex = "[a-zA-Z0-9\\.]+")
stop(sprintf(gettext("Import support for the %s format is exported by the %s package. Run 'library(%s)' then try again."),
fileinfo$format, pkg, pkg), call. = FALSE)
}
}

## @rdname extensions
Expand All @@ -52,16 +40,12 @@

## @rdname extensions
.export.default <- function(file, x, ...) {
x <- gettext("%s format not supported. Consider using the '%s()' function")
fmt <- tools::file_ext(file)
out <- switch(fmt,
gexf = sprintf(x, fmt, "rgexf::write.gexf"),
jpg = sprintf(x, fmt, "jpeg::writeJPEG"),
npy = sprintf(x, fmt, "RcppCNPy::npySave"),
png = sprintf(x, fmt, "png::writePNG"),
tiff = sprintf(x, fmt, "tiff::writeTIFF"),
xpt = sprintf(x, fmt, "SASxport::write.xport"),
gettext("Format not supported")
)
stop(out, call. = FALSE)
fileinfo <- get_info(file)
if (is.na(fileinfo$type) || is.na(fileinfo$export_function) || fileinfo$export_function == "") {
stop("Format not supported", call. = FALSE)
}
if (fileinfo$type == "known") {
stop(sprintf(gettext("%s format not supported. Consider using the '%s()' function"),
fileinfo$format, fileinfo$export_function), call. = FALSE)
}
}
15 changes: 6 additions & 9 deletions R/import.R
Original file line number Diff line number Diff line change
Expand Up @@ -110,20 +110,17 @@ import <- function(file, format, setclass, which, ...) {
file <- parse_tar(file, which = which)
}
if (missing(format)) {
fmt <- get_ext(file)
if (fmt %in% c("gz", "gzip")) {
fmt <- tools::file_ext(tools::file_path_sans_ext(file, compression = FALSE))
format <- get_info(file)$format
if (format %in% c("gz", "gzip")) {
format <- get_info(tools::file_path_sans_ext(file, compression = FALSE))$format
file <- gzfile(file)
} else {
fmt <- get_type(fmt)
}
} else {
fmt <- get_type(format)
## format such as "|"
format <- .standardize_format(format)
}

args_list <- list(...)

class(file) <- c(paste0("rio_", fmt), class(file))
class(file) <- c(paste0("rio_", format), class(file))
if (missing(which)) {
x <- .import(file = file, ...)
} else {
Expand Down
10 changes: 5 additions & 5 deletions R/import_list.R
Original file line number Diff line number Diff line change
Expand Up @@ -107,17 +107,17 @@ function(file,
if (grepl("^http.*://", file)) {
file <- remote_to_local(file)
}
if (get_ext(file) == "rdata") {
if (get_info(file)$format == "rdata") {
e <- new.env()
load(file, envir = e)
return(as.list(e))
}
if (!get_ext(file) %in% c("html", "xlsx", "xls", "zip")) {
if (!get_info(file)$format %in% c("html", "xlsx", "xls", "zip")) {
which <- 1
whichnames <- NULL
}
## getting list of `whichnames`
if (get_ext(file) == "html") {
if (get_info(file)$format == "html") {
.check_pkg_availability("xml2")
tables <- xml2::xml_find_all(xml2::read_html(unclass(file)), ".//table")
if (missing(which)) {
Expand All @@ -128,7 +128,7 @@ function(file,
FUN.VALUE = character(1))
names(which) <- whichnames
}
if (get_ext(file) %in% c("xls","xlsx")) {
if (get_info(file)$format %in% c("xls","xlsx")) {
##.check_pkg_availability("readxl")
whichnames <- readxl::excel_sheets(path = file)
if (missing(which)) {
Expand All @@ -140,7 +140,7 @@ function(file,
whichnames <- whichnames[which]
}
}
if (get_ext(file) %in% c("zip")) {
if (get_info(file)$format %in% c("zip")) {
if (missing(which)) {
whichnames <- utils::unzip(file, list = TRUE)[, "Name"]
which <- seq_along(whichnames)
Expand Down
30 changes: 15 additions & 15 deletions R/remote_to_local.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,38 +3,38 @@ remote_to_local <- function(file, format) {
# handle google sheets urls
if (grepl("docs\\.google\\.com/spreadsheets", file)) {
file <- convert_google_url(file, export_as = "csv")
fmt <- "csv"
format <- "csv"
} else {
# try to extract format from URL
fmt <- try(get_ext(file), silent = TRUE)
if (inherits(fmt, "try-error")) {
fmt <- "TMP"
format <- try(get_info(file)$format, silent = TRUE)
if (inherits(format, "try-error")) {
format <- "TMP"
}
}
} else {
# handle google sheets urls
if (grepl("docs\\.google\\.com/spreadsheets", file)) {
fmt <- get_type(format)
if (fmt %in% c("csv", "tsv", "xlsx", "ods")) {
file <- convert_google_url(file, export_as = fmt)
fmt <- fmt
format <- .standardize_format(format)
if (format %in% c("csv", "tsv", "xlsx", "ods")) {
file <- convert_google_url(file, export_as = format)
format <- format
} else {
file <- convert_google_url(file, export_as = "csv")
fmt <- "csv"
format <- "csv"
}
} else {
fmt <- get_type(format)
format <- .standardize_format(format)
}
}
# save file locally
temp_file <- tempfile(fileext = paste0(".", fmt))
temp_file <- tempfile(fileext = paste0(".", format))
u <- curl::curl_fetch_memory(file)
writeBin(object = u$content, con = temp_file)

if (fmt == "TMP") {
if (format == "TMP") {
# try to extract format from curl's final URL
fmt <- try(get_ext(u$url), silent = TRUE)
if (inherits(fmt, "try-error")) {
format <- try(get_info(u$url)$format, silent = TRUE)
if (inherits(format, "try-error")) {
# try to extract format from headers
h1 <- curl::parse_headers(u$headers)
# check `Content-Disposition` header
Expand All @@ -59,7 +59,7 @@ remote_to_local <- function(file, format) {
# ## PARSE MIME TYPE
# }
} else {
f <- sub("TMP$", fmt, temp_file)
f <- sub("TMP$", format, temp_file)
file.copy(from = temp_file, to = f)
unlink(temp_file)
temp_file <- f
Expand Down
Binary file modified R/sysdata.rda
Binary file not shown.
Loading

0 comments on commit 44eb387

Please sign in to comment.