Get info #355

Merged · 10 commits · Sep 8, 2023
1 change: 1 addition & 0 deletions NAMESPACE
@@ -84,6 +84,7 @@ export(export_list)
export(factorize)
export(gather_attrs)
export(get_ext)
export(get_info)
export(import)
export(import_list)
export(install_formats)
2 changes: 2 additions & 0 deletions NEWS.md
@@ -4,6 +4,7 @@
* Add support for `qs` #275 h/t David Schoch
* Use `arrow` to import / export `feather` #340
* `export_list` can write multiple data frames to a single archive file (e.g. zip, tar) or a directory #346 h/t David Schoch
* `get_info` is added #350
* Bug fixes
- ... is correctly passed for exporting ODS and feather #318
- POTENTIALLY BREAKING: JSON are exported in UTF-8 by default; solved encoding issues on
@@ -20,6 +21,7 @@
- remove all @importFrom #325 h/t David Schoch
- rearrange "Package Philosophy" as a Vignette #320
- Create a single source of truth about all import and export functions #313
- Clarify all concepts: now there is only `format` #351
* New authors
- David Schoch @schochastics

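The headline addition across these diffs is `get_info()`, which replaces the older `get_ext()`/`get_type()` helpers as the single source of format metadata. A minimal sketch of how that metadata is consumed, using only the fields visible in this PR (`format`, `type`, `import_function`, `export_function`); the example values are illustrative, not taken from the registry:

    library(rio)
    info <- get_info("data.csv")
    info$format            # standardized format, e.g. "csv"
    info$type              # support tier; the diff below checks for "known" and "enhance"
    info$import_function   # function rio would point you to for reading (illustrative)
    info$export_function   # counterpart used on the export side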
20 changes: 8 additions & 12 deletions R/compression.R
@@ -1,18 +1,14 @@
find_compress <- function(f) {
if (grepl("zip$", f)) {
file <- sub("\\.zip$", "", f)
compress <- "zip"
} else if (grepl("tar\\.gz$", f)) {
file <- sub("\\.tar\\.gz$", "", f)
compress <- "tar"
} else if (grepl("tar$", f)) {
file <- sub("\\.tar$", "", f)
compress <- "tar"
} else {
file <- f
compress <- NA_character_
return(list(file = sub("\\.zip$", "", f), compress = "zip"))
}
if (grepl("tar\\.gz$", f)) {
return(list(file = sub("\\.tar\\.gz$", "", f), compress = "tar"))
}
if (grepl("tar$", f)) {
return(list(file = sub("\\.tar$", "", f), compress = "tar"))
}
return(list(file = file, compress = compress))
return(list(file = f, compress = NA_character_))
}

compress_out <- function(cfile, filename, type = c("zip", "tar", "gzip", "bzip2", "xz")) {
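The rewrite above replaces the if/else ladder with early returns but keeps the same contract: strip a recognized archive extension and report which compressor handles it. An illustrative round-trip through the new version (return values paraphrased in the comments):

    find_compress("mtcars.csv.zip")
    # list(file = "mtcars.csv", compress = "zip")
    find_compress("mtcars.csv.tar.gz")
    # list(file = "mtcars.csv", compress = "tar")
    find_compress("mtcars.csv")
    # list(file = "mtcars.csv", compress = NA_character_)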
36 changes: 18 additions & 18 deletions R/export.R
@@ -82,41 +82,41 @@ export <- function(x, file, format, ...) {
.check_file(file, single_only = TRUE)
if (missing(file) && missing(format)) {
stop("Must specify 'file' and/or 'format'")
} else if (!missing(file) && !missing(format)) {
fmt <- tolower(format)
}
if (!missing(file) && !missing(format)) {
format <- tolower(format)
cfile <- file
f <- find_compress(file)
file <- f$file
compress <- f$compress
} else if (!missing(file) && missing(format)) {
}
if (!missing(file) && missing(format)) {
cfile <- file
f <- find_compress(file)
file <- f$file
compress <- f$compress
fmt <- get_ext(file)
} else if (!missing(format)) {
fmt <- get_type(format)
file <- paste0(as.character(substitute(x)), ".", fmt)
format <- get_info(file)$input ## this line is slight confusing
}
if (!missing(format) && missing(file)) {
format <- .standardize_format(format)
file <- paste0(as.character(substitute(x)), ".", format)
compress <- NA_character_
}
fmt <- get_type(fmt)
format <- .standardize_format(format)
outfile <- file

data_name <- as.character(substitute(x))
if (!is.data.frame(x) & !is.matrix(x)) {
if (!fmt %in% c("xlsx", "html", "rdata", "rds", "json")) {
stop("'x' is not a data.frame or matrix")
}
} else if (is.matrix(x)) {
if (is.matrix(x)) {
x <- as.data.frame(x)
}
if (!is.data.frame(x) && !format %in% c("xlsx", "html", "rdata", "rds", "json", "qs")) {
stop("'x' is not a data.frame or matrix", call. = FALSE)
}
.create_directory_if_not_exists(file = file) ## fix 347
if (fmt %in% c("gz", "gzip")) {
fmt <- tools::file_ext(tools::file_path_sans_ext(file, compression = FALSE))
if (format %in% c("gz", "gzip")) {
format <- get_info(tools::file_path_sans_ext(file, compression = FALSE))$format
file <- gzfile(file, "w")
on.exit(close(file))
}
class(file) <- c(paste0("rio_", fmt), class(file))
class(file) <- c(paste0("rio_", format), class(file))
.export(file = file, x = x, ...)
if (!is.na(compress)) {
cfile <- compress_out(cfile = cfile, filename = file, type = compress)
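In the rewritten `export()`, the format is standardized once via `.standardize_format()`, compression is detected from the file name by `find_compress()`, and the writer is chosen by prepending a `rio_<format>` class to the file path before calling `.export()`. A hedged usage sketch (file names are made up):

    library(rio)
    # format inferred from the extension; the trailing .zip triggers compress_out()
    export(mtcars, "mtcars.csv.zip")
    # explicit 'format' overrides the extension: a tab-separated file named .txt
    export(mtcars, "mtcars.txt", format = "tsv")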
58 changes: 21 additions & 37 deletions R/extensions.R
@@ -18,31 +18,19 @@

## @rdname extensions
.import.default <- function(file, ...) {
x <- gettext("%s format not supported. Consider using the '%s()' function")
xA <- gettext("Import support for the %s format is exported by the %s package. Run 'library(%s)' then try again.")
fmt <- tools::file_ext(file)
out <- switch(fmt,
bean = sprintf(xA, fmt, "ledger", "ledger"),
beancount = sprintf(xA, fmt, "ledger", "ledger"),
bib = sprintf(x, fmt, "bib2df::bib2df"),
bmp = sprintf(x, fmt, "bmp::read.bmp"),
doc = sprintf(x, fmt, "docxtractr::docx_extract_all_tbls"),
docx = sprintf(x, fmt, "docxtractr::docx_extract_all_tbls"),
gexf = sprintf(x, fmt, "rgexf::read.gexf"),
gnumeric = sprintf(x, fmt, "gnumeric::read.gnumeric.sheet"),
hledger = sprintf(xA, fmt, "ledger", "ledger"),
jpeg = sprintf(x, fmt, "jpeg::readJPEG"),
jpg = sprintf(x, fmt, "jpeg::readJPEG"),
ledger = sprintf(xA, fmt, "ledger", "ledger"),
npy = sprintf(x, fmt, "RcppCNPy::npyLoad"),
pdf = sprintf(x, fmt, "tabulizer::extract_tables"),
png = sprintf(x, fmt, "png::readPNG"),
sdmx = sprintf(x, fmt, "sdmx::readSDMX"),
sss = sprintf(x, fmt, "sss::read.sss"),
tiff = sprintf(x, fmt, "tiff::readTIFF"),
gettext("Format not supported")
)
stop(out, call. = FALSE)
fileinfo <- get_info(file)
if (is.na(fileinfo$type) || is.na(fileinfo$import_function) || fileinfo$import_function == "") {
stop("Format not supported", call. = FALSE)
}
if (fileinfo$type == "known") {
stop(sprintf(gettext("%s format not supported. Consider using the '%s()' function"),
fileinfo$format, fileinfo$import_function), call. = FALSE)
}
if (fileinfo$type == "enhance") {
pkg <- stringi::stri_extract_first(fileinfo$import_function, regex = "[a-zA-Z0-9\\.]+")
stop(sprintf(gettext("Import support for the %s format is exported by the %s package. Run 'library(%s)' then try again."),
fileinfo$format, pkg, pkg), call. = FALSE)
}
}

## @rdname extensions
@@ -52,16 +40,12 @@

## @rdname extensions
.export.default <- function(file, x, ...) {
x <- gettext("%s format not supported. Consider using the '%s()' function")
fmt <- tools::file_ext(file)
out <- switch(fmt,
gexf = sprintf(x, fmt, "rgexf::write.gexf"),
jpg = sprintf(x, fmt, "jpeg::writeJPEG"),
npy = sprintf(x, fmt, "RcppCNPy::npySave"),
png = sprintf(x, fmt, "png::writePNG"),
tiff = sprintf(x, fmt, "tiff::writeTIFF"),
xpt = sprintf(x, fmt, "SASxport::write.xport"),
gettext("Format not supported")
)
stop(out, call. = FALSE)
fileinfo <- get_info(file)
if (is.na(fileinfo$type) || is.na(fileinfo$export_function) || fileinfo$export_function == "") {
stop("Format not supported", call. = FALSE)
}
if (fileinfo$type == "known") {
stop(sprintf(gettext("%s format not supported. Consider using the '%s()' function"),
fileinfo$format, fileinfo$export_function), call. = FALSE)
}
}
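Both `.default` methods now delegate the lookup to `get_info()` instead of a hard-coded `switch()`: a "known" format produces a pointer to the outside function that can handle it, an "enhance" format asks the user to load the package that exports the support, and anything else falls through to a generic error. Roughly what a user would see, assuming the files exist and the registry entries match the old switch table:

    # "known" format: rio names the function to use instead
    try(import("figure.png"))
    # Error: png format not supported. Consider using the 'png::readPNG()' function
    # unrecognized extension: generic failure
    try(import("notes.xyz"))
    # Error: Format not supported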
15 changes: 6 additions & 9 deletions R/import.R
@@ -110,20 +110,17 @@ import <- function(file, format, setclass, which, ...) {
file <- parse_tar(file, which = which)
}
if (missing(format)) {
fmt <- get_ext(file)
if (fmt %in% c("gz", "gzip")) {
fmt <- tools::file_ext(tools::file_path_sans_ext(file, compression = FALSE))
format <- get_info(file)$format
if (format %in% c("gz", "gzip")) {
format <- get_info(tools::file_path_sans_ext(file, compression = FALSE))$format
file <- gzfile(file)
} else {
fmt <- get_type(fmt)
}
} else {
fmt <- get_type(format)
## format such as "|"
format <- .standardize_format(format)
}

args_list <- list(...)

class(file) <- c(paste0("rio_", fmt), class(file))
class(file) <- c(paste0("rio_", format), class(file))
if (missing(which)) {
x <- .import(file = file, ...)
} else {
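`import()` gets the mirror-image change: the format now comes from `get_info()`, and for gzipped input one extension is peeled off, the format re-resolved, and the file wrapped in `gzfile()`. Illustrative calls (file names assumed):

    library(rio)
    # format resolved from the extension via get_info()
    dat  <- import("survey.sav")
    # gzip-compressed csv: ".gz" is stripped, "csv" re-resolved, read through gzfile()
    logs <- import("logs.csv.gz")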
10 changes: 5 additions & 5 deletions R/import_list.R
@@ -107,17 +107,17 @@ function(file,
if (grepl("^http.*://", file)) {
file <- remote_to_local(file)
}
if (get_ext(file) == "rdata") {
if (get_info(file)$format == "rdata") {
e <- new.env()
load(file, envir = e)
return(as.list(e))
}
if (!get_ext(file) %in% c("html", "xlsx", "xls", "zip")) {
if (!get_info(file)$format %in% c("html", "xlsx", "xls", "zip")) {
which <- 1
whichnames <- NULL
}
## getting list of `whichnames`
if (get_ext(file) == "html") {
if (get_info(file)$format == "html") {
.check_pkg_availability("xml2")
tables <- xml2::xml_find_all(xml2::read_html(unclass(file)), ".//table")
if (missing(which)) {
@@ -128,7 +128,7 @@
FUN.VALUE = character(1))
names(which) <- whichnames
}
if (get_ext(file) %in% c("xls","xlsx")) {
if (get_info(file)$format %in% c("xls","xlsx")) {
##.check_pkg_availability("readxl")
whichnames <- readxl::excel_sheets(path = file)
if (missing(which)) {
Expand All @@ -140,7 +140,7 @@ function(file,
whichnames <- whichnames[which]
}
}
if (get_ext(file) %in% c("zip")) {
if (get_info(file)$format %in% c("zip")) {
if (missing(which)) {
whichnames <- utils::unzip(file, list = TRUE)[, "Name"]
which <- seq_along(whichnames)
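`import_list()` keeps its behaviour and merely swaps `get_ext()` for `get_info()$format` in the branch tests above. A short usage sketch (the workbook and archive names are hypothetical):

    library(rio)
    # one data frame per worksheet, named after the sheet names readxl reports
    sheets <- import_list("results.xlsx")
    names(sheets)
    # one data frame per file inside the archive
    parts <- import_list("archive.zip")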
30 changes: 15 additions & 15 deletions R/remote_to_local.R
@@ -3,38 +3,38 @@ remote_to_local <- function(file, format) {
# handle google sheets urls
if (grepl("docs\\.google\\.com/spreadsheets", file)) {
file <- convert_google_url(file, export_as = "csv")
fmt <- "csv"
format <- "csv"
} else {
# try to extract format from URL
fmt <- try(get_ext(file), silent = TRUE)
if (inherits(fmt, "try-error")) {
fmt <- "TMP"
format <- try(get_info(file)$format, silent = TRUE)
if (inherits(format, "try-error")) {
format <- "TMP"
}
}
} else {
# handle google sheets urls
if (grepl("docs\\.google\\.com/spreadsheets", file)) {
fmt <- get_type(format)
if (fmt %in% c("csv", "tsv", "xlsx", "ods")) {
file <- convert_google_url(file, export_as = fmt)
fmt <- fmt
format <- .standardize_format(format)
if (format %in% c("csv", "tsv", "xlsx", "ods")) {
file <- convert_google_url(file, export_as = format)
format <- format
} else {
file <- convert_google_url(file, export_as = "csv")
fmt <- "csv"
format <- "csv"
}
} else {
fmt <- get_type(format)
format <- .standardize_format(format)
}
}
# save file locally
temp_file <- tempfile(fileext = paste0(".", fmt))
temp_file <- tempfile(fileext = paste0(".", format))
u <- curl::curl_fetch_memory(file)
writeBin(object = u$content, con = temp_file)

if (fmt == "TMP") {
if (format == "TMP") {
# try to extract format from curl's final URL
fmt <- try(get_ext(u$url), silent = TRUE)
if (inherits(fmt, "try-error")) {
format <- try(get_info(u$url)$format, silent = TRUE)
if (inherits(format, "try-error")) {
# try to extract format from headers
h1 <- curl::parse_headers(u$headers)
# check `Content-Disposition` header
Expand All @@ -59,7 +59,7 @@ remote_to_local <- function(file, format) {
# ## PARSE MIME TYPE
# }
} else {
f <- sub("TMP$", fmt, temp_file)
f <- sub("TMP$", format, temp_file)
file.copy(from = temp_file, to = f)
unlink(temp_file)
temp_file <- f
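`remote_to_local()` is internal, but the visible effect is that `import()` accepts URLs: Google Sheets links are rewritten to a downloadable export, otherwise the format is guessed from the URL (or, failing that, from curl's final URL and response headers) and the payload is written to a temp file with a matching extension. A hedged example with placeholder URLs:

    library(rio)
    # Google Sheets URL: rewritten to a csv export before download
    dat  <- import("https://docs.google.com/spreadsheets/d/<sheet-id>/edit")
    # ordinary remote file: the URL's extension decides the reader
    cars <- import("https://example.com/data/cars.csv")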
Binary file modified R/sysdata.rda
Binary file not shown.