Skip to content

Commit

Permalink
Stop pretending rio can support bzip and xz; gzip is only partially supported
Browse files Browse the repository at this point in the history
  • Loading branch information
chainsawriot committed May 2, 2024
1 parent 0046dcd commit b2eb65f
Show file tree
Hide file tree
Showing 8 changed files with 101 additions and 98 deletions.
30 changes: 18 additions & 12 deletions R/compression.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,22 @@ find_compress <- function(f) {
if (grepl("\\.zip$", f)) {
return(list(file = sub("\\.zip$", "", f), compress = "zip"))
}
if (grepl("\\.tar\\.gz$", f)) {
return(list(file = sub("\\.tar\\.gz$", "", f), compress = "tar"))
}
## if (grepl("\\.tar\\.gz$", f)) {
## return(list(file = sub("\\.tar\\.gz$", "", f), compress = "tar"))
## }
if (grepl("\\.tar$", f)) {
return(list(file = sub("\\.tar$", "", f), compress = "tar"))
}
return(list(file = f, compress = NA_character_))
}

compress_out <- function(cfile, filename, type = c("zip", "tar", "gzip", "bzip2", "xz")) {
## KEEPING OLD CODE FOR LATER REIMPLEMENTATION for gzip and bzip2 #400
##compress_out <- function(cfile, filename, type = c("zip", "tar", "gzip", "bzip2", "xz")) {
compress_out <- function(cfile, filename, type = c("zip", "tar")) {
type <- ext <- match.arg(type)
if (ext %in% c("gzip", "bzip2", "xz")) {
ext <- paste0("tar")
}
## if (ext %in% c("gzip", "bzip2", "xz")) {
## ext <- paste0("tar")
## }
if (missing(cfile)) {
cfile <- paste0(filename, ".", ext)
cfile2 <- paste0(basename(filename), ".", ext)
Expand All @@ -32,11 +34,15 @@ compress_out <- function(cfile, filename, type = c("zip", "tar", "gzip", "bzip2"
setwd(tmp)
if (type == "zip") {
o <- utils::zip(cfile2, files = basename(filename))
} else {
if (type == "tar") {
type <- "none"
}
o <- utils::tar(cfile2, files = basename(filename), compression = type)
}
## } else {
## if (type == "tar") {
## type <- "none"
## }
## o <- utils::tar(cfile2, files = basename(filename), compression = type)
## }
if (type == "tar") {
o <- utils::tar(cfile2, files = basename(filename), compression = "none")
}
setwd(wd)
if (o != 0) {
Expand Down
7 changes: 5 additions & 2 deletions R/export.R
Original file line number Diff line number Diff line change
Expand Up @@ -106,10 +106,13 @@ export <- function(x, file, format, ...) {
if (!is.data.frame(x) && !format %in% c("xlsx", "html", "rdata", "rds", "json", "qs", "fods", "ods")) {
stop("'x' is not a data.frame or matrix", call. = FALSE)
}
.create_directory_if_not_exists(file = file) ## fix 347
if (format %in% c("gz", "gzip")) {
if (format %in% c("gz")) {
format <- get_info(tools::file_path_sans_ext(file, compression = FALSE))$format
if (format != "csv") {
stop("gz is only supported for csv (for now).", call. = FALSE)
}
}
.create_directory_if_not_exists(file = file) ## fix 347
class(file) <- c(paste0("rio_", format), class(file))
.export(file = file, x = x, ...)
if (!is.na(compress)) {
Expand Down
6 changes: 4 additions & 2 deletions R/import.R
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,11 @@ import <- function(file, format, setclass = getOption("rio.import.class", "data.
}
if (missing(format)) {
format <- get_info(file)$format
if (format %in% c("gz", "gzip")) {
if (format %in% c("gz")) {
format <- get_info(tools::file_path_sans_ext(file, compression = FALSE))$format
# file <- gzfile(file)
if (format != "csv") {
stop("gz is only supported for csv (for now).", call. = FALSE)
}
}
} else {
## format such as "|"
Expand Down
Binary file modified R/sysdata.rda
Binary file not shown.
90 changes: 45 additions & 45 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,51 +130,51 @@ install_formats()

The full list of supported formats is below:

| Name | Extensions / “format” | Import Package | Export Package | Type | Note |
| :---------------------------------- | :-------------------- | :------------- | :------------- | :------ | :---------------------- |
| Archive files (handled by tar) | bzip2 / xz / tar | utils | utils | Default | |
| Gzip files | gz / gzip | base | base | Default | |
| Zip files | zip | utils | utils | Default | |
| CSVY (CSV + YAML metadata header) | csvy | data.table | data.table | Default | |
| Comma-separated data | csv | data.table | data.table | Default | |
| Comma-separated data (European) | csv2 | data.table | data.table | Default | |
| Data Interchange Format | dif | utils | | Default | |
| Epiinfo | epiinfo / rec | foreign | | Default | |
| Excel | excel / xlsx | readxl | writexl | Default | |
| Excel (Legacy) | xls | readxl | | Default | |
| Fixed-width format data | fwf | readr | utils | Default | |
| Fortran data | fortran | utils | | Default | No recognized extension |
| Google Sheets | googlesheets | data.table | | Default | As comma-separated data |
| Minitab | minitab / mtp | foreign | | Default | |
| Pipe-separated data | psv | data.table | data.table | Default | |
| R syntax | r | base | base | Default | |
| SAS | sas / sas7bdat | haven | haven | Default | Export is deprecated |
| SAS XPORT | xport / xpt | haven | haven | Default | |
| SPSS | sav / spss | haven | haven | Default | |
| SPSS (compressed) | zsav | haven | haven | Default | |
| SPSS Portable | por | haven | | Default | |
| Saved R objects | rda / rdata | base | base | Default | |
| Serialized R objects | rds | base | base | Default | |
| Stata | dta / stata | haven | haven | Default | |
| Systat | syd / systat | foreign | | Default | |
| Tab-separated data | / tsv / txt | data.table | data.table | Default | |
| Text Representations of R Objects | dump | base | base | Default | |
| Weka Attribute-Relation File Format | arff / weka | foreign | foreign | Default | |
| XBASE database files | dbf | foreign | foreign | Default | |
| Apache Arrow (Parquet) | parquet | arrow | arrow | Suggest | |
| Clipboard | clipboard | clipr | clipr | Suggest | default is tsv |
| EViews | eviews / wf1 | hexView | | Suggest | |
| Fast Storage | fst | fst | fst | Suggest | |
| Feather R/Python interchange format | feather | arrow | arrow | Suggest | |
| Graphpad Prism | pzfx | pzfx | pzfx | Suggest | |
| HTML Tables | htm / html | xml2 | xml2 | Suggest | |
| JSON | json | jsonlite | jsonlite | Suggest | |
| Matlab | mat / matlab | rmatio | rmatio | Suggest | |
| OpenDocument Spreadsheet | ods | readODS | readODS | Suggest | |
| OpenDocument Spreadsheet (Flat) | fods | readODS | readODS | Suggest | |
| Serialized R objects (Quick) | qs | qs | qs | Suggest | |
| Shallow XML documents | xml | xml2 | xml2 | Suggest | |
| YAML | yaml / yml | yaml | yaml | Suggest | |
| Name | Extensions / “format” | Import Package | Export Package | Type | Note |
| :---------------------------------- | :-------------------- | :------------- | :------------- | :------ | :----------------------- |
| Archive files (handled by tar) | tar | utils | utils | Default | |
| Compressed CSV | gz | data.table | data.table | Default | Only csv.gz is supported |
| Zip files | zip | utils | utils | Default | |
| CSVY (CSV + YAML metadata header) | csvy | data.table | data.table | Default | |
| Comma-separated data | csv | data.table | data.table | Default | |
| Comma-separated data (European) | csv2 | data.table | data.table | Default | |
| Data Interchange Format | dif | utils | | Default | |
| Epiinfo | epiinfo / rec | foreign | | Default | |
| Excel | excel / xlsx | readxl | writexl | Default | |
| Excel (Legacy) | xls | readxl | | Default | |
| Fixed-width format data | fwf | readr | utils | Default | |
| Fortran data | fortran | utils | | Default | No recognized extension |
| Google Sheets | googlesheets | data.table | | Default | As comma-separated data |
| Minitab | minitab / mtp | foreign | | Default | |
| Pipe-separated data | psv | data.table | data.table | Default | |
| R syntax | r | base | base | Default | |
| SAS | sas / sas7bdat | haven | haven | Default | Export is deprecated |
| SAS XPORT | xport / xpt | haven | haven | Default | |
| SPSS | sav / spss | haven | haven | Default | |
| SPSS (compressed) | zsav | haven | haven | Default | |
| SPSS Portable | por | haven | | Default | |
| Saved R objects | rda / rdata | base | base | Default | |
| Serialized R objects | rds | base | base | Default | |
| Stata | dta / stata | haven | haven | Default | |
| Systat | syd / systat | foreign | | Default | |
| Tab-separated data | / tsv / txt | data.table | data.table | Default | |
| Text Representations of R Objects | dump | base | base | Default | |
| Weka Attribute-Relation File Format | arff / weka | foreign | foreign | Default | |
| XBASE database files | dbf | foreign | foreign | Default | |
| Apache Arrow (Parquet) | parquet | arrow | arrow | Suggest | |
| Clipboard | clipboard | clipr | clipr | Suggest | default is tsv |
| EViews | eviews / wf1 | hexView | | Suggest | |
| Fast Storage | fst | fst | fst | Suggest | |
| Feather R/Python interchange format | feather | arrow | arrow | Suggest | |
| Graphpad Prism | pzfx | pzfx | pzfx | Suggest | |
| HTML Tables | htm / html | xml2 | xml2 | Suggest | |
| JSON | json | jsonlite | jsonlite | Suggest | |
| Matlab | mat / matlab | rmatio | rmatio | Suggest | |
| OpenDocument Spreadsheet | ods | readODS | readODS | Suggest | |
| OpenDocument Spreadsheet (Flat) | fods | readODS | readODS | Suggest | |
| Serialized R objects (Quick) | qs | qs | qs | Suggest | |
| Shallow XML documents | xml | xml2 | xml2 | Suggest | |
| YAML | yaml / yml | yaml | yaml | Suggest | |

Additionally, any format that is not supported by **rio** but that has a
known R implementation will produce an informative error message
Expand Down
45 changes: 9 additions & 36 deletions data-raw/single.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,42 +8,6 @@
"export_function": "arrow::write_parquet",
"note": ""
},
{
"input": "bzip2",
"format": "tar",
"type": "archive",
"format_name": "Archive files (handled by tar)",
"import_function": "utils::untar",
"export_function": "utils::tar",
"note": ""
},
{
"input": "xz",
"format": "tar",
"type": "archive",
"format_name": "Archive files (handled by tar)",
"import_function": "utils::untar",
"export_function": "utils::tar",
"note": ""
},
{
"input": "gz",
"format": "gzip",
"type": "archive",
"format_name": "Gzip files",
"import_function": "base::gzfile",
"export_function": "base::gzfile",
"note": ""
},
{
"input": "gzip",
"format": "gzip",
"type": "archive",
"format_name": "Gzip files",
"import_function": "base::gzfile",
"export_function": "base::gzfile",
"note": ""
},
{
"input": "tar",
"format": "tar",
Expand Down Expand Up @@ -107,6 +71,15 @@
"export_function": "data.table::fwrite",
"note": ""
},
{
"input": "gz",
"format": "gz",
"type": "archive",
"format_name": "Compressed CSV",
"import_function": "data.table::fread",
"export_function": "data.table::fwrite",
"note": "Only csv.gz is supported"
},
{
"input": ";",
"format": "csv2",
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test_compress.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ context("Compressed files")
test_that("Recognize compressed file types", {
expect_true(rio:::find_compress("file.zip")$compress == "zip")
expect_true(rio:::find_compress("file.tar")$compress == "tar")
expect_true(rio:::find_compress("file.tar.gz")$compress == "tar")
## expect_true(rio:::find_compress("file.tar.gz")$compress == "tar")
expect_true(is.na(rio:::find_compress("file.gz")$compress))
expect_true(is.na(rio:::find_compress("file.notcompressed")$compress))
## #395
Expand Down
19 changes: 19 additions & 0 deletions tests/testthat/test_format_csv_gz.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,25 @@ test_that("Export to and import from csv.gz", {
withr::with_tempfile("iris_file", fileext = ".csv.gz", code = {
export(iris, iris_file)
expect_true(file.exists(iris_file))
expect_true(R.utils::isGzipped(iris_file, method = "content"))
expect_true(is.data.frame(import(iris_file)))
})
})

## Exporting to a gzipped target whose inner format is not csv (here
## ".sav.gz") must raise the "gz is only supported for csv" error and must
## not leave a file behind (ref #399).
test_that("Not support other gz format export for now ref #399", {
  withr::with_tempfile(
    "iris_file",
    fileext = ".sav.gz",
    code = {
      expect_error(
        export(iris, iris_file),
        regexp = "gz is only supported for csv"
      )
      ## the failed export must not have created the target file
      expect_false(file.exists(iris_file))
    }
  )
})

## Importing a gzipped file whose inner format is not csv (here a gzipped
## .sav) must fail: import() only supports csv.gz for now (ref #399).
test_that("Not support other gz format import for now ref #399", {
  ## R.utils is only needed to build the fixture; skip when it is unavailable.
  skip_if_not_installed("R.utils")
  withr::with_tempfile("iris_file", fileext = ".sav", code = {
    ## `iris_file` is the name bound by with_tempfile(); the previous code
    ## referred to an undefined `tmp`, which errored before any expectation.
    gz_file <- paste0(iris_file, ".gz")
    export(iris, iris_file)
    ## compress it; gzip() writes `gz_file` next to the exported .sav
    R.utils::gzip(iris_file, overwrite = TRUE)
    expect_true(file.exists(gz_file))
    expect_true(R.utils::isGzipped(gz_file, method = "content"))
    ## pin the specific error, consistent with the export test
    expect_error(import(gz_file), "gz is only supported for csv")
    ## with_tempfile() only cleans up `iris_file`, so remove the .gz here
    unlink(gz_file)
  })
})

0 comments on commit b2eb65f

Please sign in to comment.