Skip to content

Commit

Permalink
fix bug where gz functionality would write to user library or current…
Browse files Browse the repository at this point in the history
… directory
  • Loading branch information
DavZim committed Jan 15, 2024
1 parent 7a40e5d commit 4a575a1
Show file tree
Hide file tree
Showing 11 changed files with 72 additions and 53 deletions.
6 changes: 3 additions & 3 deletions CRAN-SUBMISSION
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
Version: 0.1.24
Date: 2024-01-11 14:52:01 UTC
SHA: 5b6d74f511f4d1706a428832df3753703ce29d2d
Version: 0.1.25
Date: 2024-01-13 11:16:03 UTC
SHA: 7a40e5d69f5ab94bba85133e434b838a4c19536c
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# RITCH 0.1.26

* fix bug where gz functionality would write to user library or current directory

# RITCH 0.1.25

* fix Debian segfault when writing to user library
Expand Down
7 changes: 4 additions & 3 deletions R/count_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@
#' @param quiet if TRUE, the status messages are supressed, defaults to FALSE
#' @param force_gunzip only applies if file is a gz-file and a file with the same (gunzipped) name already exists.
#' if set to TRUE, the existing file is overwritten. Default value is FALSE
#' @param gz_dir a directory where the gz archive is extracted to.
#' Only applies if file is a gz archive. Default is [tempdir()].
#' @param force_cleanup only applies if file is a gz-file. If force_cleanup=TRUE, the gunzipped raw file will be deleted afterwards.
#'
#' @return a data.table containing the message-type and their counts for `count_messages`
#' or an integer value for the other functions.
#' @export
Expand All @@ -37,7 +38,7 @@
#' ### Specific class count functions are:
count_messages <- function(file, add_meta_data = FALSE, buffer_size = -1,
quiet = FALSE, force_gunzip = FALSE,
force_cleanup = TRUE) {
gz_dir = tempdir(), force_cleanup = TRUE) {
t0 <- Sys.time()
if (!file.exists(file))
stop(sprintf("File '%s' not found!", file))
Expand All @@ -48,7 +49,7 @@ count_messages <- function(file, add_meta_data = FALSE, buffer_size = -1,
orig_file <- file
# only needed for gz files; gz files are not deleted when the raw file already existed
raw_file_existed <- file.exists(basename(gsub("\\.gz$", "", file)))
file <- check_and_gunzip(file, buffer_size, force_gunzip, quiet)
file <- check_and_gunzip(file, gz_dir, buffer_size, force_gunzip, quiet)
df <- count_messages_impl(file, buffer_size, quiet)

df <- data.table::setalloccol(df)
Expand Down
5 changes: 3 additions & 2 deletions R/filter_itch.R
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ filter_itch <- function(infile, outfile,
orig_infile <- infile
# only needed for gz files; gz files are not deleted when the raw file already existed
raw_file_existed <- file.exists(basename(gsub("\\.gz$", "", infile)))
infile <- check_and_gunzip(infile, buffer_size, force_gunzip, quiet)
infile <- check_and_gunzip(infile, dirname(outfile), buffer_size, force_gunzip, quiet)

filter_itch_impl(infile, outfile, start, end,
filter_msg_type, filter_stock_locate,
Expand All @@ -169,7 +169,8 @@ filter_itch <- function(infile, outfile,
if (gz) {
if (!quiet) cat(sprintf("[gzip] outfile\n"))
of <- outfile
outfile <- gzip_file(outfile)
outfile <- gzip_file(infile = outfile,
outfile = paste0(outfile, ".gz"))
unlink(of) # delete the temporary file
}

Expand Down
43 changes: 22 additions & 21 deletions R/gz_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ gzip_file <- function(infile,
infile,
paste0(infile, ".gz"))
# remove path
xx <- strsplit(outfile, "/")[[1]]
xx <- strsplit(outfile, "\\\\|/")[[1]]
outfile <- xx[length(xx)]
}
if (file.exists(outfile)) unlink(outfile)
Expand All @@ -77,34 +77,35 @@ gzip_file <- function(infile,
return(invisible(outfile))
}

# Helper function

check_and_gunzip <- function(file, buffer_size, force_gunzip, quiet) {
# Helper function
# returns the (if needed gunzipped) file
# note that it only operates in the dir directory
check_and_gunzip <- function(file, dir = dirname(file), buffer_size, force_gunzip, quiet) {
file <- path.expand(file)
if (!grepl("\\.gz$", file)) return(file)

raw_file <- gsub("\\.gz$", "", file)
outfile <- file.path(dir, basename(gsub("\\.gz$", "", file)))
# check if the raw-file at target directory already exists, if so use this (unless force_gunzip = TRUE)
if (file.exists(raw_file) && !quiet && !force_gunzip) {
cat(sprintf("[INFO] Unzipped file '%s' already found, using that (overwrite with force_gunzip=TRUE)\n", raw_file))
return(raw_file)
if (file.exists(outfile) && !quiet && !force_gunzip) {
cat(sprintf("[INFO] Unzipped file '%s' already found, using that (overwrite with force_gunzip = TRUE)\n",
outfile))
return(outfile)
}

# look in current directory and extract to current directory if decompress needed
raw_file <- strsplit(raw_file, "/")[[1]]
raw_file <- raw_file[length(raw_file)]

# check if the raw-file at current directory already exists, if so use this (unless force_gunzip = TRUE)
if (file.exists(raw_file) && !quiet && !force_gunzip) {
cat(sprintf("[INFO] Unzipped file '%s' already found, using that (overwrite with force_gunzip=TRUE)\n", raw_file))
return(raw_file)
}
# if the unzipped file doesnt exist or the force_gunzip flag is set, unzip file
if (!file.exists(raw_file) || force_gunzip) {
unlink(raw_file)
if (!quiet) cat(sprintf("[Decompressing] '%s' to '%s'\n", file, raw_file))
if (file.exists(outfile) && !force_gunzip) {
if (!quiet)
cat(sprintf("[INFO] Unzipped file '%s' already found, using that (overwrite with force_gunzip = TRUE)\n",
outfile))
return(outfile)
} else {
# if the unzipped file doesnt exist or the force_gunzip flag is set, unzip file
unlink(outfile)
if (!quiet)
cat(sprintf("[Decompressing] '%s' to '%s'\n", file, outfile))

gunzip_file(file, raw_file, buffer_size)
gunzip_file(file, outfile, buffer_size)
}
return(raw_file)
return(outfile)
}
10 changes: 6 additions & 4 deletions R/read_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@
#' @param add_meta if TRUE, the date and exchange information of the file are added, defaults to TRUE
#' @param force_gunzip only applies if the input file is a gz-archive and a file with the same (gunzipped) name already exists.
#' if set to TRUE, the existing file is overwritten. Default value is FALSE
#' @param gz_dir a directory where the gz archive is extracted to.
#' Only applies if file is a gz archive. Default is [tempdir()].
#' @param force_cleanup only applies if the input file is a gz-archive.
#' If force_cleanup=TRUE, the gunzipped raw file will be deleted afterwards.
#' Only applies when the gunzipped raw file did not exist before.
Expand Down Expand Up @@ -144,7 +146,7 @@ read_itch <- function(file, filter_msg_class = NA,
max_timestamp = bit64::as.integer64(NA),
filter_stock = NA_character_, stock_directory = NA,
buffer_size = -1, quiet = FALSE, add_meta = TRUE,
force_gunzip = FALSE, force_cleanup = TRUE) {
force_gunzip = FALSE, gz_dir = tempdir(), force_cleanup = TRUE) {
t0 <- Sys.time()
if (!file.exists(file))
stop(sprintf("File '%s' not found!", file))
Expand Down Expand Up @@ -226,8 +228,8 @@ read_itch <- function(file, filter_msg_class = NA,

orig_file <- file
# only needed for gz files; gz files are not deleted when the raw file already existed
raw_file_existed <- file.exists(basename(gsub("\\.gz$", "", file)))
file <- check_and_gunzip(file, buffer_size, force_gunzip, quiet)
raw_file_existed <- file.exists(gsub("\\.gz$", "", file))
file <- check_and_gunzip(file, gz_dir, buffer_size, force_gunzip, quiet)

res_raw <- read_itch_impl(filter_msg_class, file, start, end,
filter_msg_type, filter_stock_locate,
Expand Down Expand Up @@ -276,7 +278,7 @@ read_itch <- function(file, filter_msg_class = NA,
# if the file was gzipped and the force_cleanup=TRUE, delete unzipped file
if (grepl("\\.gz$", orig_file) && force_cleanup && !raw_file_existed) {
if (!quiet) cat(sprintf("[Cleanup] Removing file '%s'\n", file))
unlink(basename(gsub("\\.gz$", "", file)))
unlink(gsub("\\.gz$", "", file))
}
return(res)
}
Expand Down
2 changes: 1 addition & 1 deletion cran-comments.md
Original file line number Diff line number Diff line change
@@ -1 +1 @@
fix bug around writing to user library in tests.
Fix bug where the gz functionality would write to the current directory or to the user library.
35 changes: 18 additions & 17 deletions inst/tinytest/test_filter_itch.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ outfile <- file.path(tempdir(), "testfile_20101224.TEST_ITCH_50")


################################################################################
# Test that filtering for all trades returns all data entries
# Test that filtering for all trades returns all data entries
orig <- read_itch(infile, quiet = TRUE)
trades <- read_trades(infile, quiet = TRUE)
expect_equal(orig$trades, trades)
Expand Down Expand Up @@ -371,31 +371,32 @@ unlink(outfile)

################################################################################
# filter_itch works on gz input files
infile <- system.file("extdata", "ex20101224.TEST_ITCH_50.gz", package = "RITCH")
gzinfile <- system.file("extdata", "ex20101224.TEST_ITCH_50.gz", package = "RITCH")
tmpoutfile <- file.path(tempdir(), "gz_testfile_20101224.TEST_ITCH_50")

outfile_plain <- filter_itch(infile, outfile, filter_msg_class = "orders",
quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
expect_equal(file.size(outfile_plain), 190012)
rawoutfile <- filter_itch(gzinfile, tmpoutfile, filter_msg_class = "orders",
quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
expect_equal(rawoutfile, tmpoutfile)
expect_equal(file.size(rawoutfile), 190012)

odf <- read_orders(outfile_plain, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
idf <- read_orders(infile, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
odf <- read_orders(rawoutfile, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
idf <- read_orders(gzinfile, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
expect_equal(odf, idf)
unlink(outfile_plain)
unlink(rawoutfile)


################################################################################
# works also on gz-output files
tmpoutfile <- file.path(tempdir(), "gz_testfile_20101224.TEST_ITCH_50")

gzoutfile <- filter_itch(infile, tmpoutfile, filter_msg_class = "orders", gz = TRUE,
quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
rawoutfile <- filter_itch(gzinfile, tmpoutfile, filter_msg_class = "orders", gz = TRUE,
quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)

expect_true(file.exists(gzoutfile))
expect_equal(file.size(gzoutfile), 72619)
expect_equal(rawoutfile, paste0(tmpoutfile, ".gz"))
expect_true(file.exists(rawoutfile))
expect_equal(file.size(rawoutfile), 72619)

odf <- read_orders(gzoutfile, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
idf <- read_orders(infile, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
odf <- read_orders(rawoutfile, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
idf <- read_orders(gzinfile, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)

expect_equal(odf, idf)
unlink(gzoutfile)
unlink(rawoutfile)
unlink(tmpoutfile)
5 changes: 3 additions & 2 deletions inst/tinytest/test_read_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,10 @@ expect_equal(ct, ct_exp)

# force_cleanup = FALSE leaves the raw file behind
ct2 <- count_messages(gzfile, quiet = TRUE, force_gunzip = TRUE,
force_cleanup = FALSE)
gz_dir = tempdir(), force_cleanup = FALSE)
expect_equal(ct, ct2)
expect_true(file.exists(file_raw))
expect_true(file.exists(file.path(tempdir(),
gsub("\\.gz$", "", basename(gzfile)))))
unlink(file_raw)

# check that force_cleanup works
Expand Down
4 changes: 4 additions & 0 deletions man/count_functions.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/read_functions.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 4a575a1

Please sign in to comment.