From 0c30f4b6b5a52fe6a113ba0e0cd3642493c020f2 Mon Sep 17 00:00:00 2001 From: Peter <44036274+pbrohan@users.noreply.github.com> Date: Tue, 18 Jul 2023 00:29:09 +0100 Subject: [PATCH 1/3] Add cpp functions to write --- DESCRIPTION | 4 +- NAMESPACE | 4 +- R/cpp11.R | 9 + R/list_ods_sheets.R | 30 + R/readODS-package.R | 3 + R/readODS.R | 369 ---- R/read_ods.R | 191 ++ R/utils.R | 6 + R/zip.R | 21 + man/get_num_sheets_in_ods.Rd | 11 +- man/list_ods_sheets.Rd | 17 +- man/readODS-package.Rd | 1 + man/read_ods.Rd | 11 +- src/.gitignore | 3 + src/cpp11.cpp | 35 + src/get_sheet_names.cpp | 58 + src/is_ods.cpp | 59 + src/is_ods.h | 6 + src/rapidxml/rapidxml.hpp | 2596 +++++++++++++++++++++++++++ src/rapidxml/rapidxml_iterators.hpp | 174 ++ src/rapidxml/rapidxml_print.hpp | 451 +++++ src/rapidxml/rapidxml_utils.hpp | 122 ++ src/read_ods_.cpp | 276 +++ src/readxl/zip.cpp | 44 + src/readxl/zip.h | 7 + tests/testthat/test_col_types.R | 6 +- tests/testthat/test_issue81.R | 9 +- tests/testthat/test_legacy.R | 68 +- tests/testthat/test_na.R | 4 +- tests/testthat/test_read_ods.R | 50 +- tests/testthat/test_verbose.R | 7 - 31 files changed, 4180 insertions(+), 472 deletions(-) create mode 100644 R/cpp11.R create mode 100644 R/list_ods_sheets.R delete mode 100644 R/readODS.R create mode 100644 R/read_ods.R create mode 100644 R/utils.R create mode 100644 R/zip.R create mode 100644 src/.gitignore create mode 100644 src/cpp11.cpp create mode 100644 src/get_sheet_names.cpp create mode 100644 src/is_ods.cpp create mode 100644 src/is_ods.h create mode 100644 src/rapidxml/rapidxml.hpp create mode 100644 src/rapidxml/rapidxml_iterators.hpp create mode 100644 src/rapidxml/rapidxml_print.hpp create mode 100644 src/rapidxml/rapidxml_utils.hpp create mode 100644 src/read_ods_.cpp create mode 100644 src/readxl/zip.cpp create mode 100644 src/readxl/zip.h diff --git a/DESCRIPTION b/DESCRIPTION index 4dfa796..5924c5b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: readODS Type: 
Package Title: Read and Write ODS Files Version: 1.8.3 -Authors@R: c(person("Gerrit-Jan", "Schutten", role = c("aut"), email = "phonixor@gmail.com"), person("Chung-hong", "Chan", role = c("aut", "cre"), email = "chainsawtiney@gmail.com", comment = c(ORCID = "0000-0002-6232-7530")), person("Thomas J.", "Leeper", role = c("aut"), email = "thosjleeper@gmail.com", comment = c(ORCID = "0000-0003-4097-6326")), person("John", "Foster", role = c("ctb"), email = "john.x.foster@nab.com.au"), person("Sergio", "Oller", role = c("ctb")), person("Jim", "Hester", role = c("ctb"), email = "jim.hester@rstudio.com", comment = c(ORCID = "0000-0002-2739-7082")), person("Stephen", "Watts", role = c("ctb")), person("Arthur", "Katossky", role = c("ctb")), person("Stas", "Malavin", role = c("ctb")), person("Duncan", "Garmonsway", role = c("ctb")), person("Mehrad", "Mahmoudian", role = c("ctb")), person("Matt", "Kerlogue", role = c("ctb")), person("Detlef", "Steuer", role = c("aut"), email = "steuer@hsu-hh.de", comment = c(ORCID = "0000-0003-2676-5290")), person("Michal", "Lauer", role = c("ctb"), email = "michal.lauer.25@gmail.com"), person("Till", "Straube", role = c("ctb"), email = "straube@geo.uni-frankfurt.de")) +Authors@R: c(person("Gerrit-Jan", "Schutten", role = c("aut"), email = "phonixor@gmail.com"), person("Chung-hong", "Chan", role = c("aut", "cre"), email = "chainsawtiney@gmail.com", comment = c(ORCID = "0000-0002-6232-7530")), person("Peter", "Brohan", role = c("aut"), email = "peter.brohan@gmail.com"), person("Thomas J.", "Leeper", role = c("aut"), email = "thosjleeper@gmail.com", comment = c(ORCID = "0000-0003-4097-6326")), person("John", "Foster", role = c("ctb"), email = "john.x.foster@nab.com.au"), person("Sergio", "Oller", role = c("ctb")), person("Jim", "Hester", role = c("ctb"), email = "jim.hester@rstudio.com", comment = c(ORCID = "0000-0002-2739-7082")), person("Stephen", "Watts", role = c("ctb")), person("Arthur", "Katossky", role = c("ctb")), person("Stas", 
"Malavin", role = c("ctb")), person("Duncan", "Garmonsway", role = c("ctb")), person("Mehrad", "Mahmoudian", role = c("ctb")), person("Matt", "Kerlogue", role = c("ctb")), person("Detlef", "Steuer", role = c("aut"), email = "steuer@hsu-hh.de", comment = c(ORCID = "0000-0003-2676-5290")), person("Michal", "Lauer", role = c("ctb"), email = "michal.lauer.25@gmail.com"), person("Till", "Straube", role = c("ctb"), email = "straube@geo.uni-frankfurt.de")) Description: Read ODS (OpenDocument Spreadsheet) into R as data frame. Also support writing data frame into ODS file. URL: https://github.com/ropensci/readODS BugReports: https://github.com/ropensci/readODS/issues @@ -14,6 +14,8 @@ Imports: utils, purrr, zip +LinkingTo: + cpp11 Suggests: dplyr, testthat, diff --git a/NAMESPACE b/NAMESPACE index 1621b79..eeeb2d4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,9 +1,7 @@ # Generated by roxygen2: do not edit by hand -export(getNrOfSheetsInODS) export(get_num_sheets_in_ods) export(list_ods_sheets) -export(ods_sheets) -export(read.ods) export(read_ods) export(write_ods) +useDynLib(readODS, .registration = TRUE) diff --git a/R/cpp11.R b/R/cpp11.R new file mode 100644 index 0000000..ad45dc2 --- /dev/null +++ b/R/cpp11.R @@ -0,0 +1,9 @@ +# Generated by cpp11: do not edit by hand + +ods_get_sheet_names_ <- function(file, include_external_data) { + .Call(`_readODS_ods_get_sheet_names_`, file, include_external_data) +} + +read_ods_ <- function(file, start_row, stop_row, start_col, stop_col, sheet, formula_as_formula) { + .Call(`_readODS_read_ods_`, file, start_row, stop_row, start_col, stop_col, sheet, formula_as_formula) +} diff --git a/R/list_ods_sheets.R b/R/list_ods_sheets.R new file mode 100644 index 0000000..028c0d8 --- /dev/null +++ b/R/list_ods_sheets.R @@ -0,0 +1,30 @@ +#' List all sheets in an ODS File +#' +#' List all sheets in an ods file. 
+#' +#' @param path Path to the ods file +#' @param include_external_data A boolean value to show or hide sheets containing linked data (default false) +#' @return A character vector of sheet names. +#' @author Peter Brohan , Chung-hong Chan , Gerrit-Jan Schutten +#' @seealso +#' use \code{\link{read_ods}} to read the data +#' @export +list_ods_sheets <- function(path, include_external_data = FALSE) { + return(ods_get_sheet_names_(path, include_external_data)) +} + +#' Get the Number of Sheets in an ODS File +#' +#' Get the number of sheets in an ods file +#' +#' @param path path to the ods file +#' @param include_external_data A boolean value declaring if external data sheets should be counted +#' @return Number of sheets +#' @author Peter Brohan , Chung-hong Chan , Gerrit-Jan Schutten +#' @seealso +#' use \code{\link{read_ods}} to read the data +#' @export +get_num_sheets_in_ods <- function(path, include_external_data = FALSE) { + sheets <- ods_get_sheet_names_(path, include_external_data) + return(length(sheets)) +} \ No newline at end of file diff --git a/R/readODS-package.R b/R/readODS-package.R index b30bbda..682d81f 100644 --- a/R/readODS-package.R +++ b/R/readODS-package.R @@ -1,6 +1,9 @@ #' @keywords internal "_PACKAGE" +#' @useDynLib readODS, .registration = TRUE +NULL + # The following block is used by usethis to automatically manage # roxygen namespace tags. Modify with care! ## usethis namespace: start diff --git a/R/readODS.R b/R/readODS.R deleted file mode 100644 index 5bbd96d..0000000 --- a/R/readODS.R +++ /dev/null @@ -1,369 +0,0 @@ -## ' @keywords internal -## ' @description -## ' converts numbers to microplate row names and Excel & ODS column names -## ' -## ' @param list_of_letter the numbers you want to convert to chars -## ' @details -## ' 1=A -## ' 26=Z -## ' 27=ZA -## ' 702=ZZ -## ' 703=AAA -## ' -## ' supports lists of numbers! 
-## ' -## ' .convert_numbers_to_letters(1:1000) -## ' -.convert_numbers_to_letters <- function(list_of_numbers = NULL) { - return_value <- NULL - for(i in seq_len(length(list_of_numbers))) { - remainder <- list_of_numbers[[i]] - return_letters <- "" - while(TRUE) { - if(remainder == 0) { - break - } - if(remainder %% 26 != 0) { - return_letters <- paste(LETTERS[remainder %% 26],return_letters, sep = "") - remainder <- remainder %/% 26 - } else { - return_letters <- paste("Z", return_letters,sep = "") - remainder <- (remainder %/% 26) - 1 - } - } - return_value[[i]] <- return_letters - } - return(return_value) -} - -.unzip_ods <- function(file) { - exdir <- tempdir() - zip::unzip(file, files = "content.xml", exdir = exdir) - return(file.path(exdir, "content.xml")) -} - -### return a parsed XML tree from an ODS file -.parse_ods_file <- function(file = NULL) { - if(is.null(file)) { - stop("no filename given", call. = FALSE) - } - if(!file.exists(file)) { - stop("file does not exist", call. 
= FALSE) - } - ## con <- unz(file,filename="content.xml") - con <- .unzip_ods(file) - parsed_ods <- xml2::read_xml(con, options = c("NOBLANKS", "HUGE")) - return(parsed_ods) -} - -.extract_namespace <- function(parsed_ods) { - ods_ns <- xml2::xml_ns(parsed_ods) - return(ods_ns) -} - -.parse_sheets <- function(parsed_ods, ods_ns) { - parsed_sheets <- xml2::xml_find_all(parsed_ods, ".//office:body/office:spreadsheet/table:table", ods_ns) - return(parsed_sheets) -} - -.check_cell_repeat <- function(cell, ods_ns) { - if (xml2::xml_has_attr(cell, "table:number-columns-repeated", ods_ns)) { - return(as.numeric(xml2::xml_attr(cell, "table:number-columns-repeated", ods_ns))) - } - return(1) -} - -.check_cell_with_textp <- function(cell, ods_ns) { - return(length(xml2::xml_find_all(cell, ".//text:p", ods_ns)) != 0) -} - -.parse_textp <- function(cell, ods_ns) { - textp <- xml2::xml_find_all(cell, "./text:p", ods_ns) - purrr::map_chr(textp, .parse_p, ods_ns = ods_ns) -} - -### this function parses cell but with consideration of -### make it extensible through here -.parse_p <- function(ppart, ods_ns) { - p_content <- xml2::xml_contents(ppart) - output <- "" - for (x in p_content) { - if (xml2::xml_name(x, ods_ns) == "text:s") { - rep_space <- as.numeric(xml2::xml_attr(x, "text:c", ns = ods_ns)) - if (is.na(rep_space)) { - rep_space <- 1 - } - output <- paste0(output, paste0(rep(" ", rep_space), collapse = "")) - } else { - output <- paste0(output, xml2::xml_text(x)) - } - } - return(output) -} - -.parse_single_cell <- function(cell, ods_ns, formula_as_formula = FALSE, use_office_value = TRUE) { - cell_value <- paste0(.parse_textp(cell, ods_ns), collapse = "\n") ## handle multiline values, #23 - if (xml2::xml_has_attr(cell, "office:value-type", ods_ns) && - xml2::xml_attr(cell, "office:value-type", ods_ns) %in% c("float", "currency", "percentage")) { - cell_value <- xml2::xml_attr(cell, "office:value", ods_ns) - } - if (cell_value == "" && use_office_value && 
xml2::xml_has_attr(cell, "office:value", ods_ns)) { - cell_value <- xml2::xml_attr(cell, "office:value", ods_ns) - } - if (formula_as_formula && xml2::xml_has_attr(cell, "table:formula", ods_ns)) { - cell_value <- xml2::xml_attr(cell, "table:formula", ods_ns) - } - return(cell_value) -} - -.parse_rows <- function(parsed_sheet, ods_ns, formula_as_formula, skip = 0) { - rows <- xml2::xml_find_all(parsed_sheet, ".//table:table-row", ods_ns) - cell_values <- new.env(hash = TRUE) - if (skip > 0 && skip >= length(rows)) { - return(cell_values) - } - if (skip > 0) { - rows <- rows[(skip + 1):length(rows)] - } - current_row <- 0 - for (row in rows) { - if (xml2::xml_has_attr(row, "table:number-rows-repeated", ods_ns)) { - ## number of repeats - row_repeats <- as.numeric(xml2::xml_attr(row, "table:number-rows-repeated", ods_ns)) - } else { - ## if no repeat - row_repeats <- 1 - } - if (!any(purrr::map_lgl(xml2::xml_find_all(row, ".//table:table-cell", ods_ns), .check_cell_with_textp, ods_ns = ods_ns))) { - ## Empty row; skip to prevent the below expensive parsing. 
- current_row <- current_row + row_repeats - } else { - for (rep_row in seq_len(row_repeats)) { - current_row <- current_row + 1 - current_col <- 0 - for (cell in xml2::xml_find_all(row, ".//table:table-cell", ods_ns)) { - bump_cell <- .check_cell_repeat(cell, ods_ns) - cell_with_textp <- .check_cell_with_textp(cell, ods_ns) - current_col <- current_col + 1 - if (cell_with_textp) { - ## non_empty cell, get the value - cell_value <- .parse_single_cell(cell, ods_ns, formula_as_formula = formula_as_formula) - cell_values[[paste0(current_row, ",", current_col)]] <- cell_value - } - if (bump_cell > 1 && !cell_with_textp) { - current_col <- current_col + bump_cell - 1 - } - if (bump_cell > 1 && cell_with_textp) { - for (bump in seq_len(bump_cell - 1)) { - current_col <- current_col + 1 - cell_values[[paste0(current_row, ",", current_col)]] <- cell_value - } - } - } - } - } - - } - return(cell_values) -} - - -.change_df_with_col_row_header <- function(x, col_header, row_header, range) { - if (!is.null(range)) { - x <- .select_range(x, range) - } - irow <- ifelse(col_header, 2, 1) - jcol <- ifelse(row_header, 2, 1) - - g <- x[irow:nrow(x), jcol:ncol(x), drop=FALSE] # maintain as dataframe for single column - rownames(g) <- if (row_header) x[seq(irow, nrow(x)), 1] else NULL # dont want character row headers given by 1:nrow(g) - colnames(g) <- if (col_header) x[1, seq(jcol, ncol(x))] else .convert_numbers_to_letters(seq_len(ncol(g))) - return(g) -} - -.convert_to_data_frame <- function(cell_values, header = FALSE, na = NULL, row_header = FALSE, range) { - cv_keys <- ls(cell_values) - if (length(cv_keys) == 0) { - warning("empty sheet, return empty data frame.", call. 
= FALSE) - return(data.frame()) - } - row_id <- purrr::map_dbl(strsplit(cv_keys, ","), ~as.numeric(.[1])) - col_id <- purrr::map_dbl(strsplit(cv_keys, ","), ~as.numeric(.[2])) - res <- data.frame(matrix(data = "", nrow = max(row_id) ,ncol= max(col_id)), stringsAsFactors = FALSE) - if (is.null(na)) { - for(key in cv_keys){ - pos <- as.numeric(strsplit(key, ',')[[1]]) - res[pos[1], pos[2]] <- get(key, envir = cell_values) - } - } else { - for(key in cv_keys){ - pos <- as.numeric(strsplit(key, ',')[[1]]) - value <- get(key, envir = cell_values) - res[pos[1], pos[2]] <- ifelse(value %in% na, NA, value) - } - } - res <- .change_df_with_col_row_header(res, header, row_header, range) - return(res) -} - -.parse_ods_to_sheets <- function(file) { - parsed_ods <- .parse_ods_file(file) - ods_ns <- .extract_namespace(parsed_ods) - sheets <- .parse_sheets(parsed_ods, ods_ns) - return(list(sheets, ods_ns)) -} - -.select_sheet <- function(sheets, ods_ns, which_sheet) { - if (is.numeric(which_sheet) && which_sheet > length(sheets)) { - stop("sheet larger than number of sheets in the ods file.", call. 
= FALSE) - } - if (is.character(which_sheet)) { - sheet_names <- purrr::map_chr(sheets, function(x) xml2::xml_attr(x, "table:name", ods_ns)) - is_in_sheet_names <- stringi::stri_cmp(which_sheet, sheet_names)==0 - if (any(is_in_sheet_names)) { - which_sheet <- which(is_in_sheet_names) - } else { - stop(paste0("No sheet named ", which_sheet, " in the ods file.")) - } - } - return(sheets[which_sheet]) -} - -.select_range <- function(raw_sheet, range) { - range_select <- cellranger::as.cell_limits(range) - selected_sheet <- raw_sheet[range_select$ul[1]:range_select$lr[1], range_select$ul[2]:range_select$lr[2]] - return(selected_sheet) -} - -.convert_strings_to_factors <- function(df) { - i <- purrr::map_lgl(df, is.character) - df[i] <- lapply(df[i], as.factor) - return (df) -} - -.silent_type_convert <- function(x, verbose = TRUE, na = c("", "NA")) { - if (verbose) { - res <- readr::type_convert(df = x, na = na) - } else { - suppressMessages({ - res <- readr::type_convert(df = x, na = na) - }) - } - return(res) -} - -#' Read Data From ODS File -#' -#' read_ods is a function to read a single sheet from an ods file and return a data frame. -#' read.ods always returns a list of data frames with one data frame per sheet. This is a wrapper to read_ods for backward compatibility with previous version of readODS. Please use read_ods if possible. -#' -#' @aliases read_ods read.ods -#' @param path path to the ods file. -#' @param sheet sheet to read. Either a string (the sheet name), or an integer sheet number. The default is 1. -#' @param col_names logical, indicating whether the file contains the names of the variables as its first line. Default is TRUE. -#' @param col_types Either NULL to guess from the spreadsheet or refer to [readr::type_convert()] to specify cols specification. NA will return a data frame with all columns being "characters". -#' @param na Character vector of strings to use for missing values. By default read_ods converts blank cells to missing data. 
It can also be set to -#' NULL, so that empty cells are treated as NA. -#' @param skip the number of lines of the data file to skip before beginning to read data. If this parameter is larger than the total number of lines in the ods file, an empty data frame is returned. -#' @param formula_as_formula logical, a switch to display formulas as formulas "SUM(A1:A3)" or as the resulting value "3"... or "8".. . Default is FALSE. -#' @param range selection of rectangle using Excel-like cell range, such as \code{range = "D12:F15"} or \code{range = "R1C12:R6C15"}. Cell range processing is handled by the \code{\link[=cellranger]{cellranger}} package. -#' @param file for read.ods only, path to the ods file. -#' @param formulaAsFormula for read.ods only, a switch to display formulas as formulas "SUM(A1:A3)" or as the resulting value "3"... or "8".. -#' @param row_names logical, indicating whether the file contains the names of the rows as its first column. Default is FALSE. -#' @param strings_as_factors logical, if character columns to be converted to factors. Default is FALSE. -#' @param check_names logical, passed down to base::data.frame(). Default is FALSE. -#' @param verbose logical, if messages should be displayed. Default is FALSE. -#' @return A data frame (\code{data.frame}) containing a representation of data in the ods file. -#' @note Currently, ods files that linked to external data source cannot be read. Merged cells cannot be parsed correctly. -#' @author Chung-hong Chan , Gerrit-Jan Schutten -#' @examples -#' \dontrun{ -#' # Read a file -#' read_ods("starwars.ods") -#' # Read a specific sheet, e.g. the 2nd sheet -#' read_ods("starwars.ods", sheet = 2) -#' # Read a specific range, e.g. 
A1:C11 -#' read_ods("starwars.ods", sheet = 2, range = "A1:C11") -#' } -#' @export -read_ods <- function(path, sheet = 1, col_names = TRUE, col_types = NULL, na = "", skip = 0, formula_as_formula = FALSE, range = NULL, row_names = FALSE, strings_as_factors = FALSE, check_names = FALSE, verbose = FALSE) { - if (missing(path)) { - stop("No file path was provided for the 'path' argument. Please provide a path to a file to import.") - } - res <- .parse_ods_to_sheets(path) - ods_ns <- res[[2]] - sheets <- res[[1]] - target_sheet <- .select_sheet(sheets, ods_ns = ods_ns, which_sheet = sheet) - cell_values <- .parse_rows(target_sheet, ods_ns, formula_as_formula = formula_as_formula, skip = skip) - parsed_df <- .convert_to_data_frame(cell_values = cell_values, header = col_names, na = na, row_header = row_names, range = range) - # Check names in parsed df - parsed_df <- data.frame(parsed_df, check.names = check_names) - ## emulate readxl to first select range. - ## Kill unknown col_types - if (inherits(col_types, 'col_spec')) { - res <- readr::type_convert(df = parsed_df, col_types = col_types, na = na) - } else if (length(col_types) == 0 && is.null(col_types)) { - res <- .silent_type_convert(x = parsed_df, verbose = verbose, na = na) - } else if (length(col_types) == 1 && is.na(col_types[1])) { - res <- parsed_df - } else { - stop("Unknown col_types. Can either be a class col_spec, NULL or NA.", call. = FALSE) - } - if (strings_as_factors) { - res <- .convert_strings_to_factors(res) - } - return(res) -} - -#' @rdname read_ods -#' @export -read.ods <- function(file = NULL, sheet = NULL, formulaAsFormula = FALSE) { - warning("read.ods will be deprecated in the next version. 
Use read_ods instead.") - if (!is.null(sheet)) { - return(read_ods(path = file, sheet = sheet, col_names = FALSE, formula_as_formula = formulaAsFormula, skip = 0, na = NULL, col_types = NA)) - } else { - return(lapply(list_ods_sheets(file), function(x) read_ods(path = file, sheet = x, col_names = FALSE, formula_as_formula = formulaAsFormula, skip = 0, na = NULL, col_types = NA))) - } -} - - -#' Get the Number of Sheets in an ODS File -#' -#' Get the number of sheets in an ods file -#' -#' @param path path to the ods file -#' @return Number of sheets -#' @author Chung-hong Chan , Gerrit-Jan Schutten -#' @seealso -#' use \code{\link{read_ods}} to read the data -#' @export -get_num_sheets_in_ods <- function(path) { - sheets <- .parse_ods_to_sheets(path)[[1]] - return(length(sheets)) -} - -#' @rdname get_num_sheets_in_ods -#' @export -getNrOfSheetsInODS <- function(path) { - warning("getNrOfSheetsInODS will be deprecated in the next version. Use get_num_sheets_in_ods instead.") - return(get_num_sheets_in_ods(path)) -} - -#' List All Sheets in an ODS File -#' -#' List all sheets in an ods file. -#' -#' @param path Path to the ods file -#' @return A character vector of sheet names. -#' @export -list_ods_sheets <- function(path) { - res <- .parse_ods_to_sheets(path) - return(purrr::map_chr(res[[1]], function(x) xml2::xml_attr(x, "table:name", res[[2]]))) -} - -#' @rdname list_ods_sheets -#' @export -ods_sheets <- function(path) { - warning("ods_sheets will be deprecated in the next version. 
Use list_ods_sheets instead.") - list_ods_sheets(path) -} diff --git a/R/read_ods.R b/R/read_ods.R new file mode 100644 index 0000000..9d0423d --- /dev/null +++ b/R/read_ods.R @@ -0,0 +1,191 @@ +.change_df_with_col_row_header <- function(x, col_header, row_header){ + if((nrow(x) < 2 && col_header )|| (ncol(x) < 2 && row_header)){ + warning("Cannot make column/row names if this would cause the dataframe to be empty.") + return(x) + } + irow <- ifelse(col_header, 2, 1) + jcol <- ifelse(row_header, 2, 1) + + g <- x[irow:nrow(x), jcol:ncol(x), drop=FALSE] # maintain as dataframe for single column + rownames(g) <- if(row_header) x[seq(irow, nrow(x)), 1] else NULL # don't want character row headers given by 1:nrow(g) + colnames(g) <- if(col_header) x[1, seq(jcol, ncol(x))] else cellranger::num_to_letter(seq_len(ncol(g))) + return(g) +} + + + + +## Based on readxl, although the implementation is different. +## If max row is -1, read to end of row. +## Row and column-numbers are 1-based +.standardise_limits <- function(range, skip) { + if(is.null(range)){ + skip <- check_nonnegative_integer(skip, "skip") + limits <- c( + min_row = skip + 1, + max_row = -1, + min_col = 1, + max_col = -1 + ) + } else { + if(skip != 0){ + warning("Range and non-zero value for skip given. 
Defaulting to range.") + } + limits <- cellranger::as.cell_limits(range) + limits <- c( + min_row = limits[["ul"]][1], + max_row = limits[["lr"]][1], + min_col = limits[["ul"]][2], + max_col = limits[["lr"]][2] + ) + } + return(limits) +} + +.silent_type_convert <- function(x, verbose = TRUE, na = c("", "NA")) { + if (verbose) { + res <- readr::type_convert(df = x, na = na) + } else { + suppressMessages({ + res <- readr::type_convert(df = x, na = na) + }) + } + return(res) +} + +.convert_strings_to_factors <- function(df) { + i <- purrr::map_lgl(df, is.character) + df[i] <- lapply(df[i], as.factor) + return (df) +} + +#' Read Data From ODS File +#' +#' read_ods is a function to read a single sheet from an ods file and return a data frame. +#' read.ods always returns a list of data frames with one data frame per sheet. This is a wrapper to read_ods for backward compatibility with previous version of readODS. Please use read_ods if possible. +#' +#' @param path path to the ods file. +#' @param sheet sheet to read. Either a string (the sheet name), or an integer sheet number. The default is 1. +#' @param col_names logical, indicating whether the file contains the names of the variables as its first line. Default is TRUE. +#' @param col_types Either NULL to guess from the spreadsheet or refer to [readr::type_convert()] to specify cols specification. NA will return a data frame with all columns being "characters". +#' @param na Character vector of strings to use for missing values. By default read_ods converts blank cells to missing data. It can also be set to +#' NULL, so that empty cells are treated as NA. +#' @param skip the number of lines of the data file to skip before beginning to read data. If this parameter is larger than the total number of lines in the ods file, an empty data frame is returned. +#' @param formula_as_formula logical, a switch to display formulas as formulas "SUM(A1:A3)" or as the resulting value "3"... or "8".. . Default is FALSE. 
+#' @param range selection of rectangle using Excel-like cell range, such as \code{range = "D12:F15"} or \code{range = "R1C12:R6C15"}. Cell range processing is handled by the \code{\link[=cellranger]{cellranger}} package.
+#' @param row_names logical, indicating whether the file contains the names of the rows as its first column. Default is FALSE.
+#' @param strings_as_factors logical, if character columns to be converted to factors. Default is FALSE.
+#' @param check_names logical, passed down to base::data.frame(). Default is FALSE.
+#' @param verbose logical, if messages should be displayed. Default is FALSE.
+#' @return A data frame (\code{data.frame}) containing a representation of data in the ods file.
+#' @note Currently, ods files that linked to external data source cannot be read. Merged cells cannot be parsed correctly.
+#' @author Peter Brohan <peter.brohan+cran@gmail.com>, Chung-hong Chan <chainsawtiney@gmail.com>, Gerrit-Jan Schutten <phonixor@gmail.com>
+#' @examples
+#' \dontrun{
+#' # Read a file
+#' read_ods("starwars.ods")
+#' # Read a specific sheet, e.g. the 2nd sheet
+#' read_ods("starwars.ods", sheet = 2)
+#' # Read a specific range, e.g. A1:C11
+#' read_ods("starwars.ods", sheet = 2, range = "A1:C11")
+#' }
+#' @export
+read_ods <- function(path,
+                     sheet = 1,
+                     col_names = TRUE,
+                     col_types = NULL,
+                     na = "",
+                     skip = 0,
+                     formula_as_formula = FALSE,
+                     range = NULL,
+                     row_names = FALSE,
+                     strings_as_factors = FALSE,
+                     check_names = FALSE,
+                     verbose = FALSE
+
+){
+    if (missing(path) || !is.character(path)){
+        stop("No file path was provided for the 'path' argument. Please provide a path to a file to import.")
+    }
+    if (!file.exists(path)){
+        stop("file does not exist")
+    }
+    if (!is.logical(col_names)){
+        stop("col_names must be of type `boolean`")
+    }
+    if (!is.logical(formula_as_formula)){
+        stop("formula_as_formula must be of type `boolean`")
+    }
+    if (!is.logical(row_names)){
+        stop("row_names must be of type `boolean`")
+    }
+    if (!is.logical(strings_as_factors)){
+        stop("strings_as_factors must be of type `boolean`")
+    }
+    if (!is.logical(check_names)){
+        stop("check_names must be of type `boolean`")
+    }
+    if (!is.logical(verbose)){
+        stop("verbose must be of type `boolean`")
+    }
+
+    # Get cell range info
+    limits <- .standardise_limits(range, skip)
+    # Get sheet number.
+    sheets <- ods_get_sheet_names_(path, TRUE)
+    sheet_name <- cellranger::as.cell_limits(range)[["sheet"]]
+    if(!is.null(range) && !is.na(sheet_name)){
+        if(sheet != 1){
+            warning("Sheet suggested in range and using sheet argument. Defaulting to range", call. = FALSE)
+        }
+        is_in_sheet_names <- stringi::stri_cmp(sheet_name, sheets) == 0 # must be computed before it is tested
+        if(any(is_in_sheet_names)){
+            sheet <- which(is_in_sheet_names)
+        } else {
+            stop(paste0("No sheet found with name '", sheet_name, "'"), call. = FALSE)
+        }
+    } else {
+        is_in_sheet_names <- stringi::stri_cmp(sheet, sheets) == 0
+        if (!is.numeric(sheet) && any(is_in_sheet_names)){
+            sheet <- which(is_in_sheet_names)
+        } else if (!is.numeric(sheet)) {
+            stop(paste0("No sheet found with name '", sheet, "'"), call. = FALSE)
+        }
+        if (sheet < 1 || sheet > length(sheets)){ # reject indices on either side of the valid range
+            stop(paste0("File contains only ", length(sheets), " sheets. Sheet index out of range."), call. = FALSE)
+        }
+    }
+
+    strings <- read_ods_(path,
+                        limits["min_row"],
+                        limits["max_row"],
+                        limits["min_col"],
+                        limits["max_col"],
+                        sheet,
+                        formula_as_formula)
+    if(strings[1] == 0 || strings[2] == 0){
+        warning("empty sheet, return empty data frame.", call. = FALSE)
+        return(data.frame())
+    }
+    res <- strings[-1:-2] |> # strings[1] is used as the column count; strings[2] is presumably the row count (confirm in read_ods_.cpp)
+        matrix(ncol = strtoi(strings[1]), byrow = TRUE) |>
+        as.data.frame(stringsAsFactors = FALSE)
+    res <- .change_df_with_col_row_header(res, col_names, row_names)
+    res <- data.frame(res, check.names = check_names)
+    if (inherits(col_types, 'col_spec')){
+        res <- readr::type_convert(df = res, col_types = col_types, na = na)
+    } else if (length(col_types) == 0 && is.null(col_types)){
+        res <- .silent_type_convert(x = res, verbose = verbose, na = na)
+    } else if (length(col_types) == 1 && is.na(col_types[1])) {
+        {} #Pass
+    } else {
+        stop("Unknown col_types. Can either be a class col_spec, NULL or NA.", call. = FALSE)
+    }
+
+    if (strings_as_factors) {
+        res <- .convert_strings_to_factors(res)
+    }
+
+    return(res)
+}
+
diff --git a/R/utils.R b/R/utils.R
new file mode 100644
index 0000000..e08068d
--- /dev/null
+++ b/R/utils.R
@@ -0,0 +1,6 @@
+check_nonnegative_integer <- function(x, argument){
+    if(length(x) != 1 || !is.numeric(x) || floor(x) != x || is.na(x) || x < 0){
+        stop(paste0(argument , " must be a positive integer"), call. = FALSE)
+    }
+    return(x)
+}
\ No newline at end of file
diff --git a/R/zip.R b/R/zip.R
new file mode 100644
index 0000000..e144adc
--- /dev/null
+++ b/R/zip.R
@@ -0,0 +1,21 @@
+# Taken from readxl
+
+# Called only from C++ code, but currently needs to be implemented in R.
+zip_buffer <- function(zip_path, file_path) {
+  files <- utils::unzip(zip_path, list = TRUE)
+
+  indx <- match(file_path, files$Name)
+  if (is.na(indx)) {
+    stop("Couldn't find '", file_path, "' in '", zip_path, "'", call.
= FALSE) + } + + size <- files$Length[indx] + + con <- unz(zip_path, file_path, open = "rb") + on.exit(close(con), add = TRUE) + readBin(con, raw(), n = size) +} + +zip_has_file <- function(zip_path, file_path) { + file_path %in% utils::unzip(zip_path, list = TRUE)$Name +} \ No newline at end of file diff --git a/man/get_num_sheets_in_ods.Rd b/man/get_num_sheets_in_ods.Rd index 0da7e55..b2afed4 100644 --- a/man/get_num_sheets_in_ods.Rd +++ b/man/get_num_sheets_in_ods.Rd @@ -1,16 +1,15 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/readODS.R +% Please edit documentation in R/list_ods_sheets.R \name{get_num_sheets_in_ods} \alias{get_num_sheets_in_ods} -\alias{getNrOfSheetsInODS} \title{Get the Number of Sheets in an ODS File} \usage{ -get_num_sheets_in_ods(path) - -getNrOfSheetsInODS(path) +get_num_sheets_in_ods(path, include_external_data = FALSE) } \arguments{ \item{path}{path to the ods file} + +\item{include_external_data}{A boolean value declaring if external data sheets should be counted} } \value{ Number of sheets @@ -22,5 +21,5 @@ Get the number of sheets in an ods file use \code{\link{read_ods}} to read the data } \author{ -Chung-hong Chan \href{mailto:chainsawtiney@gmail.com}{chainsawtiney@gmail.com}, Gerrit-Jan Schutten \href{mailto:phonixor@gmail.com}{phonixor@gmail.com} +Peter Brohan \href{mailto:peter.brohan+cran@gmail.com}{peter.brohan+cran@gmail.com}, Chung-hong Chan \href{mailto:chainsawtiney@gmail.com}{chainsawtiney@gmail.com}, Gerrit-Jan Schutten \href{mailto:phonixor@gmail.com}{phonixor@gmail.com} } diff --git a/man/list_ods_sheets.Rd b/man/list_ods_sheets.Rd index db93206..757f93f 100644 --- a/man/list_ods_sheets.Rd +++ b/man/list_ods_sheets.Rd @@ -1,16 +1,15 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/readODS.R +% Please edit documentation in R/list_ods_sheets.R \name{list_ods_sheets} \alias{list_ods_sheets} -\alias{ods_sheets} -\title{List All Sheets in an ODS File} 
+\title{List all sheets in an ODS File} \usage{ -list_ods_sheets(path) - -ods_sheets(path) +list_ods_sheets(path, include_external_data = FALSE) } \arguments{ \item{path}{Path to the ods file} + +\item{include_external_data}{A boolean value to show or hide sheets containing linked data (default false)} } \value{ A character vector of sheet names. @@ -18,3 +17,9 @@ A character vector of sheet names. \description{ List all sheets in an ods file. } +\seealso{ +use \code{\link{read_ods}} to read the data +} +\author{ +Peter Brohan \href{mailto:peter.brohan+cran@gmail.com}{peter.brohan+cran@gmail.com}, Chung-hong Chan \href{mailto:chainsawtiney@gmail.com}{chainsawtiney@gmail.com}, Gerrit-Jan Schutten \href{mailto:phonixor@gmail.com}{phonixor@gmail.com} +} diff --git a/man/readODS-package.Rd b/man/readODS-package.Rd index 2b6e89a..4283c27 100644 --- a/man/readODS-package.Rd +++ b/man/readODS-package.Rd @@ -22,6 +22,7 @@ Useful links: Authors: \itemize{ \item Gerrit-Jan Schutten \email{phonixor@gmail.com} + \item Peter Brohan \email{peter.brohan@gmail.com} \item Thomas J. Leeper \email{thosjleeper@gmail.com} (\href{https://orcid.org/0000-0003-4097-6326}{ORCID}) \item Detlef Steuer \email{steuer@hsu-hh.de} (\href{https://orcid.org/0000-0003-2676-5290}{ORCID}) } diff --git a/man/read_ods.Rd b/man/read_ods.Rd index 957722c..f7b71ce 100644 --- a/man/read_ods.Rd +++ b/man/read_ods.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/readODS.R +% Please edit documentation in R/read_ods.R \name{read_ods} \alias{read_ods} -\alias{read.ods} \title{Read Data From ODS File} \usage{ read_ods( @@ -19,8 +18,6 @@ read_ods( check_names = FALSE, verbose = FALSE ) - -read.ods(file = NULL, sheet = NULL, formulaAsFormula = FALSE) } \arguments{ \item{path}{path to the ods file.} @@ -47,10 +44,6 @@ NULL, so that empty cells are treated as NA.} \item{check_names}{logical, passed down to base::data.frame(). 
Default is FALSE.} \item{verbose}{logical, if messages should be displayed. Default is FALSE.} - -\item{file}{for read.ods only, path to the ods file.} - -\item{formulaAsFormula}{for read.ods only, a switch to display formulas as formulas "SUM(A1:A3)" or as the resulting value "3"... or "8"..} } \value{ A data frame (\code{data.frame}) containing a representation of data in the ods file. @@ -73,5 +66,5 @@ read_ods("starwars.ods", sheet = 2, range = "A1:C11") } } \author{ -Chung-hong Chan \href{mailto:chainsawtiney@gmail.com}{chainsawtiney@gmail.com}, Gerrit-Jan Schutten \href{mailto:phonixor@gmail.com}{phonixor@gmail.com} +Peter Brohan \href{mailto:peter.brohan+cran@gmail.com}{peter.brohan+cran@gmail.com}, Chung-hong Chan \href{mailto:chainsawtiney@gmail.com}{chainsawtiney@gmail.com}, Gerrit-Jan Schutten \href{mailto:phonixor@gmail.com}{phonixor@gmail.com} } diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 0000000..22034c4 --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,3 @@ +*.o +*.so +*.dll diff --git a/src/cpp11.cpp b/src/cpp11.cpp new file mode 100644 index 0000000..317902e --- /dev/null +++ b/src/cpp11.cpp @@ -0,0 +1,35 @@ +// Generated by cpp11: do not edit by hand +// clang-format off + + +#include "cpp11/declarations.hpp" +#include + +// get_sheet_names.cpp +cpp11::strings ods_get_sheet_names_(const std::string file, const bool include_external_data); +extern "C" SEXP _readODS_ods_get_sheet_names_(SEXP file, SEXP include_external_data) { + BEGIN_CPP11 + return cpp11::as_sexp(ods_get_sheet_names_(cpp11::as_cpp>(file), cpp11::as_cpp>(include_external_data))); + END_CPP11 +} +// read_ods_.cpp +cpp11::strings read_ods_(const std::string file, int start_row, int stop_row, int start_col, int stop_col, const int sheet, const bool formula_as_formula); +extern "C" SEXP _readODS_read_ods_(SEXP file, SEXP start_row, SEXP stop_row, SEXP start_col, SEXP stop_col, SEXP sheet, SEXP formula_as_formula) { + BEGIN_CPP11 + return 
cpp11::as_sexp(read_ods_(cpp11::as_cpp>(file), cpp11::as_cpp>(start_row), cpp11::as_cpp>(stop_row), cpp11::as_cpp>(start_col), cpp11::as_cpp>(stop_col), cpp11::as_cpp>(sheet), cpp11::as_cpp>(formula_as_formula))); + END_CPP11 +} + +extern "C" { +static const R_CallMethodDef CallEntries[] = { + {"_readODS_ods_get_sheet_names_", (DL_FUNC) &_readODS_ods_get_sheet_names_, 2}, + {"_readODS_read_ods_", (DL_FUNC) &_readODS_read_ods_, 7}, + {NULL, NULL, 0} +}; +} + +extern "C" attribute_visible void R_init_readODS(DllInfo* dll){ + R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); + R_forceSymbols(dll, TRUE); +} diff --git a/src/get_sheet_names.cpp b/src/get_sheet_names.cpp new file mode 100644 index 0000000..ce25dbb --- /dev/null +++ b/src/get_sheet_names.cpp @@ -0,0 +1,58 @@ +#include "cpp11.hpp" +#include "cpp11/r_string.hpp" +#include "cpp11/strings.hpp" + +#include "rapidxml/rapidxml.hpp" +#include "readxl/zip.cpp" +#include "is_ods.h" + + +#include + + + +[[cpp11::register]] +cpp11::strings ods_get_sheet_names_(const std::string file, const bool include_external_data){ + if (!is_ods(file)){ + throw std::invalid_argument(file + " is not a correct ODS file"); + } + cpp11::writable::strings sheetNames(1); + + std::string xmlFile = zip_buffer(file, "content.xml"); + + rapidxml::xml_document<> spreadsheet; + spreadsheet.parse<0>(&xmlFile[0]); + rapidxml::xml_node<>* rootNode; + + + int i = 0; + int n = 1; + rootNode = spreadsheet.first_node()->first_node("office:body")-> + first_node("office:spreadsheet"); + + for (rapidxml::xml_node<>* sheetData = rootNode->first_node("table:table"); + sheetData; + sheetData = sheetData->next_sibling("table:table")){ + + + if (!include_external_data && sheetData->first_node("table:table-source")){ + continue; + } + if (i >= n) { + n *= 2; + sheetNames = Rf_lengthgets(sheetNames, n); + } + rapidxml::xml_attribute<>* name = sheetData->first_attribute("table:name"); + sheetNames[i] = (name != NULL) ? 
Rf_mkCharCE(name->value(), CE_UTF8) : NA_STRING; + i++; + } + + if (i != n) { + sheetNames = Rf_lengthgets(sheetNames, i); + n = i; + } + + return sheetNames; +} + + diff --git a/src/is_ods.cpp b/src/is_ods.cpp new file mode 100644 index 0000000..d3ad9fa --- /dev/null +++ b/src/is_ods.cpp @@ -0,0 +1,59 @@ +#include "is_ods.h" +#include "rapidxml/rapidxml.hpp" + + + +#include + + +bool is_ods(const std::string file, const bool strict){ + /*Checks that file conforms to some of the spec at + https://docs.oasis-open.org/office/OpenDocument/v1.3/. + + It's not all of them, but if it passes all of these and isn't a spreadsheet + something is very wrong. + + We don't care about the file extension. Returns true only when every check below passes; strict additionally requires a matching mimetype entry*/ + /*Check that it contains the proper files*/ + if (!zip_has_file(file, "content.xml")){ + /*Strictly speaking this isn't required in the spec, but + we're only interested in files with content.*/ + return false; + } + + + /*Mimetype is not in v1.0 so mostly we ignore this. Keeping this here in case it's useful later + as it is a requirement of later versions*/ + if(strict) { + if (!zip_has_file(file, "mimetype")){ + return false; + } + /*Check Section 2.2.4 B)*/ + std::string mimetype = zip_buffer(file, "mimetype"); + if (!mimetype.empty()) mimetype.pop_back(); // Very lazy trimming: drop the final byte, but only if there is one (end()-1 on an empty string is undefined) + if (!(strcmp( + mimetype.c_str(), + "application/vnd.oasis.opendocument.spreadsheet" // We also don't accept templates + ) == 0)){ + return false; + } + } + rapidxml::xml_document<> workbook; + rapidxml::xml_node<>* rootNode; + std:: string xmlFile = zip_buffer(file, "content.xml"); + workbook.parse<0>(&xmlFile[0]); + rootNode = workbook.first_node(); + /*Check Section 2.2.1 B) 2.1 - is this a well formed OpenDocument. first_node() yields a null pointer when content.xml has no root element, so test it before reading the name*/ + if (!rootNode || strcmp(rootNode->name(),"office:document-content") != 0){ + return false; + } + /*Check Section 3.3 C)*/ + if (!(rootNode->first_node("office:body"))){ + return false; + } + /*Check Section 2.2.4 C) - this is a spreadsheet*/ + if
(!(rootNode->first_node("office:body")->first_node("office:spreadsheet"))){ + return false; + } + return true; +} \ No newline at end of file diff --git a/src/is_ods.h b/src/is_ods.h new file mode 100644 index 0000000..08a8fb2 --- /dev/null +++ b/src/is_ods.h @@ -0,0 +1,6 @@ +#pragma once + +#include +#include "readxl/zip.h" + +bool is_ods(const std::string file, const bool strict = false); \ No newline at end of file diff --git a/src/rapidxml/rapidxml.hpp b/src/rapidxml/rapidxml.hpp new file mode 100644 index 0000000..ae91e08 --- /dev/null +++ b/src/rapidxml/rapidxml.hpp @@ -0,0 +1,2596 @@ +#ifndef RAPIDXML_HPP_INCLUDED +#define RAPIDXML_HPP_INCLUDED + +// Copyright (C) 2006, 2009 Marcin Kalicinski +// Version 1.13 +// Revision $DateTime: 2009/05/13 01:46:17 $ +//! \file rapidxml.hpp This file contains rapidxml parser and DOM implementation + +// If standard library is disabled, user must provide implementations of required functions and typedefs +#if !defined(RAPIDXML_NO_STDLIB) + #include // For std::size_t + #include // For assert + #include // For placement new +#endif + +// On MSVC, disable "conditional expression is constant" warning (level 4). +// This warning is almost impossible to avoid with certain types of templated code +#ifdef _MSC_VER + #pragma warning(push) + #pragma warning(disable:4127) // Conditional expression is constant +#endif + +/////////////////////////////////////////////////////////////////////////// +// RAPIDXML_PARSE_ERROR + +#if defined(RAPIDXML_NO_EXCEPTIONS) + +#define RAPIDXML_PARSE_ERROR(what, where) { parse_error_handler(what, where); assert(0); } + +namespace rapidxml +{ + //! When exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, + //! this function is called to notify user about the error. + //! It must be defined by the user. + //!

+ //! This function cannot return. If it does, the results are undefined. + //!

+ //! A very simple definition might look like that: + //!
+    //! void %rapidxml::%parse_error_handler(const char *what, void *where)
+    //! {
+    //!     std::cout << "Parse error: " << what << "\n";
+    //!     std::abort();
+    //! }
+    //! 
+ //! \param what Human readable description of the error. + //! \param where Pointer to character data where error was detected. + void parse_error_handler(const char *what, void *where); +} + +#else + +#include // For std::exception + +#define RAPIDXML_PARSE_ERROR(what, where) throw parse_error(what, where) + +namespace rapidxml +{ + + //! Parse error exception. + //! This exception is thrown by the parser when an error occurs. + //! Use what() function to get human-readable error message. + //! Use where() function to get a pointer to position within source text where error was detected. + //!

+ //! If throwing exceptions by the parser is undesirable, + //! it can be disabled by defining RAPIDXML_NO_EXCEPTIONS macro before rapidxml.hpp is included. + //! This will cause the parser to call rapidxml::parse_error_handler() function instead of throwing an exception. + //! This function must be defined by the user. + //!

+ //! This class derives from std::exception class. + class parse_error: public std::exception + { + + public: + + //! Constructs parse error + parse_error(const char *what, void *where) + : m_what(what) + , m_where(where) + { + } + + //! Gets human readable description of error. + //! \return Pointer to null terminated description of the error. + virtual const char *what() const throw() + { + return m_what; + } + + //! Gets pointer to character data where error happened. + //! Ch should be the same as char type of xml_document that produced the error. + //! \return Pointer to location within the parsed string where error occured. + template + Ch *where() const + { + return reinterpret_cast(m_where); + } + + private: + + const char *m_what; + void *m_where; + + }; +} + +#endif + +/////////////////////////////////////////////////////////////////////////// +// Pool sizes + +#ifndef RAPIDXML_STATIC_POOL_SIZE + // Size of static memory block of memory_pool. + // Define RAPIDXML_STATIC_POOL_SIZE before including rapidxml.hpp if you want to override the default value. + // No dynamic memory allocations are performed by memory_pool until static memory is exhausted. + #define RAPIDXML_STATIC_POOL_SIZE (64 * 1024) +#endif + +#ifndef RAPIDXML_DYNAMIC_POOL_SIZE + // Size of dynamic memory block of memory_pool. + // Define RAPIDXML_DYNAMIC_POOL_SIZE before including rapidxml.hpp if you want to override the default value. + // After the static block is exhausted, dynamic blocks with approximately this size are allocated by memory_pool. + #define RAPIDXML_DYNAMIC_POOL_SIZE (64 * 1024) +#endif + +#ifndef RAPIDXML_ALIGNMENT + // Memory allocation alignment. + // Define RAPIDXML_ALIGNMENT before including rapidxml.hpp if you want to override the default value, which is the size of pointer. + // All memory allocations for nodes, attributes and strings will be aligned to this value. + // This must be a power of 2 and at least 1, otherwise memory_pool will not work. 
+ #define RAPIDXML_ALIGNMENT sizeof(void *) +#endif + +namespace rapidxml +{ + // Forward declarations + template class xml_node; + template class xml_attribute; + template class xml_document; + + //! Enumeration listing all node types produced by the parser. + //! Use xml_node::type() function to query node type. + enum node_type + { + node_document, //!< A document node. Name and value are empty. + node_element, //!< An element node. Name contains element name. Value contains text of first data node. + node_data, //!< A data node. Name is empty. Value contains data text. + node_cdata, //!< A CDATA node. Name is empty. Value contains data text. + node_comment, //!< A comment node. Name is empty. Value contains comment text. + node_declaration, //!< A declaration node. Name and value are empty. Declaration parameters (version, encoding and standalone) are in node attributes. + node_doctype, //!< A DOCTYPE node. Name is empty. Value contains DOCTYPE text. + node_pi //!< A PI node. Name contains target. Value contains instructions. + }; + + /////////////////////////////////////////////////////////////////////// + // Parsing flags + + //! Parse flag instructing the parser to not create data nodes. + //! Text of first data node will still be placed in value of parent element, unless rapidxml::parse_no_element_values flag is also specified. + //! Can be combined with other flags by use of | operator. + //!

+ //! See xml_document::parse() function. + const int parse_no_data_nodes = 0x1; + + //! Parse flag instructing the parser to not use text of first data node as a value of parent element. + //! Can be combined with other flags by use of | operator. + //! Note that child data nodes of element node take precendence over its value when printing. + //! That is, if element has one or more child data nodes and a value, the value will be ignored. + //! Use rapidxml::parse_no_data_nodes flag to prevent creation of data nodes if you want to manipulate data using values of elements. + //!

+ //! See xml_document::parse() function. + const int parse_no_element_values = 0x2; + + //! Parse flag instructing the parser to not place zero terminators after strings in the source text. + //! By default zero terminators are placed, modifying source text. + //! Can be combined with other flags by use of | operator. + //!

+ //! See xml_document::parse() function. + const int parse_no_string_terminators = 0x4; + + //! Parse flag instructing the parser to not translate entities in the source text. + //! By default entities are translated, modifying source text. + //! Can be combined with other flags by use of | operator. + //!

+ //! See xml_document::parse() function. + const int parse_no_entity_translation = 0x8; + + //! Parse flag instructing the parser to disable UTF-8 handling and assume plain 8 bit characters. + //! By default, UTF-8 handling is enabled. + //! Can be combined with other flags by use of | operator. + //!

+ //! See xml_document::parse() function. + const int parse_no_utf8 = 0x10; + + //! Parse flag instructing the parser to create XML declaration node. + //! By default, declaration node is not created. + //! Can be combined with other flags by use of | operator. + //!

+ //! See xml_document::parse() function. + const int parse_declaration_node = 0x20; + + //! Parse flag instructing the parser to create comments nodes. + //! By default, comment nodes are not created. + //! Can be combined with other flags by use of | operator. + //!

+ //! See xml_document::parse() function. + const int parse_comment_nodes = 0x40; + + //! Parse flag instructing the parser to create DOCTYPE node. + //! By default, doctype node is not created. + //! Although W3C specification allows at most one DOCTYPE node, RapidXml will silently accept documents with more than one. + //! Can be combined with other flags by use of | operator. + //!

+ //! See xml_document::parse() function. + const int parse_doctype_node = 0x80; + + //! Parse flag instructing the parser to create PI nodes. + //! By default, PI nodes are not created. + //! Can be combined with other flags by use of | operator. + //!

+ //! See xml_document::parse() function. + const int parse_pi_nodes = 0x100; + + //! Parse flag instructing the parser to validate closing tag names. + //! If not set, name inside closing tag is irrelevant to the parser. + //! By default, closing tags are not validated. + //! Can be combined with other flags by use of | operator. + //!

+ //! See xml_document::parse() function. + const int parse_validate_closing_tags = 0x200; + + //! Parse flag instructing the parser to trim all leading and trailing whitespace of data nodes. + //! By default, whitespace is not trimmed. + //! This flag does not cause the parser to modify source text. + //! Can be combined with other flags by use of | operator. + //!

+ //! See xml_document::parse() function. + const int parse_trim_whitespace = 0x400; + + //! Parse flag instructing the parser to condense all whitespace runs of data nodes to a single space character. + //! Trimming of leading and trailing whitespace of data is controlled by rapidxml::parse_trim_whitespace flag. + //! By default, whitespace is not normalized. + //! If this flag is specified, source text will be modified. + //! Can be combined with other flags by use of | operator. + //!

+ //! See xml_document::parse() function. + const int parse_normalize_whitespace = 0x800; + + // Compound flags + + //! Parse flags which represent default behaviour of the parser. + //! This is always equal to 0, so that all other flags can be simply ored together. + //! Normally there is no need to inconveniently disable flags by anding with their negated (~) values. + //! This also means that meaning of each flag is a negation of the default setting. + //! For example, if flag name is rapidxml::parse_no_utf8, it means that utf-8 is enabled by default, + //! and using the flag will disable it. + //!

+ //! See xml_document::parse() function. + const int parse_default = 0; + + //! A combination of parse flags that forbids any modifications of the source text. + //! This also results in faster parsing. However, note that the following will occur: + //!
    + //!
  • names and values of nodes will not be zero terminated, you have to use xml_base::name_size() and xml_base::value_size() functions to determine where name and value ends
  • + //!
  • entities will not be translated
  • + //!
  • whitespace will not be normalized
  • + //!
+ //! See xml_document::parse() function. + const int parse_non_destructive = parse_no_string_terminators | parse_no_entity_translation; + + //! A combination of parse flags resulting in fastest possible parsing, without sacrificing important data. + //!

+ //! See xml_document::parse() function. + const int parse_fastest = parse_non_destructive | parse_no_data_nodes; + + //! A combination of parse flags resulting in largest amount of data being extracted. + //! This usually results in slowest parsing. + //!

+ //! See xml_document::parse() function. + const int parse_full = parse_declaration_node | parse_comment_nodes | parse_doctype_node | parse_pi_nodes | parse_validate_closing_tags; + + /////////////////////////////////////////////////////////////////////// + // Internals + + //! \cond internal + namespace internal + { + + // Struct that contains lookup tables for the parser + // It must be a template to allow correct linking (because it has static data members, which are defined in a header file). + template + struct lookup_tables + { + static const unsigned char lookup_whitespace[256]; // Whitespace table + static const unsigned char lookup_node_name[256]; // Node name table + static const unsigned char lookup_text[256]; // Text table + static const unsigned char lookup_text_pure_no_ws[256]; // Text table + static const unsigned char lookup_text_pure_with_ws[256]; // Text table + static const unsigned char lookup_attribute_name[256]; // Attribute name table + static const unsigned char lookup_attribute_data_1[256]; // Attribute data table with single quote + static const unsigned char lookup_attribute_data_1_pure[256]; // Attribute data table with single quote + static const unsigned char lookup_attribute_data_2[256]; // Attribute data table with double quotes + static const unsigned char lookup_attribute_data_2_pure[256]; // Attribute data table with double quotes + static const unsigned char lookup_digits[256]; // Digits + static const unsigned char lookup_upcase[256]; // To uppercase conversion table for ASCII characters + }; + + // Find length of the string + template + inline std::size_t measure(const Ch *p) + { + const Ch *tmp = p; + while (*tmp) + ++tmp; + return tmp - p; + } + + // Compare strings for equality + template + inline bool compare(const Ch *p1, std::size_t size1, const Ch *p2, std::size_t size2, bool case_sensitive) + { + if (size1 != size2) + return false; + if (case_sensitive) + { + for (const Ch *end = p1 + size1; p1 < end; ++p1, ++p2) + if 
(*p1 != *p2) + return false; + } + else + { + for (const Ch *end = p1 + size1; p1 < end; ++p1, ++p2) + if (lookup_tables<0>::lookup_upcase[static_cast(*p1)] != lookup_tables<0>::lookup_upcase[static_cast(*p2)]) + return false; + } + return true; + } + } + //! \endcond + + /////////////////////////////////////////////////////////////////////// + // Memory pool + + //! This class is used by the parser to create new nodes and attributes, without overheads of dynamic memory allocation. + //! In most cases, you will not need to use this class directly. + //! However, if you need to create nodes manually or modify names/values of nodes, + //! you are encouraged to use memory_pool of relevant xml_document to allocate the memory. + //! Not only is this faster than allocating them by using new operator, + //! but also their lifetime will be tied to the lifetime of document, + //! possibly simplyfing memory management. + //!

+ //! Call allocate_node() or allocate_attribute() functions to obtain new nodes or attributes from the pool. + //! You can also call allocate_string() function to allocate strings. + //! Such strings can then be used as names or values of nodes without worrying about their lifetime. + //! Note that there is no free() function -- all allocations are freed at once when clear() function is called, + //! or when the pool is destroyed. + //!

+ //! It is also possible to create a standalone memory_pool, and use it + //! to allocate nodes, whose lifetime will not be tied to any document. + //!

+ //! Pool maintains RAPIDXML_STATIC_POOL_SIZE bytes of statically allocated memory. + //! Until static memory is exhausted, no dynamic memory allocations are done. + //! When static memory is exhausted, pool allocates additional blocks of memory of size RAPIDXML_DYNAMIC_POOL_SIZE each, + //! by using global new[] and delete[] operators. + //! This behaviour can be changed by setting custom allocation routines. + //! Use set_allocator() function to set them. + //!

+ //! Allocations for nodes, attributes and strings are aligned at RAPIDXML_ALIGNMENT bytes. + //! This value defaults to the size of pointer on target architecture. + //!

+ //! To obtain absolutely top performance from the parser, + //! it is important that all nodes are allocated from a single, contiguous block of memory. + //! Otherwise, cache misses when jumping between two (or more) disjoint blocks of memory can slow down parsing quite considerably. + //! If required, you can tweak RAPIDXML_STATIC_POOL_SIZE, RAPIDXML_DYNAMIC_POOL_SIZE and RAPIDXML_ALIGNMENT + //! to obtain best wasted memory to performance compromise. + //! To do it, define their values before rapidxml.hpp file is included. + //! \param Ch Character type of created nodes. + template + class memory_pool + { + + public: + + //! \cond internal + typedef void *(alloc_func)(std::size_t); // Type of user-defined function used to allocate memory + typedef void (free_func)(void *); // Type of user-defined function used to free memory + //! \endcond + + //! Constructs empty pool with default allocator functions. + memory_pool() + : m_alloc_func(0) + , m_free_func(0) + { + init(); + } + + //! Destroys pool and frees all the memory. + //! This causes memory occupied by nodes allocated by the pool to be freed. + //! Nodes allocated from the pool are no longer valid. + ~memory_pool() + { + clear(); + } + + //! Allocates a new node from the pool, and optionally assigns name and value to it. + //! If the allocation request cannot be accomodated, this function will throw std::bad_alloc. + //! If exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, this function + //! will call rapidxml::parse_error_handler() function. + //! \param type Type of node to create. + //! \param name Name to assign to the node, or 0 to assign no name. + //! \param value Value to assign to the node, or 0 to assign no value. + //! \param name_size Size of name to assign, or 0 to automatically calculate size from name string. + //! \param value_size Size of value to assign, or 0 to automatically calculate size from value string. + //! \return Pointer to allocated node. 
This pointer will never be NULL. + xml_node *allocate_node(node_type type, + const Ch *name = 0, const Ch *value = 0, + std::size_t name_size = 0, std::size_t value_size = 0) + { + void *memory = allocate_aligned(sizeof(xml_node)); + xml_node *node = new(memory) xml_node(type); + if (name) + { + if (name_size > 0) + node->name(name, name_size); + else + node->name(name); + } + if (value) + { + if (value_size > 0) + node->value(value, value_size); + else + node->value(value); + } + return node; + } + + //! Allocates a new attribute from the pool, and optionally assigns name and value to it. + //! If the allocation request cannot be accomodated, this function will throw std::bad_alloc. + //! If exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, this function + //! will call rapidxml::parse_error_handler() function. + //! \param name Name to assign to the attribute, or 0 to assign no name. + //! \param value Value to assign to the attribute, or 0 to assign no value. + //! \param name_size Size of name to assign, or 0 to automatically calculate size from name string. + //! \param value_size Size of value to assign, or 0 to automatically calculate size from value string. + //! \return Pointer to allocated attribute. This pointer will never be NULL. + xml_attribute *allocate_attribute(const Ch *name = 0, const Ch *value = 0, + std::size_t name_size = 0, std::size_t value_size = 0) + { + void *memory = allocate_aligned(sizeof(xml_attribute)); + xml_attribute *attribute = new(memory) xml_attribute; + if (name) + { + if (name_size > 0) + attribute->name(name, name_size); + else + attribute->name(name); + } + if (value) + { + if (value_size > 0) + attribute->value(value, value_size); + else + attribute->value(value); + } + return attribute; + } + + //! Allocates a char array of given size from the pool, and optionally copies a given string to it. + //! If the allocation request cannot be accomodated, this function will throw std::bad_alloc. + //! 
If exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, this function + //! will call rapidxml::parse_error_handler() function. + //! \param source String to initialize the allocated memory with, or 0 to not initialize it. + //! \param size Number of characters to allocate, or zero to calculate it automatically from source string length; if size is 0, source string must be specified and null terminated. + //! \return Pointer to allocated char array. This pointer will never be NULL. + Ch *allocate_string(const Ch *source = 0, std::size_t size = 0) + { + assert(source || size); // Either source or size (or both) must be specified + if (size == 0) + size = internal::measure(source) + 1; + Ch *result = static_cast(allocate_aligned(size * sizeof(Ch))); + if (source) + for (std::size_t i = 0; i < size; ++i) + result[i] = source[i]; + return result; + } + + //! Clones an xml_node and its hierarchy of child nodes and attributes. + //! Nodes and attributes are allocated from this memory pool. + //! Names and values are not cloned, they are shared between the clone and the source. + //! Result node can be optionally specified as a second parameter, + //! in which case its contents will be replaced with cloned source node. + //! This is useful when you want to clone entire document. + //! \param source Node to clone. + //! \param result Node to put results in, or 0 to automatically allocate result node + //! \return Pointer to cloned node. This pointer will never be NULL. 
+ xml_node *clone_node(const xml_node *source, xml_node *result = 0) + { + // Prepare result node + if (result) + { + result->remove_all_attributes(); + result->remove_all_nodes(); + result->type(source->type()); + } + else + result = allocate_node(source->type()); + + // Clone name and value + result->name(source->name(), source->name_size()); + result->value(source->value(), source->value_size()); + + // Clone child nodes and attributes + for (xml_node *child = source->first_node(); child; child = child->next_sibling()) + result->append_node(clone_node(child)); + for (xml_attribute *attr = source->first_attribute(); attr; attr = attr->next_attribute()) + result->append_attribute(allocate_attribute(attr->name(), attr->value(), attr->name_size(), attr->value_size())); + + return result; + } + + //! Clears the pool. + //! This causes memory occupied by nodes allocated by the pool to be freed. + //! Any nodes or strings allocated from the pool will no longer be valid. + void clear() + { + while (m_begin != m_static_memory) + { + char *previous_begin = reinterpret_cast
(align(m_begin))->previous_begin; + if (m_free_func) + m_free_func(m_begin); + else + delete[] m_begin; + m_begin = previous_begin; + } + init(); + } + + //! Sets or resets the user-defined memory allocation functions for the pool. + //! This can only be called when no memory is allocated from the pool yet, otherwise results are undefined. + //! Allocation function must not return invalid pointer on failure. It should either throw, + //! stop the program, or use longjmp() function to pass control to other place of program. + //! If it returns invalid pointer, results are undefined. + //!

+        //! User defined allocation functions must have the following forms:
+        //!     void *allocate(std::size_t size);
+        //!     void free(void *pointer);
+        //!
+ //! \param af Allocation function, or 0 to restore default function + //! \param ff Free function, or 0 to restore default function + void set_allocator(alloc_func *af, free_func *ff) + { + assert(m_begin == m_static_memory && m_ptr == align(m_begin)); // Verify that no memory is allocated yet + m_alloc_func = af; + m_free_func = ff; + } + + private: + + struct header + { + char *previous_begin; + }; + + void init() + { + m_begin = m_static_memory; + m_ptr = align(m_begin); + m_end = m_static_memory + sizeof(m_static_memory); + } + + char *align(char *ptr) + { + std::size_t alignment = ((RAPIDXML_ALIGNMENT - (std::size_t(ptr) & (RAPIDXML_ALIGNMENT - 1))) & (RAPIDXML_ALIGNMENT - 1)); + return ptr + alignment; + } + + char *allocate_raw(std::size_t size) + { + // Allocate + void *memory; + if (m_alloc_func) // Allocate memory using either user-specified allocation function or global operator new[] + { + memory = m_alloc_func(size); + assert(memory); // Allocator is not allowed to return 0, on failure it must either throw, stop the program or use longjmp + } + else + { + memory = new char[size]; +#ifdef RAPIDXML_NO_EXCEPTIONS + if (!memory) // If exceptions are disabled, verify memory allocation, because new will not be able to throw bad_alloc + RAPIDXML_PARSE_ERROR("out of memory", 0); +#endif + } + return static_cast(memory); + } + + void *allocate_aligned(std::size_t size) + { + // Calculate aligned pointer + char *result = align(m_ptr); + + // If not enough memory left in current pool, allocate a new pool + if (result + size > m_end) + { + // Calculate required pool size (may be bigger than RAPIDXML_DYNAMIC_POOL_SIZE) + std::size_t pool_size = RAPIDXML_DYNAMIC_POOL_SIZE; + if (pool_size < size) + pool_size = size; + + // Allocate + std::size_t alloc_size = sizeof(header) + (2 * RAPIDXML_ALIGNMENT - 2) + pool_size; // 2 alignments required in worst case: one for header, one for actual allocation + char *raw_memory = allocate_raw(alloc_size); + + // Setup new pool 
in allocated memory + char *pool = align(raw_memory); + header *new_header = reinterpret_cast
(pool); + new_header->previous_begin = m_begin; + m_begin = raw_memory; + m_ptr = pool + sizeof(header); + m_end = raw_memory + alloc_size; + + // Calculate aligned pointer again using new pool + result = align(m_ptr); + } + + // Update pool and return aligned pointer + m_ptr = result + size; + return result; + } + + char *m_begin; // Start of raw memory making up current pool + char *m_ptr; // First free byte in current pool + char *m_end; // One past last available byte in current pool + char m_static_memory[RAPIDXML_STATIC_POOL_SIZE]; // Static raw memory + alloc_func *m_alloc_func; // Allocator function, or 0 if default is to be used + free_func *m_free_func; // Free function, or 0 if default is to be used + }; + + /////////////////////////////////////////////////////////////////////////// + // XML base + + //! Base class for xml_node and xml_attribute implementing common functions: + //! name(), name_size(), value(), value_size() and parent(). + //! \param Ch Character type to use + template + class xml_base + { + + public: + + /////////////////////////////////////////////////////////////////////////// + // Construction & destruction + + // Construct a base with empty name, value and parent + xml_base() + : m_name(0) + , m_value(0) + , m_parent(0) + { + } + + /////////////////////////////////////////////////////////////////////////// + // Node data access + + //! Gets name of the node. + //! Interpretation of name depends on type of node. + //! Note that name will not be zero-terminated if rapidxml::parse_no_string_terminators option was selected during parse. + //!

+        //! Use name_size() function to determine length of the name.
+        //! \return Name of node, or empty string if node has no name.
+        Ch *name() const
+        {
+            return m_name ? m_name : nullstr();
+        }
+
+        //! Gets size of node name, not including terminator character.
+        //! This function works correctly irrespective of whether name is or is not zero terminated.
+        //! \return Size of node name, in characters.
+        std::size_t name_size() const
+        {
+            return m_name ? m_name_size : 0;    // m_name_size is only meaningful when a name is set
+        }
+
+        //! Gets value of node.
+        //! Interpretation of value depends on type of node.
+        //! Note that value will not be zero-terminated if rapidxml::parse_no_string_terminators option was selected during parse.
+        //!

+        //! Use value_size() function to determine length of the value.
+        //! \return Value of node, or empty string if node has no value.
+        Ch *value() const
+        {
+            return m_value ? m_value : nullstr();
+        }
+
+        //! Gets size of node value, not including terminator character.
+        //! This function works correctly irrespective of whether value is or is not zero terminated.
+        //! \return Size of node value, in characters.
+        std::size_t value_size() const
+        {
+            return m_value ? m_value_size : 0;  // m_value_size is only meaningful when a value is set
+        }
+
+        ///////////////////////////////////////////////////////////////////////////
+        // Node modification
+
+        //! Sets name of node to a non zero-terminated string.
+        //! See \ref ownership_of_strings.
+        //!

+        //! Note that node does not own its name or value, it only stores a pointer to it.
+        //! It will not delete or otherwise free the pointer on destruction.
+        //! It is the responsibility of the user to properly manage lifetime of the string.
+        //! The easiest way to achieve it is to use memory_pool of the document to allocate the string -
+        //! on destruction of the document the string will be automatically freed.
+        //!

+        //! Size of name must be specified separately, because name does not have to be zero terminated.
+        //! Use name(const Ch *) function to have the length automatically calculated (string must be zero terminated).
+        //! \param name Name of node to set. Does not have to be zero terminated.
+        //! \param size Size of name, in characters. This does not include zero terminator, if one is present.
+        void name(const Ch *name, std::size_t size)
+        {
+            m_name = const_cast<Ch *>(name);    // Only the pointer is stored; the string is not copied
+            m_name_size = size;
+        }
+
+        //! Sets name of node to a zero-terminated string.
+        //! See also \ref ownership_of_strings and xml_node::name(const Ch *, std::size_t).
+        //! \param name Name of node to set. Must be zero terminated.
+        void name(const Ch *name)
+        {
+            this->name(name, internal::measure(name));
+        }
+
+        //! Sets value of node to a non zero-terminated string.
+        //! See \ref ownership_of_strings.
+        //!

+        //! Note that node does not own its name or value, it only stores a pointer to it.
+        //! It will not delete or otherwise free the pointer on destruction.
+        //! It is the responsibility of the user to properly manage lifetime of the string.
+        //! The easiest way to achieve it is to use memory_pool of the document to allocate the string -
+        //! on destruction of the document the string will be automatically freed.
+        //!

+ //! Size of value must be specified separately, because it does not have to be zero terminated. + //! Use value(const Ch *) function to have the length automatically calculated (string must be zero terminated). + //!

+ //! If an element has a child node of type node_data, it will take precedence over element value when printing. + //! If you want to manipulate data of elements using values, use parser flag rapidxml::parse_no_data_nodes to prevent creation of data nodes by the parser. + //! \param value value of node to set. Does not have to be zero terminated. + //! \param size Size of value, in characters. This does not include zero terminator, if one is present. + void value(const Ch *value, std::size_t size) + { + m_value = const_cast(value); + m_value_size = size; + } + + //! Sets value of node to a zero-terminated string. + //! See also \ref ownership_of_strings and xml_node::value(const Ch *, std::size_t). + //! \param value Vame of node to set. Must be zero terminated. + void value(const Ch *value) + { + this->value(value, internal::measure(value)); + } + + /////////////////////////////////////////////////////////////////////////// + // Related nodes access + + //! Gets node parent. + //! \return Pointer to parent node, or 0 if there is no parent. + xml_node *parent() const + { + return m_parent; + } + + protected: + + // Return empty string + static Ch *nullstr() + { + static Ch zero = Ch('\0'); + return &zero; + } + + Ch *m_name; // Name of node, or 0 if no name + Ch *m_value; // Value of node, or 0 if no value + std::size_t m_name_size; // Length of node name, or undefined of no name + std::size_t m_value_size; // Length of node value, or undefined if no value + xml_node *m_parent; // Pointer to parent node, or 0 if none + + }; + + //! Class representing attribute node of XML document. + //! Each attribute has name and value strings, which are available through name() and value() functions (inherited from xml_base). + //! Note that after parse, both name and value of attribute will point to interior of source text used for parsing. + //! Thus, this text must persist in memory for the lifetime of attribute. + //! \param Ch Character type to use. 
+    template<class Ch = char>
+    class xml_attribute: public xml_base<Ch>
+    {
+
+        friend class xml_node<Ch>;
+
+    public:
+
+        ///////////////////////////////////////////////////////////////////////////
+        // Construction & destruction
+
+        //! Constructs an empty attribute.
+        //! Sibling pointers are left unset; they only become meaningful once the attribute is attached to a node.
+        //! Consider using memory_pool of appropriate xml_document if allocating attributes manually.
+        xml_attribute()
+        {
+        }
+
+        ///////////////////////////////////////////////////////////////////////////
+        // Related nodes access
+
+        //! Gets document of which attribute is a child.
+        //! \return Pointer to document that contains this attribute, or 0 if there is no parent document.
+        xml_document<Ch> *document() const
+        {
+            if (xml_node<Ch> *node = this->parent())
+            {
+                // Climb to the topmost ancestor; it is the document only if its type says so
+                while (node->parent())
+                    node = node->parent();
+                return node->type() == node_document ? static_cast<xml_document<Ch> *>(node) : 0;
+            }
+            else
+                return 0;
+        }
+
+        //! Gets previous attribute, optionally matching attribute name.
+        //! \param name Name of attribute to find, or 0 to return previous attribute regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero
+        //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string
+        //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters
+        //! \return Pointer to found attribute, or 0 if not found.
+        xml_attribute<Ch> *previous_attribute(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const
+        {
+            if (name)
+            {
+                if (name_size == 0)
+                    name_size = internal::measure(name);
+                // Sibling links are only valid while attached to a node, so a detached attribute has no siblings
+                for (xml_attribute<Ch> *attribute = this->m_parent ? m_prev_attribute : 0; attribute; attribute = attribute->m_prev_attribute)
+                    if (internal::compare(attribute->name(), attribute->name_size(), name, name_size, case_sensitive))
+                        return attribute;
+                return 0;
+            }
+            else
+                return this->m_parent ? m_prev_attribute : 0;
+        }
+
+        //! Gets next attribute, optionally matching attribute name.
+        //! \param name Name of attribute to find, or 0 to return next attribute regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero
+        //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string
+        //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters
+        //! \return Pointer to found attribute, or 0 if not found.
+        xml_attribute<Ch> *next_attribute(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const
+        {
+            if (name)
+            {
+                if (name_size == 0)
+                    name_size = internal::measure(name);
+                // Sibling links are only valid while attached to a node, so a detached attribute has no siblings
+                for (xml_attribute<Ch> *attribute = this->m_parent ? m_next_attribute : 0; attribute; attribute = attribute->m_next_attribute)
+                    if (internal::compare(attribute->name(), attribute->name_size(), name, name_size, case_sensitive))
+                        return attribute;
+                return 0;
+            }
+            else
+                return this->m_parent ? m_next_attribute : 0;
+        }
+
+    private:
+
+        xml_attribute<Ch> *m_prev_attribute;        // Pointer to previous sibling of attribute, or 0 if none; only valid if parent is non-zero
+        xml_attribute<Ch> *m_next_attribute;        // Pointer to next sibling of attribute, or 0 if none; only valid if parent is non-zero
+
+    };
+
+    ///////////////////////////////////////////////////////////////////////////
+    // XML node
+
+    //! Class representing a node of XML document.
+    //! Each node may have associated name and value strings, which are available through name() and value() functions.
+    //! Interpretation of name and value depends on type of the node.
+    //! Type of node can be determined by using type() function.
+    //!

+    //! Note that after parse, both name and value of node, if any, will point interior of source text used for parsing.
+    //! Thus, this text must persist in the memory for the lifetime of node.
+    //! \param Ch Character type to use.
+    template<class Ch = char>
+    class xml_node: public xml_base<Ch>
+    {
+
+    public:
+
+        ///////////////////////////////////////////////////////////////////////////
+        // Construction & destruction
+
+        //! Constructs an empty node with the specified type.
+        //! Consider using memory_pool of appropriate document to allocate nodes manually.
+        //! \param type Type of node to construct.
+        xml_node(node_type type)
+            : m_type(type)
+            , m_first_node(0)
+            , m_first_attribute(0)
+        {
+        }
+
+        ///////////////////////////////////////////////////////////////////////////
+        // Node data access
+
+        //! Gets type of node.
+        //! \return Type of node.
+        node_type type() const
+        {
+            return m_type;
+        }
+
+        ///////////////////////////////////////////////////////////////////////////
+        // Related nodes access
+
+        //! Gets document of which node is a child.
+        //! \return Pointer to document that contains this node, or 0 if there is no parent document.
+        xml_document<Ch> *document() const
+        {
+            // Climb to the topmost ancestor; it is the document only if its type says so
+            xml_node<Ch> *node = const_cast<xml_node<Ch> *>(this);
+            while (node->parent())
+                node = node->parent();
+            return node->type() == node_document ? static_cast<xml_document<Ch> *>(node) : 0;
+        }
+
+        //! Gets first child node, optionally matching node name.
+        //! \param name Name of child to find, or 0 to return first child regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero
+        //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string
+        //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters
+        //! \return Pointer to found child, or 0 if not found.
+        xml_node<Ch> *first_node(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const
+        {
+            if (name)
+            {
+                if (name_size == 0)
+                    name_size = internal::measure(name);
+                for (xml_node<Ch> *child = m_first_node; child; child = child->next_sibling())
+                    if (internal::compare(child->name(), child->name_size(), name, name_size, case_sensitive))
+                        return child;
+                return 0;
+            }
+            else
+                return m_first_node;
+        }
+
+        //! Gets last child node, optionally matching node name.
+        //! Behaviour is undefined if node has no children.
+        //! Use first_node() to test if node has children.
+        //! \param name Name of child to find, or 0 to return last child regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero
+        //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string
+        //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters
+        //! \return Pointer to found child, or 0 if not found.
+        xml_node<Ch> *last_node(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const
+        {
+            assert(m_first_node);  // Cannot query for last child if node has no children
+            if (name)
+            {
+                if (name_size == 0)
+                    name_size = internal::measure(name);
+                for (xml_node<Ch> *child = m_last_node; child; child = child->previous_sibling())
+                    if (internal::compare(child->name(), child->name_size(), name, name_size, case_sensitive))
+                        return child;
+                return 0;
+            }
+            else
+                return m_last_node;
+        }
+
+        //! Gets previous sibling node, optionally matching node name.
+        //! Behaviour is undefined if node has no parent.
+        //! Use parent() to test if node has a parent.
+        //! \param name Name of sibling to find, or 0 to return previous sibling regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero
+        //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string
+        //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters
+        //! \return Pointer to found sibling, or 0 if not found.
+        xml_node<Ch> *previous_sibling(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const
+        {
+            assert(this->m_parent);     // Cannot query for siblings if node has no parent
+            if (name)
+            {
+                if (name_size == 0)
+                    name_size = internal::measure(name);
+                for (xml_node<Ch> *sibling = m_prev_sibling; sibling; sibling = sibling->m_prev_sibling)
+                    if (internal::compare(sibling->name(), sibling->name_size(), name, name_size, case_sensitive))
+                        return sibling;
+                return 0;
+            }
+            else
+                return m_prev_sibling;
+        }
+
+        //! Gets next sibling node, optionally matching node name.
+        //! Behaviour is undefined if node has no parent.
+        //! Use parent() to test if node has a parent.
+        //! \param name Name of sibling to find, or 0 to return next sibling regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero
+        //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string
+        //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters
+        //! \return Pointer to found sibling, or 0 if not found.
+        xml_node<Ch> *next_sibling(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const
+        {
+            assert(this->m_parent);     // Cannot query for siblings if node has no parent
+            if (name)
+            {
+                if (name_size == 0)
+                    name_size = internal::measure(name);
+                for (xml_node<Ch> *sibling = m_next_sibling; sibling; sibling = sibling->m_next_sibling)
+                    if (internal::compare(sibling->name(), sibling->name_size(), name, name_size, case_sensitive))
+                        return sibling;
+                return 0;
+            }
+            else
+                return m_next_sibling;
+        }
+
+        //! Gets first attribute of node, optionally matching attribute name.
+        //! \param name Name of attribute to find, or 0 to return first attribute regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero
+        //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string
+        //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters
+        //! \return Pointer to found attribute, or 0 if not found.
+        xml_attribute<Ch> *first_attribute(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const
+        {
+            if (name)
+            {
+                if (name_size == 0)
+                    name_size = internal::measure(name);
+                for (xml_attribute<Ch> *attribute = m_first_attribute; attribute; attribute = attribute->m_next_attribute)
+                    if (internal::compare(attribute->name(), attribute->name_size(), name, name_size, case_sensitive))
+                        return attribute;
+                return 0;
+            }
+            else
+                return m_first_attribute;
+        }
+
+        //! Gets last attribute of node, optionally matching attribute name.
+        //! \param name Name of attribute to find, or 0 to return last attribute regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero
+        //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string
+        //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters
+        //! \return Pointer to found attribute, or 0 if not found.
+        xml_attribute<Ch> *last_attribute(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const
+        {
+            if (name)
+            {
+                if (name_size == 0)
+                    name_size = internal::measure(name);
+                // m_last_attribute is only valid when the node has at least one attribute,
+                // so start from 0 for an attribute-less node instead of reading garbage
+                for (xml_attribute<Ch> *attribute = m_first_attribute ? m_last_attribute : 0; attribute; attribute = attribute->m_prev_attribute)
+                    if (internal::compare(attribute->name(), attribute->name_size(), name, name_size, case_sensitive))
+                        return attribute;
+                return 0;
+            }
+            else
+                return m_first_attribute ? m_last_attribute : 0;
+        }
+
+        ///////////////////////////////////////////////////////////////////////////
+        // Node modification
+
+        //! Sets type of node.
+        //! \param type Type of node to set.
+        void type(node_type type)
+        {
+            m_type = type;
+        }
+
+        ///////////////////////////////////////////////////////////////////////////
+        // Node manipulation
+
+        //! Prepends a new child node.
+        //! The prepended child becomes the first child, and all existing children are moved one position back.
+        //! \param child Node to prepend.
+        void prepend_node(xml_node<Ch> *child)
+        {
+            assert(child && !child->parent() && child->type() != node_document);
+            if (first_node())
+            {
+                child->m_next_sibling = m_first_node;
+                m_first_node->m_prev_sibling = child;
+            }
+            else
+            {
+                child->m_next_sibling = 0;
+                m_last_node = child;
+            }
+            m_first_node = child;
+            child->m_parent = this;
+            child->m_prev_sibling = 0;
+        }
+
+        //! Appends a new child node.
+        //! The appended child becomes the last child.
+        //! \param child Node to append.
+        void append_node(xml_node<Ch> *child)
+        {
+            assert(child && !child->parent() && child->type() != node_document);
+            if (first_node())
+            {
+                child->m_prev_sibling = m_last_node;
+                m_last_node->m_next_sibling = child;
+            }
+            else
+            {
+                child->m_prev_sibling = 0;
+                m_first_node = child;
+            }
+            m_last_node = child;
+            child->m_parent = this;
+            child->m_next_sibling = 0;
+        }
+
+        //! Inserts a new child node at specified place inside the node.
+        //! All children after and including the specified node are moved one position back.
+        //! \param where Place where to insert the child, or 0 to insert at the back.
+        //! \param child Node to insert.
+        void insert_node(xml_node<Ch> *where, xml_node<Ch> *child)
+        {
+            assert(!where || where->parent() == this);
+            assert(child && !child->parent() && child->type() != node_document);
+            if (where == m_first_node)
+                prepend_node(child);
+            else if (where == 0)
+                append_node(child);
+            else
+            {
+                // where is neither first nor absent, so it has a valid previous sibling
+                child->m_prev_sibling = where->m_prev_sibling;
+                child->m_next_sibling = where;
+                where->m_prev_sibling->m_next_sibling = child;
+                where->m_prev_sibling = child;
+                child->m_parent = this;
+            }
+        }
+
+        //! Removes first child node.
+        //! If node has no children, behaviour is undefined.
+        //! Use first_node() to test if node has children.
+        void remove_first_node()
+        {
+            assert(first_node());
+            xml_node<Ch> *child = m_first_node;
+            m_first_node = child->m_next_sibling;
+            if (child->m_next_sibling)
+                child->m_next_sibling->m_prev_sibling = 0;
+            else
+                m_last_node = 0;
+            child->m_parent = 0;
+        }
+
+        //! Removes last child of the node.
+        //! If node has no children, behaviour is undefined.
+        //! Use first_node() to test if node has children.
+        void remove_last_node()
+        {
+            assert(first_node());
+            xml_node<Ch> *child = m_last_node;
+            if (child->m_prev_sibling)
+            {
+                m_last_node = child->m_prev_sibling;
+                child->m_prev_sibling->m_next_sibling = 0;
+            }
+            else
+                m_first_node = 0;
+            child->m_parent = 0;
+        }
+
+        //! Removes specified child from the node
+        //! \param where Pointer to child to be removed.
+        void remove_node(xml_node<Ch> *where)
+        {
+            assert(where && where->parent() == this);
+            assert(first_node());
+            if (where == m_first_node)
+                remove_first_node();
+            else if (where == m_last_node)
+                remove_last_node();
+            else
+            {
+                // Interior child: both neighbours exist, so link them to each other
+                where->m_prev_sibling->m_next_sibling = where->m_next_sibling;
+                where->m_next_sibling->m_prev_sibling = where->m_prev_sibling;
+                where->m_parent = 0;
+            }
+        }
+
+        //! Removes all child nodes (but not attributes).
+        void remove_all_nodes()
+        {
+            for (xml_node<Ch> *node = first_node(); node; node = node->m_next_sibling)
+                node->m_parent = 0;
+            m_first_node = 0;
+        }
+
+        //! Prepends a new attribute to the node.
+        //! \param attribute Attribute to prepend.
+        void prepend_attribute(xml_attribute<Ch> *attribute)
+        {
+            assert(attribute && !attribute->parent());
+            if (first_attribute())
+            {
+                attribute->m_next_attribute = m_first_attribute;
+                m_first_attribute->m_prev_attribute = attribute;
+            }
+            else
+            {
+                attribute->m_next_attribute = 0;
+                m_last_attribute = attribute;
+            }
+            m_first_attribute = attribute;
+            attribute->m_parent = this;
+            attribute->m_prev_attribute = 0;
+        }
+
+        //! Appends a new attribute to the node.
+        //! \param attribute Attribute to append.
+        void append_attribute(xml_attribute<Ch> *attribute)
+        {
+            assert(attribute && !attribute->parent());
+            if (first_attribute())
+            {
+                attribute->m_prev_attribute = m_last_attribute;
+                m_last_attribute->m_next_attribute = attribute;
+            }
+            else
+            {
+                attribute->m_prev_attribute = 0;
+                m_first_attribute = attribute;
+            }
+            m_last_attribute = attribute;
+            attribute->m_parent = this;
+            attribute->m_next_attribute = 0;
+        }
+
+        //! Inserts a new attribute at specified place inside the node.
+        //! All attributes after and including the specified attribute are moved one position back.
+        //! \param where Place where to insert the attribute, or 0 to insert at the back.
+        //! \param attribute Attribute to insert.
+        void insert_attribute(xml_attribute<Ch> *where, xml_attribute<Ch> *attribute)
+        {
+            assert(!where || where->parent() == this);
+            assert(attribute && !attribute->parent());
+            if (where == m_first_attribute)
+                prepend_attribute(attribute);
+            else if (where == 0)
+                append_attribute(attribute);
+            else
+            {
+                // where is neither first nor absent, so it has a valid previous attribute
+                attribute->m_prev_attribute = where->m_prev_attribute;
+                attribute->m_next_attribute = where;
+                where->m_prev_attribute->m_next_attribute = attribute;
+                where->m_prev_attribute = attribute;
+                attribute->m_parent = this;
+            }
+        }
+
+        //! Removes first attribute of the node.
+        //! If node has no attributes, behaviour is undefined.
+        //! Use first_attribute() to test if node has attributes.
+        void remove_first_attribute()
+        {
+            assert(first_attribute());
+            xml_attribute<Ch> *attribute = m_first_attribute;
+            if (attribute->m_next_attribute)
+            {
+                attribute->m_next_attribute->m_prev_attribute = 0;
+            }
+            else
+                m_last_attribute = 0;
+            attribute->m_parent = 0;
+            m_first_attribute = attribute->m_next_attribute;
+        }
+
+        //! Removes last attribute of the node.
+        //! If node has no attributes, behaviour is undefined.
+        //! Use first_attribute() to test if node has attributes.
+        void remove_last_attribute()
+        {
+            assert(first_attribute());
+            xml_attribute<Ch> *attribute = m_last_attribute;
+            if (attribute->m_prev_attribute)
+            {
+                attribute->m_prev_attribute->m_next_attribute = 0;
+                m_last_attribute = attribute->m_prev_attribute;
+            }
+            else
+                m_first_attribute = 0;
+            attribute->m_parent = 0;
+        }
+
+        //! Removes specified attribute from node.
+        //! \param where Pointer to attribute to be removed.
+ void remove_attribute(xml_attribute *where) + { + assert(first_attribute() && where->parent() == this); + if (where == m_first_attribute) + remove_first_attribute(); + else if (where == m_last_attribute) + remove_last_attribute(); + else + { + where->m_prev_attribute->m_next_attribute = where->m_next_attribute; + where->m_next_attribute->m_prev_attribute = where->m_prev_attribute; + where->m_parent = 0; + } + } + + //! Removes all attributes of node. + void remove_all_attributes() + { + for (xml_attribute *attribute = first_attribute(); attribute; attribute = attribute->m_next_attribute) + attribute->m_parent = 0; + m_first_attribute = 0; + } + + private: + + /////////////////////////////////////////////////////////////////////////// + // Restrictions + + // No copying + xml_node(const xml_node &); + void operator =(const xml_node &); + + /////////////////////////////////////////////////////////////////////////// + // Data members + + // Note that some of the pointers below have UNDEFINED values if certain other pointers are 0. + // This is required for maximum performance, as it allows the parser to omit initialization of + // unneded/redundant values. + // + // The rules are as follows: + // 1. first_node and first_attribute contain valid pointers, or 0 if node has no children/attributes respectively + // 2. last_node and last_attribute are valid only if node has at least one child/attribute respectively, otherwise they contain garbage + // 3. 
prev_sibling and next_sibling are valid only if node has a parent, otherwise they contain garbage + + node_type m_type; // Type of node; always valid + xml_node *m_first_node; // Pointer to first child node, or 0 if none; always valid + xml_node *m_last_node; // Pointer to last child node, or 0 if none; this value is only valid if m_first_node is non-zero + xml_attribute *m_first_attribute; // Pointer to first attribute of node, or 0 if none; always valid + xml_attribute *m_last_attribute; // Pointer to last attribute of node, or 0 if none; this value is only valid if m_first_attribute is non-zero + xml_node *m_prev_sibling; // Pointer to previous sibling of node, or 0 if none; this value is only valid if m_parent is non-zero + xml_node *m_next_sibling; // Pointer to next sibling of node, or 0 if none; this value is only valid if m_parent is non-zero + + }; + + /////////////////////////////////////////////////////////////////////////// + // XML document + + //! This class represents root of the DOM hierarchy. + //! It is also an xml_node and a memory_pool through public inheritance. + //! Use parse() function to build a DOM tree from a zero-terminated XML text string. + //! parse() function allocates memory for nodes and attributes by using functions of xml_document, + //! which are inherited from memory_pool. + //! To access root node of the document, use the document itself, as if it was an xml_node. + //! \param Ch Character type to use. + template + class xml_document: public xml_node, public memory_pool + { + + public: + + //! Constructs empty XML document + xml_document() + : xml_node(node_document) + { + } + + //! Parses zero-terminated XML string according to given flags. + //! Passed string will be modified by the parser, unless rapidxml::parse_non_destructive flag is used. + //! The string must persist for the lifetime of the document. + //! In case of error, rapidxml::parse_error exception will be thrown. + //!

+ //! If you want to parse contents of a file, you must first load the file into the memory, and pass pointer to its beginning. + //! Make sure that data is zero-terminated. + //!

+        //! Document can be parsed into multiple times.
+        //! Each new call to parse removes previous nodes and attributes (if any), but does not clear memory pool.
+        //! \param text XML data to parse; pointer is non-const to denote fact that this data may be modified by the parser.
+        template<int Flags>
+        void parse(Ch *text)
+        {
+            assert(text);
+
+            // Remove current contents
+            this->remove_all_nodes();
+            this->remove_all_attributes();
+
+            // Parse BOM, if any
+            parse_bom<Flags>(text);
+
+            // Parse children
+            while (1)
+            {
+                // Skip whitespace before node
+                skip<whitespace_pred, Flags>(text);
+                if (*text == 0)
+                    break;
+
+                // Parse and append new child
+                if (*text == Ch('<'))
+                {
+                    ++text;     // Skip '<'
+                    if (xml_node<Ch> *node = parse_node<Flags>(text))
+                        this->append_node(node);
+                }
+                else
+                    RAPIDXML_PARSE_ERROR("expected <", text);
+            }
+
+        }
+
+        //! Clears the document by deleting all nodes and clearing the memory pool.
+        //! All nodes owned by document pool are destroyed.
+        void clear()
+        {
+            this->remove_all_nodes();
+            this->remove_all_attributes();
+            memory_pool<Ch>::clear();
+        }
+
+    private:
+
+        ///////////////////////////////////////////////////////////////////////
+        // Internal character utility functions
+
+        // Detect whitespace character
+        struct whitespace_pred
+        {
+            static unsigned char test(Ch ch)
+            {
+                return internal::lookup_tables<0>::lookup_whitespace[static_cast<unsigned char>(ch)];
+            }
+        };
+
+        // Detect node name character
+        struct node_name_pred
+        {
+            static unsigned char test(Ch ch)
+            {
+                return internal::lookup_tables<0>::lookup_node_name[static_cast<unsigned char>(ch)];
+            }
+        };
+
+        // Detect attribute name character
+        struct attribute_name_pred
+        {
+            static unsigned char test(Ch ch)
+            {
+                return internal::lookup_tables<0>::lookup_attribute_name[static_cast<unsigned char>(ch)];
+            }
+        };
+
+        // Detect text character (PCDATA)
+        struct text_pred
+        {
+            static unsigned char test(Ch ch)
+            {
+                return internal::lookup_tables<0>::lookup_text[static_cast<unsigned char>(ch)];
+            }
+        };
+
+        // Detect text character (PCDATA) that does not require processing
+        struct text_pure_no_ws_pred
+        {
+            static unsigned char test(Ch ch)
+            {
+                return internal::lookup_tables<0>::lookup_text_pure_no_ws[static_cast<unsigned char>(ch)];
+            }
+        };
+
+        // Detect text character (PCDATA) that does not require processing
+        struct text_pure_with_ws_pred
+        {
+            static unsigned char test(Ch ch)
+            {
+                return internal::lookup_tables<0>::lookup_text_pure_with_ws[static_cast<unsigned char>(ch)];
+            }
+        };
+
+        // Detect attribute value character
+        template<Ch Quote>
+        struct attribute_value_pred
+        {
+            static unsigned char test(Ch ch)
+            {
+                if (Quote == Ch('\''))
+                    return internal::lookup_tables<0>::lookup_attribute_data_1[static_cast<unsigned char>(ch)];
+                if (Quote == Ch('\"'))
+                    return internal::lookup_tables<0>::lookup_attribute_data_2[static_cast<unsigned char>(ch)];
+                return 0;       // Should never be executed, to avoid warnings on Comeau
+            }
+        };
+
+        // Detect attribute value character
+        template<Ch Quote>
+        struct attribute_value_pure_pred
+        {
+            static unsigned char test(Ch ch)
+            {
+                if (Quote == Ch('\''))
+                    return internal::lookup_tables<0>::lookup_attribute_data_1_pure[static_cast<unsigned char>(ch)];
+                if (Quote == Ch('\"'))
+                    return internal::lookup_tables<0>::lookup_attribute_data_2_pure[static_cast<unsigned char>(ch)];
+                return 0;       // Should never be executed, to avoid warnings on Comeau
+            }
+        };
+
+        // Insert coded character, using UTF8 or 8-bit ASCII
+        template<int Flags>
+        static void insert_coded_character(Ch *&text, unsigned long code)
+        {
+            if (Flags & parse_no_utf8)
+            {
+                // Insert 8-bit ASCII character
+                // Todo: possibly verify that code is less than 256 and use replacement char otherwise?
+                text[0] = static_cast<unsigned char>(code);
+                text += 1;
+            }
+            else
+            {
+                // Insert UTF8 sequence (continuation bytes are written last-to-first, 6 bits at a time)
+                if (code < 0x80)    // 1 byte sequence
+                {
+                    text[0] = static_cast<unsigned char>(code);
+                    text += 1;
+                }
+                else if (code < 0x800)  // 2 byte sequence
+                {
+                    text[1] = static_cast<unsigned char>((code | 0x80) & 0xBF); code >>= 6;
+                    text[0] = static_cast<unsigned char>(code | 0xC0);
+                    text += 2;
+                }
+                else if (code < 0x10000)    // 3 byte sequence
+                {
+                    text[2] = static_cast<unsigned char>((code | 0x80) & 0xBF); code >>= 6;
+                    text[1] = static_cast<unsigned char>((code | 0x80) & 0xBF); code >>= 6;
+                    text[0] = static_cast<unsigned char>(code | 0xE0);
+                    text += 3;
+                }
+                else if (code < 0x110000)   // 4 byte sequence
+                {
+                    text[3] = static_cast<unsigned char>((code | 0x80) & 0xBF); code >>= 6;
+                    text[2] = static_cast<unsigned char>((code | 0x80) & 0xBF); code >>= 6;
+                    text[1] = static_cast<unsigned char>((code | 0x80) & 0xBF); code >>= 6;
+                    text[0] = static_cast<unsigned char>(code | 0xF0);
+                    text += 4;
+                }
+                else    // Invalid, only codes up to 0x10FFFF are allowed in Unicode
+                {
+                    RAPIDXML_PARSE_ERROR("invalid numeric character entity", text);
+                }
+            }
+        }
+
+        // Skip characters until predicate evaluates to true
+        template<class StopPred, int Flags>
+        static void skip(Ch *&text)
+        {
+            Ch *tmp = text;
+            while (StopPred::test(*tmp))
+                ++tmp;
+            text = tmp;
+        }
+
+        // Skip characters until predicate evaluates to true while doing the following:
+        // - replacing XML character entity references with proper characters (&apos; &amp; &quot; &lt; &gt; &#...;)
+        // - condensing whitespace sequences to single space character
+        template<class StopPred, class StopPredPure, int Flags>
+        static Ch *skip_and_expand_character_refs(Ch *&text)
+        {
+            // If entity translation, whitespace condense and whitespace trimming is disabled, use plain skip
+            if (Flags & parse_no_entity_translation &&
+                !(Flags & parse_normalize_whitespace) &&
+                !(Flags & parse_trim_whitespace))
+            {
+                skip<StopPred, Flags>(text);
+                return text;
+            }
+
+            // Use simple skip until first modification is detected
+            skip<StopPredPure, Flags>(text);
+
+            // Use translation skip
+            Ch *src = text;
+            Ch *dest = src;
+            while (StopPred::test(*src))
+            {
+                // If entity translation is enabled
+                if (!(Flags & parse_no_entity_translation))
+ { + // Test if replacement is needed + if (src[0] == Ch('&')) + { + switch (src[1]) + { + + // & ' + case Ch('a'): + if (src[2] == Ch('m') && src[3] == Ch('p') && src[4] == Ch(';')) + { + *dest = Ch('&'); + ++dest; + src += 5; + continue; + } + if (src[2] == Ch('p') && src[3] == Ch('o') && src[4] == Ch('s') && src[5] == Ch(';')) + { + *dest = Ch('\''); + ++dest; + src += 6; + continue; + } + break; + + // " + case Ch('q'): + if (src[2] == Ch('u') && src[3] == Ch('o') && src[4] == Ch('t') && src[5] == Ch(';')) + { + *dest = Ch('"'); + ++dest; + src += 6; + continue; + } + break; + + // > + case Ch('g'): + if (src[2] == Ch('t') && src[3] == Ch(';')) + { + *dest = Ch('>'); + ++dest; + src += 4; + continue; + } + break; + + // < + case Ch('l'): + if (src[2] == Ch('t') && src[3] == Ch(';')) + { + *dest = Ch('<'); + ++dest; + src += 4; + continue; + } + break; + + // &#...; - assumes ASCII + case Ch('#'): + if (src[2] == Ch('x')) + { + unsigned long code = 0; + src += 3; // Skip &#x + while (1) + { + unsigned char digit = internal::lookup_tables<0>::lookup_digits[static_cast(*src)]; + if (digit == 0xFF) + break; + code = code * 16 + digit; + ++src; + } + insert_coded_character(dest, code); // Put character in output + } + else + { + unsigned long code = 0; + src += 2; // Skip &# + while (1) + { + unsigned char digit = internal::lookup_tables<0>::lookup_digits[static_cast(*src)]; + if (digit == 0xFF) + break; + code = code * 10 + digit; + ++src; + } + insert_coded_character(dest, code); // Put character in output + } + if (*src == Ch(';')) + ++src; + else + RAPIDXML_PARSE_ERROR("expected ;", src); + continue; + + // Something else + default: + // Ignore, just copy '&' verbatim + break; + + } + } + } + + // If whitespace condensing is enabled + if (Flags & parse_normalize_whitespace) + { + // Test if condensing is needed + if (whitespace_pred::test(*src)) + { + *dest = Ch(' '); ++dest; // Put single space in dest + ++src; // Skip first whitespace char + // Skip remaining 
whitespace chars + while (whitespace_pred::test(*src)) + ++src; + continue; + } + } + + // No replacement, only copy character + *dest++ = *src++; + + } + + // Return new end + text = src; + return dest; + + } + + /////////////////////////////////////////////////////////////////////// + // Internal parsing functions + + // Parse BOM, if any + template + void parse_bom(Ch *&text) + { + // UTF-8? + if (static_cast(text[0]) == 0xEF && + static_cast(text[1]) == 0xBB && + static_cast(text[2]) == 0xBF) + { + text += 3; // Skup utf-8 bom + } + } + + // Parse XML declaration ( + xml_node *parse_xml_declaration(Ch *&text) + { + // If parsing of declaration is disabled + if (!(Flags & parse_declaration_node)) + { + // Skip until end of declaration + while (text[0] != Ch('?') || text[1] != Ch('>')) + { + if (!text[0]) + RAPIDXML_PARSE_ERROR("unexpected end of data", text); + ++text; + } + text += 2; // Skip '?>' + return 0; + } + + // Create declaration + xml_node *declaration = this->allocate_node(node_declaration); + + // Skip whitespace before attributes or ?> + skip(text); + + // Parse declaration attributes + parse_node_attributes(text, declaration); + + // Skip ?> + if (text[0] != Ch('?') || text[1] != Ch('>')) + RAPIDXML_PARSE_ERROR("expected ?>", text); + text += 2; + + return declaration; + } + + // Parse XML comment (' + return 0; // Do not produce comment node + } + + // Remember value start + Ch *value = text; + + // Skip until end of comment + while (text[0] != Ch('-') || text[1] != Ch('-') || text[2] != Ch('>')) + { + if (!text[0]) + RAPIDXML_PARSE_ERROR("unexpected end of data", text); + ++text; + } + + // Create comment node + xml_node *comment = this->allocate_node(node_comment); + comment->value(value, text - value); + + // Place zero terminator after comment value + if (!(Flags & parse_no_string_terminators)) + *text = Ch('\0'); + + text += 3; // Skip '-->' + return comment; + } + + // Parse DOCTYPE + template + xml_node *parse_doctype(Ch *&text) + { + // 
Remember value start + Ch *value = text; + + // Skip to > + while (*text != Ch('>')) + { + // Determine character type + switch (*text) + { + + // If '[' encountered, scan for matching ending ']' using naive algorithm with depth + // This works for all W3C test files except for 2 most wicked + case Ch('['): + { + ++text; // Skip '[' + int depth = 1; + while (depth > 0) + { + switch (*text) + { + case Ch('['): ++depth; break; + case Ch(']'): --depth; break; + case 0: RAPIDXML_PARSE_ERROR("unexpected end of data", text); + } + ++text; + } + break; + } + + // Error on end of text + case Ch('\0'): + RAPIDXML_PARSE_ERROR("unexpected end of data", text); + + // Other character, skip it + default: + ++text; + + } + } + + // If DOCTYPE nodes enabled + if (Flags & parse_doctype_node) + { + // Create a new doctype node + xml_node *doctype = this->allocate_node(node_doctype); + doctype->value(value, text - value); + + // Place zero terminator after value + if (!(Flags & parse_no_string_terminators)) + *text = Ch('\0'); + + text += 1; // skip '>' + return doctype; + } + else + { + text += 1; // skip '>' + return 0; + } + + } + + // Parse PI + template + xml_node *parse_pi(Ch *&text) + { + // If creation of PI nodes is enabled + if (Flags & parse_pi_nodes) + { + // Create pi node + xml_node *pi = this->allocate_node(node_pi); + + // Extract PI target name + Ch *name = text; + skip(text); + if (text == name) + RAPIDXML_PARSE_ERROR("expected PI target", text); + pi->name(name, text - name); + + // Skip whitespace between pi target and pi + skip(text); + + // Remember start of pi + Ch *value = text; + + // Skip to '?>' + while (text[0] != Ch('?') || text[1] != Ch('>')) + { + if (*text == Ch('\0')) + RAPIDXML_PARSE_ERROR("unexpected end of data", text); + ++text; + } + + // Set pi value (verbatim, no entity expansion or whitespace normalization) + pi->value(value, text - value); + + // Place zero terminator after name and value + if (!(Flags & parse_no_string_terminators)) + { + 
pi->name()[pi->name_size()] = Ch('\0'); + pi->value()[pi->value_size()] = Ch('\0'); + } + + text += 2; // Skip '?>' + return pi; + } + else + { + // Skip to '?>' + while (text[0] != Ch('?') || text[1] != Ch('>')) + { + if (*text == Ch('\0')) + RAPIDXML_PARSE_ERROR("unexpected end of data", text); + ++text; + } + text += 2; // Skip '?>' + return 0; + } + } + + // Parse and append data + // Return character that ends data. + // This is necessary because this character might have been overwritten by a terminating 0 + template + Ch parse_and_append_data(xml_node *node, Ch *&text, Ch *contents_start) + { + // Backup to contents start if whitespace trimming is disabled + if (!(Flags & parse_trim_whitespace)) + text = contents_start; + + // Skip until end of data + Ch *value = text, *end; + if (Flags & parse_normalize_whitespace) + end = skip_and_expand_character_refs(text); + else + end = skip_and_expand_character_refs(text); + + // Trim trailing whitespace if flag is set; leading was already trimmed by whitespace skip after > + if (Flags & parse_trim_whitespace) + { + if (Flags & parse_normalize_whitespace) + { + // Whitespace is already condensed to single space characters by skipping function, so just trim 1 char off the end + if (*(end - 1) == Ch(' ')) + --end; + } + else + { + // Backup until non-whitespace character is found + while (whitespace_pred::test(*(end - 1))) + --end; + } + } + + // If characters are still left between end and value (this test is only necessary if normalization is enabled) + // Create new data node + if (!(Flags & parse_no_data_nodes)) + { + xml_node *data = this->allocate_node(node_data); + data->value(value, end - value); + node->append_node(data); + } + + // Add data to parent node if no data exists yet + if (!(Flags & parse_no_element_values)) + if (*node->value() == Ch('\0')) + node->value(value, end - value); + + // Place zero terminator after value + if (!(Flags & parse_no_string_terminators)) + { + Ch ch = *text; + *end = Ch('\0'); 
+ return ch; // Return character that ends data; this is required because zero terminator overwritten it + } + + // Return character that ends data + return *text; + } + + // Parse CDATA + template + xml_node *parse_cdata(Ch *&text) + { + // If CDATA is disabled + if (Flags & parse_no_data_nodes) + { + // Skip until end of cdata + while (text[0] != Ch(']') || text[1] != Ch(']') || text[2] != Ch('>')) + { + if (!text[0]) + RAPIDXML_PARSE_ERROR("unexpected end of data", text); + ++text; + } + text += 3; // Skip ]]> + return 0; // Do not produce CDATA node + } + + // Skip until end of cdata + Ch *value = text; + while (text[0] != Ch(']') || text[1] != Ch(']') || text[2] != Ch('>')) + { + if (!text[0]) + RAPIDXML_PARSE_ERROR("unexpected end of data", text); + ++text; + } + + // Create new cdata node + xml_node *cdata = this->allocate_node(node_cdata); + cdata->value(value, text - value); + + // Place zero terminator after value + if (!(Flags & parse_no_string_terminators)) + *text = Ch('\0'); + + text += 3; // Skip ]]> + return cdata; + } + + // Parse element node + template + xml_node *parse_element(Ch *&text) + { + // Create element node + xml_node *element = this->allocate_node(node_element); + + // Extract element name + Ch *name = text; + skip(text); + if (text == name) + RAPIDXML_PARSE_ERROR("expected element name", text); + element->name(name, text - name); + + // Skip whitespace between element name and attributes or > + skip(text); + + // Parse attributes, if any + parse_node_attributes(text, element); + + // Determine ending type + if (*text == Ch('>')) + { + ++text; + parse_node_contents(text, element); + } + else if (*text == Ch('/')) + { + ++text; + if (*text != Ch('>')) + RAPIDXML_PARSE_ERROR("expected >", text); + ++text; + } + else + RAPIDXML_PARSE_ERROR("expected >", text); + + // Place zero terminator after name + if (!(Flags & parse_no_string_terminators)) + element->name()[element->name_size()] = Ch('\0'); + + // Return parsed element + return 
element; + } + + // Determine node type, and parse it + template + xml_node *parse_node(Ch *&text) + { + // Parse proper node type + switch (text[0]) + { + + // <... + default: + // Parse and append element node + return parse_element(text); + + // (text); + } + else + { + // Parse PI + return parse_pi(text); + } + + // (text); + } + break; + + // (text); + } + break; + + // (text); + } + + } // switch + + // Attempt to skip other, unrecognized node types starting with ')) + { + if (*text == 0) + RAPIDXML_PARSE_ERROR("unexpected end of data", text); + ++text; + } + ++text; // Skip '>' + return 0; // No node recognized + + } + } + + // Parse contents of the node - children, data etc. + template + void parse_node_contents(Ch *&text, xml_node *node) + { + // For all children and text + while (1) + { + // Skip whitespace between > and node contents + Ch *contents_start = text; // Store start of node contents before whitespace is skipped + skip(text); + Ch next_char = *text; + + // After data nodes, instead of continuing the loop, control jumps here. + // This is because zero termination inside parse_and_append_data() function + // would wreak havoc with the above code. + // Also, skipping whitespace after data nodes is unnecessary. + after_data_node: + + // Determine what comes next: node closing, child node, data node, or 0? 
+ switch (next_char) + { + + // Node closing or child node + case Ch('<'): + if (text[1] == Ch('/')) + { + // Node closing + text += 2; // Skip '(text); + if (!internal::compare(node->name(), node->name_size(), closing_name, text - closing_name, true)) + RAPIDXML_PARSE_ERROR("invalid closing tag name", text); + } + else + { + // No validation, just skip name + skip(text); + } + // Skip remaining whitespace after node name + skip(text); + if (*text != Ch('>')) + RAPIDXML_PARSE_ERROR("expected >", text); + ++text; // Skip '>' + return; // Node closed, finished parsing contents + } + else + { + // Child node + ++text; // Skip '<' + if (xml_node *child = parse_node(text)) + node->append_node(child); + } + break; + + // End of data - error + case Ch('\0'): + RAPIDXML_PARSE_ERROR("unexpected end of data", text); + + // Data node + default: + next_char = parse_and_append_data(node, text, contents_start); + goto after_data_node; // Bypass regular processing after data nodes + + } + } + } + + // Parse XML attributes of the node + template + void parse_node_attributes(Ch *&text, xml_node *node) + { + // For all attributes + while (attribute_name_pred::test(*text)) + { + // Extract attribute name + Ch *name = text; + ++text; // Skip first character of attribute name + skip(text); + if (text == name) + RAPIDXML_PARSE_ERROR("expected attribute name", name); + + // Create new attribute + xml_attribute *attribute = this->allocate_attribute(); + attribute->name(name, text - name); + node->append_attribute(attribute); + + // Skip whitespace after attribute name + skip(text); + + // Skip = + if (*text != Ch('=')) + RAPIDXML_PARSE_ERROR("expected =", text); + ++text; + + // Add terminating zero after name + if (!(Flags & parse_no_string_terminators)) + attribute->name()[attribute->name_size()] = 0; + + // Skip whitespace after = + skip(text); + + // Skip quote and remember if it was ' or " + Ch quote = *text; + if (quote != Ch('\'') && quote != Ch('"')) + 
RAPIDXML_PARSE_ERROR("expected ' or \"", text); + ++text; + + // Extract attribute value and expand char refs in it + Ch *value = text, *end; + const int AttFlags = Flags & ~parse_normalize_whitespace; // No whitespace normalization in attributes + if (quote == Ch('\'')) + end = skip_and_expand_character_refs, attribute_value_pure_pred, AttFlags>(text); + else + end = skip_and_expand_character_refs, attribute_value_pure_pred, AttFlags>(text); + + // Set attribute value + attribute->value(value, end - value); + + // Make sure that end quote is present + if (*text != quote) + RAPIDXML_PARSE_ERROR("expected ' or \"", text); + ++text; // Skip quote + + // Add terminating zero after value + if (!(Flags & parse_no_string_terminators)) + attribute->value()[attribute->value_size()] = 0; + + // Skip whitespace after attribute value + skip(text); + } + } + + }; + + //! \cond internal + namespace internal + { + + // Whitespace (space \n \r \t) + template + const unsigned char lookup_tables::lookup_whitespace[256] = + { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, // 0 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1 + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 3 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 5 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 6 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 7 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // A + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // B + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // C + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // D + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // E + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // F + }; + + // Node name (anything but space \n \r \t / > ? 
\0) + template + const unsigned char lookup_tables::lookup_node_name[256] = + { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, // 0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, // 2 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, // 3 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F + }; + + // Text (i.e. 
PCDATA) (anything but < \0) + template + const unsigned char lookup_tables::lookup_text[256] = + { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 2 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, // 3 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F + }; + + // Text (i.e. 
PCDATA) that does not require processing when ws normalization is disabled + // (anything but < \0 &) + template + const unsigned char lookup_tables::lookup_text_pure_no_ws[256] = + { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 + 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 2 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, // 3 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F + }; + + // Text (i.e. 
PCDATA) that does not require processing when ws normalizationis is enabled + // (anything but < \0 & space \n \r \t) + template + const unsigned char lookup_tables::lookup_text_pure_with_ws[256] = + { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, // 0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 + 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 2 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, // 3 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F + }; + + // Attribute name (anything but space \n \r \t / < > = ? ! 
\0) + template + const unsigned char lookup_tables::lookup_attribute_name[256] = + { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, // 0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 + 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, // 2 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, // 3 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F + }; + + // Attribute data with single quote (anything but ' \0) + template + const unsigned char lookup_tables::lookup_attribute_data_1[256] = + { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 + 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, // 2 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 3 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, // E + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F + }; + + // Attribute data with single quote that does not require processing (anything but ' \0 &) + template + const unsigned char lookup_tables::lookup_attribute_data_1_pure[256] = + { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 + 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, // 2 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 3 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F + }; + + // Attribute data with double quote (anything but " \0) + template + const unsigned char lookup_tables::lookup_attribute_data_2[256] = + { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 + 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 2 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 3 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, // B + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F + }; + + // Attribute data with double quote that does not require processing (anything but " \0 &) + template + const unsigned char lookup_tables::lookup_attribute_data_2_pure[256] = + { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 + 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 2 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 3 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F + }; + + // Digits (dec and hex, 255 denotes end of numeric character reference) + template + const unsigned char lookup_tables::lookup_digits[256] = + { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 0 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 1 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 2 + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,255,255,255,255,255,255, // 3 + 255, 10, 11, 12, 13, 14, 15,255,255,255,255,255,255,255,255,255, // 4 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 5 + 255, 10, 11, 12, 13, 14, 
15,255,255,255,255,255,255,255,255,255, // 6 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 7 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 8 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 9 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // A + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // B + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // C + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // D + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // E + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255 // F + }; + + // Upper case conversion + template + const unsigned char lookup_tables::lookup_upcase[256] = + { + // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A B C D E F + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 0 + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, // 1 + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, // 2 + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, // 3 + 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, // 4 + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, // 5 + 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, // 6 + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 123,124,125,126,127, // 7 + 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, // 8 + 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, // 9 + 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, // A + 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, // B + 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, // C + 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, // D + 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, // E + 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 // F + }; + } + //! 
\endcond + +} + +// Undefine internal macros +#undef RAPIDXML_PARSE_ERROR + +// On MSVC, restore warnings state +#ifdef _MSC_VER + #pragma warning(pop) +#endif + +#endif diff --git a/src/rapidxml/rapidxml_iterators.hpp b/src/rapidxml/rapidxml_iterators.hpp new file mode 100644 index 0000000..52ebc29 --- /dev/null +++ b/src/rapidxml/rapidxml_iterators.hpp @@ -0,0 +1,174 @@ +#ifndef RAPIDXML_ITERATORS_HPP_INCLUDED +#define RAPIDXML_ITERATORS_HPP_INCLUDED + +// Copyright (C) 2006, 2009 Marcin Kalicinski +// Version 1.13 +// Revision $DateTime: 2009/05/13 01:46:17 $ +//! \file rapidxml_iterators.hpp This file contains rapidxml iterators + +#include "rapidxml.hpp" + +namespace rapidxml +{ + + //! Iterator of child nodes of xml_node + template + class node_iterator + { + + public: + + typedef typename xml_node value_type; + typedef typename xml_node &reference; + typedef typename xml_node *pointer; + typedef std::ptrdiff_t difference_type; + typedef std::bidirectional_iterator_tag iterator_category; + + node_iterator() + : m_node(0) + { + } + + node_iterator(xml_node *node) + : m_node(node->first_node()) + { + } + + reference operator *() const + { + assert(m_node); + return *m_node; + } + + pointer operator->() const + { + assert(m_node); + return m_node; + } + + node_iterator& operator++() + { + assert(m_node); + m_node = m_node->next_sibling(); + return *this; + } + + node_iterator operator++(int) + { + node_iterator tmp = *this; + ++this; + return tmp; + } + + node_iterator& operator--() + { + assert(m_node && m_node->previous_sibling()); + m_node = m_node->previous_sibling(); + return *this; + } + + node_iterator operator--(int) + { + node_iterator tmp = *this; + ++this; + return tmp; + } + + bool operator ==(const node_iterator &rhs) + { + return m_node == rhs.m_node; + } + + bool operator !=(const node_iterator &rhs) + { + return m_node != rhs.m_node; + } + + private: + + xml_node *m_node; + + }; + + //! 
Iterator of child attributes of xml_node + template + class attribute_iterator + { + + public: + + typedef typename xml_attribute value_type; + typedef typename xml_attribute &reference; + typedef typename xml_attribute *pointer; + typedef std::ptrdiff_t difference_type; + typedef std::bidirectional_iterator_tag iterator_category; + + attribute_iterator() + : m_attribute(0) + { + } + + attribute_iterator(xml_node *node) + : m_attribute(node->first_attribute()) + { + } + + reference operator *() const + { + assert(m_attribute); + return *m_attribute; + } + + pointer operator->() const + { + assert(m_attribute); + return m_attribute; + } + + attribute_iterator& operator++() + { + assert(m_attribute); + m_attribute = m_attribute->next_attribute(); + return *this; + } + + attribute_iterator operator++(int) + { + attribute_iterator tmp = *this; + ++this; + return tmp; + } + + attribute_iterator& operator--() + { + assert(m_attribute && m_attribute->previous_attribute()); + m_attribute = m_attribute->previous_attribute(); + return *this; + } + + attribute_iterator operator--(int) + { + attribute_iterator tmp = *this; + ++this; + return tmp; + } + + bool operator ==(const attribute_iterator &rhs) + { + return m_attribute == rhs.m_attribute; + } + + bool operator !=(const attribute_iterator &rhs) + { + return m_attribute != rhs.m_attribute; + } + + private: + + xml_attribute *m_attribute; + + }; + +} + +#endif diff --git a/src/rapidxml/rapidxml_print.hpp b/src/rapidxml/rapidxml_print.hpp new file mode 100644 index 0000000..5321521 --- /dev/null +++ b/src/rapidxml/rapidxml_print.hpp @@ -0,0 +1,451 @@ +#ifndef RAPIDXML_PRINT_HPP_INCLUDED +#define RAPIDXML_PRINT_HPP_INCLUDED + +// Copyright (C) 2006, 2009 Marcin Kalicinski +// Version 1.13 +// Revision $DateTime: 2009/05/13 01:46:17 $ +//! 
\file rapidxml_print.hpp This file contains rapidxml printer implementation + +#include "rapidxml.hpp" + +// Only include streams if not disabled +#ifndef RAPIDXML_NO_STREAMS + #include + #include +#endif + +namespace rapidxml +{ + + /////////////////////////////////////////////////////////////////////// + // Printing flags + + const int print_no_indenting = 0x1; //!< Printer flag instructing the printer to suppress indenting of XML. See print() function. + + /////////////////////////////////////////////////////////////////////// + // Internal + + //! \cond internal + namespace internal + { + + /////////////////////////////////////////////////////////////////////////// + // Internal character operations + + // Copy characters from given range to given output iterator + template + inline OutIt copy_chars(const Ch *begin, const Ch *end, OutIt out) + { + while (begin != end) + *out++ = *begin++; + return out; + } + + // Copy characters from given range to given output iterator and expand + // characters into references (< > ' " &) + template + inline OutIt copy_and_expand_chars(const Ch *begin, const Ch *end, Ch noexpand, OutIt out) + { + while (begin != end) + { + if (*begin == noexpand) + { + *out++ = *begin; // No expansion, copy character + } + else + { + switch (*begin) + { + case Ch('<'): + *out++ = Ch('&'); *out++ = Ch('l'); *out++ = Ch('t'); *out++ = Ch(';'); + break; + case Ch('>'): + *out++ = Ch('&'); *out++ = Ch('g'); *out++ = Ch('t'); *out++ = Ch(';'); + break; + case Ch('\''): + *out++ = Ch('&'); *out++ = Ch('a'); *out++ = Ch('p'); *out++ = Ch('o'); *out++ = Ch('s'); *out++ = Ch(';'); + break; + case Ch('"'): + *out++ = Ch('&'); *out++ = Ch('q'); *out++ = Ch('u'); *out++ = Ch('o'); *out++ = Ch('t'); *out++ = Ch(';'); + break; + case Ch('&'): + *out++ = Ch('&'); *out++ = Ch('a'); *out++ = Ch('m'); *out++ = Ch('p'); *out++ = Ch(';'); + break; + default: + *out++ = *begin; // No expansion, copy character + } + } + ++begin; // Step to next character + } + 
return out; + } + + // Fill given output iterator with repetitions of the same character + template + inline OutIt fill_chars(OutIt out, int n, Ch ch) + { + for (int i = 0; i < n; ++i) + *out++ = ch; + return out; + } + + // Find character + template + inline bool find_char(const Ch *begin, const Ch *end) + { + while (begin != end) + if (*begin++ == ch) + return true; + return false; + } + + /////////////////////////////////////////////////////////////////////////// + // Internal printing operations + + + //fix for gcc compilation + //https://stackoverflow.com/questions/14113923/rapidxml-print-header-has-undefined-methods + template +inline OutIt print_children(OutIt out, const xml_node *node, int flags, int indent); + +template +inline OutIt print_attributes(OutIt out, const xml_node *node, int flags); + +template +inline OutIt print_data_node(OutIt out, const xml_node *node, int flags, int indent); + +template +inline OutIt print_cdata_node(OutIt out, const xml_node *node, int flags, int indent); + +template +inline OutIt print_element_node(OutIt out, const xml_node *node, int flags, int indent); + +template +inline OutIt print_declaration_node(OutIt out, const xml_node *node, int flags, int indent); + +template +inline OutIt print_comment_node(OutIt out, const xml_node *node, int flags, int indent); + +template +inline OutIt print_doctype_node(OutIt out, const xml_node *node, int flags, int indent); + +template +inline OutIt print_pi_node(OutIt out, const xml_node *node, int flags, int indent); + + // Print node + template + inline OutIt print_node(OutIt out, const xml_node *node, int flags, int indent) + { + // Print proper node type + switch (node->type()) + { + + // Document + case node_document: + out = print_children(out, node, flags, indent); + break; + + // Element + case node_element: + out = print_element_node(out, node, flags, indent); + break; + + // Data + case node_data: + out = print_data_node(out, node, flags, indent); + break; + + // CDATA + case 
node_cdata: + out = print_cdata_node(out, node, flags, indent); + break; + + // Declaration + case node_declaration: + out = print_declaration_node(out, node, flags, indent); + break; + + // Comment + case node_comment: + out = print_comment_node(out, node, flags, indent); + break; + + // Doctype + case node_doctype: + out = print_doctype_node(out, node, flags, indent); + break; + + // Pi + case node_pi: + out = print_pi_node(out, node, flags, indent); + break; + + // Unknown + default: + assert(0); + break; + } + + // If indenting not disabled, add line break after node + if (!(flags & print_no_indenting)) + *out = Ch('\n'), ++out; + + // Return modified iterator + return out; + } + + // Print children of the node + template + inline OutIt print_children(OutIt out, const xml_node *node, int flags, int indent) + { + for (xml_node *child = node->first_node(); child; child = child->next_sibling()) + out = print_node(out, child, flags, indent); + return out; + } + + // Print attributes of the node + template + inline OutIt print_attributes(OutIt out, const xml_node *node, int flags) + { + for (xml_attribute *attribute = node->first_attribute(); attribute; attribute = attribute->next_attribute()) + { + if (attribute->name() && attribute->value()) + { + // Print attribute name + *out = Ch(' '), ++out; + out = copy_chars(attribute->name(), attribute->name() + attribute->name_size(), out); + *out = Ch('='), ++out; + // Print attribute value using appropriate quote type + if (find_char(attribute->value(), attribute->value() + attribute->value_size())) + { + *out = Ch('\''), ++out; + out = copy_and_expand_chars(attribute->value(), attribute->value() + attribute->value_size(), Ch('"'), out); + *out = Ch('\''), ++out; + } + else + { + *out = Ch('"'), ++out; + out = copy_and_expand_chars(attribute->value(), attribute->value() + attribute->value_size(), Ch('\''), out); + *out = Ch('"'), ++out; + } + } + } + return out; + } + + // Print data node + template + inline OutIt 
print_data_node(OutIt out, const xml_node *node, int flags, int indent) + { + assert(node->type() == node_data); + if (!(flags & print_no_indenting)) + out = fill_chars(out, indent, Ch('\t')); + out = copy_and_expand_chars(node->value(), node->value() + node->value_size(), Ch(0), out); + return out; + } + + // Print data node + template + inline OutIt print_cdata_node(OutIt out, const xml_node *node, int flags, int indent) + { + assert(node->type() == node_cdata); + if (!(flags & print_no_indenting)) + out = fill_chars(out, indent, Ch('\t')); + *out = Ch('<'); ++out; + *out = Ch('!'); ++out; + *out = Ch('['); ++out; + *out = Ch('C'); ++out; + *out = Ch('D'); ++out; + *out = Ch('A'); ++out; + *out = Ch('T'); ++out; + *out = Ch('A'); ++out; + *out = Ch('['); ++out; + out = copy_chars(node->value(), node->value() + node->value_size(), out); + *out = Ch(']'); ++out; + *out = Ch(']'); ++out; + *out = Ch('>'); ++out; + return out; + } + + // Print element node + template + inline OutIt print_element_node(OutIt out, const xml_node *node, int flags, int indent) + { + assert(node->type() == node_element); + + // Print element name and attributes, if any + if (!(flags & print_no_indenting)) + out = fill_chars(out, indent, Ch('\t')); + *out = Ch('<'), ++out; + out = copy_chars(node->name(), node->name() + node->name_size(), out); + out = print_attributes(out, node, flags); + + // If node is childless + if (node->value_size() == 0 && !node->first_node()) + { + // Print childless node tag ending + *out = Ch('/'), ++out; + *out = Ch('>'), ++out; + } + else + { + // Print normal node tag ending + *out = Ch('>'), ++out; + + // Test if node contains a single data node only (and no other nodes) + xml_node *child = node->first_node(); + if (!child) + { + // If node has no children, only print its value without indenting + out = copy_and_expand_chars(node->value(), node->value() + node->value_size(), Ch(0), out); + } + else if (child->next_sibling() == 0 && child->type() == node_data) 
+ { + // If node has a sole data child, only print its value without indenting + out = copy_and_expand_chars(child->value(), child->value() + child->value_size(), Ch(0), out); + } + else + { + // Print all children with full indenting + if (!(flags & print_no_indenting)) + *out = Ch('\n'), ++out; + out = print_children(out, node, flags, indent + 1); + if (!(flags & print_no_indenting)) + out = fill_chars(out, indent, Ch('\t')); + } + + // Print node end + *out = Ch('<'), ++out; + *out = Ch('/'), ++out; + out = copy_chars(node->name(), node->name() + node->name_size(), out); + *out = Ch('>'), ++out; + } + return out; + } + + // Print declaration node + template + inline OutIt print_declaration_node(OutIt out, const xml_node *node, int flags, int indent) + { + // Print declaration start + if (!(flags & print_no_indenting)) + out = fill_chars(out, indent, Ch('\t')); + *out = Ch('<'), ++out; + *out = Ch('?'), ++out; + *out = Ch('x'), ++out; + *out = Ch('m'), ++out; + *out = Ch('l'), ++out; + + // Print attributes + out = print_attributes(out, node, flags); + + // Print declaration end + *out = Ch('?'), ++out; + *out = Ch('>'), ++out; + + return out; + } + + // Print comment node + template + inline OutIt print_comment_node(OutIt out, const xml_node *node, int flags, int indent) + { + assert(node->type() == node_comment); + if (!(flags & print_no_indenting)) + out = fill_chars(out, indent, Ch('\t')); + *out = Ch('<'), ++out; + *out = Ch('!'), ++out; + *out = Ch('-'), ++out; + *out = Ch('-'), ++out; + out = copy_chars(node->value(), node->value() + node->value_size(), out); + *out = Ch('-'), ++out; + *out = Ch('-'), ++out; + *out = Ch('>'), ++out; + return out; + } + + // Print doctype node + template + inline OutIt print_doctype_node(OutIt out, const xml_node *node, int flags, int indent) + { + assert(node->type() == node_doctype); + if (!(flags & print_no_indenting)) + out = fill_chars(out, indent, Ch('\t')); + *out = Ch('<'), ++out; + *out = Ch('!'), ++out; + *out = 
Ch('D'), ++out; + *out = Ch('O'), ++out; + *out = Ch('C'), ++out; + *out = Ch('T'), ++out; + *out = Ch('Y'), ++out; + *out = Ch('P'), ++out; + *out = Ch('E'), ++out; + *out = Ch(' '), ++out; + out = copy_chars(node->value(), node->value() + node->value_size(), out); + *out = Ch('>'), ++out; + return out; + } + + // Print pi node + template + inline OutIt print_pi_node(OutIt out, const xml_node *node, int flags, int indent) + { + assert(node->type() == node_pi); + if (!(flags & print_no_indenting)) + out = fill_chars(out, indent, Ch('\t')); + *out = Ch('<'), ++out; + *out = Ch('?'), ++out; + out = copy_chars(node->name(), node->name() + node->name_size(), out); + *out = Ch(' '), ++out; + out = copy_chars(node->value(), node->value() + node->value_size(), out); + *out = Ch('?'), ++out; + *out = Ch('>'), ++out; + return out; + } + + } + //! \endcond + + /////////////////////////////////////////////////////////////////////////// + // Printing + + //! Prints XML to given output iterator. + //! \param out Output iterator to print to. + //! \param node Node to be printed. Pass xml_document to print entire document. + //! \param flags Flags controlling how XML is printed. + //! \return Output iterator pointing to position immediately after last character of printed text. + template + inline OutIt print(OutIt out, const xml_node &node, int flags = 0) + { + return internal::print_node(out, &node, flags, 0); + } + +#ifndef RAPIDXML_NO_STREAMS + + //! Prints XML to given output stream. + //! \param out Output stream to print to. + //! \param node Node to be printed. Pass xml_document to print entire document. + //! \param flags Flags controlling how XML is printed. + //! \return Output stream. + template + inline std::basic_ostream &print(std::basic_ostream &out, const xml_node &node, int flags = 0) + { + print(std::ostream_iterator(out), node, flags); + return out; + } + + //! Prints formatted XML to given output stream. Uses default printing flags. 
Use print() function to customize printing process. + //! \param out Output stream to print to. + //! \param node Node to be printed. + //! \return Output stream. + template + inline std::basic_ostream &operator <<(std::basic_ostream &out, const xml_node &node) + { + return print(out, node); + } + +#endif + +} + +#endif diff --git a/src/rapidxml/rapidxml_utils.hpp b/src/rapidxml/rapidxml_utils.hpp new file mode 100644 index 0000000..37c2953 --- /dev/null +++ b/src/rapidxml/rapidxml_utils.hpp @@ -0,0 +1,122 @@ +#ifndef RAPIDXML_UTILS_HPP_INCLUDED +#define RAPIDXML_UTILS_HPP_INCLUDED + +// Copyright (C) 2006, 2009 Marcin Kalicinski +// Version 1.13 +// Revision $DateTime: 2009/05/13 01:46:17 $ +//! \file rapidxml_utils.hpp This file contains high-level rapidxml utilities that can be useful +//! in certain simple scenarios. They should probably not be used if maximizing performance is the main objective. + +#include "rapidxml.hpp" +#include +#include +#include +#include + +namespace rapidxml +{ + + //! Represents data loaded from a file + template + class file + { + + public: + + //! Loads file into the memory. Data will be automatically destroyed by the destructor. + //! \param filename Filename to load. + file(const char *filename) + { + using namespace std; + + // Open stream + basic_ifstream stream(filename, ios::binary); + if (!stream) + throw runtime_error(string("cannot open file ") + filename); + stream.unsetf(ios::skipws); + + // Determine stream size + stream.seekg(0, ios::end); + size_t size = stream.tellg(); + stream.seekg(0); + + // Load data and add terminating 0 + m_data.resize(size + 1); + stream.read(&m_data.front(), static_cast(size)); + m_data[size] = 0; + } + + //! Loads file into the memory. Data will be automatically destroyed by the destructor + //! 
\param stream Stream to load from + file(std::basic_istream &stream) + { + using namespace std; + + // Load data and add terminating 0 + stream.unsetf(ios::skipws); + m_data.assign(istreambuf_iterator(stream), istreambuf_iterator()); + if (stream.fail() || stream.bad()) + throw runtime_error("error reading stream"); + m_data.push_back(0); + } + + //! Gets file data. + //! \return Pointer to data of file. + Ch *data() + { + return &m_data.front(); + } + + //! Gets file data. + //! \return Pointer to data of file. + const Ch *data() const + { + return &m_data.front(); + } + + //! Gets file data size. + //! \return Size of file data, in characters. + std::size_t size() const + { + return m_data.size(); + } + + private: + + std::vector m_data; // File data + + }; + + //! Counts children of node. Time complexity is O(n). + //! \return Number of children of node + template + inline std::size_t count_children(xml_node *node) + { + xml_node *child = node->first_node(); + std::size_t count = 0; + while (child) + { + ++count; + child = child->next_sibling(); + } + return count; + } + + //! Counts attributes of node. Time complexity is O(n). + //! \return Number of attributes of node + template + inline std::size_t count_attributes(xml_node *node) + { + xml_attribute *attr = node->first_attribute(); + std::size_t count = 0; + while (attr) + { + ++count; + attr = attr->next_attribute(); + } + return count; + } + +} + +#endif diff --git a/src/read_ods_.cpp b/src/read_ods_.cpp new file mode 100644 index 0000000..2bf3f3e --- /dev/null +++ b/src/read_ods_.cpp @@ -0,0 +1,276 @@ +#include "cpp11.hpp" +#include "cpp11/r_string.hpp" + +#include "rapidxml/rapidxml.hpp" +#include "is_ods.h" + +#include +#include +#include + + + + + + +std::string parse_p(rapidxml::xml_node<>* node){ + /*Deal with text inside cells. Cells can contain just text (node_data), or a + mixture of text and other nodes (node_element). We usually just want the text + from these nodes (e.g. 
if there's a link), but we also need to consider the + text:s node, which saves repeated spaces*/ + std::string out; + char* name; + int rep_space; + for (rapidxml::xml_node<>* n = node->first_node(); n; n=n->next_sibling()){ + if (n->type() == rapidxml::node_element) + { + name = n->name(); + if (strcmp(name,"text:s") == 0){ + if(n->first_attribute("text:c") != NULL){ + rep_space = atoi(n->first_attribute("text:c")->value()); + } else { + rep_space = 1; + } + out = out.append(std::string(rep_space, ' ')); + } else if (strcmp(name,"text:line-break") == 0){ + out = out.append("\n"); + } else if (strcmp(name, "text:a") == 0){ + if(!(n->first_node("text:a"))){ //Prevent crash by making pathological recursive links + out = out.append(parse_p(n)); + } + } else { + out = out.append(n->value()); + } + } + else if (n->type() == rapidxml::node_data){ + out = out.append(n->value()); + } + } + return out; +} + +std::string parse_textp(rapidxml::xml_node<>* cell){ + //This isn't very efficient. It is theoretically faster to make a list of pointers, assign the + //memory first and then concatenate them all into the freed memory. However this is hard to understand + //and not a significant problem. If you were looking for efficincies though, this would be a good choice. + std::string out; + int i = 0; + for (rapidxml::xml_node<>* n = cell->first_node("text:p"); n ; n=n->next_sibling("text:p")){ + if (i > 0){ + out = out.append("\n"); + } + out = out.append(parse_p(n)); + i++; + } + return out; +} + +std::string parse_single_cell(rapidxml::xml_node<>* cell, bool formula_as_formula, bool use_office_value){ + std::string cell_value; + + char* value_type = (cell->first_attribute("office:value-type") != 0) ? + cell->first_attribute("office:value-type")->value() : NULL; + if(formula_as_formula && cell->first_attribute("table:formula")){ + cell_value = cell->first_attribute("table:formula")->value(); + } else { + cell_value = (cell->first_node("text:p") != 0) ? 
parse_textp(cell) : "";
+        // office:value may be absent (e.g. date cells carry office:date-value
+        // instead), so it is looked up once and checked before use
+        rapidxml::xml_attribute<>* office_value = cell->first_attribute("office:value");
+        if((value_type) && (office_value != 0) &&
+
+            ((cell_value.length() == 0 && use_office_value) ||
+            (strcmp(value_type, "float") == 0 ||
+            strcmp(value_type, "currency") == 0||
+            strcmp(value_type, "percentage") == 0))){
+
+            cell_value = office_value->value();
+        }
+    }
+    return cell_value;
+}
+
+// Make an array of pointers to each cell
+// The result has one inner vector per sheet row from start_row onwards; a
+// repeated row or cell is entered once per repeat, pointing at the same node.
+// stop_row < 1 / stop_col < 1 mean "no upper limit". Trailing blank cells of
+// each row and trailing empty rows are dropped, but at least one (possibly
+// empty) row is always returned.
+std::vector<std::vector<rapidxml::xml_node<>*>> find_rows(rapidxml::xml_node<>* sheet,
+                                                          int start_row,
+                                                          const int stop_row,
+                                                          int start_col,
+                                                          const int stop_col){
+
+    /*Rows and columns are 1-based because both Excel and R treat arrays
+    this way*/
+    int row_repeat_count;
+    int col_repeat_count;
+    // A missing sheet, or a sheet without rows, simply yields no cells
+    rapidxml::xml_node<>* row = (sheet != 0) ? sheet->first_node("table:table-row") : 0;
+    rapidxml::xml_node<>* cell;
+
+    if (start_row < 1){
+        start_row = 1;
+    }
+    if (start_col < 1){
+        start_col = 1;
+    }
+    int nrows = stop_row - start_row + 1;
+
+    std::vector<std::vector<rapidxml::xml_node<>*>> rows((nrows < 1) ? 1 : nrows);
+
+    for (int i = 1; row != 0 && (i <= stop_row || stop_row < 1); ){
+        // i keeps track of what nominal row we are on
+
+        // Check for row repeats
+        rapidxml::xml_attribute<>* row_repeat = row->first_attribute("table:number-rows-repeated");
+        row_repeat_count = (row_repeat == 0) ? 1 : std::atoi(row_repeat->value());
+
+        // A row is blank if it has no children, or a single child with no contents
+        rapidxml::xml_node<>* first_child = row->first_node();
+        const bool blank_row = first_child == 0 ||
+            (first_child->next_sibling() == 0 && first_child->first_node() == 0);
+
+        // Stop if all repeats done, or if we're at the last requested row.
+        // i is advanced once per repeat, so it is compared on its own here
+        // (adding r_repeat to it would count every repeat twice).
+        for (int r_repeat = 0; r_repeat < row_repeat_count && (stop_row < 1 || i <= stop_row); r_repeat++){
+
+            // Check size of container.
+            if ((int)rows.size() < i - start_row + 1){
+                rows.resize(rows.size() * 2);
+            }
+            if (blank_row){
+                // Look ahead. If this is the last row, stop, otherwise add a blank row
+                if(row->next_sibling() == 0){
+                    break;
+                }
+                // Otherwise leave the row blank
+
+            // if row is not blank, and in range deal with cells
+            } else if(i >= start_row) {
+                unsigned int last_non_blank = 0;
+                cell = row->first_node("table:table-cell");
+                // cell is 0 when the row has no table:table-cell, or after the last one
+                for (int j = 1; cell != 0 && (j <= stop_col || stop_col < 1); ){
+                    // Check for column repeats
+                    rapidxml::xml_attribute<>* col_repeat = cell->first_attribute("table:number-columns-repeated");
+                    col_repeat_count = (col_repeat != 0) ? std::atoi(col_repeat->value()) : 1;
+
+                    // Stop if all column repeats done, or if we're at the last requested column.
+                    // As with rows, j already advances once per repeat.
+                    for (int c_repeat = 0; c_repeat < col_repeat_count && (stop_col < 1 || j <= stop_col); c_repeat++){
+                        bool is_blank = true;
+                        // If this cell is blank (i.e. contains no children)
+                        if (cell->first_node() == 0){
+                            // Look ahead. If this is the last column, stop.
+                            if(cell->next_sibling() == 0){
+                                break;
+                            }
+                        } else {
+                            // Otherwise mark that cell is not blank
+                            is_blank = false;
+                        }
+                        // If we're in range add pointer to the array
+                        if (stop_col < 1 || j >= start_col){
+                            rows[i - start_row].push_back(cell);
+                            if(!is_blank){
+                                last_non_blank = rows[i - start_row].size();
+                            }
+                        }
+
+                        j++;
+
+                    }
+                    // Move on; the loop condition stops after the last cell
+                    cell = cell->next_sibling("table:table-cell");
+                }
+                // Remove trailing blank cells
+                rows[i - start_row].resize(last_non_blank);
+
+            }
+            i++;
+        }
+        // Move on; the loop condition stops after the last row
+        row = row->next_sibling("table:table-row");
+    }
+    // Remove trailing empty elements
+    unsigned int rowsize = 0;
+    for (unsigned int i = 0; i < rows.size(); i++){
+        if(rows[i].size() > 0){
+            rowsize = i;
+        }
+    }
+    rows.resize(rowsize + 1);
+    return rows;
+}
+
+// Read one sheet of an ODS file into a flat character vector.
+// Element 0 is the number of columns and element 1 the number of rows (both
+// as text, "0" and "0" when there is no content); the cells follow row by
+// row, short rows being padded with "" up to the widest row.
+// Throws std::invalid_argument if the file is not an ODS file, if sheet < 1,
+// or if the requested sheet cannot be found in content.xml.
+[[cpp11::register]]
+cpp11::strings read_ods_(const std::string file,
+                         int start_row,
+                         int stop_row,
+                         int start_col,
+                         int stop_col,
+                         const int sheet,
+                         const bool formula_as_formula) {
+    if(!is_ods(file)){
+        throw std::invalid_argument(file + " is not a correct ODS file");
+    }
+    if(sheet < 1){
+        throw std::invalid_argument("Cannot have sheet index less than 1");
+    }
+
+    unsigned int out_width = 0;
+    unsigned int out_length;
+
+    std::string xmlFile = zip_buffer(file, "content.xml");
+    rapidxml::xml_document<> spreadsheet;
+    spreadsheet.parse<0>(&xmlFile[0]);
+
+    // Walk root -> office:body -> office:spreadsheet -> first table:table,
+    // stopping at the first missing level instead of dereferencing a null node
+    const char* const path[] = {"office:body", "office:spreadsheet", "table:table"};
+    rapidxml::xml_node<>* rootNode = spreadsheet.first_node();
+    for (unsigned int k = 0; rootNode != 0 && k < 3; k++){
+        rootNode = rootNode->first_node(path[k]);
+    }
+    if (rootNode == 0){
+        throw std::invalid_argument(file + " does not contain a spreadsheet table");
+    }
+
+    for (int i = 1; i < sheet; i++){
+        rootNode = rootNode->next_sibling("table:table");
+        if (rootNode == 0){
+            throw std::invalid_argument("Cannot find sheet " + std::to_string(sheet) +
+                                        ": the file only has " + std::to_string(i) + " sheet(s)");
+        }
+    }
+
+    std::vector<std::vector<rapidxml::xml_node<>*>> contents;
+
+    contents = find_rows(rootNode, start_row,stop_row,start_col,stop_col);
+
+    // Get dimensions of output
+    out_length = contents.size();
+    for (unsigned int i = 0; i < contents.size(); i++){
+        if (contents[i].size() > out_width){
+            out_width = contents[i].size();
+        }
+    }
+
+    // If there is no content
+    if (out_width * out_length == 0){
+        cpp11::writable::strings cell_values(2);
+        cell_values[0] = "0";
+        cell_values[1] = "0";
+        return cell_values;
+    }
+
+    cpp11::writable::strings cell_values(out_width*out_length + 2);
+    cell_values[0] = std::to_string(out_width);
+    cell_values[1] = std::to_string(out_length);
+
+    int t = 2;
+    for (unsigned int i = 0; i < contents.size(); i++){
+        for (unsigned int j = 0; j < contents[i].size(); j++){
+            cell_values[t] = (contents[i][j] != 0) ? 
+ Rf_mkCharCE(parse_single_cell(contents[i][j], formula_as_formula, true).c_str(), CE_UTF8) : NA_STRING; + t++; + } + // Pad rows to even width + if(contents[i].size() < out_width){ + unsigned int row_width = contents[i].size(); + for (unsigned int j = 0; j + row_width < out_width; j++){ + cell_values[t] = ""; + t++; + } + } + } + return cell_values; + } \ No newline at end of file diff --git a/src/readxl/zip.cpp b/src/readxl/zip.cpp new file mode 100644 index 0000000..f01e1d0 --- /dev/null +++ b/src/readxl/zip.cpp @@ -0,0 +1,44 @@ +#pragma once +#include "zip.h" +#include "../rapidxml/rapidxml_print.hpp" + +#include "cpp11/function.hpp" +#include "cpp11/raws.hpp" + + +std::string zip_buffer(const std::string& zip_path, + const std::string& file_path) { + cpp11::function zip_buffer = cpp11::package("readODScpp")["zip_buffer"]; + + cpp11::raws xml(zip_buffer(zip_path, file_path)); + std::string buffer(RAW(xml), RAW(xml) + xml.size()); + buffer.push_back('\0'); + + return buffer; +} + +bool zip_has_file(const std::string& zip_path, + const std::string& file_path) { + cpp11::function zip_has_file = cpp11::package("readODScpp")["zip_has_file"]; + return zip_has_file(zip_path, file_path); +} + +std::string xml_print(std::string xml) { + rapidxml::xml_document<> doc; + + xml.push_back('\0'); + doc.parse<0>(&xml[0]); + + std::string s; + rapidxml::print(std::back_inserter(s), doc, 0); + + return s; +} + +[[cpp11::register]] +void zip_xml(const std::string& zip_path, + const std::string& file_path) { + + std::string buffer = zip_buffer(zip_path, file_path); + Rprintf("%s", xml_print(buffer).c_str()); +} \ No newline at end of file diff --git a/src/readxl/zip.h b/src/readxl/zip.h new file mode 100644 index 0000000..463f6e5 --- /dev/null +++ b/src/readxl/zip.h @@ -0,0 +1,7 @@ +#pragma once + +#include "../rapidxml/rapidxml.hpp" +#include + +std::string zip_buffer(const std::string& zip_path, const std::string& file_path); +bool zip_has_file(const std::string& zip_path, const 
std::string& file_path); \ No newline at end of file diff --git a/tests/testthat/test_col_types.R b/tests/testthat/test_col_types.R index 77a551d..c53d516 100644 --- a/tests/testthat/test_col_types.R +++ b/tests/testthat/test_col_types.R @@ -6,7 +6,6 @@ test_that("col_types ODS", { }) ### test for issue #41 - test_that("multi col_types ODS", { x <- read_ods('../testdata/col_types.ods', col_types = NA) expect_equal(class(x[,2]), "character") @@ -14,11 +13,10 @@ test_that("multi col_types ODS", { expect_equal(class(x[,2]), "character") }) -### throw an error if col_types is not col_spec, single value NA or single value NULL. +### throw an error if col_types is not col_spec, single value NA or single value NULL test_that("col_types throw error", { expect_error(read_ods('../testdata/col_types.ods', col_types = 123)) expect_error(read_ods('../testdata/col_types.ods', col_types = c(NA, NA))) expect_error(read_ods('../testdata/col_types.ods', col_types = c(NA, 123))) - -}) +}) \ No newline at end of file diff --git a/tests/testthat/test_issue81.R b/tests/testthat/test_issue81.R index 2f5b1b6..cf0649c 100644 --- a/tests/testthat/test_issue81.R +++ b/tests/testthat/test_issue81.R @@ -2,21 +2,18 @@ ## excel_repeat.ods is created with MS Office 365 online test_that("issue 81, correctness", { - skip_on_cran() res <- read_ods("../testdata/excel_repeat.ods", col_names = FALSE) expect_identical(res[,1], c(rep("A", 12), rep("C", 11))) expect_identical(res[,2], c(rep("B", 12), rep("D", 11))) }) test_that("issue 81 real test", { - skip_on_cran() file <- "../testdata/issue81.ods" res <- read_ods(file, sheet = 2, skip = 4) - testthat::expect_equal(sum(is.na(res[,1])), 0) + expect_equal(sum(is.na(res[,1])), 0) }) test_that("issue 84", { - skip_on_cran() file <- "../testdata/issue81.ods" - testthat::expect_error(readODS::read_ods(file, sheet = "Leavers"), NA) -}) + expect_error(read_ods(file, sheet = "Leavers"), NA) +}) \ No newline at end of file diff --git 
a/tests/testthat/test_legacy.R b/tests/testthat/test_legacy.R index c656071..1c4105d 100644 --- a/tests/testthat/test_legacy.R +++ b/tests/testthat/test_legacy.R @@ -1,62 +1,42 @@ -## tests for readODS.R +## Updated to remove read.ods and getNrOfSheetsInODS -s_getNrOfSheetsInODS <- function(...) { - suppressWarnings({ - return(getNrOfSheetsInODS(...)) - }) -} - -s_read.ods <- function(...) { - suppressWarnings({ - return(read.ods(...)) - }) -} - -test_that("ods_sheets", { +test_that("get_num_sheets_in_ods", { file <- "../testdata/test.ods" - expect_warning(ods_sheets(file)) -}) - -test_that("getNrOfSheetsInODS", { - file <- "../testdata/test.ods" - expect_warning(getNrOfSheetsInODS(file)) - expect_equal(s_getNrOfSheetsInODS(file),1) + expect_equal(get_num_sheets_in_ods(file),1) file <- "../testdata/multisheet.ods" - expect_equal(s_getNrOfSheetsInODS(file),4) + expect_equal(get_num_sheets_in_ods(file),4) file <- "../testdata/sum.ods" - expect_equal(s_getNrOfSheetsInODS(file),1) + expect_equal(get_num_sheets_in_ods(file),1) file <- "../testdata/readODStestfilegoogledocscreated.ods" - expect_equal(s_getNrOfSheetsInODS(file),4) + expect_equal(get_num_sheets_in_ods(file),4) }) - test_that("read_ods", { file <- "../testdata/sum.ods" - expect_warning(read.ods(file)) - expect_equal(s_read.ods(file, sheet=1, formulaAsFormula=TRUE)[3,1],"of:=SUM([.A1:.A2])") - expect_equal(s_read.ods(file, sheet=1, formulaAsFormula=FALSE)[3,1],"3") + expect_equal(read_ods(file, sheet=1, col_names = FALSE, formula_as_formula=TRUE)[3,1],"of:=SUM([.A1:.A2])") + expect_equal(read_ods(file, sheet=1, col_names = FALSE, col_types = NA, formula_as_formula=FALSE)[3,1],"3") - df <- data.frame(A=as.character(1:3),stringsAsFactors = F) - rODS <- s_read.ods(file, sheet=1, formulaAsFormula=FALSE) + df <- data.frame(A = as.character(1:3),stringsAsFactors = F) + rODS <- read_ods(file, sheet = 1, col_names = FALSE, col_types=NA, formula_as_formula = FALSE) expect_equal(rODS, df) file <- 
"../testdata/lotsofnothing_test.ods" - expect_equal(dim(s_read.ods(file, sheet=1)),c(21,13)) # test if empty rows at the end are ignored - expect_equal(class(s_read.ods(file, sheet=1)),"data.frame") - ## small file + expect_equal(dim(read_ods(file, sheet = 1, col_names = FALSE)),c(21,13)) # test if empty rows at the end are ignored + expect_equal(class(read_ods(file, sheet=1)), "data.frame") + ## small file file <- "../testdata/table.ods" - df <- data.frame(A=c("gender","m","f","m"), - B=c("visit1","4","8","8"), - C=c("visit2","6","9","2"), - D=c("visit3","8","4","1"), + df <- data.frame(A = c("gender", "m", "f", "m"), + B = c("visit1", "4", "8", "8"), + C = c("visit2", "6", "9", "2"), + D = c("visit3", "8", "4", "1"), stringsAsFactors = F) - expect_equal(s_read.ods(file, sheet=1),df) + expect_equal(read_ods(file, sheet = 1, col_names = FALSE, col_types = NA), df) file <- "../testdata/layout_test.ods" - sheet1 <- s_read.ods(file, sheet=1) - expect_equal(sheet1[8,"F"],"empty") # this is a repeated element + sheet1 <- read_ods(file, sheet = 1, col_names = FALSE) + expect_equal(sheet1[8, "F"], "empty") # this is a repeated element - sheet2 <- s_read.ods(file, sheet=2) + sheet2 <- read_ods(file, sheet=2, col_names = FALSE) expect_equal(dim(sheet2),c(22,13)) expect_true(all(sheet1[21,]==sheet2[22,])) @@ -70,9 +50,9 @@ test_that("read_ods", { df[7,5] <- "3" df[9,5] <- "1" df[10,7] <- "1" - sheet2 <- s_read.ods(file, sheet=2) + sheet2 <- read_ods(file, sheet=2, col_names = FALSE) expect_true(all(sheet2==df, na.rm = TRUE)) file <- "../testdata/1996-2000.ods" - expect_true(all(dim(s_read.ods(file)[[2]])==c(36,21))) -}) + expect_true(all(dim(read_ods(file, sheet = 2, col_names = FALSE)) == c(36, 21))) +}) \ No newline at end of file diff --git a/tests/testthat/test_na.R b/tests/testthat/test_na.R index ab962a5..9671f25 100644 --- a/tests/testthat/test_na.R +++ b/tests/testthat/test_na.R @@ -1,4 +1,4 @@ -test_that("Expected na behavior", { +test_that("expected na behaviour", 
{ na_res <- read_ods('../testdata/na_test.ods', na = c("3", "999", "missing")) expect_true(is.na(na_res[4,1])) expect_true(is.na(na_res[4,2])) @@ -40,4 +40,4 @@ test_that("type_convert honoring na #2, issue 78", { expect_false(any(is.na(na_res$ex2[1:9]))) expect_false(any(is.na(na_res$ex3[1:9]))) expect_false(any(is.na(na_res$ex4[1:9]))) -}) +}) \ No newline at end of file diff --git a/tests/testthat/test_read_ods.R b/tests/testthat/test_read_ods.R index a0ec5f0..2c870d8 100644 --- a/tests/testthat/test_read_ods.R +++ b/tests/testthat/test_read_ods.R @@ -1,24 +1,44 @@ -test_that("No path", { - expect_error(read_ods(), "No file path was provided for the") +test_that("Incorrect Argument", { + expect_error(read_ods(), "No file path was") + expect_error(read_ods(path = "not/real/file.ods"), "file does not exist") + expect_error(read_ods(path = "../testdata/sum.ods", col_names = "a"), "col_names must be of type `boolean`") + expect_error(read_ods(path = '../testdata/sum.ods', col_types = "a"), "Unknown col_types. 
Can either be a class col_spec, NULL or NA.") + expect_error(read_ods(path = "../testdata/sum.ods", skip = -1), "skip must be a positive integer") + expect_error(read_ods(path = "../testdata/sum.ods", formula_as_formula = "a"), "formula_as_formula must be of type `boolean`") + expect_error(read_ods(path = "../testdata/sum.ods", row_names = "a"), "row_names must be of type `boolean`") + expect_error(read_ods(path = "../testdata/sum.ods", strings_as_factors = "a"), "strings_as_factors must be of type `boolean`") + expect_error(read_ods(path = "../testdata/sum.ods", check_names = "a"), "check_names must be of type `boolean`") + expect_error(read_ods(path = "../testdata/sum.ods", verbose = "a"), "verbose must be of type `boolean`") }) test_that("Single column ODS", { single_col <- read_ods('../testdata/sum.ods', sheet = 1) expect_equal(ncol(single_col),1) expect_equal(colnames(single_col), c("1")) + expect_warning(read_ods('../testdata/sum.ods', sheet = 1, row_names = TRUE), "Cannot make") }) -test_that('read_ods works with all kind of character encodings', { - expect_error(read_ods('../testdata/wild_character_encoding.ods', sheet='évaluation'), NA) # é as e and accent - expect_error(read_ods('../testdata/wild_character_encoding.ods', sheet='évaluation 2'), NA) # é as one character +test_that("Single row ODS", { + expect_warning(single_row <- read_ods('../testdata/onerow.ods', sheet = 1), "Cannot make") + expect_equal(nrow(single_row), 1) + expect_equal(single_row[1,1], 1) }) -test_that('read_ods reads decimals properly with comma', { - df <- read_ods('../testdata/decimal_comma.ods') - df_expected <- structure(list(A = 3.4, B = 2.3, C = 0.03), +test_that("Single column range", { + expect_error(read_ods("../testdata/starwars.ods", range="A1:A5"), NA) +}) + +test_that("read_ods works with all kind of character encodings", { + expect_error(read_ods('../testdata/wild_character_encoding.ods', sheet='évaluation'), NA) # é as e and accent + 
expect_error(read_ods('../testdata/wild_character_encoding.ods', sheet='évaluation 2'), NA) # é as one character +}) + +test_that("read_ods reads decimals properly with comma", { + df <- read_ods('../testdata/decimal_comma.ods') + df_expected <- structure(list(A = 3.4, B = 2.3, C = 0.03), .Names = c("A", "B", "C"), row.names = 1L, class = "data.frame") - expect_equal(df, df_expected) + expect_equal(df, df_expected) }) test_that("eating space issue #74", { @@ -37,13 +57,13 @@ test_that("skip", { expect_equal(nrow(x), 10) expect_silent(x <- read_ods("../testdata/starwars.ods", skip = 1, col_names = FALSE)) expect_equal(nrow(x), 10) - expect_warning(x <- read_ods("../testdata/starwars.ods", skip = 11)) + expect_warning(x <- read_ods("../testdata/starwars.ods", skip = 11), "empty sheet") expect_equal(nrow(x), 0) }) test_that("Check names works properly", { - expect_silent(x <- read_ods("../testdata/test_naming.ods")) - expect_equal(colnames(x), c("a", "a", "Var.3")) - expect_silent(x <- read_ods("../testdata/test_naming.ods", check_names = TRUE)) - expect_equal(colnames(x), c("a", "a.1", "Var.3")) -}) + expect_silent(x <- read_ods("../testdata/test_naming.ods")) + expect_equal(colnames(x), c("a", "a", "Var.3")) + expect_silent(x <- read_ods("../testdata/test_naming.ods", check_names = TRUE)) + expect_equal(colnames(x), c("a", "a.1", "Var.3")) +}) \ No newline at end of file diff --git a/tests/testthat/test_verbose.R b/tests/testthat/test_verbose.R index 9678502..daf9c96 100644 --- a/tests/testthat/test_verbose.R +++ b/tests/testthat/test_verbose.R @@ -2,10 +2,3 @@ test_that("read_ods verbose switch", { expect_silent(read_ods('../testdata/col_types.ods')) expect_message(read_ods('../testdata/col_types.ods', verbose = TRUE)) }) - -## test_that("write_ods verbose switch", { -## expect_silent(write_ods(iris, 'iris.ods')) -## expect_message(write_ods(iris, 'iris.ods', verbose = TRUE)) -## }) - -## unlink("iris.ods") From 95c13bb0e59b8b75d27a0b340d6ce2f06f7d2007 Mon Sep 17 
00:00:00 2001 From: Peter <44036274+pbrohan@users.noreply.github.com> Date: Sat, 22 Jul 2023 02:43:35 +0100 Subject: [PATCH 2/3] Updated And Refactored code For better info on updates, see pbrohan/readODScpp --- .Rbuildignore | 6 +- .covrignore | 1 + DESCRIPTION | 9 +- NAMESPACE | 3 + NEWS.md | 23 + R/cpp11.R | 12 +- R/list_ods_sheets.R | 59 ++- R/readODS-package.R | 6 +- R/read_fods.R | 58 ++ R/read_ods.R | 164 ++++-- R/utils.R | 2 +- R/writeODS.R | 12 +- README.Rmd | 9 +- README.md | 40 +- codemeta.json | 31 +- man/get_num_sheets_in_fods.Rd | 31 ++ man/get_num_sheets_in_ods.Rd | 8 +- man/list_fods_sheets.Rd | 31 ++ man/list_ods_sheets.Rd | 8 +- man/read_fods.Rd | 69 +++ man/read_ods.Rd | 3 +- src/cpp11.cpp | 26 +- src/get_sheet_names.cpp | 67 ++- src/is_ods.cpp | 86 ++- src/is_ods.h | 3 +- src/read_flat_ods_.cpp | 98 ++++ src/read_ods_.cpp | 201 +------ src/read_ods_internals.cpp | 201 +++++++ src/read_ods_internals.h | 18 + src/readxl/zip.cpp | 25 +- starwars.fods | 846 ++++++++++++++++++++++++++++++ tests/testdata/empty.fods | 240 +++++++++ tests/testdata/empty.ods | Bin 0 -> 2776 bytes tests/testdata/flat.fods | 309 +++++++++++ tests/testdata/linkeddata.ods | Bin 0 -> 3300 bytes tests/testdata/merged.ods | Bin 17971 -> 3698 bytes tests/testdata/nocells.fods | 239 +++++++++ tests/testdata/norows.fods | 237 +++++++++ tests/testdata/onerow.ods | Bin 0 -> 2856 bytes tests/testthat.R | 1 + tests/testthat/test_list_sheets.R | 19 + tests/testthat/test_merged.R | 11 + tests/testthat/test_multiline.R | 2 +- tests/testthat/test_read_fods.R | 16 + tests/testthat/test_read_ods.R | 21 + vignettes/overview.Rmd | 11 +- 46 files changed, 2903 insertions(+), 359 deletions(-) create mode 100644 .covrignore create mode 100644 NEWS.md create mode 100644 R/read_fods.R create mode 100644 man/get_num_sheets_in_fods.Rd create mode 100644 man/list_fods_sheets.Rd create mode 100644 man/read_fods.Rd create mode 100644 src/read_flat_ods_.cpp create mode 100644 src/read_ods_internals.cpp 
create mode 100644 src/read_ods_internals.h create mode 100644 starwars.fods create mode 100644 tests/testdata/empty.fods create mode 100644 tests/testdata/empty.ods create mode 100644 tests/testdata/flat.fods create mode 100644 tests/testdata/linkeddata.ods create mode 100644 tests/testdata/nocells.fods create mode 100644 tests/testdata/norows.fods create mode 100644 tests/testdata/onerow.ods create mode 100644 tests/testthat/test_list_sheets.R create mode 100644 tests/testthat/test_merged.R create mode 100644 tests/testthat/test_read_fods.R diff --git a/.Rbuildignore b/.Rbuildignore index 52412f3..c9fdf18 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -8,13 +8,17 @@ notes.R ^\.travis\.yml$ ^codecov\.yml$ ^starwars\.ods$ +^starwars\.fods$ ^CRAN-RELEASE$ ^doc$ ^Meta$ ^\.github$ +^\.gitignore$ ^codemeta\.json$ ^benchmark$ ^CRAN-SUBMISSION$ ^tests/testdata/jts0501.ods$ - +^.vscode ^cran-comments\.md$ + +^.covrignore$ \ No newline at end of file diff --git a/.covrignore b/.covrignore new file mode 100644 index 0000000..1cc6d50 --- /dev/null +++ b/.covrignore @@ -0,0 +1 @@ +src/rapidxml diff --git a/DESCRIPTION b/DESCRIPTION index 5924c5b..83f2fad 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,9 @@ Package: readODS Type: Package Title: Read and Write ODS Files -Version: 1.8.3 -Authors@R: c(person("Gerrit-Jan", "Schutten", role = c("aut"), email = "phonixor@gmail.com"), person("Chung-hong", "Chan", role = c("aut", "cre"), email = "chainsawtiney@gmail.com", comment = c(ORCID = "0000-0002-6232-7530")), person("Peter", "Brohan", role = c("aut"), email = "peter.brohan@gmail.com"), person("Thomas J.", "Leeper", role = c("aut"), email = "thosjleeper@gmail.com", comment = c(ORCID = "0000-0003-4097-6326")), person("John", "Foster", role = c("ctb"), email = "john.x.foster@nab.com.au"), person("Sergio", "Oller", role = c("ctb")), person("Jim", "Hester", role = c("ctb"), email = "jim.hester@rstudio.com", comment = c(ORCID = "0000-0002-2739-7082")), person("Stephen", 
"Watts", role = c("ctb")), person("Arthur", "Katossky", role = c("ctb")), person("Stas", "Malavin", role = c("ctb")), person("Duncan", "Garmonsway", role = c("ctb")), person("Mehrad", "Mahmoudian", role = c("ctb")), person("Matt", "Kerlogue", role = c("ctb")), person("Detlef", "Steuer", role = c("aut"), email = "steuer@hsu-hh.de", comment = c(ORCID = "0000-0003-2676-5290")), person("Michal", "Lauer", role = c("ctb"), email = "michal.lauer.25@gmail.com"), person("Till", "Straube", role = c("ctb"), email = "straube@geo.uni-frankfurt.de")) +Version: 1.9.0 +Authors@R: + c(person("Gerrit-Jan", "Schutten", role = c("aut"), email = "phonixor@gmail.com"), person("Chung-hong", "Chan", role = c("aut", "cre"), email = "chainsawtiney@gmail.com", comment = c(ORCID = "0000-0002-6232-7530")), person("Peter", "Brohan", role = c("aut"), email = "peter.brohan@gmail.com"), person("Thomas J.", "Leeper", role = c("aut"), email = "thosjleeper@gmail.com", comment = c(ORCID = "0000-0003-4097-6326")), person("John", "Foster", role = c("ctb"), email = "john.x.foster@nab.com.au"), person("Sergio", "Oller", role = c("ctb")), person("Jim", "Hester", role = c("ctb"), email = "jim.hester@rstudio.com", comment = c(ORCID = "0000-0002-2739-7082")), person("Stephen", "Watts", role = c("ctb")), person("Arthur", "Katossky", role = c("ctb")), person("Stas", "Malavin", role = c("ctb")), person("Duncan", "Garmonsway", role = c("ctb")), person("Mehrad", "Mahmoudian", role = c("ctb")), person("Matt", "Kerlogue", role = c("ctb")), person("Detlef", "Steuer", role = c("aut"), email = "steuer@hsu-hh.de", comment = c(ORCID = "0000-0003-2676-5290")), person("Michal", "Lauer", role = c("ctb"), email = "michal.lauer.25@gmail.com"), person("Till", "Straube", role = c("ctb"), email = "straube@geo.uni-frankfurt.de")) Description: Read ODS (OpenDocument Spreadsheet) into R as data frame. Also support writing data frame into ODS file. 
URL: https://github.com/ropensci/readODS BugReports: https://github.com/ropensci/readODS/issues @@ -14,9 +15,9 @@ Imports: utils, purrr, zip -LinkingTo: +LinkingTo: cpp11 -Suggests: +Suggests: dplyr, testthat, datasets, diff --git a/NAMESPACE b/NAMESPACE index eeeb2d4..612f7ab 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,10 @@ # Generated by roxygen2: do not edit by hand +export(get_num_sheets_in_fods) export(get_num_sheets_in_ods) +export(list_fods_sheets) export(list_ods_sheets) +export(read_fods) export(read_ods) export(write_ods) useDynLib(readODS, .registration = TRUE) diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..0276b1f --- /dev/null +++ b/NEWS.md @@ -0,0 +1,23 @@ +# readODS 1.9.0 + +* Added a `NEWS.md` file to track changes to the package. +* Rewrote all reading functions in C++ for significant speed increase + +## list_ods_sheets + +* Added `include_external_data` as an argument (`FALSE` by default). This hides stored data from external sources not normally accessible to the user. + +## read_ods + +* Changed behaviour when only one row is read. The row now correctly appears as the top row in the dataframe, and a warning is given if column headers are requested that this would cause the output to be empty (**Note:** in this case column names are not assigned) +* Changed behaviour when only one column is read. Previously gave an error. If row names are requested, gives a warning that this would cause the output to be empty, and does not assign names. +* Sheets are now accepted as part of the `range` argument, e.g. `range = "Sheet2!A2:B7"`. If this and the `sheet` argument are given, this is preferred. +* Merged cells now have their value placed in the top-left cell. All other cells that would be covered by the merge are filled with `NA`. + +## read_fods + +* Reading (but not writing) flat ODS files is now supported using the functions `read_fods()`, `list_fods_sheets()`, `get_num_sheets_in_fods()`.
These work the same way as their analogue ODS functions. The extension does not need to be `fods`, however they do need to conform to the OO specification. + +## write_ods + +* Attempted to fix writing non UTF-8 strings to files. This still has some issues in versions of R \< 4.2, however it should now correctly write most text within your current locale. See [readme](README.md) for more details. \ No newline at end of file diff --git a/R/cpp11.R b/R/cpp11.R index ad45dc2..7e125c6 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -1,7 +1,15 @@ # Generated by cpp11: do not edit by hand -ods_get_sheet_names_ <- function(file, include_external_data) { - .Call(`_readODS_ods_get_sheet_names_`, file, include_external_data) +get_sheet_names_ <- function(file, include_external_data) { + .Call(`_readODS_get_sheet_names_`, file, include_external_data) +} + +get_flat_sheet_names_ <- function(file, include_external_data) { + .Call(`_readODS_get_flat_sheet_names_`, file, include_external_data) +} + +read_flat_ods_ <- function(file, start_row, stop_row, start_col, stop_col, sheet, formula_as_formula) { + .Call(`_readODS_read_flat_ods_`, file, start_row, stop_row, start_col, stop_col, sheet, formula_as_formula) } read_ods_ <- function(file, start_row, stop_row, start_col, stop_col, sheet, formula_as_formula) { diff --git a/R/list_ods_sheets.R b/R/list_ods_sheets.R index 028c0d8..9cc72ad 100644 --- a/R/list_ods_sheets.R +++ b/R/list_ods_sheets.R @@ -3,14 +3,19 @@ #' List all sheets in an ods file. #' #' @param path Path to the ods file -#' @param include_external_data A boolean value to show or hide sheets containing linked data (default false) +#' @param include_external_data A boolean value to show or hide sheets containing archived linked data (default false) #' @return A character vector of sheet names. 
#' @author Peter Brohan , Chung-hong Chan , Gerrit-Jan Schutten +#' @examples +#' \dontrun{ +#' # Get the list of names of sheets +#' list_ods_sheets("starwars.ods") +#' } #' @seealso #' use \code{\link{read_ods}} to read the data #' @export list_ods_sheets <- function(path, include_external_data = FALSE) { - return(ods_get_sheet_names_(path, include_external_data)) + return(get_sheet_names_(path, include_external_data)) } #' Get the Number of Sheets in an ODS File @@ -18,13 +23,59 @@ list_ods_sheets <- function(path, include_external_data = FALSE) { #' Get the number of sheets in an ods file #' #' @param path path to the ods file -#' @param include_external_data A boolean value declaring if external data sheets should be counted +#' @param include_external_data A boolean value declaring if sheets holding archived linked data should be included #' @return Number of sheets #' @author Peter Brohan , Chung-hong Chan , Gerrit-Jan Schutten +#' @examples +#' \dontrun{ +#' # Get the number of sheets +#' get_num_sheets_in_ods("starwars.ods") +#' } #' @seealso #' use \code{\link{read_ods}} to read the data #' @export get_num_sheets_in_ods <- function(path, include_external_data = FALSE) { - sheets <- ods_get_sheet_names_(path, include_external_data) + sheets <- get_sheet_names_(path, include_external_data) + return(length(sheets)) +} + +#' List all sheets in an FODS File +#' +#' List all sheets in an fods file. +#' +#' @param path Path to the fods file +#' @param include_external_data A boolean value to show or hide sheets containing archived linked data (default false) +#' @return A character vector of sheet names. 
+#' @author Peter Brohan , Chung-hong Chan , Gerrit-Jan Schutten +#' @examples +#' \dontrun{ +#' # Get the list of names of sheets +#' list_fods_sheets("starwars.fods") +#' } +#' @seealso +#' use \code{\link{read_fods}} to read the data +#' @export +list_fods_sheets <- function(path, include_external_data = FALSE) { + return(get_flat_sheet_names_(path, include_external_data)) +} + +#' Get the Number of Sheets in an FODS File +#' +#' Get the number of sheets in an fods file +#' +#' @param path path to the fods file +#' @param include_external_data A boolean value declaring if sheets holding archived linked data should be included +#' @return Number of sheets +#' @author Peter Brohan , Chung-hong Chan , Gerrit-Jan Schutten +#' @examples +#' \dontrun{ +#' # Get the number of sheets +#' get_num_sheets_in_fods("starwars.fods") +#' } +#' @seealso +#' use \code{\link{read_fods}} to read the data +#' @export +get_num_sheets_in_fods <- function(path, include_external_data = FALSE) { + sheets <- get_flat_sheet_names_(path, include_external_data) return(length(sheets)) } \ No newline at end of file diff --git a/R/readODS-package.R b/R/readODS-package.R index 682d81f..20c0bd7 100644 --- a/R/readODS-package.R +++ b/R/readODS-package.R @@ -1,9 +1,9 @@ -#' @keywords internal -"_PACKAGE" - #' @useDynLib readODS, .registration = TRUE NULL +#' @keywords internal +"_PACKAGE" + # The following block is used by usethis to automatically manage # roxygen namespace tags. Modify with care! ## usethis namespace: start diff --git a/R/read_fods.R b/R/read_fods.R new file mode 100644 index 0000000..447cbb0 --- /dev/null +++ b/R/read_fods.R @@ -0,0 +1,58 @@ +#' Read Data From FODS File +#' +#' read_fods is a function to read a single sheet from a flat ods file and return a data frame. +#' +#' @param path path to the fods file. +#' @param sheet sheet to read. Either a string (the sheet name), or an integer sheet number. The default is 1.
+#' @param col_names logical, indicating whether the file contains the names of the variables as its first line. Default is TRUE. +#' @param col_types Either NULL to guess from the spreadsheet or refer to [readr::type_convert()] to specify cols specification. NA will return a data frame with all columns being "characters". +#' @param na Character vector of strings to use for missing values. By default read_ods converts blank cells to missing data. It can also be set to +#' NULL, so that empty cells are treated as NA. +#' @param skip the number of lines of the data file to skip before beginning to read data. If this parameter is larger than the total number of lines in the ods file, an empty data frame is returned. +#' @param formula_as_formula logical, a switch to display formulas as formulas "SUM(A1:A3)" or as the resulting value "3"... or "8".. . Default is FALSE. +#' @param range selection of rectangle using Excel-like cell range, such as \code{range = "D12:F15"} or \code{range = "R1C12:R6C15"}. Cell range processing is handled by the \code{\link[=cellranger]{cellranger}} package. +#' @param row_names logical, indicating whether the file contains the names of the rows as its first column. Default is FALSE. +#' @param strings_as_factors logical, if character columns to be converted to factors. Default is FALSE. +#' @param check_names logical, passed down to base::data.frame(). Default is FALSE. +#' @param verbose logical, if messages should be displayed. Default is FALSE. +#' @return A data frame (\code{data.frame}) containing a representation of data in the ods file. +#' @note For packaged ods files (.ods), use (\code{read_ods}) +#' @author Peter Brohan , Chung-hong Chan , Gerrit-Jan Schutten +#' @examples +#' \dontrun{ +#' # Read a file +#' read_fods("starwars.fods") +#' # Read a specific sheet, e.g. the 2nd sheet +#' read_fods("starwars.fods", sheet = 2) +#' # Read a specific range, e.g. 
A1:C11 +#' read_fods("starwars.fods", sheet = 2, range = "A1:C11") +#' } +#' @export +read_fods <- function(path, + sheet = 1, + col_names = TRUE, + col_types = NULL, + na = "", + skip = 0, + formula_as_formula = FALSE, + range = NULL, + row_names = FALSE, + strings_as_factors = FALSE, + check_names = FALSE, + verbose = FALSE +){ + ## Should use match.call but there's a weird bug if one of the variable names is 'file' + .read_ods(path, + sheet, + col_names, + col_types, + na, + skip, + formula_as_formula, + range, + row_names, + strings_as_factors, + check_names, + verbose, + flat = TRUE) +} \ No newline at end of file diff --git a/R/read_ods.R b/R/read_ods.R index 9d0423d..00da462 100644 --- a/R/read_ods.R +++ b/R/read_ods.R @@ -1,6 +1,6 @@ .change_df_with_col_row_header <- function(x, col_header, row_header){ if((nrow(x) < 2 && col_header )|| (ncol(x) < 2 && row_header)){ - warning("Cannot make column/row names if this would cause the dataframe to be empty.") + warning("Cannot make column/row names if this would cause the dataframe to be empty.", call. = FALSE) return(x) } irow <- ifelse(col_header, 2, 1) @@ -29,9 +29,13 @@ ) } else { if(skip != 0){ - warning("Range and non-zero value for skip given. Defaulting to range.") + warning("Range and non-zero value for skip given. Defaulting to range.", call. = FALSE) } + tryCatch({ limits <- cellranger::as.cell_limits(range) + }, error = function(e){ + stop("Invalid `range`") + }) limits <- c( min_row = limits[["ul"]][1], max_row = limits[["lr"]][1], @@ -59,10 +63,48 @@ return (df) } +.check_read_args <- function(path, + sheet = 1, + col_names = TRUE, + col_types = NULL, + na = "", + skip = 0, + formula_as_formula = FALSE, + range = NULL, + row_names = FALSE, + strings_as_factors = FALSE, + check_names = FALSE, + verbose = FALSE){ + if (missing(path) || !is.character(path)){ + stop("No file path was provided for the 'path' argument. Please provide a path to a file to import.", call. 
= FALSE) + } + if (!file.exists(path)){ + stop("file does not exist", call. = FALSE) + } + if (!is.logical(col_names)){ + stop("col_names must be of type `boolean`", call. = FALSE) + } + if (!is.logical(formula_as_formula)){ + stop("formula_as_formula must be of type `boolean`", call. = FALSE) + } + if (!is.logical(row_names)){ + stop("row_names must be of type `boolean`", call. = FALSE) + } + if (!is.logical(strings_as_factors)){ + stop("strings_as_factors must be of type `boolean`", call. = FALSE) + } + if (!is.logical(check_names)){ + stop("check_names must be of type `boolean`", call. = FALSE) + } + if (!is.logical(verbose)){ + stop("verbose must be of type `boolean`", call. = FALSE) + } +} + + #' Read Data From ODS File #' #' read_ods is a function to read a single sheet from an ods file and return a data frame. -#' read.ods always returns a list of data frames with one data frame per sheet. This is a wrapper to read_ods for backward compatibility with previous version of readODS. Please use read_ods if possible. #' #' @param path path to the ods file. #' @param sheet sheet to read. Either a string (the sheet name), or an integer sheet number. The default is 1. @@ -78,7 +120,7 @@ #' @param check_names logical, passed down to base::data.frame(). Default is FALSE. #' @param verbose logical, if messages should be displayed. Default is FALSE. #' @return A data frame (\code{data.frame}) containing a representation of data in the ods file. -#' @note Currently, ods files that linked to external data source cannot be read. Merged cells cannot be parsed correctly. +#' @note For flat ods files (.fods or .xml), use (\code{read_fods}). #' @author Peter Brohan , Chung-hong Chan , Gerrit-Jan Schutten #' @examples #' \dontrun{ @@ -104,58 +146,93 @@ read_ods <- function(path, verbose = FALSE ){ - if (missing(path) || !is.character(path)){ - stop("No file path was provided for the 'path' argument. 
Please provide a path to a file to import.") - } - if (!file.exists(path)){ - stop("file does not exist") - } - if (!is.logical(col_names)){ - stop("col_names must be of type `boolean`") - } - if (!is.logical(formula_as_formula)){ - stop("formula_as_formula must be of type `boolean`") - } - if (!is.logical(row_names)){ - stop("row_names must be of type `boolean`") - } - if (!is.logical(strings_as_factors)){ - stop("strings_as_factors must be of type `boolean`") - } - if (!is.logical(check_names)){ - stop("check_names must be of type `boolean`") - } - if (!is.logical(verbose)){ - stop("verbose must be of type `boolean`") - } + ## Should use match.call but there's a weird bug if one of the variable names is 'file' + .read_ods(path, + sheet, + col_names, + col_types, + na, + skip, + formula_as_formula, + range, + row_names, + strings_as_factors, + check_names, + verbose, + flat = FALSE) +} + + +.read_ods <- function(path, + sheet = 1, + col_names = TRUE, + col_types = NULL, + na = "", + skip = 0, + formula_as_formula = FALSE, + range = NULL, + row_names = FALSE, + strings_as_factors = FALSE, + check_names = FALSE, + verbose = FALSE, + flat = FALSE){ + .check_read_args(path, + sheet, + col_names, + col_types, + na, + skip, + formula_as_formula, + range, + row_names, + strings_as_factors, + check_names, + verbose) # Get cell range info limits <- .standardise_limits(range, skip) # Get sheet number. - sheets <- ods_get_sheet_names_(path, TRUE) + if (flat){ + sheets <- get_flat_sheet_names_(path, TRUE) + } else { + sheets <- get_sheet_names_(path, TRUE) + } sheet_name <- cellranger::as.cell_limits(range)[["sheet"]] if(!is.null(range) && !is.na(sheet_name)){ if(sheet != 1){ - warning("Sheet suggested in range and using sheet argument. Defaulting to range", call. = FALSE) + warning("Sheet suggested in range and using sheet argument. Defaulting to range", + call. 
= FALSE) } + is_in_sheet_names <- stringi::stri_cmp(sheet_name, sheets) == 0 if(any(is_in_sheet_names)){ - is_in_sheet_names <- stringi::stri_cmp(sheet_name, sheets) == 0 - sheet = which(is_in_sheet_names) + sheet <- which(is_in_sheet_names) } else { - stop(paste0("No sheet found with name '", sheet_name, "'"), call. = FALSE) + stop(paste0("No sheet found with name '", sheet_name, "'", sep = ""), + call. = FALSE) } } else { is_in_sheet_names <- stringi::stri_cmp(sheet, sheets) == 0 if (!is.numeric(sheet) && any(is_in_sheet_names)){ - sheet = which(is_in_sheet_names) + sheet <- which(is_in_sheet_names) } else if (!is.numeric(sheet)) { - stop(paste0("No sheet found with name '", sheet, "'", ),call. = FALSE) + stop(paste0("No sheet found with name '", sheet, "'", sep = ""), + call. = FALSE) } if (sheet > length(sheets)){ - stop(paste0("File contains only ", length(sheets), " sheets. Sheet index out of range.", call. = FALSE)) + stop(paste0("File contains only ", length(sheets), " sheets. Sheet index out of range.", + call. = FALSE)) } } + if(flat){ + strings <- read_flat_ods_(path, + limits["min_row"], + limits["max_row"], + limits["min_col"], + limits["max_col"], + sheet, + formula_as_formula) + } else { strings <- read_ods_(path, limits["min_row"], limits["max_row"], @@ -163,13 +240,17 @@ read_ods <- function(path, limits["max_col"], sheet, formula_as_formula) + } if(strings[1] == 0 || strings[2] == 0){ warning("empty sheet, return empty data frame.", call. 
= FALSE) return(data.frame()) } - res <- strings[-1:-2] |> - matrix(ncol = strtoi(strings[1]), byrow = TRUE) |> - as.data.frame(stringsAsFactors = FALSE) + res <- as.data.frame( + matrix( + strings[-1:-2], + ncol = strtoi(strings[1]), + byrow = TRUE), + stringsAsFactors = FALSE) res <- .change_df_with_col_row_header(res, col_names, row_names) res <- data.frame(res, check.names = check_names) if (inherits(col_types, 'col_spec')){ @@ -179,7 +260,8 @@ read_ods <- function(path, } else if (length(col_types) == 1 && is.na(col_types[1])) { {} #Pass } else { - stop("Unknown col_types. Can either be a class col_spec, NULL or NA.", call. = FALSE) + stop("Unknown col_types. Can either be a class col_spec, NULL or NA.", + call. = FALSE) } if (strings_as_factors) { @@ -187,5 +269,5 @@ read_ods <- function(path, } return(res) -} +} \ No newline at end of file diff --git a/R/utils.R b/R/utils.R index e08068d..b753fbf 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,6 +1,6 @@ check_nonnegative_integer <- function(x, argument){ if(length(x) != 1 || !is.numeric(x) || floor(x) != x || is.na(x) || x < 0){ - stop(paste0(argument , " must be a positive integer"), call. = FALSE) + stop(paste0(argument, " must be a positive integer"), call. 
= FALSE) } return(x) } \ No newline at end of file diff --git a/R/writeODS.R b/R/writeODS.R index 38cbcb6..4826b54 100644 --- a/R/writeODS.R +++ b/R/writeODS.R @@ -29,16 +29,18 @@ } .escape_xml <- function(x) { - x_no_amp <- stringi::stri_replace_all_fixed(str = x, pattern = c("&"), replacement = c("&"), vectorize_all = FALSE) + x_utf8 <- stringi::stri_enc_toutf8(x) + x_no_amp <- stringi::stri_replace_all_fixed(str = x_utf8, pattern = c("&"), replacement = c("&"), vectorize_all = FALSE) stringi::stri_replace_all_fixed(str = x_no_amp, pattern = c("\"", "<", ">", "'"), replacement = c(""", "<", ">", "'"), vectorize_all = FALSE) + } .cell_out <- function(type, value, con) { escaped_value <- .escape_xml(value) cat("", escaped_value, - "", + "", sep = "", file = con) } @@ -91,7 +93,7 @@ .convert_df_to_sheet <- function(x, sheet = "Sheet1", row_names = FALSE, col_names = FALSE, na_as_string = FALSE) { throwaway_xml_file <- tempfile(fileext = ".xml") - con <- file(file.path(throwaway_xml_file), open="w") + con <- file(file.path(throwaway_xml_file), open="w", encoding = "UTF-8") .write_sheet_con(x = x, con = con, sheet = sheet, row_names = row_names, col_names = col_names, na_as_string = na_as_string) close(con) return(file.path(throwaway_xml_file)) @@ -101,7 +103,7 @@ .vfwrite_ods <- function(x, temp_ods_dir, sheet = "Sheet1", row_names = FALSE, col_names = FALSE, na_as_string = FALSE) { templatedir <- system.file("template", package = "readODS") file.copy(dir(templatedir, full.names = TRUE), temp_ods_dir, recursive = TRUE, copy.mode = FALSE) - con <- file(file.path(temp_ods_dir, "content.xml"), open="w") + con <- file(file.path(temp_ods_dir, "content.xml"), open="w", encoding = "UTF-8") cat(.CONTENT[1], file = con) cat(.CONTENT[2], file = con) .write_sheet_con(x = x, con = con, sheet = sheet, row_names = row_names, col_names = col_names, na_as_string = na_as_string) diff --git a/README.Rmd b/README.Rmd index f6dc4fd..005ac6a 100644 --- a/README.Rmd +++ b/README.Rmd @@ 
-90,17 +90,18 @@ read_ods("mtcars.ods") read_ods("mtcars.ods", sheet = "plant", range = "A1:B10") ``` -### About the speed and file size +### Text Encoding -This package is written entirely in R. Although the efficiency has been improved, please don't expect the heavily optimized performance of [readxl](https://readxl.tidyverse.org/), [readr](https://readr.tidyverse.org/) and data.table's [fread](https://cran.r-project.org/package=data.table). +In older versions of R (<4.2) on Windows, the default encoding for text is not UTF-8, and instead depends on your locale. This can cause problems processing characters that are not part of the character set R is using (usually [Windows-1252](https://en.wikipedia.org/wiki/Windows-1252)). Sheets written using these characters generally contain errors. The problem can be fixed by upgrading to a version of R >= 4.2. + +**Radian:** Even for up-to-date versions of R, these issues with character encoding are still a known issue with Radian. Their suggested workaround is [here](https://github.com/randy3k/radian/issues/269#issuecomment-1169663251). -Also, this package can't handle ODS files larger than "medium size". See [issue #71](https://github.com/ropensci/readODS/issues/71). If you need to read large ODS files efficiently, the [headless interface of LibreOffice](https://help.libreoffice.org/Common/Starting_the_Software_With_Parameters) is recommended to convert your ODS to CSV. ### Misc The logo of readODS is a remix of LibreOffice Calc v6.1 icon created by the Document Foundation. The original LibreOffice logo is licensed under the [Creative Commons Attribution Share-Alike 3.0 Unported License](https://wiki.documentfoundation.org/File:LibO6_MIME.svg). readODS is not a product of the Document Foundation. The logo of readODS is licensed under the [Creative Commons Attribution Share-Alike 3.0 Unported License](https://creativecommons.org/licenses/by-sa/3.0/). -The creator of this package is Gerrit-Jan Schutten.
The current maintainer is Chung-hong Chan. This package benefits from contributions by Thomas J. Leeper, John Foster, Sergio Oller, Jim Hester, Stephen Watts, Arthur Katossky, Stas Malavin, Duncan Garmonsway, Mehrad Mahmoudian, Matt Kerlogue, Detlef Steuer, Michal Lauer, and Till Straube. +The creator of this package is Gerrit-Jan Schutten. The current maintainer is Chung-hong Chan. This package benefits from contributions by Peter Brohan, Thomas J. Leeper, John Foster, Sergio Oller, Jim Hester, Stephen Watts, Arthur Katossky, Stas Malavin, Duncan Garmonsway, Mehrad Mahmoudian, Matt Kerlogue, Detlef Steuer, Michal Lauer, and Till Straube. This package emulates the behaviors of `readxl::read_xlsx`, `writexl::write_xlsx` and `xlsx::write.xlsx`. diff --git a/README.md b/README.md index 1fdc97a..30d4eb5 100644 --- a/README.md +++ b/README.md @@ -122,8 +122,7 @@ write_ods(PlantGrowth, "mtcars.ods", append = TRUE, sheet = "plant") ``` r ## Default: First sheet read_ods("mtcars.ods") -#> Warning: Missing column names filled in: 'X1' [1] -#> NA mpg cyl disp hp drat wt qsec vs am gear carb +#> Var.1 mpg cyl disp hp drat wt qsec vs am gear carb #> 1 Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 #> 2 Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 #> 3 Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 @@ -172,19 +171,20 @@ read_ods("mtcars.ods", sheet = "plant", range = "A1:B10") #> 9 5.33 ctrl ``` -### About the speed and file size +### Text Encoding -This package is written entirely in R. Although the efficiency has been -improved, please don’t expect the heavily optimized performance of -[readxl](https://readxl.tidyverse.org/), -[readr](https://readr.tidyverse.org/) and data.table’s -[fread](https://cran.r-project.org/package=data.table). +In older versions of R (\<4.2) on Windows, the default encoding for text +is not UTF-8, and instead depends on your locale. 
This can cause +problems processing characters that are not part of the character set R +is using (usually +[Windows-1252](https://en.wikipedia.org/wiki/Windows-1252)). Sheets +written using these characters generally contain errors. The problem +can be fixed by upgrading to a version of R \>= 4.2. -Also, this package can’t handle ODS files larger than “medium size”. See -[issue \#71](https://github.com/ropensci/readODS/issues/71). If you need -to read large ODS files efficiently, the [headless interface of -LibreOffice](https://help.libreoffice.org/Common/Starting_the_Software_With_Parameters) -is recommended to convert your ODS to CSV. +**Radian:** Even for up-to-date versions of R, these issues with +character encoding are still a known issue with Radian. Their suggested +workaround is +[here](https://github.com/randy3k/radian/issues/269#issuecomment-1169663251). ### Misc @@ -198,9 +198,9 @@ Unported License](https://creativecommons.org/licenses/by-sa/3.0/). The creator of this package is Gerrit-Jan Schutten. The current maintainer is Chung-hong Chan. This package benefits from contributions -by Thomas J. Leeper, John Foster, Sergio Oller, Jim Hester, Stephen -Watts, Arthur Katossky, Stas Malavin, Duncan Garmonsway, Mehrad -Mahmoudian, Matt Kerlogue, Detlef Steuer, Michal Lauer, and Till +by Peter Brohan, Thomas J. Leeper, John Foster, Sergio Oller, Jim +Hester, Stephen Watts, Arthur Katossky, Stas Malavin, Duncan Garmonsway, +Mehrad Mahmoudian, Matt Kerlogue, Detlef Steuer, Michal Lauer, and Till Straube. This package emulates the behaviors of `readxl::read_xlsx`, @@ -218,10 +218,10 @@ GPL3 Contributions in the form of feedback, comments, code, and bug report are welcome. - - Fork the source code, modify, and issue a [pull - request](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request-from-a-fork). - - Issues, bug reports: [File a Github - issue](https://github.com/ropensci/readODS). 
+- Fork the source code, modify, and issue a [pull + request](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request-from-a-fork). +- Issues, bug reports: [File a Github + issue](https://github.com/ropensci/readODS). Please note that this package is released with a [Contributor Code of Conduct](https://ropensci.org/code-of-conduct/). By contributing to this diff --git a/codemeta.json b/codemeta.json index afe0c83..9b7c49d 100644 --- a/codemeta.json +++ b/codemeta.json @@ -7,13 +7,13 @@ "codeRepository": "https://github.com/ropensci/readODS", "issueTracker": "https://github.com/ropensci/readODS/issues", "license": "https://spdx.org/licenses/GPL-3.0", - "version": "1.8.0", + "version": "1.9.0", "programmingLanguage": { "@type": "ComputerLanguage", "name": "R", "url": "https://r-project.org" }, - "runtimePlatform": "R version 4.2.2 Patched (2022-11-10 r83330)", + "runtimePlatform": "R version 4.3.1 (2023-06-16 ucrt)", "provider": { "@id": "https://cran.r-project.org", "@type": "Organization", @@ -34,6 +34,12 @@ "email": "chainsawtiney@gmail.com", "@id": "https://orcid.org/0000-0002-6232-7530" }, + { + "@type": "Person", + "givenName": "Peter", + "familyName": "Brohan", + "email": "peter.brohan@gmail.com" + }, { "@type": "Person", "givenName": "Thomas J.", @@ -103,6 +109,12 @@ "givenName": "Michal", "familyName": "Lauer", "email": "michal.lauer.25@gmail.com" + }, + { + "@type": "Person", + "givenName": "Till", + "familyName": "Straube", + "email": "straube@geo.uni-frankfurt.de" } ], "maintainer": [ @@ -250,6 +262,18 @@ "sameAs": "https://CRAN.R-project.org/package=purrr" }, "7": { + "@type": "SoftwareApplication", + "identifier": "zip", + "name": "zip", + "provider": { + "@id": "https://cran.r-project.org", + "@type": "Organization", + "name": "Comprehensive R Archive Network (CRAN)", + "url": "https://cran.r-project.org" + }, + "sameAs": "https://CRAN.R-project.org/package=zip" + }, + "8": { "@type": 
"SoftwareApplication", "identifier": "R", "name": "R", @@ -257,7 +281,8 @@ }, "SystemRequirements": null }, - "fileSize": "71480.39KB", + "fileSize": "52009.582KB", + "releaseNotes": "https://github.com/ropensci/readODS/blob/master/NEWS.md", "readme": "https://github.com/ropensci/readODS/blob/v1.8/README.md", "contIntegration": ["https://app.codecov.io/gh/ropensci/readODS?branch=master", "https://github.com/ropensci/readODS/actions/workflows/R-CMD-check.yaml"], "developmentStatus": "https://lifecycle.r-lib.org/articles/stages.html#stable", diff --git a/man/get_num_sheets_in_fods.Rd b/man/get_num_sheets_in_fods.Rd new file mode 100644 index 0000000..3baee88 --- /dev/null +++ b/man/get_num_sheets_in_fods.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/list_ods_sheets.R +\name{get_num_sheets_in_fods} +\alias{get_num_sheets_in_fods} +\title{Get the Number of Sheets in an FODS File} +\usage{ +get_num_sheets_in_fods(path, include_external_data = FALSE) +} +\arguments{ +\item{path}{path to the fods file} + +\item{include_external_data}{A boolean value declaring if sheets holding archived linked data should be included} +} +\value{ +Number of sheets +} +\description{ +Get the number of sheets in an fods file +} +\examples{ +\dontrun{ +# Get the number of sheets +get_num_sheets_in_fods("starwars.fods") +} +} +\seealso{ +use \code{\link{read_fods}} to read the data +} +\author{ +Peter Brohan \href{mailto:peter.brohan+cran@gmail.com}{peter.brohan+cran@gmail.com}, Chung-hong Chan \href{mailto:chainsawtiney@gmail.com}{chainsawtiney@gmail.com}, Gerrit-Jan Schutten \href{mailto:phonixor@gmail.com}{phonixor@gmail.com} +} diff --git a/man/get_num_sheets_in_ods.Rd b/man/get_num_sheets_in_ods.Rd index b2afed4..07a4881 100644 --- a/man/get_num_sheets_in_ods.Rd +++ b/man/get_num_sheets_in_ods.Rd @@ -9,7 +9,7 @@ get_num_sheets_in_ods(path, include_external_data = FALSE) \arguments{ \item{path}{path to the ods file} 
-\item{include_external_data}{A boolean value declaring if external data sheets should be counted} +\item{include_external_data}{A boolean value declaring if sheets holding archived linked data should be included} } \value{ Number of sheets @@ -17,6 +17,12 @@ Number of sheets \description{ Get the number of sheets in an ods file } +\examples{ +\dontrun{ +# Get the number of sheets +get_num_sheets_in_ods("starwars.ods") +} +} \seealso{ use \code{\link{read_ods}} to read the data } diff --git a/man/list_fods_sheets.Rd b/man/list_fods_sheets.Rd new file mode 100644 index 0000000..c0a7dc5 --- /dev/null +++ b/man/list_fods_sheets.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/list_ods_sheets.R +\name{list_fods_sheets} +\alias{list_fods_sheets} +\title{List all sheets in an FODS File} +\usage{ +list_fods_sheets(path, include_external_data = FALSE) +} +\arguments{ +\item{path}{Path to the fods file} + +\item{include_external_data}{A boolean value to show or hide sheets containing archived linked data (default false)} +} +\value{ +A character vector of sheet names. +} +\description{ +List all sheets in an fods file. 
+} +\examples{ +\dontrun{ +# Get the list of names of sheets +list_fods_sheets("starwars.fods") +} +} +\seealso{ +use \code{\link{read_fods}} to read the data +} +\author{ +Peter Brohan \href{mailto:peter.brohan+cran@gmail.com}{peter.brohan+cran@gmail.com}, Chung-hong Chan \href{mailto:chainsawtiney@gmail.com}{chainsawtiney@gmail.com}, Gerrit-Jan Schutten \href{mailto:phonixor@gmail.com}{phonixor@gmail.com} +} diff --git a/man/list_ods_sheets.Rd b/man/list_ods_sheets.Rd index 757f93f..ef376af 100644 --- a/man/list_ods_sheets.Rd +++ b/man/list_ods_sheets.Rd @@ -9,7 +9,7 @@ list_ods_sheets(path, include_external_data = FALSE) \arguments{ \item{path}{Path to the ods file} -\item{include_external_data}{A boolean value to show or hide sheets containing linked data (default false)} +\item{include_external_data}{A boolean value to show or hide sheets containing archived linked data (default false)} } \value{ A character vector of sheet names. @@ -17,6 +17,12 @@ A character vector of sheet names. \description{ List all sheets in an ods file. } +\examples{ +\dontrun{ +# Get the list of names of sheets +list_ods_sheets("starwars.ods") +} +} \seealso{ use \code{\link{read_ods}} to read the data } diff --git a/man/read_fods.Rd b/man/read_fods.Rd new file mode 100644 index 0000000..024a928 --- /dev/null +++ b/man/read_fods.Rd @@ -0,0 +1,69 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_fods.R +\name{read_fods} +\alias{read_fods} +\title{Read Data From FODS File} +\usage{ +read_fods( + path, + sheet = 1, + col_names = TRUE, + col_types = NULL, + na = "", + skip = 0, + formula_as_formula = FALSE, + range = NULL, + row_names = FALSE, + strings_as_factors = FALSE, + check_names = FALSE, + verbose = FALSE +) +} +\arguments{ +\item{path}{path to the fods file.} + +\item{sheet}{sheet to read. Either a string (the sheet name), or an integer sheet number. 
The default is 1.} + +\item{col_names}{logical, indicating whether the file contains the names of the variables as its first line. Default is TRUE.} + +\item{col_types}{Either NULL to guess from the spreadsheet or refer to \code{\link[readr:type_convert]{readr::type_convert()}} to specify cols specification. NA will return a data frame with all columns being "characters".} + +\item{na}{Character vector of strings to use for missing values. By default read_ods converts blank cells to missing data. It can also be set to +NULL, so that empty cells are treated as NA.} + +\item{skip}{the number of lines of the data file to skip before beginning to read data. If this parameter is larger than the total number of lines in the ods file, an empty data frame is returned.} + +\item{formula_as_formula}{logical, a switch to display formulas as formulas "SUM(A1:A3)" or as the resulting value "3"... or "8".. . Default is FALSE.} + +\item{range}{selection of rectangle using Excel-like cell range, such as \code{range = "D12:F15"} or \code{range = "R1C12:R6C15"}. Cell range processing is handled by the \code{\link[=cellranger]{cellranger}} package.} + +\item{row_names}{logical, indicating whether the file contains the names of the rows as its first column. Default is FALSE.} + +\item{strings_as_factors}{logical, if character columns to be converted to factors. Default is FALSE.} + +\item{check_names}{logical, passed down to base::data.frame(). Default is FALSE.} + +\item{verbose}{logical, if messages should be displayed. Default is FALSE.} +} +\value{ +A data frame (\code{data.frame}) containing a representation of data in the ods file. +} +\description{ +read_flat_ods is a function to read a single sheet from a flat ods file and return a data frame. +} +\note{ +For packaged ods files (.ods), use (\code{read_ods}) +} +\examples{ +\dontrun{ +# Read a file +read_fods("starwars.fods") +# Read a specific sheet, e.g. 
the 2nd sheet +read_fods("starwars.fods", sheet = 2) +# Read a specific range, e.g. A1:C11 +read_fods("starwars.fods", sheet = 2, range = "A1:C11") +} +} +\author{ +Peter Brohan \href{mailto:peter.brohan+cran@gmail.com}{peter.brohan+cran@gmail.com}, Chung-hong Chan \href{mailto:chainsawtiney@gmail.com}{chainsawtiney@gmail.com}, Gerrit-Jan Schutten \href{mailto:phonixor@gmail.com}{phonixor@gmail.com} +} diff --git a/man/read_ods.Rd b/man/read_ods.Rd index f7b71ce..08558ef 100644 --- a/man/read_ods.Rd +++ b/man/read_ods.Rd @@ -50,10 +50,9 @@ A data frame (\code{data.frame}) containing a representation of data in the ods } \description{ read_ods is a function to read a single sheet from an ods file and return a data frame. -read.ods always returns a list of data frames with one data frame per sheet. This is a wrapper to read_ods for backward compatibility with previous version of readODS. Please use read_ods if possible. } \note{ -Currently, ods files that linked to external data source cannot be read. Merged cells cannot be parsed correctly. +For flat ods files (.fods or .xml), use (\code{read_fods}). 
} \examples{ \dontrun{ diff --git a/src/cpp11.cpp b/src/cpp11.cpp index 317902e..727d4c3 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -6,10 +6,24 @@ #include // get_sheet_names.cpp -cpp11::strings ods_get_sheet_names_(const std::string file, const bool include_external_data); -extern "C" SEXP _readODS_ods_get_sheet_names_(SEXP file, SEXP include_external_data) { +cpp11::strings get_sheet_names_(const std::string file, const bool include_external_data); +extern "C" SEXP _readODS_get_sheet_names_(SEXP file, SEXP include_external_data) { BEGIN_CPP11 - return cpp11::as_sexp(ods_get_sheet_names_(cpp11::as_cpp>(file), cpp11::as_cpp>(include_external_data))); + return cpp11::as_sexp(get_sheet_names_(cpp11::as_cpp>(file), cpp11::as_cpp>(include_external_data))); + END_CPP11 +} +// get_sheet_names.cpp +cpp11::strings get_flat_sheet_names_(const std::string file, const bool include_external_data); +extern "C" SEXP _readODS_get_flat_sheet_names_(SEXP file, SEXP include_external_data) { + BEGIN_CPP11 + return cpp11::as_sexp(get_flat_sheet_names_(cpp11::as_cpp>(file), cpp11::as_cpp>(include_external_data))); + END_CPP11 +} +// read_flat_ods_.cpp +cpp11::strings read_flat_ods_(const std::string file, int start_row, int stop_row, int start_col, int stop_col, const int sheet, const bool formula_as_formula); +extern "C" SEXP _readODS_read_flat_ods_(SEXP file, SEXP start_row, SEXP stop_row, SEXP start_col, SEXP stop_col, SEXP sheet, SEXP formula_as_formula) { + BEGIN_CPP11 + return cpp11::as_sexp(read_flat_ods_(cpp11::as_cpp>(file), cpp11::as_cpp>(start_row), cpp11::as_cpp>(stop_row), cpp11::as_cpp>(start_col), cpp11::as_cpp>(stop_col), cpp11::as_cpp>(sheet), cpp11::as_cpp>(formula_as_formula))); END_CPP11 } // read_ods_.cpp @@ -22,8 +36,10 @@ extern "C" SEXP _readODS_read_ods_(SEXP file, SEXP start_row, SEXP stop_row, SEX extern "C" { static const R_CallMethodDef CallEntries[] = { - {"_readODS_ods_get_sheet_names_", (DL_FUNC) &_readODS_ods_get_sheet_names_, 2}, - 
{"_readODS_read_ods_", (DL_FUNC) &_readODS_read_ods_, 7}, + {"_readODS_get_flat_sheet_names_", (DL_FUNC) &_readODS_get_flat_sheet_names_, 2}, + {"_readODS_get_sheet_names_", (DL_FUNC) &_readODS_get_sheet_names_, 2}, + {"_readODS_read_flat_ods_", (DL_FUNC) &_readODS_read_flat_ods_, 7}, + {"_readODS_read_ods_", (DL_FUNC) &_readODS_read_ods_, 7}, {NULL, NULL, 0} }; } diff --git a/src/get_sheet_names.cpp b/src/get_sheet_names.cpp index ce25dbb..700c2a0 100644 --- a/src/get_sheet_names.cpp +++ b/src/get_sheet_names.cpp @@ -8,28 +8,16 @@ #include +#include +#include +cpp11::strings get_sheet_names_from_content (rapidxml::xml_node<>* rootNode, const bool include_external_data){ - -[[cpp11::register]] -cpp11::strings ods_get_sheet_names_(const std::string file, const bool include_external_data){ - if (!is_ods(file)){ - throw std::invalid_argument(file + " is not a correct ODS file"); - } cpp11::writable::strings sheetNames(1); - std::string xmlFile = zip_buffer(file, "content.xml"); - - rapidxml::xml_document<> spreadsheet; - spreadsheet.parse<0>(&xmlFile[0]); - rapidxml::xml_node<>* rootNode; - - int i = 0; int n = 1; - rootNode = spreadsheet.first_node()->first_node("office:body")-> - first_node("office:spreadsheet"); - + for (rapidxml::xml_node<>* sheetData = rootNode->first_node("table:table"); sheetData; sheetData = sheetData->next_sibling("table:table")){ @@ -53,6 +41,53 @@ cpp11::strings ods_get_sheet_names_(const std::string file, const bool include_e } return sheetNames; + } + +[[cpp11::register]] +cpp11::strings get_sheet_names_(const std::string file, const bool include_external_data){ + if (!is_ods(file)){ + throw std::invalid_argument(file + " is not a correct ODS file"); + } + std::string xmlFile = zip_buffer(file, "content.xml"); + + rapidxml::xml_document<> spreadsheet; + spreadsheet.parse<0>(&xmlFile[0]); + rapidxml::xml_node<>* rootNode; + + rootNode = spreadsheet.first_node()->first_node("office:body")-> + first_node("office:spreadsheet"); + return 
(get_sheet_names_from_content(rootNode, include_external_data)); + +} + +[[cpp11::register]] +cpp11::strings get_flat_sheet_names_(const std::string file, const bool include_external_data){ + if (!is_flat_ods(file)){ + throw std::invalid_argument(file + " is not a correct FODS file"); + } + std::string xmlFile; + + std::ifstream in(file, std::ios::in | std::ios::binary); + if (in) { + in.seekg(0, std::ios::end); + xmlFile.resize(in.tellg()); + in.seekg(0, std::ios::beg); + in.read(&xmlFile[0], xmlFile.size()); + in.close(); + } else{ + throw std::invalid_argument("No such file"); + } + rapidxml::xml_document<> spreadsheet; + + xmlFile.push_back('\0'); + spreadsheet.parse<0>(&xmlFile[0]); + + rapidxml::xml_node<>* rootNode; + rootNode = spreadsheet.first_node("office:document")->first_node("office:body")-> + first_node("office:spreadsheet"); + + return (get_sheet_names_from_content(rootNode, include_external_data)); +} diff --git a/src/is_ods.cpp b/src/is_ods.cpp index d3ad9fa..1f5d949 100644 --- a/src/is_ods.cpp +++ b/src/is_ods.cpp @@ -1,12 +1,10 @@ #include "is_ods.h" #include "rapidxml/rapidxml.hpp" - - #include +#include - -bool is_ods(const std::string file, const bool strict){ +bool is_ods(const std::string file){ /*Checks that file conforms to some of the spec at https://docs.oasis-open.org/office/OpenDocument/v1.3/. @@ -21,27 +19,18 @@ bool is_ods(const std::string file, const bool strict){ return false; } - - /*Mimetype is not in v1.0 so mostly we ignore this. 
Keeping this here in case it's useful later - as it is a requirement of later versions*/ - if(strict) { - if (!zip_has_file(file, "mimetype")){ - return false; - } - /*Check Section 2.2.4 B)*/ - std::string mimetype = zip_buffer(file, "mimetype"); - mimetype = mimetype.replace(mimetype.end()-1,mimetype.end(),""); // This is some very lazy string trimming - if (!(strcmp( - mimetype.c_str(), - "application/vnd.oasis.opendocument.spreadsheet" // We also don't accept templates - ) == 0)){ - return false; - } - } rapidxml::xml_document<> workbook; rapidxml::xml_node<>* rootNode; std:: string xmlFile = zip_buffer(file, "content.xml"); - workbook.parse<0>(&xmlFile[0]); + try { + workbook.parse<0>(&xmlFile[0]); + } catch (const rapidxml::parse_error& e) { + if (strcmp(e.what(), "expected <") == 0){ + throw std::invalid_argument(file + " does not contain a valid content.xml"); + } else { + throw std::invalid_argument("XML parse error"); + } + } rootNode = workbook.first_node(); /*Check Section 2.2.1 B) 2.1 - is this a well formed OpenDocument*/ if (strcmp(rootNode->name(),"office:document-content") != 0){ @@ -56,4 +45,55 @@ bool is_ods(const std::string file, const bool strict){ return false; } return true; -} \ No newline at end of file +} + +bool is_flat_ods(const std::string file){ + /*Checks that file conforms to some of the spec at + https://docs.oasis-open.org/office/OpenDocument/v1.3/.*/ + rapidxml::xml_document<> workbook; + rapidxml::xml_node<>* rootNode; + std::string xmlFile; + + std::ifstream in(file, std::ios::in | std::ios::binary); + if (in) { + in.seekg(0, std::ios::end); + xmlFile.resize(in.tellg()); + in.seekg(0, std::ios::beg); + in.read(&xmlFile[0], xmlFile.size()); + in.close(); + } else{ + throw std::invalid_argument("No such file"); + } + + xmlFile.push_back('\0'); + + try { + workbook.parse<0>(&xmlFile[0]); + } catch (const rapidxml::parse_error& e) { + if (strcmp(e.what(), "expected <") == 0){ + throw std::invalid_argument(file + " is not a flat XML file"); + 
} else { + throw std::invalid_argument("XML parse error"); + } + } + + rootNode = workbook.first_node(); + // Section 2.2.1C): scan top-level nodes for office:document + while(rootNode != 0 && strcmp(rootNode->name(), "office:document") != 0){ + rootNode = rootNode->next_sibling(); + } + if (rootNode == 0){ + return false; + } + + /*Check Section 3.3 C)*/ + if (!(rootNode->first_node("office:body"))){ + return false; + } + /*Check Section 2.2.4 C) - this is a spreadsheet*/ + if (!(rootNode->first_node("office:body")->first_node("office:spreadsheet"))){ + return false; + } + + return true; +} diff --git a/src/is_ods.h b/src/is_ods.h index 08a8fb2..69c562e 100644 --- a/src/is_ods.h +++ b/src/is_ods.h @@ -3,4 +3,5 @@ #include #include "readxl/zip.h" -bool is_ods(const std::string file, const bool strict = false); \ No newline at end of file +bool is_ods(const std::string file); +bool is_flat_ods(const std::string file); \ No newline at end of file diff --git a/src/read_flat_ods_.cpp b/src/read_flat_ods_.cpp new file mode 100644 index 0000000..ec800ec --- /dev/null +++ b/src/read_flat_ods_.cpp @@ -0,0 +1,98 @@ +#include "cpp11.hpp" +#include "cpp11/r_string.hpp" + +#include "rapidxml/rapidxml.hpp" +#include "is_ods.h" +#include "read_ods_internals.h" + +#include +#include +#include + + +[[cpp11::register]] +cpp11::strings read_flat_ods_(const std::string file, + int start_row, + int stop_row, + int start_col, + int stop_col, + const int sheet, + const bool formula_as_formula) { + if(!is_flat_ods(file)){ + throw std::invalid_argument(file + " is not a correct FODS file"); + } + if(sheet < 1){ + throw std::invalid_argument("Cannot have sheet index less than 1"); + } + + unsigned int out_width = 0; + unsigned int out_length; + std::string xmlFile; + + std::ifstream in(file, std::ios::in | std::ios::binary); + if (in) { + in.seekg(0, std::ios::end); + xmlFile.resize(in.tellg()); + in.seekg(0, std::ios::beg); + in.read(&xmlFile[0], xmlFile.size()); + in.close(); + } else{ + throw std::invalid_argument("No such file"); + } + 
rapidxml::xml_document<> spreadsheet; + + xmlFile.push_back('\0'); + spreadsheet.parse<0>(&xmlFile[0]); + + rapidxml::xml_node<>* rootNode; + rootNode = spreadsheet.first_node("office:document")->first_node("office:body")-> + first_node("office:spreadsheet")->first_node("table:table"); + + for (int i = 1; i < sheet; i++){ + rootNode = rootNode->next_sibling("table:table"); + } + + std::vector*>> contents; + + contents = find_rows(rootNode, start_row,stop_row,start_col,stop_col); + + // Get dimensions of output + out_length = contents.size(); + for (unsigned int i = 0; i < contents.size(); i++){ + if (contents[i].size() > out_width){ + out_width = contents[i].size(); + } + } + + // If there is no content + if (out_width * out_length == 0){ + cpp11::writable::strings cell_values(2); + cell_values[0] = "0"; + cell_values[1] = "0"; + return cell_values; + } + + cpp11::writable::strings cell_values(out_width*out_length + 2); + cell_values[0] = std::to_string(out_width); + cell_values[1] = std::to_string(out_length); + + int t = 2; + for (unsigned int i = 0; i < contents.size(); i++){ + for (unsigned int j = 0; j < contents[i].size(); j++){ + cell_values[t] = (contents[i][j] != 0) ? + Rf_mkCharCE(parse_single_cell(contents[i][j], formula_as_formula, true).c_str(), CE_UTF8) : NA_STRING; + t++; + } + // Pad rows to even width + if(contents[i].size() < out_width){ + unsigned int row_width = contents[i].size(); + for (unsigned int j = 0; j + row_width < out_width; j++){ + cell_values[t] = ""; + t++; + } + } + } + return cell_values; + } + + \ No newline at end of file diff --git a/src/read_ods_.cpp b/src/read_ods_.cpp index 2bf3f3e..3f11498 100644 --- a/src/read_ods_.cpp +++ b/src/read_ods_.cpp @@ -3,205 +3,10 @@ #include "rapidxml/rapidxml.hpp" #include "is_ods.h" +#include "read_ods_internals.h" #include #include -#include - - - - - - -std::string parse_p(rapidxml::xml_node<>* node){ - /*Deal with text inside cells. 
Cells can contain just text (node_data), or a - mixture of text and other nodes (node_element). We usually just want the text - from these nodes (e.g. if there's a link), but we also need to consider the - text:s node, which saves repeated spaces*/ - std::string out; - char* name; - int rep_space; - for (rapidxml::xml_node<>* n = node->first_node(); n; n=n->next_sibling()){ - if (n->type() == rapidxml::node_element) - { - name = n->name(); - if (strcmp(name,"text:s") == 0){ - if(n->first_attribute("text:c") != NULL){ - rep_space = atoi(n->first_attribute("text:c")->value()); - } else { - rep_space = 1; - } - out = out.append(std::string(rep_space, ' ')); - } else if (strcmp(name,"text:line-break") == 0){ - out = out.append("\n"); - } else if (strcmp(name, "text:a") == 0){ - if(!(n->first_node("text:a"))){ //Prevent crash by making pathological recursive links - out = out.append(parse_p(n)); - } - } else { - out = out.append(n->value()); - } - } - else if (n->type() == rapidxml::node_data){ - out = out.append(n->value()); - } - } - return out; -} - -std::string parse_textp(rapidxml::xml_node<>* cell){ - //This isn't very efficient. It is theoretically faster to make a list of pointers, assign the - //memory first and then concatenate them all into the freed memory. However this is hard to understand - //and not a significant problem. If you were looking for efficincies though, this would be a good choice. - std::string out; - int i = 0; - for (rapidxml::xml_node<>* n = cell->first_node("text:p"); n ; n=n->next_sibling("text:p")){ - if (i > 0){ - out = out.append("\n"); - } - out = out.append(parse_p(n)); - i++; - } - return out; -} - -std::string parse_single_cell(rapidxml::xml_node<>* cell, bool formula_as_formula, bool use_office_value){ - std::string cell_value; - - char* value_type = (cell->first_attribute("office:value-type") != 0) ? 
- cell->first_attribute("office:value-type")->value() : NULL; - if(formula_as_formula && cell->first_attribute("table:formula")){ - cell_value = cell->first_attribute("table:formula")->value(); - } else { - cell_value = (cell->first_node("text:p") != 0) ? parse_textp(cell) : ""; - if((value_type) && - - ((cell_value.length() == 0 && use_office_value) || - (strcmp(value_type, "float") == 0 || - strcmp(value_type, "currency") == 0|| - strcmp(value_type, "percentage") == 0))){ - - cell_value = cell->first_attribute("office:value")->value(); - } - } - return cell_value; -} - -// Make an array of pointers to each cell -std::vector*>> find_rows(rapidxml::xml_node<>* sheet, - int start_row, - const int stop_row, - int start_col, - const int stop_col){ - - /*Rows and columns are 1-based because both Excel and R treat arrays - this way*/ - int row_repeat_count; - int col_repeat_count; - rapidxml::xml_node<>* row = sheet->first_node("table:table-row"); - rapidxml::xml_node<>* cell; - - if (start_row < 1){ - start_row = 1; - } - if (start_col < 1){ - start_col = 1; - } - int nrows = stop_row - start_row + 1; - - std::vector*>> rows((nrows < 1) ? 1 : nrows); - - for (int i = 1; i <= stop_row || stop_row < 1; ){ - // i keeps track of what nominal row we are on - - - // Check for row repeats - if (row->first_attribute("table:number-rows-repeated") == nullptr){ - row_repeat_count = 1; - } else { - row_repeat_count = std::atoi(row->first_attribute("table:number-rows-repeated")->value()); - } - // Stop if all repeats done, or if we're at the last requested row - for (int r_repeat = 0; r_repeat < row_repeat_count && (stop_row < 1 || r_repeat + i <= stop_row); r_repeat++){ - - // Check size of container. - if ((int)rows.size() < i - start_row + 1){ - rows.resize(rows.size() * 2); - } - // If this row is blank (i.e. it contains only one or no children, which have no contents) - if (row->first_node()->next_sibling() == 0 && row->first_node()->first_node() == 0){ - // Look ahead. 
If this is the last row, stop, otherwise add a blank row - if(row->next_sibling() == 0){ - break; - } - // Otherwise leave the row blank - - // if row is not blank, and in range deal with cells - } else if(i + r_repeat >= start_row) { - unsigned int last_non_blank = 0; - cell = row->first_node("table:table-cell"); - for (int j = 1; j <= stop_col || stop_col < 1; ){ - // Check for column repeats - if (cell->first_attribute("table:number-columns-repeated")){ - col_repeat_count = std::atoi(cell->first_attribute("table:number-columns-repeated")->value()); - } else { - col_repeat_count = 1; - } - - // Stop if all column repeats done, or if we're at the last requested row - for (int c_repeat = 0; c_repeat < col_repeat_count && (stop_col < 1 || c_repeat + j <= stop_col); c_repeat++){ - bool is_blank = true; - // If this cell is blank (i.e. contains no children) - if (cell->first_node() == 0){ - // Look ahead. If this is the last column, stop. - if(cell->next_sibling() == 0){ - break; - } - } else { - // Otherwise mark that cell is not blank - is_blank = false; - } - // If we're in range add pointer to the array - if (stop_col < 1 || j + c_repeat >= start_col){ - rows[i - start_row].push_back(cell); - if(!is_blank){ - last_non_blank = rows[i - start_row].size(); - } - } - - j++; - - } - cell = cell->next_sibling("table:table-cell"); - // If that was the last cell, stop. - if (cell == 0){ - break; - } - - } - // Remove trailing blank cells - rows[i - start_row].resize(last_non_blank); - - } - i++; - } - row = row->next_sibling("table:table-row"); - // If that was the last row, stop. 
- if (row == 0){ - break; - } - - } - // Remove trailing empty elements - unsigned int rowsize = 0; - for (unsigned int i = 0; i < rows.size(); i++){ - if(rows[i].size() > 0){ - rowsize = i; - } - } - rows.resize(rowsize + 1); - return rows; -} [[cpp11::register]] cpp11::strings read_ods_(const std::string file, @@ -273,4 +78,6 @@ cpp11::strings read_ods_(const std::string file, } } return cell_values; - } \ No newline at end of file + } + + \ No newline at end of file diff --git a/src/read_ods_internals.cpp b/src/read_ods_internals.cpp new file mode 100644 index 0000000..0c58cb3 --- /dev/null +++ b/src/read_ods_internals.cpp @@ -0,0 +1,201 @@ +#include "read_ods_internals.h" + +std::string parse_p(rapidxml::xml_node<>* node){ + /*Deal with text inside cells. Cells can contain just text (node_data), or a + mixture of text and other nodes (node_element). We usually just want the text + from these nodes (e.g. if there's a link), but we also need to consider the + text:s node, which saves repeated spaces*/ + std::string out; + char* name; + int rep_space; + for (rapidxml::xml_node<>* n = node->first_node(); n; n=n->next_sibling()){ + if (n->type() == rapidxml::node_element) + { + name = n->name(); + if (strcmp(name,"text:s") == 0){ + if(n->first_attribute("text:c") != NULL){ + rep_space = atoi(n->first_attribute("text:c")->value()); + } else { + rep_space = 1; + } + out = out.append(std::string(rep_space, ' ')); + } else if (strcmp(name,"text:line-break") == 0){ + out = out.append("\n"); + } else if (strcmp(name, "text:a") == 0){ + if(!(n->first_node("text:a"))){ //Prevent crash by making pathological recursive links + out = out.append(parse_p(n)); + } + } else { + out = out.append(n->value()); + } + } + else if (n->type() == rapidxml::node_data){ + out = out.append(n->value()); + } + } + return out; +} + +std::string parse_textp(rapidxml::xml_node<>* cell){ + std::string out; + int i = 0; + for (rapidxml::xml_node<>* n = cell->first_node("text:p"); n ; 
n=n->next_sibling("text:p")){ + if (i > 0){ + out = out.append("\n"); + } + out = out.append(parse_p(n)); + i++; + } + return out; +} + +std::string parse_single_cell(rapidxml::xml_node<>* cell, bool formula_as_formula, bool use_office_value){ + std::string cell_value; + + char* value_type = (cell->first_attribute("office:value-type") != 0) ? + cell->first_attribute("office:value-type")->value() : NULL; + if(formula_as_formula && cell->first_attribute("table:formula")){ + cell_value = cell->first_attribute("table:formula")->value(); + } else { + cell_value = (cell->first_node("text:p") != 0) ? parse_textp(cell) : ""; + if((value_type) && + + ((cell_value.length() == 0 && use_office_value) || + (strcmp(value_type, "float") == 0 || + strcmp(value_type, "currency") == 0|| + strcmp(value_type, "percentage") == 0))){ + + cell_value = cell->first_attribute("office:value")->value(); + } + } + return cell_value; +} + +// Make an array of pointers to each cell +std::vector*>> find_rows(rapidxml::xml_node<>* sheet, + int start_row, + const int stop_row, + int start_col, + const int stop_col){ + + /*Rows and columns are 1-based because both Excel and R treat arrays + this way*/ + int row_repeat_count; + int col_repeat_count; + + rapidxml::xml_node<>* cell; + + if (start_row < 1){ + start_row = 1; + } + if (start_col < 1){ + start_col = 1; + } + int nrows = stop_row - start_row + 1; + + std::vector*>> rows((nrows < 1) ? 
1 : nrows); + + rapidxml::xml_node<>* row = sheet->first_node("table:table-row"); + + // If table has no rows or cells, return blank + if (row == 0 || row->first_node("table:table-cell") == 0){ + return rows; + } + + for (int i = 1; i <= stop_row || stop_row < 1; ){ + // i keeps track of what nominal row we are on + + + // Check for row repeats + if (row->first_attribute("table:number-rows-repeated") == nullptr){ + row_repeat_count = 1; + } else { + row_repeat_count = std::atoi(row->first_attribute("table:number-rows-repeated")->value()); + } + // Stop if all repeats done, or if we're at the last requested row + for (int r_repeat = 0; r_repeat < row_repeat_count && (stop_row < 1 || i <= stop_row); r_repeat++){ + + // Check size of container. + if ((int)rows.size() < i - start_row + 1){ + rows.resize(rows.size() * 2); + } + // If this row is blank (i.e. it contains only one or no children, which have no contents) + if (row->first_node()->next_sibling() == 0 && row->first_node()->first_node() == 0){ + // Look ahead. 
If this is the last row, stop, otherwise add a blank row + if(row->next_sibling() == 0){ + break; + } + // Otherwise leave the row blank + + // if row is not blank, and in range deal with cells + } else if(i >= start_row) { + unsigned int last_non_blank = 0; + cell = row->first_node(); + for (int j = 1; j <= stop_col || stop_col < 1; ){ + // find first cell or covered cell + while(cell != 0){ + if (strcmp(cell->name(),"table:table-cell")==0 || strcmp(cell->name(), "table:covered-table-cell")==0){ + break; + } else { + cell = cell->next_sibling(); + } + } + // Check for column repeats + if (cell->first_attribute("table:number-columns-repeated")){ + col_repeat_count = std::atoi(cell->first_attribute("table:number-columns-repeated")->value()); + } else { + col_repeat_count = 1; + } + + // Stop if all column repeats done, or if we're at the last requested row + for (int c_repeat = 0; c_repeat < col_repeat_count && (stop_col < 1 || j <= stop_col); c_repeat++){ + bool is_blank = true; + // If this cell is blank (i.e. contains no children) + if (cell->first_node() == 0){ + // Look ahead. If this is the last column, stop. + if(cell->next_sibling() == 0){ + break; + } + } else { + // Otherwise mark that cell is not blank + is_blank = false; + } + // If we're in range add pointer to the array + if (stop_col < 1 || j >= start_col){ + rows[i - start_row].push_back(cell); + if(!is_blank){ + last_non_blank = rows[i - start_row].size(); + } + } + j++; + } + cell = cell->next_sibling(); + // If that was the last cell, stop. + if (cell == 0){ + break; + } + + } + // Remove trailing blank cells + rows[i - start_row].resize(last_non_blank); + + } + i++; + } + row = row->next_sibling("table:table-row"); + // If that was the last row, stop. 
+ if (row == 0){ + break; + } + + } + // Remove trailing empty elements + unsigned int rowsize = 0; + for (unsigned int i = 0; i < rows.size(); i++){ + if(rows[i].size() > 0){ + rowsize = i; + } + } + rows.resize(rowsize + 1); + return rows; +} diff --git a/src/read_ods_internals.h b/src/read_ods_internals.h new file mode 100644 index 0000000..189e389 --- /dev/null +++ b/src/read_ods_internals.h @@ -0,0 +1,18 @@ +#pragma once + +#include "cpp11.hpp" +#include "cpp11/r_string.hpp" + +#include "rapidxml/rapidxml.hpp" + +#include +#include + +std::string parse_p(rapidxml::xml_node<>* node); +std::string parse_textp(rapidxml::xml_node<>* cell); +std::string parse_single_cell(rapidxml::xml_node<>* cell, bool formula_as_formula, bool use_office_value); +std::vector*>> find_rows(rapidxml::xml_node<>* sheet, + int start_row, + const int stop_row, + int start_col, + const int stop_col); \ No newline at end of file diff --git a/src/readxl/zip.cpp b/src/readxl/zip.cpp index f01e1d0..63b5c07 100644 --- a/src/readxl/zip.cpp +++ b/src/readxl/zip.cpp @@ -1,6 +1,5 @@ #pragma once #include "zip.h" -#include "../rapidxml/rapidxml_print.hpp" #include "cpp11/function.hpp" #include "cpp11/raws.hpp" @@ -8,7 +7,7 @@ std::string zip_buffer(const std::string& zip_path, const std::string& file_path) { - cpp11::function zip_buffer = cpp11::package("readODScpp")["zip_buffer"]; + cpp11::function zip_buffer = cpp11::package("readODS")["zip_buffer"]; cpp11::raws xml(zip_buffer(zip_path, file_path)); std::string buffer(RAW(xml), RAW(xml) + xml.size()); @@ -19,26 +18,6 @@ std::string zip_buffer(const std::string& zip_path, bool zip_has_file(const std::string& zip_path, const std::string& file_path) { - cpp11::function zip_has_file = cpp11::package("readODScpp")["zip_has_file"]; + cpp11::function zip_has_file = cpp11::package("readODS")["zip_has_file"]; return zip_has_file(zip_path, file_path); } - -std::string xml_print(std::string xml) { - rapidxml::xml_document<> doc; - - xml.push_back('\0'); - 
doc.parse<0>(&xml[0]); - - std::string s; - rapidxml::print(std::back_inserter(s), doc, 0); - - return s; -} - -[[cpp11::register]] -void zip_xml(const std::string& zip_path, - const std::string& file_path) { - - std::string buffer = zip_buffer(zip_path, file_path); - Rprintf("%s", xml_print(buffer).c_str()); -} \ No newline at end of file diff --git a/starwars.fods b/starwars.fods new file mode 100644 index 0000000..79bf872 --- /dev/null +++ b/starwars.fods @@ -0,0 +1,846 @@ + + + + LibreOffice/7.4.4.2$Windows_X86_64 LibreOffice_project/85569322deea74ec9134968a29af2df5663baa212020-06-28T20:40:59.971536573PT2M8S42021-11-29T14:09:23.133281388 + + + 0 + 0 + 6773 + 5842 + + + view1 + + + 0 + 10 + 2 + 0 + 0 + 0 + 0 + 0 + 59 + 60 + true + false + + + 0 + 11 + 2 + 0 + 0 + 0 + 0 + 0 + 59 + 60 + true + false + + + 0 + 4 + 2 + 0 + 0 + 0 + 0 + 0 + 59 + 60 + true + false + + + Sheet3 + 2499 + 0 + 59 + 60 + false + true + true + true + 12632256 + true + true + 1 + true + false + false + false + 1270 + 1270 + 1 + 1 + true + false + + + + + true + true + true + 0 + true + true + false + true + false + 12632256 + true + true + 0 + false + false + true + true + false + 3 + false + Microsoft Print to PDF + false + 
GRb+/01pY3Jvc29mdCBQcmludCB0byBQREYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATWljcm9zb2Z0IFByaW50IFRvIFBERgAAAAAAAAAAAAAWAAEANhUAAAAAAAAEAAhSAAAEdAAAM1ROVwAAAAAKAE0AaQBjAHIAbwBzAG8AZgB0ACAAUAByAGkAbgB0ACAAdABvACAAUABEAEYAAAAAAAAAAAAAAAAAAAAAAAAAAAABBAMG3ABQFAMvAQABAAkAmgs0CGQAAQAPAFgCAgABAFgCAwABAEEANAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAQAAAAIAAAABAAAA/////0dJUzQAAAAAAAAAAAAAAABESU5VIgDIACQDLBE/XXt+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAUAAAAAAAUAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAyAAAAFNNVEoAAAAAEAC4AHsAMAA4ADQARgAwADEARgBBAC0ARQA2ADMANAAtADQARAA3ADcALQA4ADMARQBFAC0AMAA3ADQAOAAxADcAQwAwADMANQA4ADEAfQAAAFJFU0RMTABVbmlyZXNETEwAUGFwZXJTaXplAEE0AE9yaWVudGF0aW9uAFBPUlRSQUlUAFJlc29sdXRpb24AUmVzT3B0aW9uMQBDb2xvck1vZGUAQ29sb3IAAAAAAAAAAAAAAAAAAAAAAAAsEQAAVjRETQEAAAAAAAAAnApwIhwAAADsAAAAAwAAAPoBTwg05ndNg+4HSBfANYHQAAAATAAAAAMAAAAACAAAAAAAAAAAAAADAAAAAAgAACoAAAAACAAAAwAAAEAAAABWAAAAABAAAEQAbwBjAHUAbQBlAG4AdABVAHMAZQByAFAAYQBzAHMAdwBvAHIAZAAAAEQAbwBjAHUAbQBlAG4AdABPAHcAbgBlAHIAUABhAHMAcwB3AG8AcgBkAAAARABvAGMAdQBtAGUAbgB0AEMAcgB5AHAAdABTAGUAYwB1AHIAaQB0AHkAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEgBDT01QQVRfRFVQTEVYX01PREUTAER1cGxleE1vZGU6OlVua25vd24= + false + 1270 + 1270 + 1 + 1 + true + false + true + true + true + true + 7 + true + + + Sheet1 + + + Sheet2 + + + Sheet3 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ??? 
+ + + + + Page 1 + + + + + + + + + + + + + + + + + + + + + + Name + + + homeworld + + + species + + + + + + Luke Skywalker + + + Tatooine + + + Human + + + + + + C-3PO + + + Tatooine + + + Human + + + + + + R2-D2 + + + Alderaan + + + Human + + + + + + Darth Vader + + + Tatooine + + + Human + + + + + + Leia Organa + + + Tatooine + + + Human + + + + + + Owen Lars + + + Tatooine + + + Human + + + + + + Beru Whitesun lars + + + Stewjon + + + Human + + + + + + R5-D4 + + + Tatooine + + + Human + + + + + + Biggs Darklighter + + + Kashyyyk + + + Wookiee + + + + + + Obi-Wan Kenobi + + + Corellia + + + Human + + + + + + + + + + + + + + + Name + + + height + + + mass + + + hair_color + + + skin_color + + + eye_color + + + birth_year + + + gender + + + + + Luke Skywalker + + + 172 + + + 77 + + + blond + + + fair + + + blue + + + 19 + + + male + + + + + C-3PO + + + 202 + + + 136 + + + none + + + white + + + yellow + + + 41.9 + + + male + + + + + R2-D2 + + + 150 + + + 49 + + + brown + + + light + + + brown + + + 19 + + + female + + + + + Darth Vader + + + 178 + + + 120 + + + brown, grey + + + light + + + blue + + + 52 + + + male + + + + + Leia Organa + + + 165 + + + 75 + + + brown + + + light + + + blue + + + 47 + + + female + + + + + Owen Lars + + + 183 + + + 84 + + + black + + + light + + + brown + + + 24 + + + male + + + + + Beru Whitesun lars + + + 182 + + + 77 + + + auburn, white + + + fair + + + blue-gray + + + 57 + + + male + + + + + R5-D4 + + + 188 + + + 84 + + + blond + + + fair + + + blue + + + 41.9 + + + male + + + + + Biggs Darklighter + + + 228 + + + 112 + + + brown + + + unknown + + + blue + + + 200 + + + male + + + + + Obi-Wan Kenobi + + + 180 + + + 80 + + + brown + + + fair + + + brown + + + 29 + + + male + + + + + + + + Name + + + homeworld + + + species + + + + + Luke Skywalker + + + Tatooine + + + Human + + + + + C-3PO + + + Tatooine + + + Human + + + + + R2-D2 + + + Alderaan + + + Human + + + + + + + + Darth Vader + + + Tatooine + + + Human + + + + + Leia 
Organa + + + Tatooine + + + Human + + + + + Owen Lars + + + Tatooine + + + Human + + + + + Beru Whitesun lars + + + Stewjon + + + Human + + + + + R5-D4 + + + Tatooine + + + Human + + + + + Biggs Darklighter + + + Kashyyyk + + + Wookiee + + + + + Obi-Wan Kenobi + + + Corellia + + + Human + + + + + + + \ No newline at end of file diff --git a/tests/testdata/empty.fods b/tests/testdata/empty.fods new file mode 100644 index 0000000..e287464 --- /dev/null +++ b/tests/testdata/empty.fods @@ -0,0 +1,240 @@ + + + + 2023-07-19T12:59:17.4890000002023-07-19T17:41:29.203000000PT1M24S2LibreOffice/7.4.4.2$Windows_X86_64 LibreOffice_project/85569322deea74ec9134968a29af2df5663baa21 + + + 0 + 0 + 6773 + 1355 + + + view1 + + + 0 + 0 + 2 + 0 + 0 + 0 + 0 + 0 + 100 + 60 + true + false + + + 0 + 0 + 2 + 0 + 0 + 0 + 0 + 0 + 100 + 60 + true + false + + + Sheet2 + 2499 + 0 + 100 + 60 + false + true + true + true + 12632256 + true + true + 1 + true + false + false + false + 1000 + 1000 + 1 + 1 + true + false + + + + + true + true + true + 0 + true + true + false + true + false + 12632256 + true + true + 0 + false + false + true + true + false + 3 + false + Microsoft Print to PDF + false + 
GRb+/01pY3Jvc29mdCBQcmludCB0byBQREYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATWljcm9zb2Z0IFByaW50IFRvIFBERgAAAAAAAAAAAAAWAAEANhUAAAAAAAAEAAhSAAAEdAAAM1ROVwAAAAAKAE0AaQBjAHIAbwBzAG8AZgB0ACAAUAByAGkAbgB0ACAAdABvACAAUABEAEYAAAAAAAAAAAAAAAAAAAAAAAAAAAABBAMG3ABQFAMvAQABAAkAmgs0CGQAAQAPAFgCAgABAFgCAwABAEEANAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAQAAAAIAAAABAAAA/////0dJUzQAAAAAAAAAAAAAAABESU5VIgDIACQDLBE/XXt+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAUAAAAAAAUAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAyAAAAFNNVEoAAAAAEAC4AHsAMAA4ADQARgAwADEARgBBAC0ARQA2ADMANAAtADQARAA3ADcALQA4ADMARQBFAC0AMAA3ADQAOAAxADcAQwAwADMANQA4ADEAfQAAAFJFU0RMTABVbmlyZXNETEwAUGFwZXJTaXplAEE0AE9yaWVudGF0aW9uAFBPUlRSQUlUAFJlc29sdXRpb24AUmVzT3B0aW9uMQBDb2xvck1vZGUAQ29sb3IAAAAAAAAAAAAAAAAAAAAAAAAsEQAAVjRETQEAAAAAAAAAnApwIhwAAADsAAAAAwAAAPoBTwg05ndNg+4HSBfANYHQAAAATAAAAAMAAAAACAAAAAAAAAAAAAADAAAAAAgAACoAAAAACAAAAwAAAEAAAABWAAAAABAAAEQAbwBjAHUAbQBlAG4AdABVAHMAZQByAFAAYQBzAHMAdwBvAHIAZAAAAEQAbwBjAHUAbQBlAG4AdABPAHcAbgBlAHIAUABhAHMAcwB3AG8AcgBkAAAARABvAGMAdQBtAGUAbgB0AEMAcgB5AHAAdABTAGUAYwB1AHIAaQB0AHkAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEgBDT01QQVRfRFVQTEVYX01PREUTAER1cGxleE1vZGU6OlVua25vd24= + false + 1000 + 1000 + 1 + 1 + true + false + true + true + true + true + 7 + true + + + Sheet1 + + + Sheet2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ??? 
+ + + + + Page 1 + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/testdata/empty.ods b/tests/testdata/empty.ods new file mode 100644 index 0000000000000000000000000000000000000000..19d94d79e044369cff9cec78531689ebc8e43b51 GIT binary patch literal 2776 zcmZ{m2T+sQ7RQ4$A%Jv2x4LPS1Qi3Q z6iEOP=^{mn^eO@ii6B{ZgKu_x&wYEpxpTk!-T9w8bLRZM^M_e5vv30bGh_hx0PQ3t z#^!JyWlWSG3K1NJL3m&=0e)T{!G7pKWn7@Q650dnhgCvj5P{xkuWKkoV6YMv6NK>a z#`+=*wL2r_`i%;*aqx<(3v91@f2OgCeFQE>B}n)8CXd>g|_tr8RL^I z)A)Z$tjY0(P*?T_s2uVWa2qcrR^lKYNp1difH;+Rlh1iHi%UWXG_p*ow35yrEF-h? zyo}>7>&RWZG@BNiO{l9lc|cHGcvJT2h*UT+0_?qvUVT?XYiQV=s!gr$?+@K{g+w}k z2tY9{J&TYXJ9}TFKT>jcU9d{a-?ST1-L>z=FfyKeNaPvgPIP$jOU_s z2$M`EJBcb46(igowFFzDfH`K)f-Zr$P?Z<_M^#HaLNYB|RQAqznJRreNj^3)8za^< z*U-sEx1ah{9v>vBj>|sph}3*$Pz&8scdBTKrP<#kOKhLjn^K7l<9t+)ao>h}v&YMW z{k+547VGVTc-1fFS`^*2aa@&z%octG$wdnlB(M#@qi#37a4c(|9MkOU4)M7XpswH| z%Y~1uY82~S3-e)GbAk7LHCQknvJQRsyS;dcn?ssR!30nD-qP1DkiTvcD{%f-XFOdN zE{tm5(*}LYCU7q_8#o8qmv|G50??aVQt0mMgRdk7e4-nT3vU2bQG`gmq#=vNRpkrT zH5y%NA8x!|yGt4vCAkC3%8fri#K}?hU zpK<_^tWL7>Mo}|Dq~OxWx%31?PIc9b*ds7v2do;OlEG(*hy6K28)=!R}C;W+5fItiS^EFlRGnSO}>yMrGzZQBD$_ zzMi+$S`K-@8tF*_4AwMp6oqJYG^%`EPS*K+3;_7{jT!L2bQYc--sNV{SjFgiSLc0$TUGm$`7LU|ADoQ1>g3F)55?ue-+EU)RbUMEx>93 zJ@%TZaM;zp=fZ?j7a{2{fGHI&OMZ%c(@u7N>+#isD5k!eH*B`JDwEk?maT3IM%2>V zycf6THdeyfH?$ge?|tr-4c+Kd{J7%7;~20z-ZF$O7_iDR@fy2q*wW^c-kx6uLpRN@ zwp6~Z+oB_0xFI9J4tag$DHWY6;Qm^zVJA_W{5#Sz7f0??FIDwAj?}Al$RqHB?^8&BfJY?b`L%#;}O#u?D4gW9^3JfBcABnqa0 zgkTh`5|Gg(1Go3qufH^i@3a-y}=IOB+vQTc(G3s8QqtM$a=v%;yh zNFX)!?VxXRumL#-XtC{vxxuB`zw2}E;E@I|#I!f4?sD!YNy}>bG4UZ)P&S2qfv;mO z@m}Y8w@cHAw)8~k&?WPiB4{1(rl4jXsp9^gMq@>18IuLqxi*!a(VFwNYRGHi`ki>S ztOrL0I&Iz1_%1r0l)G@WRKB(KBkJxtQI4HBn(JO$JZ^GyPE}55Z9KROujW?NQ9yAXEK@nMf831nj4?!l7>BmN-_bi{!9bd{(|FWHJow z7OkI}mUPmRK5j9aV&uASZ8#|RF`^(9mT%@9*X)T&RIvP0GO-+gAb@wIXp7Y6Qaj@pw7P_`BhZ8DN^c+gW1mSAGY5LuiMUQ-T5s_t?gyU*sU)& z7)<<=nuOQK{upPR2p$0dgcz%(v8}$Mxs{1B$|KOv2Z8<0PfdnClKBjm@gDv?IaC+- 
z1Oh7D2#@^)po~Jcuk)CNAVnKJuK6%2TiDbNdLs`%f^N#FA zZ*|}5fs9r-8_&QCpwEru%xze5#bju$MpAkA_L+?P)fBT;Ph6lf^AM{i^C%m*EM}pb zn(Dq%W3x_gZ~2)f*xWi%UFl(Uw#tb+E!?V)k7`tmg>sL!TPxGtap?tER^=fwUAA+7 z-0_Y`Q&+4PY_K9YTaKmV%!;}Bmuoj0QF){=>5LI1zFp9Gq3}skkget9-liR&>V|$C z;{Eu#U66nNyU&d!Wi8Lbwc``m0az=y1k*s$O0pgjxJ^ZjqQWuQ$Z$$w(0`@ju}+B3q? z%eWLj=h?@U;+i#eSADJg_2Yk69@7!U@6L&lf51OI+Tqt9<8Q|Xvp7`!y9@iFuw}gd z@MJ&he(n!HbUTc$@n7BIXW`F?{1AfK{{@(zL;PG?KSHQ6DvjYxzZV+Jf|c#?gq86~ LGYatm$KmcT+asx6 literal 0 HcmV?d00001 diff --git a/tests/testdata/flat.fods b/tests/testdata/flat.fods new file mode 100644 index 0000000..10da805 --- /dev/null +++ b/tests/testdata/flat.fods @@ -0,0 +1,309 @@ + + + + 2023-07-19T12:59:17.4890000002023-07-19T17:41:29.203000000PT1M24S2LibreOffice/7.4.4.2$Windows_X86_64 LibreOffice_project/85569322deea74ec9134968a29af2df5663baa21 + + + 0 + 0 + 6773 + 1355 + + + view1 + + + 0 + 0 + 2 + 0 + 0 + 0 + 0 + 0 + 100 + 60 + true + false + + + 0 + 0 + 2 + 0 + 0 + 0 + 0 + 0 + 100 + 60 + true + false + + + Sheet2 + 2499 + 0 + 100 + 60 + false + true + true + true + 12632256 + true + true + 1 + true + false + false + false + 1000 + 1000 + 1 + 1 + true + false + + + + + true + true + true + 0 + true + true + false + true + false + 12632256 + true + true + 0 + false + false + true + true + false + 3 + false + Microsoft Print to PDF + false + 
GRb+/01pY3Jvc29mdCBQcmludCB0byBQREYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATWljcm9zb2Z0IFByaW50IFRvIFBERgAAAAAAAAAAAAAWAAEANhUAAAAAAAAEAAhSAAAEdAAAM1ROVwAAAAAKAE0AaQBjAHIAbwBzAG8AZgB0ACAAUAByAGkAbgB0ACAAdABvACAAUABEAEYAAAAAAAAAAAAAAAAAAAAAAAAAAAABBAMG3ABQFAMvAQABAAkAmgs0CGQAAQAPAFgCAgABAFgCAwABAEEANAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAQAAAAIAAAABAAAA/////0dJUzQAAAAAAAAAAAAAAABESU5VIgDIACQDLBE/XXt+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAUAAAAAAAUAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAyAAAAFNNVEoAAAAAEAC4AHsAMAA4ADQARgAwADEARgBBAC0ARQA2ADMANAAtADQARAA3ADcALQA4ADMARQBFAC0AMAA3ADQAOAAxADcAQwAwADMANQA4ADEAfQAAAFJFU0RMTABVbmlyZXNETEwAUGFwZXJTaXplAEE0AE9yaWVudGF0aW9uAFBPUlRSQUlUAFJlc29sdXRpb24AUmVzT3B0aW9uMQBDb2xvck1vZGUAQ29sb3IAAAAAAAAAAAAAAAAAAAAAAAAsEQAAVjRETQEAAAAAAAAAnApwIhwAAADsAAAAAwAAAPoBTwg05ndNg+4HSBfANYHQAAAATAAAAAMAAAAACAAAAAAAAAAAAAADAAAAAAgAACoAAAAACAAAAwAAAEAAAABWAAAAABAAAEQAbwBjAHUAbQBlAG4AdABVAHMAZQByAFAAYQBzAHMAdwBvAHIAZAAAAEQAbwBjAHUAbQBlAG4AdABPAHcAbgBlAHIAUABhAHMAcwB3AG8AcgBkAAAARABvAGMAdQBtAGUAbgB0AEMAcgB5AHAAdABTAGUAYwB1AHIAaQB0AHkAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEgBDT01QQVRfRFVQTEVYX01PREUTAER1cGxleE1vZGU6OlVua25vd24= + false + 1000 + 1000 + 1 + 1 + true + false + true + true + true + true + 7 + true + + + Sheet1 + + + Sheet2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ??? 
+ + + + + Page 1 + + + + + + + + + + + + + A1 + + + B1 + + + C1 + + + + + A2 + + + B2 + + + C2 + + + + + A3 + + + B3 + + + C3 + + + + + + + + S2A1 + + + S2B1 + + + S2C1 + + + + + S2A2 + + + S2B2 + + + S2C2 + + + + + S2A3 + + + S2B3 + + + S2C3 + + + S2D3 + + + + + + + \ No newline at end of file diff --git a/tests/testdata/linkeddata.ods b/tests/testdata/linkeddata.ods new file mode 100644 index 0000000000000000000000000000000000000000..e8d903e020e6ff170ea12e599d22b5fcf83a755c GIT binary patch literal 3300 zcmZ{n2UJr{8iqqhP(&aUY0?4+2Bay{L8M6)X-ZF!k`N#eLMTdTN|O?h-n$_xgbqp- zQK|@H01<)EMWrcOWrKTm-Syvp?wND%oqOJI&YknU-#lYsLPyU6_*WAF#K2VJ!PJ+d z&P?raFE}(X#1HD~=Lhq0cMbGH_{yPtAz*|n(hCVj_(6Rk2=^d3)He`}^b3HxLXe(N zXrKi(CKrJ5R|FA2Q^Ix%iVgq>p-yu&6EZLa21SC=a9FR+fbXo7V~ioP6Ddq z))l~Z4zp4H1CUb!0dYyESi|<$)nFBK}A^JjCs+ntgJgbI-dv(dGqV_8oJ>&91!s?jmmm(r(np z;TjO%(i^txxB4nxl+^OJ(p~qF!9VE|{yc{0tO>#c#?49TLnPih8arQ04k?Aw?}8)) z#tR%%(=NK>IUIz2HRoL^#sj)~!6=y>K_trJIO=2VxhSLlZ_w8eiX->eBoVj8Gr=?> zsZ7Kj;fP4?frurGrEaNXbC*C?pyTjrW1mTdQrF;A(+>Il^X>-VP0_BCqz{q&ZS#$N zj1-%hy^81nA!SsSrk(eN7F~kUma=_i2X@ycwoCByCG8n`YzWKa20!P|E)b?@2`Mi~ zNate1^#FF|t2rh`nU;1dLNXr;HbLTdfcbHZBQ6o?ZO`n=d#2xB7#axnxCK*|zA1J* zCak83zi&0fgJ$)n%g|5V1^qE|^t&n>fl?>iWRZLl+ra+P&wh}PRy+giM|Fo7ikJ&G zyk}n(w3mfDvCyvT5MWaZ!Ci(SK5Ypj2Es>Q2yuGcY0@va&8h&$g~bTz(VO_o8Jpu( z`dOB8+nYm43iIEEasIpv(YK|zYewqJ{7P36Ao2;xd^Oy@HU@sq#XT+pr)b?+Qz|r& z+TEHlyQhuFT>=+c`_GxGK7G+8+gP*}e~OenB;fQxJMKE`ja%Zl48$i{#`K*tC)g?R z*m1^!lCbS&qAgy-?5j4C;?X_X+TR`4B~y7 zkFk0)*o#SAj+g@y1IwP|P~xE34{M%bX)kAcM%0#BygHN~zwc+cP*>2|$j1XBgp1p{ zI5ItQyc|(>lbvM9`ZB0vmNPiV!@e7yzdaUpp`iOtdA^C?qwG(S<3Mluw>xyFPFE25 zX2Wy0o>s_|Fod}g0i*b~V@1Iidz<8ck`vUvp9BC759t8^>rPZFY>5lLXJhC&@o-_8dh-nq}+rp0zyGB`d?%7yZerJdA^k6jzog~c@dOFu|rZ`cCwD^s~ z2m>o?@f*fCWut_}u6qLuvs;=W{jypcteS1*#){H5mI_)49@gyH=bt$0J8K@^Ba~#T z*g>*PbZo~W*H(HC-a2P0jPMnEfsC$OxU#2qcW&{0=Dehg?ul%*63wtv%xAf2VF#*~ zt!9pU&E~%bp)ibuu6KYJ<~+ zzk>NkZJ8Q3y$S1RK9z61ng`t7-VWA))w>vl!k2nvm(1WX{IOKCJh_q}=6L~8o~0}c 
zoMrbeX4SIk75gDO2N-KRCY)8D|KY>yzsH7A0E0yNR;h_)2mL_ z8t&IhCXe=#3o{Dc<@(FHMRD7Yuem`LM^i;|Js+;blAAw*EbN?j_6{#404i^(pShlF zl+k{cK5MuTR@AI-ebx{T<(;U%S;4RW&^5A8 z5TY|BeN~z9+Q0#`7(S2&vxo9Gc!y%@6Af{0oUBpOuT#Qz3Ius2)_5wF%s@sNPfn|3 zRo*EpX30RC@H=xu1IkSM-wZQ&Jr5i>PhT9jGVu_oXyI%jqUc9s`vO)mJ?Ub|KT^qq zcKyOdh9s(zPj7r=wb@L4dm}Yk;~@OGuj@>2Q3 zube%P2LkTC@{K=vrOU8HlfC?@Lwl9vmdOagUMPdN^y&?BZ~-aZRz5Mw(S}hgy%tcR7CI9l}9L4@jKh;I=1RV2e2ZXD^O$;PJUK$<92oU z`}2ahIJVY15w5^4`TB!p`0xkXgoSUQq-UO4(+g?32!%PRmLAckUeEmn4+W>eD+@!C zR+)Z>URg^UmC8OaQ#`bsIe)9={j(pN>F~_7@40hx;?+OY4=)%Zx|u3~eS2`lA`{u@ zb%xm0o2p*-guqyvImwZNKKC`3Ce8a0?WQeH!f0qddbu0C?d0>7rD1QCzuV0kX}-4> zrfJiFaca7(WwT8I$2@4ER64SZ*l)(_zA@|QkYV|*po&CzteRP(F_>5ufyd3$PqRs3 zrfg~vcO~o8JH;#Jy$r;pzs;k2zedfD0sItV5|uUgGZlj+1^rQpAf-Fw#{P3(4_U?E z%oGZ`T|09dl|@L?ka;osv&9xlxj34y9TtVjOPb?D{3zjc%D`;tyN*4Rib2c^bB_XA z4g>Ji()oYtXWYw^U#6&alZzGr08;O!`qnzWuSsE6r8F6oghupsS0B?2?S+lr1-0m5} z<$ZN$b+?}~{kaCnY7u?&Scg;Uu4`BXiKj?OBw?Cm!l<`tavch?jpS6rVmHK_Nhxk8 zOmsN*k#ml-3xaCpiF(etwheqU#6@+bl+w30bNvkNSe?nqZ5EE_Eq>!6P)#^=KbSv>GvwlIMDw`%Nq5eet>I)zdEbt{|?Q9FJXs#ZAh=~o`y1<~sl#Z;HSf3Oln;higa5kl}GB*t=tN9yIS z9n{FT<#TmX1Vtm!{WFnnw@?!n=NU`Wu+hg~J3r|YYu*@MP9QwlN4yNrQ@(2OCtYIQ5 z@h0aw7SD2XWBXXqkH_+rnFx|ECCaK8DEv=(6W3@aCGYusDmFIC>01Pu4P|v(ouzx;dhS09PD6X{7e!MC0DtB6qtV~(-}G)_ za-{luBLA(hrau1W^FMWevchlOSE@JuiyZzG{%MlmLWN_0W0^lg{Fz+8LugPB71UG8 YuT--zVPHJ^!a%*wP*ag`gfxJE0)~dXq5uE@ literal 0 HcmV?d00001 diff --git a/tests/testdata/merged.ods b/tests/testdata/merged.ods index ecada9df41b4e2820a9beb31b6ad5040aebda438..d7755850af2232e715328a171876de1b1383efeb 100644 GIT binary patch literal 3698 zcmZ`+2{@Ep8y;Dj?7JzEU1X_9D!V4@ScZ@o24k7AWrk4pealcp82b`qU$SozA&HDF zGnSBj$x>AQ(RX!y|Ns5}|D5Z*=RN1V_w!!wd!BQj=Qhxxq@o4_hD361e_MxdpE`(x}D`L70k7oVV^GCehPpI>)4(Ym0Hd&yIX@(0BPX9svkFpOEH zi#qdXY<=)#eF<86(!fh1ajDHytxYKb-DOufEzvA(=vT3_OaRHx24^%enOX>H5G7}6 z1hZ3*2?jW5nHI#(eE0>D+S3IR-<2^~MEMY^HhV!j)!eczqHacCrfZdP)L=SPLx7WK zjMepczlm51Td15kBWKx>8Q{!GoQ!(%rOftp5>|eaFS(9>7&sR8?C7fqC?5S@onw#y 
z#gwjxn0wiQI*6Aj4Gb+`>2^c|DRT@vvVU^gwto~U{IqZut$WMM(eLdpHeHXKy%Tk$z&hLLVI?O zx3}n!Mpy?;^bOC`>&J#70erpmJy{KEWozqQI#=Kl?z)tiQFh%IiJr(u%W!`lrp9Ll zprKe$>8x7ONKR042peS}PvvrPEXaJgbW`Sl^3ElGs*JjukU2yG-^cysNBmn^YfaHk8?iGPX>- zwgYSSRb_E5on76ld4tzW)++5i6VBds2v=-43=mjRPOWYw?Qb0~WsEyjsMEaRO4!oo zcU9qxVvT)M_gEk&UQ$+=TKzf8Ey~hI`Xlcx@4(x3XTNbHfZWz8Y{!Ie0eD8`rH}b| zOdMu+@QhBnWwiA=&}WAfflBnLUE)AU*vLUU)%Mp`Ru z4>QD{<-H^649?))uQkLTYLvwBDd_#Yf6cW(DrI~5Q|H&qh%Xm!Dw>B+io|5|^wPhk z%V<=s3O7yToFNR~&MI6F4Y&LjJD^D&GaZG;tm5(y2W|bI1#?VG)a~EvndK@qcq@rj zi5mCgGb=eu6g+c5B!sW{P|=%dRapUF`Ub$JUlIy_o^=IvL?{ChIbGKb*+9A!ao;ToiV)*flJ%<;*7?D%XX@%kQJRRPdb#1?@OyD3H zvj^Q(RC&U>S~1ERApI9f`L}`ysmYhX5NDG_k-&cEd?>bdq!`gGUeO;kjnIC%;ZkPRX zoP3I-wX`i_{#igQLME7GzHfG!?&SB9IwMBe8FZ%F@;N?j!SH+fymND#jRXZ0>am-u z9rAjcd9*-F;tBWW>wwg%HqW&_@Ox6(Po2&Wa1a;X}W{*%5IARYRE_-5ovsP3Z%PmfK|RC4&-iZ<8eHg;mB>nrNY+|%LJ&!6Av)UdciuImAlMhD>&$kSk!?-m*<8ys^E0Iax`4_#~ zwhQK$fA}}lXWq36IRx*;ji!yzzv+K#1H3+Ms#eO#V^hCVBe~P9*!6*zgK2SoS-|C{ zq>PF&a-J%!z{TE^2Br@kY?f>pXoBf(`JaM7&yD)w@>E5$O|CAWXyYN`Un&dCCV4T= z+v1{8b0Dyz%|m^s!xquUwUtIc#uZZ7WtQxjtY59No)e8ho_#X0!lJ<&Dk+44=O*0;eYxzjXdHT|;yvoNUNG+a-4OSV8qkqAa(fb*dJc+H zZNE;$*Uc2Y+}@77-9aQvc6L^OS*Vgc65>0HwS z6oS4So6tsGI7Nk_`k}+yjsJ;c30UT97PA)#03zuyH|alW*|xFKQ9r&9R?jZl8}wp6 zLJ~VFEZokF8{WG~|0R4h9rn)CdWpzdk^7M6CF}d7|Z`PCAXrEoeq8>oXHm2qpEEG@Atr-`0 zS3nsSr%OJp2Jxg(gj4vH+k0=*Ti_3-N3VWjxb(Vzm4}h?5Vq&9YN!n3Ze4ochnr2y z)ZY7$Z7jXPrqLj9no(J$%wgDnGkVRfTHyVXS7p%ia?}j2zjd@-CjzRNv}(MZg2r0y z6p98(HQr$FH2kd0lX84s;Plgyw=_@T6GYCM+#IV2ik8}(^)tw@)PYT|hg_s^U%Q7h z*V+b3TB=->Y88(Z>R=Mem8F@D$8@A$O#hirM)WalkCVxZ{S*MeL2e;nV^v8ly*tt_ za91aL1o}6f;jyYn$}9BXgD_%;x3X9DB@x`3Z}2{#lORiTUrrFx`$j&iPaa+Td2g`UF^nClySu#Cb2PP8%t2=mYH_9omaqqZ>_5qsCCeW(MLVk1 z^@^~H5HSs6yA>R~CR9I(wPDavWj#QXET`vrZso_STcw%Sp8ZtwSwR+?J7z-ap|%ZH zB@n)S~gx%~#Kl1)i_Jw_g+IzSifZd30cBI>nYKje9jPFp@WAlpkzWQ6_s+T{hMLU=F&s z;uo$I!_|Bz=&OZJS!_6cMn|@bf57t>(`D@Kc`TmPlk>C+Oy)A{;{Zfa-HWpu+C8%6 z?N_8zZUp0m))L@Y+?;=z)1uNRzQ0luPn8^9kSaO-hS~mYF+GW 
ze3zebDL~p}HJ-bhzQ9h`w-<8==@}-O{yP0*MthWB`X#WHJ6E*+bKqtVzERU?u{^xf3r;C3E i-X90#P$cISWX6Y#e>FzG+lJ1l)=}=m_ySoJG?v{psqhEOM{qBAK@j1tH z^vvv;HEX@=UGJQIG4+(Q`0!ptG?xvNp6aaI!M8cBFH#wKvi;bTBnCa+Ld*F->h#oUc!o9wU}Ke>6M^ z*arw?agcx$ZpL7uJ|>^+gAkmAHh*R44@86-?+h_ekFMOPYj-&owF-`XlLxmQAJ}D= z8$63kN=lZCkE%BBj=i7uyLXC9O3x}M1K=?5XDi zyXuQ6>R0wZlyTgUe=YeT&m(DE3?|(ZqpYk^O`ZnHp{nD_(gU4M`1ajUZZ4G&e8T51 zm~1)o&eL1!aNEQ@*^IHcg!$K)QfCbM10UsAzt=kiArX`+6``IMb&@D1M49(SzM|Hw zJ8@q2RBdoHyG;z1$sI7%A1^iK!67duR)1~Pj^g0+n3s8t32EDMvz3}H9QKl0MauE* zg*@Hyaw~yT*7ny~ZOIPLFN&Tg!e~>>r$&;nkdNn?Pb)4bD{qxSIkn!wOe5m4^TR5Y zYGTVcT3o||cnZ6s3Z*m`*1_*;0x4!p3+N}JC}C?a*d?2938cE8GCu}Hvj zx^Z01#bvcj;&d|45s9d=S>|;)(UpIjL49+!38&=qC?L3}rk?FB8iC_A$|9gov)Fi2R6R)2{i4&8JteV){6~K_uG6Hv!$M+JXcal z44@|`Q;M>lk2l-i5BoYcjfbrFA7f*|3QfneyI&A^_y}AbEy`wa17@>HpixuM1?N~Z z$@C25#Gq2z`CfnC0Vu z9Qir8Dy!qA=Bs7zC%(s1q-yAX{;w<+Gi`OiR3W)Bq`A-fh{r&ys>EW5G-@o{(;mm8 zyp+D0PhTy$BPD@CF1f5noNtdF&sD51xgTS=oE+9J%Oo>FGTd~b$xao@8{A)?lFOv+ zOyt2Sp-QZE-5oU|ZGNjV5lnt#GSa50T&$$l>~@*;*6;oC5|2So_}S**vmt`Oc3$0E zPUcIq3+N7~uq=CCEn1Y;Z}pRB3JJ1fR&ACx9-+dvIj(^FujI3Rw*^OM_qxBP_d11J zVf2C3uC!Th$$Y#hIQf)7t08dkrFKaIR1(Vlt$Ka}J$p;5p(ESF&=wn3hxgOnGT+^| z(Lf}ENIgG`r$ZZWKc}(tL~S=p7M9C{Z&e^SWi7YGvYEU)J%D%WT42@cw0i9rRv8a7 z=n{BczYY=UWtc-|F&@Hl>A7RXVbBFJMiPPalW_3t3Pw4Bf+qLB(k}$By}n+L0u?J~ z;zk08A^2bdLPdxMGWJREMH;jx0||lW1&W9D(f9h+heMlHI@6=s)<>3$UJyegqxOEc z%RR_h*Mo{)WVBV2VVdaS`|A34?SVh1$H4=hQw z5FE|*Gz$t+4(%ZTC97>BPZA}A0b-Qzi5m~)DZ4)NeuKO_3{#YC^#Ry=REkYftm0~n zYEUuU+-i)o@vaX}rLB**xHe5FZ!iQK9kyjm!8sl=R zh5r+&VF)uv+1yfw(}yqltx?ulGa<()hf4CrZM`-hnchKr3m6 zobS~chyZK?g8Mi;R9;@5-q|OZ@QWI&g|^;ZAG9IMsov48;ndd48I5a9!XTOU^-oZ8 zuqog@U5?9NEiltoek|CCVnaHSN#IQih=1GV zYf$UZaY(Yaf)pnOd=D^MK467Nkh*LNAa1abgt=Oc@5>nm+MI@XhNLLUy&l{`)mGX* ztyZ3>&M9tysDc5}jD_WhiioJe&k@hqleC}Y$^bEjRYW9$Id4GaakH6di(ZFA#^?cA z)6*r41X^r}B1JX_jp4Hz%#t*A4Mc&H7+Fx;N_*T)sTQ0l1dZpz<*cn-3}j9?+BoML zbP|X|VNfQfK7?~S1wN-Ev^@%LJx5L8pg$NL+8!uzn7<9KdU4NewdwS|)z>Tk#CjsA0^${D2pJ5?d!_=ZxL}}$^C78q3P&=MaBKQ4CXWvWYT?W__T)fq~lO$Db 
z*G=cvyI1i-XZ-y>@04$#CaMkN*5Qf5MhD0ku^X}o58l6binP<622f7o$1x|zZzo>kedQcqj^^jq6gX2jZCyd z=x@-`-l=OhBooe<-L^BHW7BeDETEgR*5;6eRe>iGbc0N{8VsGDWIKj%is4@K-R z60@8|YftEdolV|@QLImb%#?-Ig8>7J4q{GZ;D=}mDJ|F^`HCx>Y>!w0Jp?76i!<^i z<6dxaE_@nDkpciet4DGm9(Ba`@B(rXXZTXZEuMD*f)|*^pai8*>7PKl1VFP0^9jHx zRx~Mmt|4WGy}%*IUe(bPBL5I_NXlfsLN;f7vzp?=398g``S}FKDwLQjj{661P_(D; zkJp**Ap^^I2zVd72q{*e7gmGcNlH0_h|Xcw_^j8OPs8G2shWb9ccVNIa5_>LK{2$K zd0kS&hhu7lEnyKC!v`UgzdxLx4jbl{;0lIue(lTjIz2bVdC8R=_JnVZ@9}zFsSPBH zxSYNRAKJgp4|IR32<>5kP=w<1y9)iYjs9xl4>6_-HMGh;hAx5MiF%g8RuN)VlD&Ih zIj>MO5I>Sau4wqk>XPKu8iMYzINis|^1h{$f(40=mV<-D=@n!4-=(^W?_=9w+GJe{ zwnDZ`nTsYQoL4}KaC=O}Kf;YScvgdtk#`a)PRbf+(|^5C4gS0P=_POHq!{@1D(pjt%Ka`V)2wkYE!l`*;s$u(?rd+5vIC^t>) zVX*pYuRl|Mhm#?;85LSf70-pKkCzo3_qS0L#V22-*542I_@IO_oduF6azF`pz&PN@0B^t_+yg zU8RHx;^b5nN}FKKvRX2M)bTwQSDF_>6)pCmxds@Mpfw_h==awb3hqZ1@w0CqR2SYZ zxAw^ZWaJgewU|!yeFu0nm7W?R0hnmZm_ck-!Zb`ikCugp$W7a=dgZtr-L({LV*TMw z8d)gKyK{qnpO7O`j35}%m|}w>u|}Gewt|YD_6{sK(j6G9L;BYH^4;uZ2D^bbL&g|a zQ-r8k1<@FK&0ji-R4NO?6nT}q>*230QtLQH-1zi(=gBtU5<2bxW)qw>lq()5lgZ2D{=vY=Dj*yo8>y&9RVsBf2?UP1#}DV8$l5`Yojhl|N-mheo^ zyC0DEW2~SY)$a+XD{Gu^3-m~Wbj@$ba=yW5X60_OeF(V^oGGOcTHiO%1mlQci@8pC zDn!7*zhP1BvwX*8C$zi=|Q$TW$kkTFcn%wz}%-IApgo zUo=+vsU)@qyaZ=sSMYK~qe=ljZ*08f3;tq-4X9-@xSWpw@(aR#u+-dGhdhW&F1x`d zB`d{l)*+SZLlhY=tE;=h{BFO)rrhO}X9xu=mBzqSSErBPv9}MPnAQM`Zm^6DnD#^B zg+{mhR%ujgy|__Q?+Z3PLt7Eb=#(u*yKKmO=g}sqpOy^?SM57#xln`fsXa`GS1Odr z6vviHzbkmZtQEh+4mM{6ljfbAZaDGV{tMjjUV!H7xF3EUN#}Z6Me;VCD}TLMnR$1E zK&l*uNw3}DNEz=dvAeqq{jNI{Loh9c&)X~PB`vao+1rf42CZat51#M#U-mVOEk-{^ zG5Z0uhYRq?hQg2w0mKUR{^xxfa+|)wq>Vpn*e~UD+Vi+Oti2YGg6!}e$>j6i=tF7X zQ$JWc>CsC{kUW*#RnG>)&M&T_&>0H*8WrglD5|!pm_#lpJsJ^e05ApW9k25gh&y`>P|B2Uc8_0QGKI!EqI1P{ygy4JE!*zyR}Tu~VZJ$1I9%h1 zrQWw)t<&~Uk>B!q{Ww=6&Nj^+(^QT)3jo;l*%li!RnoUOa@nPN2NMa2fj6VvevZ9U z5|LYM5cZ-`_^;Tk@pwGW2X!7}2}N7jZ3dd9%NOO%^2;82j5T>slapMPei;~t};K~5t%v70-w)?%vWVc%b$_Z`mV|k7K-Fqnf zD(gi~5)D5ngzUQwGfWd;$3ar_rBjLQ$1jWc*Ed4$MdIyUp8^SA*uhq;bRzN5sC=FS 
zmeO=hY|aa5Z}PMi$#vvIFq-^^oQLP(QxHCw$-!L3WPz;cT(N4IHuIySPL|zS*JA{7 z%AgA|SeLB6A*w~JtLrHlDlz&|R19WNCs4)_D`S$2e0P&rkuJaG5JZn6st_%t>k4MI zr-|zwEQQDDa=-5aZ^}(=LfGk(yggTB9^VY8otLBgTmYeHIoF5uBNpif4Ada4>1&1p zd0Y*)j->Ycg;Vg)GoRM$9kx}zs0Unp2$Z;#TzchMTr9$-);kMvGXR7kP<8l?iF0-H zrz|!)muWXQNT;&eYY$=dnpnu%d1iPN7w6=BG(att!9R>wWKk7WEKS?k#@@+$9Y-de zjcIce+pF1$OOGF$B^8Cwv(gI`T^A0(jsnH6>(j#(f$IiJt+xY;O*Mikt_pP@9yZ^U z;*@v5q~&4*#(5(Eh$bUxlnS}QoHo@=U)9ZIr}hr+s8QWNBF3wg>%PsC__%XP!5)dn zakhXpzAv9dR&AbiU?9HQkWk1UShVK>42(~!I>6Bcn$GL#1{BXpYkn_~c)cH+YCdVU z;ey4!;(AY`)6ygugb0*MezM?BK>1$@r~(+&Dr1Mi2|fL2F2oY~bc`5=G9-8J%YoRy zim(d8<9ZK&#ALJ71SB?EjT#yr9<9Zov4JV`%}Kch;^kc-!&e)Owu!rw1!X#|_!YZ7 zxHh@7TIt-bu^B6dDPzTYl~JSt`@dl2T)0)QZP-7KTV>%syr)c7`SZKe2)3oRLd)fr zbnPFM^v>W0aYpb62XKoZQ_Lc2YHD&cw6qBKK?GiP-cOGwmbd;s@E}e6yi;dgNu2We z2{ueAp1doh4{k2Z84)sR>=pdJU?_{C)b(WBs%5~UG3d3^64us8 zt(xr4AeJCXGUL!g6air+t|WvU(#g$mF>I4(bKql+H*Fc}28zVIo5Tt6H%z%0k}m~9 zZA6iE?esXr_STX^CP@jO_7r|u{S`eGiFfO}@KEF<3yRqDeaiNmy^^v>d>+_>`qe;O z5MRff3foupFg?$pl>vJoqD@#i0>qvBkAoW^SUa?U@Ly%K#hITy)l3_Z5!a9{!bl3d zJ}H2$K&O_3dd2He2PD3)%?DIBzo6h5VF2X}n0dji z{`}_U-GgQtYHxLQ_33(Vr`ddsl?G7PUrR5yc&dI;hm0G46>`?Lamk*iwTrSjt6 zL%DCum`Bys7q)m!jvx6c<*0?W-kvGbBr6AxvKryr_RSE^0Ex7*i2mrN&^ppRsv`F` zbrg{eUZ7chZbLjkJ{W94OTI)`?<->RsNVQHnBE)e$BCy5g&RL*qtwH3hg2F{fwh;Q zV70oyKI1nO8Pq&%f74SS6x99#5I`O7(f$*v7Srx~_M9>>MhSFU4IVdVRLaGU2XiQc zB?!2z{5&)ryCqf60c0ofm2f${jFi6d@EUXmKaoc5gKrT4I3zN~I%h{aipnev{hS|U z-&F#^1TdU%92QtAAXHF~xBwyObN}w7h`O)(`nBaDW$9>~tg@41wY^lwbbn5|eeu?w z>A=T0S8v8?4nVL1j4Zb^^95IhF=K?@0mmj75SDQ8wuh1x3*|ncl1gO3d&(h~`N89X zf1TysLV95X))*rwgrl1PAX9^8y*+?U*x12TAETO_qydG{pf$){8 zf2GRC_#RxDra7}Gk#ETBj2Y70YX~F*tk<>-0{u9+>O4SD*#LxOe6F^XlJzZ*n`ekg z#+YkdY?HSiMRZEa?d^UUCat>TSk~KdlIC9j`>h!b8>`vUR}5u@ zDA_MsjWtR|3bR_y>3I6!!e#)SX4>owLb3tq9xOVYW}W@!zy+o0DZJkty-IuMYDAzt zw2djg0m-vccsU3}Zgl9xX>3~3ldEvSmfJqU<#uBr=ru57m zVy+{D>y~97-Ha51)82iz0&n0DxhQkjR?H`9rGG`e0NCr3W){D%es^d{*r+)*t6gHh 
znbbsZmpY7$!%7=y8~k*8N)xc(VxeY~*2&ZiQiJH*zp%gG(7xY8rvcObvD%Ri=J9Yo#7f9tRS5TlO>aU-9g&g+}zx70zUk@LT&K=s`!njq}V_}vW>#yh$UpCEMhcc0@QUT zqk(WNMj~S3b$aic!AV)ZbUyD#0LC4*N6h%t6A~$tukKtcTYW@8f<-`|6jINAF5 zHeF0eX6Mh#WpUm9s^C{9E9};Lj0O`H0Tnt{pQcs*g{vLFr&q@wZZC5laE(X$4sS0` zFH^`fi}!X+HIsJ#DAAXZ1*@=i_k~#r2xJ1~ zsyrl3kr39f=Vd9sMEM##{u`5P>it) z%ImsE8*JlqKZsAwfl-nw@R9SFnY7!Nb9oM2WMBX$&Z!x10Y?j@?{>-%fE}_ ze1d;H0C3x?%1Rs-b0OE$jsUm{#MU447Oy6*Df({^h=4;I9v1VdLLe{T@wi<82Nv;H zVGa?df=DuN^g8^Pn%#jNK`r96Pw9^%=ezpu0Pr@c0mWQZCergqT=h>tu$ln6W1oGO zXLgIC)K3WfYBXU2kUe17IzXBL_+`E2Hzq=9U?)(Dt0h12`R5&i1HBe{T63Qmxy@Q=nsR{20FG4f= zOQkrxLzz2RY`m%(T$31A~g+$a3N*$f~ifR~@@+u&rS zM(!4842IEfb@zAR;)KtCwQ$~>+U>^F5lX{hG8ALJd+@3cWHUtHP^MP-{0;~LpakCO zt`f78NT9(Ue*B&u(MG_##DK6tu@r0oAZfSLmgx_h#%-LXu_gF39mAco2XJT&?RzNj zFKta>Z1Ygn2r^njW{j=2}84=8Oe8@#_drc31@TpM`;>GEu zgn!Pn)sj>q>{N1kxe16cMED`9j1wzG>RWhAWAfi$p?q9|6Nibo7e=~ir(0rog|o3w z&SvazN#k^a4O;?K6N9(bX5R)&z|+X6sY?LSeGJs10Z5>;0Am&ifW3x;I}1d>RVmd} zYq1!KN$k20yna6T8qX003JV4SS?<#=o~!S1smEw}4AqAHu|Sq~0+0nL`hWwlj@ai@ zrn06pAj*^_66vG~))dEy%GMDk(QfD!)gCGR_@3UNwwhVo{Wt`6w$leVID^?gF&<9o z_v#FrcnS6~e$W=D4l&`o90KKIe!P} zlZEBL5gTxVC-nVs_+wQDQ98rmZeFze)e%r!e8%gjj!h<0Lioyaw#z>hACDey)3BQ# z#rIeT5c}t2#lz(+nzA-*IMZo{Z@sY{PPFpA@QTxF@p1Q$`np5z$IU@Iwtzql9B3wd zh%ft;)`>e{^Gw<7HziCJzNoWoFaxv)I4!oCF2>kHr&1Er-sEVi4`wT7I~l%KKdW4b zB+$JP3m_5>4S&V;^1M+b0%uKVn%>p-7~Bb8@P4kgfW*SWjralCHO($Y?|h{Qw+6a>`!W zsvmJX2VGMXgZi#8`wIdkff>Mp)zwpMajw3nZ`C(y_Tb9eyM{vUtB0?1Rz1a=u}~?hl6) zkkqBnoJGaxV^B^P=D4}Cs%Ot*X_Wh87K6NdiEm`gaK~5=PI^{{g`39Rsx|vpBRX-X z<@!C2M#i6X?C;n#b*jHCKm*eQ#r9{TyE8b^el>EIh66r@x@aDCqgq3s(+zUT&mTR}bIa2FS<0 z6x3a!qoGTmPNQ2`p`ws=>*1_Tk|_uOIiJ1)Q*OpR(t)WnX_2Z?VxB8rSP?PZ3mBzN z5ir4Lt)Q)~i(jMt@UEU6gX*Wk)YyMz<>Tv%m;V~SN9lEfKKNa>)uIR?Qu5#9^g4}x z>3kJHoQT=j^sX#-FpPzB;f3hdXMPq-5(e~Aix60NDfns(%`|)TK;X$vqAWK|5hoq6 zr`>=&YTu%$9y%(P=WhyHJFKFhCyn^(fr@Ya;3C7rFj6X-udGI!pgIW#&7Y6>PS^*BK z4OnRzD*(c{(bs%V?3XKJ)gh0o*t%BaFl^lS$Ya@ity93)JJo3f?SC|!A;?&u-}+^| 
z%=z=K!o*)Sx!1#L{R6JzzI?6s`cbQ$d0jWtT&$vXbZS=m{$9&U^nOyLn$VXKDQOWV1K%-Jte+(_AbFa|zjv(&Q~b21 zq|1iKq0qnCjc7xMU?Vjyj|;6F$x6g@I-#U!C8ja?_+NV80D)hN$kqq5_;Y4oKU$>) zs{Rq7!zVEIvjug}41NQMAgRBvvfi-$BTV{> z=%QCeBmNQKs^`z+-b4US0PN?DbRpod;&m*+!?KrsY=#4e(%)MCjK=>!hcLyzLt2`Z z+}`Ecf$n=Y+1yCAyw-Ql;_hU!?_}2U0Q{BCsvh)|{XpY{kIYn zTtpiW?Ckk)`iNLr6$e3HUzT5UnzYI+F2>6onJ4owx!JoOS&J~(iL9Ht0hgiDUq;%Wr#Qla6QbKLO)2sk*0lpuF_kpN9M)Q@N6 zmTk41=dvaVelD_YrPH7QR^gNe*;Iz&wu#M zmjXb6E+dH(b)%DE6wpjtTZ`|+&8MHAv4Y@ivU4%urk0D~_GL3KxxhTlv0ffMyfd5q zb!hhg2w`?3^q;N>F}R8_sT0K30{aXCfG72IQ^p}<7SyqK`=r876P5PBEarEXC3A=$qw9{+ep0R{-5r7hMR+&xLo(_UpSG#iZ@w*rkk{7c38oq zdM?j>d1udoMej~Az(W5Ca{zY#fpAyLwtwW!hK;}A_TjO3`%FI(GYB{-`qxCM*Z3UtiONhKBz6)z9bu`Ln?TFC2^<9nGvw9OzuEEDzNd?AO?lJ(tVGZk!U^-}gaM z=?(b6AX~TCyQ0;6(CUjFj5X++ErZu%?Ob^U_G4;l=FF#EA#>8&skUJ=sB9lZtGOfi z7J?6U0#m;3!`$oAsKlrbMm3UG9kaItNE@5o&bq^-GI;z$66j1l1!L57k!DtG_Y_?y zr6~NK>Yao>)jerAFt3-wrg=w7t@__P&-C?{h_)NheEv8n9_t}q+8S-DmQXxx?#k7W zxrcneMlYK;Kl~^r41FUK4|@>*Ew(@V9p&*Ooa(WJ7efTi)YbbrJ|GLF>pD~3z(W;T zl`h#76!XBQRsPuP+(@1FH?UtGoa4R3gr-7)AcF~bAYAbG80(cP3`5!^c*_glp1jy1?yocGdNpR5i-W)wn8uTQLYV;k) znDj;;j|v_|_jwa1fg1YvnSif#2%fu>G_s+YzGB>qC^XiGL96Z1* zz$Yhyrsa>X_ZxJ6YMkq)OF(6IA+5p@WuS$u{C)^Yn6he9FgBzoRC7BydUsBFE!YwI zftMcIJ1{Mq6^3XWl$pp3TKw$rE6wuvt@2XHcX{9xsyvA0GO>sdbT(43ufLI@W|+b9 zA5X{+xx9H1Aze-wjB{c;I2_Q`M#vn3NZ}U*mo92}=+k)7OiUU6HYmQ#+4yF-Y9SeW z2l>$y!Hcp9g^oH0i#%tL%JU~@7~qEWRZ zFc=@G)w#!Vix#F9YrRi}gQSs=8xqSYfKXLNfhLSiF~OHzob4jw=5&*YqU8BdL^}d+ zwAdAakn}zH+^p8TM0rw9UhxgdRBcd#)h^GubX?$!ZDFmz@cd^MX{ij%*=E}jBV$D# z&AyMtmr9Fu=@~Eb=g*}|$#nHwBcxd#q%qS>tR>nlE4@-JUZ+aCC!K#Wj+3)5bxK$Z zmkvNo5udQ3RX`zG*Vq7Dw8Wt$`;KyofO1^2Q%AEa8-^ z%P3w_@7sOjN;NxfQ7U)fXgMA`TV_qIv>jK!Ei;onVeR7ZjkTRwDCZ+pe!l$F>RdH0 zLZg9zWHc=EDuvxY6YzdKE3_;>!-MJ-DKjNNh=D{cT7?x z)321RtyBC|tSs+YtbdFWe-*%Fn2U5Vfbc^;UA}oyhx%p$qAw)OSJe;FaiPh1hz8Sp zUt%80pM$3W9|6-rFsB+r`4ow)>)C8uT-(9#&89_NpLm3HYXDC`)*7u(z;Qbnj1O@ zhJcBG8)>C4qF0REFiKI9rel?ANp|&kgQswIcFwq(!z0!$)BsDSx~>aBiGdO#t(Fkjw9Qc7EzayR`OF&vM=B>I$039cd 
z{zqa7K0BLpumg1qc5@wrFspB|5#AfXThUlNj|keC;JuXIj@j!%_Rs)%p8jBC6Pn1h zyr>f00;iYy9_)Cfh*i|DYh5be(bzIBAm<~{4U*E!p>(&kLn*r0v$V6Yf=YYbsC|~C z9F9W}voQ-M8?8(cmxIzzuz;H!jj@<4E_;}1Z1}N&pS&9>cFalLcBqgn{FuiSrn%t1 zqb2LLecdkgk^t$dpdmKIh9=BPT$psQDN!w3%l);G;51HrYI7FXtEe)&n}xRobR9^s zX3>ag+Wv|iW;u0Y4MK0~U|69c+%?XI$tnd3@%;2w#&k+%aNR zD4E4(#K*A0G*dUXqcXjAB9=N>_K>d0N2pDgaq-?SuVQhe&M8DQ-x(@5BQ{3M;qmJn z{j%g%kN>g`r2gR|yWKFHHvd~J*>TQ{lP+g?7wO!@U&p#e{7{aR<<1-@|Ex7 zx)o5Unv^0g{RFcvTe5-~3v;=TU5W=ULOc61swNuewN#_ri+m-x7)EJn!9}$G(7n#Y zq6etu{>&RAb;O>aiMWaArqDQa8mf+nBj{~h0fM`Fo?gU`QyH*};4P`<1kj_B8uPRb z^(63kd8KQ!f%0q1(>(nNh!U5Vi~ZkUxMbwMwc16@9y*|kPVvR*Ci*T$7-b4>?1vr1 z>Udc3f#99t=-KJCM~kjm`|_vBQRyFcacj0BL*aFw2PnDfz-t!Ga6Rm~*xKpQMlc9t z%o30zH7#fzPPV^@vmLuJFkA$gkklYn$KLauxUMii&9{n;e9<<`1J~fKSLK zzonv$C~q(1rRUlT2fnuY1ua@5__3yR3RTrl(gT0%5PW<033EP#yBP~RGj9{a_4aLr zlwYZGwCVm5Ad>P@h+DzI= zR0hoP;Vndqny5uw*i^)3<_>mV+PEJ^4lb(qQdKSP?T>7*?nz++pP1W~l1yZS$NI^= z0=zPtF7B8My-wX12n9o?uu~W9z+o?H&dSP$@*(HJV5EgfWdyEqCiSCRxyO~l6X#Q->jTm= z45nmlz>YwGfRy3?FBtrJ`;n2O-ZKV9#0^GHGGGN-^;mylE}yRk%~?W%!C0>5XFI;y z9rti@@xB|6kx`hO*(yLV2`!0X$gU*`8h1~yPcAHe|JMiyk5(a9o4eDT5uB$r3RdKJce31B}R3~ z?pnoBXPwG}YcFr**&yEO=!He%Z61x|*Xyv1%@Cdseuh6jc`*fnWcEbF5n=AGBFXk# zCO*{Kf7QYGhEUE2`2?;+5|Bmf>9UMl!kFpsNW&ql**Z>_pPE*%Ospz+E+qNbmbLv%l5ph9CvzyS)d>wQsDn zo$R%?2n~&58cZ#+XlNJ=mPxmQb7@+viDV$G0?nKFHGRqs(m^}p@C2s|1%y0}_^OI6 zSk2m(*MvPdl9rt>t$Lz9w>j+x=RCYK2!WK|-(tkI@SrHfeZyvG774*&V3$R5S8iV; zupv3RaC|lS;4$E`1{v1fq1c>YYij4(UnC&>DTp$`4?~*)f!j~Ig5Tgq+%jE3B_DT4 zRMHQ^T$p9M4g^|Cb!(Gd)G1ZO>o9iw+gC|9L&VzPsF632*7*b6IGI%Eh1RJ=6^9|m zAFpsYr^{x`WL&!^D-6Uf1&gEOJT9178mvdLs+974wGTp%hORQ*ZtDY=kUFl?poc6@ zY?1MaaMZii=1Fl@z>tvXmipt_OzFrM_irZF*-bRx5uIKSBTglM%WJ@(Jk%+!oaINu z7fzSUdUK`wts~pc8RD28$NgLRBb==7-ra20+ZA_IgG7)%_jyZK{V2brk7~Jz=`XNeHg`J z)`=irE$a`RRhL1=Ba|;QmvT_rIyK6N4I@la#CSQeD$|LlnlTF03kZyPk-po(IsQ2w z`&)Q3te6lItc#GQCmr@|L;?KCpO@};C%b6~zlmejOU>tpn#<1jT8hgj#8dccrxgVv 
zt5mMEooo}NJ7CgTW>Ij-Dg-ck!Edb3Tqq$*_sZJLTW_Msoa)V0Kw91Z?i%3;-+IVckGZ$qj#MYCbv4b`SlL(=g-xT_CdZ-iV^>a*Z^_G76pms9D zRyD^6`!*)v%L?gxPCUFxgq%R>H~LT-Ve_}gtu$3{2zb!^NysR(tz};4>Dj+AaoNjf zTm3vVJuS#sMmdpo|4ursMM=d~9DAw|)9#=%5wGNovI013|BT;i1oIV)a=Wc?z)b&qgR16O_tUG)YfyGhsgo6Da+{M*|atwz8`Z}^htva6IuKx;yanch<}Xue`7AnAlsWRS?l@5>N6^NNsYcSD~3 zpzK%t^lDHqO1v_l5sId1N3lD3Q8GQhd=JN^MLAQ&%za0PA@=wOudME8c~M_VQvWuR zWR3FJL9jPuFV^u&C$YpC^_KhjTd>BnYw8_emJ;6d?+(DUBcDI&PaDJBKpVUmwO_q3 z8tMF?Mv3!XQUNWJG1fj`$Ese=QtTG4ReFM}kxeOzsa}x*ad9hvDA9>QSX3pnCrx|X zY{k)MK6e8bC&?$Z68eC`xqAyej&|rssfTr5zlYGn1T;=`yax6IUpg(_w$Soxg)X$6 z9QFjBVG-@s)GxSX{vO~PvJc(Uik>B>w`$6cEl`Se0ZtQ^3TNsSSSX#}o@fFqZ8}{@ zmT5;<8d5sLH%EBuPo@*WHYg@CwSAJGxEYLXB?i^W!iL^^q1>J=NGJ*N=f$t6bX4cx zSs6}wD_1w3FYGTi7e}IF%m~{RODX4EFe`D*s0#Jw7wndmO_PhbUe?a(e;(gYO-;2p zZmKZv9~EMUYYorkXb}g6ys2;kF}FI{vDwrtej%=*I=tT&u4-W+&rph!jihj*&bdT- z*tmCi?NeEmP??V^wh==HHGs{aQ>z5nF zl1MbPd@ftJzFh7{pFNiq`C4=;qT4Q!&;;3=7rsKIYtqv{Yz}2=LnW#vh7s1q3(`zS zr8}zLvTR>X+uN^A`|f@1bE_x>cUr(4&?MFzA%OY0YUqprPs6W zWu<3rW^Cl(NM~Y*N9Usz>fyArbjCFy{e>DOIq&PKZWyamqrxf*JMlvk(betDTx z4bIk796AXBv~2K}qgB}yE-B~SvQ0$o)9iL)@IBR$&@lKB0%n&R9ylB$YcR&D@{api z(XLm7JYkERMOl@wdNm5oK{bRizWfm|QUm^ues~QslHG3-&gD7UKOL_1>NZdMWpnGE zP)Cl=Ed|5pZ)&eJmmg{REqi=4bvrU}bDHcvT_%p#O=CKe-D~7>ecFDc8-2oFE31Al zWXV60V{#k4U42HNEL!%Q+^erlCm@03?)O)6|MQMzk%NQ2p8X#^|4FDM|A(G>1_nl!M!*DY z?CA}h?Ct-kqrria`^7V<1bM!n{JH)22|eGY{_hJhwzSc6H2QNg|FnF-wf>`*nU$W2 zkpsPenWL4St;7GY!9QK{*>-=g|DP`TPrZP<@tyv6uYcMn2S+_er{ApoPgnb=ezrEY zPPWgN{2#l70T@PFSm`aTxQqz>U-1R-`Y%e7)}4gNi`(cg7Y4?Rl|QkoDeb{bfXi{e zkJjZMLME-H2W~@A&MTfi*t%OXfO=g5&g>aTnR zWz&MEEz*I`>2g>6rK=##Z)ge796NG1(pfYIr|{ifQE6JyzhGVI(0Cdcr`(Bqs?9h7R)If35+r^Kb3XA8+{gUjP1s41ae~0$%>NpJ(`U zlt07pcQNZ3H~(&Tzz;qAetPilJ%6wNp2e=en+yHFNMHZn@%IwrXC(jC)0-Ip?mvpf zfA94>^gov$e>Y{;-;0rd@Ao_A{p$kHx&Jjy{Mqxr#mwJVWa9l#i1-~L|8;x+?)M8L z|9)@&599nRV*VQG&&_)7`6ui>W9Hv2_a8?3F9`a-miY`sf494T81!%a{O^l=M$o?- zBj0}->p$@HzYq4WQ?mYt!T!eA|L-Wd|1iqmc>CW+c}C8^+x$O_@;mo^o`O^n}2a;z*{<}%={}+6glY|5~69@ + + + 
2023-07-19T12:59:17.4890000002023-07-19T17:41:29.203000000PT1M24S2LibreOffice/7.4.4.2$Windows_X86_64 LibreOffice_project/85569322deea74ec9134968a29af2df5663baa21 + + + 0 + 0 + 6773 + 1355 + + + view1 + + + 0 + 0 + 2 + 0 + 0 + 0 + 0 + 0 + 100 + 60 + true + false + + + 0 + 0 + 2 + 0 + 0 + 0 + 0 + 0 + 100 + 60 + true + false + + + Sheet2 + 2499 + 0 + 100 + 60 + false + true + true + true + 12632256 + true + true + 1 + true + false + false + false + 1000 + 1000 + 1 + 1 + true + false + + + + + true + true + true + 0 + true + true + false + true + false + 12632256 + true + true + 0 + false + false + true + true + false + 3 + false + Microsoft Print to PDF + false + GRb+/01pY3Jvc29mdCBQcmludCB0byBQREYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATWljcm9zb2Z0IFByaW50IFRvIFBERgAAAAAAAAAAAAAWAAEANhUAAAAAAAAEAAhSAAAEdAAAM1ROVwAAAAAKAE0AaQBjAHIAbwBzAG8AZgB0ACAAUAByAGkAbgB0ACAAdABvACAAUABEAEYAAAAAAAAAAAAAAAAAAAAAAAAAAAABBAMG3ABQFAMvAQABAAkAmgs0CGQAAQAPAFgCAgABAFgCAwABAEEANAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAQAAAAIAAAABAAAA/////0dJUzQAAAAAAAAAAAAAAABESU5VIgDIACQDLBE/XXt+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAUAAAAAAAUAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAyAAAAFNNVEoAAAAAEAC4AHsAMAA4ADQARgAwADEARgBBAC0ARQA2ADMANAAtADQARAA3ADcALQA4ADMARQBFAC0AMAA3ADQAOAAxADcAQwAwADMANQA4ADEAfQAAAFJFU0RMTABVbmlyZXNETEwAUGFwZXJTaXplAEE0AE9yaWVudGF0aW9uAFBPUlRSQUlUAFJlc29sdXRpb24AUmVzT3B0aW9uMQBDb2xvck1vZGUAQ29sb3IAAAAAAAAAAAAAAAAAAAAAAAAsEQAAVjRETQEAAAAAAAAAnApwIhwAAADsAAAAAwAAAPoBTwg05ndNg+4HSBfANYHQAAAATAAAAAMAAAAACAAAAAAAAAAAAAADAAAAAAgAACoAAAAACAAAAwAAAEAAAABWAAAAABAAAEQAbwBjAHUAbQBlAG4AdABVAHMAZQByAFAAYQBzAHMAdwBvAHIAZAAAAEQAbwBjAHUAbQBlAG4AdABPAHcAbgBlAHIAUABhAHMAcwB3AG8AcgBkAAAARABvAGMAdQBtAGUAbgB0AEMAcgB5AHAAdABTAGUAYwB1AHIAaQB0AHkAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEgBDT01QQVRfRFVQTEVYX01PREUTAER1cGxleE1vZGU6OlVua25vd24= + false + 1000 + 1000 + 1 + 1 + true + false + true + true + true + true + 7 + true + + + Sheet1 + + + Sheet2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ??? + + + + + Page 1 + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/testdata/norows.fods b/tests/testdata/norows.fods new file mode 100644 index 0000000..b814270 --- /dev/null +++ b/tests/testdata/norows.fods @@ -0,0 +1,237 @@ + + + + 2023-07-19T12:59:17.4890000002023-07-19T17:41:29.203000000PT1M24S2LibreOffice/7.4.4.2$Windows_X86_64 LibreOffice_project/85569322deea74ec9134968a29af2df5663baa21 + + + 0 + 0 + 6773 + 1355 + + + view1 + + + 0 + 0 + 2 + 0 + 0 + 0 + 0 + 0 + 100 + 60 + true + false + + + 0 + 0 + 2 + 0 + 0 + 0 + 0 + 0 + 100 + 60 + true + false + + + Sheet2 + 2499 + 0 + 100 + 60 + false + true + true + true + 12632256 + true + true + 1 + true + false + false + false + 1000 + 1000 + 1 + 1 + true + false + + + + + true + true + true + 0 + true + true + false + true + false + 12632256 + true + true + 0 + false + false + true + true + false + 3 + false + Microsoft Print to PDF + false + 
GRb+/01pY3Jvc29mdCBQcmludCB0byBQREYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATWljcm9zb2Z0IFByaW50IFRvIFBERgAAAAAAAAAAAAAWAAEANhUAAAAAAAAEAAhSAAAEdAAAM1ROVwAAAAAKAE0AaQBjAHIAbwBzAG8AZgB0ACAAUAByAGkAbgB0ACAAdABvACAAUABEAEYAAAAAAAAAAAAAAAAAAAAAAAAAAAABBAMG3ABQFAMvAQABAAkAmgs0CGQAAQAPAFgCAgABAFgCAwABAEEANAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAQAAAAIAAAABAAAA/////0dJUzQAAAAAAAAAAAAAAABESU5VIgDIACQDLBE/XXt+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAUAAAAAAAUAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAyAAAAFNNVEoAAAAAEAC4AHsAMAA4ADQARgAwADEARgBBAC0ARQA2ADMANAAtADQARAA3ADcALQA4ADMARQBFAC0AMAA3ADQAOAAxADcAQwAwADMANQA4ADEAfQAAAFJFU0RMTABVbmlyZXNETEwAUGFwZXJTaXplAEE0AE9yaWVudGF0aW9uAFBPUlRSQUlUAFJlc29sdXRpb24AUmVzT3B0aW9uMQBDb2xvck1vZGUAQ29sb3IAAAAAAAAAAAAAAAAAAAAAAAAsEQAAVjRETQEAAAAAAAAAnApwIhwAAADsAAAAAwAAAPoBTwg05ndNg+4HSBfANYHQAAAATAAAAAMAAAAACAAAAAAAAAAAAAADAAAAAAgAACoAAAAACAAAAwAAAEAAAABWAAAAABAAAEQAbwBjAHUAbQBlAG4AdABVAHMAZQByAFAAYQBzAHMAdwBvAHIAZAAAAEQAbwBjAHUAbQBlAG4AdABPAHcAbgBlAHIAUABhAHMAcwB3AG8AcgBkAAAARABvAGMAdQBtAGUAbgB0AEMAcgB5AHAAdABTAGUAYwB1AHIAaQB0AHkAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEgBDT01QQVRfRFVQTEVYX01PREUTAER1cGxleE1vZGU6OlVua25vd24= + false + 1000 + 1000 + 1 + 1 + true + false + true + true + true + true + 7 + true + + + Sheet1 + + + Sheet2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ??? 
+ + + + + Page 1 + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/testdata/onerow.ods b/tests/testdata/onerow.ods new file mode 100644 index 0000000000000000000000000000000000000000..2a643c290d3489e8cabd611ca73bb59650203544 GIT binary patch literal 2856 zcmZ{m2{e>#8^>pOhcxzGwuCV?kz~!DB*|8mA!4Q(%rG-DW8bnxwvZ)6gi+JbVr-!r zvgDP7?6PGWqOxWA_(t#fPT$x2z4vqO^E~%+&i}g4b3NDZzW!EbtbZH?{550%F1Q|ee!b*xCD*zD4+-83#A}P=dO;ifNdA+fFiyv0zZ4jB$KfoE3 zE{`@u(tWrUgBBnC3y|{w1ra=&V&k>3pgls9<1V;DjGGvoeV1G4|9&=EfnbAy7{|n$ zC-WbRqh@=+R%bWfuX4x_xmsUS0+n^eU{~w+w$KW>H~Ad<(l{lBARlKMA6Xo}*HuDi z=WdDOFL{$Qe`!29@^1R`(j!~xr>43~)>)*3%7US&8N%Ekb*-vuWu!c*va>T_$r%>n zxZs5YPS*y@_Mg75W+|S{Ke#ra3<#E*>BtT!>-uo_WykZLd79QJt{7W^tQ&Q+NN6m; zstm0~O?FZsV$;3cw(ia3wg+XRFIa6oVbo`I>8~FMuPMG%K#%3nop(c`&w+5fH9u6P zXj5{?giV8gVA%CcPY!?ihPKNZlx>T(LNW@rDoOOS;`Ey=Q;afg{ekXAZ95BpPm)1? zKI{*aNoA+4N{2-VzYUwVnr?)0jGcyDf^gyWGWx^p z`iTpcRQ1+_)4BCE!9yw&o6_mmMM2TwP+`jZiem4gxdfDQLb4b|7;k6d?NZp}D#6Es z1gDU7i3W{&F>8lRXXp|dFIpANwO+Q-k5?5e$MX$kzmvE+ZV+n=zTz&Io{)3Q}8k#*-*W$YYaoer2kM0QD*g6>CEn6AP8xf^bZ-peCNrux+0?w?` zU?%aNH5_0p$U#>AY}l9(ja2+BXFC?1{h0D9lI3i+M_5_0)t`Gudv1GMPd(3nT_q+0 zDG!yicXeie;(Ru&*ok-OJhp7dXA-nGc>sXDJyyW~(h1+YHRZ&lGn|Pg zm`Ud~0-m&wGZRmm9X)x7iKjh7zhI=nA#>F4)(u@6<5^afwCf@Vd|@E-3-6EZ~zP6O!q*&8}5ZudU^`7E`96|O*Y6j`a;MzY0YlzQ>bJ6oGcIwGfLA5Gd|K?{?A zlm`rk>^$xERTQ;xs;B}^bM&-NEptvCwGVue;W6rHi(y1R7Q_Lv97?zzF4}MpfKQNt6MC1dGl< zV}hJTf|t#0GoAZKeG{~C{?j3~PMO!M=fKT_gPkHE#3p% zWw-`_#$(^gN&}a-!dP6Yg{JpRV_Kj>%~Kw#M^5W_}9?BVs6Rtq<5_lDi<&#evPi8_2wOso6(AGEDx zr`bcz73AD|ef(?Dh*+oN5wiTg6Bb5Ae2c?co`ssJB=C2yDC3LD-|7@{4lztLPuZ0# zNS}Uw!V)^y##3rHmj)vrKcH@WzJjLV`z8{|h$!OKSIn2L5VFKc2%Kb$DT}mtMO}Jg za8oL)$_7hGFa?=bqPwhlFkIkdk(|I4+K_ ziOx1ZrKI+!%nzeAa<~Gi#q+6q(;Ir}p&g_lDE!!@(07MeuL=e5pyj~GGh#nt(>*6rc16d|xxv=c|e)zmdaNZ61_fvQZx z@WGF!Z>swi&=C6w0qw}hMY)=x6eO3KA^!$(%z1Q5QoAJ4$R*dlQf!3wSw}UcsNZg^ z6?84qu&-~$&{GSP5|*rMt?B~o@P9I)E_+t*NPN|8afiM{uONm?Z>0OP{#ktkBl&rg z-BXnXucOWfZ;zCTo?w3H-_N7bY0Xy`fB--oGZjA_2%3c0Ka3JD)mbzt3y!A#h;0j= 
ztht@;XnHKd)kry=+bc74*iq9DtzEpMd+Ux(A>*~j*vbnNjj?+$q0lJb?m3oJW(|Ab zqkCgnLcLrj*#))U(_GsdSRRrrQ}UT^rIq+h#Gy@AQHrQpc_34S79^5eg2`F-;Q4)%eC4g8)|yH&B#c+47@&yU0m0ZhzcrjGV-C%<9?vJ9<#%9 z<%Bzow9MRBG@YAKmGWZdS)}YxZmIByye7qS=R^`RPg)RlYzDH_Ubesv#9rQIX7sOo z16dS*Ix*%C0Q}wa?Z5ps{^J0x%=T6P?CO3gY?zO~Jl^lR-}}uk-FIde`mgTuyYP2L sehK~A|ACm_L;PM_zd}SZOO5Gte^#268Hfof003fMhnbc5onwFXZ_ahpPyhe` literal 0 HcmV?d00001 diff --git a/tests/testthat.R b/tests/testthat.R index 37364a6..b5aaa69 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,3 +1,4 @@ library(testthat) library(readODS) + test_check("readODS") diff --git a/tests/testthat/test_list_sheets.R b/tests/testthat/test_list_sheets.R new file mode 100644 index 0000000..b525e05 --- /dev/null +++ b/tests/testthat/test_list_sheets.R @@ -0,0 +1,19 @@ +test_that("Sheets are listed correctly", { + expect_equal(list_ods_sheets("../testdata/linkeddata.ods"), + c("Own", "contains_linked_data")) + expect_equal(list_ods_sheets("../testdata/linkeddata.ods", + include_external_data = TRUE), + c("Own", + "contains_linked_data", + "'file:///D:/Users/peter.brohan/Documents/R/readODScpp/tests/testdata/linksource.xlsx'#Sheet1")) + expect_equal(get_num_sheets_in_ods("../testdata/linkeddata.ods"), 2) + expect_equal(get_num_sheets_in_ods("../testdata/linkeddata.ods", + include_external_data = TRUE), + 3) +}) + +test_that("fods works as well", { + expect_equal(list_fods_sheets("../testdata/flat.fods"), + c("Sheet1", "Sheet2")) + expect_equal(get_num_sheets_in_fods("../testdata/flat.fods"), 2) +}) \ No newline at end of file diff --git a/tests/testthat/test_merged.R b/tests/testthat/test_merged.R new file mode 100644 index 0000000..2ac498a --- /dev/null +++ b/tests/testthat/test_merged.R @@ -0,0 +1,11 @@ +test_that("merged cells work", { + merged <- read_ods("../testdata/merged.ods") + expect_equal(merged[4, 1], 4) + expect_true(is.na(merged[4, 2])) + expect_equal(merged[4, 3], "d") + expect_equal(merged[5, 1], 5) + expect_true(is.na(merged[6, 1])) + 
expect_equal(merged[7, 1], 7) + expect_true(is.na(merged[8, 2])) + expect_equal(merged[9,2], "hidden_text") +}) diff --git a/tests/testthat/test_multiline.R b/tests/testthat/test_multiline.R index d029ed9..97649bc 100644 --- a/tests/testthat/test_multiline.R +++ b/tests/testthat/test_multiline.R @@ -1,4 +1,4 @@ test_that("multiline values", { x <- read_ods('../testdata/multiline_cells.ods', col_names = FALSE) - expect_equal(x[1,1], "Multiline cell, line 1\nMultiline cell, line 2") + expect_equal(x[1, 1], "Multiline cell, line 1\nMultiline cell, line 2") }) diff --git a/tests/testthat/test_read_fods.R b/tests/testthat/test_read_fods.R new file mode 100644 index 0000000..4077ac5 --- /dev/null +++ b/tests/testthat/test_read_fods.R @@ -0,0 +1,16 @@ +test_that("Read fods", { + expect_silent(a <- read_fods("../testdata/flat.fods")) + expect_equal(a[1,1], "A2") + b <- read_fods("../testdata/flat.fods", range = "Sheet2!B2:D3") + expect_equal(b[1,3], "S2D3") +}) + +test_that("Error when not correct", { + expect_error(read_fods("../testdata/sum.ods")) + expect_error(read_fods("../testdata/notreal.ods")) +}) + +test_that("Return blank/error if mangled FODS", { + expect_warning(read_fods("../testdata/norows.fods")) + expect_warning(read_fods("../testdata/nocells.fods")) +}) \ No newline at end of file diff --git a/tests/testthat/test_read_ods.R b/tests/testthat/test_read_ods.R index 2c870d8..6288234 100644 --- a/tests/testthat/test_read_ods.R +++ b/tests/testthat/test_read_ods.R @@ -66,4 +66,25 @@ test_that("Check names works properly", { expect_equal(colnames(x), c("a", "a", "Var.3")) expect_silent(x <- read_ods("../testdata/test_naming.ods", check_names = TRUE)) expect_equal(colnames(x), c("a", "a.1", "Var.3")) +}) + +test_that("Parses range inputs correctly", { + expect_warning(x <- read_ods("../testdata/multisheet.ods", sheet = 3, range = "Sheet2!B4:D9"), "Sheet suggested in range and using sheet") + expect_equal(x[2,2], 2) + expect_silent(x <- 
read_ods("../testdata/multisheet.ods", range = "Sheet3!D2:E4")) + expect_equal(x[1,1], 3) +}) + +test_that("Deals with repeated spaces correctly when fetching only part of sheet",{ + df <- data.frame(A = c(1, NA, NA, NA), + B = c(NA, NA, 2, NA), + C = c(NA, NA, NA, NA), + D = c(NA, NA, NA, 3)) + expect_equal(read_ods("../testdata/multisheet.ods", range = "Sheet2!B4:E7", col_names = FALSE), df) + expect_equal(read_ods("../testdata/excel_repeat.ods", range = "A9:B18", col_names = FALSE)[5,1], "C") +}) + +test_that("Warns of empty sheet", { + expect_warning(read_ods("../testdata/empty.ods")) + expect_warning(read_fods("../testdata/empty.fods")) }) \ No newline at end of file diff --git a/vignettes/overview.Rmd b/vignettes/overview.Rmd index f3645ba..b8260e9 100644 --- a/vignettes/overview.Rmd +++ b/vignettes/overview.Rmd @@ -84,9 +84,18 @@ You cannot update a missing sheet. write_ods(iris, "plant.ods", sheet = "iris", update = TRUE) ``` +## Flat ODS files (`.xml` or `.fods`) + +Can be read (but not written) with `read_fods()` (note that the same function is used to read flat files, no matter the extension). +This has the same behaviour and arguments as `read_ods()` + +```{r read fods, eval = file.exists("plant.fods")} +read_fods("plant.fods") +``` + ## Misc. -Use the function `list_ods_sheets()` to list out all sheets in an ODS file. +Use the function `list_ods_sheets()` or `list_fods_sheets()` to list out all sheets in an (F)ODS file. ```{r, list_ods_sheets} list_ods_sheets("plant.ods") From 15da0fe46b9e8859979085dfde00ded5d8927d94 Mon Sep 17 00:00:00 2001 From: Peter <44036274+pbrohan@users.noreply.github.com> Date: Sat, 22 Jul 2023 02:51:45 +0100 Subject: [PATCH 3/3] Update NEWS.md Mentioned removing read.ods and ods_sheets --- NEWS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NEWS.md b/NEWS.md index 0276b1f..dff6faf 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,10 @@ * Added a `NEWS.md` file to track changes to the package. 
* Rewrote all reading functions in C++ for significant speed increase +## Removed read.ods and ods_sheets + +These have been deprecated for several years. + ## list_ods_sheets * Added `include_external_data` as an argument (`FALSE` by default). This hides stored data from external sources not normally accessible to the user.