From 15b3cef2feac7b6b15955b803bfa69e9d5f3ee9a Mon Sep 17 00:00:00 2001 From: Daniel Possenriede Date: Tue, 22 Nov 2022 20:51:23 +0100 Subject: [PATCH] Support for system catalogs Full history: 699813b5eb00d39288a1fb2f12be7a5bd8d3e7df refactor `dbListTables()` with `list_tables()`, now orders result by `table_type` and `table_name` refactor `dbExistsTable()` with `list_tables()` refactor `dbListObjects()` with `list_tables()` merge `find_table()` code into `list_fields()` `find_table()` isn't used anywhere else anymore (e.g. `exists_table()`) simplify the "get current_schemas() as table" code pass full `id` to `list_fields()` align `dbExistsTable()` with `dbListFields()` add some comments and whitespace simplify `where_schema` in `list_tables()` align `where_table` with `where_schema` in `list_tables()` add `system_catalogs` argument to `dbConnect()` add materialized view tests `list_tables()`: query system catalogs if available `list_fields()`: query system catalogs if available --- R/PqConnection.R | 1 + R/dbConnect_PqDriver.R | 14 ++++- R/dbConnect_RedshiftDriver.R | 17 ++++- R/dbListObjects_PqConnection_ANY.R | 14 ++--- R/tables.R | 94 ++++++++++++++++++++++++---- man/Postgres.Rd | 7 +++ tests/testthat/helper-with_matview.R | 24 +++++++ tests/testthat/helper-with_schema.R | 14 +++++ tests/testthat/test-list_matviews.R | 57 +++++++++++++++++ 9 files changed, 221 insertions(+), 21 deletions(-) create mode 100644 tests/testthat/helper-with_matview.R create mode 100644 tests/testthat/helper-with_schema.R create mode 100644 tests/testthat/test-list_matviews.R diff --git a/R/PqConnection.R b/R/PqConnection.R index e01b0378..3f288308 100644 --- a/R/PqConnection.R +++ b/R/PqConnection.R @@ -10,6 +10,7 @@ setClass("PqConnection", slots = list( ptr = "externalptr", bigint = "character", + system_catalogs = "logical", timezone = "character", timezone_out = "character", typnames = "data.frame" diff --git a/R/dbConnect_PqDriver.R b/R/dbConnect_PqDriver.R index fca12c9d..aad55145 100644 --- a/R/dbConnect_PqDriver.R +++ b/R/dbConnect_PqDriver.R @@ -43,6 +43,13 @@ #' @param check_interrupts Should user interrupts be checked during the query execution (before #' first row of data is available)? Setting to `TRUE` allows interruption of queries #' running too long. +#' @param system_catalogs Should `dbList*()` functions query the [`system +#' catalogs`](https://www.postgresql.org/docs/current/catalogs.html) (`TRUE`) +#' or the +#' [`information_schema`](https://www.postgresql.org/docs/current/information-schema.html)? +#' The `information_schema` does not contain PostgreSQL-specific information, +#' in particular [Materialized +#' Views](https://www.postgresql.org/docs/current/sql-creatematerializedview.html). #' @param timezone Sets the timezone for the connection. The default is `"UTC"`. #' If `NULL` then no timezone is set, which defaults to the server's time zone. #' @param timezone_out The time zone returned to R, defaults to `timezone`. @@ -60,7 +67,8 @@ dbConnect_PqDriver <- function(drv, dbname = NULL, host = NULL, port = NULL, password = NULL, user = NULL, service = NULL, ..., bigint = c("integer64", "integer", "numeric", "character"), - check_interrupts = FALSE, timezone = "UTC", timezone_out = NULL) { + check_interrupts = FALSE, system_catalogs = TRUE, + timezone = "UTC", timezone_out = NULL) { opts <- unlist(list( dbname = dbname, user = user, password = password, host = host, port = as.character(port), service = service, client_encoding = "utf8", ... @@ -70,6 +78,7 @@ dbConnect_PqDriver <- function(drv, dbname = NULL, } bigint <- match.arg(bigint) stopifnot(is.logical(check_interrupts), all(!is.na(check_interrupts)), length(check_interrupts) == 1) + stopifnot(is.logical(system_catalogs)) if (!is.null(timezone)) { stopifnot(is.character(timezone), all(!is.na(timezone)), length(timezone) == 1) } @@ -85,7 +94,8 @@ dbConnect_PqDriver <- function(drv, dbname = NULL, # timezone is set later conn <- new("PqConnection", - ptr = ptr, bigint = bigint, timezone = character(), typnames = data.frame() + ptr = ptr, bigint = bigint, system_catalogs = system_catalogs, + timezone = character(), typnames = data.frame() ) on.exit(dbDisconnect(conn)) diff --git a/R/dbConnect_RedshiftDriver.R b/R/dbConnect_RedshiftDriver.R index d2b0c0b1..fae65d10 100644 --- a/R/dbConnect_RedshiftDriver.R +++ b/R/dbConnect_RedshiftDriver.R @@ -4,7 +4,22 @@ dbConnect_RedshiftDriver <- function(drv, dbname = NULL, host = NULL, port = NULL, password = NULL, user = NULL, service = NULL, ..., bigint = c("integer64", "integer", "numeric", "character"), check_interrupts = FALSE, timezone = "UTC") { - new("RedshiftConnection", callNextMethod()) + new("RedshiftConnection", + callNextMethod( + drv = drv, + dbname = dbname, + host = host, + port = port, + password = password, + user = user, + service = service, + ..., + bigint = bigint, + check_interrupts = check_interrupts, + system_catalogs = FALSE, + timezone = timezone + ) + ) } #' @rdname Redshift diff --git a/R/dbListObjects_PqConnection_ANY.R b/R/dbListObjects_PqConnection_ANY.R index 30a90601..9331fe6f 100644 --- a/R/dbListObjects_PqConnection_ANY.R +++ b/R/dbListObjects_PqConnection_ANY.R @@ -14,11 +14,11 @@ dbListObjects_PqConnection_ANY <- function(conn, prefix = NULL, ...) { } query <- paste0( "SELECT ", null_varchar, " AS schema, table_name AS table FROM ( \n", - list_tables(conn = conn, order_by = "table_type, table_name"), + list_tables(conn = conn, order_by = "table_type, table_name"), ") as table_query \n", "UNION ALL\n", "SELECT DISTINCT table_schema AS schema, ", null_varchar, " AS table FROM ( \n", - list_tables(conn = conn, where_schema = "true"), + list_tables(conn = conn, where_schema = "true"), ") as schema_query;" ) } else { @@ -38,11 +38,11 @@ dbListObjects_PqConnection_ANY <- function(conn, prefix = NULL, ...) { ) query <- paste0( "SELECT table_schema AS schema, table_name AS table FROM ( \n", - list_tables( - conn = conn, - where_schema = where_schema, - order_by = "table_type, table_name" - ), + list_tables( + conn = conn, + where_schema = where_schema, + order_by = "table_type, table_name" + ), ") as table_query" ) } diff --git a/R/tables.R b/R/tables.R index 97f989cd..f6800bb6 100644 --- a/R/tables.R +++ b/R/tables.R @@ -122,12 +122,21 @@ db_append_table <- function(conn, name, value, copy, warn) { list_tables <- function(conn, where_schema = NULL, where_table = NULL, order_by = NULL) { - query <- paste0( - # information_schema.table docs: https://www.postgresql.org/docs/current/infoschema-tables.html - "SELECT table_schema, table_name \n", - "FROM information_schema.tables \n", - "WHERE TRUE \n" # dummy clause to be able to add additional ones with `AND` - ) + if (conn@system_catalogs) { + query <- paste0( + "SELECT table_schema, table_name \n", + "FROM ( ", list_tables_from_system_catalog(), ") AS schema_tables \n", + "WHERE TRUE \n" + ) + } else { + query <- paste0( + # information_schema.table docs: + # https://www.postgresql.org/docs/current/infoschema-tables.html + "SELECT table_schema, table_name \n", + "FROM information_schema.tables \n", + "WHERE TRUE \n" # dummy clause to be able to add additional ones with `AND` + ) + } if (is.null(where_schema)) { # `true` in `current_schemas(true)` is necessary to get temporary tables @@ -147,6 +156,38 @@ list_tables <- function(conn, where_schema = NULL, where_table = NULL, order_by query } +list_tables_from_system_catalog <- function() { + # This imitates (parts of) information_schema.tables, but includes materialized views + paste0( + # pg_class vs. information_schema: https://stackoverflow.com/a/24089729 + # pg_class docs: https://www.postgresql.org/docs/current/catalog-pg-class.html + "SELECT n.nspname AS table_schema, cl.relname AS table_name, \n", + " CASE + WHEN (n.oid = pg_my_temp_schema()) THEN 'LOCAL TEMPORARY' + WHEN (cl.relkind IN ('r', 'p')) THEN 'BASE TABLE' + WHEN (cl.relkind = 'v') THEN 'VIEW' + WHEN (cl.relkind = 'f') THEN 'FOREIGN' + WHEN (cl.relkind = 'm') THEN 'MATVIEW' + ELSE NULL + END AS table_type \n", + "FROM pg_class AS cl \n", + "JOIN pg_namespace AS n ON cl.relnamespace = n.oid \n", + # include: r = ordinary table, v = view, m = materialized view, + # f = foreign table, p = partitioned table + "WHERE (cl.relkind IN ('r', 'v', 'm', 'f', 'p')) \n", + # do not return individual table partitions + " AND NOT cl.relispartition \n", + # do not return other people's temp schemas + " AND (NOT pg_is_other_temp_schema(n.oid)) \n", + # Return only objects (relations) which the current user may access + # https://www.postgresql.org/docs/current/functions-info.html + " AND (pg_has_role(cl.relowner, 'USAGE') \n", + " OR has_table_privilege(cl.oid, 'SELECT, INSERT, UPDATE, DELETE, TRUNCATE, REFERENCES, TRIGGER') \n", + " OR has_any_column_privilege(cl.oid, 'SELECT, INSERT, UPDATE, REFERENCES') \n", + " ) \n" + ) +} + exists_table <- function(conn, id) { name <- id@name stopifnot("table" %in% names(name)) @@ -168,6 +209,14 @@ exists_table <- function(conn, id) { } list_fields <- function(conn, id) { + if (conn@system_catalogs) { + list_fields_from_system_catalog(conn, id) + } else { + list_fields_from_info_schema(conn, id) + } +} + +list_fields_from_info_schema <- function(conn, id) { name <- id@name is_redshift <- is(conn, "RedshiftConnection") @@ -185,7 +234,7 @@ list_fields <- function(conn, id) { # as there cannot be multiple tables with the same name in a single schema only_first <- FALSE - # or we have to look the table up in the schemas on the search path + # or we have to look the table up in the schemas on the search path } else if (is_redshift) { # A variant of the Postgres version that uses CTEs and generate_series() # instead of generate_subscripts(), the latter is not supported on Redshift @@ -211,10 +260,10 @@ list_fields <- function(conn, id) { # How to unnest `current_schemas(true)` array with element number (works since v9.4): # https://stackoverflow.com/a/8767450/2114932 query <- paste0( - "(", - "SELECT * FROM unnest(current_schemas(true)) WITH ORDINALITY AS tbl(table_schema, nr) \n", - "WHERE table_schema != 'pg_catalog'", - ") schemas_on_path" + "(", + "SELECT * FROM unnest(current_schemas(true)) WITH ORDINALITY AS tbl(table_schema, nr) \n", + "WHERE table_schema != 'pg_catalog'", + ") schemas_on_path" ) only_first <- TRUE } @@ -252,6 +301,29 @@ list_fields <- function(conn, id) { fields } +list_fields_from_system_catalog <- function(conn, id) { + if (exists_table(conn, id)) { + # we know from exists_table() that id@name["table"] exists + # and the user has access priviledges + tname_str <- stats::na.omit(id@name[c("schema", "table")]) + tname_qstr <- dbQuoteString(conn, paste(tname_str, collapse = ".")) + # cast to `regclass` resolves the table name according to the current + # `search_path` https://dba.stackexchange.com/a/75124 + query <- + paste0( + "SELECT attname \n", + "FROM pg_attribute \n", + "WHERE attrelid = ", tname_qstr, "::regclass \n", + " AND attnum > 0 \n", + " AND NOT attisdropped \n", + "ORDER BY attnum;" + ) + dbGetQuery(conn, query)[[1]] + } else { + stop("Table ", dbQuoteIdentifier(conn, id), " not found.", call. = FALSE) + } +} + find_temp_schema <- function(conn, fail_if_missing = TRUE) { if (!is.na(connection_get_temp_schema(conn@ptr))) return(connection_get_temp_schema(conn@ptr)) diff --git a/man/Postgres.Rd b/man/Postgres.Rd index 90cd8a77..5089ea6b 100644 --- a/man/Postgres.Rd +++ b/man/Postgres.Rd @@ -22,6 +22,7 @@ Postgres() ..., bigint = c("integer64", "integer", "numeric", "character"), check_interrupts = FALSE, + system_catalogs = TRUE, timezone = "UTC", timezone_out = NULL ) @@ -66,6 +67,12 @@ integers.} first row of data is available)? Setting to \code{TRUE} allows interruption of queries running too long.} +\item{system_catalogs}{Should \verb{dbList*()} functions query the \href{https://www.postgresql.org/docs/current/catalogs.html}{\verb{system catalogs}} (\code{TRUE}) +or the +\href{https://www.postgresql.org/docs/current/information-schema.html}{\code{information_schema}}? +The \code{information_schema} does not contain PostgreSQL-specific information, +in particular \href{https://www.postgresql.org/docs/current/sql-creatematerializedview.html}{Materialized Views}.} + \item{timezone}{Sets the timezone for the connection. The default is \code{"UTC"}. If \code{NULL} then no timezone is set, which defaults to the server's time zone.} diff --git a/tests/testthat/helper-with_matview.R b/tests/testthat/helper-with_matview.R new file mode 100644 index 00000000..1f83035a --- /dev/null +++ b/tests/testthat/helper-with_matview.R @@ -0,0 +1,24 @@ +#' Run an expression that creates a materialized view and clean up afterwards. +#' +#' @param con PqConnection. The database connection. +#' @param matview character. The materialized view name. +#' @param query character. A SELECT, TABLE, or VALUES command to populate the +#' materialized view. +#' @param expr expression. The R expression to execute. +#' @return the return value of the \code{expr}. +#' @seealso https://www.postgresql.org/docs/current/sql-creatematerializedview.html +with_matview <- function(con, matview, query, expr) { + on.exit( + DBI::dbExecute(con, paste0("DROP MATERIALIZED VIEW IF EXISTS ", matview)), + add = TRUE + ) + dbExecute( + con, + paste0( + "CREATE MATERIALIZED VIEW IF NOT EXISTS ", matview, + " AS ", query + ) + ) + + force(expr) +} diff --git a/tests/testthat/helper-with_schema.R b/tests/testthat/helper-with_schema.R new file mode 100644 index 00000000..98e70b08 --- /dev/null +++ b/tests/testthat/helper-with_schema.R @@ -0,0 +1,14 @@ +#' Run an expression that creates a schema and clean up afterwards. +#' +#' @param con PqConnection. The database connection. +#' @param schema character. The schema name. +#' @param expr expression. The R expression to execute. +#' @return the return value of the \code{expr}. +with_schema <- function(con, schema, expr) { + on.exit( + DBI::dbExecute(con, paste0("DROP SCHEMA IF EXISTS ", schema)), + add = TRUE + ) + DBI::dbExecute(con, paste0("CREATE SCHEMA IF NOT EXISTS ", schema)) + force(expr) +} diff --git a/tests/testthat/test-list_matviews.R b/tests/testthat/test-list_matviews.R new file mode 100644 index 00000000..dce12365 --- /dev/null +++ b/tests/testthat/test-list_matviews.R @@ -0,0 +1,57 @@ +if (postgresHasDefault()) { + with_database_connection(con = postgresDefault(system_catalogs = TRUE), { + + skip_if( + is(con, "RedshiftConnection"), + "Redshift doesn't expose system catalogs" + ) + skip_if_not( + dbExistsTable(con, Id(schema = "pg_catalog", table = "pg_class")), + "`pg_catalog`.`pg_class` not available" + ) + + test_that("Materialized View is listed", { + with_matview(con, "matview1", "SELECT 1 AS col1", { + + expect_true(dbExistsTable(con, "matview1")) + expect_true("matview1" %in% dbListTables(con)) + + objects <- dbListObjects(con) + quoted_tables <- + vapply( + objects$table, + dbQuoteIdentifier, + conn = con, + character(1) + ) + expect_true(dbQuoteIdentifier(con, "matview1") %in% quoted_tables) + + expect_true("col1" %in% dbListFields(con, "matview1")) + }) + }) + + test_that("Materialized View in custom schema is listed", { + with_schema(con, "matschema1", { + matview_id <- Id(schema = "matschema1", table = "matview1") + matview_chr <- "matschema1.matview1" + + with_matview(con, matview_chr, "SELECT 1 AS col1", { + + expect_true(dbExistsTable(con, matview_id)) + + objects <- dbListObjects(con, prefix = Id(schema = "matschema1")) + quoted_tables <- + vapply( + objects$table, + dbQuoteIdentifier, + conn = con, + character(1) + ) + expect_true(dbQuoteIdentifier(con, matview_id) %in% quoted_tables) + + expect_true("col1" %in% dbListFields(con, matview_id)) + }) + }) + }) + }) +}