You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
library(redcapcustodian)
library(RMariaDB)
library(DBI)
library(tidyverse)
library(dotenv)
library(lubridate)
load_dot_env("prod_migrate.env")
if (Sys.getenv("REDCAP_DB_NAME") =="") dotenv::load_dot_env(here::here(".env"))
init_etl("compare_two_mysql_dbs")
rc_source<- connect_to_redcap_db()
rc_target<- connect_to_db(
drv=RMariaDB::MariaDB(),
prefix="TARGETREDCAP"
)
# Check log tables first# check last redcap_log_event entriesget_last_log_event<-function(table, conn) {
result<- tbl(conn, table) |>
select(1, any_of(c("ts", "stored_date", "cron_run_start"))) |>
rename(
id=1,
ts=2
) |>
arrange(desc(id)) |>
head(n=1) |>
collect() |>
mutate(ts= ymd_hms(ts)) |>
mutate(
table=table,
host= dbGetInfo(conn)$host
)
return(result)
}
get_last_log_event("redcap_log_event", rc_source)
time_stamped_tables<- c(
redcapcustodian::log_event_tables,
"redcap_log_view",
"redcap_edocs_metadata",
"redcap_crons_history"
)
last_log_events<-
bind_rows(
purrr::map_df(time_stamped_tables, get_last_log_event, rc_source) |>
mutate(copy="source"),
purrr::map_df(time_stamped_tables, get_last_log_event, rc_target) |>
mutate(copy="target")
)
ts_diff<-last_log_events|>
pivot_wider(
id_cols= c("table"),
names_from="copy",
values_from= c("id", "ts")
) |>
mutate(matches=ts_source==ts_target) |>
select(-starts_with("log_event_id")) |>
mutate(time_diff=ts_source-ts_target)
# What does the diff look like?ts_diff# example tables to comparemy_example_tables<- c(
"redcap_entity_project_ownership",
"redcap_user_information",
"redcap_projects",
"redcap_auth",
"redcap_ip_banned"
)
# Prepare to compute the script's elapsed run timestart<- now()
source_tables<- dbListTables(rc_source)
# Uncomment the next line to run a quick test against a REDCap host.# Testing a full redcap database can several minutes# source_tables <- my_example_tablessource_checksums<-purrr::map_df(source_tables, get_table_checksum, rc_source)
target_tables<- dbListTables(rc_target)
# Uncomment the next line to run a quick test against a REDCap host.# Testing a full redcap database can several minutes# target_tables <- my_example_tablestarget_checksums<-purrr::map_df(target_tables, get_table_checksum, rc_target)
# compare the source and target datachecksums<- evaluate_checksums(source_checksums, target_checksums)
# save our workchecksums|> write_csv(here::here("output", "checksums.csv"))
# report on the checksum matchingchecksums|> count(matches)
checksums|> filter(!matches)
# this will always contain the viewschecksums|> filter(is.na(matches))
# compute elapsed run timefinish<- now()
finish-start
dbDisconnect(rc_source)
dbDisconnect(rc_target)
The text was updated successfully, but these errors were encountered:
One of CTS-IT's internal projects built upon https://github.com/ctsit/redcapcustodian/blob/master/report/compare_two_mysql_dbs.R adding functions to compare the final rows of select REDCap tables. This allows a rapid test of sameness on these high-traffic, add-only, time-stamped tables. The original code is https://github.com/ctsit/rcc.ctsit/blob/master/report/compare_two_mysql_dbs.R. A copy is included here
The text was updated successfully, but these errors were encountered: