From 9fe56a99d668f17f05cc6dd28143615d44fc9f0c Mon Sep 17 00:00:00 2001 From: Eric Darchis Date: Mon, 20 Nov 2023 11:17:49 +0100 Subject: [PATCH 1/2] Various fixes to migration script --- migtool/migtool.py | 111 +++++++++++++++++++++++++++++---------------- 1 file changed, 71 insertions(+), 40 deletions(-) diff --git a/migtool/migtool.py b/migtool/migtool.py index 85db04b..0110505 100644 --- a/migtool/migtool.py +++ b/migtool/migtool.py @@ -1,9 +1,24 @@ +import re + import pyodbc # adapter for SQL Server import psycopg2 # adapter for PostgreSQL import configparser # used to read settings from file import datetime # used to properly format dates and datetimes import time # used to calculate time taken +# This script was created with global variables without initializing them here. Ideally, we should store a settings +# object rather than all of them separately but this works. +settings = None +EXCLUDED_COLUMNS = ["RowID"] +delete_data = False +historical = False +demo_fix = False +migration_modules = [] +old_connection = None +new_connection = None +today = datetime.date.today() +now = datetime.datetime.now() + # loads connection configuration and migration settings from a file. # In future the settings file could be specified with a parameter. @@ -29,10 +44,6 @@ def get_settings_from_file(): print(" Histrical data will be migrated to the new database.") else: print(" No historical data will be migrated.") - global today - global now - today = datetime.date.today() - now = datetime.datetime.now() except KeyError as e: print("\x1b[0;31;48m" + "Error while trying to load settings. " +\ "Please make sure the settings.ini file exists in your working directory." + "\x1b[0m") @@ -50,8 +61,9 @@ def connect(): global old_connection old_connection = pyodbc.connect(old_connection_string) except pyodbc.InterfaceError as exc: - print("\x1b[0;31;48m" + - "ERROR: Could not connect to the SQL Server database. Make sure the server is running and check your settings." 
+ + print("\x1b[0;31;48m" + "ERROR: Could not connect to the SQL Server database. " + "Make sure the server is running and check your settings." "\x1b[0m") print(exc) exit(1) @@ -61,13 +73,15 @@ def connect(): new_db = settings["NewDB"] new_connection_string = f'host={new_db["host"]} port={new_db["port"]} dbname={new_db["name"]} ' \ f'user={new_db["user"]} password={new_db["pwd"]}' - new_connection_string = f'postgres://{new_db["user"]}@{new_db["host"]}:{new_db["port"]}/{new_db["name"]}' + # new_connection_string = \ + # f'postgres://{new_db["user"]}:{new_db["pwd"]}@{new_db["host"]}:{new_db["port"]}/{new_db["name"]}' try: global new_connection new_connection = psycopg2.connect(new_connection_string) except psycopg2.OperationalError as exc: - print("\x1b[0;31;48m" + - "ERROR: Could not connect to the PostgreSQL database. Make sure the server is running and check your settings." + + print("\x1b[0;31;48m" + "ERROR: Could not connect to the PostgreSQL database. " + "Make sure the server is running and check your settings." "\x1b[0m") print(exc) exit(1) @@ -108,43 +122,35 @@ def get_db_tables(): print("Finding tables in both databases.\n") old_cursor.execute("SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE = 'BASE TABLE';") new_cursor.execute("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public';") - old_tables = list() - for x in old_cursor: - # Remove special characters at the start and end of each item when adding it to the list. - # This way the entries in the old and new list match - old_tables.append(str(x)[2:-4]) - new_tables = list() - for x in new_cursor: - # Remove special characters at the start and end of each item when adding it to the list. 
- # This way the entries in the old and new list match - new_tables.append(str(x)[2:-3]) + old_tables = [x[0] for x in old_cursor] + new_tables = [x[0] for x in new_cursor] return old_tables, new_tables # This function puts the data from a SELECT statement into string and formats it correctly so that postgres can work # with it. def generate_insertion_string(row): - row_str = "(" + row_list = [] for x in row: # Strings must be enclosed in apostrophes, also escape singe quotes in a string by doubling them if isinstance(x, str): - row_str = row_str + "'" + str(x).replace("'", "''") + "', " + row_list.append("'" + str(x).replace("'", "''") + "'") # Dates and datetimes must be enclosed in apostrophes elif isinstance(x, datetime.datetime) or isinstance(x, datetime.date): - row_str = row_str + "'" + str(x) + "', " - # If x is NoneType then str(x) get transtlated to "None", but sql wants "null" + row_list.append("'" + str(x) + "'") + # If x is NoneType then str(x) gets translated to "None", but sql wants "null" elif x is None: - row_str = row_str + "null, " + row_list.append("null") # If x is bytes we need to make them nice (start with \x and append the data converted to hex): elif isinstance(x, bytes): - row_str = row_str + "'\\x" + str(x.hex()) + "', " + row_list.append("'\\x" + str(x.hex()) + "'") else: - row_str = row_str + str(x) + ", " - row_str = row_str[:-2] + ")" + row_list.append(str(x)) + row_str = f"({', '.join(row_list)})" return row_str -# When not migrating historical data, this function figures out what colums "ValidityTo" is so we can later check for +# When not migrating historical data, this function figures out which column "ValidityTo" is so we can later check for # each row if it is still valid or already historical def get_validity_index(rows): vi = -1 @@ -164,6 +170,8 @@ def get_validity_index(rows): def get_validity(vi, row): + global today + global now if historical or ((not historical) and vi == -1): return True elif (not historical) and vi != 
-1: @@ -185,6 +193,17 @@ def get_validity(vi, row): return True +def extract_sequence_name(column_default): + if not column_default: + return None + pattern = r"nextval\('([^']*)" + match = re.search(pattern, column_default) + if match: + return match.group(1) + else: + return None + + def migrate(): # This list collects all db tables that exist only in one of the databases but not the other. lonely_tables = list() @@ -206,7 +225,8 @@ def migrate(): "\"FeedbackUUID\", \"AuditUserID\") VALUES ('2000 01 01 00:00:00.000000', 0, 0, 0);") # Set up all the columns we're going to migrate. - new_cursor.execute("SELECT COLUMN_NAME FROM information_schema.COLUMNS WHERE TABLE_NAME = '" + table + "';") + new_cursor.execute("SELECT COLUMN_NAME, COLUMN_DEFAULT " + "FROM information_schema.COLUMNS WHERE TABLE_NAME = '" + table + "';") rows = new_cursor.fetchall() # While we have the data ready: find out where dates are stored for historical data stuff. validity_index # stores in which column the date (ValidityTo) is stored @@ -214,18 +234,24 @@ def migrate(): if not historical: validity_index = get_validity_index(rows) # Finally, set up the columns to migrate - old_cols = "" - new_cols = "(" + sequence_columns = {} + old_cols_list = [] + new_cols_list = [] for row in rows: - old_cols = old_cols + str(row)[2:-3] + ", " - new_cols = new_cols + "\"" + str(row)[2:-3] + "\", " - old_cols = old_cols[:-2] - new_cols = new_cols[:-2] + ")" + if row[0] not in EXCLUDED_COLUMNS: + col_default = extract_sequence_name(row[1]) + if col_default: + sequence_columns[row[0]] = col_default + old_cols_list.append(row[0]) + new_cols_list.append(f'"{row[0]}"') + old_cols = ", ".join(old_cols_list) + new_cols = "(" + ", ".join(new_cols_list) + ")" # Get the data from the old db with these column specifications print(" Fetching data from old database.") old_cursor.execute("SELECT COUNT(*) FROM " + table + ";") - print(" Found " + str(old_cursor.fetchone())[1:-3] + " entries.") + print(f" Found 
{old_cursor.fetchone()[0]} entries.") + print(f" == old_cols: {old_cols} from {table} ==") old_cursor.execute("SELECT " + old_cols + " FROM " + table + ";") # Set up the values for the insert statement and execute @@ -247,14 +273,19 @@ def migrate(): # Not rolling back leads to an InFailedSqlTransaction exception. new_connection.rollback() pass - + except Exception as e: + print("Failed: INSERT INTO \"" + table + "\" " + new_cols + " VALUES " + row_str + ";") + raise + if sequence_columns: + print(" Data transferred, updating sequences.") + for column, sequence in sequence_columns.items(): + new_cursor.execute(f"select setval('{sequence}', max(\"{column}\")) from \"{table}\";") print(" Table " + table + " has been migrated.\n") # Table doesn't exist else: - print("\x1b[0;31;48m" + "WARNING: Table " + table + \ - " only exists in one of the databases (but not the other)! Is this correct?" + "\x1b[0m\n") - print("") + print(f"\x1b[0;31;48mWARNING: Table {table} only exists in one of the databases " + f"new: {table in new_tables}, old:{table in old_tables})! Is this correct?\x1b[0m\n") lonely_tables.append(table) # Print all tables that have not been migrated due to missing schemas: From 856af1026cddabbe5f54675e2be333b0fe0bc4c4 Mon Sep 17 00:00:00 2001 From: Eric Darchis Date: Mon, 20 Nov 2023 11:33:45 +0100 Subject: [PATCH 2/2] Build and publish both empty and demo dbs --- .github/workflows/docker-dev-cd.yml | 8 ++++++-- .github/workflows/docker.yml | 6 +++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docker-dev-cd.yml b/.github/workflows/docker-dev-cd.yml index c1a7348..f4e5fc2 100644 --- a/.github/workflows/docker-dev-cd.yml +++ b/.github/workflows/docker-dev-cd.yml @@ -15,7 +15,11 @@ jobs: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Build the pgsql Docker image + - name: Build the pgsql EMPTY Docker image + run: | + docker build . 
--tag ghcr.io/openimis/openimis-pgsql:develop-base --target base + docker push ghcr.io/openimis/openimis-pgsql:develop-base + - name: Build the pgsql DEMO Docker image run: | docker build . --tag ghcr.io/openimis/openimis-pgsql:develop - docker push ghcr.io/openimis/openimis-pgsql:develop \ No newline at end of file + docker push ghcr.io/openimis/openimis-pgsql:develop diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index af4a318..52ca2ee 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -17,7 +17,11 @@ jobs: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Build the pgsql Docker image + - name: Build the pgsql EMPTY Docker image + run: | + docker build . --target base --tag ghcr.io/openimis/openimis-pgsql:${{ github.event.inputs.tag }}-base + docker push ghcr.io/openimis/openimis-pgsql:${{ github.event.inputs.tag }}-base + - name: Build the pgsql DEMO Docker image run: | docker build . --tag ghcr.io/openimis/openimis-pgsql:${{ github.event.inputs.tag }} docker push ghcr.io/openimis/openimis-pgsql:${{ github.event.inputs.tag }}