Skip to content

Commit

Permalink
chore: Use ruff in version 0.1.6
Browse files (browse the repository at this point in the history)
  • Loading branch information
asgeirrr committed Nov 24, 2023
1 parent eccaab5 commit 49ea81b
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 22 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.1.4
rev: v0.1.6
hooks:
# Run the linter.
- id: ruff
Expand Down
50 changes: 33 additions & 17 deletions pgantomizer/anonymize.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,9 @@ def check_schema(cursor, schema, db_args):


def get_column_update(schema, table, column, data_type):
custom_rule = get_in(schema, [table, "custom_rules", column]) if schema[table] else None
custom_rule = (
get_in(schema, [table, "custom_rules", column]) if schema[table] else None
)

if column == get_table_pk_name(schema, table) or (
schema[table] and column in schema[table].get("raw", [])
Expand All @@ -190,7 +192,9 @@ def get_column_update(schema, table, column, data_type):
'Custom rule "{}" must provide a non-None value'.format(custom_rule)
)
else:
return "{column} = '{value}'".format(column=column, value=custom_rule["value"])
return "{column} = '{value}'".format(
column=column, value=custom_rule["value"]
)
elif custom_rule and custom_rule not in CUSTOM_ANONYMIZATION_RULES:
raise MissingAnonymizationRuleError(
'Custom rule "{}" is not defined'.format(custom_rule)
Expand Down Expand Up @@ -222,9 +226,13 @@ def anonymize_table(conn, cursor, schema, table, disable_schema_changes):
cascade = " CASCADE"

logging.debug(
"Running TRUNCATE{cascade} on {table} ...".format(table=table, cascade=cascade)
"Running TRUNCATE{cascade} on {table} ...".format(
table=table, cascade=cascade
)
)
cursor.execute(
"TRUNCATE {table} {cascade}".format(table=table, cascade=cascade)
)
cursor.execute("TRUNCATE {table} {cascade}".format(table=table, cascade=cascade))
return

# Generate list of column_update SQL snippets for UPDATE
Expand All @@ -236,20 +244,24 @@ def anonymize_table(conn, cursor, schema, table, disable_schema_changes):
updated_column_names = []
for column_name, data_type in cursor.fetchall():
if not disable_schema_changes: # Bypass schema changes if explicitly requested
prepare_column_for_anonymization(conn, cursor, table, column_name, data_type)
prepare_column_for_anonymization(
conn, cursor, table, column_name, data_type
)
column_update = get_column_update(schema, table, column_name, data_type)
if column_update is not None:
column_updates.append(column_update)
updated_column_names.append(column_name)

# Process UPDATE if any column_updates requested
if len(column_updates) > 0:
update_statement = "UPDATE {table} SET {column_updates_sql} {where_clause}".format(
table=table,
column_updates_sql=", ".join(column_updates),
where_clause="WHERE {}".format(
schema[table].get("where", "TRUE") if schema[table] else "TRUE"
),
update_statement = (
"UPDATE {table} SET {column_updates_sql} {where_clause}".format(
table=table,
column_updates_sql=", ".join(column_updates),
where_clause="WHERE {}".format(
schema[table].get("where", "TRUE") if schema[table] else "TRUE"
),
)
)
logging.debug(
"Running UPDATE on {} for columns {} ...".format(
Expand All @@ -269,7 +281,9 @@ def anonymize_db(schema, db_args, disable_schema_changes):
"SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_type <> 'VIEW' ORDER BY table_name;"
)
for table_name in cursor.fetchall():
anonymize_table(conn, cursor, schema, table_name[0], disable_schema_changes)
anonymize_table(
conn, cursor, schema, table_name[0], disable_schema_changes
)
logging.debug("Anonymization complete!")


Expand All @@ -291,9 +305,7 @@ def load_anonymize_remove(
try:
load_db_to_new_instance(dump_file, db_args)
anonymize_db(schema, db_args, disable_schema_changes)
except (
Exception
): # Any exception must result into dropping the schema to prevent sensitive data leakage
except Exception: # Any exception must result into dropping the schema to prevent sensitive data leakage
drop_schema(db_args)
raise
finally:
Expand All @@ -308,7 +320,9 @@ def main():
epilog="Beware that all tables in the target DB are dropped "
"prior to loading the dump and anonymization. See README.md for details.",
)
parser.add_argument("-v", "--verbose", action="count", help="increase output verbosity")
parser.add_argument(
"-v", "--verbose", action="count", help="increase output verbosity"
)
parser.add_argument(
"-s",
"--skip-restore",
Expand Down Expand Up @@ -349,7 +363,9 @@ def main():
help="password of the Postgres user with access to the anonymized database",
default="",
)
parser.add_argument("--host", help="host where the DB is running", default="localhost")
parser.add_argument(
"--host", help="host where the DB is running", default="localhost"
)
parser.add_argument("--port", help="port where the DB is running", default="5432")

args = parser.parse_args()
Expand Down
3 changes: 0 additions & 3 deletions tests/asserts.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
from psycopg2.extras import NamedTupleCursor


def assert_customer_anonymized(customer, name, language, currency, ip):
assert customer[1] == name
assert customer[2] == language
Expand Down
4 changes: 3 additions & 1 deletion tests/test_pgantomizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,9 @@ def test_dump_and_load(original_db, anonymized):

def test_load_anonymize_remove(dumped_db, anonymized):
assert_db_empty(anonymized)
load_anonymize_remove(DUMP_PATH, SCHEMA_PATH, leave_dump=False, db_args=ANONYMIZED_DB_ARGS)
load_anonymize_remove(
DUMP_PATH, SCHEMA_PATH, leave_dump=False, db_args=ANONYMIZED_DB_ARGS
)
assert_db_anonymized(anonymized)


Expand Down

0 comments on commit 49ea81b

Please sign in to comment.