From f179a80ebe1758d9d44d147ca39f34b4cef5209b Mon Sep 17 00:00:00 2001 From: Loren Siebert Date: Mon, 11 Dec 2023 15:38:37 -0800 Subject: [PATCH] Dedupe entries when generating schema tables Something like this should only generate one table entry for `EmailAddress`: ``` 'emailDomains', (select array_agg(split_part(value, '@', 2)) from "EmailAddress" EA where "personId"="Person".id), 'emailAddresses', (select array_agg(value) from "EmailAddress" EA where "personId"="Person".id), ``` --- pyproject.toml | 2 +- src/sinker/utils.py | 8 ++++++-- tests/test_generate_schema_tables.py | 7 +++++-- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e716255..16f8508 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "sinker" -version = "0.1.1" +version = "0.1.2" description = "Synchronize Postgres to Elasticsearch" authors = ["Loren Siebert "] license = "MIT/Apache-2.0" diff --git a/src/sinker/utils.py b/src/sinker/utils.py index 4aa745a..cfed5c9 100644 --- a/src/sinker/utils.py +++ b/src/sinker/utils.py @@ -7,10 +7,14 @@ def generate_schema_tables(view_select_query: str) -> Iterable[str]: """ - Given a view select query, return a list of tables that are referenced in the query. + Given a view select query, return a list of unique tables that are referenced in the query + in the order they were encountered. Skip anything that looks like a function call. :param view_select_query: The select query from the view """ + seen: set = set() for table_candidate in TABLE_RE.findall(view_select_query): if "(" not in table_candidate: - yield table_candidate + if table_candidate not in seen: + seen.add(table_candidate) + yield table_candidate diff --git a/tests/test_generate_schema_tables.py b/tests/test_generate_schema_tables.py index 1bf7d74..ef8cb14 100644 --- a/tests/test_generate_schema_tables.py +++ b/tests/test_generate_schema_tables.py @@ -5,8 +5,11 @@ def test_generate_schema_tables(): view_select_query = """select id, json_build_object( 'name', "name", - 'emailDomains',(select array_agg(split_part(email, '@', 2)) FROM unnest(emails) as email), + 'otherEmailDomains',(select array_agg(split_part(email, '@', 2)) FROM unnest(emails) as email), + 'emailDomains', (select array_agg(split_part(value, '@', 2)) + from "EmailAddress" EA where "personId"="Person".id), + 'emailAddresses', (select array_agg(value) from "EmailAddress" EA where "personId"="Person".id), ) as "person" from "person" """ - assert list(generate_schema_tables(view_select_query)) == ["person"] + assert list(generate_schema_tables(view_select_query)) == ["EmailAddress", "person"]