-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Support common table expressions (CTEs)
- Bump patch version to `0.1.3`
- Use [`sqlglot`](https://github.com/tobymao/sqlglot) for parsing
- Loading branch information
Showing 5 changed files with 86 additions and 42 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
[tool.poetry] | ||
name = "sinker" | ||
version = "0.1.2" | ||
version = "0.1.3" | ||
description = "Synchronize Postgres to Elasticsearch" | ||
authors = ["Loren Siebert <[email protected]>"] | ||
license = "MIT/Apache-2.0" | ||
|
@@ -15,6 +15,7 @@ elasticsearch = "^8.17.0" | |
environs = ">=9.5,<15.0" | ||
psycopg = "^3.1.8" | ||
pytest-mock = "^3.10.0" | ||
sqlglot = "^26.2.1" | ||
|
||
[tool.poetry.group.dev.dependencies] | ||
flake8 = ">=6,<8" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,18 @@ | ||
import re | ||
from typing import Set, Tuple | ||
|
||
from typing import Iterable | ||
import sqlglot | ||
from sqlglot.expressions import Table, CTE | ||
|
||
TABLE_RE = re.compile(r"from\s\"?(\S+)\b", re.I) | ||
|
||
|
||
def generate_schema_tables(view_select_query: str) -> Iterable[str]: | ||
def parse_schema_tables(view_select_query: str) -> Tuple[str, Set[str]]: | ||
""" | ||
Given a view select query, return a list of unique tables that are referenced in the query | ||
in the order they were encountered. | ||
Given a view select query, return a primary parent table and the set of unique tables that are referenced in the query. | ||
Skip anything that looks like a function call. | ||
:param view_select_query: The select query from the view | ||
""" | ||
seen: set = set() | ||
for table_candidate in TABLE_RE.findall(view_select_query): | ||
if "(" not in table_candidate: | ||
if table_candidate not in seen: | ||
seen.add(table_candidate) | ||
yield table_candidate | ||
parsed = sqlglot.parse_one(view_select_query) | ||
parent_table = parsed.find(Table).name | ||
tables = {table.name for table in parsed.find_all(Table)} | ||
ctes = {cte.alias for cte in parsed.find_all(CTE)} | ||
schema_tables = tables - ctes | ||
return parent_table, schema_tables |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
from sinker.utils import parse_schema_tables | ||
|
||
|
||
def test_parse_schema_tables(): | ||
view_select_query = """select id, | ||
json_build_object( | ||
'name', "name", | ||
'otherEmailDomains',(select array_agg(split_part(email, '@', 2)) FROM unnest(emails) as email), | ||
'emailDomains', (select array_agg(split_part(value, '@', 2)) | ||
from "EmailAddress" EA where "personId"="Person".id), | ||
'emailAddresses', (select array_agg(value) from "EmailAddress" EA where "personId"="Person".id), | ||
) as "person" | ||
from "person" | ||
""" | ||
parent_table, schema_tables = parse_schema_tables(view_select_query) | ||
assert parent_table == "person" | ||
assert schema_tables == {"EmailAddress", "person"} | ||
|
||
def test_parse_schema_tables_with_cte(): | ||
view_select_query = """ | ||
WITH | ||
attendees AS ( | ||
SELECT DISTINCT ON (a."personId", a."hostedEventId") | ||
a."hostedEventId", | ||
a.status, | ||
e.value as email, | ||
p."primaryOrganizationId" | ||
FROM "HostedEventAttendance" a | ||
JOIN "Person" p ON a."personId" = p.id | ||
JOIN "EmailAddress" e ON p.id = e."personId" | ||
GROUP BY | ||
a."personId", | ||
a."hostedEventId", | ||
a.status, | ||
e.value, | ||
p."primaryOrganizationId" | ||
) | ||
SELECT | ||
id, | ||
json_build_object( | ||
'summary', "name", | ||
'startTime', "timestamp", | ||
'attendees', ( | ||
SELECT json_agg(json_build_object('email', attendees.email, 'eventResponse', attendees.status)) AS formatted_attendees | ||
FROM attendees | ||
WHERE attendees."hostedEventId" = "HostedEvent".id | ||
), | ||
'organizationIds', | ||
( | ||
SELECT array_agg(attendees."primaryOrganizationId") | ||
FROM attendees | ||
WHERE attendees."hostedEventId" = "HostedEvent".id | ||
) | ||
) AS "hosted_events" | ||
FROM | ||
"HostedEvent" | ||
""" | ||
parent_table, schema_tables = parse_schema_tables(view_select_query) | ||
assert parent_table == "HostedEvent" | ||
assert schema_tables == {"EmailAddress", "HostedEvent", "HostedEventAttendance", "Person"} |