Skip to content

Commit

Permalink
Fix warning output and revert change to query docstring
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewtavis committed Oct 26, 2024
1 parent 2ae5b69 commit 743e81c
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 16 deletions.
31 changes: 16 additions & 15 deletions src/scribe_data/check/check_query_forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ def check_defined_return_forms(query_text: str) -> str:
return ""


# MARK: forms order within the query
# MARK: Forms Order


def check_forms_order(query_text: str) -> bool:
Expand All @@ -420,47 +420,48 @@ def check_forms_order(query_text: str) -> bool:
Parameters
----------
query_file : str
The SPARQL query text as a string.
query_text : str
The SPARQL query text as a string.
Returns
-------
bool
True if the order of the matches, False otherwise.
bool
True if the order of the variables in the SELECT statement matches their order in the query, False otherwise.
"""

# Regex pattern to capture the variables in the SELECT statement.
select_pattern = r"SELECT\s+(.*?)\s+WHERE"

# Extracting the variables from the SELECT statement.
if select_match := re.search(select_pattern, query_text, flags=re.DOTALL):
select_vars = re.findall(r"\?(\w+)", select_match.group(1))
select_vars = re.findall(r"\?(\w+)", select_match[1])

else:
return False # invalid query format if no SELECT match.
return False # invalid query format if no SELECT match

# Exclude the first two variables from select_vars
# Exclude the first two variables from select_vars.
select_vars = select_vars[2:]
# Regex pattern to capture the variables in the WHERE clause.
dt_pattern = r"WHERE\s*\{[^}]*?wikibase:lemma\s*\?\s*(\w+)\s*[;.]\s*"
forms_pattern = r"ontolex:representation \?([^ ;]+)"
where_vars = []

# Extracting variables from the WHERE clause
# Extracting variables from the WHERE clause.
dt_match = re.findall(dt_pattern, query_text)
if dt_match == ["lemma"]:
where_vars.append("preposition")

elif dt_match:
where_vars.append(dt_match[0])

where_vars += re.findall(forms_pattern, query_text)

# Handling labels provided by the labeling service like 'case' and 'gender' in the same order as in select_vars
# Handling labels provided by the labeling service like 'case' and 'gender' in the same order as in select_vars.
for var in ["case", "gender", "auxiliaryVerb"]:
if var in select_vars:
# Insert in the corresponding index of where_vars
# Insert in the corresponding index of where_vars.
index = select_vars.index(var)
where_vars.insert(index, var)

# Check if the order of variables matches
# Check if the order of variables matches.
return select_vars == where_vars


Expand Down Expand Up @@ -500,7 +501,7 @@ def check_query_forms() -> None:
# Check the order of variables in the WHERE and SELECT clauses.
select_where_labels_matching = check_forms_order(query_text)
if not select_where_labels_matching:
error_output += f"\n{index}. {query_file_str}: The order of variables in the SELECT statement does not match the WHERE clause.\n"
error_output += f"\n{index}. {query_file_str}:\n - The order of variables in the SELECT statement does not match their order in the query.\n"
index += 1

if extract_forms_from_sparql(query_file):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Italian (Q652) nouns (Q1084) and the given forms.
# All Italian (Q652) proper nouns (Q147276) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down

0 comments on commit 743e81c

Please sign in to comment.