From aa5ddd199fedc2dde1fc4dc3f2d9e7d73c6c91e7 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Mon, 18 Nov 2024 09:31:46 -0500 Subject: [PATCH] replace legacy with schema validator also change cubids print-metadata-fields to account for json file errors due to files not having been validated yet --- cubids/cubids.py | 16 ++++++-- cubids/validator.py | 91 +++++++++++++++------------------------------ 2 files changed, 43 insertions(+), 64 deletions(-) diff --git a/cubids/cubids.py b/cubids/cubids.py index 44c57fdc..27f632e3 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -1336,9 +1336,19 @@ def get_all_metadata_fields(self): found_fields = set() for json_file in Path(self.path).rglob("*.json"): if ".git" not in str(json_file): - with open(json_file, "r") as jsonr: - metadata = json.load(jsonr) - found_fields.update(metadata.keys()) + # add this in case `print-metadata-fields` is run before validate + try: + with open(json_file, "r", encoding="utf-8") as jsonr: + content = jsonr.read().strip() + if not content: + print(f"Empty file: {json_file}") + continue + metadata = json.loads(content) + found_fields.update(metadata.keys()) + except json.JSONDecodeError as e: + print(f"Error decoding JSON in {json_file}: {e}") + except Exception as e: + print(f"Unexpected error with file {json_file}: {e}") return sorted(found_fields) def remove_metadata_fields(self, fields_to_remove): diff --git a/cubids/validator.py b/cubids/validator.py index d7e52fe4..7fba8138 100644 --- a/cubids/validator.py +++ b/cubids/validator.py @@ -14,9 +14,9 @@ def build_validator_call(path, ignore_headers=False): """Build a subprocess command to the bids validator.""" - # build docker call - # CuBIDS automatically ignores subject consistency. - command = ["bids-validator", path, "--verbose", "--json", "--ignoreSubjectConsistency"] + # New schema BIDS validator doesn't have an option to ignore subject consistency. + # Build the deno command to run the BIDS validator. 
+ command = ["deno", "run", "-A", "jsr:@bids/validator", path, "--verbose", "--json"] if ignore_headers: command.append("--ignoreNiftiHeaders") @@ -87,32 +87,6 @@ def parse_validator_output(output): Dataframe of validator output. """ - def get_nested(dct, *keys): - """Get a nested value from a dictionary. - - Parameters - ---------- - dct : :obj:`dict` - Dictionary to get value from. - keys : :obj:`list` - List of keys to get value from. - - Returns - ------- - :obj:`dict` - The nested value. - """ - for key in keys: - try: - dct = dct[key] - except (KeyError, TypeError): - return None - return dct - - data = json.loads(output) - - issues = data["issues"] - def parse_issue(issue_dict): """Parse a single issue from the validator output. @@ -126,30 +100,27 @@ def parse_issue(issue_dict): return_dict : :obj:`dict` Dictionary of parsed issue. """ - return_dict = {} - return_dict["files"] = [ - get_nested(x, "file", "relativePath") for x in issue_dict.get("files", "") - ] - return_dict["type"] = issue_dict.get("key", "") - return_dict["severity"] = issue_dict.get("severity", "") - return_dict["description"] = issue_dict.get("reason", "") - return_dict["code"] = issue_dict.get("code", "") - return_dict["url"] = issue_dict.get("helpUrl", "") - - return return_dict - - df = pd.DataFrame() - - for warn in issues["warnings"]: - parsed = parse_issue(warn) - parsed = pd.DataFrame(parsed) - df = pd.concat([df, parsed], ignore_index=True) - - for err in issues["errors"]: - parsed = parse_issue(err) - parsed = pd.DataFrame(parsed) - df = pd.concat([df, parsed], ignore_index=True) + return { + "location": issue_dict.get("location", ""), + "code": issue_dict.get("code", ""), + "subCode": issue_dict.get("subCode", ""), + "severity": issue_dict.get("severity", ""), + "rule": issue_dict.get("rule", ""), + } + + # Load JSON data + data = json.loads(output) + + # Extract issues + issues = data.get("issues", {}).get("issues", []) + if not issues: + return 
pd.DataFrame(columns=["location", "code", "subCode", "severity", "rule"]) + + # Parse all issues + parsed_issues = [parse_issue(issue) for issue in issues] + # Convert to DataFrame + df = pd.DataFrame(parsed_issues) return df @@ -161,12 +132,10 @@ def get_val_dictionary(): val_dict : dict Dictionary of values. """ - val_dict = {} - val_dict["files"] = {"Description": "File with warning orerror"} - val_dict["type"] = {"Description": "BIDS validation warning or error"} - val_dict["severity"] = {"Description": "gravity of problem (warning/error"} - val_dict["description"] = {"Description": "Description of warning/error"} - val_dict["code"] = {"Description": "BIDS validator issue code number"} - val_dict["url"] = {"Description": "Link to the issue's neurostars thread"} - - return val_dict + return { + "location": {"Description": "File with the validation issue."}, + "code": {"Description": "Code of the validation issue."}, + "subCode": {"Description": "Subcode providing additional issue details."}, + "severity": {"Description": "Severity of the issue (e.g., warning, error)."}, + "rule": {"Description": "Validation rule that triggered the issue."}, + }