From df73a3baa016a55a46d248516281054cc45e03b3 Mon Sep 17 00:00:00 2001 From: David Chen Date: Mon, 22 Jul 2024 20:21:25 -0700 Subject: [PATCH] fix #134 hotfix decode error (#135) * fix #134 hotfix decode error * fix pre-commit * update * loose version --- .pre-commit-config.yaml | 23 ++++++------------- poetry.lock | 15 ++++++++++-- pyproject.toml | 1 + src/fuzzy_json/decoder.py | 15 +++++++++--- ...paired_json_invaild_case[case9.jsonx].json | 4 ++++ .../tests/test_data/invalid/case9.jsonx | 1 + 6 files changed, 38 insertions(+), 21 deletions(-) create mode 100644 src/fuzzy_json/tests/__snapshots__/test_decoder/test_repaired_json_invaild_case[case9.jsonx].json create mode 100644 src/fuzzy_json/tests/test_data/invalid/case9.jsonx diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bbdffd0..f560ae8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -45,24 +45,15 @@ repos: - id: codespell types_or: [python, markdown] - - repo: local + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.8.0 hooks: - id: mypy name: mypy entry: mypy - language: system types: [python] - exclude: migrations/|commands/|scripts/ - args: - [ - --pretty, - --show-error-codes, - --implicit-optional, - --follow-imports=silent, - --warn-redundant-casts, - --warn-unused-ignores, - --disallow-any-generics, - --check-untyped-defs, - --no-implicit-reexport, - --disallow-untyped-defs, - ] + exclude: migrations/|commands/|sandbox/|samples|sdk + additional_dependencies: [pytest, syrupy, json5] + args: [ + "--config-file=pyproject.toml" + ] diff --git a/poetry.lock b/poetry.lock index 91a14e4..3e964e7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "certifi" @@ -303,6 +303,17 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] +[[package]] +name = "json5" +version = "0.9.25" +description = "A Python implementation of the JSON5 data format." +optional = false +python-versions = ">=3.8" +files = [ + {file = "json5-0.9.25-py3-none-any.whl", hash = "sha256:34ed7d834b1341a86987ed52f3f76cd8ee184394906b6e22a1e0deb9ab294e8f"}, + {file = "json5-0.9.25.tar.gz", hash = "sha256:548e41b9be043f9426776f05df8635a00fe06104ea51ed24b67f908856e151ae"}, +] + [[package]] name = "multidict" version = "6.0.5" @@ -945,4 +956,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "bc2d9cdfa150d9633d0b33ebb02f08278bd84fbf195d4b7e1001c33ab3f6a519" +content-hash = "a763de743853f83831433113d92fdc43d2f2cf6cbde655146928653670b3f7cf" diff --git a/pyproject.toml b/pyproject.toml index 47a42cb..3e80397 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ classifiers = [ [tool.poetry.dependencies] python = "^3.10" +json5 = "*" [tool.poetry.group.dev] optional = true diff --git a/src/fuzzy_json/decoder.py b/src/fuzzy_json/decoder.py index 868b686..0373442 100644 --- a/src/fuzzy_json/decoder.py +++ b/src/fuzzy_json/decoder.py @@ -2,6 +2,8 @@ from functools import wraps from typing import Any, Callable +import json5 + def state(fn: Callable[[str, list[str]], str | None]) -> Callable[[str, list[str]], str]: @wraps(fn) @@ -288,10 +290,17 @@ def repair_json(json_str: str) -> str: return state_start(json_str) -def loads(json_str: str, auto_repair: bool = True) -> dict[str, Any]: +def base_loads(json_str: str) -> dict[str, Any]: try: + return json5.loads(json_str) + except Exception: return json.loads(json_str, strict=False) - except json.decoder.JSONDecodeError: + + +def loads(json_str: str, auto_repair: bool = True) -> dict[str, Any]: + try: + return base_loads(json_str) + except Exception: if not auto_repair: raise @@ -300,4 +309,4 @@ def loads(json_str: str, auto_repair: bool = True) -> dict[str, Any]: except Exception as e: raise json.decoder.JSONDecodeError(f"Failed to repair JSON: {e}", json_str, 0) - return json.loads(repaired_json, strict=False) + return base_loads(repaired_json) diff --git a/src/fuzzy_json/tests/__snapshots__/test_decoder/test_repaired_json_invaild_case[case9.jsonx].json b/src/fuzzy_json/tests/__snapshots__/test_decoder/test_repaired_json_invaild_case[case9.jsonx].json new file mode 100644 index 0000000..15def5b --- /dev/null +++ b/src/fuzzy_json/tests/__snapshots__/test_decoder/test_repaired_json_invaild_case[case9.jsonx].json @@ -0,0 +1,4 @@ +{ + "highlighted_sentence": "to choose \"a candidate who can defeat Donald Trump in November.\"", + "keyword": "defeat Donald Trump" +} diff --git a/src/fuzzy_json/tests/test_data/invalid/case9.jsonx b/src/fuzzy_json/tests/test_data/invalid/case9.jsonx new file mode 100644 index 0000000..7a81131 --- /dev/null +++ b/src/fuzzy_json/tests/test_data/invalid/case9.jsonx @@ -0,0 +1 @@ +{"keyword": "defeat Donald Trump", "highlighted_sentence": 'to choose "a candidate who can defeat Donald Trump in November."'}