Skip to content

Commit

Permalink
core: Speed up json parse for large strings (#24036)
Browse files Browse the repository at this point in the history
Parse timing for a large string, before and after this change:
- old: 4.657918874989264
- new: 0.023724667000351474
  • Loading branch information
nfcampos authored Jul 9, 2024
1 parent 160fc7f commit 859e434
Showing 1 changed file with 18 additions and 14 deletions.
32 changes: 18 additions & 14 deletions libs/core/langchain_core/utils/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def parse_partial_json(s: str, *, strict: bool = False) -> Any:
pass

# Initialize variables.
new_s = ""
new_chars = []
stack = []
is_inside_string = False
escaped = False
Expand Down Expand Up @@ -90,36 +90,37 @@ def parse_partial_json(s: str, *, strict: bool = False) -> Any:
return None

# Append the processed character to the new string.
new_s += char
new_chars.append(char)

# If we're still inside a string at the end of processing,
# we need to close the string.
if is_inside_string:
new_s += '"'
new_chars.append('"')

# Try to parse mods of string until we succeed or run out of characters.
while new_s:
final_s = new_s
# Reverse the stack to get the closing characters.
stack.reverse()

# Try to parse progressively shorter versions of the string until we succeed or run out of characters.
while new_chars:
# Close any remaining open structures in the reverse
# order that they were opened.
for closing_char in reversed(stack):
final_s += closing_char

# Attempt to parse the modified string as JSON.
try:
return json.loads(final_s, strict=strict)
return json.loads("".join(new_chars + stack), strict=strict)
except json.JSONDecodeError:
# If we still can't parse the string as JSON,
# try removing the last character
new_s = new_s[:-1]
new_chars.pop()

# If we got here, we ran out of characters to remove
# and still couldn't parse the string as JSON, so return the parse error
# for the original string.
return json.loads(s, strict=strict)


# Pattern compiled once at module level and reused by parse_json_markdown.
# It matches a triple-backtick fence opener, optionally followed by "json",
# and captures everything after it (DOTALL lets "." span newlines).
_json_markdown_re = re.compile(r"```(json)?(.*)", re.DOTALL)


def parse_json_markdown(
json_string: str, *, parser: Callable[[str], Any] = parse_partial_json
) -> dict:
Expand All @@ -136,7 +137,7 @@ def parse_json_markdown(
return _parse_json(json_string, parser=parser)
except json.JSONDecodeError:
# Try to find JSON string within triple backticks
match = re.search(r"```(json)?(.*)", json_string, re.DOTALL)
match = _json_markdown_re.search(json_string)

# If no match found, assume the entire string is a JSON string
if match is None:
Expand All @@ -147,11 +148,14 @@ def parse_json_markdown(
return _parse_json(json_str, parser=parser)


# Characters that _parse_json strips from both ends of its input:
# space, newline, carriage return, tab, and backtick.
_json_strip_chars = " \n\r\t`"


def _parse_json(
json_str: str, *, parser: Callable[[str], Any] = parse_partial_json
) -> dict:
# Strip whitespace and newlines from the start and end
json_str = json_str.strip().strip("`")
# Strip whitespace, newlines, and backticks from the start and end
json_str = json_str.strip(_json_strip_chars)

# handle newlines and other special characters inside the returned value
json_str = _custom_parser(json_str)
Expand Down

0 comments on commit 859e434

Please sign in to comment.