From 921d4794d593b1bb2f947ca6fc462536378c1095 Mon Sep 17 00:00:00 2001 From: Fabian Reinold <32450519+freinold@users.noreply.github.com> Date: Mon, 27 Nov 2023 13:05:38 +0100 Subject: [PATCH] JSONReader: Encoding fixes UnicodeDecodeError Provides additional encoding parameter, default is UTF-8 --- llama_hub/file/json/base.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llama_hub/file/json/base.py b/llama_hub/file/json/base.py index 4c0781a612..067a011acc 100644 --- a/llama_hub/file/json/base.py +++ b/llama_hub/file/json/base.py @@ -55,6 +55,7 @@ def load_data( file: Path, is_jsonl: Optional[bool] = False, extra_info: Optional[Dict] = None, + encoding: Optional[str] = "utf-8" ) -> List[Document]: """Load data from the input file. @@ -62,13 +63,14 @@ def load_data( file (Path): Path to the input file. is_jsonl (Optional[bool]): If True, indicates that the file is in JSONL format. Defaults to False. extra_info (Optional[Dict]): Additional information. Default is None. + encoding (Optional[str]): Encoding of file. Default is UTF-8. Returns: - List[Document]: List of documents. + List[Document]: List of documents. """ if not isinstance(file, Path): file = Path(file) - with open(file, "r") as f: + with open(file, "r", encoding=encoding) as f: data = [] if is_jsonl: for line in f: