From 806d4ae48f7cab56d740c896124d6d77d5eb8ff0 Mon Sep 17 00:00:00 2001 From: Chip Davis <62909360+chip-davis@users.noreply.github.com> Date: Tue, 9 Apr 2024 16:06:37 -0500 Subject: [PATCH] community[patch]: fixed multithreading returning List[List[Documents]] instead of List[Documents] (#20230) Description: When the DirectoryLoader was used with multithreading set to True, a bug caused the result to be a doubly nested list. As a result, downstream code could not use the from_documents method, because the result was no longer a `List[Document]` but a `List[List[Document]]`. The fix is to loop over `future.result()` and yield each item individually. Issue: #20093 Dependencies: N/A Twitter handle: N/A --- .../langchain_community/document_loaders/directory.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libs/community/langchain_community/document_loaders/directory.py b/libs/community/langchain_community/document_loaders/directory.py index 3902b8ae93cba..f181ebe95b79d 100644 --- a/libs/community/langchain_community/document_loaders/directory.py +++ b/libs/community/langchain_community/document_loaders/directory.py @@ -174,7 +174,8 @@ def lazy_load(self) -> Iterator[Document]: ) ) for future in concurrent.futures.as_completed(futures): - yield future.result() + for item in future.result(): + yield item else: for i in items: yield from self._lazy_load_file(i, p, pbar)