Skip to content

Commit

Permalink
community(doc_loaders): allow any credential type in AzureAIDocumentI… (
Browse files Browse the repository at this point in the history
#29289)

Allow any credential type in AzureAIDocumentIntelligence, not only
`api_key`.
This makes it possible to use any of the credential types integrated with AD (Azure Active Directory).

---------

Co-authored-by: Chester Curme <[email protected]>
  • Loading branch information
ianchi and ccurme authored Jan 27, 2025
1 parent f00c66c commit 1551d97
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 5 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from typing import Iterator, List, Optional
from __future__ import annotations

from typing import TYPE_CHECKING, Iterator, List, Optional

from langchain_core.documents import Document

Expand All @@ -8,14 +10,17 @@
AzureAIDocumentIntelligenceParser,
)

if TYPE_CHECKING:
from azure.core.credentials import TokenCredential


class AzureAIDocumentIntelligenceLoader(BaseLoader):
"""Load a PDF with Azure Document Intelligence."""

def __init__(
self,
api_endpoint: str,
api_key: str,
api_key: Optional[str] = None,
file_path: Optional[str] = None,
url_path: Optional[str] = None,
bytes_source: Optional[bytes] = None,
Expand All @@ -24,6 +29,7 @@ def __init__(
mode: str = "markdown",
*,
analysis_features: Optional[List[str]] = None,
azure_credential: Optional["TokenCredential"] = None,
) -> None:
"""
Initialize the object for file processing with Azure Document Intelligence
Expand Down Expand Up @@ -63,6 +69,9 @@ def __init__(
List of optional analysis features, each feature should be passed
as a str that conforms to the enum `DocumentAnalysisFeature` in
`azure-ai-documentintelligence` package. Default value is None.
azure_credential: Optional[TokenCredential]
The credentials to use for DocumentIntelligenceClient construction, when
using credentials other than api_key (like AD).
Examples:
---------
Expand All @@ -79,6 +88,15 @@ def __init__(
assert (
file_path is not None or url_path is not None or bytes_source is not None
), "file_path, url_path or bytes_source must be provided"

assert (
api_key is not None or azure_credential is not None
), "Either api_key or azure_credential must be provided."

assert (
api_key is None or azure_credential is None
), "Only one of api_key or azure_credential should be provided."

self.file_path = file_path
self.url_path = url_path
self.bytes_source = bytes_source
Expand All @@ -90,6 +108,7 @@ def __init__(
api_model=api_model,
mode=mode,
analysis_features=analysis_features,
azure_credential=azure_credential,
)

def lazy_load(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
from __future__ import annotations

import logging
from typing import Any, Iterator, List, Optional
from typing import TYPE_CHECKING, Any, Iterator, List, Optional

from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.blob_loaders import Blob

if TYPE_CHECKING:
from azure.core.credentials import TokenCredential

logger = logging.getLogger(__name__)


Expand All @@ -16,17 +21,27 @@ class AzureAIDocumentIntelligenceParser(BaseBlobParser):
def __init__(
self,
api_endpoint: str,
api_key: str,
api_key: Optional[str] = None,
api_version: Optional[str] = None,
api_model: str = "prebuilt-layout",
mode: str = "markdown",
analysis_features: Optional[List[str]] = None,
azure_credential: Optional["TokenCredential"] = None,
):
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import DocumentAnalysisFeature
from azure.core.credentials import AzureKeyCredential

kwargs = {}

if api_key is None and azure_credential is None:
raise ValueError("Either api_key or azure_credential must be provided.")

if api_key and azure_credential:
raise ValueError(
"Only one of api_key or azure_credential should be provided."
)

if api_version is not None:
kwargs["api_version"] = api_version

Expand All @@ -49,7 +64,7 @@ def __init__(

self.client = DocumentIntelligenceClient(
endpoint=api_endpoint,
credential=AzureKeyCredential(api_key),
credential=azure_credential or AzureKeyCredential(api_key),
headers={"x-ms-useragent": "langchain-parser/1.0.0"},
features=analysis_features,
**kwargs,
Expand Down

0 comments on commit 1551d97

Please sign in to comment.