Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

community: Chat Huggingface and HuggingFace TGI llms #15582

Closed
wants to merge 24 commits into from
Closed
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
0c8c768
community: add API key into huggingface TGI validation
Jan 5, 2024
4695323
community: Ignore `_resolve_model_id()` when calling a self-hosted HF…
Jan 5, 2024
bc5b951
format and linting
Jan 5, 2024
02f6745
Add integration test for HuggingFace TGI
Jan 5, 2024
3212ba5
Merge branch 'master' into chat-huggingface
minhna1112 Jan 5, 2024
a14aa1d
Merge branch 'master' into chat-huggingface
minhna1112 Jan 8, 2024
e1a5378
Merge branch 'master' into chat-huggingface
minhna1112 Jan 15, 2024
d685cd9
Fix: Spelling error in
Jan 17, 2024
051a50a
Merge branch 'master' into chat-huggingface
minhna1112 Jan 17, 2024
4907c8e
Merge branch 'master' into chat-huggingface
minhna1112 Jan 22, 2024
55f91f1
Merge branch 'master' into chat-huggingface
minhna1112 Jan 24, 2024
cb0e687
Update libs/community/langchain_community/llms/huggingface_text_gen_i…
minhna1112 Jan 24, 2024
100bb88
Merge branch 'master' into chat-huggingface
minhna1112 Jan 25, 2024
32467bb
Merge branch 'master' into chat-huggingface
minhna1112 Jan 30, 2024
0f9784c
Introduce `text_generation` dependency:
Jan 30, 2024
e9d534a
Merge branch 'langchain-ai:master' into chat-huggingface
minhna1112 Jan 30, 2024
6d0938f
Minor fix in `libs/community/pyproject.toml`
minhna1112 Jan 30, 2024
80f9dcc
Merge branch 'chat-huggingface' of https://github.com/FSoft-AI4Code/l…
minhna1112 Jan 30, 2024
bdfd0c7
Merge branch 'master' into chat-huggingface
minhna1112 Jan 30, 2024
23bf16e
Merge branch 'master' into chat-huggingface
minhna1112 Feb 1, 2024
fe74592
Merge branch 'master' into chat-huggingface
minhna1112 Feb 4, 2024
c7798e8
Merge branch 'master' into chat-huggingface
minhna1112 Feb 15, 2024
c1f6277
Re-lock poetry
minhna1112 Feb 15, 2024
85e61c7
Merge branch 'master' into chat-huggingface
minhna1112 Feb 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,12 @@ def __init__(self, **kwargs: Any):

from transformers import AutoTokenizer

self._resolve_model_id()
# `self.model_id` should only be resolved when not explicitly defined
# If `llm` is a HuggingFaceTextGenInference, there would exist cases
# in which the TGI server is not a HuggingFace-deployed Inference Endpoint
# that has an explicit model ID, but a self-hosted version
if not self.model_id:
self._resolve_model_id()
self.tokenizer = (
AutoTokenizer.from_pretrained(self.model_id)
if self.tokenizer is None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk
from langchain_core.pydantic_v1 import Extra, Field, root_validator
from langchain_core.utils import get_pydantic_field_names
from langchain_core.utils import get_from_dict_or_env, get_pydantic_field_names

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -95,6 +95,8 @@ class HuggingFaceTextGenInference(LLM):
"""Holds any model parameters valid for `call` not explicitly specified"""
client: Any
async_client: Any
huggingfacehub_api_token: Optional[str] = None
"""Hugging Face Hub API token; sent as a bearer token to the TGI server when set."""

class Config:
"""Configuration for this pydantic object."""
Expand Down Expand Up @@ -129,8 +131,17 @@ def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]:

@root_validator()
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that python package exists in environment."""

"""Validate that the API key and python package exist in the environment."""
huggingfacehub_api_token = get_from_dict_or_env(
values, "huggingfacehub_api_token", "HUGGINGFACEHUB_API_TOKEN"
)
minhna1112 marked this conversation as resolved.
Show resolved Hide resolved
# When TGI makes requests to Huggingface's Inference Endpoints,
# a bearer token must be included in the request header for authorization
# https://github.com/huggingface/text-generation-inference/issues/747
if huggingfacehub_api_token:
values["server_kwargs"]["headers"] = {
"Authorization": f"Bearer {huggingfacehub_api_token}"
}
try:
import text_generation

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from text_generation import AsyncClient, Client

from langchain_community.llms import HuggingFaceTextGenInference


Expand All @@ -17,3 +19,23 @@ def test_invocation_params_stop_sequences() -> None:
runtime_stop = ["stop"]
assert llm._invocation_params(runtime_stop)["stop_sequences"] == [".", "stop"]
assert llm._default_params["stop_sequences"] == ["."]


def test_client_type() -> None:
    """Default construction wires up both the sync and async TGI clients."""
    inference = HuggingFaceTextGenInference()

    assert isinstance(inference.client, Client)
    assert isinstance(inference.async_client, AsyncClient)


def test_bearer_api() -> None:
    """Auth headers are set only when a Hugging Face Hub API token is given."""
    # Self-hosted TGI server: no token supplied, so no auth headers are sent.
    llm = HuggingFaceTextGenInference()
    assert not llm.client.headers
    assert not llm.async_client.headers

    # Hugging Face Inference Endpoint: the token must be forwarded as a
    # bearer token in the Authorization header of both clients.
    BEARER_TOKEN = "abcdef1230"
    llm = HuggingFaceTextGenInference(huggingfacehub_api_token=BEARER_TOKEN)
    expected_header = f"Bearer {BEARER_TOKEN}"
    assert llm.client.headers["Authorization"] == expected_header
    assert llm.async_client.headers["Authorization"] == expected_header
Loading