[Partner] NVIDIA TRT Package (#14733)

Simplify #13976 and add as a separate package. - [] Add README - [X] Add doc notebook - [X] Add simple LLM integration --------- Co-authored-by: Jeremy Dyer <[email protected]>
langchain-ai · Dec 19, 2023 · 5836967 · 5836967
1 parent 0d4cbbc
commit 5836967
Show file tree

Hide file tree

Showing 21 changed files with 2,993 additions and 0 deletions.
diff --git a/libs/partners/nvidia-trt/.gitignore b/libs/partners/nvidia-trt/.gitignore
@@ -0,0 +1 @@
+__pycache__
diff --git a/libs/partners/nvidia-trt/LICENSE b/libs/partners/nvidia-trt/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 LangChain, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/libs/partners/nvidia-trt/Makefile b/libs/partners/nvidia-trt/Makefile
@@ -0,0 +1,59 @@
+# Command-style targets. Declaring them phony keeps a same-named file or
+# directory (such as tests/) from making the target look up to date.
+.PHONY: all format format_diff lint lint_diff lint_package lint_tests test tests integration_tests docker_tests help extended_tests spell_check spell_fix check_imports
+
+# Default target executed when no arguments are given to make.
+all: help
+
+# Path handed to pytest by the test targets. Override it on the command line:
+#   make test TEST_FILE=<test_file>
+TEST_FILE ?= tests/unit_tests/
+
+# Run pytest on $(TEST_FILE); `tests` is an alias for `test`.
+test tests:
+	poetry run pytest $(TEST_FILE)
+
+
+######################
+# LINTING AND FORMATTING
+######################
+
+# Define a variable for Python and notebook files.
+# PYTHON_FILES is the set of paths the lint/format recipes operate on and
+# MYPY_CACHE is the mypy cache directory. The target-specific assignments
+# below override these defaults for individual targets.
+PYTHON_FILES=.
+MYPY_CACHE=.mypy_cache
+lint format: PYTHON_FILES=.
+# Only .py/.ipynb files that differ from master (deleted files excluded), with
+# paths relative to this package. The assignment is recursive (=), so git is
+# run only when one of these targets actually expands the variable.
+lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/partners/nvidia-trt --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')
+lint_package: PYTHON_FILES=langchain_nvidia_trt
+lint_tests: PYTHON_FILES=tests
+# lint_tests uses a different mypy cache directory from the other lint targets.
+lint_tests: MYPY_CACHE=.mypy_cache_test
+
+# Lint $(PYTHON_FILES): ruff checks, a formatting diff, an import-order check,
+# then mypy with $(MYPY_CACHE) as its cache directory.
+# The mypy step is skipped when PYTHON_FILES is empty (lint_diff with no
+# changed files), because mypy exits with an error when given no targets.
+# `mkdir -p` keeps the step quiet when the cache directory already exists, and
+# `&&` stops before mypy if the directory cannot be created.
+lint lint_diff lint_package lint_tests:
+	poetry run ruff .
+	poetry run ruff format $(PYTHON_FILES) --diff
+	poetry run ruff --select I $(PYTHON_FILES)
+	[ "$(PYTHON_FILES)" = "" ] || { mkdir -p $(MYPY_CACHE) && poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE); }
+
+# Rewrite $(PYTHON_FILES) in place: apply ruff's formatter, then let ruff fix
+# import-order (rule set I) findings.
+format format_diff:
+	poetry run ruff format $(PYTHON_FILES)
+	poetry run ruff --select I --fix $(PYTHON_FILES)
+
+# Run codespell with the configuration read from pyproject.toml.
+spell_check:
+	poetry run codespell --toml pyproject.toml
+
+# Same as spell_check, but passes -w so codespell writes its corrections to
+# the files.
+spell_fix:
+	poetry run codespell --toml pyproject.toml -w
+
+# Pass every .py file under langchain_nvidia_trt/ to scripts/check_imports.py.
+# The file list is gathered by `find` when the Makefile is parsed and reaches
+# the script through $^ (all prerequisites).
+check_imports: $(shell find langchain_nvidia_trt -name '*.py')
+	poetry run python ./scripts/check_imports.py $^
+
+######################
+# HELP
+######################
+
+# Print a short summary of the main targets. Each echo is prefixed with @ so
+# only the message is shown, not the command itself.
+help:
+	@echo '----'
+	@echo 'check_imports				- check imports'
+	@echo 'format                       - run code formatters'
+	@echo 'lint                         - run linters'
+	@echo 'test                         - run unit tests'
+	@echo 'tests                        - run unit tests'
+	@echo 'test TEST_FILE=<test_file>   - run all tests in file'
diff --git a/libs/partners/nvidia-trt/README.md b/libs/partners/nvidia-trt/README.md
@@ -0,0 +1 @@
+# langchain-nvidia-trt
diff --git a/libs/partners/nvidia-trt/docs/llms.ipynb b/libs/partners/nvidia-trt/docs/llms.ipynb
@@ -0,0 +1,106 @@
+{
+ "cells": [
+  {
+   "cell_type": "raw",
+   "id": "67db2992",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "sidebar_label: TritonTensorRT\n",
+    "---"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b56b221d",
+   "metadata": {},
+   "source": [
+    "# Nvidia Triton+TRT-LLM\n",
+    "\n",
+    "Nvidia's Triton is an inference server that provides API-style access to hosted LLM models. Likewise, Nvidia TensorRT-LLM, often abbreviated as TRT-LLM, is a GPU-accelerated SDK for running optimizations and inference on LLM models. This connector allows LangChain to remotely interact with a Triton inference server over gRPC or HTTP to perform accelerated inference operations.\n",
+    "\n",
+    "[Triton Inference Server Github](https://github.com/triton-inference-server/server)\n",
+    "\n",
+    "\n",
+    "## TritonTensorRTLLM\n",
+    "\n",
+    "This example goes over how to use LangChain to interact with LLMs through the `TritonTensorRTLLM` class. To install, run the following command:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "59c710c4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# install package\n",
+    "%pip install -U langchain-nvidia-trt"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0ee90032",
+   "metadata": {},
+   "source": [
+    "## Create the Triton+TRT-LLM instance\n",
+    "\n",
+    "Remember that a Triton instance represents a running server instance; therefore, you should ensure you have a valid server configuration running and change `localhost:8001` to the correct IP/hostname:port combination for your server.\n",
+    "\n",
+    "An example of setting up this environment can be found at Nvidia's [GenerativeAIExamples GitHub repo](https://github.com/NVIDIA/GenerativeAIExamples/tree/main/RetrievalAugmentedGeneration)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "035dea0f",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from langchain_core.prompts import PromptTemplate\n",
+    "from langchain_nvidia_trt.llms import TritonTensorRTLLM\n",
+    "\n",
+    "template = \"\"\"Question: {question}\n",
+    "\n",
+    "Answer: Let's think step by step.\"\"\"\n",
+    "\n",
+    "prompt = PromptTemplate.from_template(template)\n",
+    "\n",
+    "# Connect to the TRT-LLM Llama-2 model running on the Triton server at the url below\n",
+    "triton_llm = TritonTensorRTLLM(server_url =\"localhost:8001\", model_name=\"ensemble\", tokens=500)\n",
+    "\n",
+    "chain = prompt | triton_llm \n",
+    "\n",
+    "chain.invoke({\"question\": \"What is LangChain?\"})"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "e971737741ff4ec9aff7dc6155a1060a59a8a6d52c757dbbe66bf8ee389494b1"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/libs/partners/nvidia-trt/langchain_nvidia_trt/__init__.py b/libs/partners/nvidia-trt/langchain_nvidia_trt/__init__.py
@@ -0,0 +1,3 @@
+from langchain_nvidia_trt.llms import TritonTensorRTLLM
+
+__all__ = ["TritonTensorRTLLM"]
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from langchain_nvidia_trt.llms import TritonTensorRTLLM

		__all__ = ["TritonTensorRTLLM"]