From 9f3ebcfa41b68892cdcb25e45c980cef67c45126 Mon Sep 17 00:00:00 2001 From: Lance Martin <122662504+rlancemartin@users.noreply.github.com> Date: Fri, 15 Dec 2023 16:00:55 -0800 Subject: [PATCH] Add image support for Ollama (#14713) Support [LLaVA](https://ollama.ai/library/llava): * Upgrade Ollama * `ollama pull llava` Ensure compatibility with [image prompt template](https://github.com/langchain-ai/langchain/pull/14263) --------- Co-authored-by: jacoblee93 --- docs/docs/integrations/chat/ollama.ipynb | 354 +++++------------ docs/docs/integrations/llms/ollama.ipynb | 362 ++++-------------- .../langchain_community/chat_models/ollama.py | 150 +++++++- .../langchain_community/llms/ollama.py | 54 ++- 4 files changed, 358 insertions(+), 562 deletions(-) diff --git a/docs/docs/integrations/chat/ollama.ipynb b/docs/docs/integrations/chat/ollama.ipynb index 911f1f30f0739..99b6fba3a0ff1 100644 --- a/docs/docs/integrations/chat/ollama.ipynb +++ b/docs/docs/integrations/chat/ollama.ipynb @@ -66,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -76,7 +76,6 @@ "\n", "chat_model = ChatOllama(\n", " model=\"llama2:7b-chat\",\n", - " callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),\n", ")" ] }, @@ -84,41 +83,28 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "With `StreamingStdOutCallbackHandler`, you will see tokens streamed." + "Optionally, pass `StreamingStdOutCallbackHandler` to stream tokens:\n", + "\n", + "```\n", + "chat_model = ChatOllama(\n", + " model=\"llama2:7b-chat\",\n", + " callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),\n", + ")\n", + "```" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Artificial intelligence (AI) has a rich and varied history that spans several decades. 
Hinweis: The following is a brief overview of the major milestones in the history of AI, but it is by no means exhaustive.\n", - "\n", - "1. Early Beginnings (1950s-1960s): The term \"Artificial Intelligence\" was coined in 1956 by computer scientist John McCarthy. However, the concept of creating machines that can think and learn like humans dates back to ancient times. In the 1950s and 1960s, researchers began exploring the possibilities of AI using simple algorithms and machine learning techniques.\n", - "2. Rule-Based Systems (1970s-1980s): In the 1970s and 1980s, AI research focused on developing rule-based systems, which use predefined rules to reason and make decisions. This led to the development of expert systems, which were designed to mimic the decision-making abilities of human experts in specific domains.\n", - "3. Machine Learning (1980s-1990s): The 1980s saw a shift towards machine learning, which enables machines to learn from data without being explicitly programmed. This led to the development of algorithms such as decision trees, neural networks, and support vector machines.\n", - "4. Deep Learning (2000s-present): In the early 2000s, deep learning emerged as a subfield of machine learning, focusing on neural networks with multiple layers. These networks can learn complex representations of data, leading to breakthroughs in image and speech recognition, natural language processing, and other areas.\n", - "5. Natural Language Processing (NLP) (1980s-present): NLP has been an active area of research since the 1980s, with a focus on developing algorithms that can understand and generate human language. This has led to applications such as chatbots, voice assistants, and language translation systems.\n", - "6. Robotics (1970s-present): The development of robotics has been closely tied to AI research, with a focus on creating machines that can perform tasks that typically require human intelligence, such as manipulation and locomotion.\n", - "7. 
Computer Vision (1980s-present): Computer vision has been an active area of research since the 1980s, with a focus on enabling machines to interpret and understand visual data from the world around us. This has led to applications such as image recognition, object detection, and autonomous driving.\n", - "8. Ethics and Society (1990s-present): As AI technology has become more advanced and integrated into various aspects of society, there has been a growing concern about the ethical implications of AI. This includes issues related to privacy, bias, and job displacement.\n", - "9. Reinforcement Learning (2000s-present): Reinforcement learning is a subfield of machine learning that involves training machines to make decisions based on feedback from their environment. This has led to breakthroughs in areas such as game playing, robotics, and autonomous driving.\n", - "10. Generative Models (2010s-present): Generative models are a class of AI algorithms that can generate new data that is similar to a given dataset. This has led to applications such as image synthesis, music generation, and language creation.\n", - "\n", - "These are just a few of the many developments in the history of AI. As the field continues to evolve, we can expect even more exciting breakthroughs and innovations in the years to come." - ] - }, { "data": { "text/plain": [ - "AIMessage(content=' Artificial intelligence (AI) has a rich and varied history that spans several decades. Hinweis: The following is a brief overview of the major milestones in the history of AI, but it is by no means exhaustive.\\n\\n1. Early Beginnings (1950s-1960s): The term \"Artificial Intelligence\" was coined in 1956 by computer scientist John McCarthy. However, the concept of creating machines that can think and learn like humans dates back to ancient times. In the 1950s and 1960s, researchers began exploring the possibilities of AI using simple algorithms and machine learning techniques.\\n2. 
Rule-Based Systems (1970s-1980s): In the 1970s and 1980s, AI research focused on developing rule-based systems, which use predefined rules to reason and make decisions. This led to the development of expert systems, which were designed to mimic the decision-making abilities of human experts in specific domains.\\n3. Machine Learning (1980s-1990s): The 1980s saw a shift towards machine learning, which enables machines to learn from data without being explicitly programmed. This led to the development of algorithms such as decision trees, neural networks, and support vector machines.\\n4. Deep Learning (2000s-present): In the early 2000s, deep learning emerged as a subfield of machine learning, focusing on neural networks with multiple layers. These networks can learn complex representations of data, leading to breakthroughs in image and speech recognition, natural language processing, and other areas.\\n5. Natural Language Processing (NLP) (1980s-present): NLP has been an active area of research since the 1980s, with a focus on developing algorithms that can understand and generate human language. This has led to applications such as chatbots, voice assistants, and language translation systems.\\n6. Robotics (1970s-present): The development of robotics has been closely tied to AI research, with a focus on creating machines that can perform tasks that typically require human intelligence, such as manipulation and locomotion.\\n7. Computer Vision (1980s-present): Computer vision has been an active area of research since the 1980s, with a focus on enabling machines to interpret and understand visual data from the world around us. This has led to applications such as image recognition, object detection, and autonomous driving.\\n8. Ethics and Society (1990s-present): As AI technology has become more advanced and integrated into various aspects of society, there has been a growing concern about the ethical implications of AI. 
This includes issues related to privacy, bias, and job displacement.\\n9. Reinforcement Learning (2000s-present): Reinforcement learning is a subfield of machine learning that involves training machines to make decisions based on feedback from their environment. This has led to breakthroughs in areas such as game playing, robotics, and autonomous driving.\\n10. Generative Models (2010s-present): Generative models are a class of AI algorithms that can generate new data that is similar to a given dataset. This has led to applications such as image synthesis, music generation, and language creation.\\n\\nThese are just a few of the many developments in the history of AI. As the field continues to evolve, we can expect even more exciting breakthroughs and innovations in the years to come.', additional_kwargs={}, example=False)" + "AIMessage(content='\\nArtificial intelligence (AI) has a rich and diverse history that spans several decades. Here is a brief overview of the major milestones and events in the development of AI:\\n\\n1. 1950s: The Dartmouth Conference: The field of AI was officially launched at a conference held at Dartmouth College in 1956. Attendees included computer scientists, mathematicians, and cognitive scientists who were interested in exploring the possibilities of creating machines that could simulate human intelligence.\\n2. 1951: The Turing Test: British mathematician Alan Turing proposed a test to measure a machine\\'s ability to exhibit intelligent behavior equivalent to, or indistinguishable from, that of a human. The Turing Test has since become a benchmark for measuring the success of AI systems.\\n3. 1956: The First AI Program: Computer scientist John McCarthy created the first AI program, called the Logical Theorist, which was designed to reason and solve problems using logical deduction.\\n4. 
1960s: Rule-Based Expert Systems: The development of rule-based expert systems, which used a set of rules to reason and make decisions, marked a significant milestone in the history of AI. These systems were widely used in industries such as banking, healthcare, and transportation.\\n5. 1970s: Machine Learning: Machine learning, which enables machines to learn from data without being explicitly programmed, emerged as a major area of research in AI. This led to the development of algorithms such as decision trees and neural networks.\\n6. 1980s: Expert Systems: The development of expert systems, which were designed to mimic the decision-making abilities of human experts, reached its peak in the 1980s. These systems were widely used in industries such as banking and healthcare.\\n7. 1990s: AI Winter: Despite the progress that had been made in AI research, the field experienced a decline in funding and interest in the 1990s, which became known as the \"AI winter.\"\\n8. 2000s: Machine Learning Resurgence: The resurgence of machine learning, driven by advances in computational power and data storage, led to a new wave of AI research and applications.\\n9. 2010s: Deep Learning: The development of deep learning algorithms, which are capable of learning complex patterns in large datasets, marked a significant breakthrough in AI research. These algorithms have been used in applications such as image and speech recognition, natural language processing, and autonomous vehicles.\\n10. Present Day: AI is now being applied to a wide range of industries and domains, including healthcare, finance, transportation, and education. The field is continuing to evolve, with new technologies and applications emerging all the time.\\n\\nOverall, the history of AI reflects a long-standing interest in creating machines that can simulate human intelligence. 
While the field has experienced periods of progress and setbacks, it continues to evolve and expand into new areas of research and application.')" ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -145,7 +131,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -162,49 +148,16 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " Sure! Here's a JSON response with the colors of the sky at different times of the day:\n", - " Begriffe und Abkürzungen:\n", - "\n", - "* `time`: The time of day (in 24-hour format)\n", - "* `sky_color`: The color of the sky at that time (as a hex code)\n", - "\n", - "Here are the colors of the sky at different times of the day:\n", - "```json\n", - "[\n", - " {\n", - " \"time\": \"6am\",\n", - " \"sky_color\": \"#0080c0\"\n", - " },\n", - " {\n", - " \"time\": \"9am\",\n", - " \"sky_color\": \"#3498db\"\n", - " },\n", - " {\n", - " \"time\": \"12pm\",\n", - " \"sky_color\": \"#ef7c00\"\n", - " },\n", - " {\n", - " \"time\": \"3pm\",\n", - " \"sky_color\": \"#9564b6\"\n", - " },\n", - " {\n", - " \"time\": \"6pm\",\n", - " \"sky_color\": \"#e78ac3\"\n", - " },\n", - " {\n", - " \"time\": \"9pm\",\n", - " \"sky_color\": \"#5f006a\"\n", - " }\n", - "]\n", - "```\n", - "In this response, the `time` property is a string in 24-hour format, representing the time of day. The `sky_color` property is a hex code representing the color of the sky at that time. For example, at 6am, the sky is blue (#0080c0), while at 9pm, it's dark blue (#5f006a)." 
+ "{\"morning\": {\"sky\": \"pink\", \"sun\": \"rise\"}, \"daytime\": {\"sky\": \"blue\", \"sun\": \"high\"}, \"afternoon\": {\"sky\": \"gray\", \"sun\": \"peak\"}, \"evening\": {\"sky\": \"orange\", \"sun\": \"set\"}}\n", + " \t\n", + "\n" ] } ], @@ -222,30 +175,32 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " Sure! Based on the JSON schema you provided, here's the information we can gather about a person named John who is 35 years old and loves pizza:\n", - "\n", - "**Name:** John\n", - "\n", - "**Age:** 35 (integer)\n", - "\n", - "**Favorite food:** Pizza (string)\n", - "\n", - "So, the JSON object for John would look like this:\n", - "```json\n", "{\n", " \"name\": \"John\",\n", " \"age\": 35,\n", " \"fav_food\": \"pizza\"\n", "}\n", - "```\n", - "Note that we cannot provide additional information about John beyond what is specified in the schema. For example, we do not have any information about his gender, occupation, or address, as those fields are not included in the schema." + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" ] } ], @@ -287,235 +242,126 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## RAG\n", + "## Multi-modal\n", "\n", - "We can use Olama with RAG, [just as shown here](https://python.langchain.com/docs/use_cases/question_answering/local_retrieval_qa).\n", + "Ollama has support for multi-modal LLMs, such as [bakllava](https://ollama.ai/library/bakllava) and [llava](https://ollama.ai/library/llava).\n", "\n", - "Let's use the 13b model:\n", + "Browse the full set of versions for models with `tags`, such as [here](https://ollama.ai/library/llava/tags).\n", "\n", + "Download the desired LLM:\n", "```\n", - "ollama pull llama2:13b\n", + "ollama pull bakllava\n", "```\n", "\n", - "Let's also use local embeddings from `OllamaEmbeddings` and `Chroma`." 
+ "Be sure to update Ollama so that you have the most recent version to support multi-modal." ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "! pip install chromadb" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.document_loaders import WebBaseLoader\n", - "\n", - "loader = WebBaseLoader(\"https://lilianweng.github.io/posts/2023-06-23-agent/\")\n", - "data = loader.load()\n", - "\n", - "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", - "\n", - "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n", - "all_splits = text_splitter.split_documents(data)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ - "from langchain.embeddings import OllamaEmbeddings\n", - "from langchain.vectorstores import Chroma\n", - "\n", - "vectorstore = Chroma.from_documents(documents=all_splits, embedding=OllamaEmbeddings())" + "%pip install pillow" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 1, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "" + ], "text/plain": [ - "4" + "" ] }, - "execution_count": 7, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "question = \"What are the approaches to Task Decomposition?\"\n", - "docs = vectorstore.similarity_search(question)\n", - "len(docs)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.prompts import PromptTemplate\n", - "\n", - "# Prompt\n", - "template = \"\"\"[INST] <> Use the following pieces of context to answer the question at the end. \n", - "If you don't know the answer, just say that you don't know, don't try to make up an answer. 
\n", - "Use three sentences maximum and keep the answer as concise as possible. <>\n", - "{context}\n", - "Question: {question}\n", - "Helpful Answer:[/INST]\"\"\"\n", - "QA_CHAIN_PROMPT = PromptTemplate(\n", - " input_variables=[\"context\", \"question\"],\n", - " template=template,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "# Chat model\n", - "from langchain.callbacks.manager import CallbackManager\n", - "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", - "from langchain.chat_models import ChatOllama\n", + "import base64\n", + "from io import BytesIO\n", "\n", - "chat_model = ChatOllama(\n", - " model=\"llama2:13b\",\n", - " verbose=True,\n", - " callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "# QA chain\n", - "from langchain.chains import RetrievalQA\n", + "from IPython.display import HTML, display\n", + "from PIL import Image\n", "\n", - "qa_chain = RetrievalQA.from_chain_type(\n", - " chat_model,\n", - " retriever=vectorstore.as_retriever(),\n", - " chain_type_kwargs={\"prompt\": QA_CHAIN_PROMPT},\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Based on the provided context, there are three approaches to task decomposition for AI agents:\n", - "\n", - "1. LLM with simple prompting, such as \"Steps for XYZ.\" or \"What are the subgoals for achieving XYZ?\"\n", - "2. Task-specific instructions, such as \"Write a story outline\" for writing a novel.\n", - "3. Human inputs." 
- ] - } - ], - "source": [ - "question = \"What are the various approaches to Task Decomposition for AI Agents?\"\n", - "result = qa_chain({\"query\": question})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can also get logging for tokens." - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Based on the given context, here is the answer to the question \"What are the approaches to Task Decomposition?\"\n", - "\n", - "There are three approaches to task decomposition:\n", - "\n", - "1. LLM with simple prompting, such as \"Steps for XYZ.\" or \"What are the subgoals for achieving XYZ?\"\n", - "2. Using task-specific instructions, like \"Write a story outline\" for writing a novel.\n", - "3. With human inputs.{'model': 'llama2:13b-chat', 'created_at': '2023-08-23T15:37:51.469127Z', 'done': True, 'context': [1, 29871, 1, 29961, 25580, 29962, 518, 25580, 29962, 518, 25580, 29962, 3532, 14816, 29903, 6778, 4803, 278, 1494, 12785, 310, 3030, 304, 1234, 278, 1139, 472, 278, 1095, 29889, 29871, 13, 3644, 366, 1016, 29915, 29873, 1073, 278, 1234, 29892, 925, 1827, 393, 366, 1016, 29915, 29873, 1073, 29892, 1016, 29915, 29873, 1018, 304, 1207, 701, 385, 1234, 29889, 29871, 13, 11403, 2211, 25260, 7472, 322, 3013, 278, 1234, 408, 3022, 895, 408, 1950, 29889, 529, 829, 14816, 29903, 6778, 13, 5398, 26227, 508, 367, 2309, 313, 29896, 29897, 491, 365, 26369, 411, 2560, 9508, 292, 763, 376, 7789, 567, 363, 1060, 29979, 29999, 7790, 29876, 29896, 19602, 376, 5618, 526, 278, 1014, 1484, 1338, 363, 3657, 15387, 1060, 29979, 29999, 29973, 613, 313, 29906, 29897, 491, 773, 3414, 29899, 14940, 11994, 29936, 321, 29889, 29887, 29889, 376, 6113, 263, 5828, 27887, 1213, 363, 5007, 263, 9554, 29892, 470, 313, 29941, 29897, 411, 5199, 10970, 29889, 13, 13, 5398, 26227, 508, 367, 2309, 313, 29896, 29897, 491, 365, 26369, 411, 2560, 9508, 292, 763, 
376, 7789, 567, 363, 1060, 29979, 29999, 7790, 29876, 29896, 19602, 376, 5618, 526, 278, 1014, 1484, 1338, 363, 3657, 15387, 1060, 29979, 29999, 29973, 613, 313, 29906, 29897, 491, 773, 3414, 29899, 14940, 11994, 29936, 321, 29889, 29887, 29889, 376, 6113, 263, 5828, 27887, 1213, 363, 5007, 263, 9554, 29892, 470, 313, 29941, 29897, 411, 5199, 10970, 29889, 13, 13, 1451, 16047, 267, 297, 1472, 29899, 8489, 18987, 322, 3414, 26227, 29901, 1858, 9450, 975, 263, 3309, 29891, 4955, 322, 17583, 3902, 8253, 278, 1650, 2913, 3933, 18066, 292, 29889, 365, 26369, 29879, 21117, 304, 10365, 13900, 746, 20050, 411, 15668, 4436, 29892, 3907, 963, 3109, 16424, 9401, 304, 25618, 1058, 5110, 515, 14260, 322, 1059, 29889, 13, 13, 1451, 16047, 267, 297, 1472, 29899, 8489, 18987, 322, 3414, 26227, 29901, 1858, 9450, 975, 263, 3309, 29891, 4955, 322, 17583, 3902, 8253, 278, 1650, 2913, 3933, 18066, 292, 29889, 365, 26369, 29879, 21117, 304, 10365, 13900, 746, 20050, 411, 15668, 4436, 29892, 3907, 963, 3109, 16424, 9401, 304, 25618, 1058, 5110, 515, 14260, 322, 1059, 29889, 13, 16492, 29901, 1724, 526, 278, 13501, 304, 9330, 897, 510, 3283, 29973, 13, 29648, 1319, 673, 10834, 29914, 25580, 29962, 518, 29914, 25580, 29962, 518, 29914, 25580, 29962, 29871, 16564, 373, 278, 2183, 3030, 29892, 1244, 338, 278, 1234, 304, 278, 1139, 376, 5618, 526, 278, 13501, 304, 9330, 897, 510, 3283, 3026, 13, 13, 8439, 526, 2211, 13501, 304, 3414, 26227, 29901, 13, 13, 29896, 29889, 365, 26369, 411, 2560, 9508, 292, 29892, 1316, 408, 376, 7789, 567, 363, 1060, 29979, 29999, 1213, 470, 376, 5618, 526, 278, 1014, 1484, 1338, 363, 3657, 15387, 1060, 29979, 29999, 3026, 13, 29906, 29889, 5293, 3414, 29899, 14940, 11994, 29892, 763, 376, 6113, 263, 5828, 27887, 29908, 363, 5007, 263, 9554, 29889, 13, 29941, 29889, 2973, 5199, 10970, 29889, 2], 'total_duration': 9514823750, 'load_duration': 795542, 'sample_count': 99, 'sample_duration': 68732000, 'prompt_eval_count': 146, 'prompt_eval_duration': 6206275000, 
'eval_count': 98, 'eval_duration': 3229641000}\n" - ] - } - ], - "source": [ - "from langchain.callbacks.base import BaseCallbackHandler\n", - "from langchain.schema import LLMResult\n", "\n", + "def convert_to_base64(pil_image):\n", + " \"\"\"\n", + " Convert PIL images to Base64 encoded strings\n", "\n", - "class GenerationStatisticsCallback(BaseCallbackHandler):\n", - " def on_llm_end(self, response: LLMResult, **kwargs) -> None:\n", - " print(response.generations[0][0].generation_info)\n", + " :param pil_image: PIL image\n", + " :return: Base64-encoded string\n", + " \"\"\"\n", "\n", + " buffered = BytesIO()\n", + " pil_image.save(buffered, format=\"JPEG\") # You can change the format if needed\n", + " img_str = base64.b64encode(buffered.getvalue()).decode(\"utf-8\")\n", + " return img_str\n", "\n", - "callback_manager = CallbackManager(\n", - " [StreamingStdOutCallbackHandler(), GenerationStatisticsCallback()]\n", - ")\n", "\n", - "chat_model = ChatOllama(\n", - " model=\"llama2:13b-chat\", verbose=True, callback_manager=callback_manager\n", - ")\n", + "def plt_img_base64(img_base64):\n", + " \"\"\"\n", + " Display base64 encoded string as image\n", "\n", - "qa_chain = RetrievalQA.from_chain_type(\n", - " chat_model,\n", - " retriever=vectorstore.as_retriever(),\n", - " chain_type_kwargs={\"prompt\": QA_CHAIN_PROMPT},\n", - ")\n", + " :param img_base64: Base64 string\n", + " \"\"\"\n", + " # Create an HTML img tag with the base64 string as the source\n", + " image_html = f'<img src=\"data:image/jpeg;base64,{img_base64}\" />'\n", + " # Display the image by rendering the HTML\n", + " display(HTML(image_html))\n", "\n", - "question = \"What are the approaches to Task Decomposition?\"\n", - "result = qa_chain({\"query\": question})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`eval_count` / (`eval_duration`/10e9) gets `tok / s`" + "\n", + "file_path = \"/Users/rlm/Desktop/Eval_Sets/multi_modal_presentations/DDOG/img_23.jpg\"\n", + "pil_image = Image.open(file_path)\n", + "\n", +
"image_b64 = convert_to_base64(pil_image)\n", + "plt_img_base64(image_b64)" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "30.343929867127645" + "AIMessage(content='90%')" ] }, - "execution_count": 17, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "98 / (3229641000 / 1000 / 1000 / 1000)" + "from langchain.chat_models import ChatOllama\n", + "from langchain_core.messages import HumanMessage\n", + "\n", + "chat_model = ChatOllama(\n", + " model=\"bakllava\",\n", + ")\n", + "\n", + "# Call the chat model with both messages and images\n", + "content_parts = []\n", + "image_part = {\n", + " \"type\": \"image_url\",\n", + " \"image_url\": f\"data:image/jpeg;base64,{image_b64}\",\n", + "}\n", + "text_part = {\"type\": \"text\", \"text\": \"What is the Dollar-based gross retention rate?\"}\n", + "\n", + "content_parts.append(image_part)\n", + "content_parts.append(text_part)\n", + "prompt = [HumanMessage(content=content_parts)]\n", + "chat_model(prompt)" ] } ], @@ -535,7 +381,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.9.16" } }, "nbformat": 4, diff --git a/docs/docs/integrations/llms/ollama.ipynb b/docs/docs/integrations/llms/ollama.ipynb index e6bd21944883f..adbf4eccac8ea 100644 --- a/docs/docs/integrations/llms/ollama.ipynb +++ b/docs/docs/integrations/llms/ollama.ipynb @@ -20,8 +20,8 @@ "\n", "* [Download](https://ollama.ai/download)\n", "* Fetch a model via `ollama pull `\n", - "* e.g., for `Llama-7b`: `ollama pull llama2` (see full list [here](https://github.com/jmorganca/ollama))\n", - "* This will download the most basic version of the model typically (e.g., smallest # parameters and `q4_0`)\n", + "* e.g., for `Llama-7b`: `ollama pull llama2` (see full list [here](https://ollama.ai/library))\n", + "* This will download the most basic version of the model
typically (e.g., smallest # parameters)\n", "* On Mac, it will download to \n", "\n", "`~/.ollama/models/manifests/registry.ollama.ai/library//latest`\n", @@ -61,369 +61,147 @@ "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", "from langchain.llms import Ollama\n", "\n", - "llm = Ollama(\n", - " model=\"llama2\", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()])\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "With `StreamingStdOutCallbackHandler`, you will see tokens streamed." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "llm(\"Tell me about the history of AI\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Ollama supports embeddings via `OllamaEmbeddings`:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.embeddings import OllamaEmbeddings\n", - "\n", - "oembed = OllamaEmbeddings(base_url=\"http://localhost:11434\", model=\"llama2\")\n", - "oembed.embed_query(\"Llamas are social animals and live with others as a herd.\")" + "llm = Ollama(model=\"llama2\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## RAG\n", - "\n", - "We can use Olama with RAG, [just as shown here](https://python.langchain.com/docs/use_cases/question_answering/local_retrieval_qa).\n", + "Optionally, pass `StreamingStdOutCallbackHandler` to stream tokens:\n", "\n", - "Let's use the 13b model:\n", - "\n", - "```\n", - "ollama pull llama2:13b\n", "```\n", - "\n", - "Let's also use local embeddings from `OllamaEmbeddings` and `Chroma`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "! 
pip install chromadb" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# Load web page\n", - "from langchain.document_loaders import WebBaseLoader\n", - "\n", - "loader = WebBaseLoader(\"https://lilianweng.github.io/posts/2023-06-23-agent/\")\n", - "data = loader.load()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "# Split into chunks\n", - "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", - "\n", - "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=100)\n", - "all_splits = text_splitter.split_documents(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found model file at /Users/rlm/.cache/gpt4all/ggml-all-MiniLM-L6-v2-f16.bin\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "objc[77472]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libreplit-mainline-metal.dylib (0x17f754208) and /Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libllamamodel-mainline-metal.dylib (0x17fb80208). One of the two will be used. 
Which one is undefined.\n" - ] - } - ], - "source": [ - "# Embed and store\n", - "from langchain.embeddings import (\n", - " GPT4AllEmbeddings,\n", - " OllamaEmbeddings, # We can also try Ollama embeddings\n", + "llm = Ollama(\n", + " model=\"llama2\",\n", + " callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),\n", ")\n", - "from langchain.vectorstores import Chroma\n", - "\n", - "vectorstore = Chroma.from_documents(documents=all_splits, embedding=GPT4AllEmbeddings())" + "```" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "4" + "' Artificial intelligence (AI) has a rich and varied history that spans several decades. październik 1950s and has evolved significantly over time. Here is a brief overview of the major milestones in the history of AI:\\n\\n1. 1950s: The Dartmouth Conference - Considered the birthplace of AI, this conference brought together computer scientists, mathematicians, and cognitive scientists to discuss the possibilities of creating machines that could simulate human intelligence. Attendees included John McCarthy, Marvin Minsky, Nathaniel Rochester, and Claude Shannon.\\n2. 1951: The Turing Test - Alan Turing proposed a test to measure a machine\\'s ability to exhibit intelligent behavior equivalent to, or indistinguishable from, that of a human. The Turing Test has since become a benchmark for measuring the success of AI systems.\\n3. 1956: The First AI Program - John McCarthy created the first AI program, called the Logical Theorist, which was designed to reason and solve problems using logical deduction.\\n4. 1960s: Rule-Based Expert Systems - Researchers developed rule-based expert systems, which used a set of rules to reason and make decisions. These systems were widely used in industries such as banking and healthcare.\\n5.
1970s: Machine Learning -Machine learning, a subfield of AI, emerged as a way for machines to learn from data without being explicitly programmed. This led to the development of algorithms such as decision trees and neural networks.\\n6. 1980s: Expert Systems - The development of expert systems, which were designed to mimic the decision-making abilities of human experts, reached its peak in the 1980s. These systems were widely used in industries such as banking and healthcare.\\n7. 1990s: AI Winter - Despite the progress made in AI research, the field experienced a decline in funding and interest in the 1990s, known as the \"AI winter.\"\\n8. 2000s: AI Resurgence - The resurgence of AI began in the early 2000s with the development of new algorithms and techniques, such as support vector machines and deep learning. This led to a renewed interest in AI research and applications.\\n9. 2010s: Rise of Deep Learning - The development of deep learning algorithms, which are capable of learning and improving on their own by analyzing large amounts of data, has been a major factor in the recent progress made in AI. These algorithms have been used in applications such as image recognition, natural language processing, and autonomous vehicles.\\n10. Present Day: AI Continues to Advance - AI is continuing to advance at a rapid pace, with new techniques and applications emerging all the time. Areas of research include natural language processing, computer vision, robotics, and more.\\n\\nSome notable people who have made significant contributions to the field of AI include:\\n\\n1. Alan Turing - Considered one of the pioneers of AI, Turing proposed the Turing Test and developed the concept of a universal machine.\\n2. John McCarthy - McCarthy is known as the \"father of AI\" for his work in developing the field of AI. He coined the term \"Artificial Intelligence\" and was instrumental in organizing the Dartmouth Conference.\\n3. 
Marvin Minsky - Minsky was a pioneer in the field of neural networks and co-founder of the MIT AI Laboratory.\\n4. Nathaniel Rochester - Rochester was a computer scientist and cognitive scientist who worked on early AI projects, including the development of the Logical Theorist.\\n5. Claude Shannon - Shannon was a mathematician and electrical engineer who is known for his work on information theory, which has had a significant impact on the field of AI.\\n6. Yann LeCun - LeCun is a computer scientist and the director of AI Research at Facebook. He is also the Silver Professor of Computer Science at New York University, and a professor at the Courant Institute of Mathematical Sciences.\\n7. Geoffrey Hinton - Hinton is a computer scientist and cognitive psychologist who is known for his work on artificial neural networks. He is a pioneer in the field of deep learning and has made significant contributions to the development of convolutional neural networks (CNNs).\\n8. Yoshua Bengio - Bengio is a computer scientist and a pioneer in the field of deep learning. He is known for his work on recurrent neural networks (RNNs) and has made significant contributions to the development of CNNs and RNNs.\\n9. Andrew Ng - Ng is a computer scientist and entrepreneur who has made significant contributions to the field of AI. He is known for his work on deep learning and has worked at Google, where he founded the Google Brain deep learning project, and at Baidu, where he led the company\\'s AI group.\\n10. Demis Hassabis - Hassabis is a computer scientist and entrepreneur who is known for his work on deep learning and artificial intelligence. He is the co-founder of DeepMind, which was acquired by Alphabet in 2014, and has made significant contributions to the field of AI.\\n\\nThese are just a few examples of notable people who have made significant contributions to the field of AI. 
There are many other researchers and scientists who have also made important advancements in the field.'" ] }, - "execution_count": 7, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Retrieve\n", - "question = \"How can Task Decomposition be done?\"\n", - "docs = vectorstore.similarity_search(question)\n", - "len(docs)" + "llm(\"Tell me about the history of AI\")" ] }, { - "cell_type": "code", - "execution_count": 9, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "# RAG prompt\n", - "from langchain import hub\n", + "## Multi-modal\n", + "\n", + "Ollama has support for multi-modal LLMs, such as [bakllava](https://ollama.ai/library/bakllava) and [llava](https://ollama.ai/library/llava).\n", + "\n", + "```\n", + "ollama pull bakllava\n", + "```\n", "\n", - "QA_CHAIN_PROMPT = hub.pull(\"rlm/rag-prompt-llama\")" + "Be sure to update Ollama so that you have the most recent version to support multi-modal." ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "# LLM\n", - "from langchain.callbacks.manager import CallbackManager\n", - "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", "from langchain.llms import Ollama\n", "\n", - "llm = Ollama(\n", - " model=\"llama2\",\n", - " verbose=True,\n", - " callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "# QA chain\n", - "from langchain.chains import RetrievalQA\n", - "\n", - "qa_chain = RetrievalQA.from_chain_type(\n", - " llm,\n", - " retriever=vectorstore.as_retriever(),\n", - " chain_type_kwargs={\"prompt\": QA_CHAIN_PROMPT},\n", - ")" + "bakllava = Ollama(model=\"bakllava\")" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 2, "metadata": {}, "outputs": [ { - "name": "stdout", - 
"output_type": "stream", - "text": [ - " There are several approaches to task decomposition for AI agents, including:\n", - "\n", - "1. Chain of thought (CoT): This involves instructing the model to \"think step by step\" and use more test-time computation to decompose hard tasks into smaller and simpler steps.\n", - "2. Tree of thoughts (ToT): This extends CoT by exploring multiple reasoning possibilities at each step, creating a tree structure. The search process can be BFS or DFS with each state evaluated by a classifier or majority vote.\n", - "3. Using task-specific instructions: For example, \"Write a story outline.\" for writing a novel.\n", - "4. Human inputs: The agent can receive input from a human operator to perform tasks that require creativity and domain expertise.\n", - "\n", - "These approaches allow the agent to break down complex tasks into manageable subgoals, enabling efficient handling of tasks and improving the quality of final results through self-reflection and refinement." - ] + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ - "question = \"What are the various approaches to Task Decomposition for AI Agents?\"\n", - "result = qa_chain({\"query\": question})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can also get logging for tokens." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.callbacks.base import BaseCallbackHandler\n", - "from langchain.schema import LLMResult\n", + "import base64\n", + "from io import BytesIO\n", "\n", + "from IPython.display import HTML, display\n", + "from PIL import Image\n", "\n", - "class GenerationStatisticsCallback(BaseCallbackHandler):\n", - " def on_llm_end(self, response: LLMResult, **kwargs) -> None:\n", - " print(response.generations[0][0].generation_info)\n", "\n", + "def convert_to_base64(pil_image):\n", + " \"\"\"\n", + " Convert PIL images to Base64 encoded strings\n", "\n", - "callback_manager = CallbackManager(\n", - " [StreamingStdOutCallbackHandler(), GenerationStatisticsCallback()]\n", - ")\n", + " :param pil_image: PIL image\n", + " :return: Base64 string\n", + " \"\"\"\n", "\n", - "llm = Ollama(\n", - " base_url=\"http://localhost:11434\",\n", - " model=\"llama2\",\n", - " verbose=True,\n", - " callback_manager=callback_manager,\n", - ")\n", + " buffered = BytesIO()\n", + " pil_image.save(buffered, format=\"JPEG\") # You can change the format if needed\n", + " img_str = base64.b64encode(buffered.getvalue()).decode(\"utf-8\")\n", + " return img_str\n", "\n", - "qa_chain = RetrievalQA.from_chain_type(\n", - " llm,\n", - " retriever=vectorstore.as_retriever(),\n", - " chain_type_kwargs={\"prompt\": QA_CHAIN_PROMPT},\n", - ")\n", "\n", - "question = \"What are the approaches to Task Decomposition?\"\n", - "result = qa_chain({\"query\": question})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`eval_count` / (`eval_duration`/10e9) gets `tok / s`" + "def plt_img_base64(img_base64):\n", + " \"\"\"\n", + " Display base64 encoded string as image\n", + "\n", + " :param img_base64: Base64 string\n", + " \"\"\"\n", + " # Create an HTML img tag with the base64 string as the source\n", + " image_html = f'<img src=\"data:image/jpeg;base64,{img_base64}\" />'\n", + " # Display the image by 
rendering the HTML\n", + " display(HTML(image_html))\n", + "\n", + "\n", + "file_path = \"/Users/rlm/Desktop/Eval_Sets/multi_modal_presentations/DDOG/img_23.jpg\"\n", + "pil_image = Image.open(file_path)\n", + "image_b64 = convert_to_base64(pil_image)\n", + "plt_img_base64(image_b64)" ] }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "47.22003469910937" + "'90%'" ] }, - "execution_count": 57, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "62 / (1313002000 / 1000 / 1000 / 1000)" + "llm_with_image_context = bakllava.bind(images=[image_b64])\n", + "llm_with_image_context.invoke(\"What is the dollar based gross retention rate:\")" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Using the Hub for prompt management\n", - " \n", - "Open-source models often benefit from specific prompts. \n", - "\n", - "For example, [Mistral 7b](https://mistral.ai/news/announcing-mistral-7b/) was fine-tuned for chat using the prompt format shown [here](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1).\n", - "\n", - "Get the model: `ollama pull mistral:7b-instruct`" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "# LLM\n", - "from langchain.callbacks.manager import CallbackManager\n", - "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", - "from langchain.llms import Ollama\n", - "\n", - "llm = Ollama(\n", - " model=\"mistral:7b-instruct\",\n", - " verbose=True,\n", - " callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain import hub\n", - "\n", - "QA_CHAIN_PROMPT = hub.pull(\"rlm/rag-prompt-mistral\")\n", - "\n", - "# QA chain\n", - "from langchain.chains import 
RetrievalQA\n", - "\n", - "qa_chain = RetrievalQA.from_chain_type(\n", - " llm,\n", - " retriever=vectorstore.as_retriever(),\n", - " chain_type_kwargs={\"prompt\": QA_CHAIN_PROMPT},\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "There are different approaches to Task Decomposition for AI Agents such as Chain of thought (CoT) and Tree of Thoughts (ToT). CoT breaks down big tasks into multiple manageable tasks and generates multiple thoughts per step, while ToT explores multiple reasoning possibilities at each step. Task decomposition can be done by LLM with simple prompting or using task-specific instructions or human inputs." - ] - } - ], - "source": [ - "question = \"What are the various approaches to Task Decomposition for AI Agents?\"\n", - "result = qa_chain({\"query\": question})" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -442,7 +220,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.9.16" } }, "nbformat": 4, diff --git a/libs/community/langchain_community/chat_models/ollama.py b/libs/community/langchain_community/chat_models/ollama.py index 91dda64e45e43..54aa8a8c8cf2d 100644 --- a/libs/community/langchain_community/chat_models/ollama.py +++ b/libs/community/langchain_community/chat_models/ollama.py @@ -1,6 +1,7 @@ import json -from typing import Any, Iterator, List, Optional +from typing import Any, Dict, Iterator, List, Optional, Union +from langchain_core._api import deprecated from langchain_core.callbacks import ( CallbackManagerForLLMRun, ) @@ -15,9 +16,10 @@ ) from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult -from langchain_community.llms.ollama import _OllamaCommon +from langchain_community.llms.ollama import 
OllamaEndpointNotFoundError, _OllamaCommon +@deprecated("0.0.3", alternative="_chat_stream_response_to_chat_generation_chunk") def _stream_response_to_chat_generation_chunk( stream_response: str, ) -> ChatGenerationChunk: @@ -30,6 +32,20 @@ def _stream_response_to_chat_generation_chunk( ) +def _chat_stream_response_to_chat_generation_chunk( + stream_response: str, +) -> ChatGenerationChunk: + """Convert a stream response to a generation chunk.""" + parsed_response = json.loads(stream_response) + generation_info = parsed_response if parsed_response.get("done") is True else None + return ChatGenerationChunk( + message=AIMessageChunk( + content=parsed_response.get("message", {}).get("content", "") + ), + generation_info=generation_info, + ) + + class ChatOllama(BaseChatModel, _OllamaCommon): """Ollama locally runs large language models. @@ -52,11 +68,15 @@ def is_lc_serializable(cls) -> bool: """Return whether this model can be serialized by Langchain.""" return False + @deprecated("0.0.3", alternative="_convert_messages_to_ollama_messages") def _format_message_as_text(self, message: BaseMessage) -> str: if isinstance(message, ChatMessage): message_text = f"\n\n{message.role.capitalize()}: {message.content}" elif isinstance(message, HumanMessage): - message_text = f"[INST] {message.content} [/INST]" + if message.content[0].get("type") == "text": + message_text = f"[INST] {message.content[0]['text']} [/INST]" + elif message.content[0].get("type") == "image_url": + message_text = message.content[0]["image_url"]["url"] elif isinstance(message, AIMessage): message_text = f"{message.content}" elif isinstance(message, SystemMessage): @@ -70,6 +90,98 @@ def _format_messages_as_text(self, messages: List[BaseMessage]) -> str: [self._format_message_as_text(message) for message in messages] ) + def _convert_messages_to_ollama_messages( + self, messages: List[BaseMessage] + ) -> List[Dict[str, Union[str, List[str]]]]: + ollama_messages = [] + for message in messages: + role = "" 
+ if isinstance(message, HumanMessage): + role = "user" + elif isinstance(message, AIMessage): + role = "assistant" + elif isinstance(message, SystemMessage): + role = "system" + else: + raise ValueError("Received unsupported message type for Ollama.") + + content = "" + images = [] + if isinstance(message.content, str): + content = message.content + else: + for content_part in message.content: + if content_part.get("type") == "text": + content += f"\n{content_part['text']}" + elif content_part.get("type") == "image_url": + if isinstance(content_part.get("image_url"), str): + image_url_components = content_part["image_url"].split(",") + # Support data:image/jpeg;base64, format + # and base64 strings + if len(image_url_components) > 1: + images.append(image_url_components[1]) + else: + images.append(image_url_components[0]) + else: + raise ValueError( + "Only string image_url " "content parts are supported." + ) + else: + raise ValueError( + "Unsupported message content type. " + "Must either have type 'text' or type 'image_url' " + "with a string 'image_url' field." 
+ ) + + ollama_messages.append( + { + "role": role, + "content": content, + "images": images, + } + ) + + return ollama_messages + + def _create_chat_stream( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + **kwargs: Any, + ) -> Iterator[str]: + payload = { + "messages": self._convert_messages_to_ollama_messages(messages), + } + yield from self._create_stream( + payload=payload, stop=stop, api_url=f"{self.base_url}/api/chat/", **kwargs + ) + + def _chat_stream_with_aggregation( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + verbose: bool = False, + **kwargs: Any, + ) -> ChatGenerationChunk: + final_chunk: Optional[ChatGenerationChunk] = None + for stream_resp in self._create_chat_stream(messages, stop, **kwargs): + if stream_resp: + chunk = _chat_stream_response_to_chat_generation_chunk(stream_resp) + if final_chunk is None: + final_chunk = chunk + else: + final_chunk += chunk + if run_manager: + run_manager.on_llm_new_token( + chunk.text, + verbose=verbose, + ) + if final_chunk is None: + raise ValueError("No data received from Ollama stream.") + + return final_chunk + def _generate( self, messages: List[BaseMessage], @@ -94,9 +206,12 @@ def _generate( ]) """ - prompt = self._format_messages_as_text(messages) - final_chunk = super()._stream_with_aggregation( - prompt, stop=stop, run_manager=run_manager, verbose=self.verbose, **kwargs + final_chunk = self._chat_stream_with_aggregation( + messages, + stop=stop, + run_manager=run_manager, + verbose=self.verbose, + **kwargs, ) chat_generation = ChatGeneration( message=AIMessage(content=final_chunk.text), @@ -110,9 +225,30 @@ def _stream( stop: Optional[List[str]] = None, run_manager: Optional[CallbackManagerForLLMRun] = None, **kwargs: Any, + ) -> Iterator[ChatGenerationChunk]: + try: + for stream_resp in self._create_chat_stream(messages, stop, **kwargs): + if stream_resp: + chunk = 
_stream_response_to_chat_generation_chunk(stream_resp) + yield chunk + if run_manager: + run_manager.on_llm_new_token( + chunk.text, + verbose=self.verbose, + ) + except OllamaEndpointNotFoundError: + yield from self._legacy_stream(messages, stop, **kwargs) + + @deprecated("0.0.3", alternative="_stream") + def _legacy_stream( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, ) -> Iterator[ChatGenerationChunk]: prompt = self._format_messages_as_text(messages) - for stream_resp in self._create_stream(prompt, stop, **kwargs): + for stream_resp in self._create_generate_stream(prompt, stop, **kwargs): if stream_resp: chunk = _stream_response_to_chat_generation_chunk(stream_resp) yield chunk diff --git a/libs/community/langchain_community/llms/ollama.py b/libs/community/langchain_community/llms/ollama.py index 3551ba446ef36..64ddf82c801cb 100644 --- a/libs/community/langchain_community/llms/ollama.py +++ b/libs/community/langchain_community/llms/ollama.py @@ -20,6 +20,10 @@ def _stream_response_to_generation_chunk( ) +class OllamaEndpointNotFoundError(Exception): + """Raised when the Ollama endpoint is not found.""" + + class _OllamaCommon(BaseLanguageModel): base_url: str = "http://localhost:11434" """Base url the model is hosted under.""" @@ -129,10 +133,26 @@ def _identifying_params(self) -> Mapping[str, Any]: """Get the identifying parameters.""" return {**{"model": self.model, "format": self.format}, **self._default_params} - def _create_stream( + def _create_generate_stream( self, prompt: str, stop: Optional[List[str]] = None, + images: Optional[List[str]] = None, + **kwargs: Any, + ) -> Iterator[str]: + payload = {"prompt": prompt, "images": images} + yield from self._create_stream( + payload=payload, + stop=stop, + api_url=f"{self.base_url}/api/generate/", + **kwargs, + ) + + def _create_stream( + self, + api_url: str, + payload: Any, + stop: Optional[List[str]] = 
None, **kwargs: Any, ) -> Iterator[str]: if self.stop is not None and stop is not None: @@ -156,20 +176,34 @@ def _create_stream( **kwargs, } + if payload.get("messages"): + request_payload = {"messages": payload.get("messages", []), **params} + else: + request_payload = { + "prompt": payload.get("prompt"), + "images": payload.get("images", []), + **params, + } + response = requests.post( - url=f"{self.base_url}/api/generate/", + url=api_url, headers={"Content-Type": "application/json"}, - json={"prompt": prompt, **params}, + json=request_payload, stream=True, timeout=self.timeout, ) response.encoding = "utf-8" if response.status_code != 200: - optional_detail = response.json().get("error") - raise ValueError( - f"Ollama call failed with status code {response.status_code}." - f" Details: {optional_detail}" - ) + if response.status_code == 404: + raise OllamaEndpointNotFoundError( + "Ollama call failed with status code 404." + ) + else: + optional_detail = response.json().get("error") + raise ValueError( + f"Ollama call failed with status code {response.status_code}." + f" Details: {optional_detail}" + ) return response.iter_lines(decode_unicode=True) def _stream_with_aggregation( @@ -181,7 +215,7 @@ def _stream_with_aggregation( **kwargs: Any, ) -> GenerationChunk: final_chunk: Optional[GenerationChunk] = None - for stream_resp in self._create_stream(prompt, stop, **kwargs): + for stream_resp in self._create_generate_stream(prompt, stop, **kwargs): if stream_resp: chunk = _stream_response_to_generation_chunk(stream_resp) if final_chunk is None: @@ -225,6 +259,7 @@ def _generate( self, prompts: List[str], stop: Optional[List[str]] = None, + images: Optional[List[str]] = None, run_manager: Optional[CallbackManagerForLLMRun] = None, **kwargs: Any, ) -> LLMResult: @@ -248,6 +283,7 @@ def _generate( final_chunk = super()._stream_with_aggregation( prompt, stop=stop, + images=images, run_manager=run_manager, verbose=self.verbose, **kwargs,