From 8f612435708363f98bfd3ab3f251356707f7f6ee Mon Sep 17 00:00:00 2001 From: Sebastian Cerquera Date: Sun, 17 Nov 2024 09:44:11 -0500 Subject: [PATCH] gutenbers sage --- README.md | 35 +- all_agents_tutorials/Gutenbergs_Sage.ipynb | 933 +++++++++++++++++++++ 2 files changed, 956 insertions(+), 12 deletions(-) create mode 100644 all_agents_tutorials/Gutenbergs_Sage.ipynb diff --git a/README.md b/README.md index c517b95..b73edb7 100644 --- a/README.md +++ b/README.md @@ -128,9 +128,20 @@ Explore our extensive list of GenAI agent implementations, ranging from simple t #### Implementation 🛠️ Utilizes StateGraph to define the application flow, incorporates custom PlannerState for process management, and employs node functions for city input, interests input, and itinerary creation. The system integrates an LLM to generate the final personalized travel itinerary. +8. **[Project Gutenbergs Agent (LangGraph)](https://github.com/NirDiamant/GenAI_Agents/blob/main/all_agents_tutorials/Gutenbergs_Sage.ipynb)** + + #### Overview 🔎 + A conversational AI agent designed to help users improve their knowledge through interactive discussions and feedback with a focus on data references provided. The agent facilitates group discussions where multiple users can practice and share their knowledge for specific use cases while receiving expert guidance on best practices. + + #### Implementation 🛠️ + - LangGraph for conversation flow management + - Ollama LLM integration for response generation + - Memory-based state management + - Checkpoint saving for conversation continuity + ### 🎨 Creative and Generative Agents -8. **[GIF Animation Generator Agent (LangGraph)](https://github.com/NirDiamant/GenAI_Agents/blob/main/all_agents_tutorials/gif_animation_generator_langgraph.ipynb)** +9. 
**[GIF Animation Generator Agent (LangGraph)](https://github.com/NirDiamant/GenAI_Agents/blob/main/all_agents_tutorials/gif_animation_generator_langgraph.ipynb)** #### Overview 🔎 A GIF animation generator that integrates LangGraph for workflow management, GPT-4 for text generation, and DALL-E for image creation, producing custom animations from user prompts. @@ -138,7 +149,7 @@ Explore our extensive list of GenAI agent implementations, ranging from simple t #### Implementation 🛠️ Utilizes LangGraph to orchestrate a workflow that generates character descriptions, plots, and image prompts using GPT-4, creates images with DALL-E 3, and assembles them into GIFs using PIL. Employs asynchronous programming for efficient parallel processing. -9. **[TTS Poem Generator Agent (LangGraph)](https://github.com/NirDiamant/GenAI_Agents/blob/main/all_agents_tutorials/tts_poem_generator_agent_langgraph.ipynb)** +10. **[TTS Poem Generator Agent (LangGraph)](https://github.com/NirDiamant/GenAI_Agents/blob/main/all_agents_tutorials/tts_poem_generator_agent_langgraph.ipynb)** #### Overview 🔎 An advanced text-to-speech (TTS) agent using LangGraph and OpenAI's APIs classifies input text, processes it based on content type, and generates corresponding speech output. @@ -146,7 +157,7 @@ Explore our extensive list of GenAI agent implementations, ranging from simple t #### Implementation 🛠️ Utilizes LangGraph to orchestrate a workflow that classifies input text using GPT models, applies content-specific processing, and converts the processed text to speech using OpenAI's TTS API. The system adapts its output based on the identified content type (general, poem, news, or joke). -10. **[Music Compositor Agent (LangGraph)](https://github.com/NirDiamant/GenAI_Agents/blob/main/all_agents_tutorials/music_compositor_agent_langgraph.ipynb)** +11. 
**[Music Compositor Agent (LangGraph)](https://github.com/NirDiamant/GenAI_Agents/blob/main/all_agents_tutorials/music_compositor_agent_langgraph.ipynb)** #### Overview 🔎 An AI Music Compositor using LangGraph and OpenAI's language models generates custom musical compositions based on user input. The system processes the input through specialized components, each contributing to the final musical piece, which is then converted to a playable MIDI file. @@ -156,7 +167,7 @@ Explore our extensive list of GenAI agent implementations, ranging from simple t ### 🚀 Advanced Agent Architectures -11. **[Memory-Enhanced Conversational Agent](https://github.com/NirDiamant/GenAI_Agents/blob/main/all_agents_tutorials/memory_enhanced_conversational_agent.ipynb)** +12. **[Memory-Enhanced Conversational Agent](https://github.com/NirDiamant/GenAI_Agents/blob/main/all_agents_tutorials/memory_enhanced_conversational_agent.ipynb)** #### Overview 🔎 A memory-enhanced conversational AI agent incorporates short-term and long-term memory systems to maintain context within conversations and across multiple sessions, improving interaction quality and personalization. @@ -164,7 +175,7 @@ Explore our extensive list of GenAI agent implementations, ranging from simple t #### Implementation 🛠️ Integrates a language model with separate short-term and long-term memory stores, utilizes a prompt template incorporating both memory types, and employs a memory manager for storage and retrieval. The system includes an interaction loop that updates and utilizes memories for each response. -12. **[Multi-Agent Collaboration System](https://github.com/NirDiamant/GenAI_Agents/blob/main/all_agents_tutorials/multi_agent_collaboration_system.ipynb)** +13. 
**[Multi-Agent Collaboration System](https://github.com/NirDiamant/GenAI_Agents/blob/main/all_agents_tutorials/multi_agent_collaboration_system.ipynb)** #### Overview 🔎 A multi-agent collaboration system combining historical research with data analysis, leveraging large language models to simulate specialized agents working together to answer complex historical questions. @@ -172,7 +183,7 @@ Explore our extensive list of GenAI agent implementations, ranging from simple t #### Implementation 🛠️ Utilizes a base Agent class to create specialized HistoryResearchAgent and DataAnalysisAgent, orchestrated by a HistoryDataCollaborationSystem. The system follows a five-step process: historical context provision, data needs identification, historical data provision, data analysis, and final synthesis. -13. **[Self-Improving Agent](https://github.com/NirDiamant/GenAI_Agents/blob/main/all_agents_tutorials/self_improving_agent.ipynb)** +14. **[Self-Improving Agent](https://github.com/NirDiamant/GenAI_Agents/blob/main/all_agents_tutorials/self_improving_agent.ipynb)** #### Overview 🔎 A Self-Improving Agent using LangChain engages in conversations, learns from interactions, and continuously improves its performance over time through reflection and adaptation. @@ -180,7 +191,7 @@ Explore our extensive list of GenAI agent implementations, ranging from simple t #### Implementation 🛠️ Integrates a language model with chat history management, response generation, and a reflection mechanism. The system employs a learning system that incorporates insights from reflection to enhance future performance, creating a continuous improvement loop. -14. **[Task-Oriented Agent](https://github.com/NirDiamant/GenAI_Agents/blob/main/all_agents_tutorials/task_oriented_agent.ipynb)** +15. 
**[Task-Oriented Agent](https://github.com/NirDiamant/GenAI_Agents/blob/main/all_agents_tutorials/task_oriented_agent.ipynb)** #### Overview 🔎 A language model application using LangChain that summarizes text and translates the summary to Spanish, combining custom functions, structured tools, and an agent for efficient text processing. @@ -188,7 +199,7 @@ Explore our extensive list of GenAI agent implementations, ranging from simple t #### Implementation 🛠️ Utilizes custom functions for summarization and translation, wrapped as structured tools. Employs a prompt template to guide the agent, which orchestrates the use of tools. An agent executor manages the process, taking input text and producing both an English summary and its Spanish translation. -15. **[Internet Search and Summarize Agent](https://github.com/NirDiamant/GenAI_Agents/blob/main/all_agents_tutorials/search_the_internet_and_summarize.ipynb)** +16. **[Internet Search and Summarize Agent](https://github.com/NirDiamant/GenAI_Agents/blob/main/all_agents_tutorials/search_the_internet_and_summarize.ipynb)** #### Overview 🔎 An intelligent web research assistant that combines web search capabilities with AI-powered summarization, automating the process of gathering information from the internet and distilling it into concise, relevant summaries. @@ -196,7 +207,7 @@ Explore our extensive list of GenAI agent implementations, ranging from simple t #### Implementation 🛠️ Integrates a web search module using DuckDuckGo's API, a result parser, and a text summarization engine leveraging OpenAI's language models. The system performs site-specific or general searches, extracts relevant content, generates concise summaries, and compiles attributed results for efficient information retrieval and synthesis. -16. **[Multi agent research team - Autogen](https://github.com/NirDiamant/GenAI_Agents/blob/main/all_agents_tutorials/research_team_autogen.ipynb)** +17. 
**[Multi agent research team - Autogen](https://github.com/NirDiamant/GenAI_Agents/blob/main/all_agents_tutorials/research_team_autogen.ipynb)** #### Overview 🔎 @@ -211,7 +222,7 @@ Explore our extensive list of GenAI agent implementations, ranging from simple t - **[comprehensive solution with UI](https://github.com/yanivvak/dream-team)** - **[Blogpost](https://techcommunity.microsoft.com/t5/ai-azure-ai-services-blog/build-your-dream-team-with-autogen/ba-p/4157961)** -17. **[Blog Writer (Open AI Swarm)](https://github.com/NirDiamant/GenAI_Agents/blob/main/all_agents_tutorials/blog_writer_swarm.ipynb)** +18. **[Blog Writer (Open AI Swarm)](https://github.com/NirDiamant/GenAI_Agents/blob/main/all_agents_tutorials/blog_writer_swarm.ipynb)** #### Overview 🔎 @@ -225,7 +236,7 @@ Explore our extensive list of GenAI agent implementations, ranging from simple t #### Additional Resources 📚 - **[Swarm Repo](https://github.com/openai/swarm)** -18. **[Podcast Internet Search and Generate Agent 🎙️](https://github.com/NirDiamant/GenAI_Agents/blob/main/all_agents_tutorials/generate_podcast_agent_langgraph.ipynb)** +19. **[Podcast Internet Search and Generate Agent 🎙️](https://github.com/NirDiamant/GenAI_Agents/blob/main/all_agents_tutorials/generate_podcast_agent_langgraph.ipynb)** #### Overview 🔎 A two step agent that first searches the internet for a given topic and then generates a podcast on the topic found. The search step uses a search agent and search function to find the most relevant information. The second step uses a podcast generation agent and generation function to create a podcast on the topic found. @@ -235,7 +246,7 @@ Explore our extensive list of GenAI agent implementations, ranging from simple t ## 🌟 Special Advanced Technique 🌟 -19. **[Sophisticated Controllable Agent for Complex RAG Tasks 🤖](https://github.com/NirDiamant/Controllable-RAG-Agent)** +20. 
**[Sophisticated Controllable Agent for Complex RAG Tasks 🤖](https://github.com/NirDiamant/Controllable-RAG-Agent)** #### Overview 🔎 An advanced RAG solution designed to tackle complex questions that simple semantic similarity-based retrieval cannot solve. This approach uses a sophisticated deterministic graph as the "brain" 🧠 of a highly controllable autonomous agent, capable of answering non-trivial questions from your own data. diff --git a/all_agents_tutorials/Gutenbergs_Sage.ipynb b/all_agents_tutorials/Gutenbergs_Sage.ipynb new file mode 100644 index 0000000..a3e4872 --- /dev/null +++ b/all_agents_tutorials/Gutenbergs_Sage.ipynb @@ -0,0 +1,933 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "wmsetq6tIq9_" + }, + "source": [ + "# SAGE\n", + "\n", + "### Overview\n", + "A conversational AI agent designed to help users improve their knowledge through interactive discussions and feedback with a focus on data references provided.\n", + "The agent facilitates group discussions where multiple users can practice and share their knowledge for specific use cases while receiving expert guidance on best practices.\n", + "\n", + "### Motivation\n", + "Creating effective prompts for AI systems is a complex challenge that requires both theoretical knowledge and practical experience.\n", + "Through collaborative learning and structured practice, users can develop their prompt engineering skills more effectively than working in isolation.\n", + "SAGE helps bridge the gap between theoretical understanding and practical application,\n", + "allowing users to iteratively refine their prompts based on each specific technique and final goal.\n", + "\n", + "### Key Components\n", + "#### Core Features\n", + "- Multi-user support with distinct user personas\n", + "- Interactive prompt creation exercises\n", + "- Real-time feedback based on prompt engineering best practices\n", + "- Reference-based learning using markdown documentation that retrieves 
information with a RAG system\n", + "\n", + "#### Technical Architecture\n", + "- LangGraph for conversation flow management\n", + "- Ollama LLM integration for response generation\n", + "- Memory-based state management\n", + "- Checkpoint saving for conversation continuity\n", + "\n", + "#### Learning Flow\n", + "1. Agent introduces the host of the discussion\n", + "2. Presents a specific topic\n", + "3. Users take turns creating and discussing\n", + "4. Agent provides analysis and constructive feedback\n", + "5. Session can be continued or concluded with a summary\n", + "\n", + "#### Application Domains (Use Case Shown - Prompt Engineering)\n", + "- Training sessions for prompt engineering\n", + "- Collaborative prompt development\n", + "- Learning prompt engineering best practices\n", + "- Improving chatbot and AI system prompts\n", + "\n", + "### Acknowledgment\n", + "This implementation is inspired by and utilizes techniques from the comprehensive prompt engineering collection maintained by [NirDiamant](https://github.com/NirDiamant/Prompt_Engineering), specifically focusing on the Role Prompting methodology for creating an effective learning environment." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Zi9W1UCFJBf7" + }, + "source": [ + "## Environment : Installs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kMhTJAd4Jh9E" + }, + "source": [ + "### Install dependencies\n", + "#### Installs required Python packages including:\n", + " - Environment management (python-dotenv)\n", + " - LLM integrations (groq, langchain)\n", + " - Graph processing (langgraph)\n", + " - Vector stores (pinecone, chroma)\n", + " - NLP tools (spacy)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install python-dotenv==1.0.1\n", + "!pip install groq==0.4.2 # For Groq API interactions\n", + "!pip install pydantic==2.9.2\n", + "!pip install spacy==3.8.2\n", + "!pip install langgraph==0.2.48\n", + "!pip install langgraph-checkpoint-sqlite==2.0.1\n", + "!pip install langchain-core==0.3.18\n", + "!pip install langchain-groq==0.2.1\n", + "\n", + "!pip install langchain==0.3.7\n", + "!pip install langchain_ollama==0.2.0\n", + "!pip install langchain_pinecone==0.2.0\n", + "!pip install pinecone-notebooks==0.1.0\n", + "\n", + "!pip install langchain-community==0.3.7\n", + "!pip install langchain-chroma==0.1.4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!apt-get install -y pciutils\n", + "!curl https://ollama.ai/install.sh | sh" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jJCZWpTpJo3o" + }, + "source": [ + "### 1. 
Configurations" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "11W7ZIyJOChD" + }, + "outputs": [], + "source": [ + "NEW_LINE = \"\\n\"\n", + "GROQ_LLM_INFERENCE = \"llama-3.2-3b-preview\"\n", + "OLLAMA_LLM_INFERENCE = \"llama3.2\"\n", + "OLLAMA_HOST = 'http://0.0.0.0:11434'\n", + "LLM_EMBBEDDINGS = \"nomic-embed-text\"\n", + "\n", + "SPACEY_MODEL = \"en_core_web_sm\"\n", + "\n", + "LOCAL_ARCHIVE_PATH = \"./\"\n", + "LOCAL_DEFAULT_BOOK = \"3300-0.txt\"\n", + "WEB_DEFAULT_BOOK = \"https://www.gutenberg.org/files/3300/3300-0.txt\"\n", + "\n", + "CHROMA_STORE = \"wealth_of_nations\"\n", + "\n", + "INDEX_NAME = \"wealthofnations\"\n", + "\n", + "IS_LOCAL_ENVIRONMENT = True" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wmiV3OFSJu7W" + }, + "source": [ + "### Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "3M3KM9lbOCaZ" + }, + "outputs": [], + "source": [ + "# Vectorstore\n", + "import os\n", + "import json\n", + "import time\n", + "import random\n", + "import threading\n", + "import subprocess\n", + "\n", + "from pathlib import Path\n", + "from operator import add\n", + "from dotenv import load_dotenv\n", + "\n", + "from langchain_groq import ChatGroq\n", + "from langchain_ollama import OllamaEmbeddings, ChatOllama\n", + "\n", + "load_dotenv()\n", + "\n", + "## Embbeddings databases\n", + "from langchain_pinecone import PineconeVectorStore\n", + "from langchain_chroma import Chroma\n", + "\n", + "\n", + "## Agent dependencies\n", + "from typing import Annotated, Literal\n", + "from typing_extensions import TypedDict\n", + "from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage , AIMessage\n", + "from langgraph.graph import add_messages, StateGraph, START, END\n", + "from pydantic import BaseModel\n", + "from langgraph.checkpoint.sqlite import SqliteSaver\n", + "from langgraph.checkpoint.memory import MemorySaver\n", + "\n", + "## NER\n", + 
"import spacy\n", + "\n", + "## RAG dependencies:\n", + "from langchain_community.document_loaders import (\n", + " DirectoryLoader,\n", + " TextLoader,\n", + ")\n", + "from langchain.text_splitter import RecursiveCharacterTextSplitter\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "GkbNsXCNNFsU" + }, + "outputs": [], + "source": [ + "def ollama():\n", + " os.environ['OLLAMA_HOST'] = OLLAMA_HOST\n", + " os.environ['OLLAMA_ORIGINS'] = '*'\n", + " subprocess.Popen([\"ollama\", \"pull\", LLM_EMBBEDDINGS])\n", + " subprocess.Popen([\"ollama\", \"pull\", OLLAMA_LLM_INFERENCE])\n", + " subprocess.Popen([\"ollama\", \"serve\"])\n", + "\n", + "ollama_thread = threading.Thread(target=ollama)\n", + "ollama_thread.start()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "vzbxaMc9TdOQ" + }, + "outputs": [], + "source": [ + "\n", + "## Colab custom logic\n", + "\n", + "if not IS_LOCAL_ENVIRONMENT:\n", + " from google.colab import userdata\n", + "\n", + " os.environ[\"GROQ_API_KEY\"] = userdata.get('GROQ_API_KEY')\n", + " os.environ[\"PINECONE_API_KEY\"] = userdata.get('PINECONE_API_KEY')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "gxvuAAM7PhK-", + "outputId": "9e5bbfa8-53c2-44f1-a223-c7a8322bf256" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/spacy/util.py:910: UserWarning: [W095] Model 'en_core_web_sm' (3.7.1) was trained with spaCy v3.7.2 and may not be 100% compatible with the current version (3.8.2). If you see errors or degraded performance, download a newer compatible model or retrain your custom model with the current spaCy version. 
For more details and available updates, run: python -m spacy validate\n", + " warnings.warn(warn_msg)\n" + ] + } + ], + "source": [ + "##\n", + "## PROGRAM LOGIC\n", + "##\n", + "def build_llm():\n", + " if IS_LOCAL_ENVIRONMENT:\n", + " llm = ChatOllama(model=OLLAMA_LLM_INFERENCE)\n", + " else:\n", + " llm = ChatGroq(model=GROQ_LLM_INFERENCE)\n", + " return llm\n", + "\n", + "llm = build_llm()\n", + "nlp = spacy.load(SPACEY_MODEL)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "efJjXLnlJz3a" + }, + "source": [ + "### 2. Vector Store Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "cSEMgboCPSS2" + }, + "outputs": [], + "source": [ + "## vectorstore\n", + "def build_vector_store():\n", + " if not IS_LOCAL_ENVIRONMENT:\n", + " return PineconeVectorStore(index_name=INDEX_NAME, embedding=embeddings)\n", + " else:\n", + " return Chroma(\n", + " embedding_function=embeddings,\n", + " persist_directory=f\"{LOCAL_ARCHIVE_PATH}{CHROMA_STORE}\"\n", + " )\n", + "\n", + "embeddings = OllamaEmbeddings(model=LLM_EMBBEDDINGS)\n", + "embeddings.base_url = os.getenv(\"OLLAMA_HOST\")\n", + "\n", + "vectorstore = build_vector_store()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "xLfkvYYaXw0h" + }, + "outputs": [], + "source": [ + "## Embeddings\n", + "from langchain_text_splitters import MarkdownHeaderTextSplitter\n", + "\n", + "def embbed_paragraphs(db):\n", + " sample_docs = DirectoryLoader(\n", + " \"./\",\n", + " glob=\"**/*.txt\",\n", + " loader_cls=TextLoader,\n", + " show_progress=True,\n", + " ).load()\n", + "\n", + " text_splitter = RecursiveCharacterTextSplitter(\n", + " chunk_size=500,\n", + " chunk_overlap=50,\n", + " length_function=len,\n", + " )\n", + " documents = text_splitter.split_documents(sample_docs)\n", + "\n", + " if not IS_LOCAL_ENVIRONMENT:\n", + " # Use Pinecone\n", + " db.from_documents(documents, embedding=embeddings, index_name=\"wealthofnations\")\n", + " 
else:\n", + " # Use Local Chroma\n", + " db.from_documents(documents, embedding=embeddings)\n", + "\n", + " return documents" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "RcDoaK3OPX8-" + }, + "outputs": [], + "source": [ + "## State\n", + "class User(BaseModel):\n", + " name: str\n", + " gender: str\n", + " age: int\n", + " likes: list[str]\n", + " dislikes: list[str]\n", + "\n", + "class Host(User):\n", + " sys_msg: str = None\n", + " personality: list[str]\n", + "\n", + "def add_discussions(discussions, new_messages):\n", + " if not new_messages:\n", + " return []\n", + " return discussions + new_messages\n", + "\n", + "class DiscussionState(TypedDict):\n", + " messages: Annotated[list[AnyMessage], add_messages] = []\n", + " topic: str\n", + " users: list[User]\n", + " host: Host\n", + " curr_discussion: Annotated[list[AnyMessage], add_discussions] = []\n", + " discussion_summaries: Annotated[list[str], add] = []\n", + " setEndDiscussion: bool = False\n", + " random_anchor: str\n", + " book_file_path: str = LOCAL_DEFAULT_BOOK" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ppgzSK6XJ_nM" + }, + "source": [ + "### 4. 
Node Functions\n", + "\n", + "Source Material Processing\n", + "\n", + "ingest_source_material: Downloads and stores source material\n", + "\n", + "preprocess_source_material: Performs NER and embeddings\n", + "\n", + "prepare_topic: Selects discussion topics from processed material\n", + "\n", + "#### Discussion Flow\n", + "\n", + "introduce: Host initiates the session\n", + "\n", + "ask_question: Generates discussion questions\n", + "human_discuss: Handles user interactions\n", + "\n", + "agent_interjection: Analyzes discussion and provides AI feedback\n", + "\n", + "summarize_discussion: Creates discussion summaries" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "993b1uIfTtCP" + }, + "outputs": [], + "source": [ + "## LangGraph States\n", + "def introduce(state):\n", + " host = state['host']\n", + " topic = state['topic']\n", + " users = state['users']\n", + "\n", + " sys_msg = f\"\"\"\n", + " You are a meeting host leading a meeting.\n", + "\n", + " You will be leading a group of {len(users)} members.\n", + "\n", + " Your job is to ask a quesetion about the to the users to discuss.\n", + "\n", + " The following is your information, do not break your character.\n", + " name: {host.name}\n", + " gender: {host.gender}\n", + " age: {host.age}\n", + " personality: {host.personality}\n", + "\n", + "\n", + " The following are the information of the club users:\n", + " {NEW_LINE.join([f\"{user.name} ({user.gender}, {user.age}\" for user in users])}\n", + "\n", + " The topic that will be discussed is {topic}\n", + " \"\"\"\n", + "\n", + " if state.get('guide'):\n", + " sys_msg += f\"\\n Follow this guide: \\n{state.get('guide')}\"\n", + "\n", + " host.sys_msg = sys_msg\n", + "\n", + " narrator_msg = f\"\"\"\n", + " Club members just arrived!\n", + " Before we dive in, please greet the members, introduce yourself, and give brief introductions of the members.\n", + " \"\"\"\n", + " response = llm.invoke([\n", + " 
SystemMessage(content=sys_msg),\n", + " HumanMessage(content=narrator_msg, name=\"narrator\")\n", + " ])\n", + " response.name = host.name\n", + " response.content = f\"{host.name}: {response.content}\"\n", + " response.pretty_print()\n", + " return {\n", + " 'message': [response],\n", + " 'host': host,\n", + " }\n", + "\n", + "def ingest_source_material(state):\n", + " \"\"\"\n", + " Downloads source material from the Gutenberg project\n", + " \"\"\"\n", + " import requests\n", + " response = requests.get(WEB_DEFAULT_BOOK)\n", + "\n", + " source_file_path = Path(f\"{LOCAL_ARCHIVE_PATH}{LOCAL_DEFAULT_BOOK}\")\n", + " with open(source_file_path, \"w\") as source_file:\n", + " source_file.write(response.text)\n", + "\n", + " return state\n", + "\n", + "def extract_entities(file_path):\n", + " # Read the text file\n", + " with open(file_path, 'r', encoding='utf-8') as file:\n", + " text = file.read()\n", + "\n", + " # Process the text with spaCy\n", + " nlp.max_length = len(text)\n", + " doc = nlp(text)\n", + "\n", + " # Extract entities and organize them by type\n", + " entities = {}\n", + " for ent in doc.ents:\n", + " if ent.label_ not in entities:\n", + " entities[ent.label_] = set()\n", + " entities[ent.label_].add(ent.text)\n", + "\n", + " for key, value in entities.items():\n", + " entities[key] = list(value)\n", + "\n", + " return entities\n", + "\n", + "\n", + "def preprocess_source_material(state):\n", + " \"\"\"\n", + " Performs NER against source material\n", + " \"\"\"\n", + " embbed_paragraphs(vectorstore)\n", + "\n", + " entities_file_path = Path(f\"{LOCAL_ARCHIVE_PATH}entities_{LOCAL_DEFAULT_BOOK}\")\n", + "\n", + " found_entities = extract_entities(f\"{LOCAL_ARCHIVE_PATH}{LOCAL_DEFAULT_BOOK}\")\n", + " json_entities = json.dumps(found_entities)\n", + "\n", + " with open(entities_file_path, \"w\") as output:\n", + " output.write(json_entities)\n", + "\n", + " return state\n", + "\n", + "def prepare_topic(state):\n", + " entities_file_path = 
Path(f\"{LOCAL_ARCHIVE_PATH}entities_{LOCAL_DEFAULT_BOOK}\")\n", + "\n", + " with open(entities_file_path, \"r\") as entities_file:\n", + " entities = json.loads(entities_file.read())\n", + "\n", + " persons = entities.get(\"PERSON\")\n", + " size = len(persons)\n", + "\n", + " state['random_anchor'] = persons[random.randint(1, size)]\n", + " return state\n", + "\n", + "def should_download_source_material(state):\n", + " source_file_path = Path(f\"{LOCAL_ARCHIVE_PATH}{LOCAL_DEFAULT_BOOK}\")\n", + "\n", + " if(not source_file_path.exists() or not source_file_path.is_file()):\n", + " return \"web_download\"\n", + "\n", + " entities_file_path = Path(f\"{LOCAL_ARCHIVE_PATH}entities_{LOCAL_DEFAULT_BOOK}\")\n", + "\n", + " if(not entities_file_path.exists() or not entities_file_path.is_file()):\n", + " return \"embeddings_ner\"\n", + "\n", + " return \"prepare_topic\"\n", + "\n", + "def should_prepare_source_material(state):\n", + " entities_file_path = Path(f\"{LOCAL_ARCHIVE_PATH}entities_{LOCAL_DEFAULT_BOOK}\")\n", + "\n", + " if(not entities_file_path.exists() or not entities_file_path.is_file()):\n", + " return \"embeddings_ner\"\n", + "\n", + " return \"prepare_topic\"\n", + "\n", + "def ask_question(state):\n", + " # 1. find any interesting topic from retriever\n", + "\n", + " retriever = vectorstore.as_retriever(\n", + " search_type=\"similarity_score_threshold\",\n", + " search_kwargs={\"k\": 1, \"score_threshold\": 0.5},\n", + " )\n", + "\n", + " if not IS_LOCAL_ENVIRONMENT:\n", + " # Use Pinecone\n", + " results = retriever.invoke(state['random_anchor'], filter={\"source\": \"wealthofnations\"})\n", + " else:\n", + " # Use Local Chroma\n", + " results = retriever.invoke(state['random_anchor'])\n", + "\n", + "\n", + " # referenced_document = list(results)[0].page_content\n", + " referenced_document = \"test\"\n", + "\n", + " # 2. 
generate interesting question from the topic\n",
+ " host = state['host']\n",
+ "\n",
+ " narrator_msg = f\"\"\"\n",
+ " Present an interesting and insightful question from the topic from the following interesting fact(s):\n",
+ " {referenced_document}\n",
+ "\n",
+ " Remember that you are the host, do not break the character.\n",
+ " \"\"\"\n",
+ " response = llm.invoke([\n",
+ " SystemMessage(content=host.sys_msg),\n",
+ " SystemMessage(content=\"Your job is to ask a quesetion about the topic to the users for dicsussion.\"),\n",
+ " HumanMessage(content=narrator_msg, name=\"narrator\")\n",
+ " ])\n",
+ " response.name = host.name\n",
+ " response.content = f\"{host.name}: {response.content}\"\n",
+ " response.pretty_print()\n",
+ " return {\n",
+ " 'message': [response],\n",
+ " 'curr_discussion': [response],\n",
+ " \"setEndDiscussion\": False\n",
+ " }\n",
+ "\n",
+ "def human_discuss(state):\n",
+ "    \"\"\"Collect one human turn for the current discussion.\n",
+ "\n",
+ "    Prompts on stdin for a member number and then that member's message, or\n",
+ "    for 'q' to end the discussion.\n",
+ "\n",
+ "    Returns a state update with the new HumanMessage(s) under\n",
+ "    \"curr_discussion\" and the end-of-discussion flag under \"setEndDiscussion\".\n",
+ "    \"\"\"\n",
+ "    users = state['users']\n",
+ "    silence = \"'......'\"  # marker the host prompt treats as a silent member\n",
+ "    end_discussion = False\n",
+ "    user_messages = []\n",
+ "    user_options = {str(i+1): user.name for i, user in enumerate(users)}\n",
+ "    while True:\n",
+ "        print(\"===\" * 10 + \"USER INPUT\" + \"===\" * 10)\n",
+ "        user_ind = input(f\"{user_options}\\nWho are you (type number to select user OR 'q' to quit): \").strip()\n",
+ "\n",
+ "        # int() with ValueError handling instead of str.isdigit(): isdigit() accepts\n",
+ "        # characters such as superscript digits that int() then rejects.\n",
+ "        try:\n",
+ "            choice = int(user_ind)\n",
+ "        except ValueError:\n",
+ "            choice = None\n",
+ "\n",
+ "        if choice is not None and 1 <= choice <= len(users):\n",
+ "            user = users[choice - 1]\n",
+ "            user_input = input(f\"{user.name}: \")\n",
+ "            user_messages.append(HumanMessage(content=f\"{user.name}: {user_input}\", user_id=user.name))\n",
+ "            break\n",
+ "        elif user_ind == 'q':\n",
+ "            end_discussion = True\n",
+ "            # Each call records a single turn, so participation has to be read from\n",
+ "            # the discussion accumulated in state; a set local to this call is always\n",
+ "            # empty here and would flag every member as silent.\n",
+ "            spoken = [str(getattr(msg, 'content', '')) for msg in (state.get('curr_discussion') or [])]\n",
+ "            for member in users:\n",
+ "                prefix = f\"{member.name}: \"\n",
+ "                has_spoken = any(\n",
+ "                    text.startswith(prefix) and text != prefix + silence for text in spoken\n",
+ "                )\n",
+ "                if not has_spoken:\n",
+ "                    # Add an empty message for each silent member\n",
+ "                    user_messages.append(HumanMessage(content=prefix + silence, user_id=member.name))\n",
+ "            break\n",
+ "        else:\n",
+ "            print(\"Invalid input. Please enter a number between 1 and\", len(users))\n",
+ "\n",
+ "    return {\n",
+ "        \"curr_discussion\": user_messages,\n",
+ "        \"setEndDiscussion\": end_discussion\n",
+ "    }\n",
+ "\n",
+ "def summarize_discussion(state):\n",
+ "    \"\"\"Summarize the current discussion with the LLM.\n",
+ "\n",
+ "    Returns a state update carrying the summary text under 'discussion_summaries'.\n",
+ "    \"\"\"\n",
+ "    curr_discussion = state['curr_discussion']\n",
+ "    sys_msg = \"\"\"\n",
+ " You are a summarizer of a discussion. Your job is to summarize the following conversations\n",
+ " \"\"\"\n",
+ "    response = llm.invoke(\n",
+ "        [SystemMessage(content=sys_msg)] + curr_discussion\n",
+ "    )\n",
+ "    response.pretty_print()\n",
+ "    return {\n",
+ "        'discussion_summaries': [response.content],\n",
+ "    }\n",
+ "\n",
+ "def agent_interjection(state):\n",
+ "    \"\"\"Let the host decide whether to step into the discussion.\n",
+ "\n",
+ "    Sends the discussion so far to the LLM and expects a JSON object with the\n",
+ "    keys \"should_intervene\" and \"message\".\n",
+ "\n",
+ "    Returns a state update that adds the host's message to 'messages' and\n",
+ "    'curr_discussion' when the host intervenes, and an empty update ({}) when\n",
+ "    it does not or when the reply cannot be parsed.\n",
+ "    \"\"\"\n",
+ "    import json\n",
+ "\n",
+ "    curr_discussion = state['curr_discussion']\n",
+ "    host = state['host']\n",
+ "    topic = state['topic']\n",
+ "    users = state['users']\n",
+ "\n",
+ "    sys_msg = f\"\"\"\n",
+ " You are {host.name}, a club host discussing {topic}. Your traits: {', '.join(host.personality)}\n",
+ "\n",
+ " You will always respond as host \"{host.name}: message\"\n",
+ "\n",
+ " Club members present:\n",
+ " {', '.join([f\"{user.name} (interests: {', '.join(user.likes)})\" for user in users])}\n",
+ "\n",
+ " only address the members present in the discussion.\n",
+ "\n",
+ " When analyzing the discussion, pay extra attention to:\n",
+ " 1. Who said what - track each member's specific contributions accurately\n",
+ " 2. The depth of each response - look for opportunities to expand shallow comments\n",
+ " 3. The engagement level of each participant\n",
+ " 4. Accuracy in referencing previous comments\n",
+ " 5. If members are silent, which is indicated by '......'.\n",
+ "\n",
+ " When intervening:\n",
+ " - Always acknowledge the specific points made by each member\n",
+ " - Build on their existing comments and dont assume there responses other than what they said.\n",
+ " - Ask follow-up questions that directly relate to their statements\n",
+ " - Maintain conversation flow by connecting members' comments to each other\n",
+ " - If members are silent, Make first engage them to say hi and then use their interests to draw them in\n",
+ "\n",
+ " IMPORTANT: Respond ONLY with a valid JSON object in the following format:\n",
+ " {{\n",
+ " \"thought_process\": \"Your step-by-step analysis for intervention\",\n",
+ " \"should_intervene\": boolean,\n",
+ " \"message\": \"Your intervention message if should_intervene is true and based on your thought process\"\n",
+ " }}\n",
+ " \"\"\"\n",
+ "\n",
+ "    narrator_msg = f\"\"\"\n",
+ " Current discussion (in chronological order):\n",
+ " {NEW_LINE.join([f\"- {msg.content}\" for msg in curr_discussion])}\n",
+ "\n",
+ " Analyze the above discussion and decide if you should intervene to help members engage with {topic}.\n",
+ " \"\"\"\n",
+ "\n",
+ "    response = llm.invoke(\n",
+ "        [SystemMessage(content=sys_msg)] + [HumanMessage(content=narrator_msg, name=\"narrator\")]\n",
+ "    )\n",
+ "\n",
+ "    # Parse the response and act accordingly\n",
+ "    raw = response.content if isinstance(response.content, str) else ''\n",
+ "    # Models often wrap the JSON in prose or a Markdown fence; keep the outermost object only\n",
+ "    start, end = raw.find('{'), raw.rfind('}')\n",
+ "    try:\n",
+ "        response_data = json.loads(raw[start:end + 1]) if 0 <= start < end else None\n",
+ "    except json.JSONDecodeError:\n",
+ "        response_data = None\n",
+ "\n",
+ "    if not isinstance(response_data, dict):\n",
+ "        # Unusable reply: leave the state untouched. Handing the existing lists back as an\n",
+ "        # update would append them to themselves if these keys accumulate updates\n",
+ "        # (the state definition is not visible here -- TODO confirm).\n",
+ "        return {}\n",
+ "\n",
+ "    should_intervene = response_data.get(\"should_intervene\")\n",
+ "    if isinstance(should_intervene, str):\n",
+ "        # Some models emit \"true\"/\"false\" as strings; any non-empty string is truthy\n",
+ "        should_intervene = should_intervene.strip().lower() == 'true'\n",
+ "    message = response_data.get(\"message\")\n",
+ "    if not should_intervene or not message:\n",
+ "        return {}\n",
+ "\n",
+ "    ai_response = AIMessage(content=str(message))\n",
+ "    ai_response.pretty_print()  # prints the message itself; its return value is None\n",
+ "    return {\n",
+ "        'messages': [ai_response],\n",
+ "        'curr_discussion': [ai_response]\n",
+ "    }\n",
+ "\n",
+ "def ask_to_continue(state) -> Literal['ask_question', END]:\n",
+ "    \"\"\"Ask on stdin whether to run another question round.\n",
+ "\n",
+ "    Returns 'ask_question' when the answer is yes, otherwise END.\n",
+ "    \"\"\"\n",
+ "    print(\"---\" * 25)\n",
+ "    user_input = input(\"\\nContinue with another question? y/n: \")\n",
+ "    # Tolerate surrounding whitespace and a spelled-out \"yes\"\n",
+ "    if user_input.strip().lower() in ('y', 'yes'):\n",
+ "        return \"ask_question\"\n",
+ "    return END\n",
+ "\n",
+ "def route_discussion(state) -> Literal['agent_interjection', 'summarize_discussion']:\n",
+ "    \"\"\"Pick the node that follows a human turn.\n",
+ "\n",
+ "    Returns 'summarize_discussion' once the discussion was ended, otherwise\n",
+ "    'agent_interjection' so the host can react.\n",
+ "    \"\"\"\n",
+ "    # A routing function only selects the next node: assigning to `state` here is not\n",
+ "    # written back to the graph state, so the flag is not reset in this function.\n",
+ "    if state['setEndDiscussion']:\n",
+ "        print(\"Went to summarize discussion\")\n",
+ "        return 'summarize_discussion'\n",
+ "    return 'agent_interjection'\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "2jeUwVl1KDvF"
+ },
+ "source": [
+ "### 5. Graph Setup"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "id": "Ft7EaEzjPorq"
+ },
+ "outputs": [],
+ "source": [
+ "builder = StateGraph(DiscussionState)\n",
+ "\n",
+ "builder.add_node(introduce)\n",
+ "#builder.add_node('web_download', web_to_local_node)\n",
+ "builder.add_node('web_download', ingest_source_material)\n",
+ "builder.add_node('embeddings_ner', preprocess_source_material)\n",
+ "\n",
+ "builder.add_node(prepare_topic)\n",
+ "builder.add_node(ask_question)\n",
+ "builder.add_node(human_discuss)\n",
+ "builder.add_node(agent_interjection)\n",
+ "builder.add_node(summarize_discussion)\n",
+ "\n",
+ "builder.add_edge(START, 'introduce')\n",
+ "builder.add_conditional_edges('introduce', should_download_source_material, ['web_download', 'embeddings_ner', 'prepare_topic'])\n",
+ "builder.add_conditional_edges('web_download', should_prepare_source_material, ['embeddings_ner', 'prepare_topic'])\n",
+ "builder.add_edge('embeddings_ner', 'prepare_topic')\n",
+ "builder.add_edge('prepare_topic', 'ask_question')\n",
+ "\n",
+ "builder.add_edge('ask_question', 'human_discuss')\n",
+ "builder.add_edge('agent_interjection', 'human_discuss')\n",
+ "builder.add_conditional_edges('human_discuss',route_discussion)\n",
+ "builder.add_conditional_edges('summarize_discussion', ask_to_continue)\n",
+ "\n",
+ "memory = MemorySaver()\n",
+ "agent = builder.compile(checkpointer=memory)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "dibGP-4SKGX5"
+ },
+ "source": [
+ "### 6. Graph Visualization"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 846
+ },
+ "id": "G4IitM8ifWvD",
+ "outputId": "774e38df-406d-4f00-f6d8-f181fd3f0d87"
+ },
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from IPython.display import display, Image\n",
+ "\n",
+ "display(Image(agent.get_graph().draw_mermaid_png()))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "7sA9anqdUH6O",
+ "outputId": "02cc69bc-26c5-4781-9641-67f977f57b5f"
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "WARNING:langchain_core.vectorstores.base:No relevant docs were retrieved using the relevance score threshold 0.5\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
+ "Name: Vander\n",
+ "\n",
+ "Vander: (big smile) Ah, great to see everyone here! Welcome to our meeting today. I'm Vander, your host for today's discussion. (extends hand for a handshake or a friendly greeting)\n",
+ "\n",
+ "Before we dive into the topic at hand, let me introduce myself and my fellow club members. We have some fantastic minds gathered in this room today.\n",
+ "\n",
+ "First up, we have Alice.
Welcome, Alice! (nods towards Alice) A 30-year-old professional with a passion for data science and machine learning. Her expertise in natural language processing will undoubtedly bring valuable insights to our discussion.\n", + "\n", + "Next, please give a warm welcome to Bob. (turns to Bob) Bob's an 25-year-old rising star in the field of artificial intelligence. His enthusiasm for prompt engineering is contagious, and I'm sure he'll be sharing some exciting ideas with us today.\n", + "\n", + "Last but not least, we have Charlie. (smiles at Charlie) Charlie, a 28-year-old expert in human-computer interaction. Her research on user experience and behavior will help us better understand the nuances of prompt engineering and its impact on labor value.\n", + "\n", + "So, without further ado, let's get started! Our topic for today is prompt engineering labor value. (makes eye contact with each member) I'm sure we'll have a fascinating discussion about this. Are you all ready to dive in?\n", + "==================================\u001b[1m Ai Message \u001b[0m==================================\n", + "Name: Vander\n", + "\n", + "Vander: (laughs) Alright everyone, let's dive right into it! With prompt engineering being such a crucial aspect of AI model development, I'd like to ask: How do you think the labor value of prompt engineering would change if we were to consider the impact of cultural and societal biases on the language used in prompts? 
(looks around the table with a hint of curiosity) Can anyone share their thoughts on this?\n", + "==============================USER INPUT==============================\n", + "{'1': 'Alice', '2': 'Bob', '3': 'Charlie'}\n", + "Who are you (type number to select user OR 'q' to quit): 1\n", + "Alice: The markets will be localized, as local poses a comparative advantage\n", + "==============================USER INPUT==============================\n", + "{'1': 'Alice', '2': 'Bob', '3': 'Charlie'}\n", + "Who are you (type number to select user OR 'q' to quit): 3\n", + "Charlie: Cultural knowledge will be hard to transfer\n", + "==============================USER INPUT==============================\n", + "{'1': 'Alice', '2': 'Bob', '3': 'Charlie'}\n", + "Who are you (type number to select user OR 'q' to quit): 1\n", + "Alice: Yeah, for that reason wages be localized\n", + "==============================USER INPUT==============================\n", + "{'1': 'Alice', '2': 'Bob', '3': 'Charlie'}\n", + "Who are you (type number to select user OR 'q' to quit): q\n", + "Went to summarize discussion\n", + "==================================\u001b[1m Ai Message \u001b[0m==================================\n", + "\n", + "Vander: It seems like we're getting into some interesting ideas here. Alice, you mentioned that the markets will be localized due to comparative advantages. That's a great point. And Charlie, you're right that cultural knowledge can be challenging to transfer across different regions. This highlights an important aspect of labor value in prompt engineering.\n", + "\n", + "Alice, your point about wages being localized is also well-taken. As the market for prompt engineers becomes more localized, it's likely that wages will follow suit. This could lead to some interesting implications for global talent acquisition and distribution.\n", + "\n", + "It's clear that we're on the cusp of a significant shift in how prompt engineering is valued and compensated. 
I think we need to explore this idea further and consider what it means for the future of work in AI model development.\n", + "\n", + "Bob, would you like to add anything to this discussion? And Charlie, do you have any thoughts on how localization might affect the quality of prompts generated by human engineers versus machine learning models?\n", + "---------------------------------------------------------------------------\n", + "\n", + "Continue with another question? y/n: n\n" + ] + } + ], + "source": [ + "# Club members taking part in the discussion\n", + "users = [\n", + " User(name=\"Alice\", gender=\"female\", age=30, likes=[\"Mathemathics\", \"Japanese\"], dislikes=[\"Biology\"]),\n", + " User(name=\"Bob\", gender=\"male\", age=25, likes=[\"Science Fiction\", \"Fantasy\"], dislikes=[\"Health Sciences\"]),\n", + " User(name=\"Charlie\", gender=\"female\", age=28, likes=[\"Humanities\", \"Classic Literature\"], dislikes=[\"Economics\"])\n", + "]\n", + "# Initial graph input: the discussion topic, the members and the host persona\n", + "test_state = {\n", + " 'topic': \"prompt engineering labor value\",\n", + " 'users':users,\n", + " 'host': Host(name=\"Vander\", gender=\"male\", age=40, likes=[\"Book Club\", \"Knowledge\"], dislikes=[\"Ignorance\"], personality=[\"Enthusiastic\", \"Humorous\", \"Terse\"]),\n", + "}\n", + "# thread_id is handed to the compiled graph through config['configurable']; presumably it keys the checkpointer's saved conversation -- TODO confirm\n", + "config = {\n", + " 'configurable': {'thread_id': 2}\n", + "}\n", + "# NOTE(review): the nodes visible in this notebook read state['setEndDiscussion'], not this module-level name -- looks like a leftover; confirm before removing\n", + "setEndDiscussion = False\n", + "\n", + "# Run the graph with the initial state and config defined above\n", + "response = agent.invoke(test_state, config=config)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lyX173lRqlOC" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, +
"nbformat": 4, + "nbformat_minor": 4 +}