Update AICodeBot to handle larger context sizes and improve user experience 🚀

This commit introduces several enhancements to AICodeBot. The README.md has been updated with more detailed instructions on configuring which language model to use, managing tokens effectively, and handling larger context sizes. The version in the configuration file has been bumped to 1.2.

In addition, the language model manager now dynamically switches to a larger model when the context size approaches the limit of the current model. This feature is supported for both OpenAI and OpenRouter models.

The user interface has also been improved. When a request is sent to the language model, a message is displayed indicating which model is being used. If the model is changed due to the context size, a warning message is displayed.

Finally, the test suite has been updated to cover these new features.

These changes should make AICodeBot more robust and user-friendly, especially when dealing with larger context sizes. 🎉
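
For illustration, here is a minimal sketch of the switching rule described above (simplified from the `use_appropriate_sized_model` implementation in aicodebot/lm.py further down; the 90% threshold and the error message come from that diff, while `pick_model` itself is a hypothetical helper, not part of the codebase):

```python
def pick_model(tokens_needed, current_limit, larger_model, larger_limit):
    # Switch to the larger variant once the request exceeds ~90% of the
    # current model's context window; give up if even the larger variant
    # can't hold it.
    if tokens_needed > larger_limit * 0.9:
        raise ValueError("Token limit exceeded, try using less context (files)")
    if tokens_needed > current_limit * 0.9:
        return larger_model
    return None  # keep the current model
```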
TechNickAI committed Aug 4, 2023
1 parent 3120b99 commit b6adba1
Showing 8 changed files with 109 additions and 16 deletions.
27 changes: 23 additions & 4 deletions README.md
@@ -88,7 +88,6 @@ Commands:
commit Generate a commit message based on your changes.
configure Create or update the configuration file
debug Run a command and debug the output.
learn NOT WORKING YET: Learn new skills and gain additional...
review Do a code review, with [un]staged changes, or a...
sidekick Coding help from your AI sidekick
```
@@ -160,9 +159,29 @@ Technology itself is amoral; it just imbues the values of the people who create
It's also not a "build a site for me in 5 minutes" tool that takes a well-constructed prompt and builds a scaffold for you. There are [other tools](https://github.com/AntonOsika/gpt-engineer) for that. It's not a no-code platform. Instead, AICodeBot is built to work with existing code bases, at the git-commit level. It's designed to multiply the effectiveness of capable engineers.

## Which Language Model?
## Configuring the language model to use

Not all OpenAI accounts have GPT-4 API access enabled. By default, AICodeBot uses GPT-4 if your OpenAI account supports it; we check for access the first time you run it. If your account does not have GPT-4 access, you can ask to be added to the waitlist [here](https://openai.com/waitlist/gpt-4-api). You can also use openrouter.ai to get access to advanced models like GPT-4 32k for larger context windows.
Not all OpenAI accounts have GPT-4 API access enabled. By default, AICodeBot uses GPT-4 if your OpenAI account supports it; we check for access the first time you run it. If your account does not have GPT-4 access, you can ask to be added to the waitlist [here](https://openai.com/waitlist/gpt-4-api). In our testing, GPT-4 provides the best results.

To specify a different model, you can set the `language_model` in your `$HOME/.aicodebot.yaml` file. For example:

```yaml
openai_api_key: sk-*****
language_model: gpt-3.5-turbo
personality: Stewie
version: 1.2
```

You can also use openrouter.ai to get access to advanced models like GPT-4 32k and Anthropic's 100k model for larger context windows. See [openrouter.ai](https://openrouter.ai) for more details. Here's a sample config:

```yaml
openai_api_key: sk-*****
openrouter_api_key: sk-or-****
language_model_provider: OpenRouter
language_model: openai/gpt-4-32k # or anthropic/claude-2 for 100k token limit
personality: Stewie
version: 1.2
```

Note: We'll be adding more options for AI models in the future, including those that can be run locally, such as [GPT4all](https://gpt4all.io/) and HuggingFace's [Transformers](https://huggingface.co/transformers/).
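
API keys can come from either the environment or the config file, with the environment taking precedence. A minimal sketch of that resolution order (simplified from `get_api_key` in aicodebot/lm.py below; the `config` dict here is a hypothetical stand-in for the parsed YAML file):

```python
import os

def get_api_key(key_name, config):
    # The environment wins; the key name is upper-cased,
    # e.g. "openrouter_api_key" -> "OPENROUTER_API_KEY"
    api_key = os.getenv(key_name.upper())
    if api_key:
        return api_key
    # Otherwise fall back to the lower-cased key in ~/.aicodebot.yaml
    return config.get(key_name.lower())
```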

@@ -192,10 +211,10 @@ There are a couple of things you can do:

We'd love your help! If you're interested in contributing, here's how to get started. See [CONTRIBUTING](https://github.com/gorillamania/AICodeBot/blob/main/CONTRIBUTING.md) for more details.
## Docker
Assumes you have changes in the current working directory that are already staged.
```
docker build -t aicodebot .
docker run -v ~/.aicodebot.yaml:/home/user/.aicodebot.yaml -v .:/app aicodebot commit -y
```
2 changes: 1 addition & 1 deletion aicodebot/commands/configure.py
Expand Up @@ -16,7 +16,7 @@ def configure(verbose, openai_api_key):
    # --------------- Check for an existing key or set up defaults --------------- #

    config_data_defaults = {
        "version": 1.1,
        "version": 1.2,
        "openai_api_key": openai_api_key,
        "personality": DEFAULT_PERSONALITY.name,
    }
9 changes: 7 additions & 2 deletions aicodebot/commands/sidekick.py
Expand Up @@ -2,7 +2,7 @@
from aicodebot.coder import Coder
from aicodebot.config import Session
from aicodebot.input import Chat, generate_prompt_session
from aicodebot.lm import DEFAULT_CONTEXT_TOKENS, LanguageModelManager
from aicodebot.lm import DEFAULT_CONTEXT_TOKENS, LanguageModelManager, token_size
from aicodebot.output import OurMarkdown, RichLiveCallbackHandler, get_console
from aicodebot.prompts import generate_files_context, get_prompt
from rich.live import Live
@@ -91,7 +91,6 @@ def sidekick(request, no_files, max_file_tokens, files): # noqa: PLR0915
    # If the user edited the input, then we want to print it out so they
    # have a record of what they asked for on their terminal
    console.print(parsed_human_input)

    try:
        with Live(OurMarkdown(f"Talking to {lmm.model_name} via {lmm.provider}"), auto_refresh=True) as live:
            chain = lmm.chain_factory(
@@ -100,6 +99,12 @@
                callbacks=[RichLiveCallbackHandler(live, console.bot_style)],
                chat_history=True,
            )
            old_model, new_model = lmm.use_appropriate_sized_model(chain, token_size(context))
            if old_model != new_model:
                console.print(
                    f"Changing from {old_model} to {new_model} to handle the context size.",
                    style=console.warning_style,
                )

            response = chain.run({"task": parsed_human_input, "context": context, "languages": languages})
            live.update(OurMarkdown(response))
49 changes: 47 additions & 2 deletions aicodebot/lm.py
@@ -154,7 +154,7 @@ def get_openai_model(
    def get_api_key(self, key_name):
        # Read the api key from either the environment or the config file
        key_name_upper = key_name.upper()
        api_key = os.getenv(key_name)
        api_key = os.getenv(key_name_upper)
        if api_key:
            return api_key
        else:
@@ -243,7 +243,7 @@ def read_model_config(self):

        return self.provider, self.model_name

    @functools.lru_cache  # cache so we only make the API call once
    @functools.cache  # cache so we only make the API call once
    @staticmethod
    def openai_supported_engines():
        """Get a list of the models supported by the OpenAI API key."""
@@ -267,6 +267,51 @@ def tiktoken_model_name(self):
        else:
            return self.model_name

    def use_appropriate_sized_model(self, chain, token_size):
        # Switch to a larger variant of the current model when the request
        # approaches (90% of) the current model's context window; raise if
        # even the largest variant can't hold it.
        current_model = self.model_name
        gpt_4_limit = self.get_model_token_limit("gpt-4") * 0.9
        gpt_4_32k_limit = self.get_model_token_limit("gpt-4-32k") * 0.9
        if current_model in ["gpt-4", "gpt-4-32k"]:
            if token_size > gpt_4_32k_limit:
                raise ValueError("Token limit exceeded for GPT4, try using less context (files)")
            elif token_size > gpt_4_limit:
                if "gpt-4-32k" in LanguageModelManager.openai_supported_engines():
                    self.model_name = "gpt-4-32k"
                else:
                    raise ValueError(
                        "Your request is too large for gpt-4, and you don't have access to gpt-4-32k.\n"
                        "Hint: Try using openrouter.ai which has access to lots of models. See the README for details."
                    )
            else:
                self.model_name = "gpt-4"

        elif current_model in ["openai/gpt-4", "openai/gpt-4-32k"]:
            if token_size > gpt_4_32k_limit:
                raise ValueError(
                    "Token limit exceeded for GPT4, try using less context (files)\n"
                    "Hint: try anthropic/claude-2 (100k token limit)"
                )
            elif token_size > gpt_4_limit:
                self.model_name = "openai/gpt-4-32k"
            else:
                self.model_name = "openai/gpt-4"

        elif current_model in ["gpt-3.5-turbo", "gpt-3.5-turbo-16k"]:
            gpt_3_limit = self.get_model_token_limit("gpt-3.5-turbo") * 0.9
            gpt_3_16k_limit = self.get_model_token_limit("gpt-3.5-turbo-16k") * 0.9
            if token_size > gpt_3_16k_limit:
                raise ValueError("Token limit exceeded for GPT3.5, try using less context (files)")
            elif token_size > gpt_3_limit:
                self.model_name = "gpt-3.5-turbo-16k"
            else:
                self.model_name = "gpt-3.5-turbo"

        if current_model != self.model_name:
            logger.trace(f"Switching from {current_model} to {self.model_name} to handle the context size.")
            chain.llm.model_name = self.model_name

        return current_model, self.model_name
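
For reference, a minimal sketch of how a caller drives this method, mirroring the sidekick command above (`prompt` and `context` are assumed to be defined as in that command):

```python
lmm = LanguageModelManager()
chain = lmm.chain_factory(prompt)

# May swap chain.llm to a larger model if the context warrants it
old_model, new_model = lmm.use_appropriate_sized_model(chain, token_size(context))
if old_model != new_model:
    print(f"Changing from {old_model} to {new_model} to handle the context size.")
```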


def token_size(text):
    # Shortcut
6 changes: 4 additions & 2 deletions aicodebot/output.py
@@ -3,6 +3,7 @@
from langchain.callbacks.base import BaseCallbackHandler
from rich.console import Console
from rich.markdown import CodeBlock, Markdown
from rich.panel import Panel
from rich.style import Style
from rich.syntax import Syntax

@@ -13,8 +14,9 @@ def __init__(self, live, style):
        self.live = live
        self.style = style

    def on_llm_start(self, *args, **kwargs):
        self.live.update(Markdown("**Thinking...**"))
    def on_llm_start(self, serialized, *args, **kwargs):
        message = f'Sending request to *{serialized["kwargs"]["model"]}*...'
        self.live.update(Panel(OurMarkdown(message)), refresh=True)

    def on_llm_new_token(self, token, **kwargs):
        self.buffer.append(token)
2 changes: 1 addition & 1 deletion tests/test_config.yaml
@@ -1,3 +1,3 @@
openai_api_key: sk-dummy
personality: Her
version: 1.1
version: 1.2
4 changes: 3 additions & 1 deletion tests/test_input.py
@@ -1,5 +1,6 @@
from aicodebot.helpers import create_and_write_file
from aicodebot.input import Chat
from pathlib import Path
from tests.conftest import in_temp_directory
import pytest

@@ -29,7 +30,8 @@ def test_parse_human_input(chat):
    assert chat.parse_human_input(input_data) == chat.CONTINUE


def test_parse_human_input_files(chat, tmp_path):
def test_parse_human_input_files(chat, tmp_path, monkeypatch):
    monkeypatch.setenv("AICODEBOT_CONFIG_FILE", str(Path(__file__).parent / "test_config.yaml"))
    with in_temp_directory(tmp_path):

        create_and_write_file(tmp_path / "file.txt", "text")
26 changes: 23 additions & 3 deletions tests/test_lm.py
@@ -1,22 +1,42 @@
from aicodebot.lm import LanguageModelManager, token_size
from aicodebot.prompts import get_prompt
import pytest
from pathlib import Path
import os, pytest


def test_token_size():
def test_token_size(monkeypatch):
    monkeypatch.setenv("AICODEBOT_CONFIG_FILE", str(Path(__file__).parent / "test_config.yaml"))
    assert token_size("") == 0

    text = "Code with heart, align AI with humanity. ❤️🤖"
    assert LanguageModelManager().get_token_size(text) == 14
    assert token_size(text) == 14


@pytest.mark.parametrize("provider,model_name", [("OpenAI", "gpt-4"), ("OpenRouter", "gpt-4")])
@pytest.mark.parametrize(
    "provider,model_name",
    [
        (LanguageModelManager.OPENAI, "gpt-4"),
        (LanguageModelManager.OPENROUTER, "gpt-4"),
        (LanguageModelManager.HUGGINGFACE_HUB, "google/flan-t5-xxl"),
    ],
)
def test_chain_factory(provider, model_name, monkeypatch):
    monkeypatch.setenv("AICODEBOT_MODEL_PROVIDER", provider)
    monkeypatch.setenv("AICODEBOT_MODEL", model_name)
    monkeypatch.setenv("OPENROUTER_API_KEY", "dummy")
    monkeypatch.setenv("OPENAI_API_KEY", "dummy")
    monkeypatch.setenv("HUGGINGFACE_API_KEY", "dummy")

    llm = LanguageModelManager()
    assert os.getenv("OPENROUTER_API_KEY") == "dummy"
    assert llm.get_api_key("OPENROUTER_API_KEY") == "dummy"

    prompt = get_prompt("alignment")
    chain = llm.chain_factory(prompt)
    if hasattr(chain.llm, "model_name"):
        # OpenAI compatible
        assert chain.llm.model_name == model_name
    elif hasattr(chain.llm, "repo_id"):
        # Hugging Face Hub
        assert chain.llm.repo_id == model_name
