From f159dc45f2dccfa7c1f5b4d8290e168d48175104 Mon Sep 17 00:00:00 2001 From: Anton Osika Date: Sat, 24 Jun 2023 17:53:04 +0200 Subject: [PATCH] First step in making gpt-engineer learn. Rename main_prompt -> prompt (#381) * First step in collecting learnings * Rename prompts * remove requirements, use pip install -e . instead * Add requirements * Fix tests --- .github/workflows/ci.yaml | 2 +- README.md | 26 ++++-- .../{main_prompt => prompt} | 0 .../file_explorer/{main_prompt => prompt} | 0 .../file_organizer/{main_prompt => prompt} | 0 .../image_resizer/{main_prompt => prompt} | 0 .../markdown_editor/{main_prompt => prompt} | 0 .../{main_prompt => prompt} | 0 .../pomodoro_timer/{main_prompt => prompt} | 0 benchmark/timer_app/{main_prompt => prompt} | 0 benchmark/todo_list/{main_prompt => prompt} | 0 .../url_shortener/{main_prompt => prompt} | 0 benchmark/weather_app/main_prompt | 1 - gpt_engineer/collect.py | 85 +++++++++++++++++++ gpt_engineer/db.py | 6 ++ gpt_engineer/main.py | 6 +- gpt_engineer/steps.py | 57 +++++++++---- projects/example/{main_prompt => prompt} | 0 pyproject.toml | 3 + requirements.txt | 8 -- scripts/benchmark.py | 2 +- scripts/clean_benchmarks.py | 2 +- tests/test_collect.py | 36 ++++++++ tests/test_db.py | 2 +- 24 files changed, 197 insertions(+), 39 deletions(-) rename benchmark/currency_converter/{main_prompt => prompt} (100%) rename benchmark/file_explorer/{main_prompt => prompt} (100%) rename benchmark/file_organizer/{main_prompt => prompt} (100%) rename benchmark/image_resizer/{main_prompt => prompt} (100%) rename benchmark/markdown_editor/{main_prompt => prompt} (100%) rename benchmark/password_generator/{main_prompt => prompt} (100%) rename benchmark/pomodoro_timer/{main_prompt => prompt} (100%) rename benchmark/timer_app/{main_prompt => prompt} (100%) rename benchmark/todo_list/{main_prompt => prompt} (100%) rename benchmark/url_shortener/{main_prompt => prompt} (100%) delete mode 100644 benchmark/weather_app/main_prompt create mode 
100644 gpt_engineer/collect.py rename projects/example/{main_prompt => prompt} (100%) delete mode 100644 requirements.txt create mode 100644 tests/test_collect.py diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 97dde537f1..4c19c4feae 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -1,4 +1,4 @@ -name: Pytest Execution +name: Pip install and pytest on: pull_request: branches: [main] diff --git a/README.md b/README.md index 9a3620fab2..b83938a110 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ [![GitHub Repo stars](https://img.shields.io/github/stars/AntonOsika/gpt-engineer?style=social)](https://github.com/AntonOsika/gpt-engineer) [![Twitter Follow](https://img.shields.io/twitter/follow/antonosika?style=social)](https://twitter.com/AntonOsika) + **Specify what you want it to build, the AI asks for clarification, and then builds it.** GPT Engineer is made to be easy to adapt, extend, and make your agent learn how you want your code to look. It generates an entire codebase based on a prompt. @@ -40,34 +41,43 @@ With an api key that has GPT4 access run: - `export OPENAI_API_KEY=[your api key]` + **Run**: - Create an empty folder. If inside the repo, you can run: - `cp -r projects/example/ projects/my-new-project` -- Fill in the `main_prompt` file in your new folder +- Fill in the `prompt` file in your new folder - `gpt-engineer projects/my-new-project` - (Note, `gpt-engineer --help` lets you see all available options. For example `--steps use_feedback` lets you improve/fix code in a project) -**Results**: +By running gpt-engineer you agree to our [ToS](https://github.com/AntonOsika/gpt-engineer/blob/main/TERMS_OF_USE.md). +**Results** - Check the generated files in `projects/my-new-project/workspace` + ## Features -You can specify the "identity" of the AI agent by editing the files in the `identity` folder. +You can specify the "identity" of the AI agent by editing the files in the `preprompts` folder. 
-Editing the identity, and evolving the `main_prompt`, is currently how you make the agent remember things between projects. +Editing the `preprompts`, and evolving how you write the project prompt, is currently how you make the agent remember things between projects. Each step in `steps.py` will have its communication history with GPT4 stored in the logs folder, and can be rerun with `scripts/rerun_edited_message_logs.py`. ## Contributing +The gpt-engineer community is building the **open platform for devs to tinker with and build their personal code-generation toolbox**. + +If you are interested in contributing to this, we would love to have you! -We are building the open platform for devs to tinker with and build their personal code-generation toolbox. +You can check for good first issues [here](https://github.com/AntonOsika/gpt-engineer/issues). +Contributing document [here](.github/CONTRIBUTING.md). -If you want to contribute, please check out the [roadmap](https://github.com/AntonOsika/gpt-engineer/blob/main/ROADMAP.md), [projects](https://github.com/AntonOsika/gpt-engineer/projects?query=is%3Aopen) or [issues tab](https://github.com/AntonOsika/gpt-engineer/issues) in the GitHub repo. You are welcome to read the [contributing document](.github/CONTRIBUTING.md) and join our [Discord 💬](https://discord.gg/4t5vXHhu). +We are currently looking for more maintainers and community organisers. Email anton.osika@gmail.com if you are interested in an official role. -We are currently looking for more maintainers and community organisers. Email if you are interested in an official role. +If you want to see our broader ambitions, check out the [roadmap](https://github.com/AntonOsika/gpt-engineer/blob/main/ROADMAP.md), and join +[Discord](https://discord.gg/4t5vXHhu) +to get input on how you can contribute to it. 
## Example - +https://github.com/AntonOsika/gpt-engineer/assets/4467025/6e362e45-4a94-4b0d-973d-393a31d92d9b diff --git a/benchmark/currency_converter/main_prompt b/benchmark/currency_converter/prompt similarity index 100% rename from benchmark/currency_converter/main_prompt rename to benchmark/currency_converter/prompt diff --git a/benchmark/file_explorer/main_prompt b/benchmark/file_explorer/prompt similarity index 100% rename from benchmark/file_explorer/main_prompt rename to benchmark/file_explorer/prompt diff --git a/benchmark/file_organizer/main_prompt b/benchmark/file_organizer/prompt similarity index 100% rename from benchmark/file_organizer/main_prompt rename to benchmark/file_organizer/prompt diff --git a/benchmark/image_resizer/main_prompt b/benchmark/image_resizer/prompt similarity index 100% rename from benchmark/image_resizer/main_prompt rename to benchmark/image_resizer/prompt diff --git a/benchmark/markdown_editor/main_prompt b/benchmark/markdown_editor/prompt similarity index 100% rename from benchmark/markdown_editor/main_prompt rename to benchmark/markdown_editor/prompt diff --git a/benchmark/password_generator/main_prompt b/benchmark/password_generator/prompt similarity index 100% rename from benchmark/password_generator/main_prompt rename to benchmark/password_generator/prompt diff --git a/benchmark/pomodoro_timer/main_prompt b/benchmark/pomodoro_timer/prompt similarity index 100% rename from benchmark/pomodoro_timer/main_prompt rename to benchmark/pomodoro_timer/prompt diff --git a/benchmark/timer_app/main_prompt b/benchmark/timer_app/prompt similarity index 100% rename from benchmark/timer_app/main_prompt rename to benchmark/timer_app/prompt diff --git a/benchmark/todo_list/main_prompt b/benchmark/todo_list/prompt similarity index 100% rename from benchmark/todo_list/main_prompt rename to benchmark/todo_list/prompt diff --git a/benchmark/url_shortener/main_prompt b/benchmark/url_shortener/prompt similarity index 100% rename from 
benchmark/url_shortener/main_prompt rename to benchmark/url_shortener/prompt diff --git a/benchmark/weather_app/main_prompt b/benchmark/weather_app/main_prompt deleted file mode 100644 index e58cab36f3..0000000000 --- a/benchmark/weather_app/main_prompt +++ /dev/null @@ -1 +0,0 @@ -Develop a weather app using Python and a weather API. Display current weather conditions for a given location, including temperature, humidity, and weather description. diff --git a/gpt_engineer/collect.py b/gpt_engineer/collect.py new file mode 100644 index 0000000000..491eef7d09 --- /dev/null +++ b/gpt_engineer/collect.py @@ -0,0 +1,85 @@ +import hashlib +import json +import os +import random +import tempfile + +from dataclasses import dataclass +from pathlib import Path + +from dataclasses_json import dataclass_json + +from gpt_engineer import steps +from gpt_engineer.db import DBs +from gpt_engineer.steps import Step + + +@dataclass_json +@dataclass +class Learning: + model: str + temperature: float + steps: str + steps_file_hash: str + prompt: str + feedback: str | None + session: str + version: str = "0.1" + + +def steps_file_hash(): + with open(steps.__file__, "r") as f: + content = f.read() + return hashlib.sha256(content.encode("utf-8"), usedforsecurity=False).hexdigest() + + +def extract_learning( + model: str, temperature: float, steps: list[Step], dbs: DBs +) -> Learning: + learning = Learning( + prompt=dbs.input["prompt"], + model=model, + temperature=temperature, + steps=json.dumps([step.__name__ for step in steps]), + steps_file_hash=steps_file_hash(), + feedback=dbs.input.get("feedback"), + session=get_session(), + ) + return learning + + +def send_learnings(learning: Learning): + import rudderstack.analytics as rudder_analytics + + rudder_analytics.write_key = "2Re4kqwL61GDp7S8ewe6K5dbogG" + rudder_analytics.dataPlaneUrl = "https://gptengineerezm.dataplane.rudderstack.com" + + rudder_analytics.track( + user_id=learning.session, + event="learning", + 
properties=learning.to_dict(), # type: ignore + ) + + +def get_session(): + path = Path(tempfile.gettempdir()) / "gpt_engineer_user_id.txt" + + try: + if path.exists(): + user_id = path.read_text() + else: + # random uuid: + user_id = str(random.randint(0, 2**32)) + path.write_text(user_id) + return user_id + except IOError: + return "ephemeral_" + str(random.randint(0, 2**32)) + + +def collect_learnings(model: str, temperature: float, steps: list[Step], dbs: DBs): + if os.environ.get("COLLECT_LEARNINGS_OPT_OUT") in ["true", "1"]: + print("COLLECT_LEARNINGS_OPT_OUT is set to true, not collecting learning") + return + + learnings = extract_learning(model, temperature, steps, dbs) + send_learnings(learnings) diff --git a/gpt_engineer/db.py b/gpt_engineer/db.py index 6bfee18cba..c3db67c103 100644 --- a/gpt_engineer/db.py +++ b/gpt_engineer/db.py @@ -22,6 +22,12 @@ def __getitem__(self, key): with full_path.open("r", encoding="utf-8") as f: return f.read() + def get(self, key, default=None): + try: + return self[key] + except KeyError: + return default + def __setitem__(self, key, val): full_path = self.path / key full_path.parent.mkdir(parents=True, exist_ok=True) diff --git a/gpt_engineer/main.py b/gpt_engineer/main.py index 1faa2291a0..934d80b847 100644 --- a/gpt_engineer/main.py +++ b/gpt_engineer/main.py @@ -8,6 +8,7 @@ from gpt_engineer import steps from gpt_engineer.ai import AI +from gpt_engineer.collect import collect_learnings from gpt_engineer.db import DB, DBs from gpt_engineer.steps import STEPS @@ -56,10 +57,13 @@ def main( preprompts=DB(Path(__file__).parent / "preprompts"), ) - for step in STEPS[steps_config]: + steps = STEPS[steps_config] + for step in steps: messages = step(ai, dbs) dbs.logs[step.__name__] = json.dumps(messages) + collect_learnings(model, temperature, steps, dbs) + if __name__ == "__main__": app() diff --git a/gpt_engineer/steps.py b/gpt_engineer/steps.py index 503aad065e..10d144ec5a 100644 --- a/gpt_engineer/steps.py +++ 
b/gpt_engineer/steps.py @@ -5,6 +5,8 @@ from enum import Enum from typing import Callable, List, TypeVar +from termcolor import colored + from gpt_engineer.ai import AI from gpt_engineer.chat_to_files import to_files from gpt_engineer.db import DBs @@ -19,12 +21,24 @@ def setup_sys_prompt(dbs): Step = TypeVar("Step", bound=Callable[[AI, DBs], List[dict]]) +def get_prompt(dbs): + """While we migrate we have this fallback getter""" + assert ( + "prompt" in dbs.input or "main_prompt" in dbs.input + ), "Please put your prompt in the file `prompt` in the project directory" + + if "prompt" not in dbs.input: + print( + colored("Please put the prompt in the file `prompt`, not `main_prompt", "red") + ) + print() + + return dbs.input.get("prompt", dbs.input["main_prompt"]) + + def simple_gen(ai: AI, dbs: DBs): """Run the AI on the main prompt and save the results""" - messages = ai.start( - setup_sys_prompt(dbs), - dbs.input["main_prompt"], - ) + messages = ai.start(setup_sys_prompt(dbs), get_prompt(dbs)) to_files(messages[-1]["content"], dbs.workspace) return messages @@ -34,22 +48,31 @@ def clarify(ai: AI, dbs: DBs): Ask the user if they want to clarify anything and save the results to the workspace """ messages = [ai.fsystem(dbs.preprompts["qa"])] - user = dbs.input["main_prompt"] + user_input = get_prompt(dbs) while True: - messages = ai.next(messages, user) + messages = ai.next(messages, user_input) if messages[-1]["content"].strip().lower().startswith("no"): - print(" Nothing more to clarify.") + print("Nothing more to clarify.") break print() - user = input('(answer in text, or "c" to move on)\n') + user_input = input('(answer in text, or "c" to move on)\n') print() - if not user or user == "c": - break - - user += ( + if not user_input or user_input == "c": + print("(letting gpt-engineer make its own assumptions)") + print() + messages = ai.next( + messages, + ai.fuser( + "Make your own assumptions and state them explicitly before starting" + ), + ) + print() + 
return messages + + user_input += ( "\n\n" "Is anything else unclear? If yes, only answer in the form:\n" "{remaining unclear areas} remaining questions.\n" @@ -68,7 +91,7 @@ def gen_spec(ai: AI, dbs: DBs): """ messages = [ ai.fsystem(setup_sys_prompt(dbs)), - ai.fsystem(f"Instructions: {dbs.input['main_prompt']}"), + ai.fsystem(f"Instructions: {dbs.input['prompt']}"), ] messages = ai.next(messages, dbs.preprompts["spec"]) @@ -105,7 +128,7 @@ def gen_unit_tests(ai: AI, dbs: DBs): """ messages = [ ai.fsystem(setup_sys_prompt(dbs)), - ai.fuser(f"Instructions: {dbs.input['main_prompt']}"), + ai.fuser(f"Instructions: {dbs.input['prompt']}"), ai.fuser(f"Specification:\n\n{dbs.memory['specification']}"), ] @@ -136,7 +159,7 @@ def gen_code(ai: AI, dbs: DBs): messages = [ ai.fsystem(setup_sys_prompt(dbs)), - ai.fuser(f"Instructions: {dbs.input['main_prompt']}"), + ai.fuser(f"Instructions: {dbs.input['prompt']}"), ai.fuser(f"Specification:\n\n{dbs.memory['specification']}"), ai.fuser(f"Unit tests:\n\n{dbs.memory['unit_tests']}"), ] @@ -200,7 +223,7 @@ def gen_entrypoint(ai, dbs): def use_feedback(ai: AI, dbs: DBs): messages = [ ai.fsystem(setup_sys_prompt(dbs)), - ai.fuser(f"Instructions: {dbs.input['main_prompt']}"), + ai.fuser(f"Instructions: {dbs.input['prompt']}"), ai.fassistant(dbs.workspace["all_output.txt"]), ai.fsystem(dbs.preprompts["use_feedback"]), ] @@ -213,7 +236,7 @@ def fix_code(ai: AI, dbs: DBs): code_output = json.loads(dbs.logs[gen_code.__name__])[-1]["content"] messages = [ ai.fsystem(setup_sys_prompt(dbs)), - ai.fuser(f"Instructions: {dbs.input['main_prompt']}"), + ai.fuser(f"Instructions: {dbs.input['prompt']}"), ai.fuser(code_output), ai.fsystem(dbs.preprompts["fix_code"]), ] diff --git a/projects/example/main_prompt b/projects/example/prompt similarity index 100% rename from projects/example/main_prompt rename to projects/example/prompt diff --git a/pyproject.toml b/pyproject.toml index 81450a69b8..38f4caa555 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -16,6 +16,9 @@ dependencies = [ 'ruff == 0.0.272', 'termcolor==2.3.0', 'typer == 0.9.0', + 'requests == 2.28.2', + 'rudder-sdk-python == 2.0.2', + 'dataclasses-json == 0.5.7', ] [project.scripts] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 1704cedd2a..0000000000 --- a/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -black==23.3.0 -mypy==1.3.0 -openai==0.27.8 -pre-commit==3.3.3 -pytest==7.3.1 -ruff==0.0.272 -termcolor==2.3.0 -typer==0.9.0 diff --git a/scripts/benchmark.py b/scripts/benchmark.py index 66ebb713df..518ec76f85 100644 --- a/scripts/benchmark.py +++ b/scripts/benchmark.py @@ -56,7 +56,7 @@ def main( print("process", bench_folder.name, "finished with code", process.returncode) print("Running it. Original benchmark prompt:") print() - with open(bench_folder / "main_prompt") as f: + with open(bench_folder / "prompt") as f: print(f.read()) print() diff --git a/scripts/clean_benchmarks.py b/scripts/clean_benchmarks.py index b90faa26fa..6572fca43d 100644 --- a/scripts/clean_benchmarks.py +++ b/scripts/clean_benchmarks.py @@ -16,7 +16,7 @@ def main(): if benchmark.is_dir(): print(f"Cleaning {benchmark}") for path in benchmark.iterdir(): - if path.name == "main_prompt": + if path.name in ["prompt", "main_prompt"]: continue # Get filename of Path object diff --git a/tests/test_collect.py b/tests/test_collect.py new file mode 100644 index 0000000000..722ae84066 --- /dev/null +++ b/tests/test_collect.py @@ -0,0 +1,36 @@ +import os + +from unittest.mock import MagicMock + +import pytest +import rudderstack.analytics as rudder_analytics + +from gpt_engineer.collect import collect_learnings, extract_learning +from gpt_engineer.db import DB, DBs +from gpt_engineer.steps import gen_code + + +def test_collect_learnings(monkeypatch): + monkeypatch.setattr(os, "environ", {"COLLECT_LEARNINGS_OPT_OUT": "false"}) + monkeypatch.setattr(rudder_analytics, "track", MagicMock()) + + model = "test_model" + temperature = 0.5 + 
steps = [gen_code] + dbs = DBs(DB("/tmp"), DB("/tmp"), DB("/tmp"), DB("/tmp"), DB("/tmp")) + dbs.input = { + "prompt": "test prompt\n with newlines", + "feedback": "test feedback", + } + dbs.logs = {gen_code.__name__: "test logs"} + + collect_learnings(model, temperature, steps, dbs) + + learnings = extract_learning(model, temperature, steps, dbs) + assert rudder_analytics.track.call_count == 1 + assert rudder_analytics.track.call_args[1]["event"] == "learning" + assert rudder_analytics.track.call_args[1]["properties"] == learnings.to_dict() + + +if __name__ == "__main__": + pytest.main(["-v"]) diff --git a/tests/test_db.py b/tests/test_db.py index f6f7b1dc32..072c3fa659 100644 --- a/tests/test_db.py +++ b/tests/test_db.py @@ -44,7 +44,7 @@ def test_DBs_initialization(tmp_path): def test_invalid_path(): - with pytest.raises(PermissionError): + with pytest.raises((PermissionError, OSError)): # Test with a path that will raise a permission error DB("/root/test")