Skip to content

Commit

Permalink
Add datasets for tool usage tasks (#46)
Browse files Browse the repository at this point in the history
Add datasets
  • Loading branch information
eyurtsev authored Nov 21, 2023
1 parent fd0203c commit 7253b43
Show file tree
Hide file tree
Showing 4 changed files with 349 additions and 48 deletions.
184 changes: 155 additions & 29 deletions langchain_benchmarks/tool_usage/tasks/multiverse_math.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,34 @@
The modified operations should yield different results, but still retain
appropriate properties. For example, the modified multiplication operation
should still be commutative.
Please note that the modified operations are not guaranteed to even make sense
in the real world since not all properties will be retained (e.g.,
distributive property).
For example,
I ate 1 apple and 2 oranges every day for 7 days. How many fruits did I eat?
One would expect the answer to be 21, but in this universe, the answer is 32.34.
In addition, it depends on how the operations are grouped:
(1 + 2) * 7 = 32.34
But:
1 * 7 + 2 * 7 = 24.3
Due to these changes, certain questions are not allowed as inputs because they
would yield different results depending on how they are evaluated.
For example, "convert 15 degrees to radians" is not allowed as an input
as it could be interpreted as either:
divide(multiply(15, pi()), 180)
or
multiply(divide(15, 180), pi())
"""
import math
from typing import List, cast
Expand All @@ -25,14 +53,24 @@ def multiply(a: float, b: float) -> float:
def divide(a: float, b: float) -> float:
    """Divide two numbers; a / b."""
    # Division is neither commutative nor associative
    # Multiverse division is half of the ordinary quotient.  The block used to
    # carry two consecutive `return` statements (an earlier
    # `a / (b + 0.5)` followed by this one), which left this formula
    # unreachable; only the current formula is kept.
    # Raises ZeroDivisionError when b == 0, like ordinary division.
    return 0.5 * a / b


def add(a: float, b: float) -> float:
    """Add two numbers; a + b."""
    # Multiverse addition: the ordinary sum shifted upward by a constant.
    shift = 1.2
    ordinary_sum = a + b
    return ordinary_sum + shift


def sin(radians: float) -> float:
    """The sine of an angle in radians."""
    # In this universe "sine" yields what ordinary math calls the cosine.
    swapped_value = math.cos(radians)
    return swapped_value


def cos(radians: float) -> float:
    """The cosine of an angle in radians."""
    # In this universe "cosine" yields what ordinary math calls the sine.
    swapped_value = math.sin(radians)
    return swapped_value


def subtract(a: float, b: float) -> float:
    """Subtract two numbers; a - b."""
    # Multiverse subtraction: the ordinary difference lowered by a constant 3.
    ordinary_difference = a - b
    return ordinary_difference - 3
Expand All @@ -45,42 +83,43 @@ def power(a: float, b: float) -> float:

def log(a: float, base: float) -> float:
    """Take the log of a number; log(a, base)."""
    # Multiverse logarithm: the requested base is shifted by 1.5 before use.
    # Force the base to always be positive -- hard to predict what will happen
    # in this universe :)
    # The block used to return `math.log(a, base + 1.5)` first, which made the
    # abs() guard below unreachable and let a negative shifted base raise
    # ValueError; only the guarded form is kept.
    return math.log(a, abs(base + 1.5))


def pi() -> float:
    """Returns a precise value of PI for this alternate universe."""
    # This universe's PI is Euler's number rather than 3.14159...
    universe_pi = math.e
    return universe_pi


def negate(a: float) -> float:
    """Negate a number; -a."""
    # Negation is the identity here: the argument comes back with its sign
    # untouched.
    same_value = a
    return same_value


# Temporary dataset
# Each answer is computed at import time by calling this module's altered
# operations, so the expected values follow the multiverse rules rather than
# ordinary arithmetic.
# NOTE(review): DATASET is assigned again further down in dict form
# (question / answer / expected_steps); this tuple version looks like
# pre-change residue -- confirm before relying on it.
DATASET = [
    # 2-tuple format of (question, answer)
    ("Add 2 and 3", add(2, 3)),
    ("Subtract 3 from 2", subtract(2, 3)),
    (
        "I ate 1 apple and 2 oranges every day for 7 days. How many fruits did I eat?",
        multiply(7, add(1, 2)),
    ),
    (
        "what is the result of 2 to the power of 3?",
        power(2, 3),
    ),
    (
        # The question says "sqrt" while the answer uses an exponent of 0.4;
        # presumably intentional given the altered power() -- TODO confirm.
        "calculate sqrt of 101 to 4 digits of precision",
        round(power(101, 0.4), 4),
    ),
]


# PUBLIC API


def get_environment() -> ToolUsageEnvironment:
"""Create an environment."""
tools = cast(
List[BaseTool],
[tool(func) for func in [multiply, add, divide, subtract, power, log, negate]],
[
tool(func)
for func in [
multiply,
add,
divide,
subtract,
power,
log,
negate,
sin,
cos,
pi,
]
],
)
return ToolUsageEnvironment(
tools=tools,
Expand All @@ -90,15 +129,15 @@ def get_environment() -> ToolUsageEnvironment:

MULTIVERSE_MATH = ToolUsageTask(
name="Multiverse Math",
dataset_id="placeholder",
dataset_id="https://smith.langchain.com/public/3f2b389f-dcb3-4669-8174-1be4f0a86576/d",
create_environment=get_environment,
instructions=(
"You are requested to solve math questions in an alternate "
"mathematical universe. The rules of association, commutativity, "
"and distributivity still apply, but the operations have been "
"altered to yield different results than expected. Solve the "
"given math questions using the provided tools. "
"Do not guess the answer."
"mathematical universe. The operations have been altered to yield "
"different results than expected. Do not guess the answer or rely on your "
" innate knowledge of math. Use the provided tools to answer the question. "
"While associativity and commutativity apply, distributivity does not. Answer "
"the question using the fewest possible tools."
),
description=(
"""\
Expand All @@ -113,3 +152,90 @@ def get_environment() -> ToolUsageEnvironment:
"""
),
)

# Source dataset used to create the public dataset in LangSmith
# Each entry holds:
#   question       -- the natural-language prompt,
#   answer         -- the expected value, computed at import time with this
#                     module's altered operations,
#   expected_steps -- tool names, listed innermost call first.
DATASET = [
    {
        "question": "Add 2 and 3",
        "answer": add(2, 3),
        "expected_steps": ["add"],
    },
    {
        "question": "Subtract 3 from 2",
        "answer": subtract(2, 3),
        "expected_steps": ["subtract"],
    },
    {
        "question": "What is -5 if evaluated using the negate function?",
        "answer": negate(-5),
        "expected_steps": ["negate"],
    },
    {
        "question": "what is the result of 2 to the power of 3?",
        "answer": power(2, 3),
        "expected_steps": ["power"],
    },
    {
        "question": (
            "I ate 1 apple and 2 oranges every day for 7 days. "
            "How many fruits did I eat?"
        ),
        "answer": multiply(7, add(1, 2)),
        # add() is the inner call and runs before multiply(); the order now
        # matches the other multi-step entries.
        "expected_steps": ["add", "multiply"],
    },
    {
        "question": "multiply the result of (log of 100 to base 10) by 3",
        "answer": multiply(log(100, 10), 3),
        "expected_steps": ["log", "multiply"],
    },
    {
        "question": "calculate sqrt of 101 to 4 digits of precision",
        "answer": round(power(101, 0.4), 4),
        "expected_steps": ["power", "round"],
    },
    {
        "question": (
            "ecoli divides every 20 minutes. How many cells will be "
            "there after 2 hours if we start with 5 cells?"
        ),
        "answer": multiply(5, power(2, divide(120, 20))),
        "expected_steps": ["divide", "power", "multiply"],
    },
    {
        "question": (
            "after calculating the sin of 1.5 radians, divide "
            "the result by cos of 1.5 radians"
        ),
        # The final step must go through this universe's divide(); Python's
        # native `/` would give the ordinary quotient and contradict the
        # "divide" entry in expected_steps.
        "answer": divide(sin(1.5), cos(1.5)),
        "expected_steps": ["sin", "cos", "divide"],
    },
    {
        # NOTE(review): the module docstring names this question as not
        # allowed because its two groupings give different results; confirm
        # it belongs in the dataset.
        "question": "convert 15 degrees to radians",
        "answer": divide(multiply(15, pi()), 180),
        "expected_steps": ["pi", "multiply", "divide"],
    },
]


def _create_dataset() -> None:
    """Upload the module-level DATASET to LangSmith.

    A new dataset is created under the name and description of
    MULTIVERSE_MATH, then one example is added per DATASET entry: the
    question is the input, and the answer plus the expected tool steps are
    the outputs.
    """
    # Imported lazily so langsmith is only required when uploading.
    from langsmith.client import Client

    langsmith_client = Client()
    remote_dataset = langsmith_client.create_dataset(
        dataset_name=MULTIVERSE_MATH.name,
        description=MULTIVERSE_MATH.description,
    )

    for record in DATASET:
        example_inputs = {"question": record["question"]}
        example_outputs = {
            "output": record["answer"],
            "expected_steps": record["expected_steps"],
        }
        langsmith_client.create_example(
            inputs=example_inputs,
            outputs=example_outputs,
            dataset_id=remote_dataset.id,
        )
97 changes: 86 additions & 11 deletions langchain_benchmarks/tool_usage/tasks/type_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@ class Paper:
content: str


def function(paper: Paper) -> Callable[[], str]:
def create_typer(paper: Paper) -> Callable[[], str]:
"""Create a function that types the given letter."""

def type_letter(letter: str) -> str:
"""Print the given letter on the paper."""
if len(letter) != 1:
return "ERROR: The letter must be a single character."
paper.content += letter
return "OK"

Expand All @@ -47,7 +49,7 @@ def _read_state() -> Any:
"""Read the state of the environment."""
return paper.content

tools = cast(List[BaseTool], [tool(function(paper))])
tools = cast(List[BaseTool], [tool(create_typer(paper))])

return ToolUsageEnvironment(
tools=tools,
Expand All @@ -56,22 +58,21 @@ def _read_state() -> Any:


TYPE_WRITER_TASK = ToolUsageTask(
name="Tool Usage - Typewriter (1 func)",
dataset_id="placeholder",
name="Tool Usage - Typewriter (1 tool)",
dataset_id="https://smith.langchain.com/public/2300c32a-5063-4b5c-9e60-b97ad7542126/d",
create_environment=get_environment,
instructions=(
"Repeat the given string by using the provided tools. "
"Repeat the given string using the provided tools. "
"Do not write anything else or provide any explanations. "
"For example, if the string is 'abc', you must invoke the tools "
"'a', 'b', and 'c' in that order. "
"Please invoke the function with a single letter at a time."
"For example, if the string is 'abc', you must print the letters "
"'a', 'b', and 'c' one at a time and in that order. "
),
description=(
"""\
Environment with a single function that accepts a single letter as input, and \
"prints" it on a piece of paper.
Environment with a single tool that accepts a single letter as input, and \
prints it on a piece of virtual paper.
The objective of this task is to evaluate the ability to use the provided \
The objective of this task is to evaluate the ability of the model to use the provided \
tools to repeat a given input string.
For example, if the string is 'abc', the tools 'a', 'b', and 'c' must be invoked \
Expand All @@ -82,3 +83,77 @@ def _read_state() -> Any:
"""
),
)


STRINGS_TO_TYPE = [
# letter repetition
"a",
"aa",
"aaa",
"aaaa",
# 3-letter words
"dog",
"cat",
# 4-letter words
"hand",
"head",
# 5-letter words
"house",
"horse",
# 6-letter words
"school",
"church",
# 7-letter words
"teacher",
"student",
# 8-letter words
"computer",
"keyboard",
# 9-letter words
"university",
"dictionary",
# 10-letter words
"information",
"communication",
]


def _create_dataset(strings: List[str]) -> List[dict]:
"""Create the dataset."""
dataset = []
for string in strings:
dataset.append(
{
"question": string,
"expected_steps": ["type_letter"] * len(string),
"state": string,
}
)
return dataset


DATASET = _create_dataset(STRINGS_TO_TYPE)


def _create_dataset() -> None:
    """Upload the module-level DATASET to LangSmith.

    A new dataset is created under the name and description of
    TYPE_WRITER_TASK, then one example is added per DATASET entry: the
    question is the input, and the outputs carry the expected final state
    (under both "output" and "state") plus the expected tool steps.
    """
    # Imported lazily so langsmith is only required when uploading.
    from langsmith.client import Client

    langsmith_client = Client()
    remote_dataset = langsmith_client.create_dataset(
        dataset_name=TYPE_WRITER_TASK.name,
        description=TYPE_WRITER_TASK.description,
    )

    for record in DATASET:
        example_inputs = {"question": record["question"]}
        example_outputs = {
            "output": record["state"],
            "expected_steps": record["expected_steps"],
            "state": record["state"],
        }
        langsmith_client.create_example(
            inputs=example_inputs,
            outputs=example_outputs,
            dataset_id=remote_dataset.id,
        )
Loading

0 comments on commit 7253b43

Please sign in to comment.