Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
baberabb committed Jan 22, 2025
1 parent 1f63b3d commit 37eb9c9
Showing 1 changed file with 4 additions and 30 deletions.
34 changes: 4 additions & 30 deletions lm_eval/tasks/math500/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,37 +38,12 @@ def _process_doc(doc: dict) -> dict:
return dataset.map(_process_doc)


# def list_fewshot_samples() -> list[dict]:
# return [
# {
# "problem": "Find the domain of the expression $\\frac{\\sqrt{x-2}}{\\sqrt{5-x}}$.",
# "solution": "The expressions inside each square root must be non-negative. Therefore, $x-2 \\ge 0$, so $x\\ge2$, and $5 - x \\ge 0$, so $x \\le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\\boxed{[2,5)}$.\nFinal Answer: The final answer is $[2,5)$. I hope it is correct.",
# "few_shot": "1",
# },
# {
# "problem": "If $\\det \\mathbf{A} = 2$ and $\\det \\mathbf{B} = 12,$ then find $\\det (\\mathbf{A} \\mathbf{B}).$",
# "solution": "We have that $\\det (\\mathbf{A} \\mathbf{B}) = (\\det \\mathbf{A})(\\det \\mathbf{B}) = (2)(12) = \\boxed{24}.$\nFinal Answer: The final answer is $24$. I hope it is correct.",
# "few_shot": "1",
# },
# {
# "problem": "Terrell usually lifts two 20-pound weights 12 times. If he uses two 15-pound weights instead, how many times must Terrell lift them in order to lift the same total weight?",
# "solution": "If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\\cdot 12\\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\\cdot15\\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$:\n\\begin{align*}\n30n&=480\\\\\n\\Rightarrow\\qquad n&=480/30=\\boxed{16}\n\\end{align*}\nFinal Answer: The final answer is $16$. I hope it is correct.",
# "few_shot": "1",
# },
# {
# "problem": "If the system of equations\n\n\\begin{align*}\n6x-4y&=a,\\\\\n6y-9x &=b.\n\\end{align*}has a solution $(x, y)$ where $x$ and $y$ are both nonzero,\nfind $\\frac{a}{b},$ assuming $b$ is nonzero.",
# "solution": "If we multiply the first equation by $-\\frac{3}{2}$, we obtain\n\n$$6y-9x=-\\frac{3}{2}a.$$Since we also know that $6y-9x=b$, we have\n\n$$-\\frac{3}{2}a=b\\Rightarrow\\frac{a}{b}=\\boxed{-\\frac{2}{3}}.$$\nFinal Answer: The final answer is $-\\frac{2}{3}$. I hope it is correct.",
# "few_shot": "1",
# },
# ]


def filter_final_answer(resps: list[list[str]], docs) -> list[list[str]]:
answer = []
for resp in resps:
answer.append(
[
normalize_final_answer(remove_boxed(last_boxed_only_string(r[0])))
normalize_final_answer(remove_boxed(last_boxed_only_string(r)))
for r in resp
]
)
Expand All @@ -82,10 +57,7 @@ def get_metric(predictions: list[list[str]], references: list[dict]) -> list[dic
res = []
for reference, candidates in zip(references, predictions):
for candidate in candidates:
answer = normalize_final_answer(
remove_boxed(last_boxed_only_string(candidate))
)
if is_equiv(answer, reference["answer"]):
if is_equiv(candidate, reference["answer"]):
retval = 1

results = {
Expand Down Expand Up @@ -164,6 +136,8 @@ def is_equiv(x1: str, x2: str) -> bool:
"""
x1 and x2 are normalized latex string
"""
if x1.strip() == x2.strip():
return True
try:
with timeout(seconds=5):
try:
Expand Down

0 comments on commit 37eb9c9

Please sign in to comment.