From fa992d03d852c90708261ea889abaa4ee9b04e86 Mon Sep 17 00:00:00 2001 From: ljvmiranda921 Date: Fri, 20 Sep 2024 14:18:08 -0700 Subject: [PATCH] [wip] Update --- scripts/eval_helpsteer2_dev.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/eval_helpsteer2_dev.py b/scripts/eval_helpsteer2_dev.py index 68371fa..d255dd5 100644 --- a/scripts/eval_helpsteer2_dev.py +++ b/scripts/eval_helpsteer2_dev.py @@ -8,6 +8,7 @@ import argparse import logging +import json import os import sys import hashlib @@ -213,6 +214,10 @@ def main(): out_dataset = out_dataset.add_column("scores_chosen", scores_chosen) out_dataset = out_dataset.add_column("scores_rejected", scores_rejected) + with open("metrics.json", "w") as f: + scores = {"hs2_dev_accuracy": sum(results) / len(results)} + json.dump(scores, f) + def check_tokenizer_chat_template(tokenizer): """ @@ -296,7 +301,7 @@ def _compute_rating(row, wt_vec): all_cols = dataset.column_names dataset = dataset.remove_columns([c for c in all_cols if c not in keep_columns]) - breakpoint() + return dataset def prepare_dialogue(