From 5c006ed417a2f4d01248d487bcbd493ebe3e5edd Mon Sep 17 00:00:00 2001 From: Minho Ryu Date: Sat, 25 Jan 2025 01:00:29 +0900 Subject: [PATCH] separate category for `global_mmlu` (#2652) * separate category * set version 0.0 * apply precommit --- .../global_mmlu/default/_generate_configs.py | 42 ------------------- .../{_default_yaml => ar/_ar_template_yaml} | 3 +- .../default/ar/_global_mmlu_ar.yaml | 13 ++++++ .../default/ar/global_mmlu_ar_business.yaml | 4 ++ .../default/ar/global_mmlu_ar_humanities.yaml | 4 ++ .../default/ar/global_mmlu_ar_medical.yaml | 4 ++ .../default/ar/global_mmlu_ar_other.yaml | 4 ++ .../ar/global_mmlu_ar_social_sciences.yaml | 4 ++ .../default/ar/global_mmlu_ar_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/ar/utils.py | 18 ++++++++ .../global_mmlu/default/bn/_bn_template_yaml | 16 +++++++ .../default/bn/_global_mmlu_bn.yaml | 13 ++++++ .../default/bn/global_mmlu_bn_business.yaml | 4 ++ .../default/bn/global_mmlu_bn_humanities.yaml | 4 ++ .../default/bn/global_mmlu_bn_medical.yaml | 4 ++ .../default/bn/global_mmlu_bn_other.yaml | 4 ++ .../bn/global_mmlu_bn_social_sciences.yaml | 4 ++ .../default/bn/global_mmlu_bn_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/bn/utils.py | 18 ++++++++ .../global_mmlu/default/de/_de_template_yaml | 16 +++++++ .../default/de/_global_mmlu_de.yaml | 13 ++++++ .../default/de/global_mmlu_de_business.yaml | 4 ++ .../default/de/global_mmlu_de_humanities.yaml | 4 ++ .../default/de/global_mmlu_de_medical.yaml | 4 ++ .../default/de/global_mmlu_de_other.yaml | 4 ++ .../de/global_mmlu_de_social_sciences.yaml | 4 ++ .../default/de/global_mmlu_de_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/de/utils.py | 18 ++++++++ .../global_mmlu/default/en/_en_template_yaml | 16 +++++++ .../default/en/_global_mmlu_en.yaml | 13 ++++++ .../default/en/global_mmlu_en_business.yaml | 4 ++ .../default/en/global_mmlu_en_humanities.yaml | 4 ++ .../default/en/global_mmlu_en_medical.yaml | 4 ++ .../default/en/global_mmlu_en_other.yaml | 4 ++ .../en/global_mmlu_en_social_sciences.yaml | 4 ++ .../default/en/global_mmlu_en_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/en/utils.py | 18 ++++++++ .../global_mmlu/default/es/_es_template_yaml | 16 +++++++ .../default/es/_global_mmlu_es.yaml | 13 ++++++ .../default/es/global_mmlu_es_business.yaml | 4 ++ .../default/es/global_mmlu_es_humanities.yaml | 4 ++ .../default/es/global_mmlu_es_medical.yaml | 4 ++ .../default/es/global_mmlu_es_other.yaml | 4 ++ .../es/global_mmlu_es_social_sciences.yaml | 4 ++ .../default/es/global_mmlu_es_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/es/utils.py | 18 ++++++++ .../global_mmlu/default/fr/_fr_template_yaml | 16 +++++++ .../default/fr/_global_mmlu_fr.yaml | 13 ++++++ .../default/fr/global_mmlu_fr_business.yaml | 4 ++ .../default/fr/global_mmlu_fr_humanities.yaml | 4 ++ .../default/fr/global_mmlu_fr_medical.yaml | 4 ++ .../default/fr/global_mmlu_fr_other.yaml | 4 ++ .../fr/global_mmlu_fr_social_sciences.yaml | 4 ++ .../default/fr/global_mmlu_fr_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/fr/utils.py | 18 ++++++++ .../global_mmlu/default/global_mmlu_ar.yaml | 4 -- .../global_mmlu/default/global_mmlu_bn.yaml | 4 -- .../global_mmlu/default/global_mmlu_de.yaml | 4 -- .../global_mmlu/default/global_mmlu_en.yaml | 4 -- .../global_mmlu/default/global_mmlu_es.yaml | 4 -- .../global_mmlu/default/global_mmlu_fr.yaml | 4 -- .../global_mmlu/default/global_mmlu_hi.yaml | 4 -- .../global_mmlu/default/global_mmlu_id.yaml | 4 -- .../global_mmlu/default/global_mmlu_it.yaml | 4 -- .../global_mmlu/default/global_mmlu_ja.yaml | 4 -- .../global_mmlu/default/global_mmlu_ko.yaml | 4 -- .../global_mmlu/default/global_mmlu_pt.yaml | 4 -- .../global_mmlu/default/global_mmlu_sw.yaml | 4 -- .../global_mmlu/default/global_mmlu_yo.yaml | 4 -- .../global_mmlu/default/global_mmlu_zh.yaml | 4 -- .../default/hi/_global_mmlu_hi.yaml | 13 ++++++ .../global_mmlu/default/hi/_hi_template_yaml | 16 +++++++ .../default/hi/global_mmlu_hi_business.yaml | 4 ++ .../default/hi/global_mmlu_hi_humanities.yaml | 4 ++ .../default/hi/global_mmlu_hi_medical.yaml | 4 ++ .../default/hi/global_mmlu_hi_other.yaml | 4 ++ .../hi/global_mmlu_hi_social_sciences.yaml | 4 ++ .../default/hi/global_mmlu_hi_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/hi/utils.py | 18 ++++++++ .../default/id/_global_mmlu_id.yaml | 13 ++++++ .../global_mmlu/default/id/_id_template_yaml | 16 +++++++ .../default/id/global_mmlu_id_business.yaml | 4 ++ .../default/id/global_mmlu_id_humanities.yaml | 4 ++ .../default/id/global_mmlu_id_medical.yaml | 4 ++ .../default/id/global_mmlu_id_other.yaml | 4 ++ .../id/global_mmlu_id_social_sciences.yaml | 4 ++ .../default/id/global_mmlu_id_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/id/utils.py | 18 ++++++++ .../default/it/_global_mmlu_it.yaml | 13 ++++++ .../global_mmlu/default/it/_it_template_yaml | 16 +++++++ .../default/it/global_mmlu_it_business.yaml | 4 ++ .../default/it/global_mmlu_it_humanities.yaml | 4 ++ .../default/it/global_mmlu_it_medical.yaml | 4 ++ .../default/it/global_mmlu_it_other.yaml | 4 ++ .../it/global_mmlu_it_social_sciences.yaml | 4 ++ .../default/it/global_mmlu_it_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/it/utils.py | 18 ++++++++ .../default/ja/_global_mmlu_ja.yaml | 13 ++++++ .../global_mmlu/default/ja/_ja_template_yaml | 16 +++++++ .../default/ja/global_mmlu_ja_business.yaml | 4 ++ .../default/ja/global_mmlu_ja_humanities.yaml | 4 ++ .../default/ja/global_mmlu_ja_medical.yaml | 4 ++ .../default/ja/global_mmlu_ja_other.yaml | 4 ++ .../ja/global_mmlu_ja_social_sciences.yaml | 4 ++ .../default/ja/global_mmlu_ja_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/ja/utils.py | 18 ++++++++ .../default/ko/_global_mmlu_ko.yaml | 13 ++++++ .../global_mmlu/default/ko/_ko_template_yaml | 16 +++++++ .../default/ko/global_mmlu_ko_business.yaml | 4 ++ .../default/ko/global_mmlu_ko_humanities.yaml | 4 ++ .../default/ko/global_mmlu_ko_medical.yaml | 4 ++ .../default/ko/global_mmlu_ko_other.yaml | 4 ++ .../ko/global_mmlu_ko_social_sciences.yaml | 4 ++ .../default/ko/global_mmlu_ko_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/ko/utils.py | 18 ++++++++ .../default/pt/_global_mmlu_pt.yaml | 13 ++++++ .../global_mmlu/default/pt/_pt_template_yaml | 16 +++++++ .../default/pt/global_mmlu_pt_business.yaml | 4 ++ .../default/pt/global_mmlu_pt_humanities.yaml | 4 ++ .../default/pt/global_mmlu_pt_medical.yaml | 4 ++ .../default/pt/global_mmlu_pt_other.yaml | 4 ++ .../pt/global_mmlu_pt_social_sciences.yaml | 4 ++ .../default/pt/global_mmlu_pt_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/pt/utils.py | 18 ++++++++ .../default/sw/_global_mmlu_sw.yaml | 13 ++++++ .../global_mmlu/default/sw/_sw_template_yaml | 16 +++++++ .../default/sw/global_mmlu_sw_business.yaml | 4 ++ .../default/sw/global_mmlu_sw_humanities.yaml | 4 ++ .../default/sw/global_mmlu_sw_medical.yaml | 4 ++ .../default/sw/global_mmlu_sw_other.yaml | 4 ++ .../sw/global_mmlu_sw_social_sciences.yaml | 4 ++ .../default/sw/global_mmlu_sw_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/sw/utils.py | 18 ++++++++ .../default/yo/_global_mmlu_yo.yaml | 13 ++++++ .../global_mmlu/default/yo/_yo_template_yaml | 16 +++++++ .../default/yo/global_mmlu_yo_business.yaml | 4 ++ .../default/yo/global_mmlu_yo_humanities.yaml | 4 ++ .../default/yo/global_mmlu_yo_medical.yaml | 4 ++ .../default/yo/global_mmlu_yo_other.yaml | 4 ++ .../yo/global_mmlu_yo_social_sciences.yaml | 4 ++ .../default/yo/global_mmlu_yo_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/yo/utils.py | 18 ++++++++ .../default/zh/_global_mmlu_zh.yaml | 13 ++++++ .../global_mmlu/default/zh/_zh_template_yaml | 16 +++++++ .../default/zh/global_mmlu_zh_business.yaml | 4 ++ .../default/zh/global_mmlu_zh_humanities.yaml | 4 ++ .../default/zh/global_mmlu_zh_medical.yaml | 4 ++ .../default/zh/global_mmlu_zh_other.yaml | 4 ++ .../zh/global_mmlu_zh_social_sciences.yaml | 4 ++ .../default/zh/global_mmlu_zh_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/zh/utils.py | 18 ++++++++ .../full/am/_global_mmlu_full_am.yaml | 2 +- .../full/ar/_global_mmlu_full_ar.yaml | 2 +- .../full/bn/_global_mmlu_full_bn.yaml | 2 +- .../full/cs/_global_mmlu_full_cs.yaml | 2 +- .../full/de/_global_mmlu_full_de.yaml | 2 +- .../full/el/_global_mmlu_full_el.yaml | 2 +- .../full/en/_global_mmlu_full_en.yaml | 2 +- .../full/es/_global_mmlu_full_es.yaml | 2 +- .../full/fa/_global_mmlu_full_fa.yaml | 2 +- .../full/fil/_global_mmlu_full_fil.yaml | 2 +- .../full/fr/_global_mmlu_full_fr.yaml | 2 +- .../full/ha/_global_mmlu_full_ha.yaml | 2 +- .../full/he/_global_mmlu_full_he.yaml | 2 +- .../full/hi/_global_mmlu_full_hi.yaml | 2 +- .../full/id/_global_mmlu_full_id.yaml | 2 +- .../full/ig/_global_mmlu_full_ig.yaml | 2 +- .../full/it/_global_mmlu_full_it.yaml | 2 +- .../full/ja/_global_mmlu_full_ja.yaml | 2 +- .../full/ko/_global_mmlu_full_ko.yaml | 2 +- .../full/ky/_global_mmlu_full_ky.yaml | 2 +- .../full/lt/_global_mmlu_full_lt.yaml | 2 +- .../full/mg/_global_mmlu_full_mg.yaml | 2 +- .../full/ms/_global_mmlu_full_ms.yaml | 2 +- .../full/ne/_global_mmlu_full_ne.yaml | 2 +- .../full/nl/_global_mmlu_full_nl.yaml | 2 +- .../full/ny/_global_mmlu_full_ny.yaml | 2 +- .../full/pl/_global_mmlu_full_pl.yaml | 2 +- .../full/pt/_global_mmlu_full_pt.yaml | 2 +- .../full/ro/_global_mmlu_full_ro.yaml | 2 +- .../full/ru/_global_mmlu_full_ru.yaml | 2 +- .../full/si/_global_mmlu_full_si.yaml | 2 +- .../full/sn/_global_mmlu_full_sn.yaml | 2 +- .../full/so/_global_mmlu_full_so.yaml | 2 +- .../full/sr/_global_mmlu_full_sr.yaml | 2 +- .../full/sv/_global_mmlu_full_sv.yaml | 2 +- .../full/sw/_global_mmlu_full_sw.yaml | 2 +- .../full/te/_global_mmlu_full_te.yaml | 2 +- .../full/tr/_global_mmlu_full_tr.yaml | 2 +- .../full/uk/_global_mmlu_full_uk.yaml | 2 +- .../full/vi/_global_mmlu_full_vi.yaml | 2 +- .../full/yo/_global_mmlu_full_yo.yaml | 2 +- .../full/zh/_global_mmlu_full_zh.yaml | 2 +- 193 files changed, 1092 insertions(+), 146 deletions(-) delete mode 100644 lm_eval/tasks/global_mmlu/default/_generate_configs.py rename lm_eval/tasks/global_mmlu/default/{_default_yaml => ar/_ar_template_yaml} (95%) create mode 100644 lm_eval/tasks/global_mmlu/default/ar/_global_mmlu_ar.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ar/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/bn/_bn_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/bn/_global_mmlu_bn.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/bn/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/de/_de_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/de/_global_mmlu_de.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/de/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/en/_en_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/en/_global_mmlu_en.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/en/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/es/_es_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/es/_global_mmlu_es.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/es/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/fr/_fr_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/fr/_global_mmlu_fr.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/fr/utils.py delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_ar.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_bn.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_de.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_en.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_es.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_fr.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_hi.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_id.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_it.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_ja.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_ko.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_pt.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_sw.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_yo.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_zh.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/hi/_global_mmlu_hi.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/hi/_hi_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/hi/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/id/_global_mmlu_id.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/id/_id_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/id/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/it/_global_mmlu_it.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/it/_it_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/it/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/ja/_global_mmlu_ja.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ja/_ja_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ja/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/ko/_global_mmlu_ko.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ko/_ko_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ko/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/pt/_global_mmlu_pt.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/pt/_pt_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/pt/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/sw/_global_mmlu_sw.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/sw/_sw_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/sw/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/yo/_global_mmlu_yo.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/yo/_yo_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/yo/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/zh/_global_mmlu_zh.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/zh/_zh_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/zh/utils.py diff --git a/lm_eval/tasks/global_mmlu/default/_generate_configs.py b/lm_eval/tasks/global_mmlu/default/_generate_configs.py deleted file mode 100644 index 58e169c6d4..0000000000 --- a/lm_eval/tasks/global_mmlu/default/_generate_configs.py +++ /dev/null @@ -1,42 +0,0 @@ -import yaml - - -languages = [ - "en", - "ar", - "fr", - "es", - "hi", - "de", - "id", - "it", - "ja", - "ko", - "pt", - "zh", - "yo", - "bn", - "sw", -] - - -def main() -> None: - for language in languages: - file_name = f"global_mmlu_{language}.yaml" - try: - with open(f"{file_name}", "w") as f: - f.write("# Generated by _generate_configs.py\n") - yaml.dump( - { - "include": "_default_yaml", - "task": f"global_mmlu_{language}", - "dataset_name": language, - }, - f, - ) - except FileExistsError: - pass - - -if __name__ == "__main__": - main() diff --git a/lm_eval/tasks/global_mmlu/default/_default_yaml b/lm_eval/tasks/global_mmlu/default/ar/_ar_template_yaml similarity index 95% rename from lm_eval/tasks/global_mmlu/default/_default_yaml rename to lm_eval/tasks/global_mmlu/default/ar/_ar_template_yaml index 33a1fc356a..3fa8f23f86 100644 --- a/lm_eval/tasks/global_mmlu/default/_default_yaml +++ b/lm_eval/tasks/global_mmlu/default/ar/_ar_template_yaml @@ -1,6 +1,5 @@ -tag: - - global_mmlu dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: ar test_split: test fewshot_split: dev fewshot_config: diff --git a/lm_eval/tasks/global_mmlu/default/ar/_global_mmlu_ar.yaml b/lm_eval/tasks/global_mmlu/default/ar/_global_mmlu_ar.yaml new file mode 100644 index 0000000000..27f6e1a470 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ar/_global_mmlu_ar.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_ar +task: + - global_mmlu_ar_business + - global_mmlu_ar_humanities + - global_mmlu_ar_medical + - global_mmlu_ar_other + - global_mmlu_ar_stem + - global_mmlu_ar_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_business.yaml b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_business.yaml new file mode 100644 index 0000000000..c7f47fdf85 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_ar_business diff --git a/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_humanities.yaml b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_humanities.yaml new file mode 100644 index 0000000000..c35f1f6e95 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_ar_humanities diff --git a/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_medical.yaml b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_medical.yaml new file mode 100644 index 0000000000..cb40548668 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_ar_medical diff --git a/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_other.yaml b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_other.yaml new file mode 100644 index 0000000000..1ffd9be89f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_ar_other diff --git a/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_social_sciences.yaml new file mode 100644 index 0000000000..037e25a81e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_ar_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_stem.yaml b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_stem.yaml new file mode 100644 index 0000000000..f2ed28c714 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_ar_stem diff --git a/lm_eval/tasks/global_mmlu/default/ar/utils.py b/lm_eval/tasks/global_mmlu/default/ar/utils.py new file mode 100644 index 0000000000..507a41bdc9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ar/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/bn/_bn_template_yaml b/lm_eval/tasks/global_mmlu/default/bn/_bn_template_yaml new file mode 100644 index 0000000000..c9a234dbfd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/_bn_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: bn +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/bn/_global_mmlu_bn.yaml b/lm_eval/tasks/global_mmlu/default/bn/_global_mmlu_bn.yaml new file mode 100644 index 0000000000..4098af1a2c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/_global_mmlu_bn.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_bn +task: + - global_mmlu_bn_business + - global_mmlu_bn_humanities + - global_mmlu_bn_medical + - global_mmlu_bn_other + - global_mmlu_bn_stem + - global_mmlu_bn_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_business.yaml b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_business.yaml new file mode 100644 index 0000000000..c77589c30a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_bn_business diff --git a/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_humanities.yaml b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_humanities.yaml new file mode 100644 index 0000000000..da495c6d54 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_bn_humanities diff --git a/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_medical.yaml b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_medical.yaml new file mode 100644 index 0000000000..867e5e4eca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_bn_medical diff --git a/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_other.yaml b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_other.yaml new file mode 100644 index 0000000000..c44b6d755d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_bn_other diff --git a/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_social_sciences.yaml new file mode 100644 index 0000000000..7bbde182df --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_bn_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_stem.yaml b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_stem.yaml new file mode 100644 index 0000000000..433ba8b7a8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_bn_stem diff --git a/lm_eval/tasks/global_mmlu/default/bn/utils.py b/lm_eval/tasks/global_mmlu/default/bn/utils.py new file mode 100644 index 0000000000..507a41bdc9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/de/_de_template_yaml b/lm_eval/tasks/global_mmlu/default/de/_de_template_yaml new file mode 100644 index 0000000000..7c17e2d0d0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/de/_de_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: de +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/de/_global_mmlu_de.yaml b/lm_eval/tasks/global_mmlu/default/de/_global_mmlu_de.yaml new file mode 100644 index 0000000000..1a54aaceb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/de/_global_mmlu_de.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_de +task: + - global_mmlu_de_business + - global_mmlu_de_humanities + - global_mmlu_de_medical + - global_mmlu_de_other + - global_mmlu_de_stem + - global_mmlu_de_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_business.yaml b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_business.yaml new file mode 100644 index 0000000000..eba9514c10 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_de_business diff --git a/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_humanities.yaml b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_humanities.yaml new file mode 100644 index 0000000000..d37de49194 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_de_humanities diff --git a/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_medical.yaml b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_medical.yaml new file mode 100644 index 0000000000..f114de4637 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_de_medical diff --git a/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_other.yaml b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_other.yaml new file mode 100644 index 0000000000..d6089b2df7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_de_other diff --git a/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_social_sciences.yaml new file mode 100644 index 0000000000..853711f3a4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_de_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_stem.yaml b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_stem.yaml new file mode 100644 index 0000000000..ef66d3cf7a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_de_stem diff --git a/lm_eval/tasks/global_mmlu/default/de/utils.py b/lm_eval/tasks/global_mmlu/default/de/utils.py new file mode 100644 index 0000000000..507a41bdc9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/de/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/en/_en_template_yaml b/lm_eval/tasks/global_mmlu/default/en/_en_template_yaml new file mode 100644 index 0000000000..e24d798320 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/_en_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: en +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/en/_global_mmlu_en.yaml b/lm_eval/tasks/global_mmlu/default/en/_global_mmlu_en.yaml new file mode 100644 index 0000000000..fc927412eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/_global_mmlu_en.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_en +task: + - global_mmlu_en_business + - global_mmlu_en_humanities + - global_mmlu_en_medical + - global_mmlu_en_other + - global_mmlu_en_stem + - global_mmlu_en_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_business.yaml b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_business.yaml new file mode 100644 index 0000000000..aa3f4bc148 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_en_business diff --git a/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_humanities.yaml b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_humanities.yaml new file mode 100644 index 0000000000..c2a20e292e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_en_humanities diff --git a/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_medical.yaml b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_medical.yaml new file mode 100644 index 0000000000..ba9914592e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_en_medical diff --git a/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_other.yaml b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_other.yaml new file mode 100644 index 0000000000..c14d7657a2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_en_other diff --git a/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_social_sciences.yaml new file mode 100644 index 0000000000..d576d2c158 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_en_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_stem.yaml b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_stem.yaml new file mode 100644 index 0000000000..fd0179f2a4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_en_stem diff --git a/lm_eval/tasks/global_mmlu/default/en/utils.py b/lm_eval/tasks/global_mmlu/default/en/utils.py new file mode 100644 index 0000000000..507a41bdc9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/es/_es_template_yaml b/lm_eval/tasks/global_mmlu/default/es/_es_template_yaml new file mode 100644 index 0000000000..b0942331bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/es/_es_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: es +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/es/_global_mmlu_es.yaml b/lm_eval/tasks/global_mmlu/default/es/_global_mmlu_es.yaml new file mode 100644 index 0000000000..614b1b0fab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/es/_global_mmlu_es.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_es +task: + - global_mmlu_es_business + - global_mmlu_es_humanities + - global_mmlu_es_medical + - global_mmlu_es_other + - global_mmlu_es_stem + - global_mmlu_es_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_business.yaml b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_business.yaml new file mode 100644 index 0000000000..388251a2f5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_es_business diff --git a/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_humanities.yaml b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_humanities.yaml new file mode 100644 index 0000000000..fd51574b08 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_es_humanities diff --git a/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_medical.yaml b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_medical.yaml new file mode 100644 index 0000000000..649ad70de6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_es_medical diff --git a/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_other.yaml b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_other.yaml new file mode 100644 index 0000000000..878251d10b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_es_other diff --git a/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_social_sciences.yaml new file mode 100644 index 0000000000..1e97c6adfd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_es_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_stem.yaml b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_stem.yaml new file mode 100644 index 0000000000..45b4fa4af2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_es_stem diff --git a/lm_eval/tasks/global_mmlu/default/es/utils.py b/lm_eval/tasks/global_mmlu/default/es/utils.py new file mode 100644 index 0000000000..507a41bdc9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/es/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/fr/_fr_template_yaml b/lm_eval/tasks/global_mmlu/default/fr/_fr_template_yaml new file mode 100644 index 0000000000..a2c6fc88e9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/_fr_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: fr +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/fr/_global_mmlu_fr.yaml b/lm_eval/tasks/global_mmlu/default/fr/_global_mmlu_fr.yaml new file mode 100644 index 0000000000..d65a2e2538 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/_global_mmlu_fr.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_fr +task: + - global_mmlu_fr_business + - global_mmlu_fr_humanities + - global_mmlu_fr_medical + - global_mmlu_fr_other + - global_mmlu_fr_stem + - global_mmlu_fr_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_business.yaml b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_business.yaml new file mode 100644 index 0000000000..49f8543b8f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_fr_business diff --git a/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_humanities.yaml b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_humanities.yaml new file mode 100644 index 0000000000..35d0086b45 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_fr_humanities diff --git a/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_medical.yaml b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_medical.yaml new file mode 100644 index 0000000000..e411a3475e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_fr_medical diff --git a/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_other.yaml b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_other.yaml new file mode 100644 index 0000000000..5bece30310 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_fr_other diff --git a/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_social_sciences.yaml new file mode 100644 index 0000000000..4e26ceab73 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_fr_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_stem.yaml b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_stem.yaml new file mode 100644 index 0000000000..6d3d1538f2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_fr_stem diff --git a/lm_eval/tasks/global_mmlu/default/fr/utils.py b/lm_eval/tasks/global_mmlu/default/fr/utils.py new file mode 100644 index 0000000000..507a41bdc9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_ar.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_ar.yaml deleted file mode 100644 index 703f420a52..0000000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_ar.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: ar -include: _default_yaml -task: global_mmlu_ar diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_bn.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_bn.yaml deleted file mode 100644 index f85b67a293..0000000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_bn.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: bn -include: _default_yaml -task: global_mmlu_bn diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_de.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_de.yaml deleted file mode 100644 index a874c64fd5..0000000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_de.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: de -include: _default_yaml -task: global_mmlu_de diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_en.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_en.yaml deleted file mode 100644 index 34a6d7120a..0000000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_en.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: en -include: _default_yaml -task: global_mmlu_en diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_es.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_es.yaml deleted file mode 100644 index 75abc77567..0000000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_es.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: es -include: _default_yaml -task: global_mmlu_es diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_fr.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_fr.yaml deleted file mode 100644 index 1a66f53648..0000000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_fr.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: fr -include: _default_yaml -task: global_mmlu_fr diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_hi.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_hi.yaml deleted file mode 100644 index 788f95f2d2..0000000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_hi.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: hi -include: _default_yaml -task: global_mmlu_hi diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_id.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_id.yaml deleted file mode 100644 index f4b6d5071d..0000000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_id.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: id -include: _default_yaml -task: global_mmlu_id diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_it.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_it.yaml deleted file mode 100644 index 5b55df975f..0000000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_it.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: it -include: _default_yaml -task: global_mmlu_it diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_ja.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_ja.yaml deleted file mode 100644 index 97d9c6ca48..0000000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_ja.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: ja -include: _default_yaml -task: global_mmlu_ja diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_ko.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_ko.yaml deleted file mode 100644 index 02b7fe0388..0000000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_ko.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: ko -include: _default_yaml -task: global_mmlu_ko diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_pt.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_pt.yaml deleted file mode 100644 index 724bfb4d23..0000000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_pt.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: pt -include: _default_yaml -task: global_mmlu_pt diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_sw.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_sw.yaml deleted file mode 100644 index 481232fa28..0000000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_sw.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: sw -include: _default_yaml -task: global_mmlu_sw diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_yo.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_yo.yaml deleted file mode 100644 index c6ec2f9efc..0000000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_yo.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: yo -include: _default_yaml -task: global_mmlu_yo diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_zh.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_zh.yaml deleted file mode 100644 index 862d46ad9d..0000000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_zh.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: zh -include: _default_yaml -task: global_mmlu_zh diff --git a/lm_eval/tasks/global_mmlu/default/hi/_global_mmlu_hi.yaml b/lm_eval/tasks/global_mmlu/default/hi/_global_mmlu_hi.yaml new file mode 100644 index 0000000000..406b27a671 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/hi/_global_mmlu_hi.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_hi +task: + - global_mmlu_hi_business + - global_mmlu_hi_humanities + - global_mmlu_hi_medical + - global_mmlu_hi_other + - global_mmlu_hi_stem + - global_mmlu_hi_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/hi/_hi_template_yaml b/lm_eval/tasks/global_mmlu/default/hi/_hi_template_yaml new file mode 100644 index 0000000000..180dee963e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/hi/_hi_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: hi +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_business.yaml b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_business.yaml new file mode 100644 index 0000000000..63b516c56b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_hi_business diff --git a/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_humanities.yaml b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_humanities.yaml new file mode 100644 index 0000000000..d8e888cd28 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_hi_humanities diff --git a/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_medical.yaml b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_medical.yaml new file mode 100644 index 0000000000..46a2195771 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_hi_medical diff --git a/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_other.yaml b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_other.yaml new file mode 100644 index 0000000000..ea242d7a30 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_hi_other diff --git a/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_social_sciences.yaml new file mode 100644 index 0000000000..df95b8c460 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_hi_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_stem.yaml b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_stem.yaml new file mode 100644 index 0000000000..acab4f12e3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_hi_stem diff --git a/lm_eval/tasks/global_mmlu/default/hi/utils.py b/lm_eval/tasks/global_mmlu/default/hi/utils.py new file mode 100644 index 0000000000..507a41bdc9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/hi/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/id/_global_mmlu_id.yaml b/lm_eval/tasks/global_mmlu/default/id/_global_mmlu_id.yaml new file mode 100644 index 0000000000..cfe87f590d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/_global_mmlu_id.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_id +task: + - global_mmlu_id_business + - global_mmlu_id_humanities + - global_mmlu_id_medical + - global_mmlu_id_other + - global_mmlu_id_stem + - global_mmlu_id_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/id/_id_template_yaml b/lm_eval/tasks/global_mmlu/default/id/_id_template_yaml new file mode 100644 index 0000000000..fae80c32f5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/_id_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: id +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_business.yaml b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_business.yaml new file mode 100644 index 0000000000..d8f7c1cf10 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_id_business diff --git a/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_humanities.yaml b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_humanities.yaml new file mode 100644 index 0000000000..459442d440 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_id_humanities diff --git a/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_medical.yaml b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_medical.yaml new file mode 100644 index 0000000000..1fe61f1380 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_id_medical diff --git a/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_other.yaml b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_other.yaml new file mode 100644 index 0000000000..dfdf7dd243 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_id_other diff --git a/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_social_sciences.yaml new file mode 100644 index 0000000000..8ac1ddf46b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_id_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_stem.yaml b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_stem.yaml new file mode 100644 index 0000000000..a2230d33f2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_id_stem diff --git a/lm_eval/tasks/global_mmlu/default/id/utils.py b/lm_eval/tasks/global_mmlu/default/id/utils.py new file mode 100644 index 0000000000..507a41bdc9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/it/_global_mmlu_it.yaml b/lm_eval/tasks/global_mmlu/default/it/_global_mmlu_it.yaml new file mode 100644 index 0000000000..1378b765e4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/_global_mmlu_it.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_it +task: + - global_mmlu_it_business + - global_mmlu_it_humanities + - global_mmlu_it_medical + - global_mmlu_it_other + - global_mmlu_it_stem + - global_mmlu_it_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/it/_it_template_yaml b/lm_eval/tasks/global_mmlu/default/it/_it_template_yaml new file mode 100644 index 0000000000..e6b1f56de5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/_it_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: it +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_business.yaml b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_business.yaml new file mode 100644 index 0000000000..dabac0a9af --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_it_business diff --git a/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_humanities.yaml b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_humanities.yaml new file mode 100644 index 0000000000..6d2c923f42 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_it_humanities diff --git a/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_medical.yaml b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_medical.yaml new file mode 100644 index 0000000000..25d4efc5fd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_it_medical diff --git a/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_other.yaml b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_other.yaml new file mode 100644 index 0000000000..3e35260db1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_it_other diff --git a/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_social_sciences.yaml new file mode 100644 index 0000000000..bee7983506 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_it_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_stem.yaml b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_stem.yaml new file mode 100644 index 0000000000..04502ceff1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_it_stem diff --git a/lm_eval/tasks/global_mmlu/default/it/utils.py b/lm_eval/tasks/global_mmlu/default/it/utils.py new file mode 100644 index 0000000000..507a41bdc9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/ja/_global_mmlu_ja.yaml b/lm_eval/tasks/global_mmlu/default/ja/_global_mmlu_ja.yaml new file mode 100644 index 0000000000..098f3b5710 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/_global_mmlu_ja.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_ja +task: + - global_mmlu_ja_business + - global_mmlu_ja_humanities + - global_mmlu_ja_medical + - global_mmlu_ja_other + - global_mmlu_ja_stem + - global_mmlu_ja_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/ja/_ja_template_yaml b/lm_eval/tasks/global_mmlu/default/ja/_ja_template_yaml new file mode 100644 index 0000000000..5f0e4cc6c8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/_ja_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: ja +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_business.yaml b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_business.yaml new file mode 100644 index 0000000000..19a5050a6c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_ja_business diff --git a/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_humanities.yaml b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_humanities.yaml new file mode 100644 index 0000000000..b2d83886c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_ja_humanities diff --git a/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_medical.yaml b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_medical.yaml new file mode 100644 index 0000000000..8c0695ef02 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_ja_medical diff --git a/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_other.yaml b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_other.yaml new file mode 100644 index 0000000000..5e72d4c0e2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_ja_other diff --git a/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_social_sciences.yaml new file mode 100644 index 0000000000..acdabd5331 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_ja_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_stem.yaml b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_stem.yaml new file mode 100644 index 0000000000..b9ab07cbbb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_ja_stem diff --git a/lm_eval/tasks/global_mmlu/default/ja/utils.py b/lm_eval/tasks/global_mmlu/default/ja/utils.py new file mode 100644 index 0000000000..507a41bdc9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/ko/_global_mmlu_ko.yaml b/lm_eval/tasks/global_mmlu/default/ko/_global_mmlu_ko.yaml new file mode 100644 index 0000000000..19f4f961ec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/_global_mmlu_ko.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_ko +task: + - global_mmlu_ko_business + - global_mmlu_ko_humanities + - global_mmlu_ko_medical + - global_mmlu_ko_other + - global_mmlu_ko_stem + - global_mmlu_ko_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/ko/_ko_template_yaml b/lm_eval/tasks/global_mmlu/default/ko/_ko_template_yaml new file mode 100644 index 0000000000..364e159b03 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/_ko_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: ko +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_business.yaml b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_business.yaml new file mode 100644 index 0000000000..2f1ce375cc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_ko_business diff --git a/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_humanities.yaml b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_humanities.yaml new file mode 100644 index 0000000000..a613ff5550 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_ko_humanities diff --git a/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_medical.yaml b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_medical.yaml new file mode 100644 index 0000000000..7e8710384b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_ko_medical diff --git a/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_other.yaml b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_other.yaml new file mode 100644 index 0000000000..3fa1c608d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_ko_other diff --git a/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_social_sciences.yaml new file mode 100644 index 0000000000..ad5874f9df --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_ko_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_stem.yaml b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_stem.yaml new file mode 100644 index 0000000000..f6c7e8ec18 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_ko_stem diff --git a/lm_eval/tasks/global_mmlu/default/ko/utils.py b/lm_eval/tasks/global_mmlu/default/ko/utils.py new file mode 100644 index 0000000000..507a41bdc9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/pt/_global_mmlu_pt.yaml b/lm_eval/tasks/global_mmlu/default/pt/_global_mmlu_pt.yaml new file mode 100644 index 0000000000..7a489c1245 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/_global_mmlu_pt.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_pt +task: + - global_mmlu_pt_business + - global_mmlu_pt_humanities + - global_mmlu_pt_medical + - global_mmlu_pt_other + - global_mmlu_pt_stem + - global_mmlu_pt_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/pt/_pt_template_yaml b/lm_eval/tasks/global_mmlu/default/pt/_pt_template_yaml new file mode 100644 index 0000000000..f1db662946 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/_pt_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: pt +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_business.yaml b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_business.yaml new file mode 100644 index 0000000000..1e72b16871 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_pt_business diff --git a/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_humanities.yaml b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_humanities.yaml new file mode 100644 index 0000000000..7244f2a754 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_pt_humanities diff --git a/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_medical.yaml b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_medical.yaml new file mode 100644 index 0000000000..44776f2cff --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_pt_medical diff --git a/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_other.yaml b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_other.yaml new file mode 100644 index 0000000000..b612120187 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_pt_other diff --git a/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_social_sciences.yaml new file mode 100644 index 0000000000..949d346ece --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_pt_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_stem.yaml b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_stem.yaml new file mode 100644 index 0000000000..9f194c1155 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_pt_stem diff --git a/lm_eval/tasks/global_mmlu/default/pt/utils.py b/lm_eval/tasks/global_mmlu/default/pt/utils.py new file mode 100644 index 0000000000..507a41bdc9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/sw/_global_mmlu_sw.yaml b/lm_eval/tasks/global_mmlu/default/sw/_global_mmlu_sw.yaml new file mode 100644 index 0000000000..b3913d244e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/_global_mmlu_sw.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_sw +task: + - global_mmlu_sw_business + - global_mmlu_sw_humanities + - global_mmlu_sw_medical + - global_mmlu_sw_other + - global_mmlu_sw_stem + - global_mmlu_sw_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/sw/_sw_template_yaml b/lm_eval/tasks/global_mmlu/default/sw/_sw_template_yaml new file mode 100644 index 0000000000..33edff382b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/_sw_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: sw +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_business.yaml b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_business.yaml new file mode 100644 index 0000000000..a53ca478f3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_sw_business diff --git a/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_humanities.yaml b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_humanities.yaml new file mode 100644 index 0000000000..4687df760d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_sw_humanities diff --git a/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_medical.yaml b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_medical.yaml new file mode 100644 index 0000000000..76240ea3d0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_sw_medical diff --git a/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_other.yaml b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_other.yaml new file mode 100644 index 0000000000..7c3bfda2e4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_sw_other diff --git a/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_social_sciences.yaml new file mode 100644 index 0000000000..4a77aa2ba4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_sw_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_stem.yaml b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_stem.yaml new file mode 100644 index 0000000000..d6faf18b16 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_sw_stem diff --git a/lm_eval/tasks/global_mmlu/default/sw/utils.py b/lm_eval/tasks/global_mmlu/default/sw/utils.py new file mode 100644 index 0000000000..507a41bdc9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/yo/_global_mmlu_yo.yaml b/lm_eval/tasks/global_mmlu/default/yo/_global_mmlu_yo.yaml new file mode 100644 index 0000000000..14df221ad8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/_global_mmlu_yo.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_yo +task: + - global_mmlu_yo_business + - global_mmlu_yo_humanities + - global_mmlu_yo_medical + - global_mmlu_yo_other + - global_mmlu_yo_stem + - global_mmlu_yo_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/yo/_yo_template_yaml b/lm_eval/tasks/global_mmlu/default/yo/_yo_template_yaml new file mode 100644 index 0000000000..6cdd0a035e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/_yo_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: yo +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_business.yaml b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_business.yaml new file mode 100644 index 0000000000..162a97cf09 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_yo_business diff --git a/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_humanities.yaml b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_humanities.yaml new file mode 100644 index 0000000000..5befbc12e4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_yo_humanities diff --git a/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_medical.yaml b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_medical.yaml new file mode 100644 index 0000000000..d48d02088f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_yo_medical diff --git a/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_other.yaml b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_other.yaml new file mode 100644 index 0000000000..5e407c2cf1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_yo_other diff --git a/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_social_sciences.yaml new file mode 100644 index 0000000000..c85596aa60 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_yo_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_stem.yaml b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_stem.yaml new file mode 100644 index 0000000000..a19e1e8dcd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_yo_stem diff --git a/lm_eval/tasks/global_mmlu/default/yo/utils.py b/lm_eval/tasks/global_mmlu/default/yo/utils.py new file mode 100644 index 0000000000..507a41bdc9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/zh/_global_mmlu_zh.yaml b/lm_eval/tasks/global_mmlu/default/zh/_global_mmlu_zh.yaml new file mode 100644 index 0000000000..212a33fc90 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/_global_mmlu_zh.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_zh +task: + - global_mmlu_zh_business + - global_mmlu_zh_humanities + - global_mmlu_zh_medical + - global_mmlu_zh_other + - global_mmlu_zh_stem + - global_mmlu_zh_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/zh/_zh_template_yaml b/lm_eval/tasks/global_mmlu/default/zh/_zh_template_yaml new file mode 100644 index 0000000000..eeb1e7b9c8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/_zh_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: zh +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_business.yaml b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_business.yaml new file mode 100644 index 0000000000..aa0a689aed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_zh_business diff --git a/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_humanities.yaml b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_humanities.yaml new file mode 100644 index 0000000000..823854b958 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_zh_humanities diff --git a/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_medical.yaml b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_medical.yaml new file mode 100644 index 0000000000..f1f7a7d585 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_zh_medical diff --git a/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_other.yaml b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_other.yaml new file mode 100644 index 0000000000..a3beae834c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_zh_other diff --git a/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_social_sciences.yaml new file mode 100644 index 0000000000..1891a45aec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_zh_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_stem.yaml b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_stem.yaml new file mode 100644 index 0000000000..6a9f4f05de --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_zh_stem diff --git a/lm_eval/tasks/global_mmlu/default/zh/utils.py b/lm_eval/tasks/global_mmlu/default/zh/utils.py new file mode 100644 index 0000000000..507a41bdc9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am.yaml b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am.yaml index 48fc270ade..555bfd8684 100644 --- a/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am.yaml +++ b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar.yaml b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar.yaml index 61f60b9b5f..83340da0dd 100644 --- a/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar.yaml +++ b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn.yaml b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn.yaml index f1c91f09b2..135b4bf5dd 100644 --- a/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn.yaml +++ b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs.yaml b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs.yaml index 977b0051ca..419d5de4fb 100644 --- a/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs.yaml +++ b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de.yaml b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de.yaml index c09da2684e..5217599a67 100644 --- a/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de.yaml +++ b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el.yaml b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el.yaml index a77feecb2f..9d44d8c8b9 100644 --- a/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el.yaml +++ b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en.yaml b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en.yaml index 648a10dd8f..d4c82b6465 100644 --- a/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en.yaml +++ b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es.yaml b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es.yaml index 832001c1f2..13d2eccf3a 100644 --- a/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es.yaml +++ b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa.yaml b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa.yaml index 9edb85405b..282664e59f 100644 --- a/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa.yaml +++ b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil.yaml b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil.yaml index 24fcb6d28e..659c9d4b9a 100644 --- a/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil.yaml +++ b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr.yaml b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr.yaml index e85d6746cb..7857a19328 100644 --- a/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr.yaml +++ b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha.yaml b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha.yaml index 08a958bbab..a50084174a 100644 --- a/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha.yaml +++ b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he.yaml b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he.yaml index ff0a5e8f22..4952391a26 100644 --- a/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he.yaml +++ b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi.yaml b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi.yaml index ed54a6ad28..c899be84ff 100644 --- a/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi.yaml +++ b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id.yaml b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id.yaml index f678660e38..fd29a2ad18 100644 --- a/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id.yaml +++ b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig.yaml b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig.yaml index a263e29556..d5346e1cb4 100644 --- a/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig.yaml +++ b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it.yaml b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it.yaml index dabb898708..f3bcd69418 100644 --- a/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it.yaml +++ b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja.yaml b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja.yaml index 103460d7e9..af25573dfe 100644 --- a/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja.yaml +++ b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko.yaml b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko.yaml index d2225e2309..8b9b419701 100644 --- a/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko.yaml +++ b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky.yaml b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky.yaml index 4774599a13..33b2a4df9e 100644 --- a/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky.yaml +++ b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt.yaml b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt.yaml index 93929d4282..dd2cd37d3e 100644 --- a/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt.yaml +++ b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg.yaml b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg.yaml index 05b5594840..bcc6cd81ad 100644 --- a/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg.yaml +++ b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms.yaml b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms.yaml index e5a136458b..a322dad5b8 100644 --- a/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms.yaml +++ b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne.yaml b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne.yaml index ec13a0be2c..7e9c77ce6c 100644 --- a/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne.yaml +++ b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl.yaml b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl.yaml index 44f562da97..c293a558a1 100644 --- a/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl.yaml +++ b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny.yaml b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny.yaml index c325bf1d71..80739c2c70 100644 --- a/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny.yaml +++ b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl.yaml b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl.yaml index 2476fd33a5..deb51e119b 100644 --- a/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl.yaml +++ b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt.yaml b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt.yaml index ac79bda10f..7cb9f0fe37 100644 --- a/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt.yaml +++ b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro.yaml b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro.yaml index b3aa5f4981..f4e5575f04 100644 --- a/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro.yaml +++ b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru.yaml b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru.yaml index cc63cd34e0..3fc05379c0 100644 --- a/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru.yaml +++ b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si.yaml b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si.yaml index 4deed57076..6503e26786 100644 --- a/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si.yaml +++ b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn.yaml b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn.yaml index 98ced98731..fd084b3284 100644 --- a/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn.yaml +++ b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so.yaml b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so.yaml index 014a412131..91e11b88eb 100644 --- a/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so.yaml +++ b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr.yaml b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr.yaml index e322d980a1..1e0c55a5d2 100644 --- a/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr.yaml +++ b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv.yaml b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv.yaml index a9b0dc1b0e..078de8b63a 100644 --- a/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv.yaml +++ b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw.yaml b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw.yaml index 274543cf05..fd87d0682a 100644 --- a/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw.yaml +++ b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te.yaml b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te.yaml index 5ef0f7ab57..0bc967ded7 100644 --- a/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te.yaml +++ b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr.yaml b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr.yaml index 8cd3d3f3a8..aa4ae63fa9 100644 --- a/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr.yaml +++ b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk.yaml b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk.yaml index e880be3296..9bbd14cb88 100644 --- a/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk.yaml +++ b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi.yaml b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi.yaml index d6413b3590..278884741d 100644 --- a/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi.yaml +++ b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo.yaml b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo.yaml index ba9f2460b1..ef4930b6e4 100644 --- a/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo.yaml +++ b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh.yaml b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh.yaml index 098ec0979e..926c8f88e7 100644 --- a/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh.yaml +++ b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0