Skip to content

Commit

Permalink
separate category for global_mmlu (#2652)
Browse files Browse the repository at this point in the history
* separate category

* set version 0.0

* apply precommit
  • Loading branch information
bzantium authored Jan 24, 2025
1 parent 370e2f9 commit 5c006ed
Show file tree
Hide file tree
Showing 193 changed files with 1,092 additions and 146 deletions.
42 changes: 0 additions & 42 deletions lm_eval/tasks/global_mmlu/default/_generate_configs.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
tag:
- global_mmlu
dataset_path: CohereForAI/Global-MMLU-Lite
dataset_name: ar
test_split: test
fewshot_split: dev
fewshot_config:
Expand Down
13 changes: 13 additions & 0 deletions lm_eval/tasks/global_mmlu/default/ar/_global_mmlu_ar.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
group: global_mmlu_ar
task:
- global_mmlu_ar_business
- global_mmlu_ar_humanities
- global_mmlu_ar_medical
- global_mmlu_ar_other
- global_mmlu_ar_stem
- global_mmlu_ar_social_sciences
aggregate_metric_list:
- metric: acc
weight_by_size: True
metadata:
version: 0.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _ar_template_yaml
process_docs: !function utils.process_business
task: global_mmlu_ar_business
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _ar_template_yaml
process_docs: !function utils.process_humanities
task: global_mmlu_ar_humanities
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _ar_template_yaml
process_docs: !function utils.process_medical
task: global_mmlu_ar_medical
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _ar_template_yaml
process_docs: !function utils.process_other
task: global_mmlu_ar_other
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _ar_template_yaml
process_docs: !function utils.process_social_sciences
task: global_mmlu_ar_social_sciences
4 changes: 4 additions & 0 deletions lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_stem.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _ar_template_yaml
process_docs: !function utils.process_stem
task: global_mmlu_ar_stem
18 changes: 18 additions & 0 deletions lm_eval/tasks/global_mmlu/default/ar/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from functools import partial


# Category labels compared against each row's "subject_category" field.
# One filtering function is generated per entry below.
CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"]


def process_docs(dataset, category):
    """Keep only the rows of *dataset* that belong to *category*.

    Args:
        dataset: Object exposing ``filter(predicate)``; the predicate is
            called with one row that supports ``row["subject_category"]``.
        category: Label to keep. The comparison is exact (case-sensitive).

    Returns:
        Whatever ``dataset.filter`` returns for that predicate.
    """
    return dataset.filter(lambda row: row["subject_category"] == category)


# Map "process_<snake_case category>" to ``process_docs`` bound to that
# category, e.g. "Social Sciences" -> "process_social_sciences".
process_functions = {
    f"process_{category.lower().replace(' ', '_')}": partial(
        process_docs, category=category
    )
    for category in CATEGORIES
}

# Publish every generated callable as a module-level name so the task YAML
# files can reference it as ``utils.process_<category>``.
globals().update(process_functions)
16 changes: 16 additions & 0 deletions lm_eval/tasks/global_mmlu/default/bn/_bn_template_yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
dataset_path: CohereForAI/Global-MMLU-Lite
dataset_name: bn
test_split: test
fewshot_split: dev
fewshot_config:
sampler: default
output_type: multiple_choice
doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:"
doc_to_choice: ["A", "B", "C", "D"]
doc_to_target: answer
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
metadata:
version: 0.0
13 changes: 13 additions & 0 deletions lm_eval/tasks/global_mmlu/default/bn/_global_mmlu_bn.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
group: global_mmlu_bn
task:
- global_mmlu_bn_business
- global_mmlu_bn_humanities
- global_mmlu_bn_medical
- global_mmlu_bn_other
- global_mmlu_bn_stem
- global_mmlu_bn_social_sciences
aggregate_metric_list:
- metric: acc
weight_by_size: True
metadata:
version: 0.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _bn_template_yaml
process_docs: !function utils.process_business
task: global_mmlu_bn_business
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _bn_template_yaml
process_docs: !function utils.process_humanities
task: global_mmlu_bn_humanities
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _bn_template_yaml
process_docs: !function utils.process_medical
task: global_mmlu_bn_medical
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _bn_template_yaml
process_docs: !function utils.process_other
task: global_mmlu_bn_other
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _bn_template_yaml
process_docs: !function utils.process_social_sciences
task: global_mmlu_bn_social_sciences
4 changes: 4 additions & 0 deletions lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_stem.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _bn_template_yaml
process_docs: !function utils.process_stem
task: global_mmlu_bn_stem
18 changes: 18 additions & 0 deletions lm_eval/tasks/global_mmlu/default/bn/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from functools import partial


# Category labels compared against each row's "subject_category" field.
# One filtering function is generated per entry below.
CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"]


def process_docs(dataset, category):
    """Keep only the rows of *dataset* that belong to *category*.

    Args:
        dataset: Object exposing ``filter(predicate)``; the predicate is
            called with one row that supports ``row["subject_category"]``.
        category: Label to keep. The comparison is exact (case-sensitive).

    Returns:
        Whatever ``dataset.filter`` returns for that predicate.
    """
    return dataset.filter(lambda row: row["subject_category"] == category)


# Map "process_<snake_case category>" to ``process_docs`` bound to that
# category, e.g. "Social Sciences" -> "process_social_sciences".
process_functions = {
    f"process_{category.lower().replace(' ', '_')}": partial(
        process_docs, category=category
    )
    for category in CATEGORIES
}

# Publish every generated callable as a module-level name so the task YAML
# files can reference it as ``utils.process_<category>``.
globals().update(process_functions)
16 changes: 16 additions & 0 deletions lm_eval/tasks/global_mmlu/default/de/_de_template_yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
dataset_path: CohereForAI/Global-MMLU-Lite
dataset_name: de
test_split: test
fewshot_split: dev
fewshot_config:
sampler: default
output_type: multiple_choice
doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:"
doc_to_choice: ["A", "B", "C", "D"]
doc_to_target: answer
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
metadata:
version: 0.0
13 changes: 13 additions & 0 deletions lm_eval/tasks/global_mmlu/default/de/_global_mmlu_de.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
group: global_mmlu_de
task:
- global_mmlu_de_business
- global_mmlu_de_humanities
- global_mmlu_de_medical
- global_mmlu_de_other
- global_mmlu_de_stem
- global_mmlu_de_social_sciences
aggregate_metric_list:
- metric: acc
weight_by_size: True
metadata:
version: 0.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _de_template_yaml
process_docs: !function utils.process_business
task: global_mmlu_de_business
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _de_template_yaml
process_docs: !function utils.process_humanities
task: global_mmlu_de_humanities
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _de_template_yaml
process_docs: !function utils.process_medical
task: global_mmlu_de_medical
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _de_template_yaml
process_docs: !function utils.process_other
task: global_mmlu_de_other
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _de_template_yaml
process_docs: !function utils.process_social_sciences
task: global_mmlu_de_social_sciences
4 changes: 4 additions & 0 deletions lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_stem.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _de_template_yaml
process_docs: !function utils.process_stem
task: global_mmlu_de_stem
18 changes: 18 additions & 0 deletions lm_eval/tasks/global_mmlu/default/de/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from functools import partial


# Category labels compared against each row's "subject_category" field.
# One filtering function is generated per entry below.
CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"]


def process_docs(dataset, category):
    """Keep only the rows of *dataset* that belong to *category*.

    Args:
        dataset: Object exposing ``filter(predicate)``; the predicate is
            called with one row that supports ``row["subject_category"]``.
        category: Label to keep. The comparison is exact (case-sensitive).

    Returns:
        Whatever ``dataset.filter`` returns for that predicate.
    """
    return dataset.filter(lambda row: row["subject_category"] == category)


# Map "process_<snake_case category>" to ``process_docs`` bound to that
# category, e.g. "Social Sciences" -> "process_social_sciences".
process_functions = {
    f"process_{category.lower().replace(' ', '_')}": partial(
        process_docs, category=category
    )
    for category in CATEGORIES
}

# Publish every generated callable as a module-level name so the task YAML
# files can reference it as ``utils.process_<category>``.
globals().update(process_functions)
16 changes: 16 additions & 0 deletions lm_eval/tasks/global_mmlu/default/en/_en_template_yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
dataset_path: CohereForAI/Global-MMLU-Lite
dataset_name: en
test_split: test
fewshot_split: dev
fewshot_config:
sampler: default
output_type: multiple_choice
doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:"
doc_to_choice: ["A", "B", "C", "D"]
doc_to_target: answer
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
metadata:
version: 0.0
13 changes: 13 additions & 0 deletions lm_eval/tasks/global_mmlu/default/en/_global_mmlu_en.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
group: global_mmlu_en
task:
- global_mmlu_en_business
- global_mmlu_en_humanities
- global_mmlu_en_medical
- global_mmlu_en_other
- global_mmlu_en_stem
- global_mmlu_en_social_sciences
aggregate_metric_list:
- metric: acc
weight_by_size: True
metadata:
version: 0.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _en_template_yaml
process_docs: !function utils.process_business
task: global_mmlu_en_business
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _en_template_yaml
process_docs: !function utils.process_humanities
task: global_mmlu_en_humanities
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _en_template_yaml
process_docs: !function utils.process_medical
task: global_mmlu_en_medical
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _en_template_yaml
process_docs: !function utils.process_other
task: global_mmlu_en_other
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _en_template_yaml
process_docs: !function utils.process_social_sciences
task: global_mmlu_en_social_sciences
4 changes: 4 additions & 0 deletions lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_stem.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _en_template_yaml
process_docs: !function utils.process_stem
task: global_mmlu_en_stem
18 changes: 18 additions & 0 deletions lm_eval/tasks/global_mmlu/default/en/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from functools import partial


# Category labels compared against each row's "subject_category" field.
# One filtering function is generated per entry below.
CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"]


def process_docs(dataset, category):
    """Keep only the rows of *dataset* that belong to *category*.

    Args:
        dataset: Object exposing ``filter(predicate)``; the predicate is
            called with one row that supports ``row["subject_category"]``.
        category: Label to keep. The comparison is exact (case-sensitive).

    Returns:
        Whatever ``dataset.filter`` returns for that predicate.
    """
    return dataset.filter(lambda row: row["subject_category"] == category)


# Map "process_<snake_case category>" to ``process_docs`` bound to that
# category, e.g. "Social Sciences" -> "process_social_sciences".
process_functions = {
    f"process_{category.lower().replace(' ', '_')}": partial(
        process_docs, category=category
    )
    for category in CATEGORIES
}

# Publish every generated callable as a module-level name so the task YAML
# files can reference it as ``utils.process_<category>``.
globals().update(process_functions)
16 changes: 16 additions & 0 deletions lm_eval/tasks/global_mmlu/default/es/_es_template_yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
dataset_path: CohereForAI/Global-MMLU-Lite
dataset_name: es
test_split: test
fewshot_split: dev
fewshot_config:
sampler: default
output_type: multiple_choice
doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:"
doc_to_choice: ["A", "B", "C", "D"]
doc_to_target: answer
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
metadata:
version: 0.0
13 changes: 13 additions & 0 deletions lm_eval/tasks/global_mmlu/default/es/_global_mmlu_es.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
group: global_mmlu_es
task:
- global_mmlu_es_business
- global_mmlu_es_humanities
- global_mmlu_es_medical
- global_mmlu_es_other
- global_mmlu_es_stem
- global_mmlu_es_social_sciences
aggregate_metric_list:
- metric: acc
weight_by_size: True
metadata:
version: 0.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _es_template_yaml
process_docs: !function utils.process_business
task: global_mmlu_es_business
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _es_template_yaml
process_docs: !function utils.process_humanities
task: global_mmlu_es_humanities
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by _generate_configs.py
include: _es_template_yaml
process_docs: !function utils.process_medical
task: global_mmlu_es_medical
Loading

0 comments on commit 5c006ed

Please sign in to comment.