
Commit

fixed mmlu generative response extraction (#2503)
* fixed mmlu generative response extraction

* updated file version | added args to exact_match

* fix

* fix

* pre-commit

* fix groups

---------

Co-authored-by: Baber <[email protected]>
RawthiL and baberabb authored Jan 20, 2025
1 parent 8814407 commit 12b6eeb
Showing 4 changed files with 68 additions and 51 deletions.
82 changes: 42 additions & 40 deletions lm_eval/tasks/arabicmmlu/_generate_configs.py
@@ -13,46 +13,48 @@
 eval_logger = logging.getLogger("lm-eval")


-SUBJECTS = {'Islamic Studies': 'humanities',
-            'Driving Test': 'other',
-            'Natural Science (Middle School)': 'stem',
-            'Natural Science (Primary School)': 'stem',
-            'History (Primary School)': 'humanities',
-            'History (Middle School)': 'humanities',
-            'History (High School)': 'humanities',
-            'General Knowledge': 'other',
-            'General Knowledge (Primary School)': 'other',
-            'General Knowledge (Middle School)': 'other',
-            'Law (Professional)': 'humanities',
-            'Physics (High School)': 'stem',
-            'Social Science (Middle School)': 'social_science',
-            'Social Science (Primary School)': 'social_science',
-            'Management (University)': 'other',
-            'Arabic Language (Primary School)': 'language',
-            'Arabic Language (Middle School)': 'language',
-            'Arabic Language (High School)': 'language',
-            'Political Science (University)': 'social_science',
-            'Philosophy (High School)': 'humanities',
-            'Accounting (University)': 'social_science',
-            'Computer Science (University)': 'stem',
-            'Computer Science (Middle School)': 'stem',
-            'Computer Science (Primary School)': 'stem',
-            'Computer Science (High School)': 'stem',
-            'Geography (Primary School)': 'social_science',
-            'Geography (Middle School)': 'social_science',
-            'Geography (High School)': 'social_science',
-            'Math (Primary School)': 'stem',
-            'Biology (High School)': 'stem',
-            'Economics (University)': 'social_science',
-            'Economics (Middle School)': 'social_science',
-            'Economics (High School)': 'social_science',
-            'Arabic Language (General)': 'language',
-            'Arabic Language (Grammar)': 'language',
-            'Islamic Studies (High School)': 'humanities',
-            'Islamic Studies (Middle School)': 'humanities',
-            'Islamic Studies (Primary School)': 'humanities',
-            'Civics (Middle School)': 'social_science',
-            'Civics (High School)': 'social_science'}
+SUBJECTS = {
+    "Islamic Studies": "humanities",
+    "Driving Test": "other",
+    "Natural Science (Middle School)": "stem",
+    "Natural Science (Primary School)": "stem",
+    "History (Primary School)": "humanities",
+    "History (Middle School)": "humanities",
+    "History (High School)": "humanities",
+    "General Knowledge": "other",
+    "General Knowledge (Primary School)": "other",
+    "General Knowledge (Middle School)": "other",
+    "Law (Professional)": "humanities",
+    "Physics (High School)": "stem",
+    "Social Science (Middle School)": "social_science",
+    "Social Science (Primary School)": "social_science",
+    "Management (University)": "other",
+    "Arabic Language (Primary School)": "language",
+    "Arabic Language (Middle School)": "language",
+    "Arabic Language (High School)": "language",
+    "Political Science (University)": "social_science",
+    "Philosophy (High School)": "humanities",
+    "Accounting (University)": "social_science",
+    "Computer Science (University)": "stem",
+    "Computer Science (Middle School)": "stem",
+    "Computer Science (Primary School)": "stem",
+    "Computer Science (High School)": "stem",
+    "Geography (Primary School)": "social_science",
+    "Geography (Middle School)": "social_science",
+    "Geography (High School)": "social_science",
+    "Math (Primary School)": "stem",
+    "Biology (High School)": "stem",
+    "Economics (University)": "social_science",
+    "Economics (Middle School)": "social_science",
+    "Economics (High School)": "social_science",
+    "Arabic Language (General)": "language",
+    "Arabic Language (Grammar)": "language",
+    "Islamic Studies (High School)": "humanities",
+    "Islamic Studies (Middle School)": "humanities",
+    "Islamic Studies (Primary School)": "humanities",
+    "Civics (Middle School)": "social_science",
+    "Civics (High School)": "social_science",
+}


 def parse_args():
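The change in this file is purely stylistic: the SUBJECTS mapping is re-quoted and reflowed in Black style, with no keys or values altered. A quick sanity check (illustrative only, assuming the dict above is in scope; this snippet is not part of the commit) would be:

    # All 40 subject -> category pairs survive the reformat unchanged.
    assert len(SUBJECTS) == 40
    assert SUBJECTS["Driving Test"] == "other"
    assert SUBJECTS["Physics (High School)"] == "stem"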
1 change: 1 addition & 0 deletions lm_eval/tasks/mmlu/_generate_configs.py
@@ -1,3 +1,4 @@
+# noqa
 """
 Take in a YAML, and output all "other" splits with this YAML
 """
16 changes: 15 additions & 1 deletion lm_eval/tasks/mmlu/generative/_default_template_yaml
@@ -14,7 +14,21 @@ metric_list:
   - metric: exact_match
     aggregation: mean
     higher_is_better: true
+    ignore_punctuation: true
+    ignore_case: true
+filter_list:
+  - name: get_response
+    filter:
+      # Filter out everything after the first line break
+      - function: "regex"
+        regex_pattern: "^(.*?)(?=\\n|$)"
+      # Remove leading whitespace
+      - function: remove_whitespace
+      # Strip trailing whitespace or line breaks
+      - function: "regex"
+        regex_pattern: "^(.*?)\\s*$"
+      - function: take_first
 metadata:
-  version: 2.0
+  version: 3.0
 dataset_kwargs:
   trust_remote_code: true
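To make the new filter chain concrete, here is a minimal Python sketch of what it does to a raw generative response. The regex patterns are copied from the YAML above; the function name extract_response and the sample string are invented for illustration, and this is not the harness's internal implementation:

    import re

    def extract_response(raw: str) -> str:
        # Keep only the text before the first line break.
        first_line = re.search(r"^(.*?)(?=\n|$)", raw).group(1)
        # Drop leading whitespace (the remove_whitespace filter).
        stripped = first_line.lstrip()
        # Drop trailing whitespace, mirroring the second regex filter.
        return re.search(r"^(.*?)\s*$", stripped).group(1)

    print(extract_response("  (B) Paris\nExplanation: ..."))  # -> "(B) Paris"

With ignore_case and ignore_punctuation now passed to exact_match, the extracted answer and the reference are also normalized before comparison, so for example "(b) paris" and "(B) Paris" count as a match.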
20 changes: 10 additions & 10 deletions lm_eval/tasks/mmlu/generative/_mmlu.yaml
@@ -5,29 +5,29 @@ task:
     task:
       - mmlu_stem_generative
     aggregate_metric_list:
-      - metric: acc
-        weight_by_size: True
+      - metric: exact_match
+        weight_by_size: true
   - group: other
     task:
       - mmlu_other_generative
     aggregate_metric_list:
-      - metric: acc
-        weight_by_size: True
+      - metric: exact_match
+        weight_by_size: true
   - group: social sciences
     task:
       - mmlu_social_sciences_generative
     aggregate_metric_list:
-      - metric: acc
-        weight_by_size: True
+      - metric: exact_match
+        weight_by_size: true
   - group: humanities
     task:
       - mmlu_humanities_generative
     aggregate_metric_list:
-      - metric: acc
-        weight_by_size: True
+      - metric: exact_match
+        weight_by_size: true
 aggregate_metric_list:
   - aggregation: mean
     metric: exact_match
-    weight_by_size: True
+    weight_by_size: true
 metadata:
-  version: 2
+  version: 3
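The aggregated metric switches from acc to exact_match, matching what the generative subtasks actually report. For intuition, weight_by_size: true means the group score is a mean over subgroup scores weighted by the number of examples, roughly as in the sketch below (subgroup names and sizes are made up for the example):

    # Size-weighted mean over subgroup scores (illustrative numbers only).
    scores = {"stem": (0.41, 3000), "humanities": (0.38, 4700)}
    total = sum(n for _, n in scores.values())
    weighted = sum(s * n for s, n in scores.values()) / total
    print(round(weighted, 3))  # -> 0.392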
