EleutherAI · baberabb · Jan 20, 2025 · Dec 5, 2024 · Dec 12, 2024 · Dec 12, 2024
@@ -25,20 +25,21 @@ Homepage: https://huggingface.co/datasets/sean0042/KorMedMCQA
 
 ### Groups and Tasks
 
-* `kormedmcqa`: Runs `kormedmcqa_doctor`, `kormedmcqa_nurse`, and `kormedmcqa_pharm`.
+* `kormedmcqa`: Runs `kormedmcqa_doctor`, `kormedmcqa_nurse`, `kormedmcqa_pharm`, and `kormedmcqa_dentist`.
 
 #### Tasks
 
 * `kormedmcqa_doctor`: `Official Korean Doctor Examination`
 * `kormedmcqa_nurse`: `Official Korean Nurse Examination`
 * `kormedmcqa_pharm`: `Official Korean Pharmacist Examination`
+* `kormedmcqa_dentist`: `Official Korean Dentist Examination`
 
 ### Checklist
 
 For adding novel benchmarks/datasets to the library:
-* [x] Is the task an existing benchmark in the literature?
-  * [x] Have you referenced the original paper that introduced the task?
-  * [x] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test?
+* [ ] Is the task an existing benchmark in the literature?
+  * [ ] Have you referenced the original paper that introduced the task?
+  * [ ] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test?
 
 
 If other tasks on this dataset are already supported:

@@ -3,9 +3,10 @@ task:
   - kormedmcqa_doctor
   - kormedmcqa_nurse
   - kormedmcqa_pharm
+  - kormedmcqa_dentist
 aggregate_metric_list:
   - metric: exact_match
     aggregation: mean
     weight_by_size: true
 metadata:
-  version: 0.0
+  version: 2.0
@@ -1,10 +1,10 @@
-task : kormedmcqa_doctor
 dataset_path : sean0042/KorMedMCQA
-dataset_name : doctor
 test_split : test
-fewshot_split : dev
+fewshot_split : fewshot
 fewshot_config:
   sampler: first_n
+  doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nE. {{E}}\n정답： {{['A', 'B', 'C', 'D', 'E'][answer-1]}}\n\n"
+  doc_to_target: ""
 output_type: generate_until
 doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nE. {{E}}\n정답："
 doc_to_target: "{{['A', 'B', 'C', 'D', 'E'][answer-1]}}"
@@ -15,12 +15,19 @@ metric_list:
     ignore_case: true
     ignore_punctuation: true
     regexes_to_ignore:
-          - " "
+      - " "
+      - "\n"
 generation_kwargs:
   until:
     - "Q:"
-    - "\n\n"
     - "</s>"
+    - "<|im_end|>"
     - "."
+    - "\n\n"
   do_sample: false
   temperature: 0.0
+  max_gen_toks: 1024
+metadata:
+  version: 2.0
+dataset_kwargs:
+  trust_remote_code: true
@@ -0,0 +1,3 @@
+include: _template_yaml
+dataset_name: dentist
+task: kormedmcqa_dentist
@@ -0,0 +1,3 @@
+include: _template_yaml
+dataset_name: doctor
+task: kormedmcqa_doctor
@@ -0,0 +1,3 @@
+include: _template_yaml
+dataset_name: nurse
+task: kormedmcqa_nurse
@@ -0,0 +1,3 @@
+include: _template_yaml
+dataset_name: pharm
+task: kormedmcqa_pharm