From 4d5de8a997c522fb0921773c71183b12e601796e Mon Sep 17 00:00:00 2001 From: Harry Caufield Date: Tue, 21 Nov 2023 13:51:01 -0500 Subject: [PATCH 1/3] Init biored eval --- src/ontogpt/evaluation/biored/__init__.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 src/ontogpt/evaluation/biored/__init__.py diff --git a/src/ontogpt/evaluation/biored/__init__.py b/src/ontogpt/evaluation/biored/__init__.py new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/src/ontogpt/evaluation/biored/__init__.py @@ -0,0 +1 @@ + From 19392f2fe8c9b4a3f21df685413200de309f2cef Mon Sep 17 00:00:00 2001 From: Harry Caufield Date: Tue, 21 Nov 2023 13:57:45 -0500 Subject: [PATCH 2/3] Init eval_biored.py --- src/ontogpt/evaluation/biored/eval_biored.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 src/ontogpt/evaluation/biored/eval_biored.py diff --git a/src/ontogpt/evaluation/biored/eval_biored.py b/src/ontogpt/evaluation/biored/eval_biored.py new file mode 100644 index 000000000..a4672de63 --- /dev/null +++ b/src/ontogpt/evaluation/biored/eval_biored.py @@ -0,0 +1,19 @@ +""" +BioRED evaluation. + +This evaluation measures performance of OntoGPT +on relation extraction over the BioRED data set +(see Luo et al. 2022, +https://doi.org/10.1093/bib/bbac282). + +The BioRED set includes 600 biomedical abstracts +annotated for multiple entity and relation types. + +Luo et al. report these F1 scores as best results: +NER: 89.3 +RE: 47.7 + +This evaluation uses a task-specific template +(biored). 
+ +""" From 585278cfdad5395901a57fa6001648d90b32d7fb Mon Sep 17 00:00:00 2001 From: caufieldjh Date: Wed, 22 Nov 2023 10:44:01 -0500 Subject: [PATCH 3/3] Expand intro doc --- src/ontogpt/evaluation/biored/eval_biored.py | 26 ++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/ontogpt/evaluation/biored/eval_biored.py b/src/ontogpt/evaluation/biored/eval_biored.py index a4672de63..42f3e44aa 100644 --- a/src/ontogpt/evaluation/biored/eval_biored.py +++ b/src/ontogpt/evaluation/biored/eval_biored.py @@ -8,6 +8,32 @@ The BioRED set includes 600 biomedical abstracts annotated for multiple entity and relation types. +The entity types are as follows, grounded +to the following namespaces: +Gene (NCBI Gene) +Variant (dbSNP) +Species (NCBI Taxonomy) +Disease (MESH + OMIM) +Chemical (MESH Chemicals and Drugs) +CellLine (Cellosaurus) +The BioRED annotations include ungrounded but +normalized tmVar representations for variants without +corresponding dbSNP entries. + +The relation types are as follows: +Disease-chemical +Disease-gene +Disease-variant +Variant-variant +Gene-gene +Gene-chemical +Chemical-chemical +Chemical-variant + +Note that these relation types have subtypes as +well. See the BioRED annotation guide, Table 4, +for the full set +(https://ftp.ncbi.nlm.nih.gov/pub/lu/BioRED/BioRED_Annotation_Guideline.pdf) Luo et al. report these F1 scores as best results: NER: 89.3