opencobra · Hemant27031999 · May 29, 2020 · May 30, 2020 · May 31, 2020 · May 31, 2020
diff --git a/cobra/io/__init__.py b/cobra/io/__init__.py
@@ -5,6 +5,5 @@
 from cobra.io.dict import model_from_dict, model_to_dict
 from cobra.io.json import from_json, load_json_model, save_json_model, to_json
 from cobra.io.mat import load_matlab_model, save_matlab_model
-from cobra.io.sbml import read_sbml_model, write_sbml_model, \
-    validate_sbml_model
+from cobra.io.sbml import read_sbml_model, validate_sbml_model, write_sbml_model
 from cobra.io.yaml import from_yaml, load_yaml_model, save_yaml_model, to_yaml
diff --git a/cobra/io/dict.py b/cobra/io/dict.py
@@ -2,13 +2,14 @@
 
 from __future__ import absolute_import
 
-from collections import OrderedDict
+from collections import OrderedDict, defaultdict
 from operator import attrgetter, itemgetter
 
 from numpy import bool_, float_
 from six import iteritems, string_types
 
 from cobra.core import Gene, Metabolite, Model, Reaction
+from cobra.io.sbml import parse_annotation_info
 from cobra.util.solver import set_objective
 
 
@@ -53,6 +54,31 @@
 }
 
 
+def _fix_annotation(annotation):
+    """Return annotation as a dict whose non-"sbo" values are lists.
+
+    A list of ``[name, uri]`` pairs is converted to a new dict keyed by
+    provider; a dict is normalized in place and returned.
+    """
+    if isinstance(annotation, list):
+        dict_anno = defaultdict(list)
+        for item in annotation:
+            # item[1] is the URI; entries that cannot be parsed are skipped
+            data = parse_annotation_info(item[1])
+            if data is None:
+                continue
+            provider, identifier = data
+            dict_anno[provider].append(identifier)
+        # plain dict: a defaultdict would add keys on every failed lookup
+        annotation = dict(dict_anno)
+
+    # Wrap bare string values in a list for a consistent format.
+    for key, value in annotation.items():
+        if isinstance(value, string_types) and key != "sbo":
+            annotation[key] = [value]
+    return annotation
+
+
 def _fix_type(value):
     """convert possible types to str, float, and bool"""
     # Because numpy floats can not be pickled to json
@@ -82,6 +108,8 @@ def _update_optional(cobra_object, new_dict, optional_attribute_dict,
         value = getattr(cobra_object, key)
         if value is None or value == default:
             continue
+        if key == "annotation":
+            value = _fix_annotation(value)  # keep the converted result
         new_dict[key] = _fix_type(value)
 
 
@@ -97,6 +125,8 @@ def metabolite_to_dict(metabolite):
 def metabolite_from_dict(metabolite):
     new_metabolite = Metabolite()
     for k, v in iteritems(metabolite):
+        if k == "annotation":
+            v = _fix_annotation(v)  # list-of-pairs / bare strings -> lists
         setattr(new_metabolite, k, v)
     return new_metabolite
 
@@ -113,6 +143,8 @@ def gene_to_dict(gene):
 def gene_from_dict(gene):
     new_gene = Gene(gene["id"])
     for k, v in iteritems(gene):
+        if k == "annotation":
+            v = _fix_annotation(v)  # list-of-pairs / bare strings -> lists
         setattr(new_gene, k, v)
     return new_gene
 
@@ -142,6 +174,8 @@ def reaction_from_dict(reaction, model):
                 (model.metabolites.get_by_id(str(met)), coeff)
                 for met, coeff in iteritems(v)))
         else:
+            if k == "annotation":
+                v = _fix_annotation(v)
             setattr(new_reaction, k, v)
     return new_reaction
 
@@ -225,5 +259,7 @@ def model_from_dict(obj):
     set_objective(model, coefficients)
     for k, v in iteritems(obj):
         if k in {'id', 'name', 'notes', 'compartments', 'annotation'}:
+            if k == "annotation":
+                v = _fix_annotation(v)
             setattr(model, k, v)
     return model
diff --git a/cobra/io/sbml.py b/cobra/io/sbml.py
@@ -1427,26 +1427,20 @@ def _parse_annotations(sbase):
             # FIXME: read and store the qualifier
 
             uri = cvterm.getResourceURI(k)
-            data = _parse_annotation_info(uri)
+            data = parse_annotation_info(uri)
             if data is None:
                 continue
             else:
                 provider, identifier = data
 
-            if provider in annotation:
-                if isinstance(annotation[provider], string_types):
-                    annotation[provider] = [annotation[provider]]
-                # FIXME: use a list
-                if identifier not in annotation[provider]:
-                    annotation[provider].append(identifier)
-            else:
-                # FIXME: always in list
-                annotation[provider] = identifier
+            if provider not in annotation:
+                annotation[provider] = []
+            annotation[provider].append(identifier)
 
     return annotation
 
 
-def _parse_annotation_info(uri):
+def parse_annotation_info(uri):
     """Parses provider and term from given identifiers annotation uri.
 
     Parameters

diff --git a/cobra/test/data/invalid_annotation_format.json b/cobra/test/data/invalid_annotation_format.json
@@ -12,6 +12,10 @@
         [
           "CHEBI",
           "http://identifiers.org/chebi/CHEBI:11981"
+        ],
+        [
+          "CHEBI",
+          "http://identifiers.org/chebi/CHEBI:17847"
         ]
       ]
     }

diff --git a/cobra/test/data/mini.pickle b/cobra/test/data/mini.pickle
diff --git a/cobra/test/test_io/test_annotation.py b/cobra/test/test_io/test_annotation.py
@@ -15,9 +15,9 @@ def _check_sbml_annotations(model):
     assert len(annotation) == 3
     for key in ["bigg.model", "doi", "taxonomy"]:
         assert key in annotation
-    assert annotation["bigg.model"] == "e_coli_core"
-    assert annotation["doi"] == "10.1128/ecosalplus.10.2.1"
-    assert annotation["taxonomy"] == "511145"
+    assert annotation["bigg.model"] == ["e_coli_core"]
+    assert annotation["doi"] == ["10.1128/ecosalplus.10.2.1"]
+    assert annotation["taxonomy"] == ["511145"]
 
     # gene annotation
     # {'asap': 'ABE-0006162', 'ncbigene': '946368', 'uniprot': 'P33221',
@@ -26,11 +26,11 @@ def _check_sbml_annotations(model):
     assert len(annotation) == 5
     for key in ["asap", "ncbigene", "uniprot", "ncbigi", "ecogene"]:
         assert key in annotation
-    assert annotation["asap"] == "ABE-0006162"
-    assert annotation["ncbigene"] == "946368"
-    assert annotation["uniprot"] == "P33221"
-    assert annotation["ncbigi"] == "gi:16129802"
-    assert annotation["ecogene"] == "EG11809"
+    assert annotation["asap"] == ["ABE-0006162"]
+    assert annotation["ncbigene"] == ["946368"]
+    assert annotation["uniprot"] == ["P33221"]
+    assert annotation["ncbigi"] == ["gi:16129802"]
+    assert annotation["ecogene"] == ["EG11809"]
 
     # compartment annotation
     # FIXME: add tests with first class compartment model
@@ -53,7 +53,7 @@ def _check_sbml_annotations(model):
                 "kegg.compound", "seed.compound", "hmdb", "biocyc"]:
         assert key in annotation
     assert annotation[
-               "inchi"] == "InChI=1S/C3H8O2/c1-3(5)2-4/h3-5H,2H2,1H3/t3-/m0/s1"  # noqa: E501
+               "inchi"] == ["InChI=1S/C3H8O2/c1-3(5)2-4/h3-5H,2H2,1H3/t3-/m0/s1"]  # noqa: E501
 
     # reaction annotation
     # {'kegg.reaction': 'R00228', 'sbo': 'SBO:0000375',
@@ -64,7 +64,7 @@ def _check_sbml_annotations(model):
     for key in ["kegg.reaction", "sbo", "ec-code", "rhea",
                 "metanetx.reaction", "bigg.reaction", "biocyc"]:
         assert key in annotation
-    assert annotation["biocyc"] == 'META:ACETALD-DEHYDROG-RXN'
+    assert annotation["biocyc"] == ['META:ACETALD-DEHYDROG-RXN']
 
 
 def test_read_sbml_annotations(data_directory):

diff --git a/cobra/test/test_io/test_annotation_format.py b/cobra/test/test_io/test_annotation_format.py
@@ -21,7 +21,11 @@ def test_load_json_model_valid(data_directory, tmp_path):
 
 
 def test_load_json_model_invalid(data_directory):
-    """Test that loading an invalid annotation from JSON raises TypeError"""
+    """Test loading an annotation in the form of list of list"""
     path = join(data_directory, "invalid_annotation_format.json")
-    with pytest.raises(TypeError):
-        model = load_json_model(path)
+    expected = {
+        'kegg.compound': ['C01468'],
+        'chebi': ['CHEBI:11981', 'CHEBI:17847']
+    }
+    model = load_json_model(path)
+    assert model.metabolites[0].annotation == expected
diff --git a/cobra/test/test_io/test_json.py b/cobra/test/test_io/test_json.py
@@ -39,3 +39,20 @@ def test_save_json_model(tmpdir, mini_model):
     with open(output_file, "r") as infile:
         loaded = json.load(infile)
     assert jsonschema.validate(loaded, cio.json.json_schema)
+
+
+def test_consistent_annotation_values(data_directory):
+    """Test that annotation values other than "sbo" are lists"""
+    model = cio.read_sbml_model(join(data_directory, "mini_fbc2.xml"))
+    # annotation of genes
+    for gene in model.genes:
+        for key, value in gene.annotation.items():
+            assert key == "sbo" or isinstance(value, list)
+    # annotation of metabolites
+    for metabolite in model.metabolites:
+        for key, value in metabolite.annotation.items():
+            assert key == "sbo" or isinstance(value, list)
+    # annotation of reactions
+    for reaction in model.reactions:
+        for key, value in reaction.annotation.items():
+            assert key == "sbo" or isinstance(value, list)
diff --git a/cobra/test/test_io/test_sbml.py b/cobra/test/test_io/test_sbml.py
@@ -360,15 +360,15 @@ def test_gprs(data_directory, tmp_path):
 
 
 def test_identifiers_annotation():
-    from cobra.io.sbml import _parse_annotation_info
+    from cobra.io.sbml import parse_annotation_info
 
     for uri in [
         "http://identifiers.org/chebi/CHEBI:000123",
         "https://identifiers.org/chebi/CHEBI:000123",
         "http://identifiers.org/CHEBI:000123",
         "https://identifiers.org/CHEBI:000123",
     ]:
-        data = _parse_annotation_info(uri)
+        data = parse_annotation_info(uri)
         assert data
         assert data[0] == "chebi"
         assert data[1] == "CHEBI:000123"
@@ -379,7 +379,7 @@ def test_identifiers_annotation():
         "http://identifiers.org/taxonomy:9602",
         "https://identifiers.org/taxonomy:9602",
     ]:
-        data = _parse_annotation_info(uri)
+        data = parse_annotation_info(uri)
         assert data
         assert data[0] == "taxonomy"
         assert data[1] == "9602"
@@ -388,7 +388,7 @@ def test_identifiers_annotation():
         "http://identifier.org/taxonomy/9602",
         "https://test.com",
     ]:
-        data = _parse_annotation_info(uri)
+        data = parse_annotation_info(uri)
         assert data is None
 
 
@@ -414,39 +414,39 @@ def test_smbl_with_notes(data_directory, tmp_path):
     }
     metabolite_annotations = {
         '2hb_e': {'sbo': 'SBO:0000247',
-                  'inchi': 'InChI=1S/C4H8O3/c1-2-3(5)4(6)7/h3,5H,2H2,1H3,'
-                           '(H,6,7)',
-                  'chebi': 'CHEBI:1148'},
+                  'inchi': ['InChI=1S/C4H8O3/c1-2-3(5)4(6)7/h3,5H,2H2,1H3,'
+                            '(H,6,7)'],
+                  'chebi': ['CHEBI:1148']},
         'nad_e': {'sbo': 'SBO:0000247',
-                  'inchi': 'InChI=1S/C21H27N7O14P2/c22-17-12-19('
-                           '25-7-24-17)28(8-26-12)21-16(32)14(30)11('
-                           '41-21)6-39-44(36,37)42-43(34,35)38-5-10-13(29)15('
-                           '31)20(40-10)27-3-1-2-9(4-27)18('
-                           '23)33/h1-4,7-8,10-11,13-16,20-21,29-32H,5-6H2,'
-                           '(H5-,22,23,24,25,33,34,35,36,37)/p-1/t10-,'
-                           '11-,13-,14-,15-,16-,20-,21-/m1/s1',
-                  'chebi': 'CHEBI:57540'},
-        'h_e': {'sbo': 'SBO:0000247', 'inchi': 'InChI=1S/p+1/i/hH',
-                'chebi': 'CHEBI:24636'},
+                  'inchi': ['InChI=1S/C21H27N7O14P2/c22-17-12-19('
+                            '25-7-24-17)28(8-26-12)21-16(32)14(30)11('
+                            '41-21)6-39-44(36,37)42-43(34,35)38-5-10-13(29)15('
+                            '31)20(40-10)27-3-1-2-9(4-27)18('
+                            '23)33/h1-4,7-8,10-11,13-16,20-21,29-32H,5-6H2,'
+                            '(H5-,22,23,24,25,33,34,35,36,37)/p-1/t10-,'
+                            '11-,13-,14-,15-,16-,20-,21-/m1/s1'],
+                  'chebi': ['CHEBI:57540']},
+        'h_e': {'sbo': 'SBO:0000247', 'inchi': ['InChI=1S/p+1/i/hH'],
+                'chebi': ['CHEBI:24636']},
         '2obut_e': {'sbo': 'SBO:0000247',
-                    'inchi': 'InChI=1S/C4H6O3/c1-2-3(5)4(6)7/h2H2,1H3,(H,6,'
-                             '7)/p-1',
-                    'chebi': 'CHEBI:16763'},
+                    'inchi': ['InChI=1S/C4H6O3/c1-2-3(5)4(6)7/h2H2,1H3,(H,6,'
+                              '7)/p-1'],
+                    'chebi': ['CHEBI:16763']},
         'nadh_e': {'sbo': 'SBO:0000247',
-                   'inchi': 'InChI=1S/C21H29N7O14P2/c22-17-12-19('
-                            '25-7-24-17)28(8-26-12)21-16(32)14(30)11('
-                            '41-21)6-39-44(36,37)42-43(34,35)38-5-10-13('
-                            '29)15(31)20(40-10)27-3-1-2-9(4-27)18('
-                            '23)33/h1,3-4,7-8,10-11,13-16,20-21,29-32H,2,'
-                            '5-6H2,(H2,23,33)(H,34,35)(H,36,37)(H2,22,24,'
-                            '25)/p-2/t10-,11-,13-,14-,15-,16-,20-,21-/m1/s1',
-                   'chebi': 'CHEBI:57945'}
+                   'inchi': ['InChI=1S/C21H29N7O14P2/c22-17-12-19('
+                             '25-7-24-17)28(8-26-12)21-16(32)14(30)11('
+                             '41-21)6-39-44(36,37)42-43(34,35)38-5-10-13('
+                             '29)15(31)20(40-10)27-3-1-2-9(4-27)18('
+                             '23)33/h1,3-4,7-8,10-11,13-16,20-21,29-32H,2,'
+                             '5-6H2,(H2,23,33)(H,34,35)(H,36,37)(H2,22,24,'
+                             '25)/p-2/t10-,11-,13-,14-,15-,16-,20-,21-/m1/s1'],
+                   'chebi': ['CHEBI:57945']}
     }
     reaction_notes = {'CONFIDENCE_LEVEL': '4', 'NOTES': 'NCD',
                       'SUBSYSTEM': 'Propanoate metabolism',
                       'GENE_ASSOCIATION': '(HGNC:8546 and HGNC:8548) or'
                                           ' (HGNC:8547 and HGNC:8548)'}
-    reaction_annotations = {'sbo': 'SBO:0000176', 'ec-code': '1.1.1.27',
+    reaction_annotations = {'sbo': 'SBO:0000176', 'ec-code': ['1.1.1.27'],
                             'pubmed': ['10108', '21765']}
 
     for met_id in metabolite_notes: