Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Consistent list annotation #959

Open
wants to merge 8 commits into
base: devel
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions cobra/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,5 @@
from cobra.io.dict import model_from_dict, model_to_dict
from cobra.io.json import from_json, load_json_model, save_json_model, to_json
from cobra.io.mat import load_matlab_model, save_matlab_model
from cobra.io.sbml import read_sbml_model, write_sbml_model, \
validate_sbml_model
from cobra.io.sbml import read_sbml_model, validate_sbml_model, write_sbml_model
from cobra.io.yaml import from_yaml, load_yaml_model, save_yaml_model, to_yaml
38 changes: 37 additions & 1 deletion cobra/io/dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@

from __future__ import absolute_import

from collections import OrderedDict
from collections import OrderedDict, defaultdict
from operator import attrgetter, itemgetter

from numpy import bool_, float_
from six import iteritems, string_types

from cobra.core import Gene, Metabolite, Model, Reaction
from cobra.io.sbml import parse_annotation_info
from cobra.util.solver import set_objective


Expand Down Expand Up @@ -53,6 +54,31 @@
}


def _fix_annotation(annotation):
    """Normalize an annotation to a ``{provider: [identifier, ...]}`` dict.

    Parameters
    ----------
    annotation : dict or list
        Either a mapping from provider to identifier(s), or a list of
        two-element items whose second element is an identifiers.org
        style URI (the first element is ignored here).

    Returns
    -------
    dict
        For ``dict`` input the *same* object is returned after being
        updated in place (callers that discard the return value rely on
        this); for ``list`` input a new plain ``dict`` is returned.

    Notes
    -----
    The ``"sbo"`` entry is deliberately left as a plain string.
    """
    # If the annotation is a list of lists, first convert it into the
    # dict format by parsing provider and identifier out of each URI.
    if isinstance(annotation, list):
        grouped = defaultdict(list)
        for item in annotation:
            data = parse_annotation_info(item[1])
            # URIs that cannot be parsed are skipped.
            if data is None:
                continue
            provider, identifier = data
            grouped[provider].append(identifier)
        # Hand back a plain dict: a defaultdict would silently insert an
        # empty list whenever a missing provider is looked up later on.
        annotation = dict(grouped)

    # Convert single annotation values which are represented as strings
    # into one-element lists to have a consistent format.
    # Iterate over a snapshot of the keys since values are reassigned.
    for key in list(annotation):
        value = annotation[key]
        if key != "sbo" and isinstance(value, string_types):
            annotation[key] = [value]

    return annotation


def _fix_type(value):
"""convert possible types to str, float, and bool"""
# Because numpy floats can not be pickled to json
Expand Down Expand Up @@ -82,6 +108,8 @@ def _update_optional(cobra_object, new_dict, optional_attribute_dict,
value = getattr(cobra_object, key)
if value is None or value == default:
continue
if key == "annotation":
_fix_annotation(value)
new_dict[key] = _fix_type(value)


Expand All @@ -97,6 +125,8 @@ def metabolite_to_dict(metabolite):
def metabolite_from_dict(metabolite):
    """Create a ``Metabolite`` from its dictionary representation.

    Every key/value pair is set as an attribute on a fresh ``Metabolite``;
    the ``"annotation"`` value is passed through ``_fix_annotation`` first.
    """
    result = Metabolite()
    for attribute, raw_value in iteritems(metabolite):
        if attribute == "annotation":
            setattr(result, attribute, _fix_annotation(raw_value))
        else:
            setattr(result, attribute, raw_value)
    return result

Expand All @@ -113,6 +143,8 @@ def gene_to_dict(gene):
def gene_from_dict(gene):
    """Create a ``Gene`` from its dictionary representation.

    The gene is constructed from ``gene["id"]`` and then every key/value
    pair is set as an attribute; the ``"annotation"`` value is passed
    through ``_fix_annotation`` first.
    """
    result = Gene(gene["id"])
    for attribute, raw_value in iteritems(gene):
        value = (
            _fix_annotation(raw_value)
            if attribute == "annotation"
            else raw_value
        )
        setattr(result, attribute, value)
    return result

Expand Down Expand Up @@ -142,6 +174,8 @@ def reaction_from_dict(reaction, model):
(model.metabolites.get_by_id(str(met)), coeff)
for met, coeff in iteritems(v)))
else:
if k == "annotation":
v = _fix_annotation(v)
setattr(new_reaction, k, v)
return new_reaction

Expand Down Expand Up @@ -225,5 +259,7 @@ def model_from_dict(obj):
set_objective(model, coefficients)
for k, v in iteritems(obj):
if k in {'id', 'name', 'notes', 'compartments', 'annotation'}:
if k == "annotation":
v = _fix_annotation(v)
setattr(model, k, v)
return model
16 changes: 5 additions & 11 deletions cobra/io/sbml.py
Original file line number Diff line number Diff line change
Expand Up @@ -1427,26 +1427,20 @@ def _parse_annotations(sbase):
# FIXME: read and store the qualifier

uri = cvterm.getResourceURI(k)
data = _parse_annotation_info(uri)
data = parse_annotation_info(uri)
if data is None:
continue
else:
provider, identifier = data

if provider in annotation:
if isinstance(annotation[provider], string_types):
annotation[provider] = [annotation[provider]]
# FIXME: use a list
if identifier not in annotation[provider]:
annotation[provider].append(identifier)
else:
# FIXME: always in list
annotation[provider] = identifier
if provider not in annotation:
annotation[provider] = []
annotation[provider].append(identifier)

return annotation


def _parse_annotation_info(uri):
def parse_annotation_info(uri):
"""Parses provider and term from given identifiers annotation uri.

Parameters
Expand Down
4 changes: 4 additions & 0 deletions cobra/test/data/invalid_annotation_format.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
[
"CHEBI",
"http://identifiers.org/chebi/CHEBI:11981"
],
[
"CHEBI",
"http://identifiers.org/chebi/CHEBI:17847"
]
]
}
Expand Down
Binary file modified cobra/test/data/mini.pickle
Binary file not shown.
20 changes: 10 additions & 10 deletions cobra/test/test_io/test_annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ def _check_sbml_annotations(model):
assert len(annotation) == 3
for key in ["bigg.model", "doi", "taxonomy"]:
assert key in annotation
assert annotation["bigg.model"] == "e_coli_core"
assert annotation["doi"] == "10.1128/ecosalplus.10.2.1"
assert annotation["taxonomy"] == "511145"
assert annotation["bigg.model"] == ["e_coli_core"]
assert annotation["doi"] == ["10.1128/ecosalplus.10.2.1"]
assert annotation["taxonomy"] == ["511145"]

# gene annotation
# {'asap': 'ABE-0006162', 'ncbigene': '946368', 'uniprot': 'P33221',
Expand All @@ -26,11 +26,11 @@ def _check_sbml_annotations(model):
assert len(annotation) == 5
for key in ["asap", "ncbigene", "uniprot", "ncbigi", "ecogene"]:
assert key in annotation
assert annotation["asap"] == "ABE-0006162"
assert annotation["ncbigene"] == "946368"
assert annotation["uniprot"] == "P33221"
assert annotation["ncbigi"] == "gi:16129802"
assert annotation["ecogene"] == "EG11809"
assert annotation["asap"] == ["ABE-0006162"]
assert annotation["ncbigene"] == ["946368"]
assert annotation["uniprot"] == ["P33221"]
assert annotation["ncbigi"] == ["gi:16129802"]
assert annotation["ecogene"] == ["EG11809"]

# compartment annotation
# FIXME: add tests with first class compartment model
Expand All @@ -53,7 +53,7 @@ def _check_sbml_annotations(model):
"kegg.compound", "seed.compound", "hmdb", "biocyc"]:
assert key in annotation
assert annotation[
"inchi"] == "InChI=1S/C3H8O2/c1-3(5)2-4/h3-5H,2H2,1H3/t3-/m0/s1" # noqa: E501
"inchi"] == ["InChI=1S/C3H8O2/c1-3(5)2-4/h3-5H,2H2,1H3/t3-/m0/s1"] # noqa: E501

# reaction annotation
# {'kegg.reaction': 'R00228', 'sbo': 'SBO:0000375',
Expand All @@ -64,7 +64,7 @@ def _check_sbml_annotations(model):
for key in ["kegg.reaction", "sbo", "ec-code", "rhea",
"metanetx.reaction", "bigg.reaction", "biocyc"]:
assert key in annotation
assert annotation["biocyc"] == 'META:ACETALD-DEHYDROG-RXN'
assert annotation["biocyc"] == ['META:ACETALD-DEHYDROG-RXN']


def test_read_sbml_annotations(data_directory):
Expand Down
10 changes: 7 additions & 3 deletions cobra/test/test_io/test_annotation_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@ def test_load_json_model_valid(data_directory, tmp_path):


def test_load_json_model_invalid(data_directory):
"""Test that loading an invalid annotation from JSON raises TypeError"""
"""Test loading an annotation in the form of list of list"""
path = join(data_directory, "invalid_annotation_format.json")
with pytest.raises(TypeError):
model = load_json_model(path)
expected = {
'kegg.compound': ['C01468'],
'chebi': ['CHEBI:11981', 'CHEBI:17847']
}
model = load_json_model(path)
assert model.metabolites[0].annotation == expected
17 changes: 17 additions & 0 deletions cobra/test/test_io/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,20 @@ def test_save_json_model(tmpdir, mini_model):
with open(output_file, "r") as infile:
loaded = json.load(infile)
assert jsonschema.validate(loaded, cio.json.json_schema)


def test_consistent_annotation_values(data_directory):
    """Test that annotations read from SBML are consistently lists.

    Genes, metabolites and reactions are all checked. The ``sbo`` entry
    is skipped because it is kept as a plain string rather than a list.
    """
    model = cio.read_sbml_model(join(data_directory, "mini_fbc2.xml"))
    # NOTE: the reaction check previously iterated ``model.genes`` again,
    # so reaction annotations were never actually verified.
    for elements in (model.genes, model.metabolites, model.reactions):
        for element in elements:
            for key, value in element.annotation.items():
                if key == "sbo":
                    continue
                assert isinstance(value, list), (
                    "annotation %r of %r is not a list" % (key, element)
                )
58 changes: 29 additions & 29 deletions cobra/test/test_io/test_sbml.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,15 +360,15 @@ def test_gprs(data_directory, tmp_path):


def test_identifiers_annotation():
from cobra.io.sbml import _parse_annotation_info
from cobra.io.sbml import parse_annotation_info

for uri in [
"http://identifiers.org/chebi/CHEBI:000123",
"https://identifiers.org/chebi/CHEBI:000123",
"http://identifiers.org/CHEBI:000123",
"https://identifiers.org/CHEBI:000123",
]:
data = _parse_annotation_info(uri)
data = parse_annotation_info(uri)
assert data
assert data[0] == "chebi"
assert data[1] == "CHEBI:000123"
Expand All @@ -379,7 +379,7 @@ def test_identifiers_annotation():
"http://identifiers.org/taxonomy:9602",
"https://identifiers.org/taxonomy:9602",
]:
data = _parse_annotation_info(uri)
data = parse_annotation_info(uri)
assert data
assert data[0] == "taxonomy"
assert data[1] == "9602"
Expand All @@ -388,7 +388,7 @@ def test_identifiers_annotation():
"http://identifier.org/taxonomy/9602",
"https://test.com",
]:
data = _parse_annotation_info(uri)
data = parse_annotation_info(uri)
assert data is None


Expand All @@ -414,39 +414,39 @@ def test_smbl_with_notes(data_directory, tmp_path):
}
metabolite_annotations = {
'2hb_e': {'sbo': 'SBO:0000247',
'inchi': 'InChI=1S/C4H8O3/c1-2-3(5)4(6)7/h3,5H,2H2,1H3,'
'(H,6,7)',
'chebi': 'CHEBI:1148'},
'inchi': ['InChI=1S/C4H8O3/c1-2-3(5)4(6)7/h3,5H,2H2,1H3,'
'(H,6,7)'],
'chebi': ['CHEBI:1148']},
'nad_e': {'sbo': 'SBO:0000247',
'inchi': 'InChI=1S/C21H27N7O14P2/c22-17-12-19('
'25-7-24-17)28(8-26-12)21-16(32)14(30)11('
'41-21)6-39-44(36,37)42-43(34,35)38-5-10-13(29)15('
'31)20(40-10)27-3-1-2-9(4-27)18('
'23)33/h1-4,7-8,10-11,13-16,20-21,29-32H,5-6H2,'
'(H5-,22,23,24,25,33,34,35,36,37)/p-1/t10-,'
'11-,13-,14-,15-,16-,20-,21-/m1/s1',
'chebi': 'CHEBI:57540'},
'h_e': {'sbo': 'SBO:0000247', 'inchi': 'InChI=1S/p+1/i/hH',
'chebi': 'CHEBI:24636'},
'inchi': ['InChI=1S/C21H27N7O14P2/c22-17-12-19('
'25-7-24-17)28(8-26-12)21-16(32)14(30)11('
'41-21)6-39-44(36,37)42-43(34,35)38-5-10-13(29)15('
'31)20(40-10)27-3-1-2-9(4-27)18('
'23)33/h1-4,7-8,10-11,13-16,20-21,29-32H,5-6H2,'
'(H5-,22,23,24,25,33,34,35,36,37)/p-1/t10-,'
'11-,13-,14-,15-,16-,20-,21-/m1/s1'],
'chebi': ['CHEBI:57540']},
'h_e': {'sbo': 'SBO:0000247', 'inchi': ['InChI=1S/p+1/i/hH'],
'chebi': ['CHEBI:24636']},
'2obut_e': {'sbo': 'SBO:0000247',
'inchi': 'InChI=1S/C4H6O3/c1-2-3(5)4(6)7/h2H2,1H3,(H,6,'
'7)/p-1',
'chebi': 'CHEBI:16763'},
'inchi': ['InChI=1S/C4H6O3/c1-2-3(5)4(6)7/h2H2,1H3,(H,6,'
'7)/p-1'],
'chebi': ['CHEBI:16763']},
'nadh_e': {'sbo': 'SBO:0000247',
'inchi': 'InChI=1S/C21H29N7O14P2/c22-17-12-19('
'25-7-24-17)28(8-26-12)21-16(32)14(30)11('
'41-21)6-39-44(36,37)42-43(34,35)38-5-10-13('
'29)15(31)20(40-10)27-3-1-2-9(4-27)18('
'23)33/h1,3-4,7-8,10-11,13-16,20-21,29-32H,2,'
'5-6H2,(H2,23,33)(H,34,35)(H,36,37)(H2,22,24,'
'25)/p-2/t10-,11-,13-,14-,15-,16-,20-,21-/m1/s1',
'chebi': 'CHEBI:57945'}
'inchi': ['InChI=1S/C21H29N7O14P2/c22-17-12-19('
'25-7-24-17)28(8-26-12)21-16(32)14(30)11('
'41-21)6-39-44(36,37)42-43(34,35)38-5-10-13('
'29)15(31)20(40-10)27-3-1-2-9(4-27)18('
'23)33/h1,3-4,7-8,10-11,13-16,20-21,29-32H,2,'
'5-6H2,(H2,23,33)(H,34,35)(H,36,37)(H2,22,24,'
'25)/p-2/t10-,11-,13-,14-,15-,16-,20-,21-/m1/s1'],
'chebi': ['CHEBI:57945']}
}
reaction_notes = {'CONFIDENCE_LEVEL': '4', 'NOTES': 'NCD',
'SUBSYSTEM': 'Propanoate metabolism',
'GENE_ASSOCIATION': '(HGNC:8546 and HGNC:8548) or'
' (HGNC:8547 and HGNC:8548)'}
reaction_annotations = {'sbo': 'SBO:0000176', 'ec-code': '1.1.1.27',
reaction_annotations = {'sbo': 'SBO:0000176', 'ec-code': ['1.1.1.27'],
'pubmed': ['10108', '21765']}

for met_id in metabolite_notes:
Expand Down