Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/updateDatabases #122

Open
wants to merge 6 commits into
base: gecko2
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 27 additions & 24 deletions geckomat/change_model/addProtein.m
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function model = addProtein(model,P,kegg,swissprot)
%addProtein
%
% model = addProtein(model,P,kegg,swissprot)
% Adds an exchange reaction for protein P and updates model.enzymes,
% model.MWs and model.pathways to account for P.
Expand All @@ -13,9 +15,8 @@
% model Model with the added protein
%
% Cheng Zhang & Benjamin J. Sanchez. Last edited: 2018-05-28
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Ivan Domenzain. Last edited: 2020-02-07

function model = addProtein(model,P,kegg,swissprot)

%Update model.enzymes vector:
prot_name = ['prot_' P];
Expand Down Expand Up @@ -64,27 +65,29 @@

%Update model.genes & model.pathways vectors:
match_path = false;
for i = 1:length(kegg)
if strcmp(P,kegg{i,1})
%Gene:
if ~isempty(kegg{i,3}) && ~match_gen
match_gen = true;
gene = kegg{i,3};
end
%Pathway:
if ~isempty(kegg{i,6}) && ~match_path
match_path = true;
model.pathways{pos_e,1} = kegg{i,6};
end
%Molecular Weight (if nothing found in uniprot):
if kegg{i,5} > 0 && ~match_MW
match_MW = true;
model.MWs(pos_e,1) = kegg{i,5}/1000;
end
%Sequence (if nothing found in uniprot):
if ~isempty(kegg{i,7}) && ~match_seq
match_seq = true;
model.sequences(pos_e,1) = kegg(i,7);
if ~isempty(kegg)
for i = 1:length(kegg)
if strcmp(P,kegg{i,1})
%Gene:
if ~isempty(kegg{i,3}) && ~match_gen
match_gen = true;
gene = kegg{i,3};
end
%Pathway:
if ~isempty(kegg{i,6}) && ~match_path
match_path = true;
model.pathways{pos_e,1} = kegg{i,6};
end
%Molecular Weight (if nothing found in uniprot):
if kegg{i,5} > 0 && ~match_MW
match_MW = true;
model.MWs(pos_e,1) = kegg{i,5}/1000;
end
%Sequence (if nothing found in uniprot):
if ~isempty(kegg{i,7}) && ~match_seq
match_seq = true;
model.sequences(pos_e,1) = kegg(i,7);
end
end
end
end
Expand Down
17 changes: 10 additions & 7 deletions geckomat/change_model/convertToEnzymeModel.m
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function eModel = convertToEnzymeModel(irrevModel,Genes,uniprots,kcats)
%convertToEnzymeModel
%
% eModel = convertToEnzymeModel(model,Genes,uniprots,kcats)
% Converts standard GEM to GEM accounting for enzymes as pseudo
% metabolites, with -(1/kcat) as the corresponding stoich. coeffs.
Expand All @@ -13,17 +15,14 @@
% eModel Modified GEM structure (1x1 struct)
%
% Cheng Zhang. Last edited: 2018-05-24
% Ivan Domenzain. Last edited: 2018-09-07
% Benjamin J. Sanchez. Last edited: 2018-11-11
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function eModel = convertToEnzymeModel(irrevModel,Genes,uniprots,kcats)
% Ivan Domenzain. Last edited: 2020-02-07

%Load databases:
data = load('../../databases/ProtDatabase.mat');
swissprot = data.swissprot;
kegg = data.kegg;

[nrows,~] = size(kegg);
eModel = irrevModel;
enzymes = cell(5000,1);
[m,n] = size(uniprots);
Expand Down Expand Up @@ -85,7 +84,11 @@
eModel.sequences = cell(0,1);
eModel.pathways = cell(0,1);
for i = 1:length(enzymes)
eModel = addProtein(eModel,enzymes{i},kegg,swissprot);
if nrows>1
eModel = addProtein(eModel,enzymes{i},kegg,swissprot);
else
eModel = addProtein(eModel,enzymes{i},[],swissprot);
end
end

end
Expand Down
9 changes: 6 additions & 3 deletions geckomat/get_enzyme_data/getEnzymeCodes.m
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
% *count(4): #other rxns
%
% Benjamin Sanchez. Last edited: 2017-03-05
% Ivan Domenzain. Last edited: 2018-09-07
% Ivan Domenzain. Last edited: 2020-02-07
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function model_data = getEnzymeCodes(model,action)

Expand Down Expand Up @@ -169,8 +169,11 @@
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function database = standardizeDatabase(database)
%standardizeDatabase
%
% database = standardizeDatabase(database)
% Replaces the space-separated string stored in column 3 of every row by
% the cell array of tokens returned by strsplit.
%
% database   Cell array with one entry per row; column 3 must hold a
%            character vector for every row that is processed
%
% database   Same cell array, with column 3 of each row split on ' '
%
% A table with a single row is returned untouched (presumably to skip an
% empty one-row placeholder table - confirm against the callers).
[m,~] = size(database);
if m>1
    %Loop over the row count: length() returns the LARGEST dimension, which
    %runs past the last row whenever the table has more columns than rows.
    for i = 1:m
        database{i,3} = strsplit(database{i,3},' ');
    end
end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Expand Down
50 changes: 17 additions & 33 deletions geckomat/get_enzyme_data/updateDatabases.m
Original file line number Diff line number Diff line change
Expand Up @@ -10,41 +10,36 @@
%
% Usage: [swissprot,kegg] = updateDatabases
%
% Benjamin Sanchez, Cheng Zhang, Ivan Domenzain. Last edited: 2019-07-12
% Ivan Domenzain. Last edited: 2020-01-25
%

current = pwd;
cd ..
parameters = getModelParameters;
keggID = parameters.keggID;
cd (current)
if ~regexp(keggID,'[a-z]{3,4}')
error('Please specify the KEGG organism ID in the script getModelParameters.m')
kegg = cell(1,7);
if isfield(parameters,'keggID')
keggID = parameters.keggID;
mkdir ../../databases/KEGG
try
%Download KEGG data:
downloadKEGGdata(keggID)
%Build KEGG table
kegg = buildKEGGtable(keggID);
catch
warning(['Unsuccessful query for "' keggID '", check the presence of the provided ID in the KEGG database (https://www.genome.jp/kegg/catalog/org_list.html)'])
end
%Remove KEGG files for compliance of repository:
delete ../../databases/KEGG/*.txt
rmdir ../../databases/KEGG
end

%Build Swissprot table:
swissprot = buildSWISSPROTtable;

%Download KEGG data:
mkdir ../../databases/KEGG
downloadKEGGdata(keggID)

%Build KEGG table
kegg = buildKEGGtable(keggID);

%Remove KEGG files for compliance of repository:
delete ../../databases/KEGG/*.txt
rmdir ../../databases/KEGG

%Save both databases as .mat files:
save('../../databases/ProtDatabase.mat','swissprot','kegg');

end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function swissprot = buildSWISSPROTtable

%Build Swissprot table (uniprot code - protein name - gene names - EC number - sequence):
fileID_uni = fopen('../../databases/uniprot.tab');
swissprot = textscan(fileID_uni,'%s %s %s %s %s','delimiter','\t');
Expand All @@ -63,19 +58,14 @@
swissprot{i,6} = sequence;
end
disp('Building Swiss-Prot database.')

end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function downloadKEGGdata(keggID)

base = 'http://rest.kegg.jp/';
operation = 'list/';
gene_list = urlread([base operation keggID]);
gene_list = regexpi(gene_list, '[^\n]+','match')';
gene_id = regexpi(gene_list,['(?<=' keggID ':)\S+'],'match');

% Retrieve information for every gene in the list (with a maximum of 10,000
% to avoid bulk downloads)
operation = 'get/';
Expand All @@ -90,13 +80,9 @@ function downloadKEGGdata(keggID)
disp(['Cannot find ' gene_id{i}{1} ' in KEGG']);
end
end

end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function kegg = buildKEGGtable(keggID)

%Build KEGG table (uniprot code - protein name - systematic gene name - EC number - MW - pathway - sequence):
file_names = dir('../../databases/KEGG/');
file_names(1:2) = [];
Expand Down Expand Up @@ -188,8 +174,6 @@ function downloadKEGGdata(keggID)
kegg{n,7} = sequence;
disp(['Updating KEGG database: Ready with gene ' gene_name])
end
kegg(n+1:end,:) = [];

kegg(n+1:end,:) = [];
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%