'ElMo-Knock'

metabolicnetwork.cpp

Go to the documentation of this file.
00001 #include <iostream>
00002 #include <fstream>   // file I/O
00003 #include <sstream>
00004 #include <stdlib.h>
00005 #include <vector>
00006 #include <algorithm>
00007 #include <math.h>
00008 #include <iomanip>
00009 #include <limits>
00010 #include "metabolicnetwork.h"
00011 #include "util.h"
00012 #include "mpi_utils.h"
00013 
00014 
00015 
00016 MetabolicNetwork::MetabolicNetwork() {}
00017 MetabolicNetwork::MetabolicNetwork(MetabolicNetwork &metab_network) {
00018 
00019 
00020         this->efm_yields=efm_yields;
00021 
00022         //init reacs_names
00023         this->reacs_names=metab_network.reacs_names;
00024         this->substrate_names=metab_network.substrate_names;
00025         //init target_names
00026         this->target_names=metab_network.target_names;
00027         this->yields=metab_network.yields;
00028         this->num_modes=metab_network.num_modes;
00029         this->num_reacs=metab_network.num_reacs;
00030         this->num_metabs=metab_network.num_metabs;
00031 
00032 
00033 }
00034 
00035 MetabolicNetwork::MetabolicNetwork(Options options):bit_efm()  {
00036 
00037         vector< vector<double> > stoich;
00038         int nump=1,proc_id=0;
00039         util::get_proc_id_nump(proc_id,nump);
00040         t_UnsignedInt num_ems;
00041 
00042         this->options=options;
00043 
00044         this->options.load_efm_meta_file(stoich, this->reacs_names, num_ems);
00045 
00046         ifstream fp_in;  // file for reading
00047         ofstream fp_out;
00048 
00049         vector<string> init_knock_reacs=options.get_init_knock_reacs();
00050         vector<string> target_reacs=options.get_target_reacs();
00051         vector<string> substrate_reacs=options.get_substrate_reacs();
00052         vector<double> min_yields=options.get_min_yields();
00053         vector<double> min_core_yields=options.get_min_core_yields();
00054         vector<string> transport_reacs=options.get_transport_reacs();
00055 
00056 
00057         this->compressed_mode=1;
00058         bit_efm.load_efm_bit(options.get_efm_bit_fname(), num_ems, stoich);
00059 
00060         /*****************INIT DATA STRUCTURES FOR WHAT IS LOADED FROM configs.knock******************/
00061         //read substrate reactions
00062         for (vector<string>::iterator it=substrate_reacs.begin();it!=substrate_reacs.end();it++) {
00063                 if ((*it).size()) {
00064                     vector<string>::iterator reac_ind=find(this->reacs_names.begin(),this->reacs_names.end(),*it);
00065                     this->substrate_names[*it]=reac_ind-reacs_names.begin();
00066                 }               
00067         }
00068 
00069 
00070         map<string,int>::iterator pi;
00071         //if (proc_id==0) {
00072         //      cout << endl << "\t"<<util::get_time_str()<<": substrates: ";
00073         //      for (pi=substrate_names.begin();pi!=substrate_names.end();pi++)
00074         //              cout << " "<<pi->first;
00075         //      cout << endl;
00076         //}
00077 
00078 
00079         //read transport reactions
00080         for (vector<string>::iterator it=transport_reacs.begin();it!=transport_reacs.end();it++) {
00081                 if ((*it).size()) {
00082                     vector<string>::iterator reac_ind=find(this->reacs_names.begin(),this->reacs_names.end(),*it);
00083                     this->transport_names[*it]=reac_ind-reacs_names.begin();
00084                 }               
00085         }
00086         
00087         //if (proc_id==0) {
00088         //      cout << endl << "\t"<<util::get_time_str()<<": transport reactions: ";
00089         //      for (pi=transport_names.begin();pi!=transport_names.end();pi++)
00090         //              cout << " "<<pi->first;
00091         //      cout << endl;
00092         //}
00093 
00094         
00095         if (options.get_use_xchg_reacs().compare("yes")) {
00096                 //remove transport reacs from knock_cand_reacs
00097                 int i=0;
00098                 for (vector<string>::iterator it=this->reacs_names.begin();it!=this->reacs_names.end();it++,i++) {
00099                         //is reaction *it in transport_names
00100                         map<string,int>::iterator mpi=this->transport_names.find(*it);
00101                         if (mpi==this->transport_names.end()) {
00102                                 knock_cand_reacs.push_back(i);
00103                         }
00104                 }
00105         } else {
00106                 for (int i=0;i<this->reacs_names.size();i++)
00107                         knock_cand_reacs.push_back(i);
00108         }
00109 
00110 
00111         if (proc_id==0) {
00112                 cout << endl << "\t"<<util::get_time_str()<<": knockout candidate reactions: ";
00113                 for (int i=0;i<this->knock_cand_reacs.size();i++)
00114                         cout << " "<<this->reacs_names[this->knock_cand_reacs[i]];
00115                 cout << endl;
00116         }
00117                 
00118         
00119         //read target reactions
00120         
00121         for (vector<string>::iterator it=target_reacs.begin();it!=target_reacs.end();it++) {
00122                 if ((*it).size()) {
00123                         vector<string>::iterator reac_ind;
00124                         reac_ind=find(reacs_names.begin(),reacs_names.end(),*it);
00125                         this->target_names[*it]=reac_ind-reacs_names.begin();
00126                 }               
00127         }
00128         
00129         //read init knockout reactions (e.g. for anaerobic case OPM1, OPM2)
00130 
00131         for (vector<string>::iterator it=init_knock_reacs.begin();it!=init_knock_reacs.end();it++) {
00132 
00133                 if ((*it).size()) {
00134                         vector<string>::iterator reac_ind;
00135                         reac_ind=find(reacs_names.begin(),reacs_names.end(),*it);
00136                         this->init_knocks_names[*it]=reac_ind-reacs_names.begin();
00137                 }               
00138         }
00139 
00140         //if (proc_id==0) {
00141         //      cout << endl << "\t"<<util::get_time_str()<<": initial knockout reactions"; // << this->init_knocks_names.size()<<endl;
00142         //      map<string,int>::iterator t;
00143         //      for (t=this->init_knocks_names.begin();t!=this->init_knocks_names.end();t++)
00144         //              cout <<" ("<< t->first<<","<<t->second<<")";
00145         //      cout << endl;
00146         //}
00147                 //read target min yield values
00148 
00149         map<string,int>::iterator p=this->target_names.begin();
00150 
00151         for (vector<double>::iterator dit=min_yields.begin();dit!=min_yields.end();dit++) {
00152                 this->yields[p->first]=*dit; //yield;
00153                 p++;            
00154         }
00155 
00156 
00157         map<string,double>::iterator q;
00158 
00159         if (proc_id==0) {
00160                 cout << endl <<"\t"<<util::get_time_str()<<": target reactions: ";
00161                 for (p=this->target_names.begin() ,q=this->yields.begin();p!=this->target_names.end();p++,q++ )
00162                         cout << " (" << p->first <<","<<p->second<<")|"<<q->second;             
00163         }
00164         /***********************************************************************/       
00165 
00166 
00167         init_yields();
00168 
00169         remove_zero_substrate_modes();
00170 
00171         if (proc_id==0)
00172                 cout << endl << "\t"<<util::get_time_str()<<": # of EFMs after removing modes with zero-flux substrates "<< get_num_modes()<<endl;
00173         
00174         bit_efm.knock_efms(this->init_knocks_names, this->efm_yields);
00175 
00176         if (proc_id==0)
00177                 cout << endl << "\t"<<util::get_time_str()<<": # of EFMs after initial knockout "<< get_num_modes();
00178 
00179 
00180 
00181 
00182         this->num_reacs=reacs_names.size();
00183         this->num_modes=get_num_modes();        
00184 
00185         
00186 
00187         if (proc_id==0) {
00188                 cout << endl << "\t"<<util::get_time_str()<<": # of elementary modes: " << this->num_modes;
00189                 cout << endl << "\t"<<util::get_time_str()<<": # of reactions: " << this->num_reacs;
00190         }
00191         
00192         //*******************************
00193         //read target min core EFM yield values
00194 
00195         init_max_yields();
00196         p=this->target_names.begin();
00197         if (proc_id==0)
00198                 cout << endl <<"\t"<<util::get_time_str()<<": minimum production-efficient mode yields [0-1]: ";
00199         
00200         for (vector<double>::iterator dit=min_core_yields.begin();dit!=min_core_yields.end();dit++) {   
00201                 if (proc_id==0)
00202                         cout << "("<< p->first<<","<<(*dit)*(this->max_yields[p->first])<<") ";
00203                 this->core_min_yields.push_back((*dit)*(this->max_yields[p->first]));   //multiply core_yield with max_yield
00204                 p++;            
00205         }
00206 
00207         if (proc_id==0)
00208                 cout << endl;
00209         //end read target min core EFM yield values
00210         
00211         //separate core modes from efms
00212         extract_production_eff_efms();
00213         //find indices of efficient and inefficient modes
00214         extract_growth_eff_and_ineff_modes();
00215         
00216         
00217         if (proc_id==0) {
00218                 cout << endl << "\t"<<util::get_time_str()<<": # of production-efficient modes: " << this->core_efm_indices.size();
00219                 cout << endl << "\t"<<util::get_time_str()<<": # of growth-efficient modes: " << this->eff_efm_indices.size();
00220                 cout << endl << "\t"<<util::get_time_str()<<": # of inefficient modes: " << this->ineff_efm_indices.size();
00221         }
00222 
00223 }
00224 
00225 
00226 
00237 int MetabolicNetwork::init_yields()  {
00238         int nump=1,proc_id=0;
00239         util::get_proc_id_nump(proc_id,nump);
00240 
00241 
00242         if (proc_id==0) {
00243                 fstream fin;
00244                 fin.open(this->options.get_yields_fname().c_str(),ios::in);
00245                 if( fin.is_open() ) {
00246 
00247                         cout << endl << "\t"<<util::get_time_str()<<": reading mode yields from file "<<this->options.get_yields_fname().c_str()<<".... ";
00248                         string str;
00249                                 this->efm_yields.resize(this->target_names.size(),vector<double>(get_num_modes()));
00250                                 for (long i=0;i<get_num_modes();i++)
00251                                         for (long j=0;j<this->target_names.size();j++) {
00252                                                 fin >> str;
00253                                                 if (str.compare("inf")==0)
00254                                                         this->efm_yields[j][i]=numeric_limits<double>::max();
00255                                                 else if (str.compare("-inf")==0) 
00256                                                         this->efm_yields[j][i]=numeric_limits<double>::min();
00257                                                 else {
00258                                                         istringstream ss(str);
00259                                                         ss>>this->efm_yields[j][i];
00260                                                 }
00261                                         }
00262                 }
00263                 fin.close();
00264         }
00265         mpi_bcast_matrix( this->efm_yields);
00266 
00267         return 0;
00268 }
00279 int MetabolicNetwork::init_max_yields()  {
00280 
00281         int nump=1,proc_id=0;
00282         util::get_proc_id_nump(proc_id,nump);
00283 
00284         map<string,int>::iterator p;
00285         for (p=this->target_names.begin();p!=this->target_names.end();p++) {
00286                 this->max_yields[p->first]=0;
00287         }
00288         
00289         for (long j=0;j<this->efm_yields[0].size();j++) {
00290                 int k=0;
00291                 
00292                 for (p=this->target_names.begin();p!=this->target_names.end();p++) {
00293         
00294                         if (this->efm_yields[k][j]>this->max_yields[p->first]) 
00295                                 this->max_yields[p->first]=this->efm_yields[k][j];
00296                         k++;
00297                 }
00298         
00299         }
00300         if (proc_id==0) {
00301                 map<string,double>::iterator q,t;
00302                 //cout << endl << "\t"<<util::get_time_str()<<": target reaction yields: ";
00303                 //for (q=this->max_yields.begin(),t=this->yields.begin();q!=this->max_yields.end();q++,t++) {
00304                 //      cout  << "("<< q->first << ","<<q->second<<","<<t->second<<")";
00305                 //}
00306                 cout << endl << "\t"<<util::get_time_str()<<": minimum growth-efficient mode yields [0-1]:    ";
00307                 for (q=this->max_yields.begin(),t=this->yields.begin();q!=this->max_yields.end();q++,t++) {
00308                         cout  << "("<< q->first << ","<<q->second*t->second<<") ";
00309                 }
00310         }
00311 
00312         
00313 }
00314 
00322 int MetabolicNetwork::get_num_reacs() { return this->reacs_names.size(); }
00323 
00324         
00325 unsigned long MetabolicNetwork::get_num_modes() { 
00326                 unsigned long val;
00327                 val=this->bit_efm.get_num_modes();
00328                 return val;
00329 } 
00330 
00331 
00342 int MetabolicNetwork::is_efm_elem_zero(unsigned long efm_index, int reac, int &is_zero) {
00343 
00344 
00345         this->bit_efm.is_efm_elem_zero(efm_index,reac,is_zero);
00346         return 0;
00347 
00348 }
00349 
00350 
00351 double MetabolicNetwork::get_efm_elem(unsigned long index, int reac) {
00352         double elem;
00353         vector<double> em;
00354         this->bit_efm.read_single_efm(index,em);
00355         elem=em[reac];
00356         return elem;
00357 }
00358 
00359 
00368 int MetabolicNetwork::remove_zero_substrate_modes() {
00369 
00370         //remove elementary modes having zero-flux for all substrate reactions
00371         vector<long> efm_zero_substrates;
00372 
00373         int nump=1,proc_id=0;
00374         util::get_proc_id_nump(proc_id,nump);
00375 
00376         
00377         for (long j=0;j<get_num_modes();j++) {
00378 
00379 
00380                 //now iterate through substrates
00381                 map<string,int>::iterator subst_it;
00382                 int all_subst_zero=1;
00383                 for (subst_it=this->substrate_names.begin();subst_it!=this->substrate_names.end();subst_it++)  {
00384 
00385                         int is_zero;
00386                         is_efm_elem_zero(j, subst_it->second, is_zero); 
00387 
00388                         if (!is_zero) {
00389                                 all_subst_zero=0;
00390                                 break;
00391                         }
00392                 }
00393 
00394                 if (all_subst_zero==1)
00395                         efm_zero_substrates.push_back(j);
00396         }
00397         if (proc_id==0)
00398                 cout << endl << "\t"<<util::get_time_str()<<": # of modes with zero-flux for substrate reaction: "<<efm_zero_substrates.size();
00399 
00400         this->bit_efm.remove_efm(efm_zero_substrates,this->efm_yields); 
00401 
00402         if (proc_id==0)
00403                 cout << endl << "\t"<<util::get_time_str()<<": removed modes with 0 substrate flux";
00404 
00405 }
00414 int MetabolicNetwork::is_core_efms_collapsed(map<int,string> knock_reacs,int &cnt_not_collapsed) {
00415 
00416         map<int,string>::iterator it;
00417         cnt_not_collapsed=0;
00418 
00419         for (long i=0;i<this->core_efms[0].size();i++) {
00420                 int not_collapsed=1;
00421                 for (it=knock_reacs.begin();it!=knock_reacs.end();it++) {
00422                         if (this->core_efms[it->first][i]!=0) {
00423                                 not_collapsed=0;
00424                                 break;
00425                         }
00426                 }
00427                 if (not_collapsed)
00428                         cnt_not_collapsed++;
00429 
00430         }
00431 
00432         return cnt_not_collapsed==0;
00433 }
00443 int MetabolicNetwork::extract_production_eff_efms() {
00444 
00445         int nump=1,proc_id=0;
00446         util::get_proc_id_nump(proc_id,nump);
00447 
00448         for (long i=0;i<get_num_modes();i++) {
00449                 //yield for efms[][j]-th mode is in array this->efm_yields[j][i]=
00450                 int is_core_efm=1;
00451                 for (int j=0;j<this->core_min_yields.size();j++)
00452                         if (this->efm_yields[j][i]<core_min_yields[j]) {
00453                                 is_core_efm=0;
00454                                 break;
00455                         }
00456                 if (is_core_efm)
00457                         this->core_efm_indices.push_back(i);    
00458         }
00459 
00460         //extract core modes from this->efms and create this->core_efms
00461         this->core_efms.resize(this->num_reacs,vector<double>(this->core_efm_indices.size()));
00462         long k=0;
00463         for (long j=0;j<this->core_efm_indices.size();j++) {
00464                 //found core mode
00465                 for (int i=0;i<this->num_reacs;i++) {
00466                         double elem;
00467                         elem=get_efm_elem(this->core_efm_indices[j], i);
00468                         this->core_efms[i][k]=elem; 
00469                 }                       
00470                 k++;
00471         }
00472 
00473         bit_efm.remove_efm(this->core_efm_indices,this->efm_yields);
00474 
00475 }
00485 int MetabolicNetwork::extract_growth_eff_and_ineff_modes() {
00486 
00487         for (long j=0;j<get_num_modes();j++) {
00488                 //explore yield of i-th EFM
00489                 //double substrate_sum=0;
00490                 map<string,int>::iterator p;
00491                 int k=0;
00492                 for (p=this->target_names.begin();p!=this->target_names.end();p++) {
00493                         if (this->efm_yields[k++][j]<(this->yields[p->first])*(this->max_yields[p->first]))
00494                                 break; 
00495                 }
00496                 if (p!=this->target_names.end())
00497                         this->ineff_efm_indices.push_back(j);
00498                 else
00499                         this->eff_efm_indices.push_back(j);                     
00500         }
00501         return 0;
00502 }
00503 
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines