|
'ElMo-Knock'
|
00001 #include <iostream> 00002 #include <fstream> // file I/O 00003 #include <sstream> 00004 #include <stdlib.h> 00005 #include <vector> 00006 #include <algorithm> 00007 #include <math.h> 00008 #include <iomanip> 00009 #include <limits> 00010 #include "metabolicnetwork.h" 00011 #include "util.h" 00012 #include "mpi_utils.h" 00013 00014 00015 00016 MetabolicNetwork::MetabolicNetwork() {} 00017 MetabolicNetwork::MetabolicNetwork(MetabolicNetwork &metab_network) { 00018 00019 00020 this->efm_yields=efm_yields; 00021 00022 //init reacs_names 00023 this->reacs_names=metab_network.reacs_names; 00024 this->substrate_names=metab_network.substrate_names; 00025 //init target_names 00026 this->target_names=metab_network.target_names; 00027 this->yields=metab_network.yields; 00028 this->num_modes=metab_network.num_modes; 00029 this->num_reacs=metab_network.num_reacs; 00030 this->num_metabs=metab_network.num_metabs; 00031 00032 00033 } 00034 00035 MetabolicNetwork::MetabolicNetwork(Options options):bit_efm() { 00036 00037 vector< vector<double> > stoich; 00038 int nump=1,proc_id=0; 00039 util::get_proc_id_nump(proc_id,nump); 00040 t_UnsignedInt num_ems; 00041 00042 this->options=options; 00043 00044 this->options.load_efm_meta_file(stoich, this->reacs_names, num_ems); 00045 00046 ifstream fp_in; // file for reading 00047 ofstream fp_out; 00048 00049 vector<string> init_knock_reacs=options.get_init_knock_reacs(); 00050 vector<string> target_reacs=options.get_target_reacs(); 00051 vector<string> substrate_reacs=options.get_substrate_reacs(); 00052 vector<double> min_yields=options.get_min_yields(); 00053 vector<double> min_core_yields=options.get_min_core_yields(); 00054 vector<string> transport_reacs=options.get_transport_reacs(); 00055 00056 00057 this->compressed_mode=1; 00058 bit_efm.load_efm_bit(options.get_efm_bit_fname(), num_ems, stoich); 00059 00060 /*****************INIT DATA STRUCTURES FOR WHAT IS LOADED FROM configs.knock******************/ 00061 //read substrate reactions 00062 for (vector<string>::iterator it=substrate_reacs.begin();it!=substrate_reacs.end();it++) { 00063 if ((*it).size()) { 00064 vector<string>::iterator reac_ind=find(this->reacs_names.begin(),this->reacs_names.end(),*it); 00065 this->substrate_names[*it]=reac_ind-reacs_names.begin(); 00066 } 00067 } 00068 00069 00070 map<string,int>::iterator pi; 00071 //if (proc_id==0) { 00072 // cout << endl << "\t"<<util::get_time_str()<<": substrates: "; 00073 // for (pi=substrate_names.begin();pi!=substrate_names.end();pi++) 00074 // cout << " "<<pi->first; 00075 // cout << endl; 00076 //} 00077 00078 00079 //read transport reactions 00080 for (vector<string>::iterator it=transport_reacs.begin();it!=transport_reacs.end();it++) { 00081 if ((*it).size()) { 00082 vector<string>::iterator reac_ind=find(this->reacs_names.begin(),this->reacs_names.end(),*it); 00083 this->transport_names[*it]=reac_ind-reacs_names.begin(); 00084 } 00085 } 00086 00087 //if (proc_id==0) { 00088 // cout << endl << "\t"<<util::get_time_str()<<": transport reactions: "; 00089 // for (pi=transport_names.begin();pi!=transport_names.end();pi++) 00090 // cout << " "<<pi->first; 00091 // cout << endl; 00092 //} 00093 00094 00095 if (options.get_use_xchg_reacs().compare("yes")) { 00096 //remove transport reacs from knock_cand_reacs 00097 int i=0; 00098 for (vector<string>::iterator it=this->reacs_names.begin();it!=this->reacs_names.end();it++,i++) { 00099 //is reaction *it in transport_names 00100 map<string,int>::iterator mpi=this->transport_names.find(*it); 00101 if (mpi==this->transport_names.end()) { 00102 knock_cand_reacs.push_back(i); 00103 } 00104 } 00105 } else { 00106 for (int i=0;i<this->reacs_names.size();i++) 00107 knock_cand_reacs.push_back(i); 00108 } 00109 00110 00111 if (proc_id==0) { 00112 cout << endl << "\t"<<util::get_time_str()<<": knockout candidate reactions: "; 00113 for (int i=0;i<this->knock_cand_reacs.size();i++) 00114 cout << " "<<this->reacs_names[this->knock_cand_reacs[i]]; 00115 cout << endl; 00116 } 00117 00118 00119 //read target reactions 00120 00121 for (vector<string>::iterator it=target_reacs.begin();it!=target_reacs.end();it++) { 00122 if ((*it).size()) { 00123 vector<string>::iterator reac_ind; 00124 reac_ind=find(reacs_names.begin(),reacs_names.end(),*it); 00125 this->target_names[*it]=reac_ind-reacs_names.begin(); 00126 } 00127 } 00128 00129 //read init knockout reactions (e.g. for anaerobic case OPM1, OPM2) 00130 00131 for (vector<string>::iterator it=init_knock_reacs.begin();it!=init_knock_reacs.end();it++) { 00132 00133 if ((*it).size()) { 00134 vector<string>::iterator reac_ind; 00135 reac_ind=find(reacs_names.begin(),reacs_names.end(),*it); 00136 this->init_knocks_names[*it]=reac_ind-reacs_names.begin(); 00137 } 00138 } 00139 00140 //if (proc_id==0) { 00141 // cout << endl << "\t"<<util::get_time_str()<<": initial knockout reactions"; // << this->init_knocks_names.size()<<endl; 00142 // map<string,int>::iterator t; 00143 // for (t=this->init_knocks_names.begin();t!=this->init_knocks_names.end();t++) 00144 // cout <<" ("<< t->first<<","<<t->second<<")"; 00145 // cout << endl; 00146 //} 00147 //read target min yield values 00148 00149 map<string,int>::iterator p=this->target_names.begin(); 00150 00151 for (vector<double>::iterator dit=min_yields.begin();dit!=min_yields.end();dit++) { 00152 this->yields[p->first]=*dit; //yield; 00153 p++; 00154 } 00155 00156 00157 map<string,double>::iterator q; 00158 00159 if (proc_id==0) { 00160 cout << endl <<"\t"<<util::get_time_str()<<": target reactions: "; 00161 for (p=this->target_names.begin() ,q=this->yields.begin();p!=this->target_names.end();p++,q++ ) 00162 cout << " (" << p->first <<","<<p->second<<")|"<<q->second; 00163 } 00164 /***********************************************************************/ 00165 00166 00167 init_yields(); 00168 00169 remove_zero_substrate_modes(); 00170 00171 if (proc_id==0) 00172 cout << endl << "\t"<<util::get_time_str()<<": # of EFMs after removing modes with zero-flux substrates "<< get_num_modes()<<endl; 00173 00174 bit_efm.knock_efms(this->init_knocks_names, this->efm_yields); 00175 00176 if (proc_id==0) 00177 cout << endl << "\t"<<util::get_time_str()<<": # of EFMs after initial knockout "<< get_num_modes(); 00178 00179 00180 00181 00182 this->num_reacs=reacs_names.size(); 00183 this->num_modes=get_num_modes(); 00184 00185 00186 00187 if (proc_id==0) { 00188 cout << endl << "\t"<<util::get_time_str()<<": # of elementary modes: " << this->num_modes; 00189 cout << endl << "\t"<<util::get_time_str()<<": # of reactions: " << this->num_reacs; 00190 } 00191 00192 //******************************* 00193 //read target min core EFM yield values 00194 00195 init_max_yields(); 00196 p=this->target_names.begin(); 00197 if (proc_id==0) 00198 cout << endl <<"\t"<<util::get_time_str()<<": minimum production-efficient mode yields [0-1]: "; 00199 00200 for (vector<double>::iterator dit=min_core_yields.begin();dit!=min_core_yields.end();dit++) { 00201 if (proc_id==0) 00202 cout << "("<< p->first<<","<<(*dit)*(this->max_yields[p->first])<<") "; 00203 this->core_min_yields.push_back((*dit)*(this->max_yields[p->first])); //multiply core_yield with max_yield 00204 p++; 00205 } 00206 00207 if (proc_id==0) 00208 cout << endl; 00209 //end read target min core EFM yield values 00210 00211 //separate core modes from efms 00212 extract_production_eff_efms(); 00213 //find indices of efficient and inefficient modes 00214 extract_growth_eff_and_ineff_modes(); 00215 00216 00217 if (proc_id==0) { 00218 cout << endl << "\t"<<util::get_time_str()<<": # of production-efficient modes: " << this->core_efm_indices.size(); 00219 cout << endl << "\t"<<util::get_time_str()<<": # of growth-efficient modes: " << this->eff_efm_indices.size(); 00220 cout << endl << "\t"<<util::get_time_str()<<": # of inefficient modes: " << this->ineff_efm_indices.size(); 00221 } 00222 00223 } 00224 00225 00226 00237 int MetabolicNetwork::init_yields() { 00238 int nump=1,proc_id=0; 00239 util::get_proc_id_nump(proc_id,nump); 00240 00241 00242 if (proc_id==0) { 00243 fstream fin; 00244 fin.open(this->options.get_yields_fname().c_str(),ios::in); 00245 if( fin.is_open() ) { 00246 00247 cout << endl << "\t"<<util::get_time_str()<<": reading mode yields from file "<<this->options.get_yields_fname().c_str()<<".... "; 00248 string str; 00249 this->efm_yields.resize(this->target_names.size(),vector<double>(get_num_modes())); 00250 for (long i=0;i<get_num_modes();i++) 00251 for (long j=0;j<this->target_names.size();j++) { 00252 fin >> str; 00253 if (str.compare("inf")==0) 00254 this->efm_yields[j][i]=numeric_limits<double>::max(); 00255 else if (str.compare("-inf")==0) 00256 this->efm_yields[j][i]=numeric_limits<double>::min(); 00257 else { 00258 istringstream ss(str); 00259 ss>>this->efm_yields[j][i]; 00260 } 00261 } 00262 } 00263 fin.close(); 00264 } 00265 mpi_bcast_matrix( this->efm_yields); 00266 00267 return 0; 00268 } 00279 int MetabolicNetwork::init_max_yields() { 00280 00281 int nump=1,proc_id=0; 00282 util::get_proc_id_nump(proc_id,nump); 00283 00284 map<string,int>::iterator p; 00285 for (p=this->target_names.begin();p!=this->target_names.end();p++) { 00286 this->max_yields[p->first]=0; 00287 } 00288 00289 for (long j=0;j<this->efm_yields[0].size();j++) { 00290 int k=0; 00291 00292 for (p=this->target_names.begin();p!=this->target_names.end();p++) { 00293 00294 if (this->efm_yields[k][j]>this->max_yields[p->first]) 00295 this->max_yields[p->first]=this->efm_yields[k][j]; 00296 k++; 00297 } 00298 00299 } 00300 if (proc_id==0) { 00301 map<string,double>::iterator q,t; 00302 //cout << endl << "\t"<<util::get_time_str()<<": target reaction yields: "; 00303 //for (q=this->max_yields.begin(),t=this->yields.begin();q!=this->max_yields.end();q++,t++) { 00304 // cout << "("<< q->first << ","<<q->second<<","<<t->second<<")"; 00305 //} 00306 cout << endl << "\t"<<util::get_time_str()<<": minimum growth-efficient mode yields [0-1]: "; 00307 for (q=this->max_yields.begin(),t=this->yields.begin();q!=this->max_yields.end();q++,t++) { 00308 cout << "("<< q->first << ","<<q->second*t->second<<") "; 00309 } 00310 } 00311 00312 00313 } 00314 00322 int MetabolicNetwork::get_num_reacs() { return this->reacs_names.size(); } 00323 00324 00325 unsigned long MetabolicNetwork::get_num_modes() { 00326 unsigned long val; 00327 val=this->bit_efm.get_num_modes(); 00328 return val; 00329 } 00330 00331 00342 int MetabolicNetwork::is_efm_elem_zero(unsigned long efm_index, int reac, int &is_zero) { 00343 00344 00345 this->bit_efm.is_efm_elem_zero(efm_index,reac,is_zero); 00346 return 0; 00347 00348 } 00349 00350 00351 double MetabolicNetwork::get_efm_elem(unsigned long index, int reac) { 00352 double elem; 00353 vector<double> em; 00354 this->bit_efm.read_single_efm(index,em); 00355 elem=em[reac]; 00356 return elem; 00357 } 00358 00359 00368 int MetabolicNetwork::remove_zero_substrate_modes() { 00369 00370 //remove elementary modes having zero-flux for all substrate reactions 00371 vector<long> efm_zero_substrates; 00372 00373 int nump=1,proc_id=0; 00374 util::get_proc_id_nump(proc_id,nump); 00375 00376 00377 for (long j=0;j<get_num_modes();j++) { 00378 00379 00380 //now iterate through substrates 00381 map<string,int>::iterator subst_it; 00382 int all_subst_zero=1; 00383 for (subst_it=this->substrate_names.begin();subst_it!=this->substrate_names.end();subst_it++) { 00384 00385 int is_zero; 00386 is_efm_elem_zero(j, subst_it->second, is_zero); 00387 00388 if (!is_zero) { 00389 all_subst_zero=0; 00390 break; 00391 } 00392 } 00393 00394 if (all_subst_zero==1) 00395 efm_zero_substrates.push_back(j); 00396 } 00397 if (proc_id==0) 00398 cout << endl << "\t"<<util::get_time_str()<<": # of modes with zero-flux for substrate reaction: "<<efm_zero_substrates.size(); 00399 00400 this->bit_efm.remove_efm(efm_zero_substrates,this->efm_yields); 00401 00402 if (proc_id==0) 00403 cout << endl << "\t"<<util::get_time_str()<<": removed modes with 0 substrate flux"; 00404 00405 } 00414 int MetabolicNetwork::is_core_efms_collapsed(map<int,string> knock_reacs,int &cnt_not_collapsed) { 00415 00416 map<int,string>::iterator it; 00417 cnt_not_collapsed=0; 00418 00419 for (long i=0;i<this->core_efms[0].size();i++) { 00420 int not_collapsed=1; 00421 for (it=knock_reacs.begin();it!=knock_reacs.end();it++) { 00422 if (this->core_efms[it->first][i]!=0) { 00423 not_collapsed=0; 00424 break; 00425 } 00426 } 00427 if (not_collapsed) 00428 cnt_not_collapsed++; 00429 00430 } 00431 00432 return cnt_not_collapsed==0; 00433 } 00443 int MetabolicNetwork::extract_production_eff_efms() { 00444 00445 int nump=1,proc_id=0; 00446 util::get_proc_id_nump(proc_id,nump); 00447 00448 for (long i=0;i<get_num_modes();i++) { 00449 //yield for efms[][j]-th mode is in array this->efm_yields[j][i]= 00450 int is_core_efm=1; 00451 for (int j=0;j<this->core_min_yields.size();j++) 00452 if (this->efm_yields[j][i]<core_min_yields[j]) { 00453 is_core_efm=0; 00454 break; 00455 } 00456 if (is_core_efm) 00457 this->core_efm_indices.push_back(i); 00458 } 00459 00460 //extract core modes from this->efms and create this->core_efms 00461 this->core_efms.resize(this->num_reacs,vector<double>(this->core_efm_indices.size())); 00462 long k=0; 00463 for (long j=0;j<this->core_efm_indices.size();j++) { 00464 //found core mode 00465 for (int i=0;i<this->num_reacs;i++) { 00466 double elem; 00467 elem=get_efm_elem(this->core_efm_indices[j], i); 00468 this->core_efms[i][k]=elem; 00469 } 00470 k++; 00471 } 00472 00473 bit_efm.remove_efm(this->core_efm_indices,this->efm_yields); 00474 00475 } 00485 int MetabolicNetwork::extract_growth_eff_and_ineff_modes() { 00486 00487 for (long j=0;j<get_num_modes();j++) { 00488 //explore yield of i-th EFM 00489 //double substrate_sum=0; 00490 map<string,int>::iterator p; 00491 int k=0; 00492 for (p=this->target_names.begin();p!=this->target_names.end();p++) { 00493 if (this->efm_yields[k++][j]<(this->yields[p->first])*(this->max_yields[p->first])) 00494 break; 00495 } 00496 if (p!=this->target_names.end()) 00497 this->ineff_efm_indices.push_back(j); 00498 else 00499 this->eff_efm_indices.push_back(j); 00500 } 00501 return 0; 00502 } 00503