#include "ProteinDeeSpace.h" #include "structure_types.h" // for PROTEIN #include "dee_utilities.h" // for get_energy and get_self_energy #include "pdbATOM_utilities.h" // for seqpos_to_inputted_string #include "GA_utilities.h" // for inoculate_sidechains // Hide EGAD min,max definitions (conflicts with iostream template members) #undef min #undef max #include #include using namespace std; void ProteinDeeSpace::PackResimers(unsigned int i, unsigned int j) { // cout << "PackResimers(" << i << ", " << j << ")" << endl; // Calculate the numble of possible resimer choices at j int factor = 1, iprod = 1; for(list::const_iterator p = VariablePositions[j]->Pos.begin(); p != VariablePositions[j]->Pos.end(); ++p) { // cout << "size = " << ResimerMap[*p].size() << endl; factor *= ResimerMap[*p].size(); } // cout << " factor = " << factor << endl; // Form resimer indices for the actual combinations of i and j // (we could use a filter at this point to avoid DEP's) vector vecOldi = VariablePositions[i]->Res; VariablePositions[i]->Res.clear(); VariablePositions[i]->Res.reserve(vecOldi.size() * VariablePositions[j]->Res.size()); for(vector::const_iterator r = vecOldi.begin(); r != vecOldi.end(); ++r) { iprod = (*r)*factor; // cout << " iprod = " << iprod << endl; for(vector::const_iterator s = VariablePositions[j]->Res.begin(); s != VariablePositions[j]->Res.end(); ++s) { VariablePositions[i]->Res.push_back(iprod + (*s)); // cout << " adding " << VariablePositions[i]->Res.back() << endl; } } } // Generate a list of EGAD resimer indices for the single position // components of unified position pos, super-resimer resimer. list ProteinDeeSpace::UnpackResimers(const unsigned int pos, unsigned int resimer) const { // Special case of non-unified position if(VariablePositions[pos]->Pos.size() == 1) { // Converting internal resimer value to EGAD resimer value via // look up on reduced resimer list //return list //(1, ResimerMap[VariablePositions[pos]->Pos.front()][resimer]); return list (1, ResimerMap [VariablePositions[pos]->Pos.front()] [VariablePositions[pos]->Res[resimer]]); } //cout << "Unpacking " << pos << ", " << resimer << endl; // Peel off individual resimer values by applying modular division // right to left. list retval; /* #ifdef GCC2_TYPE_HACK div_t d; #else */ ldiv_t d; // #endif resimer = VariablePositions[pos]->Res[resimer]; typedef list::const_reverse_iterator RI; for(RI i = VariablePositions[pos]->Pos.rbegin(); i != RI(VariablePositions[pos]->Pos.rend()); ++i) { // Integer division of resimer by number of resimers at right-hand // position //cout << "resimer: " << resimer // << " *i: " << *i << " size: " << ResimerMap[*i].size() << endl; //d = ldiv(long(resimer), ResimerMap[*i].size()); /* #ifdef GCC2_TYPE_HACK d = div(resimer, ResimerMap[*i].size()); #else */ d = ldiv(long(resimer), ResimerMap[*i].size()); // #endif // The remainder is the resimer value for the right-hand position. retval.push_front(ResimerMap[*i][d.rem]); // The quotient is retained for the next division resimer = d.quot; } // Check that we've used up the whole resimer value if(resimer != 0) { // cout << "Unexpected non-zero quotient in UnpackResimers!" // << "pos = " << pos << " resimer = " << resimer << endl; } return retval; } ProteinDeeSpace::ProteinDeeSpace(const PROTEIN *prot_i) : protein(prot_i), VariablePositions(0), FixedPositions(0), ResimerMap(0), self_energy(0), fixed_energy(0.0) { // Check for valid input if((!protein)||(!protein->var_pos)){ std::cerr << "Dropped pointer in ProteinDeeSpace::ProteinDeeSpace!" << std::endl; return; } // Push dummy first value so that we can share EGAD position indices ResimerMap.push_back(vector(0)); // Initialize position and resimer maps for(int i = 1; protein->var_pos[i].seq_position != ENDFLAG; ++i) { // Assume that entries with more than one resimer choice are the // variable positions if(protein->var_pos[i].number_of_resimers > 1) { // Create initial resimer list for this position: 0...N-1 VariablePositions.push_back (new PosInfo(i, protein->var_pos[i].number_of_resimers)); ResimerMap.push_back(VariablePositions.back()->Res); // Increment values in ResimerMap to correspond to EGAD indexing //for(vector::iterator j = ResimerMap.back().begin(); // j != ResimerMap.back().end(); ++j) // { // ++(*j); // } for(unsigned int j = 0; j < ResimerMap.back().size(); ++j) { ++ResimerMap.back()[j]; if(ResimerMap.back()[j] < 1) { std::cerr << "Bad ResimerMap in init!" << std::endl; } } } else { //FixedPositions.push_back(i); // Special case 1: Dummy position (not used) if(protein->var_pos[i].number_of_resimers < 1) { ResimerMap.push_back(vector(0)); } // Special case 2: Fixed position (only one choice) else { ResimerMap.push_back(vector(1, 1)); } } } // Initialize fixed and self energies fixed_energy = protein->parameters.fixedatoms_energy; self_energy.resize(VariablePositions.size()); for(unsigned int i = 0; i < VariablePositions.size(); ++i) { self_energy[i] = vector (NumResimers(i)); for(unsigned int ires = 0; ires < NumResimers(i); ++ires) { self_energy[i][ires] = get_energy(protein, // raw indices okay because we're initializing VariablePositions[i]->Pos.front(), ires+1, VariablePositions[i]->Pos.front(), ires+1); } } // Corrections for fixed positions for(unsigned int i = 1; protein->var_pos[i].seq_position != ENDFLAG; ++i) { // Identify fixed positions as those with only one resimer choice if(protein->var_pos[i].number_of_resimers < 2) { // Add pairwise and self energies of fixed positions to fixed energy for(int j = i; protein->var_pos[j].seq_position != ENDFLAG; ++j) { if(protein->var_pos[j].number_of_resimers < 2) { fixed_energy += get_energy(protein, i, 1, j, 1); } } // Add pairwise energies between fixed and variable positions to // the self energies of the variable positions for(unsigned int j = 0; j < VariablePositions.size(); ++j) { for(unsigned int jres = 0; jres < NumResimers(j); ++jres) { self_energy[j][jres] += get_energy(protein, i, 1, VariablePositions[j]->Pos.front(), jres+1); } } } } } ProteinDeeSpace::ProteinDeeSpace(const ProteinDeeSpace& rhs) : protein(rhs.protein), VariablePositions(rhs.VariablePositions.size(), 0), FixedPositions(rhs.FixedPositions), ResimerMap(rhs.ResimerMap), self_energy(rhs.self_energy), fixed_energy(rhs.fixed_energy) { // Deep copy of VariablePositions for(unsigned int i = 0; i < VariablePositions.size(); ++i) { VariablePositions[i] = new PosInfo(); VariablePositions[i]->Pos = rhs.VariablePositions[i]->Pos; VariablePositions[i]->Res = rhs.VariablePositions[i]->Res; } } ProteinDeeSpace::~ProteinDeeSpace() { for(vector::iterator i = VariablePositions.begin(); i != VariablePositions.end(); ++i) { delete *i; } } ProteinDeeSpace& ProteinDeeSpace::operator=(const ProteinDeeSpace& rhs) { if(&rhs != this) { // anti-aliasing protein = rhs.protein; // Free currently allocated lists for(vector::iterator i = VariablePositions.begin(); i != VariablePositions.end(); ++i) { delete *i; } // Allocate memory for vector VariablePositions.clear(); VariablePositions.resize(rhs.VariablePositions.size(), 0); // Copy for(unsigned int i = 0; i < VariablePositions.size(); ++i) { VariablePositions[i] = new PosInfo(); VariablePositions[i]->Pos = rhs.VariablePositions[i]->Pos; VariablePositions[i]->Res = rhs.VariablePositions[i]->Res; } FixedPositions = rhs.FixedPositions; ResimerMap = rhs.ResimerMap; self_energy = rhs.self_energy; fixed_energy = rhs.fixed_energy; } return *this; } // Name of position pos (e.g. "28A") string ProteinDeeSpace::PosID(unsigned int pos) const{ string retval = ""; char buffer[80]; for(list::iterator i = VariablePositions[pos]->Pos.begin(); i != VariablePositions[pos]->Pos.end(); ++i){ //sprintf(buffer, "%d", protein->var_pos[*i].seq_position); sprintf(buffer, "%s", protein->seqpos_text_map[seqpos_to_inputted_string (protein->var_pos[*i].seq_position, protein->seqpos_text_map)].seqpos_text); if(retval.size() > 0){ retval += " "; } retval += string(buffer); } return retval; } // Name of resimer at pos (e.g. "14") string ProteinDeeSpace::ResimerID(unsigned int pos, unsigned int resimer) const{ // Convert super-resimer ID to EGAD resimer ID's list res_list = UnpackResimers(pos, resimer); // Generate string representation string retval = ""; char buffer[80]; for(list::const_iterator i = res_list.begin(); i != res_list.end(); ++i){ sprintf(buffer, "%d", *i); if(retval.size() > 0){ retval += " "; } retval += string(buffer); } return retval; } // Name of residue for resimer at pos (e.g. "GLU") string ProteinDeeSpace::ResType(unsigned int pos, unsigned int resimer) const { // Convert super-resimer ID to resimer ID's list res_list = UnpackResimers(pos, resimer); char buffer[80]; string retval = ""; list::iterator r = res_list.begin(), p = VariablePositions[pos]->Pos.begin(); for(;r != res_list.end(); ++r, ++p) { int cur_res = *r; int rotamer = 1, residue = 1, count = 1; // Iterate through rotamers of each residue choice to the selected // resimer while(count < cur_res) { ++count; ++rotamer; if(rotamer > protein->var_pos[*p].choice[residue].resparam_ptr-> rotamerlib_ptr->number_of_rotamers) { rotamer = 1; ++residue; } } // Read the three letter code for this residue sprintf(buffer, "%s", protein->var_pos[*p].choice[residue].resparam_ptr-> residuetype); if(retval.size() > 0) { retval += " "; } retval += string(buffer); } return retval; } // Dump resimer values of fixed positions void ProteinDeeSpace::DumpFixedPositions() const{ // cout << "Solution: " << endl; // cout << "EgadPos EgadResimer Residue ChiAngles" << endl; for(unsigned int i = 1; i < ResimerMap.size(); ++i) { if(ResimerMap[i].size() < 1) { // Skip dummy positions // printf("%d\tsize<1\n",i); continue; } VARIABLE_POSITION *var_pos = protein->var_pos + i; if(ResimerMap[i].size() > 1) { /* cout << string(protein-> seqpos_text_map[seqpos_to_inputted_string (var_pos->seq_position, protein->seqpos_text_map)].seqpos_text) << " (" << var_pos->seq_position << ") " << " not determined" << endl; */ // printf("%d\tsize>1\n",i); continue; } int resimer = ResimerMap[i].front(); // Map resimer to (residue,rotamer) pair (could this be done with div's?) int rotamer = 1, residue = 1, count = 1; while(count < resimer) { ++count; ++rotamer; if(rotamer > var_pos->choice[residue].resparam_ptr-> rotamerlib_ptr->number_of_rotamers) { rotamer = 1; ++residue; if(residue > var_pos->number_of_choices) { // cout << "Bad resimer value (out of choices)" << endl; // printf("%d\tbad resimer\n",i); break; } } } if(residue > var_pos->number_of_choices) { /* cout << "skipping (residue = " << residue << " choices = " << var_pos->number_of_choices << ")..." << endl; */ // printf("%d\tskip\n",i); continue; } /* NP 12/10/03 doesn't work to get rid of bad rotamers int i_res, i_res_rot; var_pos->dead_ended_flag=1; printf("%d\t%d\t%d\t%d\n",i,var_pos->choice[residue].resparam_ptr->rotamerlib_ptr->number_of_rotamers,residue,rotamer); for(i_res=1;i_res<=var_pos->number_of_choices;++i_res) { for(i_res_rot=1;i_res_rot<=var_pos->choice[residue].resparam_ptr->rotamerlib_ptr->number_of_rotamers;++i_res_rot) { if(i_res==residue && i_res_rot == rotamer) { var_pos->choice[residue].lookup_res_ptr->lookupRot[rotamer].in_use_flag = 1; printf("\tkeep\t%d\t%d\t%d\n",i,i_res,i_res_rot); } else { var_pos->choice[residue].lookup_res_ptr->lookupRot[rotamer].in_use_flag = 0; printf("\tditch\t%d\t%d\t%d\n",i,i_res,i_res_rot); } } } RESPARAM *resparam_ptr = var_pos->choice[residue].resparam_ptr; // Look up chi angles for this resimer ROTAMER *cur_rot = resparam_ptr->rotamerlib_ptr->rotamer + rotamer; cout // Egad Position ID << string(protein-> seqpos_text_map[seqpos_to_inputted_string (var_pos->seq_position, protein->seqpos_text_map)].seqpos_text) << " " << " (" << var_pos->seq_position << ") " // EGAD Resimer << resimer << " " // EGAD Residue << string(resparam_ptr->residuetype) << " "; // Chi angles for(int chi = 1; chi <= resparam_ptr->rotamerlib_ptr->numChi; ++chi) { cout << cur_rot->chi[chi] << " "; } cout << endl; */ } } // Get true sidechain-sidechain energy between two positions double ProteinDeeSpace::Get(unsigned int ipos, unsigned int ires, unsigned int jpos, unsigned int jres) const{ // Special cases: if(ipos == jpos){ // Self energy: if(ires == jres){ return Get(ipos, ires); } else{ // Energy between different resimers at the same position: return 0.0; } } if(VariablePositions[ipos]->Pos.size() == 1){ if(VariablePositions[jpos]->Pos.size() == 1){ // Non-unified position: return get_energy(protein, // ires and jres are both raw indices, // so we need to dereference them VariablePositions[ipos]->Pos.front(), ResimerMap[VariablePositions[ipos]->Pos.front()][VariablePositions[ipos]->Res[ires]], VariablePositions[jpos]->Pos.front(), ResimerMap[VariablePositions[jpos]->Pos.front()][VariablePositions[jpos]->Res[jres]]); } else{ // ipos not unified, jpos unified -> swap and trap below swap(ipos,jpos); swap(ires,jres); } } else{ // At this point, ipos is not unified. jpos may be unified if(VariablePositions[jpos]->Pos.size() == 1){ // ipos unified, jpos not unified list ures = UnpackResimers(ipos, ires); list::const_iterator r = ures.begin(); double energy = 0.0; // sum E(i,j) for all sub-positions at i for(list::const_iterator i = VariablePositions[ipos]->Pos.begin(); i != VariablePositions[ipos]->Pos.end(); ++i, ++r){ //cout << "E(" << protein->var_pos[*i].seq_position << ", " << *r // << ", " << protein->var_pos[VariablePositions[jpos]-> // Pos.front()].seq_position // << ", " << ResimerMap[VariablePositions[jpos]->Pos.front()][VariablePositions[jpos]->Res[jres]] // << ") = " << get_energy(protein, *i, *r, // VariablePositions[jpos]->Pos.front(), // ResimerMap[VariablePositions[jpos]-> // Pos.front()][VariablePositions[jpos]->Res[jres]]) << endl; energy += get_energy(protein, // *r is a valid EGAD resimer because we got // it from UnpackResimers *i, *r, // jres is a raw index, so we have to // dereference it VariablePositions[jpos]->Pos.front(), ResimerMap[VariablePositions[jpos]->Pos.front()][VariablePositions[jpos]->Res[jres]]); } return energy; } } // At this point, both ipos and jpos are unified // sum E(i,j) for all sub-positions at i and at j double energy = 0.0; list ures_i = UnpackResimers(ipos, ires); list ures_j = UnpackResimers(jpos, jres); list::const_iterator r = ures_i.begin(); for(list::const_iterator i = VariablePositions[ipos]->Pos.begin(); i != VariablePositions[ipos]->Pos.end(); ++i, ++r){ list::const_iterator s = ures_j.begin(); for(list::const_iterator j = VariablePositions[jpos]->Pos.begin(); j != VariablePositions[jpos]->Pos.end(); ++j, ++s){ energy += get_energy(protein, *i, *r, *j, *s); } } return energy; } // pair_energy folds the self energies into the pairwise energy terms // (useful for bounding calculations) // Note that the fixed energy is no longer spread over the pairwise terms double ProteinDeeSpace::pair_energy(unsigned int ipos, unsigned int ires, unsigned int jpos, unsigned int jres) const { if(ipos == jpos) { return 0.0; } return // Self energies spread evenly over the total pairwise terms ( Get(ipos, ires) + Get(jpos, jres) ) / double(2*(NumPos()) - 2) + // Actual pairwise term Get(ipos, ires, jpos, jres)/double(2.0); } // Fuse positions i and j into a single "super-rotamer" position // This task is easier if we have a list of ij resimers that can // be excluded from the indexed list. void ProteinDeeSpace::Unify(unsigned int i, unsigned int j) { // Assert i < j if(i > j) { swap(i,j); } // Update self energies vector ij_energy; ij_energy.reserve(VariablePositions[i]->Res.size() * VariablePositions[j]->Res.size()); for(unsigned int r = 0; r < VariablePositions[i]->Res.size(); ++r) { for(unsigned int s = 0; s < VariablePositions[j]->Res.size(); ++s) { ij_energy.push_back(self_energy[i][r]+self_energy[j][s]+ Get(i,r,j,s)); } } self_energy[i] = ij_energy; self_energy.erase(self_energy.begin()+j); // Generate list of ij super-resimers // (result stored in VariablePositions[i]->Res) PackResimers(i, j); // To do: // Trim VariablePositions[i]->Res based on externally supplied // dead ending pairs. This trimming must also update self_energy[i] // Append unified positions at j to the unified positions at i VariablePositions[i]->Pos.splice(VariablePositions[i]->Pos.end(), VariablePositions[j]->Pos); // Remove position j from VariablePositions. VariablePositions // is now one position smaller, and indices >= j are now invalid delete VariablePositions[j]; VariablePositions.erase(VariablePositions.begin() + j); } // Remove position i from the space, fixing it at resimer r void ProteinDeeSpace::FixPos(unsigned int i, unsigned int r) { // Add the self energy of position(s) i to the fixed_energy fixed_energy += Get(i,r); // Fold the pairwise energy of i with the remaining variable positions // into their self terms for(unsigned int j = 0; j < VariablePositions.size(); ++j) { if(j == i) { continue; } for(unsigned int s = 0; s < VariablePositions[j]->Res.size(); ++s) { self_energy[j][s] += Get(i,r,j,s); } } self_energy.erase(self_energy.begin() + i); // Decompose i into its component positions and fix the resimers // of those positions to the components of r. list ures = UnpackResimers(i,r); //if((ures.size() == 1)&&(ures.front() != r+1)) // { // cout << "Suspect resimer " << ures.front() << ", " << r // << " at " << i << endl; // } if(VariablePositions[i]->Pos.size() != ures.size()) { cout << "Bad unpacking at " << i << ": " << VariablePositions[i]->Pos.size() << " != " << ures.size() << endl; } list::const_iterator p, u; for(p = VariablePositions[i]->Pos.begin(), u = ures.begin(); p != VariablePositions[i]->Pos.end(); ++p, ++u) { ResimerMap[*p] = vector(1, *u); } // Add the component positions of i to the list of fixed positions FixedPositions.splice(FixedPositions.end(), VariablePositions[i]->Pos); // Remove i from the list of variable positions. delete VariablePositions[i]; VariablePositions.erase(VariablePositions.begin() + i); } // Remove positions i to j from the space, fixing them to the resimer // values in vecR void ProteinDeeSpace::FixPos(unsigned int i, unsigned int j, const vector& vecR) { // There is a more efficient implementation for this. For the // moment, however, we will code for simplicity =) for(vector::const_iterator r = vecR.begin(); r != vecR.end(); ++r, ++i) { FixPos(i,*r); } } // Remove resimer choice r at i from the space. All indices greater than // r at i will be shifted down by 1. void ProteinDeeSpace::RemoveRes(unsigned int i, unsigned int r) { // For non-unified positions, we remove r from both VariablePositions // and the underlying space. The current implementation assumes that // VariablePositions and ResimerMap are updated in parallel for // non-unified positions. //cout << "Space erasing " << PosID(i) << " " << ResimerID(i, r) // << "(" << i << ", " << r << ")" << endl; if(VariablePositions[i]->Pos.size() == 1) { // Remove entry from ResimerMap if(r < ResimerMap[VariablePositions[i]->Pos.front()].size()) { for(vector::iterator s = VariablePositions[i]-> Res.begin() + r + 1; s != VariablePositions[i]->Res.end(); ++s) { if(*s < 1) { std::cerr << "Zero value in RemoveRes!" << std::endl; } else { --(*s); } } ResimerMap[VariablePositions[i]->Pos.front()].erase (ResimerMap[VariablePositions[i]->Pos.front()].begin() + r); // Update VariablePositions indices to reflect this deletion // Note: there is a more efficient implementation based on stronger // assumptions about VariablePositions[i]->Res; viz., we just // need to decrement VariablePositions[i]->Res[r:] } else { // cout << "Not erasing ResimerMap for " << i << ", " << r << endl; } } VariablePositions[i]->Res.erase(VariablePositions[i]->Res.begin() + r); self_energy[i].erase(self_energy[i].begin() + r); } // Remove resimer choices r through s at i. All indices greater than s // at i will be shifted down by (r - s + 1) void ProteinDeeSpace::RemoveRes(unsigned int i, unsigned int r, unsigned int s) { // For non-unified positions, we remove r from both VariablePositions // and the underlying space. The current implementation assumes that // VariablePositions and ResimerMap are updated in parallel for // non-unified positions. if(VariablePositions[i]->Pos.size() == 1) { //ResimerMap[VariablePositions[i]->Pos.front()].erase // (ResimerMap[VariablePositions[i]->Pos.front()].begin() + r, // ResimerMap[VariablePositions[i]->Pos.front()].begin() + s + 1); } VariablePositions[i]->Res.erase(VariablePositions[i]->Res.begin() + r, VariablePositions[i]->Res.begin() + s + 1); self_energy[i].erase(self_energy[i].begin() + r, self_energy[i].begin() + s + 1); } /* NP 7/7/03 */ int ProteinDeeSpace::DeeSoln_to_CHROMOSOME (CHROMOSOME *chr, const vector *soln) const { int soln_index = 1; // current position in protein int cur_resimer; // current resimer choice int i_res, i_res_rot; vector::const_iterator soln_ptr; /* allocates memory and initializes a chr and the gene linked list */ inoculate_sidechains(chr, protein->var_pos, 0); //soln_index=0; chr->genes = chr->firstgene; // Check for user supplied solution (for unconverged spaces) if(soln) { if(soln->size() != NumPos()) { std::cerr << "Size mismatch in DeeSoln_to_CHROMOSOME! (" << soln->size() << " != " << NumPos() << std::endl; } soln_ptr = soln->begin(); } while(chr->genes->seq_position!=ENDFLAG) { /* advance past genes for positions w/ only one rotamer; these are not included in the Dee-specific data structures */ // Bounds check if(soln_index > int(ResimerMap.size())) { std::cerr << "soln_index out of bounds in DeeSoln_to_CHROMOSOME!" << std::endl; return 0; } // Check for unconverged solution if(ResimerMap[soln_index].size() != 1) { // Check if the client provided a choice for this position if(soln) { // The following error occurs if we have more undetermined // positions than soln can provide choices for. The size // check at the beginning of this function should provide // adequate trapping, so this trap is probably redundant. if(soln_ptr == soln->end()) { std::cerr << "Unexpected bad index in DeeSoln_to_CHROMOSOME!" << std::endl; return 0; } cur_resimer = ResimerMap[soln_index][*(soln_ptr)]; ++soln_ptr; } else { return 0; } } else { cur_resimer = ResimerMap[soln_index].front(); } if(protein->var_pos[soln_index].number_of_resimers > 1) get_i_res_i_res_rot_for_resimer (&(protein->var_pos[soln_index]), cur_resimer, &i_res, &i_res_rot); else { i_res=1; i_res_rot=1; } chr->genes->choice_ptr = &protein->var_pos[soln_index].choice[i_res]; chr->genes->chi = protein->var_pos[soln_index].choice[i_res].resparam_ptr->rotamerlib_ptr->rotamer[i_res_rot].chi; chr->genes->lookupRot_index = i_res_rot; chr->genes->j_choice_index = i_res-1; chr->genes->lookupRot = &protein->var_pos[soln_index].choice[i_res].lookup_res_ptr->lookupRot[i_res_rot]; chr->genes = chr->genes->nextgene; ++soln_index; } return 1; // success =) }