/* EGAD: DeeControl.cpp Mark Voorhies, Navin Pokala and Tracy Handel Dept. of Molecular and Cell Biology University of California, Berkeley Copyright (C) 2003 Regents of the University of California Aug 12 2003 Absolutely no warranties are made or are implied with the use of this program or its parts. This file contains the DeeControl function which runs DEE blocks. */ // ********************************************************************** // Name: DeeTest.cpp // By: Mark Voorhies and Navin Pokala 7/21/03 // Description: // EGAD interface for DEE // ********************************************************************** #include "DeeControl.h" #include #include // for cout #include #include #include #include // for clocks() and CLOCKS_PER_SEC #include #include "dee_utilities.h" #include "DeeTable.h" #include "BoundingSingles.h" #include "ExhaustiveSearch.h" #include "GoldsteinSingles.h" #include "SplitSingles.h" #include "LogicalSingles.h" #include "MagicBulletDoubles.h" #include "MC.h" #include "ProteinDeeSpace.h" #include "structure_types.h" // Choose two positions and unify them. The positions are chosen // to maximize the fraction of dead-ending pairs between the // unified positions. Returns 1 on successful unification, 0 // if the space can not be unified. int Unify(DeeTable& eliminated, DeeSpace& space, const char *logfile); // Routine for cleaning up between application of DEE filters. Puts // the DEE table in a logically self-consistent state (this is not complete), // prunes eliminated resimers and fixed positions from the table, and // checks if the space is small enough to solve by brute force. Returns // 1 if DEE can proceed, 0 if DEE should terminate. (A return value of // 0 usually means that the problem has been solved. However, it can // also indicate that exhaustive search was tried and failed. It may // be useful to distinguish these results with different return codes). int CleanUp(DeeTable& eliminated, DeeSpace& space, const int iMaxNodes, const double& dLogBailout, const char *logfile, time_t start_time); using namespace std; /* this function controls DEE for rotamer optimization If DEE does not converge, or does not converge within RUNTIME, simulated annealling is performed. Final_chr, energies, structures are stored in protein, and is ready to send to output_PROTEIN */ /* internal and logfile comments for suggested improvements - NP */ int DeeControl(PROTEIN *protein) { char *logfile, *logfile_line, *escape_hatch_filename; extern int LOGFILE_FLAG, GET_PID; extern char *CURRENT_WORKING_DIRECTORY; int dummyint1, dummyint2; time_t start_time; logfile=NULL; logfile_line=NULL; if(LOGFILE_FLAG != 0) { logfile = (char *)calloc(MXLINE_INPUT,sizeof(char)); sprintf(logfile, "%s.log", protein->parameters.output_prefix); } logfile_line=(char *)calloc(MXLINE_INPUT,sizeof(char)); logfile_line[0]='\0'; logfile_line[1]='\0'; logfile_line[2]='\0'; escape_hatch_filename = (char *)calloc(MAXLINE,sizeof(char)); sprintf(escape_hatch_filename,"%s/escape.DEE.%d",CURRENT_WORKING_DIRECTORY,GET_PID); sprintf(logfile_line,"Starting DEE"); dee_logfile(logfile_line, logfile); sprintf(logfile_line,"To exit the DEE and move to the next step:"); sprintf(logfile_line,"\ttouch %s\n",escape_hatch_filename); dee_logfile(logfile_line, logfile); int max_nodes = protein->parameters.dee_max_nodes; bool bounding_flag = false; double E_ref = DBL_MAX; if(protein->parameters.dee_E_bounding!=ENDFLAG) { bounding_flag = true; } if(bounding_flag) { if(protein->parameters.dee_E_bounding!=0.0) { E_ref = protein->parameters.dee_E_bounding; protein->E_working = protein->parameters.dee_E_bounding; } else /* MC to get bounding energy; remove once MC_with_elimination_table is ready and incorporated below */ { dummyint1=protein->parameters.number_MC_cycles; dummyint2=protein->parameters.number_MC_solns; protein->parameters.number_MC_cycles=20; MC_rotamer_control(protein); protein->parameters.number_MC_cycles=dummyint1; protein->parameters.number_MC_solns=dummyint2; E_ref = protein->E_working + protein->parameters.dee_E_bounding_ceiling; } } // ============================================================ // Construct DeeTable linked to ProteinDeeSpace // ============================================================ dee_logfile("Initializing dead end elimination table", logfile); initialize_lookuptable_for_dee(protein); ProteinDeeSpace space(protein); DeeTable eliminated(&space); // ============================================================ // DEE Master Cycle // ============================================================ // DEE will bail out to exhaustive search when the upper limit on the // search space falls below exp(dLogBailout). dLogBailout should // probably be a user defined parameter. start_time = time(NULL); double dLogBailout; if(protein->parameters.ln_exhaustive_search_max_combo!=ENDFLAG) dLogBailout = protein->parameters.ln_exhaustive_search_max_combo; else dLogBailout = 11.5; unsigned int iElimCount = 0; int iMode = 0; // iMode chooses which type of costly algorithm to // apply at the end of each pass while(space.NumPos() > 0) { dee_logfile("Starting Goldstein Singles", logfile); while((iElimCount = GoldsteinSingles(eliminated)) > 0) { sprintf(logfile_line,"Goldstein Singles eliminated %d resimers, <= 10^%lf solutions remaining",iElimCount,eliminated.LogCombinations()/log(10.0)); dee_logfile(logfile_line, logfile); } if(CleanUp(eliminated, space, max_nodes, dLogBailout,logfile,start_time) == 0) { break; } dee_logfile("Starting Split Singles", logfile); while((iElimCount = SplitSingles(eliminated)) > 0) { sprintf(logfile_line,"Split Singles eliminated %d resimers, <= 10^%lf solutions remaining",iElimCount,eliminated.LogCombinations()/log(10.0)); dee_logfile(logfile_line, logfile); } if(CleanUp(eliminated, space, max_nodes, dLogBailout,logfile,start_time) == 0) { break; } if(bounding_flag) { if(protein->parameters.dee_E_bounding!=0.0) E_ref = protein->parameters.dee_E_bounding; else /* MC to get bounding energy */ { dee_logfile("MC w/ elimination table to get bounding energy should go here", logfile); /* dee_logfile("MC w/ elimination table to get bounding energy", logfile); E_current = MC_with_elimination_table(protein, eliminated); if(E_current 0) { sprintf(logfile_line,"Bounding Singles eliminated %d resimers, <= 10^%lf solutions remaining",iElimCount,eliminated.LogCombinations()/log(10.0)); dee_logfile(logfile_line, logfile); } if(CleanUp(eliminated, space, max_nodes, dLogBailout,logfile,start_time) == 0) { break; } } switch(iMode) { case 0: dee_logfile("Starting Magic Bullet Doubles", logfile); iElimCount = MagicBulletDoubles(eliminated); if(iElimCount > 0) { sprintf(logfile_line,"MagicBulletDoubles eliminated %d resimer pairs",iElimCount); dee_logfile(logfile_line, logfile); } break; default: dee_logfile("Starting Unification", logfile); Unify(eliminated, space, logfile); break; }; iMode = (iMode + 1) % 2; if(CleanUp(eliminated, space, max_nodes, dLogBailout,logfile,start_time) == 0) { break; } } // End DEE Master cycle // ================================================== // Debugging dump of DEE output // ================================================== space.DumpFixedPositions(); sprintf(logfile_line,"Final energy for converged positions: %lf",space.FixedEnergy() ); dee_logfile(logfile_line, logfile); // At this point, eliminated will have size 0 if the DEE converged // and size non-zero if it did not converge (need to confirm this) /* global found, so generate the final structure from the table */ int mc_flag; mc_flag=1; if(eliminated.LogCombinations() == 0.0) { /* if DEE converges less than or equal to MC */ if(protein->E_working >= space.FixedEnergy() || fabs(protein->E_working - space.FixedEnergy()) <= EPS2) { space.DeeSoln_to_CHROMOSOME(&protein->final_chr); final_chr_to_final_pdb_final_energy(protein); protein->E_working = space.FixedEnergy(); mc_flag=0; } } if(mc_flag==1) // do mc using the eliminated table { dee_logfile("DEE failed to converge; doing MC w/ elimination table", logfile); dee_logfile("MC w/ elimination table to get bounding energy should go here", logfile); dee_logfile("Doing MC w/o the table, for now", logfile); /* replace w/ MC_with_elimination_table */ sprintf(protein->parameters.algorithm,"MC_GA"); dummyint2 = protein->parameters.number_GA_cycles; protein->parameters.ga_convergence = 30; dummyint1=protein->parameters.number_MC_cycles; if(protein->parameters.number_MC_cycles==2500) protein->parameters.number_MC_cycles=250; MC_rotamer_control(protein); protein->parameters.number_MC_cycles = dummyint1; protein->parameters.number_GA_cycles = dummyint2; sprintf(protein->parameters.algorithm,"DEE"); } free_memory(escape_hatch_filename); free_memory(logfile_line); if(logfile!=NULL) free_memory(logfile); return EXIT_SUCCESS; } int Unify(DeeTable& eliminated, DeeSpace& space, const char *logfile) { char *logfile_line; logfile_line = (char *)calloc(MXLINE_INPUT,sizeof(char)); if(space.NumPos() < 2) { sprintf(logfile_line,"Can't unify; only %d positions remaining",space.NumPos() ); free_memory(logfile_line); return 0; } // Find the pair of unifiable positions that require the least // memory allocation unsigned int iPos1 = 0, iPos2 = 0; //unsigned int iCurMem = 0, iMemRequired = 0; double dBestDEP = 0.0, dCurDEP = 0.0; bool bInitFlag = false; for(unsigned int i = 0; i < space.NumPos(); ++i) { if(eliminated.AvailableResimers(i) < 2) { continue; } for(unsigned int j = i + 1; j < space.NumPos(); ++j) { if(eliminated.AvailableResimers(j) < 2) { continue; } // The following block chooses positions to unify based // on minimizing required memory //iCurMem = space.NumResimers(i) * space.NumResimers(j); //if((!bInitFlag) || (iCurMem < iMemRequired)) // { // bInitFlag = true; // iPos1 = i; // iPos2 = j; // iMemRequired = iCurMem; // } // The following block chooses positions to unify based // on maximizing DEP's dCurDEP = 0.0; for(unsigned int r = 0; r < space.NumResimers(i); ++r) { for(unsigned int s = 0; s < space.NumResimers(j); ++s) { if(!eliminated.Get(i,r,j,s)) { dCurDEP += 1.0; } } } // Set dCurDEP to fraction of DEP's for i and j dCurDEP /= double(space.NumResimers(i)*space.NumResimers(j)); if((!bInitFlag) || (dCurDEP > dBestDEP)) { bInitFlag = true; iPos1 = i; iPos2 = j; dBestDEP = dCurDEP; } } } if(!bInitFlag) { dee_logfile("Unexpectedly un-unifiable space", logfile); free_memory(logfile_line); return 0; } // Unify the chosen positions /* cout << "Unifying " << iPos1 << " with " << iPos2 << endl; */ eliminated.Unify(iPos1, iPos2); space.Unify(iPos1, iPos2); free_memory(logfile_line); return 1; } int CleanUp(DeeTable& eliminated, DeeSpace& space, const int iMaxNodes, const double& dLogBailout, const char *logfile, time_t start_time) { char *logfile_line; time_t now; extern double MAX_OPTIMIZATION_TIME; logfile_line = (char *)calloc(MXLINE_INPUT,sizeof(char)); // Make sure the table is logically self consistent int iElimCount = 0; while((iElimCount = LogicalSingles(eliminated)) > 0) { sprintf(logfile_line,"Logical Singles eliminated %d resimers, <= 10^%lf solutions remaining",iElimCount,eliminated.LogCombinations()/log(10.0)); dee_logfile(logfile_line, logfile); /* cout <<"Logical Singles eliminated " << iElimCount << " resimers, <= 10^" << eliminated.LogCombinations()/log(10.0) << " solutions remaining" << endl; */ } // Prune solved parts of the table dee_logfile("Pruning table...", logfile); list fix_list; for(unsigned int i = 0; i < space.NumPos(); ++i) { if(eliminated.AvailableResimers(i) < 2) { fix_list.push_front(i); } else { list rem_list; for(unsigned int r = 0; r < space.NumResimers(i); ++r) { if(!eliminated.Get(i,r)) { rem_list.push_front(r); } } for(list::iterator r = rem_list.begin(); r != rem_list.end(); ++r) { eliminated.RemoveRes(i,*r); space.RemoveRes(i,*r); } } } for(list::iterator i = fix_list.begin(); i != fix_list.end(); ++i) { unsigned int best = 0; for(unsigned int r = 0; r < space.NumResimers(*i); ++r) { if(eliminated.Get(*i,r)) { best = r; } } /* cout << "Fixing " << *i << " to " << best << endl; cout << "(" << space.PosID(*i) << ", " << space.ResType(*i, best) << ", " << space.ResimerID(*i, best) << ")" << endl; */ space.FixPos(*i,best); eliminated.FixPos(*i); } // Check for a unique solution double dUpperLimit = eliminated.LogCombinations(); if(dUpperLimit == 0) { dee_logfile("Only one possible sequence remaining", logfile); free_memory(logfile_line); return 0; } // Check for complete unification if(space.NumPos() == 1) { dee_logfile("Only one remaining position, solving by trivial minimization", logfile); double dBestEnergy = space.Get(0,0); unsigned int iBestResimer = 0; for(unsigned int r = 1; r < space.NumResimers(0); ++r) { double dTemp; if((dTemp = space.Get(0, r)) < dBestEnergy) { dBestEnergy = dTemp; iBestResimer = r; } } /* cout << "Fixing " << space.PosID(0) << " to " << space.ResimerID(0, iBestResimer) << endl; */ space.FixPos(0, iBestResimer); free_memory(logfile_line); return 0; } // Check if the problem is now small enough to solve by brute force if(dUpperLimit < dLogBailout) { dee_logfile("Starting exhaustive search...", logfile); ExhaustiveSolution *sol = 0; if(!(sol = ExhaustiveSearch(eliminated, iMaxNodes))) { dee_logfile("Exhaustive search couldn't find a solution", logfile); free_memory(logfile_line); return 0; } if(sol->success) { dee_logfile("Exhaustive search found a global minimum", logfile); } else { dee_logfile("Exhaustive search terminated before exploring the entire space", logfile); } // Use exhaustive search solution to fix remaining design space for(vector::const_iterator i = sol->resimers.begin(); i != sol->resimers.end(); ++i) { space.FixPos(0, *i); } delete sol; free_memory(logfile_line); return 0; } now = time(NULL); if(difftime(now,start_time) >= MAX_OPTIMIZATION_TIME) { dee_logfile("Out of time....do MC with elimination table", logfile); free_memory(logfile_line); return 0; } free_memory(logfile_line); // Done cleaning up, proceed to more elimination return 1; }