/* EGAD: MC.cpp Navin Pokala and Tracy Handel Dept. of Molecular and Cell Biology University of California, Berkeley Copyright (C) 2003 Regents of the University of California GNU Public License Aug 12 2003 Absolutely no warranties are made or are implied with the use of this program or its parts. This file contains functions for using Monte Carlo simulated annealing (MC) for rotamer optimization. */ #include "MC.h" /* controls MC_rotamers. assumes that protein has been initialized by input_stuff and generate_lookup_table. Final structures etc generated by final_chr_to_final_pdb_final_energy; ready for output_PROTEIN. */ void MC_rotamer_control(PROTEIN *protein) { int i, k, q,p, first, last, stop_flag, current_num_moving_pos; extern int LOGFILE_FLAG,LOGFILE_PERIOD,GET_PID; extern double MAX_OPTIMIZATION_TIME; extern char *LOOKUP_TABLE_DIRECTORY, *CURRENT_WORKING_DIRECTORY; char *logfile,*logfile_line, *escape_hatch_filename, *name; FILE *file_ptr; time_t now, start_time; double E_pre_quench; goldstein_singles_elimination(&protein->parameters, protein->var_pos, protein->lookupEnergy); strcpy(LOOKUP_TABLE_DIRECTORY,protein->parameters.lookup_energy_table_directory); escape_hatch_filename = (char *)calloc(MAXLINE,sizeof(char)); sprintf(escape_hatch_filename,"%s/escape.MC.%d",CURRENT_WORKING_DIRECTORY,GET_PID); logfile=NULL; logfile_line=NULL; name=NULL; if(LOGFILE_FLAG != 0) { logfile = (char *)calloc(MXLINE_INPUT,sizeof(char)); sprintf(logfile, "%s.log", protein->parameters.output_prefix); logfile_line=(char *)calloc(MXLINE_INPUT,sizeof(char)); logfile_line[0]='\0'; logfile_line[1]='\0'; logfile_line[2]='\0'; name=(char *)calloc(MXLINE_INPUT,sizeof(char)); now = time(NULL); file_ptr = fopen_file(logfile, "a"); fprintf(file_ptr,"Starting MC\t%s",ctime(&now)); fprintf(file_ptr,"To exit the MC and move to the next step:"); fprintf(file_ptr,"\ttouch %s\n",escape_hatch_filename); fclose(file_ptr); } /* allocate memory for protein->chr */ if(protein->parameters.number_MC_solns == 0) protein->parameters.number_MC_solns = protein->parameters.number_MC_cycles/10; if(protein->parameters.number_MC_solns%2!=0) protein->parameters.number_MC_solns = protein->parameters.number_MC_solns+1; if(protein->parameters.number_MC_cycles < protein->parameters.number_MC_solns) protein->parameters.number_MC_solns = protein->parameters.number_MC_cycles; first = 1; last = protein->parameters.number_MC_cycles+1; protein->parameters.finalGAseed_population = protein->parameters.number_MC_cycles; q = 2*protein->parameters.finalGAseed_population; protein->sizeof_chr_array = q; protein->chr = (CHROMOSOME *)calloc((q+2),sizeof(CHROMOSOME)); for(i=1;i<=q;++i) { inoculate_sidechains(&protein->chr[i], protein->var_pos,0); protein->chr[i].bkbngenes = NULL; protein->chr[i].first_bkbngene = NULL; CHROMOSOME_to_lookupEnergy(&protein->chr[i], &protein->parameters.fixedatoms_energy); } /* do MC until it's time to stop */ k=1; stop_flag = 0; p=0; start_time = time(NULL); current_num_moving_pos = 0; i=1; while(protein->var_pos[i].seq_position!=ENDFLAG) { if(protein->var_pos[i].fixed_flag == 0) if(protein->var_pos[i].dead_ended_flag == 0) ++current_num_moving_pos; ++i; } /* not really moving, so just go through the motions */ /* or only one position is moving....HQM is much better */ if(protein->parameters.log10_rotamer_combinations==0 || current_num_moving_pos <= 1) { stop_flag=1; CHROMOSOME_to_lookupEnergy(&protein->chr[1], &protein->parameters.fixedatoms_energy); for(i=2;i<=protein->parameters.number_MC_cycles;++i) protein->chr[i].energy = protein->chr[1].energy; } while(stop_flag == 0) { randomize_sidechains(&protein->chr[k],1); CHROMOSOME_to_lookupEnergy(&protein->chr[k], &protein->parameters.fixedatoms_energy); /* start the clock here; if this is a rotamer_calc_foreman type job, it will take some time to load/calc the energies the first time CHROMOSOME_to_lookupEnergy is called */ if(k==1) start_time = time(NULL); /* run an MC */ MC_rotamers(&protein->parameters, &protein->chr[k], protein->var_pos); /* record in logfile */ now = time(NULL); if(LOGFILE_FLAG != 0) { if(k%LOGFILE_PERIOD == 0 || k == 1) { ++p; sprintf(logfile_line, "%smc\t%d\t%f\t%s", logfile_line,k, protein->chr[k].energy, ctime(&now)); } if(p%5==0) /* write to logfile every 5th lines */ { file_ptr = fopen_file(logfile, "a"); fprintf(file_ptr,"%s",logfile_line); fclose(file_ptr); logfile_line[0]='\0';logfile_line[1]='\0';logfile_line[2]='\0'; } } /* do we quit? */ /* time's up! quit, but do at least 10 */ if((difftime(now,start_time) >= MAX_OPTIMIZATION_TIME || does_this_file_exist(escape_hatch_filename)==1) && k>=10) { protein->parameters.number_MC_cycles = k; protein->parameters.number_MC_solns = protein->parameters.number_MC_cycles/10; } ++k; /* we've done enough cycles....quit */ if(k >= protein->parameters.number_MC_cycles) stop_flag = 1; } if(does_this_file_exist(escape_hatch_filename)==1) rm_file(escape_hatch_filename); // if there are only a few solutions, HQM them all if(protein->parameters.number_MC_cycles < 20) protein->parameters.number_MC_solns = protein->parameters.number_MC_cycles; if(protein->parameters.number_MC_solns%2!=0) protein->parameters.number_MC_solns = protein->parameters.number_MC_solns + 1; if(LOGFILE_FLAG != 0) { file_ptr = fopen_file(logfile, "a"); fprintf(file_ptr,"%s",logfile_line); fclose(file_ptr); logfile_line[0]='\0';logfile_line[1]='\0';logfile_line[2]='\0'; } sort_CHROMOSOME(&first, &last, protein->chr); // if these are the same, most of the chrs are isoenergetic; most likely already converged; // running get_rid_of_isoenergetic_chrs or hqm won't do anything if(fabs(protein->chr[1].energy - protein->chr[protein->parameters.number_MC_cycles/2].energy)>EPS) { if(LOGFILE_FLAG != 0) { file_ptr = fopen_file(logfile, "a"); now = time(NULL); fprintf(file_ptr, "filter identical solutions\t%s", ctime(&now)); fclose(file_ptr); } get_rid_of_isoenergetic_chrs(protein->chr, protein->parameters.mutation_freq, protein->parameters.number_MC_cycles, protein->parameters.fixedatoms_energy); sort_CHROMOSOME(&first, &last, protein->chr); if(LOGFILE_FLAG != 0) { file_ptr = fopen_file(logfile, "a"); now = time(NULL); fprintf(file_ptr, "start hqm\t%s", ctime(&now)); fclose(file_ptr); } // quench the top protein->parameters.number_MC_solns solutions in protein->chr for(i=1;i<=protein->parameters.number_MC_solns;++i) { E_pre_quench = protein->chr[i].energy; HQM_rotamers(&protein->parameters, &protein->chr[i], protein->var_pos); if(LOGFILE_FLAG != 0) { file_ptr = fopen_file(logfile, "a"); now = time(NULL); fprintf(file_ptr, "quench\t%d\t%f\t%f\t%s", i, E_pre_quench, protein->chr[i].energy, ctime(&now)); fclose(file_ptr); } } sort_CHROMOSOME(&first, &last, protein->chr); if(LOGFILE_FLAG != 0) { sprintf(name,"%s.mc_hqm", protein->parameters.output_prefix); output_data(protein->chr, &protein->parameters.number_MC_solns, name, protein); } // if all the quenched solutions are identical, GA probably won't get us anything if(fabs(protein->chr[1].energy - protein->chr[protein->parameters.number_MC_solns/2].energy)>EPS || protein->parameters.number_MC_solns<=4) { get_rid_of_isoenergetic_chrs(protein->chr, protein->parameters.mutation_freq, protein->parameters.number_MC_cycles, protein->parameters.fixedatoms_energy); sort_CHROMOSOME(&first, &last, protein->chr); /* do GA/HQM with this population if so specified */ if(strstr(protein->parameters.algorithm, "GA")!=0) { GA_rotamer_control(&protein->parameters, protein->chr, protein->parameters.number_MC_solns, protein->mini_fixed_atoms, protein->var_pos); sort_CHROMOSOME(&first, &last, protein->chr); if(LOGFILE_FLAG != 0) { sprintf(name,"%s.ga", protein->parameters.output_prefix); output_data(protein->chr, &protein->parameters.number_MC_solns, name, protein); } } } } /* save protein->E_working and protein->final_chr */ protein->E_working = protein->chr[1].energy; inoculate_sidechains(&protein->final_chr, protein->var_pos,0); /* allocate memory for protein->final_chr */ copyCHROMOSOME(protein->final_chr, protein->chr[1]); /* copy protein->chr[1] into protein->final_chr */ /* generate final pdb arrays, energy */ final_chr_to_final_pdb_final_energy(protein); if(LOGFILE_FLAG != 0) { free_memory(logfile); free_memory(logfile_line); free_memory(name); } free_memory(escape_hatch_filename); } /* chr is used as a starting point for simulated annealing rotamer/resimer optimization. if solubility is a criterion, chr must be soluble. num_cycles = number of consecutive moves that do not result in a decrease in the best energy. The best soln is returned in chr */ void MC_rotamers(PARAMETERS *parameters, CHROMOSOME *chr, VARIABLE_POSITION *var_pos) { int neighborOptimizeFlag = 0; int GenX = 0; static GENE i_gene=NULL; int i, k, accept; int num_actual_steps, num_accepted, num_isoenergy, num_moving_positions; int num_isoenergy_safety, safety_convergence; int i_res, i_res_rot,solub_ctr; static int *indicies_of_floating_positions=NULL; static int max_residues=0; CHROMOSOME dummychr; double energy, current_energy, dE,fabs_dE; double T; extern int SOLUBILITY_CUTOFF_FLAG; extern double FRACTION_HYDROPHOBIC_SASA_CUTOFF, TRANSFER_FREE_ENERGY_DENSITY_CUTOFF; double sasa_total, sasa_hydrophobic, frac_hydrophobic, overall_charge, fixed_sasa, fixed_hphob_sasa, fixed_charge, E_transfer; double sasa_other, sasa_hphob_other, charge_other, fixed_E_transfer, E_transfer_other, E_transfer_density; if(is_this_chr_soluble(chr)==0) { solubilize_CHROMOSOME(chr); /* failure_report("ERROR MC.cpp: MC_rotamers assumes that the chr input is soluble", "abort"); */ } inoculate_sidechains(&dummychr, var_pos,0); dummychr.bkbngenes = NULL; dummychr.first_bkbngene = NULL; /* identify floating positions so moves are restricted to them only */ if(max_residues< MAX_RESIDUES) { max_residues=MAX_RESIDUES; if(indicies_of_floating_positions != NULL) free_memory(indicies_of_floating_positions); indicies_of_floating_positions = (int *)calloc(MAX_RESIDUES,sizeof(int)); } i=1; k=0; num_moving_positions = 0; while(var_pos[i].seq_position!=ENDFLAG) { if(var_pos[i].fixed_flag == 0) /* not fixed, so add to the list */ if(var_pos[i].dead_ended_flag == 0) /* not dead-ended, so add to the list */ { ++num_moving_positions; ++k; indicies_of_floating_positions[k] = i; } ++i; } parameters->MC_convergence = 100*num_moving_positions; safety_convergence = 100*parameters->MC_convergence; if(i_gene == NULL) i_gene = (MENDEL *)malloc(sizeof(MENDEL)); copyCHROMOSOME(dummychr, (*chr)); /* dummychr is the working chromosome */ CHROMOSOME_to_lookupEnergy(&dummychr,¶meters->fixedatoms_energy); frac_hydrophobic = 0; /* calculate fixed_position contributions to sasa and charge */ if(SOLUBILITY_CUTOFF_FLAG == 1) fixed_position_sasa_charge(&dummychr, &fixed_sasa, &fixed_hphob_sasa, &fixed_charge, &fixed_E_transfer); /* the simulated annealing */ T = parameters->mc_T0; fabs_dE=1e10; GenX=0; num_actual_steps=0; num_accepted=0; num_isoenergy=0; num_isoenergy_safety=0; while(GenX<=parameters->MC_convergence) { ++num_actual_steps; /* pick the position to mutate */ k=0; while(k==0) k = randint(num_moving_positions); i = indicies_of_floating_positions[k]; /* convert to a gene */ dummychr.genes = dummychr.firstgene; while(dummychr.genes->seq_position!=var_pos[i].seq_position) dummychr.genes = dummychr.genes->nextgene; copyGENE(i_gene, dummychr.genes); neighborOptimizeFlag = 1; i_res = i_gene->j_choice_index + 1; i_res_rot=i_gene->lookupRot_index; /* get the current energy of this gene */ current_energy = lookup_energy_sasa_charge_other(i, i_gene, dummychr.firstgene, &sasa_other, &sasa_hphob_other, &charge_other, &E_transfer_other) + iROT->energy_var_fix; /* make a move at this position */ mutate_sidechain(i_gene, var_pos[i]); if(i_gene->varpos_ptr->number_of_resimers > 1) while(i_gene->lookupRot == dummychr.genes->lookupRot) mutate_sidechain(i_gene, var_pos[i]); solub_ctr=0; if(SOLUBILITY_CUTOFF_FLAG == 1) /* keep mutating until solubility criterion is met */ { sasa_other += fixed_sasa; sasa_hphob_other += fixed_hphob_sasa; charge_other += fixed_charge; E_transfer_other += fixed_E_transfer; overall_charge = charge_other + i_gene->choice_ptr->resparam_ptr->overall_charge_pH_avg; sasa_total = sasa_other + i_gene->choice_ptr->lookup_res_ptr->sasa_total; sasa_hydrophobic = sasa_hphob_other + i_gene->choice_ptr->lookup_res_ptr->sasa_hphob; E_transfer = E_transfer_other + i_gene->choice_ptr->lookup_res_ptr->E_transfer; frac_hydrophobic = sasa_hydrophobic/sasa_total; E_transfer_density = E_transfer/sasa_total; while(solub_ctr <= 1001 && (frac_hydrophobic >= FRACTION_HYDROPHOBIC_SASA_CUTOFF || charge_within_specifications(overall_charge)==0 || E_transfer_density >= TRANSFER_FREE_ENERGY_DENSITY_CUTOFF)) { mutate_sidechain(i_gene, var_pos[i]); sasa_total = sasa_other + i_gene->choice_ptr->lookup_res_ptr->sasa_total; sasa_hydrophobic = sasa_hphob_other + i_gene->choice_ptr->lookup_res_ptr->sasa_hphob; overall_charge = charge_other + i_gene->choice_ptr->resparam_ptr->overall_charge_pH_avg; E_transfer = E_transfer_other + i_gene->choice_ptr->lookup_res_ptr->E_transfer; frac_hydrophobic = sasa_hydrophobic/sasa_total; E_transfer_density = E_transfer/sasa_total; if(solub_ctr>=1000) solub_ctr=2010; ++solub_ctr; } } /* this move is acceptable; now accept or reject it based on energy */ i_res = i_gene->j_choice_index + 1; i_res_rot=i_gene->lookupRot_index; /* energy of this new gene */ energy = lookup_energy_sasa_charge_other(i, i_gene, dummychr.firstgene, &sasa_other, &sasa_hphob_other, &charge_other, &E_transfer_other) + iROT->energy_var_fix; /* DEBUG printf("i=%d\n",i); dE = energy - current_energy; printf("dE = %lf\n",dE); copyGENE(dummychr.genes, i_gene); dummychr.genes = dummychr.firstgene; CHROMOSOME_to_lookupEnergy(&dummychr,¶meters->fixedatoms_energy); printf("deltaE = %lf\n",dummychr.energy - chr->energy); exit(0); */ /* accept this move?? */ accept=0; if(solub_ctr<1000) /* insoluble, so reject */ { dE = energy - current_energy; fabs_dE = fabs(dE); if(fabs_dE > EPS) /* if not equal */ { num_isoenergy=0; if(dE < 0) /* always accept downhill moves */ accept=1; else /* else, Metropolis criterion */ if(dice() <= exp(-(dE)/(T*R_univ))) accept=1; } else { ++num_isoenergy; if(num_isoenergy >= parameters->MC_convergence) GenX = parameters->MC_convergence + 10; } /* if there is some rounding/truncation issue */ if(fabs_dE > EPS2) num_isoenergy_safety=0; else { ++num_isoenergy_safety; if(num_isoenergy_safety>safety_convergence) /* bail out */ GenX = parameters->MC_convergence+10; } if(accept==1) /* move accepted */ { ++num_accepted; /* copy i_gene to the working chromosome */ copyGENE(dummychr.genes, i_gene); dummychr.genes = dummychr.firstgene; if(dE<0) /* if we're going downhill, are we the lowest energy yet? */ { CHROMOSOME_to_lookupEnergy(&dummychr,¶meters->fixedatoms_energy); /* new lower energy; reset convergence counters */ if(dummychr.energy < chr->energy) { copyCHROMOSOME((*chr), dummychr); /* copy best soln to date to chr */ neighborOptimizeFlag=0; GenX=0; } } } } // printf("%d\t%lf\n",GenX, chr->energy); if(neighborOptimizeFlag == 1) ++GenX; T = update_simulation_temperature(T, parameters->mc_dT, parameters->mc_Tf); } free_CHROMOSOME(&dummychr); }