/* EGAD: GA.cpp Navin Pokala and Tracy Handel Dept. of Molecular and Cell Biology University of California, Berkeley Copyright (C) 2003 Regents of the University of California GNU Public License Aug 12 2003 Absolutely no warranties are made or are implied with the use of this program or its parts. This file contains functions for doing rotamer optimization with genetic algorithms. */ #include "GA.h" /* This function controls the GA. It launches the protein->parameters.number_GA_cycles number of independent GA's, pools the results, and launches an GA seeded with the winners from the independent runs. Final soln information stored in protein->final_*; ready for output_PROTEIN. */ void GA_rotamers_master_control(PROTEIN *protein) { int i, q; extern int LOGFILE_FLAG; extern char *LOOKUP_TABLE_DIRECTORY; goldstein_singles_elimination(&protein->parameters, protein->var_pos, protein->lookupEnergy); strcpy(LOOKUP_TABLE_DIRECTORY,protein->parameters.lookup_energy_table_directory); /* inoculate protein.chr with 2*finalGAseed_population chromosomes */ protein->parameters.finalGAseed_population = protein->parameters.number_GA_cycles*protein->parameters.num_GA_solns_per_cycle; q = 2*protein->parameters.finalGAseed_population; protein->sizeof_chr_array = q; protein->chr = (CHROMOSOME *)calloc((q+4),sizeof(CHROMOSOME)); for(i=1;i<=q;++i) { inoculate_sidechains(&protein->chr[i], protein->var_pos, 1); generate_custom_sequence_CHROMOSOME(&protein->chr[i],0); protein->chr[i].bkbngenes = NULL; protein->chr[i].first_bkbngene = NULL; CHROMOSOME_to_lookupEnergy(&protein->chr[i], &protein->parameters.fixedatoms_energy); } GA_rotamer_control(&protein->parameters, protein->chr, -q, protein->mini_fixed_atoms, protein->var_pos); if(LOGFILE_FLAG != 0) output_data(protein->chr, &protein->parameters.finalGAseed_population, protein->parameters.output_prefix, protein); protein->E_working = protein->chr[1].energy; inoculate_sidechains(&protein->final_chr, protein->var_pos,0); /* allocate memory for protein->final_chr */ copyCHROMOSOME(protein->final_chr, protein->chr[1]); /* copy protein->chr[1] into protein->final_chr */ final_chr_to_final_pdb_final_energy(protein); } /* This function seeds parameters->number_GA_cycles GAs with chrs array of chrs_size. The first GA uses all the chrs members as seeds; the nth GA run includes chrs[n] onward. If chrs_size<0, then all the seeds for all the runs are random. After all the GA's are done, a tournament GA is performed where the top parameters->num_GA_solns_per_cycle from each GA are used to seed the initial population. The final population is returned in chrs */ void GA_rotamer_control(PARAMETERS *parameters, CHROMOSOME *chrs, int chrs_size, mini_pdbATOM fixed_atoms[], VARIABLE_POSITION var_pos[]) { int i,j, k; CHROMOSOME *working_chrs, *final_chrs; int newpopsize,popsize,oldpopsize; int SuperGenX; int first, final_chr_ctr, actual_cycle_ctr; char *logfile, *escape_hatch_filename; FILE *file_ptr; time_t now, start_time; extern int LOGFILE_FLAG, GET_PID; extern char *CURRENT_WORKING_DIRECTORY; extern double MAX_OPTIMIZATION_TIME; logfile = (char *)calloc(MXLINE_INPUT,sizeof(char)); sprintf(logfile, "%s.log", parameters->output_prefix); escape_hatch_filename = (char *)calloc(MXLINE_INPUT,sizeof(char)); sprintf(escape_hatch_filename,"%s/escape.GA.%d",CURRENT_WORKING_DIRECTORY,GET_PID); if(LOGFILE_FLAG!=0) { file_ptr = fopen_file(logfile, "a"); now = time(NULL); fprintf(file_ptr, "Starting GA\t%s", ctime(&now)); fprintf(file_ptr,"To exit the GA and move to the next step:"); fprintf(file_ptr,"\ttouch %s\n",escape_hatch_filename); fclose(file_ptr); } first = 1; if(chrs_size>0) sort_CHROMOSOME(&first, &chrs_size, chrs); if(parameters->pop_size > abs(chrs_size)) popsize = parameters->pop_size+10; else popsize = abs(chrs_size); oldpopsize = parameters->pop_size; parameters->pop_size = popsize; newpopsize = 2*parameters->pop_size; /* inoculate and allocate enough space for 2 x pop_size */ working_chrs = (CHROMOSOME *)calloc((newpopsize+2),sizeof(CHROMOSOME)); final_chrs = (CHROMOSOME *)calloc((newpopsize+2),sizeof(CHROMOSOME)); for(i=1;i<=newpopsize;++i) { inoculate_sidechains(&working_chrs[i], var_pos,0); inoculate_sidechains(&final_chrs[i], var_pos,0); working_chrs[i].bkbngenes = NULL; working_chrs[i].first_bkbngene = NULL; final_chrs[i].bkbngenes = NULL; final_chrs[i].first_bkbngene = NULL; generate_custom_sequence_CHROMOSOME(&working_chrs[i],0); generate_custom_sequence_CHROMOSOME(&final_chrs[i],0); CHROMOSOME_to_lookupEnergy(&working_chrs[i], ¶meters->fixedatoms_energy); CHROMOSOME_to_lookupEnergy(&final_chrs[i], ¶meters->fixedatoms_energy); } start_time = time(NULL); final_chr_ctr = 0; actual_cycle_ctr=0; SuperGenX=1; while(SuperGenX<=parameters->number_GA_cycles) { ++actual_cycle_ctr; /* seed */ k=1; for(j=SuperGenX;j<=chrs_size;++j) { copyCHROMOSOME(working_chrs[k],chrs[j]); ++k; } while(k<=newpopsize) { randomize_sidechains(&working_chrs[k],1); CHROMOSOME_to_lookupEnergy(&working_chrs[k], ¶meters->fixedatoms_energy); ++k; } get_rid_of_isoenergetic_chrs(working_chrs, parameters->mutation_freq, newpopsize, parameters->fixedatoms_energy); sort_CHROMOSOME(&first, &newpopsize, working_chrs); evolve_sidechains(parameters, working_chrs, var_pos); if(LOGFILE_FLAG!=0) { file_ptr = fopen_file(logfile, "a"); now = time(NULL); fprintf(file_ptr, "GA\t%d\t%f\t%s", SuperGenX, working_chrs[1].energy, ctime(&now)); fclose(file_ptr); } /* copy the best num_final_chr members from the population into final_chrs */ for(k=1;k<=parameters->num_GA_solns_per_cycle;++k) { ++final_chr_ctr; copyCHROMOSOME(final_chrs[final_chr_ctr],working_chrs[k]); } /* time's up! quit */ if(difftime(now,start_time) >= MAX_OPTIMIZATION_TIME || does_this_file_exist(escape_hatch_filename)==1 ) { SuperGenX = parameters->number_GA_cycles+10; } ++SuperGenX; } k=final_chr_ctr+1; while(k<=newpopsize) { randomize_sidechains(&final_chrs[k],1); CHROMOSOME_to_lookupEnergy(&final_chrs[k], ¶meters->fixedatoms_energy); ++k; } get_rid_of_isoenergetic_chrs(final_chrs, parameters->mutation_freq, newpopsize, parameters->fixedatoms_energy); sort_CHROMOSOME(&first, &newpopsize, final_chrs); /* tournament GA with winners from individual GA's, if there is time */ if(difftime(now,start_time) < MAX_OPTIMIZATION_TIME && actual_cycle_ctr>1 && does_this_file_exist(escape_hatch_filename)==0) evolve_sidechains(parameters, final_chrs, var_pos); if(does_this_file_exist(escape_hatch_filename)==1) rm_file(escape_hatch_filename); if(LOGFILE_FLAG!=0) { file_ptr = fopen_file(logfile, "a"); now = time(NULL); fprintf(file_ptr, "GA\tfinal\t%f\t%s", final_chrs[1].energy, ctime(&now)); fclose(file_ptr); } for(i=1;i<=abs(chrs_size);++i) copyCHROMOSOME(chrs[i],final_chrs[i]); for(i=1;i<=newpopsize;++i) { free_CHROMOSOME(&working_chrs[i]); free_CHROMOSOME(&final_chrs[i]); } parameters->pop_size = oldpopsize; free_memory(working_chrs); free_memory(final_chrs); free_memory(logfile); free_memory(escape_hatch_filename); } void find_best_ligand_confs(int *lig_index, VARIABLE_POSITION *var_pos, CHROMOSOME *dummychr) { int lig_ctr, solub_ctr; double sasa_other=0, sasa_hphob_other=0, charge_other=0, E_transfer_other=0; int best_res=1, best_res_rot=1; GENE lig_gene; extern int SOLUBILITY_CUTOFF_FLAG; return; if(lig_index == NULL) return; lig_ctr=1; while(lig_index[lig_ctr]!=ENDFLAG) { solub_ctr = SOLUBILITY_CUTOFF_FLAG; SOLUBILITY_CUTOFF_FLAG = 0; find_better_res_res_rot(lig_index[lig_ctr], var_pos, dummychr->firstgene, sasa_other, sasa_hphob_other, charge_other, E_transfer_other, &best_res, &best_res_rot); SOLUBILITY_CUTOFF_FLAG = solub_ctr; lig_gene = dummychr->firstgene; while(lig_gene->seq_position != var_pos[lig_index[lig_ctr]].seq_position) lig_gene = lig_gene->nextgene; lig_gene->j_choice_index = best_res - 1; lig_gene->choice_ptr = &var_pos[lig_index[lig_ctr]].choice[best_res]; lig_gene->lookupRot_index = best_res_rot; lig_gene->chi = lig_gene->choice_ptr->resparam_ptr->rotamerlib_ptr->rotamer[best_res_rot].chi; lig_gene->lookupRot = &(lig_gene->choice_ptr->lookup_res_ptr->lookupRot[best_res_rot]); ++lig_ctr; } } /* this is a GA that uses a pre-calculated lookuptable for rotamer optimization input a seeded CHROMOSOME 2*parameters->pop_size array chrs[]. All elements of chrs must be soluble. best soln stored in chr[1] */ /* score energy */ /* temperature decay between T0 and Tfinal */ /* increase mut_freq if 3 or more cycles have the same energy after ten isoenergetic cycles, relax w/o increasing the mut_freq, to allow the mutations to spread into the population */ /* convergence criterion - same energy for parameters->ga_convergence cycles */ void evolve_sidechains(PARAMETERS *parameters, CHROMOSOME chrs[], VARIABLE_POSITION var_pos[]) { int newpopsize; int first, i; double T; double E_pre_quench; int convergence_counter, numCyclesSameEnergy; double prevEnergy, currentenergy; double dummy_mutfreq, temperature_decr; int GenX; int popsizeplus1; char *logfile_line, *escape_hatch_filename; FILE *file_ptr; extern int LOGFILE_FLAG, LOGFILE_PERIOD, GET_PID, MAX_MUTATIONS; extern double R; extern double MAX_OPTIMIZATION_TIME; extern char *CURRENT_WORKING_DIRECTORY; time_t now, start_time; char *logfile; int num_isoenergy_safety, safety_convergence; first = 1; R=R_univ; T=parameters->ga_T0; newpopsize = 2*parameters->pop_size; popsizeplus1 = parameters->pop_size+1; logfile = (char *)calloc(MXLINE_INPUT,sizeof(char)); sprintf(logfile,"%s.log",parameters->output_prefix); logfile_line = (char *)calloc(MXLINE_INPUT,sizeof(char)); logfile_line[0] = '\0'; logfile_line[1] = '\0'; logfile_line[2] = '\0'; /* buffer for logfile */ escape_hatch_filename = (char *)calloc(MAXLINE,sizeof(char)); sprintf(escape_hatch_filename,"%s/escape.GA.%d",CURRENT_WORKING_DIRECTORY,GET_PID); convergence_counter=0; // score energy of initial population for(i=1; i<=newpopsize; ++i) { if(is_this_chr_soluble(&chrs[i])==0) solubilize_CHROMOSOME(&chrs[i]); CHROMOSOME_to_lookupEnergy(&chrs[i],¶meters->fixedatoms_energy); } get_rid_of_isoenergetic_chrs(chrs, parameters->mutation_freq, newpopsize, parameters->fixedatoms_energy); sort_CHROMOSOME(&first,&newpopsize,chrs); // sorted input CHROMOSOME array if(calculate_mating_frequencies(chrs, parameters->pop_size, T)==0) convergence_counter = parameters->ga_convergence + 20; prevEnergy=0; currentenergy=chrs[1].energy; numCyclesSameEnergy=1; dummy_mutfreq = parameters->mutation_freq; GenX=0; start_time = time(NULL); T=parameters->ga_T0; temperature_decr=10; safety_convergence = 100*parameters->ga_convergence; num_isoenergy_safety=0; start_time = time(NULL); while(convergence_counter <= parameters->ga_convergence) { prevEnergy = currentenergy; ++GenX; // mate chrs[1]->[popsize], loading into chrs[popsize+1]->[newpopsize] // children are made soluble within mating function if need be mate_sidechains(chrs,parameters->pop_size,parameters->recomb_freq,dummy_mutfreq); // score energy for(i=(popsizeplus1); i<=newpopsize; ++i) { CHROMOSOME_to_lookupEnergy(&chrs[i],¶meters->fixedatoms_energy); } get_rid_of_isoenergetic_chrs(chrs, dummy_mutfreq, newpopsize, parameters->fixedatoms_energy); sort_CHROMOSOME(&first,&newpopsize,chrs); // sort population currentenergy=chrs[1].energy; if(calculate_mating_frequencies(chrs, parameters->pop_size, T)==0) convergence_counter = parameters->ga_convergence + 20; // write logfile if(LOGFILE_FLAG != 0) { if(GenX%LOGFILE_PERIOD == 0 || GenX == 1) { now = time(NULL); sprintf(logfile_line, "%sga\t%d\t%f\t%f\t%s",logfile_line,GenX, chrs[1].energy,fabs(prevEnergy - currentenergy),ctime(&now)); } if(GenX%5==0) { file_ptr = fopen(logfile,"a"); fprintf(file_ptr,"%s",logfile_line); fclose(file_ptr); logfile_line[0] = '\0'; logfile_line[1] = '\0'; logfile_line[2] = '\0'; } } // check for convergence or time-out if(fabs(prevEnergy - currentenergy)<=EPS) { ++numCyclesSameEnergy; ++convergence_counter; // increase mutation freq if it looks like we're in a local minimum; 10 generations w/ equal energy // after 10 cycles of increased mut_freq, relax for 10 cycles at parameters.mutation_freq if(convergence_counter >= 10) { if(numCyclesSameEnergy<10) dummy_mutfreq = dummy_mutfreq + 0.025; else dummy_mutfreq = parameters->mutation_freq; if(numCyclesSameEnergy%20==0) numCyclesSameEnergy=1; } } else { numCyclesSameEnergy=1; dummy_mutfreq = parameters->mutation_freq; convergence_counter=1; } if(fabs(prevEnergy - currentenergy)>EPS2) num_isoenergy_safety=0; else { ++num_isoenergy_safety; if(num_isoenergy_safety>safety_convergence) // bail out convergence_counter = parameters->ga_convergence + 20; } // adjust temperature T = update_simulation_temperature(T, parameters->ga_dT, parameters->ga_Tf); // bail out if we're taking too long now = time(NULL); if(difftime(now,start_time) >= MAX_OPTIMIZATION_TIME || does_this_file_exist(escape_hatch_filename)==1) convergence_counter = parameters->ga_convergence + 20; } if(LOGFILE_FLAG != 0) { file_ptr = fopen(logfile,"a"); fprintf(file_ptr,"%s",logfile_line); fclose(file_ptr); logfile_line[0] = '\0'; logfile_line[1] = '\0'; logfile_line[2] = '\0'; } // sort and quench final population sort_CHROMOSOME(&first,&newpopsize,chrs); if(MAX_MUTATIONS == 0) for(i=1;i<=parameters->num_GA_solns_per_cycle;++i) { E_pre_quench = chrs[i].energy; HQM_rotamers(parameters, &chrs[i], var_pos); if(LOGFILE_FLAG != 0) { file_ptr = fopen(logfile,"a"); now = time(NULL); fprintf(file_ptr, "quench\t%d\t%f\t%f\t%s", i, E_pre_quench, chrs[i].energy, ctime(&now)); fclose(file_ptr); } } sort_CHROMOSOME(&first,&newpopsize,chrs); free_memory(logfile_line); free_memory(logfile); free_memory(escape_hatch_filename); }