/* EGAD: multistate.cpp Navin Pokala and Tracy Handel Dept. of Molecular and Cell Biology University of California, Berkeley Copyright (C) 2003 Regents of the University of California GNU Public License Dec 12 2003 Absolutely no warranties are made or are implied with the use of this program or its parts. This file contains functions for considering multiple states for design ie: alternative conformations, free forms for complexes, etc */ #include "multistate.h" /* if chr[i] is isosequence with any chr[1]->chr[i-1], returns 1; else 0; */ int is_chr_i_isosequence_with_others(int i, CHROMOSOME *chr, int popsize) { int n; static char *seq_chr=NULL, *seq_other=NULL; static int max_residues=0; extern int MAX_RESIDUES; if(max_residuesgenes = chr->firstgene; while(chr->genes->seq_position!=ENDFLAG) { /* write only floating and user-defined positions */ if(chr->genes->varpos_ptr->fixed_flag == 0 || chr->genes->varpos_ptr->neighbor_level == ENDFLAG) { if(initialize_flag==1) fprintf(file_ptr,"%s\tA.\n",chr->genes->varpos_ptr->seqpos_text_map_ptr->seqpos_text); else fprintf(file_ptr,"%s\t%c.\n",chr->genes->varpos_ptr->seqpos_text_map_ptr->seqpos_text, chr->genes->choice_ptr->resparam_ptr->one_letter_code[0]); } chr->genes = chr->genes->nextgene; } chr->genes = chr->firstgene; fprintf(file_ptr,"END\n"); fclose(file_ptr); } void make_multistate_inputfiles_for_chr(CHROMOSOME *chr, char *prefix, int output_coord_flag, int initialize_flag) { extern int GET_PID; int i; static char *filename=NULL; static int num_competitor_structures; FILE *file_ptr; if(filename == NULL) { filename = (char *)calloc(MAXLINE,sizeof(char)); num_competitor_structures = 0; sprintf(filename,"competitor.%d.slavefilelist.%d",num_competitor_structures+1,GET_PID); while(does_this_file_exist(filename)==1) { ++num_competitor_structures; sprintf(filename,"competitor.%d.slavefilelist.%d",num_competitor_structures+1,GET_PID); } } /* slave inputfile for target structure */ sprintf(filename,"target.slavefilelist.%d",GET_PID); file_ptr = fopen_file(filename,"a"); sprintf(filename,"%s.target",prefix); CHROMOSOME_to_slave_inputfile(chr, filename, output_coord_flag, initialize_flag); fprintf(file_ptr,"%s\n",filename); fclose(file_ptr); /* slave inputfiles for competitor structures */ for(i=1;i<=num_competitor_structures;++i) { sprintf(filename,"competitor.%d.slavefilelist.%d",i,GET_PID); file_ptr = fopen_file(filename,"a"); sprintf(filename,"%s.competitor.%d",prefix,i); CHROMOSOME_to_slave_inputfile(chr, filename, output_coord_flag, initialize_flag); fprintf(file_ptr,"%s\n",filename); fclose(file_ptr); } } struct protoCALCULATED_SEQUENCES { char *sequence; double score; float pseudo_dG; float E_total; float dG_gap; short int clashes; short int num_unsatisfied_hbond; struct protoCALCULATED_SEQUENCES *next; }; typedef struct protoCALCULATED_SEQUENCES CALCULATED_SEQUENCES; void CHROMOSOME_array_to_fitness(int start_index, int stop_index, CHROMOSOME *chr) { static char *prefix=NULL; static char *sequence=NULL; static char *filename=NULL, *filename2=NULL; static ENERGY *baseline_energy; static int num_competitor_structures; static ENERGY *energy=NULL, *dummyenergy=NULL; double fraction_sasa_hphob, transfer_free_energy_density; SASA_SUM sasa_sum; int i,j,flag, inconsistent_flag; CHROMOSOME *baseline_chr; static CALCULATED_SEQUENCES *first_calculated_sequences = NULL; static CALCULATED_SEQUENCES *calculated_sequences = NULL; static int num_seq_variable_positions=0; static double *probab, best_score=1e10; extern int GET_PID, OUTPUT_COORD_FLAG; flag=0; if(energy==NULL) { num_seq_variable_positions=0; chr[start_index].genes = chr[start_index].firstgene; while(chr[start_index].genes->seq_position!=ENDFLAG) { if(chr[start_index].genes->varpos_ptr->number_of_choices>1) ++num_seq_variable_positions; chr[start_index].genes = chr[start_index].genes->nextgene; } chr[start_index].genes = chr[start_index].firstgene; sequence = (char *)calloc(num_seq_variable_positions+2,sizeof(char)); prefix = (char *)calloc(MAXLINE,sizeof(char)); filename = (char *)calloc(MAXLINE,sizeof(char)); filename2 = (char *)calloc(MAXLINE,sizeof(char)); calculated_sequences = (CALCULATED_SEQUENCES *)malloc(sizeof(CALCULATED_SEQUENCES)); calculated_sequences->next = NULL; first_calculated_sequences = calculated_sequences; num_competitor_structures = 0; sprintf(filename,"competitor.%d.slavefilelist.%d",num_competitor_structures+1,GET_PID); while(does_this_file_exist(filename)==1) { ++num_competitor_structures; sprintf(filename,"competitor.%d.slavefilelist.%d",num_competitor_structures+1,GET_PID); } energy = (ENERGY *)calloc(num_competitor_structures+2,sizeof(ENERGY)); dummyenergy = (ENERGY *)calloc(num_competitor_structures+2,sizeof(ENERGY)); baseline_energy = (ENERGY *)calloc(num_competitor_structures+2,sizeof(ENERGY)); probab = (double *)calloc(num_competitor_structures+4,sizeof(double)); /* an arbitrary baseline for the energies; smallest residue at all variable positions */ baseline_chr = (CHROMOSOME *)malloc(sizeof(CHROMOSOME)); baseline_chr->genes = (MENDEL *)malloc(sizeof(MENDEL)); baseline_chr->firstgene = baseline_chr->genes; baseline_chr->bkbngenes = NULL; baseline_chr->first_bkbngene = NULL; chr[start_index].genes = chr[start_index].firstgene; while(chr[start_index].genes->seq_position!=ENDFLAG) { copyGENE(baseline_chr->genes, chr[start_index].genes); if(baseline_chr->genes->varpos_ptr->fixed_flag==0) /* not fixed */ if(baseline_chr->genes->varpos_ptr->number_of_choices>1) /* multiple choices */ { i=100000; for(j=1;j<=baseline_chr->genes->varpos_ptr->number_of_choices;++j) { if(baseline_chr->genes->varpos_ptr->choice[j].in_use_flag==1) /* choice j is in use */ { if(baseline_chr->genes->varpos_ptr->choice[j].resparam_ptr->numberofSideAtoms < i) { i=baseline_chr->genes->varpos_ptr->choice[j].resparam_ptr->numberofSideAtoms; CHOICE_to_GENE(baseline_chr->genes, baseline_chr->genes->varpos_ptr->choice[j], j); } } } } baseline_chr->genes->nextgene = (MENDEL *)malloc(sizeof(MENDEL)); baseline_chr->genes = baseline_chr->genes->nextgene; baseline_chr->genes->nextgene = NULL; baseline_chr->genes->seq_position = ENDFLAG; chr[start_index].genes = chr[start_index].genes->nextgene; } chr[start_index].genes = chr[start_index].firstgene; baseline_chr->genes = baseline_chr->firstgene; sprintf(prefix,"baseline.%d",GET_PID); sprintf(filename,"target.slavefilelist.%d",GET_PID); rm_file(filename); touch_file(filename); for(j=1;j<=num_competitor_structures;++j) { sprintf(filename,"competitor.%d.slavefilelist.%d",j,GET_PID); rm_file(filename); touch_file(filename); } make_multistate_inputfiles_for_chr(baseline_chr, prefix, OUTPUT_COORD_FLAG, 1); sprintf(filename,"target.slavefilelist.%d",GET_PID); wait_for_slaves_to_finish(filename); for(j=1;j<=num_competitor_structures;++j) { sprintf(filename,"competitor.%d.slavefilelist.%d",j,GET_PID); wait_for_slaves_to_finish(filename); } ENERGY_0(baseline_energy[0]); for(j=1;j<=num_competitor_structures;++j) ENERGY_0(baseline_energy[j]); sprintf(filename,"%s.target.pdb",prefix); get_ENERGY_and_SASA_SUM_from_egad_pdb_file(filename, &baseline_energy[0], &sasa_sum, &fraction_sasa_hphob, &transfer_free_energy_density); for(j=1;j<=num_competitor_structures;++j) { sprintf(filename,"%s.competitor.%d.pdb",prefix,j); get_ENERGY_and_SASA_SUM_from_egad_pdb_file(filename, &baseline_energy[j], &sasa_sum, &fraction_sasa_hphob, &transfer_free_energy_density); } free_CHROMOSOME(baseline_chr); free_memory(baseline_chr); } sprintf(filename,"target.slavefilelist.%d",GET_PID); rm_file(filename); touch_file(filename); for(j=1;j<=num_competitor_structures;++j) { sprintf(filename,"competitor.%d.slavefilelist.%d",j,GET_PID); rm_file(filename); touch_file(filename); } for(i=start_index;i<=stop_index;++i) { CHROMOSOME_to_variable_sequence(&chr[i], sequence); flag=0; calculated_sequences = first_calculated_sequences; while(calculated_sequences->next!=NULL && flag==0) { if(strcmp(sequence,calculated_sequences->sequence)==0) { flag=1; } calculated_sequences = calculated_sequences->next; } calculated_sequences = first_calculated_sequences; if(flag==0) { sprintf(prefix,"%d.of.%d.to.%d.%d",i,start_index,stop_index,GET_PID); make_multistate_inputfiles_for_chr(&chr[i], prefix,OUTPUT_COORD_FLAG, 0); } } sprintf(filename,"target.slavefilelist.%d",GET_PID); wait_for_slaves_to_finish(filename); for(j=1;j<=num_competitor_structures;++j) { sprintf(filename,"competitor.%d.slavefilelist.%d",j,GET_PID); wait_for_slaves_to_finish(filename); } /* get energies */ for(i=start_index;i<=stop_index;++i) { CHROMOSOME_to_variable_sequence(&chr[i], sequence); chr[i].energy = 1e10; flag=0; calculated_sequences = first_calculated_sequences; while(calculated_sequences->next!=NULL && flag==0) { if(strcmp(sequence,calculated_sequences->sequence)==0) /* already calculated before */ { flag=1; ENERGY_0(chr[i].energy_list); chr[i].energy_list.pseudo_dG = calculated_sequences->pseudo_dG; chr[i].energy_list.dG_gap = calculated_sequences->dG_gap; chr[i].energy_list.E_total = calculated_sequences->E_total; chr[i].energy_list.clashes = calculated_sequences->clashes; chr[i].energy_list.num_unsatisfied_hbond = calculated_sequences->num_unsatisfied_hbond; chr[i].energy = calculated_sequences->score; } calculated_sequences = calculated_sequences->next; } if(flag==0) /* just calculated now so extract from files */ { calculated_sequences->sequence = (char *)calloc(num_seq_variable_positions+2,sizeof(char)); strcpy(calculated_sequences->sequence,sequence); sprintf(prefix,"%d.of.%d.to.%d.%d",i,start_index,stop_index,GET_PID); sprintf(filename,"%s.target.pdb",prefix); get_ENERGY_and_SASA_SUM_from_egad_pdb_file(filename, &energy[0], &sasa_sum, &fraction_sasa_hphob, &transfer_free_energy_density); energy[0] = ENERGY_SUBTRACT(energy[0],baseline_energy[0]); chr[i].energy_list = energy[0]; // get energies for competitor states for(j=1;j<=num_competitor_structures;++j) { sprintf(filename,"%s.competitor.%d.pdb",prefix,j); get_ENERGY_and_SASA_SUM_from_egad_pdb_file(filename, &energy[j], &sasa_sum, &fraction_sasa_hphob, &transfer_free_energy_density); energy[j] = ENERGY_SUBTRACT(energy[j],baseline_energy[j]); } chr[i].energy = multistate_objective_function(&chr[i], energy, num_competitor_structures); calculated_sequences->score = chr[i].energy; calculated_sequences->pseudo_dG = energy[0].pseudo_dG; calculated_sequences->dG_gap = energy[0].dG_gap; calculated_sequences->E_total = energy[0].E_total; calculated_sequences->clashes = energy[0].clashes; calculated_sequences->num_unsatisfied_hbond = energy[0].num_unsatisfied_hbond; // we have a new best sequence....double-check and then save the original files if(calculated_sequences->score < best_score) { sprintf(filename,"%s.target.pdb",prefix); sprintf(filename2,"%s.target.temp",prefix); mv_file(filename,filename2); for(j=1;j<=num_competitor_structures;++j) { sprintf(filename,"%s.competitor.%d.pdb",prefix,j); sprintf(filename2,"%s.competitor.%d.temp",prefix,j); mv_file(filename,filename2); } // mv'ing these files will relaunch jobs to calculate them sprintf(filename,"target.slavefilelist.%d",GET_PID); wait_for_slaves_to_finish(filename); for(j=1;j<=num_competitor_structures;++j) { sprintf(filename,"competitor.%d.slavefilelist.%d",j,GET_PID); wait_for_slaves_to_finish(filename); } // read the energies from these new files sprintf(filename,"%s.target.pdb",prefix); get_ENERGY_and_SASA_SUM_from_egad_pdb_file(filename, &dummyenergy[0], &sasa_sum, &fraction_sasa_hphob, &transfer_free_energy_density); dummyenergy[0] = ENERGY_SUBTRACT(dummyenergy[0],baseline_energy[0]); for(j=1;j<=num_competitor_structures;++j) { sprintf(filename,"%s.competitor.%d.pdb",prefix,j); get_ENERGY_and_SASA_SUM_from_egad_pdb_file(filename, &dummyenergy[j], &sasa_sum, &fraction_sasa_hphob, &transfer_free_energy_density); dummyenergy[j] = ENERGY_SUBTRACT(dummyenergy[j],baseline_energy[j]); } inconsistent_flag=0; // compare to first run of this sequence if(fabs(energy[0].E_structure - dummyenergy[0].E_structure)>TOL) { if(dummyenergy[0].E_structure < energy[0].E_structure) { inconsistent_flag=1; sprintf(filename2,"WARNING: inconsistent rotamer optimization for target"); failure_report(filename2,"warn"); energy[0] = dummyenergy[0]; chr[i].energy_list = energy[0]; } else // old file is best { sprintf(filename,"%s.target.pdb",prefix); sprintf(filename2,"%s.target.temp",prefix); cp_file(filename2,filename); } } for(j=1;j<=num_competitor_structures;++j) { if(fabs(energy[j].E_structure - dummyenergy[j].E_structure)>TOL) { if(dummyenergy[j].E_structure < energy[j].E_structure) { inconsistent_flag=1; sprintf(filename2,"WARNING: inconsistent rotamer optimization for competitor %d",j); failure_report(filename2,"warn"); energy[j] = dummyenergy[j]; } else // old file is best { sprintf(filename,"%s.competitor.%d.pdb",prefix,j); sprintf(filename2,"%s.competitor.%d.temp",prefix,j); cp_file(filename2,filename); } } } // found an inconsistency....recalc the score using the updated values in energy array if(inconsistent_flag==1) { chr[i].energy = multistate_objective_function(&chr[i], energy, num_competitor_structures); calculated_sequences->score = chr[i].energy; calculated_sequences->pseudo_dG = energy[0].pseudo_dG; calculated_sequences->dG_gap = energy[0].dG_gap; calculated_sequences->E_total = energy[0].E_total; calculated_sequences->clashes = energy[0].clashes; calculated_sequences->num_unsatisfied_hbond = energy[0].num_unsatisfied_hbond; } // this is still the best score....save the files if(calculated_sequences->score < best_score) { best_score = calculated_sequences->score; sprintf(filename,"%s.target.pdb",prefix); sprintf(filename2,"best.target.%d.diagnostic",GET_PID); cp_file(filename,filename2); for(j=1;j<=num_competitor_structures;++j) { sprintf(filename,"%s.competitor.%d.pdb",prefix,j); sprintf(filename2,"best.competitor.%d.%d.diagnostic",j,GET_PID); cp_file(filename,filename2); } } // rm temp files sprintf(filename2,"%s.target.temp",prefix); rm_file(filename2); for(j=1;j<=num_competitor_structures;++j) { sprintf(filename2,"%s.competitor.%d.temp",prefix,j); rm_file(filename2); } } calculated_sequences->next = (CALCULATED_SEQUENCES *)malloc(sizeof(CALCULATED_SEQUENCES)); calculated_sequences = calculated_sequences->next; calculated_sequences->next = NULL; } } calculated_sequences = first_calculated_sequences; sprintf(filename,"target.slavefilelist.%d",GET_PID); rm_file(filename); touch_file(filename); for(j=1;j<=num_competitor_structures;++j) { sprintf(filename,"competitor.%d.slavefilelist.%d",j,GET_PID); rm_file(filename); touch_file(filename); } for(i=start_index;i<=stop_index;++i) { sprintf(prefix,"%d.of.%d.to.%d.%d",i,start_index,stop_index,GET_PID); sprintf(filename,"%s.target.pdb",prefix); if(does_this_file_exist(filename)==1) rm_file(filename); sprintf(filename,"%s.target",prefix); if(does_this_file_exist(filename)==1) rm_file(filename); for(j=1;j<=num_competitor_structures;++j) { sprintf(filename,"%s.competitor.%d",prefix,j); if(does_this_file_exist(filename)==1) rm_file(filename); sprintf(filename,"%s.competitor.%d.pdb",prefix,j); if(does_this_file_exist(filename)==1) rm_file(filename); } } } double CHROMOSOME_to_score(CHROMOSOME *chr) { CHROMOSOME_array_to_fitness(0, 0, chr); return(chr->energy); } /* This function controls the GA. It launches the protein->parameters.number_GA_cycles number of independent GA's, pools the results, and launches an GA seeded with the winners from the independent runs. Final soln information stored in protein->final_*; ready for output_PROTEIN. */ void multistate_ga(PROTEIN *protein) { int i, q; extern int LOGFILE_FLAG; if(protein->parameters.pop_size==500) { i=1; protein->parameters.pop_size=0; while(protein->var_pos[i].seq_position!=ENDFLAG) { if(protein->var_pos[i].number_of_choices > 1) ++protein->parameters.pop_size; ++i; } protein->parameters.pop_size = 2*protein->parameters.pop_size; if(protein->parameters.pop_size<60) protein->parameters.pop_size=60; } if(protein->parameters.number_GA_cycles==10) protein->parameters.number_GA_cycles=2; if(protein->parameters.ga_convergence==300) protein->parameters.ga_convergence=20; /* DUDE protein->parameters.pop_size=10; protein->parameters.number_GA_cycles=2; protein->parameters.num_GA_solns_per_cycle=1; protein->parameters.ga_convergence=10; */ /* make sure popsize is even */ if(protein->parameters.pop_size%2 != 0) protein->parameters.pop_size = protein->parameters.pop_size + 1; /* inoculate protein.chr with 2*finalGAseed_population chromosomes */ protein->parameters.finalGAseed_population = protein->parameters.number_GA_cycles*protein->parameters.num_GA_solns_per_cycle; q = 2*protein->parameters.finalGAseed_population; if(protein->parameters.pop_sizeparameters.pop_size=q+4; protein->sizeof_chr_array = q; protein->chr = (CHROMOSOME *)calloc((q+2),sizeof(CHROMOSOME)); for(i=1;i<=q;++i) { inoculate_sidechains(&protein->chr[i], protein->var_pos, 0); generate_custom_sequence_CHROMOSOME(&protein->chr[i], 0); protein->chr[i].bkbngenes = NULL; protein->chr[i].first_bkbngene = NULL; protein->chr[i].energy = DBL_MAX; } GA_sequence_control(&protein->parameters, protein->chr, -q, protein->var_pos); if(LOGFILE_FLAG != 0) output_data(protein->chr, &protein->parameters.finalGAseed_population, protein->parameters.output_prefix, protein); protein->E_working = protein->chr[1].energy; if(protein->final_chr.firstgene == NULL) inoculate_sidechains(&protein->final_chr, protein->var_pos, 0); copyCHROMOSOME(protein->final_chr, protein->chr[1]); /* copy protein->chr[1] into protein->final_chr */ } /* This function seeds parameters->number_GA_cycles GAs with chrs array of chrs_size. The first GA uses all the chrs members as seeds; the nth GA run includes chrs[n] onward. If chrs_size<0, then all the seeds for all the runs are random. After all the GA's are done, a tournament GA is performed where the top parameters->num_GA_solns_per_cycle from each GA are used to seed the initial population. The final population is returned in chrs */ void GA_sequence_control(PARAMETERS *parameters, CHROMOSOME *chrs, int chrs_size, VARIABLE_POSITION var_pos[]) { int i,j, k; double master_time; CHROMOSOME *working_chrs, *final_chrs; int newpopsize; int SuperGenX; int first, final_chr_ctr, actual_cycle_ctr; char *logfile, *optimized_filename, *sequence, *escape_hatch_filename; FILE *file_ptr; time_t now, start_time; extern int LOGFILE_FLAG,GET_PID; extern double MASTER_OPTIMIZATION_TIME; extern char *CURRENT_WORKING_DIRECTORY; master_time = MASTER_OPTIMIZATION_TIME; sequence = (char *)calloc(MAX_RESIDUES,sizeof(char)); /* even number pop_size required */ if(parameters->pop_size%2!=0) parameters->pop_size = parameters->pop_size + 1; newpopsize = 2*parameters->pop_size; optimized_filename = (char *)calloc(MXLINE_INPUT,sizeof(char)); sprintf(optimized_filename,"%s.target_state_optimization.pdb",parameters->output_prefix); logfile = (char *)calloc(MXLINE_INPUT,sizeof(char)); sprintf(logfile, "%s.log", parameters->output_prefix); escape_hatch_filename = (char *)calloc(MAXLINE,sizeof(char)); sprintf(escape_hatch_filename,"%s/escape.MULTISTATE_GA.%d",CURRENT_WORKING_DIRECTORY,GET_PID); if(LOGFILE_FLAG!=0) { file_ptr = fopen_file(logfile, "a"); now = time(NULL); fprintf(file_ptr, "Starting MULTISTATE_GA\tmain_population: %d\tmating_population: %d total_population: %d\t%s", abs(chrs_size), parameters->pop_size, newpopsize, ctime(&now)); fprintf(file_ptr, "To exit the MULTISTATE_GA and move to the next step:"); fprintf(file_ptr, "\ttouch %s\n",escape_hatch_filename); fprintf(file_ptr, "order: gen, num_cycles_same_energy, score, ddG_gap, ddG_pseudo, ddG_total, dd_clashes, dd_unsat_hbond, sequence, dE_cycle\n"); fprintf(file_ptr, "energies are with respect to the baseline 'ala at all sequence-variable positions' sequence\n"); fclose(file_ptr); } first = 1; if(chrs_size>0) sort_CHROMOSOME(&first, &chrs_size, chrs); /* inoculate and allocate enough space for 2 x pop_size */ working_chrs = (CHROMOSOME *)calloc((newpopsize+2),sizeof(CHROMOSOME)); final_chrs = (CHROMOSOME *)calloc((newpopsize+2),sizeof(CHROMOSOME)); for(i=1;i<=newpopsize;++i) { inoculate_sidechains(&working_chrs[i], var_pos,0); inoculate_sidechains(&final_chrs[i], var_pos,0); working_chrs[i].bkbngenes = NULL; working_chrs[i].first_bkbngene = NULL; final_chrs[i].bkbngenes = NULL; final_chrs[i].first_bkbngene = NULL; } MASTER_OPTIMIZATION_TIME = MASTER_OPTIMIZATION_TIME/(parameters->number_GA_cycles + 1); start_time = time(NULL); final_chr_ctr = 0; actual_cycle_ctr=0; SuperGenX=1; while(SuperGenX<=parameters->number_GA_cycles) { ++actual_cycle_ctr; /* seed */ k=1; for(j=SuperGenX;j<=chrs_size;++j) { copyCHROMOSOME(working_chrs[k],chrs[j]); ++k; } while(k<=newpopsize) { randomize_sidechains(&working_chrs[k],1); generate_custom_sequence_CHROMOSOME(&working_chrs[k], 0); ++k; } get_rid_of_isosequence_chrs(working_chrs, parameters->mutation_freq, newpopsize); /* include the target-state optimized sequence in the first cycle */ if(SuperGenX==1) if(does_this_file_exist(optimized_filename)==1) { pdbfile_to_CHROMOSOME(optimized_filename, &working_chrs[1]); } CHROMOSOME_array_to_fitness(1,newpopsize,working_chrs); sort_CHROMOSOME(&first, &newpopsize, working_chrs); evolve_sequence(parameters, working_chrs, var_pos); if(LOGFILE_FLAG!=0) { file_ptr = fopen_file(logfile, "a"); now = time(NULL); CHROMOSOME_to_variable_sequence(&working_chrs[1], sequence); fprintf(file_ptr, "GA\t%d\t%f\t%f\t%f\t%f\t%d\t%d\t%s\t%s", SuperGenX, working_chrs[1].energy, working_chrs[1].energy_list.dG_gap, working_chrs[1].energy_list.pseudo_dG, working_chrs[1].energy_list.E_total, working_chrs[1].energy_list.clashes, working_chrs[1].energy_list.num_unsatisfied_hbond, sequence,ctime(&now)); fclose(file_ptr); } /* copy the best num_final_chr members from the population into final_chrs */ for(k=1;k<=parameters->num_GA_solns_per_cycle;++k) { ++final_chr_ctr; copyCHROMOSOME(final_chrs[final_chr_ctr],working_chrs[k]); } if(does_this_file_exist(escape_hatch_filename)==1) SuperGenX = parameters->number_GA_cycles + 2; ++SuperGenX; } /* tournament GA with winners from individual GA's */ if(does_this_file_exist(escape_hatch_filename)==0) { k=final_chr_ctr+1; while(k<=newpopsize) { randomize_sidechains(&final_chrs[k],1); generate_custom_sequence_CHROMOSOME(&final_chrs[k],0); ++k; } get_rid_of_isosequence_chrs(final_chrs, parameters->mutation_freq, newpopsize); CHROMOSOME_array_to_fitness(1,newpopsize,final_chrs); sort_CHROMOSOME(&first, &newpopsize, final_chrs); evolve_sequence(parameters, final_chrs, var_pos); } else rm_file(escape_hatch_filename); sort_CHROMOSOME(&first, &newpopsize, final_chrs); if(LOGFILE_FLAG!=0) { file_ptr = fopen_file(logfile, "a"); now = time(NULL); CHROMOSOME_to_variable_sequence(&final_chrs[1], sequence); fprintf(file_ptr, "GA final\t%f\t%f\t%f\t%f\t%d\t%d\t%s\t%s", final_chrs[1].energy, final_chrs[1].energy_list.dG_gap, final_chrs[1].energy_list.pseudo_dG, final_chrs[1].energy_list.E_total, final_chrs[1].energy_list.clashes,final_chrs[1].energy_list.num_unsatisfied_hbond, sequence, ctime(&now)); fclose(file_ptr); } for(i=1;i<=abs(chrs_size);++i) copyCHROMOSOME(chrs[i],final_chrs[i]); for(i=1;i<=newpopsize;++i) { free_CHROMOSOME(&working_chrs[i]); free_CHROMOSOME(&final_chrs[i]); } free_memory(working_chrs); free_memory(final_chrs); free_memory(sequence); free_memory(logfile); free_memory(optimized_filename); free_memory(escape_hatch_filename); MASTER_OPTIMIZATION_TIME = master_time; } /* score energy */ /* temperature decay between T0 and Tfinal */ /* increase mut_freq if 3 or more cycles have the same energy after ten isoenergetic cycles, relax w/o increasing the mut_freq, to allow the mutations to spread into the population */ /* convergence criterion - same energy for parameters->ga_convergence cycles */ void evolve_sequence(PARAMETERS *parameters, CHROMOSOME chrs[], VARIABLE_POSITION var_pos[]) { int newpopsize; int first, i; double T; int convergence_counter, numCyclesSameEnergy; double prevEnergy, currentenergy; double dummy_mutfreq, temperature_decr; int GenX; int popsizeplus1; char *logfile_line, *escape_hatch_filename; FILE *file_ptr; extern int LOGFILE_FLAG, GET_PID; extern double R; extern double MASTER_OPTIMIZATION_TIME; extern char *CURRENT_WORKING_DIRECTORY; time_t now, start_time; char *logfile, *best_sequence; int num_isoenergy_safety, safety_convergence; escape_hatch_filename = (char *)calloc(MAXLINE,sizeof(char)); sprintf(escape_hatch_filename,"%s/escape.MULTISTATE_GA.%d",CURRENT_WORKING_DIRECTORY,GET_PID); first = 1; R=R_univ; T=5000; newpopsize = 2*parameters->pop_size; popsizeplus1 = parameters->pop_size+1; best_sequence=NULL; logfile_line =NULL; logfile=NULL; if(LOGFILE_FLAG!=0) { logfile = (char *)calloc(MXLINE_INPUT,sizeof(char)); best_sequence = (char *)calloc(MAX_RESIDUES,sizeof(char)); sprintf(logfile,"%s.log",parameters->output_prefix); logfile_line = (char *)calloc(MXLINE_INPUT,sizeof(char)); logfile_line[0] = '\0'; logfile_line[1] = '\0'; logfile_line[2] = '\0'; /* buffer for logfile */ } /* score energy of initial population */ get_rid_of_isosequence_chrs(chrs, parameters->mutation_freq, newpopsize); CHROMOSOME_array_to_fitness(1,newpopsize,chrs); sort_CHROMOSOME(&first,&newpopsize,chrs); /* sorted input CHROMOSOME array */ convergence_counter=0; if(calculate_mating_frequencies(chrs, parameters->pop_size, T)==0) convergence_counter = parameters->ga_convergence + 20; /* evolution */ prevEnergy=0; currentenergy=chrs[1].energy; numCyclesSameEnergy=1; dummy_mutfreq = parameters->mutation_freq; GenX=0; start_time = time(NULL); T=parameters->ga_T0; temperature_decr=10; safety_convergence = 100*parameters->ga_convergence; num_isoenergy_safety=0; start_time = time(NULL); while(convergence_counter <= parameters->ga_convergence) /* convergence criterion */ { prevEnergy = currentenergy; ++GenX; /* mate chrs[1]->[popsize], loading into chrs[popsize+1]->[newpopsize] */ /* children are made soluble within mating function if need be */ mate_sidechains(chrs,parameters->pop_size,parameters->recomb_freq,dummy_mutfreq); for(i=(popsizeplus1); i<=newpopsize; ++i) generate_custom_sequence_CHROMOSOME(&chrs[i],0); get_rid_of_isosequence_chrs(chrs, dummy_mutfreq, newpopsize); /* score energy */ CHROMOSOME_array_to_fitness(1,newpopsize,chrs); sort_CHROMOSOME(&first,&newpopsize,chrs); /* sort population */ currentenergy=chrs[1].energy; CHROMOSOME_to_variable_sequence(&chrs[1], best_sequence); if(calculate_mating_frequencies(chrs, parameters->pop_size, T)==0) convergence_counter = parameters->ga_convergence + 20; /* write logfile */ if(LOGFILE_FLAG != 0) { now = time(NULL); sprintf(logfile_line, "ga\t%d\t%d\t%f\t%f\t%f\t%f\t%d\t%d\t%s\t%f\t%s",GenX, convergence_counter, chrs[1].energy, chrs[1].energy_list.dG_gap, chrs[1].energy_list.pseudo_dG, chrs[1].energy_list.E_total, chrs[1].energy_list.clashes,chrs[1].energy_list.num_unsatisfied_hbond, best_sequence, fabs(prevEnergy - currentenergy),ctime(&now)); file_ptr = fopen(logfile,"a"); fprintf(file_ptr,"%s",logfile_line); fclose(file_ptr); } /* check for convergence or time-out */ if(fabs(prevEnergy - currentenergy)<=EPS) { ++numCyclesSameEnergy; ++convergence_counter; /* increase mutation freq if it looks like we're in a local minimum; 10 generations w/ equal energy */ /* after 10 cycles of increased mut_freq, relax for 10 cycles at parameters.mutation_freq */ if(convergence_counter >= 10) { if(numCyclesSameEnergy<10) dummy_mutfreq = dummy_mutfreq + 0.025; else dummy_mutfreq = parameters->mutation_freq; if(numCyclesSameEnergy%20==0) numCyclesSameEnergy=1; } } else { numCyclesSameEnergy=1; dummy_mutfreq = parameters->mutation_freq; convergence_counter=1; } if(fabs(prevEnergy - currentenergy)>EPS2) num_isoenergy_safety=0; else { ++num_isoenergy_safety; if(num_isoenergy_safety>safety_convergence) /* bail out */ convergence_counter = parameters->ga_convergence + 20; } /* adjust temperature */ T = update_simulation_temperature(T, parameters->ga_dT, parameters->ga_Tf); /* bail out if we're taking too long */ now = time(NULL); if(difftime(now,start_time) >= MASTER_OPTIMIZATION_TIME || does_this_file_exist(escape_hatch_filename)==1) convergence_counter = parameters->ga_convergence + 20; } /* sort final population */ sort_CHROMOSOME(&first,&newpopsize,chrs); if(logfile_line !=NULL) { free_memory(logfile_line); free_memory(logfile); free_memory(best_sequence); } free_memory(escape_hatch_filename); } double find_best_residue(int i, VARIABLE_POSITION *varpos, CHROMOSOME *input_chr, int *best_res) { int i_res; double best_energy; CHROMOSOME *chr; chr = (CHROMOSOME *)calloc(varpos[i].number_of_choices+2,sizeof(CHROMOSOME)); for(i_res=1;i_res<=varpos[i].number_of_choices;++i_res) { inoculate_sidechains(&chr[i_res],varpos,0); copyCHROMOSOME(chr[i_res],(*input_chr)); chr[i_res].genes = chr[i_res].firstgene; while(chr[i_res].genes->seq_position!=varpos[i].seq_position) chr[i_res].genes = chr[i_res].genes->nextgene; chr[i_res].genes->choice_ptr = &varpos[i].choice[i_res]; chr[i_res].genes->j_choice_index = i_res-1; chr[i_res].genes = chr[i_res].firstgene; } CHROMOSOME_array_to_fitness(1,varpos[i].number_of_choices,chr); best_energy=1e10; for(i_res=1;i_res<=varpos[i].number_of_choices;++i_res) { if(chr[i_res].energyfinal_chr.firstgene==NULL) inoculate_sidechains(&protein->final_chr, protein->var_pos,0); sprintf(optimized_filename,"%s.target_state_optimization.pdb",protein->parameters.output_prefix); if(does_this_file_exist(optimized_filename)==0) { sprintf(line,"ERROR For HQM, target state optimization must have been performed; %s does not exist",optimized_filename); failure_report(line,"exit"); } pdbfile_to_CHROMOSOME(optimized_filename, &protein->final_chr); if(LOGFILE_FLAG!=0) { sprintf(logfile, "%s.log", protein->parameters.output_prefix); now = time(NULL); file_ptr = fopen_file(logfile,"a"); fprintf(file_ptr, "Start MULTISTATE_HQM\t%s", ctime(&now)); fprintf(file_ptr, "To exit the MULTISTATE_HQM and move to the next step:"); fprintf(file_ptr, "\ttouch %s\n",escape_hatch_filename); fprintf(file_ptr, "order: iteration, num_cycles_same_energy, position, optimal_residue, score, ddG_gap, ddG_pseudo, ddG_total dd_clashes dd_unsat_hbond\n"); fprintf(file_ptr, "energies are with respect to the baseline 'ala at all sequence-variable positions' sequence\n"); fclose(file_ptr); } if(protein->parameters.hqm_convergence == 0) protein->parameters.hqm_convergence = protein->parameters.numMovingPositions; best_res = ENDFLAG; inoculate_sidechains(&dummychr, protein->var_pos,0); dummychr.bkbngenes = NULL; dummychr.first_bkbngene = NULL; copyCHROMOSOME(dummychr, protein->final_chr); /* dummychr is the working chromosome */ for(i=1;i<=protein->parameters.numMovingPositions+1;++i) already_used_positions[i] = ENDFLAG; i=1; k=0; while(protein->var_pos[i].seq_position!=ENDFLAG) { if(protein->var_pos[i].fixed_flag == 0) /* not fixed, so add to the list */ { ++k; indicies_of_floating_positions[k] = i; } ++i; } protein->final_chr.energy = CHROMOSOME_to_score(&protein->final_chr); GenX=0; previous_seqpos=0; start_time = time(NULL); ctr=0; while(GenXparameters.hqm_convergence) { ++ctr; /* pick a random position */ k=1; if(protein->parameters.numMovingPositions>1) { k = randint(protein->parameters.numMovingPositions); while(has_this_position_been_used(k,already_used_positions)==1 || k==0) k = randint(protein->parameters.numMovingPositions); i=1; while(already_used_positions[i]!=ENDFLAG) ++i; already_used_positions[i]=k; } i = indicies_of_floating_positions[k]; previous_seqpos = k; neighborOptimizeFlag = 1; /* current energy at this position */ current_energy = CHROMOSOME_to_score(&dummychr); /* place all residues at this position and find the best one */ best_energy = find_best_residue(i, protein->var_pos, &dummychr, &best_res); fabs_dE = fabs(best_energy - current_energy); if(fabs_dE > EPS) /* energy has dropped */ { dummychr.genes = dummychr.firstgene; while(dummychr.genes->seq_position!=protein->var_pos[i].seq_position) dummychr.genes = dummychr.genes->nextgene; dummychr.genes->choice_ptr = &protein->var_pos[i].choice[best_res]; dummychr.genes->j_choice_index = best_res-1; dummychr.genes = dummychr.firstgene; neighborOptimizeFlag=0; GenX=0; /* reset the already_used_positions array since the energy has dropped */ j=1; while(already_used_positions[j]!=ENDFLAG) { already_used_positions[j] = ENDFLAG; ++j; } copyCHROMOSOME(protein->final_chr, dummychr); protein->final_chr.energy = CHROMOSOME_to_score(&protein->final_chr); CHROMOSOME_to_variable_sequence(&protein->final_chr, sequence); } if(LOGFILE_FLAG!=0) { now = time(NULL); file_ptr = fopen_file(logfile,"a"); fprintf(file_ptr, "hqm\t%d\t%d\t%s\t%s\t%f\t%f\t%f\t%f\t%d\t%d\t%s\t%s", ctr, GenX, protein->var_pos[i].seqpos_text_map_ptr->seqpos_text, protein->var_pos[i].choice[best_res].resparam_ptr->one_letter_code, protein->final_chr.energy, protein->final_chr.energy_list.dG_gap, protein->final_chr.energy_list.pseudo_dG, protein->final_chr.energy_list.E_total, protein->final_chr.energy_list.clashes, protein->final_chr.energy_list.num_unsatisfied_hbond, sequence,ctime(&now)); fclose(file_ptr); } if(neighborOptimizeFlag == 1) ++GenX; /* bail out if we're taking too long */ now = time(NULL); if(difftime(now,start_time) >= MASTER_OPTIMIZATION_TIME || does_this_file_exist(escape_hatch_filename)==1) GenX = protein->parameters.hqm_convergence + 20; } if(does_this_file_exist(escape_hatch_filename)==1) rm_file(escape_hatch_filename); free_CHROMOSOME(&dummychr); } void multistate_scan(PROTEIN *protein) { int i,k; int best_res, i_res; static int *indicies_of_floating_positions=NULL; static int max_residues=0; CHROMOSOME dummychr, *chr=NULL; static char *logfile, *optimized_filename, *sequence, *line=NULL, *escape_hatch_filename; char best_one_letter_code[5], wt_rescode[5]; FILE *file_ptr; ENERGY best_energy_list; double best_energy; time_t now, start_time; extern int LOGFILE_FLAG, GET_PID; extern double MASTER_OPTIMIZATION_TIME; extern char *CURRENT_WORKING_DIRECTORY; if(max_residues < MAX_RESIDUES) { if(line == NULL) { line = (char *)calloc(MAXLINE,sizeof(char)); optimized_filename = (char *)calloc(MAXLINE,sizeof(char)); logfile = (char *)calloc(MAXLINE,sizeof(char)); escape_hatch_filename = (char *)calloc(MAXLINE,sizeof(char)); sprintf(escape_hatch_filename,"%s/escape.MULTISTATE_SCAN.%d",CURRENT_WORKING_DIRECTORY,GET_PID); } max_residues=MAX_RESIDUES; if(indicies_of_floating_positions != NULL) { free_memory(indicies_of_floating_positions); free_memory(sequence); } indicies_of_floating_positions = (int *)calloc(MAX_RESIDUES,sizeof(int)); sequence = (char *)calloc(MAX_RESIDUES,sizeof(char)); } if(protein->final_chr.firstgene==NULL) inoculate_sidechains(&protein->final_chr, protein->var_pos,0); sprintf(optimized_filename,"%s.target_state_optimization.pdb",protein->parameters.output_prefix); if(does_this_file_exist(optimized_filename)==0) { sprintf(line,"ERROR For SCAN, target state optimization must have been performed; %s does not exist",optimized_filename); failure_report(line,"exit"); } pdbfile_to_CHROMOSOME(optimized_filename, &protein->final_chr); if(LOGFILE_FLAG!=0) { sprintf(logfile, "%s.log", protein->parameters.output_prefix); now = time(NULL); file_ptr = fopen_file(logfile,"a"); fprintf(file_ptr, "Start MULTISTATE_SCAN\t%s", ctime(&now)); fprintf(file_ptr, "To exit the MULTISTATE_SCAN and move to the next step:"); fprintf(file_ptr, "\ttouch %s\n",escape_hatch_filename); if(strcmp(protein->parameters.algorithm, "SCANNING_MUTAGENESIS")==0) fprintf(file_ptr, "order: position, wt_res, residue, score, ddG_gap, ddG_pseudo, ddG_total dd_clashes dd_unsat_hbond\n"); else fprintf(file_ptr, "order: position, wt_res, optimal_residue, score, ddG_gap, ddG_pseudo, ddG_total dd_clashes dd_unsat_hbond\n"); fprintf(file_ptr, "energies are with respect to the baseline 'ala at all sequence-variable positions' sequence\n"); fclose(file_ptr); } protein->final_chr.energy = CHROMOSOME_to_score(&protein->final_chr); inoculate_sidechains(&dummychr, protein->var_pos,0); dummychr.bkbngenes = NULL; dummychr.first_bkbngene = NULL; copyCHROMOSOME(dummychr, protein->final_chr); /* dummychr is the working chromosome */ i=1; k=0; while(protein->var_pos[i].seq_position!=ENDFLAG) { if(protein->var_pos[i].fixed_flag == 0) /* not fixed, so add to the list */ { ++k; indicies_of_floating_positions[k] = i; } ++i; } best_res = ENDFLAG; start_time = time(NULL); for(k=1;k <= protein->parameters.numMovingPositions; ++k) { i = indicies_of_floating_positions[k]; copyCHROMOSOME(dummychr, protein->final_chr); protein->final_chr.genes = protein->final_chr.firstgene; while(protein->final_chr.genes->seq_position!=protein->var_pos[i].seq_position) protein->final_chr.genes = protein->final_chr.genes->nextgene; strcpy(wt_rescode,protein->final_chr.genes->choice_ptr->resparam_ptr->one_letter_code); protein->final_chr.genes = protein->final_chr.firstgene; /* print out the energies of each residue */ if(strcmp(protein->parameters.algorithm, "SCANNING_MUTAGENESIS")==0) { best_energy = 1e10; chr = (CHROMOSOME *)calloc(protein->var_pos[i].number_of_choices+2,sizeof(CHROMOSOME)); for(i_res=1;i_res<=protein->var_pos[i].number_of_choices;++i_res) { inoculate_sidechains(&chr[i_res],protein->var_pos,0); copyCHROMOSOME(chr[i_res], protein->final_chr); chr[i_res].genes = chr[i_res].firstgene; while(chr[i_res].genes->seq_position!=protein->var_pos[i].seq_position) chr[i_res].genes = chr[i_res].genes->nextgene; chr[i_res].genes->choice_ptr = &protein->var_pos[i].choice[i_res]; chr[i_res].genes->j_choice_index = i_res-1; chr[i_res].genes = chr[i_res].firstgene; } CHROMOSOME_array_to_fitness(1,protein->var_pos[i].number_of_choices,chr); best_energy=1e10; for(i_res=1;i_res<=protein->var_pos[i].number_of_choices;++i_res) { copyCHROMOSOME(dummychr, chr[i_res]); if(LOGFILE_FLAG!=0) { now = time(NULL); file_ptr = fopen_file(logfile,"a"); fprintf(file_ptr, "scan\t%s\t%s\t%s\t%f\t%f\t%f\t%f\t%d\t%d\n", protein->var_pos[i].seqpos_text_map_ptr->seqpos_text,wt_rescode, protein->var_pos[i].choice[i_res].resparam_ptr->one_letter_code, dummychr.energy, dummychr.energy_list.dG_gap, dummychr.energy_list.pseudo_dG, dummychr.energy_list.E_total, dummychr.energy_list.clashes, dummychr.energy_list.num_unsatisfied_hbond); fclose(file_ptr); } if(dummychr.energy < best_energy) { best_energy = dummychr.energy; best_energy_list = dummychr.energy_list; strcpy(best_one_letter_code,protein->var_pos[i].choice[i_res].resparam_ptr->one_letter_code); } } if(LOGFILE_FLAG!=0) { now = time(NULL); file_ptr = fopen_file(logfile,"a"); fprintf(file_ptr, "optimal_res\t%s\t%s\t%s\t%f\t%f\t%f\t%f\t%d\t%d\n", protein->var_pos[i].seqpos_text_map_ptr->seqpos_text,wt_rescode, best_one_letter_code, best_energy, best_energy_list.dG_gap, best_energy_list.pseudo_dG, best_energy_list.E_total, best_energy_list.clashes, best_energy_list.num_unsatisfied_hbond); fclose(file_ptr); } for(i_res=1;i_res<=protein->var_pos[i].number_of_choices;++i_res) free_CHROMOSOME(&chr[i_res]); free_memory(chr); } else { /* place all residues at this position and find the best one */ find_best_residue(i, protein->var_pos, &dummychr, &best_res); dummychr.genes = dummychr.firstgene; while(dummychr.genes->seq_position!=protein->var_pos[i].seq_position) dummychr.genes = dummychr.genes->nextgene; dummychr.genes->choice_ptr = &protein->var_pos[i].choice[best_res]; dummychr.genes->j_choice_index = best_res-1; dummychr.genes = dummychr.firstgene; dummychr.energy = CHROMOSOME_to_score(&dummychr); CHROMOSOME_to_variable_sequence(&dummychr, sequence); if(LOGFILE_FLAG!=0) { now = time(NULL); file_ptr = fopen_file(logfile,"a"); fprintf(file_ptr, "optimal_res_scan\t%s\t%s\t%s\t%f\t%f\t%f\t%f\t%d\t%d\t%s\t%s", protein->var_pos[i].seqpos_text_map_ptr->seqpos_text,wt_rescode, protein->var_pos[i].choice[best_res].resparam_ptr->one_letter_code, dummychr.energy, dummychr.energy_list.dG_gap, dummychr.energy_list.pseudo_dG, dummychr.energy_list.E_total, dummychr.energy_list.clashes, dummychr.energy_list.num_unsatisfied_hbond, sequence,ctime(&now)); fclose(file_ptr); } } /* bail out if we're taking too long */ now = time(NULL); if(difftime(now,start_time) >= MASTER_OPTIMIZATION_TIME || does_this_file_exist(escape_hatch_filename)==1) k = protein->parameters.numMovingPositions + 20; } if(does_this_file_exist(escape_hatch_filename)==1) rm_file(escape_hatch_filename); free_CHROMOSOME(&dummychr); } void multistate_design(PROTEIN *protein) { int competitor_ctr,i,j,k,num_words,num_processors, dummypid; int runtime_flag, logfile_flag, jobtype_flag, max_probab_index; double denom, max_probab, master_time, long_runtime; char *line, **word, *line2, *inputfilename, *command, *keyword, *processor_name, *logfile_name, *dummystring; char *optimized_restype, *scmf_restype; FILE *target_file_ptr, *top_file_ptr, *avail_proc_file_ptr, *lookup_table_input_file_ptr, *logfile_ptr; char *single_state_filename, *scmf_filename; FILE *single_state_file_ptr, *scmf_file_ptr; time_t now, start_time; extern double MASTER_OPTIMIZATION_TIME,MAX_OPTIMIZATION_TIME; extern int OPTIMIZE_TARGET_STRUCTURE_ONLY_FLAG, GET_PID, NICE_SLAVE_FLAG, LOGFILE_FLAG, FILTER_SEQUENCE_SPACE_FLAG, OUTPUT_COORD_FLAG, NUM_SYSTEM_CPU; extern char *AVAILABLE_PROCESSORS_FILE, *CURRENT_WORKING_DIRECTORY, *EXECUTABLE_FILENAME, *DEFAULT_ROTAMER_JOB; if(protein->var_pos==NULL) failure_report("ERROR VARIABLE_POSITIONS, SEQUENCE, or SCANNING_POSITIONS must be defined for MULTISTATE jobs","exit"); if(strcmp(protein->parameters.algorithm, "SCANNING_MUTAGENESIS") == 0) { strcpy(protein->parameters.sequence_algorithm,"SCAN"); FILTER_SEQUENCE_SPACE_FLAG = -1; } master_time = MASTER_OPTIMIZATION_TIME; OUTPUT_COORD_FLAG=0; if(AVAILABLE_PROCESSORS_FILE==NULL) failure_report("ERROR For multistate design, CPU_FILE must be defined or NUM_CPU > 0","exit"); word = NULL; optimized_restype = (char *)calloc(10,sizeof(char)); scmf_restype = (char *)calloc(10,sizeof(char)); line = (char *)calloc(MXLINE_INPUT,sizeof(char)); line2 = (char *)calloc(MXLINE_INPUT,sizeof(char)); inputfilename = (char *)calloc(MXLINE_INPUT,sizeof(char)); command = (char *)calloc(MXLINE_INPUT,sizeof(char)); keyword = (char *)calloc(MXLINE_INPUT,sizeof(char)); processor_name = (char *)calloc(MXLINE_INPUT,sizeof(char)); single_state_filename = (char *)calloc(MXLINE_INPUT,sizeof(char)); scmf_filename= (char *)calloc(MXLINE_INPUT,sizeof(char)); dummystring= (char *)calloc(MXLINE_INPUT,sizeof(char)); dummypid=randint(100000); logfile_name = NULL; if(LOGFILE_FLAG!=0) { logfile_name = (char *)calloc(MXLINE_INPUT,sizeof(char)); sprintf(logfile_name,"%s.log",protein->parameters.output_prefix); } i=1; while(protein->var_pos[i].seq_position!=ENDFLAG) { if(protein->var_pos[i].fixed_flag == 0) protein->var_pos[i].neighbor_level = ENDFLAG; ++i; } if(AVAILABLE_PROCESSORS_FILE!=NULL) { avail_proc_file_ptr = fopen_file(AVAILABLE_PROCESSORS_FILE,"r"); num_processors=0; while(fgets(line,MAXLINE,avail_proc_file_ptr)!=NULL) ++num_processors; fclose(avail_proc_file_ptr); } else { AVAILABLE_PROCESSORS_FILE=(char *)calloc(MXLINE_INPUT,sizeof(char)); sprintf(AVAILABLE_PROCESSORS_FILE,"temp.avail_processors.%d",GET_PID); avail_proc_file_ptr = fopen_file(AVAILABLE_PROCESSORS_FILE,"w"); fprintf(avail_proc_file_ptr,"%s\n", getenv("HOST")); fclose(avail_proc_file_ptr); num_processors=1; } make_directory(protein->parameters.lookup_energy_table_directory); single_state_file_ptr=NULL; scmf_file_ptr=NULL; /* create inputfiles for target and competitor structures */ for(competitor_ctr=0;competitor_ctr<=protein->num_competitors;++competitor_ctr) { target_file_ptr = fopen_file(protein->inputfilename,"r"); if(competitor_ctr == 0) { sprintf(inputfilename,"foreman.target.%d.input",GET_PID ); sprintf(single_state_filename, "singlestate.target.%d.input",GET_PID); sprintf(scmf_filename,"scmf.target.%d.input",GET_PID ); single_state_file_ptr = fopen_file(single_state_filename,"w"); scmf_file_ptr = fopen_file(scmf_filename,"w"); } else sprintf(inputfilename,"foreman.competitor.%d.%d.input",competitor_ctr, GET_PID ); if(LOGFILE_FLAG!=0) { logfile_ptr = fopen(logfile_name,"a"); now = time(NULL); if(competitor_ctr == 0) fprintf(logfile_ptr, "creating inputfiles %s for target structure\t%s", inputfilename, ctime(&now)); else fprintf(logfile_ptr, "creating inputfiles %s for competitor structure #%d\t%s", inputfilename, competitor_ctr, ctime(&now)); fclose(logfile_ptr); } top_file_ptr = fopen_file(inputfilename,"w"); sprintf(command,"%s.lookup_table_input",inputfilename); lookup_table_input_file_ptr = fopen_file(command,"w"); fgets(line,MXLINE_INPUT,target_file_ptr); sscanf(line,"%s",keyword); convert_string_to_all_caps(keyword); runtime_flag=0; logfile_flag=0; jobtype_flag=0; long_runtime=0; while(strcmp(keyword,"END")!=0 && strncmp(keyword,"VAR",3)!=0 && strncmp(keyword,"SCAN",4)!=0) { if(competitor_ctr != 0) // competitors { if(strstr(keyword,"TARGET")!=0 || strstr(keyword,"TEMPLATE")!=0) { fprintf(top_file_ptr,"TEMPLATE %s\n",protein->competitor_Template[competitor_ctr]); fprintf(lookup_table_input_file_ptr,"TEMPLATE %s\n",protein->competitor_Template[competitor_ctr]); } else if(!(strstr(keyword,"COMPET")!=0 || strncmp(keyword,"FREE",4)==0 ) && strstr(keyword,"PREFIX")==0 && strstr(keyword,"OTHER")==0 && (strstr(keyword,"LOOKUP")==0 && strstr(keyword,"DIR")==0) && strstr(keyword,"MAX_MUT")==0 && !(strstr(keyword, "FILE")!=0 && (strstr(keyword, "PROCESSOR")!=0 || strstr(keyword, "CPU")!=0) ) ) { if(strstr(keyword, "TIME")==0) { fprintf(top_file_ptr,"%s",line); fprintf(lookup_table_input_file_ptr,"%s",line); } // slave runtime if(strstr(keyword, "TIME")!=0 && strstr(keyword, "RUN")!=0 && strstr(keyword, "SLAVE")!=0) { runtime_flag=1; sscanf(line,"%s %lf",keyword,&MAX_OPTIMIZATION_TIME); } // initial optimization runtime if(strstr(keyword, "TIME")!=0 && strstr(keyword, "RUN")!=0 && strstr(keyword, "SLAVE")==0) sscanf(line,"%s %lf",keyword,&long_runtime); if(strcmp(keyword,"JOBTYPE")==0 || strcmp(keyword,"SLAVE_JOBTYPE")==0) jobtype_flag=1; if(strstr(keyword,"LOGFILE")!=0) logfile_flag=1; if(strstr(keyword,"OUTPUT")!=0 && (strstr(keyword,"COORD")!=0 || strstr(keyword,"STRUCT")!=0) ) sscanf_flag(line,keyword,OUTPUT_COORD_FLAG); } } else // target { if(strstr(keyword,"TARGET")!=0 || strstr(keyword,"TEMPLATE")!=0) { fprintf(top_file_ptr,"TEMPLATE %s\n",protein->Template_filename); fprintf(lookup_table_input_file_ptr,"TEMPLATE %s\n",protein->Template_filename); fprintf(single_state_file_ptr,"TEMPLATE %s\n",protein->Template_filename); fprintf(scmf_file_ptr,"TEMPLATE %s\n",protein->Template_filename); } else if(!(strstr(keyword,"COMPET")!=0 || strncmp(keyword,"FREE",4)==0 ) && strstr(keyword,"PREFIX")==0 && strstr(keyword,"OTHER")==0 && (strstr(keyword,"LOOKUP")==0 && strstr(keyword,"DIR")==0) && strstr(keyword,"MAX_MUT")==0 && !(strstr(keyword, "FILE")!=0 && (strstr(keyword, "PROCESSOR")!=0 || strstr(keyword, "CPU")!=0))) { if(strstr(keyword, "TIME")==0) { fprintf(top_file_ptr,"%s",line); fprintf(lookup_table_input_file_ptr,"%s",line); fprintf(single_state_file_ptr,"%s",line); fprintf(scmf_file_ptr,"%s",line); } // slave runtime if(strstr(keyword, "TIME")!=0 && strstr(keyword, "RUN")!=0 && strstr(keyword, "SLAVE")!=0) { runtime_flag=1; sscanf(line,"%s %lf",keyword,&MAX_OPTIMIZATION_TIME); } // initial optimization runtime if(strstr(keyword, "TIME")!=0 && strstr(keyword, "RUN")!=0 && strstr(keyword, "SLAVE")==0) sscanf(line,"%s %lf",keyword,&long_runtime); if(strcmp(keyword,"JOBTYPE")==0 || strcmp(keyword,"SLAVE_JOBTYPE")==0) jobtype_flag=1; if(strstr(keyword,"LOGFILE")!=0) logfile_flag=1; if(strstr(keyword,"OUTPUT")!=0 && (strstr(keyword,"COORD")!=0 || strstr(keyword,"STRUCT")!=0)) sscanf_flag(line,keyword,OUTPUT_COORD_FLAG); } } fgets(line,MXLINE_INPUT,target_file_ptr); sscanf(line,"%s",keyword); convert_string_to_all_caps(keyword); } fclose(target_file_ptr); fprintf(top_file_ptr,"OTHER_RESIDUES none\n"); fprintf(lookup_table_input_file_ptr,"OTHER_RESIDUES none\n"); if(runtime_flag==0) { fprintf(top_file_ptr,"RUNTIME 2.0\n"); MAX_OPTIMIZATION_TIME = 120; } else { fprintf(top_file_ptr,"RUNTIME %lf\n",MAX_OPTIMIZATION_TIME); MAX_OPTIMIZATION_TIME = 60*MAX_OPTIMIZATION_TIME; } if(jobtype_flag==0) fprintf(top_file_ptr,"JOBTYPE %s\n",DEFAULT_ROTAMER_JOB); if(logfile_flag==0) fprintf(top_file_ptr,"LOGFILE_FLAG 0\n"); if(competitor_ctr == 0) { fprintf(single_state_file_ptr,"PID %d\n",dummypid); fprintf(single_state_file_ptr,"OTHER_RESIDUES none\n"); fprintf(scmf_file_ptr,"OTHER_RESIDUES none\n"); fprintf(scmf_file_ptr,"JOBTYPE SCMF\n"); // if more than one seq possible and initialize with optimal or scmf seq if(protein->parameters.log10_seq_combinations != 0) if(FILTER_SEQUENCE_SPACE_FLAG != -1 && FILTER_SEQUENCE_SPACE_FLAG != 3) fprintf(single_state_file_ptr,"JOBTYPE MC_GA\n"); // otherwise, rotamer optimze the template sequence with the default rotamer optimization method (FASTER) if(long_runtime == 0) { fprintf(single_state_file_ptr,"RUNTIME 60\n"); fprintf(scmf_file_ptr,"RUNTIME 60\n"); } else { fprintf(single_state_file_ptr,"RUNTIME %lf\n",long_runtime); fprintf(scmf_file_ptr,"RUNTIME %lf\n",long_runtime); } fprintf(single_state_file_ptr,"LOGFILE_FLAG 1\n"); fprintf(scmf_file_ptr,"LOGFILE_FLAG 1\n"); fprintf(top_file_ptr,"SLAVE_FILENAME target.slavefilelist.%d\n",GET_PID); sprintf(line,"target.slavefilelist.%d",GET_PID); sprintf(command,"%s/target",protein->parameters.lookup_energy_table_directory); } else { fprintf(top_file_ptr,"SLAVE_FILENAME competitor.%d.slavefilelist.%d\n",competitor_ctr,GET_PID); sprintf(line,"competitor.%d.slavefilelist.%d",competitor_ctr,GET_PID); sprintf(command,"%s/competitor.%d",protein->parameters.lookup_energy_table_directory,competitor_ctr); } touch_file(line); fprintf(top_file_ptr,"LOOKUP_TABLE_DIRECTORY\t%s\n",command); fprintf(top_file_ptr,"VARIABLE_POSITIONS\n"); if(competitor_ctr == 0) { fprintf(single_state_file_ptr,"PREFIX\t%s.target_state_optimization\n",protein->parameters.output_prefix); fprintf(scmf_file_ptr,"PREFIX\t%s.target_state_scmf\n",protein->parameters.output_prefix); if(NUM_SYSTEM_CPU > 0) { fprintf(single_state_file_ptr,"NUM_CPU %d\n",NUM_SYSTEM_CPU); fprintf(scmf_file_ptr,"NUM_CPU %d\n",NUM_SYSTEM_CPU); } else { fprintf(single_state_file_ptr,"CPU_FILE %s\n",AVAILABLE_PROCESSORS_FILE); fprintf(scmf_file_ptr,"CPU_FILE %s\n",AVAILABLE_PROCESSORS_FILE); } fprintf(single_state_file_ptr,"LOOKUP_TABLE_DIRECTORY\t%s\n",command); fprintf(single_state_file_ptr,"VARIABLE_POSITIONS\n"); fprintf(scmf_file_ptr,"LOOKUP_TABLE_DIRECTORY\t%s\n",command); fprintf(scmf_file_ptr,"VARIABLE_POSITIONS\n"); } fprintf(lookup_table_input_file_ptr,"JOBTYPE LOOKUP_TABLE_SLAVE\n"); if(competitor_ctr != 0) fprintf(lookup_table_input_file_ptr,"PRECALCULATION_LEVEL 2\n"); /* precalc only side-bkbn */ else if(FILTER_SEQUENCE_SPACE_FLAG==0) fprintf(lookup_table_input_file_ptr,"PRECALCULATION_LEVEL 2\n"); fprintf(lookup_table_input_file_ptr,"LOGFILE_FLAG 0\n"); if(NUM_SYSTEM_CPU > 0) fprintf(lookup_table_input_file_ptr,"NUM_CPU %d\n",NUM_SYSTEM_CPU); else fprintf(lookup_table_input_file_ptr,"CPU_FILE %s\n",AVAILABLE_PROCESSORS_FILE); fprintf(lookup_table_input_file_ptr,"LOOKUP_TABLE_DIRECTORY\t%s\n",command); fprintf(lookup_table_input_file_ptr,"VARIABLE_POSITIONS\n"); i=1; while(protein->var_pos[i].seq_position!=ENDFLAG) { if(protein->var_pos[i].fixed_flag == 0) { fprintf(top_file_ptr,"\t%s\t",protein->var_pos[i].seqpos_text_map_ptr->seqpos_text); fprintf(lookup_table_input_file_ptr,"\t%s\t",protein->var_pos[i].seqpos_text_map_ptr->seqpos_text); if(competitor_ctr == 0) { fprintf(single_state_file_ptr,"\t%s\t",protein->var_pos[i].seqpos_text_map_ptr->seqpos_text); fprintf(scmf_file_ptr,"\t%s\t",protein->var_pos[i].seqpos_text_map_ptr->seqpos_text); if(FILTER_SEQUENCE_SPACE_FLAG == -1 || FILTER_SEQUENCE_SPACE_FLAG == 3) { fprintf(single_state_file_ptr,"\n"); fprintf(scmf_file_ptr,"\n"); } } for(j=1;j<=protein->var_pos[i].number_of_choices;++j) { if(j+1>protein->var_pos[i].number_of_choices) { // include ala as a choice for baseline fprintf(top_file_ptr,"%c,A.\n",protein->var_pos[i].choice[j].resparam_ptr->one_letter_code[0]); fprintf(lookup_table_input_file_ptr,"%c,A.\n",protein->var_pos[i].choice[j].resparam_ptr->one_letter_code[0]); if(competitor_ctr == 0) if(FILTER_SEQUENCE_SPACE_FLAG != -1 && FILTER_SEQUENCE_SPACE_FLAG !=3) { fprintf(single_state_file_ptr,"%c.\n",protein->var_pos[i].choice[j].resparam_ptr->one_letter_code[0]); fprintf(scmf_file_ptr,"%c.\n",protein->var_pos[i].choice[j].resparam_ptr->one_letter_code[0]); } } else { fprintf(top_file_ptr,"%c,",protein->var_pos[i].choice[j].resparam_ptr->one_letter_code[0]); fprintf(lookup_table_input_file_ptr,"%c,",protein->var_pos[i].choice[j].resparam_ptr->one_letter_code[0]); if(competitor_ctr == 0) if(FILTER_SEQUENCE_SPACE_FLAG != -1 && FILTER_SEQUENCE_SPACE_FLAG !=3) { fprintf(single_state_file_ptr,"%c,",protein->var_pos[i].choice[j].resparam_ptr->one_letter_code[0]); fprintf(scmf_file_ptr,"%c,",protein->var_pos[i].choice[j].resparam_ptr->one_letter_code[0]); } } } } ++i; } fprintf(top_file_ptr,"END\n"); fprintf(lookup_table_input_file_ptr,"END\n"); fclose(top_file_ptr); fclose(lookup_table_input_file_ptr); if(competitor_ctr == 0) { fprintf(single_state_file_ptr,"END\n"); fprintf(scmf_file_ptr,"END\n"); fclose(single_state_file_ptr); fclose(scmf_file_ptr); } } i=1; protein->parameters.numMovingPositions = 0; while(protein->var_pos[i].seq_position!=ENDFLAG) { if(protein->var_pos[i].number_of_choices == 1) /* only one choice, so it's fixed */ protein->var_pos[i].fixed_flag = 1; else ++protein->parameters.numMovingPositions; ++i; } start_time = time(NULL); if(FILTER_SEQUENCE_SPACE_FLAG != 0) if(protein->parameters.numMovingPositions>0) { /* if FILTER_SEQUENCE_SPACE_FLAG == 1 (default), create complete lookup table and optimize on target backbone */ /* target-optimized sequence included in the population for the first GA */ /* if FILTER_SEQUENCE_SPACE_FLAG == 2 also do scmf; use probabs to fix residues and set residue freqs */ // FILTER_SEQUENCE_SPACE_FLAG == 3 means random seed if(FILTER_SEQUENCE_SPACE_FLAG!=0) { sprintf(command,"cd %s; %s %s lookup_table_master",CURRENT_WORKING_DIRECTORY, EXECUTABLE_FILENAME, single_state_filename); avail_proc_file_ptr = fopen_file(AVAILABLE_PROCESSORS_FILE,"r"); fgets(line,MAXLINE,avail_proc_file_ptr); sscanf(line,"%s",processor_name); fclose(avail_proc_file_ptr); if(LOGFILE_FLAG!=0) { logfile_ptr = fopen(logfile_name,"a"); now = time(NULL); fprintf(logfile_ptr, "launching lookup table generation and rotamer optimization for target state inputfile %s for target structure on %s\t%s", single_state_filename, processor_name, ctime(&now)); fclose(logfile_ptr); } sprintf(dummystring,"%s.target_state_optimization.pdb",protein->parameters.output_prefix); if(does_this_file_exist(dummystring)==0) { ssh_command(processor_name, command); // wait for this process and slaves to finish loading sprintf(dummystring,"%d.lookup_log",dummypid); // if this doesn't exist after 1 min, it probably loaded within the dead-time sprintf(command,"%s.target_state_optimization.pdb",protein->parameters.output_prefix); k=0; while(does_this_file_exist(dummystring)==0 && k<=30 && does_this_file_exist(command)==0) { sleep(2); ++k; } if(does_this_file_exist(dummystring)==1 && does_this_file_exist(command)==0) { sprintf(dummystring,"%d.lookup_log.0.5.done",dummypid); while(does_this_file_exist(dummystring)==0 && does_this_file_exist(command)==0) sleep(2); } } if(FILTER_SEQUENCE_SPACE_FLAG==2) // use scmf probabs to restrict composition { sprintf(command,"cd %s; %s %s",CURRENT_WORKING_DIRECTORY, EXECUTABLE_FILENAME, scmf_filename); avail_proc_file_ptr = fopen_file(AVAILABLE_PROCESSORS_FILE,"r"); fgets(line,MAXLINE,avail_proc_file_ptr); fgets(line,MAXLINE,avail_proc_file_ptr); sscanf(line,"%s",processor_name); fclose(avail_proc_file_ptr); if(LOGFILE_FLAG!=0) { logfile_ptr = fopen(logfile_name,"a"); now = time(NULL); fprintf(logfile_ptr, "launching scmf optimization for target state inputfile %s for target structure on %s\t%s", scmf_filename, processor_name, ctime(&now)); fclose(logfile_ptr); } sprintf(dummystring,"%s.target_state_scmf.pdb",protein->parameters.output_prefix); if(does_this_file_exist(dummystring)==0) ssh_command(processor_name, command); // wait for scmf job to finish sprintf(dummystring,"%s.target_state_scmf.pdb",protein->parameters.output_prefix); while(does_this_file_exist(dummystring)==0) sleep(2); } // wait for target optimization job to finish sprintf(dummystring,"%s.target_state_optimization.pdb",protein->parameters.output_prefix); while(does_this_file_exist(dummystring)==0) sleep(2); // filtering w/ scmf probabs if(FILTER_SEQUENCE_SPACE_FLAG==2) { sprintf(dummystring,"%s.target_state_optimization.pdb",protein->parameters.output_prefix); single_state_file_ptr = fopen_file(dummystring,"r"); sprintf(dummystring,"%s.target_state_scmf.pdb",protein->parameters.output_prefix); scmf_file_ptr = fopen_file(dummystring,"r"); // advance optimimized structure file_ptr to ROTAMER section fgets(line,MAXLINE,single_state_file_ptr); sscanf(line,"%s",dummystring); while(strcmp(dummystring,"ROTAMER")!=0) { fgets(line,MAXLINE,single_state_file_ptr); sscanf(line,"%s",dummystring); } // advance scmf structure file_ptr to SCMF_ENTROPY section fgets(line2,MAXLINE,scmf_file_ptr); sscanf(line2,"%s",dummystring); while(strcmp(dummystring,"SCMF_ENTROPY")!=0) { fgets(line2,MAXLINE,scmf_file_ptr); sscanf(line2,"%s",dummystring); } word = (char **)calloc(MAX_RES_CHOICES*2 + 10,sizeof(char *)); for(i=0;i<(MAX_RES_CHOICES*2 + 10);++i) word[i] = (char *)calloc(50,sizeof(char)); i=1; while(protein->var_pos[i].seq_position!=ENDFLAG) { if(protein->var_pos[i].number_of_choices > 1) { // advance file_ptrs to the residue of interest sscanf(line,"%s %s",keyword, dummystring); while(strcmp(dummystring,protein->var_pos[i].seqpos_text_map_ptr->seqpos_text)!=0) { fgets(line,MAXLINE,single_state_file_ptr); sscanf(line,"%s %s",keyword, dummystring); } sscanf(line2,"%s %s",keyword, dummystring); while(strcmp(dummystring,protein->var_pos[i].seqpos_text_map_ptr->seqpos_text)!=0) { fgets(line2,MAXLINE,scmf_file_ptr); sscanf(line2,"%s %s",keyword, dummystring); } sscanf(line,"%s %s %s %s", keyword, dummystring, dummystring, optimized_restype); num_words = extract_words(line2, word); k=6; max_probab = 0; max_probab_index=1; while(k<=num_words) { sscanf(word[k],"%s",scmf_restype); j=1; while(strcmp(protein->var_pos[i].choice[j].resparam_ptr->one_letter_code,scmf_restype)!=0) ++j; ++k; sscanf(word[k],"%lf",&protein->var_pos[i].choice[j].composition); if(protein->var_pos[i].choice[j].composition < EPS) protein->var_pos[i].choice[j].composition = EPS; if(protein->var_pos[i].choice[j].composition > max_probab) { max_probab = protein->var_pos[i].choice[j].composition; max_probab_index = j; } ++k; } // equal probab for all allowed residuetypes, since scmf choice doesn't agree with optimization if(strcmp(protein->var_pos[i].choice[max_probab_index].resparam_ptr->one_letter_code,optimized_restype)!=0) { for(j=1;j<=protein->var_pos[i].number_of_choices;++j) { protein->var_pos[i].choice[j].composition = 1.0; protein->var_pos[i].choice[j].in_use_flag = 1; } } // keep this position fixed to the optimized choice since it is also really favored (P>=0.99) by scmf else if(max_probab >= 0.99) { for(j=1;j<=protein->var_pos[i].number_of_choices;++j) { protein->var_pos[i].choice[j].composition = EPS; protein->var_pos[i].choice[j].in_use_flag = 0; } protein->var_pos[i].choice[1] = protein->var_pos[i].choice[max_probab_index]; protein->var_pos[i].choice[1].composition = 1.0; protein->var_pos[i].choice[1].in_use_flag = 1; protein->var_pos[i].number_of_choices = 1; } // else use the scmf probabilties since the optimized residue is also favored by scmf // adjust the frequency array used for mutations denom = 0; for(j=1;j<=protein->var_pos[i].number_of_choices;++j) denom += protein->var_pos[i].choice[j].composition; for(j=1;j<=protein->var_pos[i].number_of_choices;++j) protein->var_pos[i].choice[j].composition = protein->var_pos[i].choice[j].composition/denom; protein->var_pos[i].residue_freqs[0]=0; protein->var_pos[i].residue_freqs[1]=0; protein->var_pos[i].residue_freqs[2]=0; for(j=1;j<=protein->var_pos[i].number_of_choices;++j) protein->var_pos[i].residue_freqs[j] = protein->var_pos[i].residue_freqs[j-1] + protein->var_pos[i].choice[j].composition; if(LOGFILE_FLAG!=0) { logfile_ptr = fopen(logfile_name,"a"); now = time(NULL); if(protein->var_pos[i].number_of_choices == 1) fprintf(logfile_ptr, "Position %s fixed to %s\t%s", protein->var_pos[i].seqpos_text_map_ptr->seqpos_text, protein->var_pos[i].choice[1].resparam_ptr->residuetype, ctime(&now)); fclose(logfile_ptr); } } ++i; } fclose(single_state_file_ptr); fclose(scmf_file_ptr); for(i=0;i<(MAX_RES_CHOICES*2 + 10);++i) free_memory(word[i]); free_memory(word); } } /* launch parallel lookup table jobs for generating side-bkbn tables if number of processors > number of states */ if(num_processors>(protein->num_competitors+1)) { for(competitor_ctr=0;competitor_ctr<=protein->num_competitors;++competitor_ctr) { if(competitor_ctr == 0) sprintf(command,"foreman.target.%d.input",GET_PID ); else sprintf(command,"foreman.competitor.%d.%d.input",competitor_ctr, GET_PID ); sprintf(inputfilename,"%s.lookup_table_input",command); if(LOGFILE_FLAG!=0) { logfile_ptr = fopen(logfile_name,"a"); now = time(NULL); if(competitor_ctr == 0 && FILTER_SEQUENCE_SPACE_FLAG==0) fprintf(logfile_ptr, "launching lookup table generation inputfile %s for target structure\t%s", inputfilename, ctime(&now)); else if(competitor_ctr != 0) fprintf(logfile_ptr, "launching lookup table generation inputfile %s for competitor structure #%d\t%s", inputfilename, competitor_ctr, ctime(&now)); fclose(logfile_ptr); } sprintf(command,"%s %s lookup_table_master",EXECUTABLE_FILENAME,inputfilename); if(competitor_ctr != 0 || (competitor_ctr == 0 && FILTER_SEQUENCE_SPACE_FLAG==0)) if(system(command)!=0) { if(competitor_ctr == 0) sprintf(line,"ERROR failed to create lookup table for target structure"); else sprintf(line,"ERROR failed to create lookup table for competitor structure #%d",competitor_ctr); failure_report(line,"exit"); } rm_file(inputfilename); } } logfile_ptr = NULL; if(LOGFILE_FLAG != 0) { logfile_ptr = fopen(logfile_name,"a"); fprintf(logfile_ptr,"variable positions: "); } i=1; protein->parameters.numMovingPositions = 0; while(protein->var_pos[i].seq_position!=ENDFLAG) { if(protein->var_pos[i].number_of_choices == 1) /* only one choice, so it's fixed */ protein->var_pos[i].fixed_flag = 1; else { if(logfile_ptr != NULL) fprintf(logfile_ptr,"%s ",protein->var_pos[i].seqpos_text_map_ptr->seqpos_text); ++protein->parameters.numMovingPositions; } ++i; } if(LOGFILE_FLAG != 0) { fprintf(logfile_ptr,"\n"); fclose(logfile_ptr); } } now = time(NULL); MASTER_OPTIMIZATION_TIME = MASTER_OPTIMIZATION_TIME - difftime(now,start_time); /* launch foremen jobs */ avail_proc_file_ptr = fopen_file(AVAILABLE_PROCESSORS_FILE,"r"); competitor_ctr=0; while(fgets(line,MAXLINE,avail_proc_file_ptr)!=NULL) { sscanf(line,"%s",processor_name); if(competitor_ctr == 0) sprintf(inputfilename,"foreman.target.%d.input", GET_PID ); else sprintf(inputfilename,"foreman.competitor.%d.%d.input",competitor_ctr, GET_PID ); if(NICE_SLAVE_FLAG == 1) sprintf(command,"cd %s; nice %s %s",CURRENT_WORKING_DIRECTORY, EXECUTABLE_FILENAME, inputfilename); else sprintf(command,"cd %s; %s %s",CURRENT_WORKING_DIRECTORY, EXECUTABLE_FILENAME, inputfilename); if(LOGFILE_FLAG!=0) { logfile_ptr = fopen(logfile_name,"a"); now = time(NULL); if(competitor_ctr == 0) fprintf(logfile_ptr, "launch rotamer optimization foreman on %s for target structure\t%s", processor_name, ctime(&now)); else fprintf(logfile_ptr, "launch rotamer optimization foreman on %s for competitor structure #%d\t%s", processor_name,competitor_ctr, ctime(&now)); fclose(logfile_ptr); } ssh_command(processor_name, command); ++competitor_ctr; if(competitor_ctr>protein->num_competitors) competitor_ctr=0; sleep(5); } fclose(avail_proc_file_ptr); if(LOGFILE_FLAG!=0) { logfile_ptr = fopen(logfile_name,"a"); now = time(NULL); fprintf(logfile_ptr, "start multistate optimization\t%s", ctime(&now)); fclose(logfile_ptr); } if(protein->final_chr.firstgene== NULL) inoculate_sidechains(&protein->final_chr, protein->var_pos,0); if(protein->parameters.log10_seq_combinations > 0) { /* structures w/ target_state_optimized_sequence */ sprintf(line,"%s.target_state_optimization.pdb",protein->parameters.output_prefix); if(does_this_file_exist(line)==1) { pdbfile_to_CHROMOSOME(line, &protein->final_chr); sprintf(inputfilename,"target.slavefilelist.%d",GET_PID); rm_file(inputfilename); touch_file(inputfilename); for(j=1;j<=protein->num_competitors;++j) { sprintf(inputfilename,"competitor.%d.slavefilelist.%d",j,GET_PID); rm_file(inputfilename); touch_file(inputfilename); } sprintf(line,"%s.target_state_optimized_sequence",protein->parameters.output_prefix); make_multistate_inputfiles_for_chr(&protein->final_chr, line, 1, 0); sprintf(inputfilename,"target.slavefilelist.%d",GET_PID); wait_for_slaves_to_finish(inputfilename); for(j=1;j<=protein->num_competitors;++j) { sprintf(inputfilename,"competitor.%d.slavefilelist.%d",j,GET_PID); wait_for_slaves_to_finish(inputfilename); } if(LOGFILE_FLAG!=0) { logfile_ptr = fopen_file(logfile_name, "a"); now = time(NULL); CHROMOSOME_to_variable_sequence(&protein->final_chr, dummystring); CHROMOSOME_to_score(&protein->final_chr); fprintf(logfile_ptr, "target state optimized sequence: %s\t%lf\t%lf\t%lf\t%lf\t%d\t%d\t%s", dummystring, protein->final_chr.energy, protein->final_chr.energy_list.dG_gap, protein->final_chr.energy_list.pseudo_dG, protein->final_chr.energy_list.E_total, protein->final_chr.energy_list.clashes, protein->final_chr.energy_list.num_unsatisfied_hbond, ctime(&now)); fclose(logfile_ptr); } } generate_custom_sequence_CHROMOSOME(&protein->final_chr, 1); // initialize stuff if(OPTIMIZE_TARGET_STRUCTURE_ONLY_FLAG == 0) { if(FILTER_SEQUENCE_SPACE_FLAG == 3) // random sequence seed { randomize_sidechains(&protein->final_chr, 0); protein->final_chr.energy = CHROMOSOME_to_score(&protein->final_chr); } if(LOGFILE_FLAG!=0) { logfile_ptr = fopen_file(logfile_name, "a"); now = time(NULL); CHROMOSOME_to_variable_sequence(&protein->final_chr, dummystring); CHROMOSOME_to_score(&protein->final_chr); fprintf(logfile_ptr, "initial sequence: %s\t%lf\t%lf\t%lf\t%lf\t%d\t%d\t%s", dummystring, protein->final_chr.energy, protein->final_chr.energy_list.dG_gap, protein->final_chr.energy_list.pseudo_dG, protein->final_chr.energy_list.E_total, protein->final_chr.energy_list.clashes, protein->final_chr.energy_list.num_unsatisfied_hbond, ctime(&now)); fclose(logfile_ptr); } if(strcmp(protein->parameters.sequence_algorithm,"HQM")==0) multistate_hqm(protein); else if(strcmp(protein->parameters.sequence_algorithm,"GA")==0) multistate_ga(protein); else if(strcmp(protein->parameters.sequence_algorithm,"SCAN")==0) multistate_scan(protein); } } /* final structures */ sprintf(inputfilename,"target.slavefilelist.%d",GET_PID); rm_file(inputfilename); touch_file(inputfilename); for(j=1;j<=protein->num_competitors;++j) { sprintf(inputfilename,"competitor.%d.slavefilelist.%d",j,GET_PID); rm_file(inputfilename); touch_file(inputfilename); } make_multistate_inputfiles_for_chr(&protein->final_chr, protein->parameters.output_prefix,1, 0); sprintf(inputfilename,"target.slavefilelist.%d",GET_PID); wait_for_slaves_to_finish(inputfilename); for(j=1;j<=protein->num_competitors;++j) { sprintf(inputfilename,"competitor.%d.slavefilelist.%d",j,GET_PID); wait_for_slaves_to_finish(inputfilename); } if(LOGFILE_FLAG!=0) { logfile_ptr = fopen_file(logfile_name, "a"); now = time(NULL); CHROMOSOME_to_variable_sequence(&protein->final_chr, dummystring); CHROMOSOME_to_score(&protein->final_chr); fprintf(logfile_ptr, "final optimized sequence: %s\t%lf\t%lf\t%lf\t%lf\t%d\t%d\t%s", dummystring, protein->final_chr.energy, protein->final_chr.energy_list.dG_gap, protein->final_chr.energy_list.pseudo_dG, protein->final_chr.energy_list.E_total, protein->final_chr.energy_list.clashes, protein->final_chr.energy_list.num_unsatisfied_hbond, ctime(&now)); fclose(logfile_ptr); // write out final sequence in VARIABLE_POSITIONS format sprintf(line,"%s.final.variable_positions",protein->parameters.output_prefix); logfile_ptr = fopen_file(line,"w"); fprintf(logfile_ptr, "OTHER_POSITIONS none\n"); fprintf(logfile_ptr, "VARIABLE_POSITIONS\n"); protein->final_chr.genes = protein->final_chr.firstgene; while(protein->final_chr.genes->seq_position!=ENDFLAG) { if(protein->final_chr.genes->varpos_ptr->fixed_flag==0 || protein->final_chr.genes->varpos_ptr->neighbor_level == ENDFLAG) { fprintf(logfile_ptr, "\t%s\t%s.", protein->final_chr.genes->varpos_ptr->seqpos_text_map_ptr->seqpos_text, protein->final_chr.genes->choice_ptr->resparam_ptr->one_letter_code); if(protein->final_chr.genes->varpos_ptr->fixed_flag==0) fprintf(logfile_ptr, " # variable sequence position"); fprintf(logfile_ptr, "\n"); } protein->final_chr.genes = protein->final_chr.genes->nextgene; } protein->final_chr.genes = protein->final_chr.firstgene; fprintf(logfile_ptr, "END\n"); fclose(logfile_ptr); } /* kill the slaves by removing the slavefilelist files */ /* save the baseline structures for reference */ sprintf(inputfilename,"target.slavefilelist.%d",GET_PID); rm_file(inputfilename); sprintf(inputfilename,"%s.target",protein->parameters.output_prefix); rm_file(inputfilename); sprintf(inputfilename,"baseline.%d.target.pdb",GET_PID); sprintf(line,"%s.baseline.target.pdb",protein->parameters.output_prefix); mv_file(inputfilename,line); for(j=1;j<=protein->num_competitors;++j) { sprintf(inputfilename,"competitor.%d.slavefilelist.%d",j,GET_PID); rm_file(inputfilename); sprintf(inputfilename,"%s.competitor.%d",protein->parameters.output_prefix,j); rm_file(inputfilename); sprintf(inputfilename,"baseline.%d.competitor.%d.pdb",GET_PID,j); sprintf(line,"%s.baseline.competitor.%d.pdb",protein->parameters.output_prefix,j); mv_file(inputfilename,line); } MASTER_OPTIMIZATION_TIME = master_time; sprintf(command,"/bin/rm -rf *%d*",GET_PID); system(command); free_memory(dummystring); free_memory(line2); free_memory(optimized_restype); free_memory(scmf_restype); free_memory(line); free_memory(inputfilename); free_memory(command); free_memory(keyword); free_memory(processor_name); free_memory(single_state_filename); free_memory(scmf_filename); if(LOGFILE_FLAG!=0) free_memory(logfile_name); }