/* EGAD: CHROMOSOME_to_lookupEnergy.cpp Navin Pokala and Tracy Handel Dept. of Molecular and Cell Biology University of California, Berkeley Copyright (C) 2003 Regents of the University of California GNU Public License Aug 12 2003 Absolutely no warranties are made or are implied with the use of this program or its parts. This file contains the functions for calculating energies with pre-existing pair energy lookup tables. */ #include "CHROMOSOME_to_lookupEnergy.h" /* a_gene and b_gene are GENEs with all the pointers and values assigned (by mutate_sidechain, inoculate_sidechains) this function returns in the pair energy between the two rotamers (NON_INTERACT_PTR if they do not interact). If the lookup table for these residues at these positions have not been calculated or read from disk, this function takes care of that. If the entire table has been precalculated (default for all except for rotamer_foremen jobs), this function is not called. */ #define iGENERESx i_gene.choice_ptr->lookup_res_ptr->lookupRot[i_res_rot].lookupX[j_minus_i].lookupResX[j_gene.j_choice_index] double load_calc_save_lookupResX(GENE a_gene, GENE b_gene) { int i, j, h, j_minus_i; int i_res, j_res; int i_res_rot, j_res_rot; int quiet_flag; bool read_error_flag; /* used for dealing with read errors that (rarely) occur when NFS is really stressed */ FILE *var_var_file_ptr; static char *structure_filename=NULL; static char *var_var_filename, *dir_name, *directoryname, *tempfilename, *path; GENE input_i_gene, input_j_gene; MENDEL i_gene, j_gene; short int num_hbond_rot_pairs; extern LOOKUP_ENERGY_ROTAMER_X *NON_INTERACT_LOOKUP_ROT_X; extern LOOKUP_ENERGY_RESIDUE_X *NON_INTERACT_LOOKUP_RES_X; extern int GET_PID, QUIET_FLAG; extern float NON_INTERACT_PTR; extern char *LOOKUP_TABLE_DIRECTORY; quiet_flag = QUIET_FLAG; if(a_gene->seq_position < b_gene->seq_position) { input_i_gene = a_gene; input_j_gene = b_gene; } else { input_i_gene = b_gene; input_j_gene = a_gene; } i = input_i_gene->varpos_ptr->varpos_index; j = input_j_gene->varpos_ptr->varpos_index; j_minus_i = j-i; i_res = input_i_gene->j_choice_index + 1; j_res = input_j_gene->j_choice_index + 1; /* these position don't interact, so return 0 */ if(input_i_gene->lookupRot->lookupX[j_minus_i].lookupResX == NON_INTERACT_LOOKUP_RES_X) return(0); /* these residues do not interact so return 0 */ if(input_i_gene->lookupRot->lookupX[j_minus_i].lookupResX[input_j_gene->j_choice_index].lookupRotX == NON_INTERACT_LOOKUP_ROT_X) return(0); /* this residue-residue pair has been examined, so return the energy */ if(input_i_gene->lookupRot->lookupX[j_minus_i].lookupResX[input_j_gene->j_choice_index].lookupRotX!=NULL) { if(input_i_gene->lookupRot->lookupX[j_minus_i].lookupResX[input_j_gene->j_choice_index].lookupRotX[input_j_gene->lookupRot_index-1].energy_var_var!=NON_INTERACT_PTR) /* this rotamer pair interacts */ return(input_i_gene->lookupRot->lookupX[j_minus_i].lookupResX[input_j_gene->j_choice_index].lookupRotX[input_j_gene->lookupRot_index-1].energy_var_var); else return(0); } /* need to load or calculate energies for this residue pair */ /* make working genes */ i_gene.choice_ptr = input_i_gene->choice_ptr; i_gene.seq_position = input_i_gene->seq_position; i_gene.j_choice_index = i_res-1; j_gene.choice_ptr = input_j_gene->choice_ptr; j_gene.seq_position = input_j_gene->seq_position; j_gene.j_choice_index = j_res-1; i_gene.nextgene = NULL; j_gene.nextgene = NULL; /* allocate memory */ for(i_res_rot = 1; i_res_rot<= i_gene.choice_ptr->resparam_ptr->rotamerlib_ptr->number_of_rotamers; ++i_res_rot) { iGENERESx.lookupRotX = (LOOKUP_ENERGY_ROTAMER_X *)calloc((j_gene.choice_ptr->resparam_ptr->rotamerlib_ptr->number_of_rotamers),sizeof(LOOKUP_ENERGY_ROTAMER_X)); iGENERESx.lookupRotX_hbond = NULL; } if(structure_filename==NULL) { structure_filename = (char *)calloc(MXLINE_INPUT,sizeof(char)); var_var_filename = (char *)calloc(MXLINE_INPUT,sizeof(char)); dir_name = (char *)calloc(MXLINE_INPUT,sizeof(char)); tempfilename = (char *)calloc(MXLINE_INPUT,sizeof(char)); directoryname = (char *)calloc(MXLINE_INPUT,sizeof(char)); path = (char *)calloc(MXLINE_INPUT,sizeof(char)); sprintf(path,"%s/var_var",LOOKUP_TABLE_DIRECTORY); make_directory(path); } sprintf(directoryname,"%d",i_gene.seq_position); while(create_directory(directoryname, path)==0); sprintf(var_var_filename, "%s/var_var/%d/%d.%d/%s.%d.%s.%d.var_var_energy", LOOKUP_TABLE_DIRECTORY, i_gene.seq_position, i_gene.seq_position,j_gene.seq_position, i_gene.choice_ptr->resparam_ptr->residuetype, i_gene.seq_position, j_gene.choice_ptr->resparam_ptr->residuetype, j_gene.seq_position); var_var_file_ptr = NULL; if(does_this_file_exist(var_var_filename)) var_var_file_ptr = fopen_file(var_var_filename, "rb"); if(var_var_file_ptr!=NULL) /* the var_var energies for this sidechain pair exist, so load them */ { read_error_flag=1; while(read_error_flag == 1) { for(i_res_rot=1;i_res_rot <= i_gene.choice_ptr->resparam_ptr->rotamerlib_ptr->number_of_rotamers;++i_res_rot) { fread(iGENERESx.lookupRotX, sizeof(LOOKUP_ENERGY_ROTAMER_X), j_gene.choice_ptr->resparam_ptr->rotamerlib_ptr->number_of_rotamers, var_var_file_ptr); if(i_gene.choice_ptr->resparam_ptr->num_hbonding_sidechain_atoms != 0) if(j_gene.choice_ptr->resparam_ptr->num_hbonding_sidechain_atoms != 0) { fread(&num_hbond_rot_pairs,sizeof(short int),1,var_var_file_ptr); if(num_hbond_rot_pairs!=0) { iGENERESx.lookupRotX_hbond = (LOOKUP_ROTAMER_X_HBOND *)malloc(sizeof(LOOKUP_ROTAMER_X_HBOND)); iGENERESx.lookupRotX_hbond->num_hbond_rot_pairs = num_hbond_rot_pairs; iGENERESx.lookupRotX_hbond->hbond_rot_pair = (HBOND_ROTAMER_PAIR *)calloc(num_hbond_rot_pairs,sizeof(HBOND_ROTAMER_PAIR)); for(h=0;hnum_hbond_rot_pairs;++h) { fread(&iGENERESx.lookupRotX_hbond->hbond_rot_pair[h].j_index,sizeof(short int),1,var_var_file_ptr); iGENERESx.lookupRotX_hbond->hbond_rot_pair[h].i_hbond_status = (bool *)calloc(i_gene.choice_ptr->resparam_ptr->num_hbonding_sidechain_atoms,sizeof(bool)); iGENERESx.lookupRotX_hbond->hbond_rot_pair[h].j_hbond_status = (bool *)calloc(j_gene.choice_ptr->resparam_ptr->num_hbonding_sidechain_atoms,sizeof(bool)); fread(iGENERESx.lookupRotX_hbond->hbond_rot_pair[h].i_hbond_status,sizeof(bool),i_gene.choice_ptr->resparam_ptr->num_hbonding_sidechain_atoms,var_var_file_ptr); fread(iGENERESx.lookupRotX_hbond->hbond_rot_pair[h].j_hbond_status,sizeof(bool),j_gene.choice_ptr->resparam_ptr->num_hbonding_sidechain_atoms,var_var_file_ptr); } } } } fclose(var_var_file_ptr); read_error_flag = 0; for(i_res_rot=1;i_res_rot <= i_gene.choice_ptr->resparam_ptr->rotamerlib_ptr->number_of_rotamers;++i_res_rot) for(j_res_rot=1;j_res_rot <= j_gene.choice_ptr->resparam_ptr->rotamerlib_ptr->number_of_rotamers;++j_res_rot) { if(iGENERESx.lookupRotX[j_res_rot-1].energy_var_var<=-10000) read_error_flag = 1; } if(read_error_flag==1) { QUIET_FLAG = 0; failure_report("error reading iGENERESx.lookupRotX","warn"); QUIET_FLAG = quiet_flag; for(i_res_rot=1;i_res_rot <= i_gene.choice_ptr->resparam_ptr->rotamerlib_ptr->number_of_rotamers;++i_res_rot) { if(iGENERESx.lookupRotX_hbond!=NULL) { if(iGENERESx.lookupRotX_hbond->hbond_rot_pair != NULL) { for(h=0;hnum_hbond_rot_pairs;++h) { free_memory(iGENERESx.lookupRotX_hbond->hbond_rot_pair[h].i_hbond_status); free_memory(iGENERESx.lookupRotX_hbond->hbond_rot_pair[h].j_hbond_status); } free_memory(iGENERESx.lookupRotX_hbond->hbond_rot_pair); } free_memory(iGENERESx.lookupRotX_hbond); } } sleep(5); var_var_file_ptr = fopen_file(var_var_filename, "rb"); } } // end read_error_flag==1 } else /* energies have not been calculated, so calculate them */ { /* load coordinates if they haven't been loaded already */ if(i_gene.choice_ptr->lookup_res_ptr->lookupRot[1].sideAtoms == NULL) { sprintf(structure_filename, "%s/coordinates/%d/%s.%d.structure", LOOKUP_TABLE_DIRECTORY, i_gene.seq_position, i_gene.choice_ptr->resparam_ptr->residuetype, i_gene.seq_position); load_coordinates_lookupRes(i_gene.choice_ptr->lookup_res_ptr, i_gene.choice_ptr->resparam_ptr, structure_filename); } if(j_gene.choice_ptr->lookup_res_ptr->lookupRot[1].sideAtoms == NULL) { sprintf(structure_filename, "%s/coordinates/%d/%s.%d.structure", LOOKUP_TABLE_DIRECTORY, j_gene.seq_position, j_gene.choice_ptr->resparam_ptr->residuetype, j_gene.seq_position); load_coordinates_lookupRes(j_gene.choice_ptr->lookup_res_ptr, j_gene.choice_ptr->resparam_ptr, structure_filename); } /* calculate rotamer pair energies */ for(i_res_rot=1;i_res_rot<=i_gene.choice_ptr->resparam_ptr->rotamerlib_ptr->number_of_rotamers;++i_res_rot) { i_gene.lookupRot = &i_gene.choice_ptr->lookup_res_ptr->lookupRot[i_res_rot]; i_gene.lookupRot_index = i_res_rot; for(j_res_rot=1; j_res_rot<= j_gene.choice_ptr->resparam_ptr->rotamerlib_ptr->number_of_rotamers; ++j_res_rot) { j_gene.lookupRot = &j_gene.choice_ptr->lookup_res_ptr->lookupRot[j_res_rot]; j_gene.lookupRot_index = j_res_rot; i_gene.lookupRot->lookupX[j_minus_i].lookupResX[j_gene.j_choice_index].lookupRotX[j_res_rot-1].energy_var_var = rotamer_pair_energy(&i_gene, &j_gene, &iGENERESx.lookupRotX_hbond); if(i_gene.lookupRot->lookupX[j_minus_i].lookupResX[j_gene.j_choice_index].lookupRotX[j_res_rot-1].energy_var_var == 0) i_gene.lookupRot->lookupX[j_minus_i].lookupResX[j_gene.j_choice_index].lookupRotX[j_res_rot-1].energy_var_var = NON_INTERACT_PTR; } /* trim lookupRotX_hbond */ trim_LOOKUP_ROTAMER_X_HBOND(&iGENERESx.lookupRotX_hbond); } /* free coordinates from memory */ for(i_res_rot=1;i_res_rot<=i_gene.choice_ptr->resparam_ptr->rotamerlib_ptr->number_of_rotamers;++i_res_rot) { free_memory(i_gene.choice_ptr->lookup_res_ptr->lookupRot[i_res_rot].sideAtoms); if(i_gene.choice_ptr->lookup_res_ptr->lookupRot[i_res_rot].rotamerlet!=NULL) free_ROTAMERLET(i_gene.choice_ptr->lookup_res_ptr->lookupRot[i_res_rot].rotamerlet, i_gene.choice_ptr->resparam_ptr); } for(j_res_rot=1;j_res_rot<=j_gene.choice_ptr->resparam_ptr->rotamerlib_ptr->number_of_rotamers;++j_res_rot) { free_memory(j_gene.choice_ptr->lookup_res_ptr->lookupRot[j_res_rot].sideAtoms); if(j_gene.choice_ptr->lookup_res_ptr->lookupRot[j_res_rot].rotamerlet!=NULL) free_ROTAMERLET(j_gene.choice_ptr->lookup_res_ptr->lookupRot[j_res_rot].rotamerlet, j_gene.choice_ptr->resparam_ptr); } /* open and write the file */ var_var_file_ptr=NULL; if(does_this_file_exist(var_var_filename)) var_var_file_ptr = fopen_file(var_var_filename, "rb"); if(var_var_file_ptr == NULL) /* still doesn't exist so create it */ { sprintf(tempfilename,"%s.%d",var_var_filename,GET_PID); var_var_file_ptr = fopen(tempfilename, "wb"); if(var_var_file_ptr==NULL) /* can't be created because the directory has not been created */ { sprintf(dir_name, "%s/var_var/%d/%d.%d/", LOOKUP_TABLE_DIRECTORY, i_gene.seq_position, i_gene.seq_position,j_gene.seq_position); /* create the directory; if can't 0 is returned so make next level up directory */ /* try making it again */ if(make_directory(dir_name) == 0) { sprintf(dir_name, "%s/var_var/%d/", LOOKUP_TABLE_DIRECTORY, i_gene.seq_position); make_directory(dir_name); /* now try making it */ sprintf(dir_name, "%s/var_var/%d/%d.%d/", LOOKUP_TABLE_DIRECTORY, i_gene.seq_position, i_gene.seq_position,j_gene.seq_position); make_directory(dir_name); } var_var_file_ptr = fopen_file(tempfilename, "wb"); /* it better exist, or we have a problem */ if(var_var_file_ptr == NULL) { sprintf(dir_name,"Cannot create %s",tempfilename); failure_report(dir_name,"exit"); } } for(i_res_rot=1;i_res_rot <= i_gene.choice_ptr->resparam_ptr->rotamerlib_ptr->number_of_rotamers;++i_res_rot) { fwrite(iGENERESx.lookupRotX, sizeof(LOOKUP_ENERGY_ROTAMER_X), j_gene.choice_ptr->resparam_ptr->rotamerlib_ptr->number_of_rotamers, var_var_file_ptr); if(i_gene.choice_ptr->resparam_ptr->num_hbonding_sidechain_atoms != 0) if(j_gene.choice_ptr->resparam_ptr->num_hbonding_sidechain_atoms != 0) { num_hbond_rot_pairs = 0; if(iGENERESx.lookupRotX_hbond == NULL) fwrite(&num_hbond_rot_pairs,sizeof(short int),1,var_var_file_ptr); else { num_hbond_rot_pairs = iGENERESx.lookupRotX_hbond->num_hbond_rot_pairs; fwrite(&num_hbond_rot_pairs,sizeof(short int),1,var_var_file_ptr); } if(num_hbond_rot_pairs!=0) { for(h=0;hnum_hbond_rot_pairs;++h) { fwrite(&iGENERESx.lookupRotX_hbond->hbond_rot_pair[h].j_index,sizeof(short int),1,var_var_file_ptr); fwrite(iGENERESx.lookupRotX_hbond->hbond_rot_pair[h].i_hbond_status,sizeof(bool),i_gene.choice_ptr->resparam_ptr->num_hbonding_sidechain_atoms,var_var_file_ptr); fwrite(iGENERESx.lookupRotX_hbond->hbond_rot_pair[h].j_hbond_status,sizeof(bool),j_gene.choice_ptr->resparam_ptr->num_hbonding_sidechain_atoms,var_var_file_ptr); } } } } fclose(var_var_file_ptr); mv_file(tempfilename, var_var_filename); } else /* this file was or is being made by another process, so don't interfere */ fclose(var_var_file_ptr); } if(input_i_gene->lookupRot->lookupX[j_minus_i].lookupResX[input_j_gene->j_choice_index].lookupRotX[input_j_gene->lookupRot_index-1].energy_var_var!=NON_INTERACT_PTR) /* this rotamer pair interacts */ return(input_i_gene->lookupRot->lookupX[j_minus_i].lookupResX[input_j_gene->j_choice_index].lookupRotX[input_j_gene->lookupRot_index-1].energy_var_var); return(0); } void goldstein_singles_elimination(PARAMETERS *parameters, VARIABLE_POSITION *varpos, LOOKUP_ENERGY *lookupEnergy) { int i,j,i_res,j_res,i_res_rot,j_res_rot,q_res,q_res_rot,k, cycle_number; int numVarPositions, elimination_flag, eliminated_rotamers; GENE i_gene, j_gene, q_gene; double elimination_criterion, min_pair_energy_diff, pair_energy_diff; char *logfile, *escape_hatch_filename=NULL; FILE *logfile_ptr; time_t now,start_time; extern double MAX_OPTIMIZATION_TIME; extern char *CURRENT_WORKING_DIRECTORY; extern int HBOND_SPECIFICITY_FLAG, LOGFILE_FLAG, GET_PID; if(parameters->log10_rotamer_combinations==0 || parameters->numMovingPositions <= 2) return; // can't use this if HBOND_SPECIFICITY_FLAG is on (usually off for single-sequence rotamer optimization) if(HBOND_SPECIFICITY_FLAG == 1) { return; } if(LOGFILE_FLAG == 1) { now = time(NULL); escape_hatch_filename = (char *)calloc(MAXLINE,sizeof(char)); sprintf(escape_hatch_filename,"%s/escape.goldstein.%d",CURRENT_WORKING_DIRECTORY,GET_PID); logfile = (char *)calloc(MAXLINE,sizeof(char)); sprintf(logfile,"%s.log", parameters->output_prefix); logfile_ptr = fopen_file(logfile,"a"); fprintf(logfile_ptr,"Starting goldstein elimination\t%s", ctime(&now)); fprintf(logfile_ptr,"To exit goldstein and move to the next step:"); fprintf(logfile_ptr,"\ttouch %s\n",escape_hatch_filename); fclose(logfile_ptr); free_memory(logfile); } i=1; while(varpos[i].seq_position!=ENDFLAG) ++i; numVarPositions = i-1; i_gene = (MENDEL *)malloc(sizeof(MENDEL)); i_gene->nextgene = NULL; j_gene = (MENDEL *)malloc(sizeof(MENDEL)); j_gene->nextgene = NULL; q_gene = (MENDEL *)malloc(sizeof(MENDEL)); q_gene->nextgene = NULL; elimination_flag=1; eliminated_rotamers = 0; cycle_number=0; start_time = time(NULL); while(elimination_flag > 0) { elimination_flag=0; for(i=1;i<=numVarPositions;++i) if(varpos[i].fixed_flag == 0) if(varpos[i].dead_ended_flag == 0) { i_gene->seq_position = varpos[i].seq_position; q_gene->seq_position = varpos[i].seq_position; mutate_sidechain(i_gene, varpos[i]); mutate_sidechain(q_gene, varpos[i]); i_gene->varpos_ptr = &varpos[i]; q_gene->varpos_ptr = &varpos[i]; for(i_res=1;i_res<=varpos[i].number_of_choices;++i_res) if(varpos[i].choice[i_res].in_use_flag == 1) { CHOICE_to_GENE(i_gene, varpos[i].choice[i_res], i_res); for(i_res_rot=1; i_res_rot <= varpos[i].choice[i_res].resparam_ptr->rotamerlib_ptr->number_of_rotamers; ++i_res_rot) if(ERESROT.in_use_flag == 1) // consider only in-use resimers { i_gene->lookupRot_index = i_res_rot; i_gene->chi = i_gene->choice_ptr->resparam_ptr->rotamerlib_ptr->rotamer[i_gene->lookupRot_index].chi; i_gene->lookupRot = &(i_gene->choice_ptr->lookup_res_ptr->lookupRot[i_gene->lookupRot_index]); elimination_criterion = 0; for(q_res=1;q_res<=varpos[i].number_of_choices;++q_res) if(varpos[i].choice[q_res].in_use_flag == 1) { CHOICE_to_GENE(q_gene, varpos[i].choice[q_res], q_res); for(q_res_rot=1;q_res_rot <= varpos[i].choice[q_res].resparam_ptr->rotamerlib_ptr->number_of_rotamers; ++q_res_rot) if(ERESROTiq.in_use_flag == 1 && ERESROT.in_use_flag == 1) // consider pairs only with both resimers in use if(q_res != i_res || q_res_rot != i_res_rot) { q_gene->lookupRot_index = q_res_rot; q_gene->chi = q_gene->choice_ptr->resparam_ptr->rotamerlib_ptr->rotamer[q_gene->lookupRot_index].chi; q_gene->lookupRot = &(q_gene->choice_ptr->lookup_res_ptr->lookupRot[q_gene->lookupRot_index]); elimination_criterion = ERESROT.energy_var_fix - ERESROTiq.energy_var_fix; for(j=1;j<=numVarPositions;++j) if(j != i) if(varpos[j].fixed_flag == 0) { min_pair_energy_diff = 1e10; j_gene->seq_position = varpos[j].seq_position; mutate_sidechain(j_gene, varpos[j]); j_gene->varpos_ptr = &varpos[j]; for(j_res=1;j_res<=varpos[j].number_of_choices;++j_res) if(varpos[j].choice[j_res].in_use_flag == 1) { CHOICE_to_GENE(j_gene, varpos[j].choice[j_res], j_res); for(j_res_rot=1; j_res_rot <= varpos[j].choice[j_res].resparam_ptr->rotamerlib_ptr->number_of_rotamers; ++j_res_rot) if(ERESROTj.in_use_flag == 1) { j_gene->lookupRot_index = j_res_rot; j_gene->chi = j_gene->choice_ptr->resparam_ptr->rotamerlib_ptr->rotamer[j_gene->lookupRot_index].chi; j_gene->lookupRot = &(j_gene->choice_ptr->lookup_res_ptr->lookupRot[j_gene->lookupRot_index]); pair_energy_diff = load_calc_save_lookupResX(i_gene, j_gene) - load_calc_save_lookupResX(q_gene, j_gene); if(pair_energy_diff < min_pair_energy_diff) min_pair_energy_diff = pair_energy_diff; } } // end j_resimer elimination_criterion += min_pair_energy_diff; } // end j if(elimination_criterion > 0) if(ERESROT.in_use_flag == 1) { ERESROT.in_use_flag = 0; // eliminate this resimer ++elimination_flag; --varpos[i].number_of_resimers; if(varpos[i].number_of_resimers == 1) // this position has only one resimer; it is dead-ended { varpos[i].dead_ended_flag=1; varpos[i].number_of_resimers = 0; for(k=1;k<=varpos[i].number_of_choices;++k) varpos[i].number_of_resimers += varpos[i].choice[k].resparam_ptr->rotamerlib_ptr->number_of_rotamers; } } now = time(NULL); if(difftime(now,start_time) >= MAX_OPTIMIZATION_TIME) { eliminated_rotamers += elimination_flag; if(LOGFILE_FLAG == 1) { logfile = (char *)calloc(MAXLINE,sizeof(char)); sprintf(logfile,"%s.log", parameters->output_prefix); logfile_ptr = fopen_file(logfile,"a"); fprintf(logfile_ptr,"goldstein eliminated %d rotamers in total\t%s",eliminated_rotamers,ctime(&now)); fclose(logfile_ptr); free_memory(logfile); } free_memory(i_gene); free_memory(j_gene); free_memory(q_gene); return; } if(LOGFILE_FLAG == 1) if(does_this_file_exist(escape_hatch_filename)==1) { eliminated_rotamers += elimination_flag; logfile = (char *)calloc(MAXLINE,sizeof(char)); sprintf(logfile,"%s.log", parameters->output_prefix); logfile_ptr = fopen_file(logfile,"a"); fprintf(logfile_ptr,"goldstein eliminated %d rotamers in total\t%s",eliminated_rotamers,ctime(&now)); rm_file(escape_hatch_filename); fclose(logfile_ptr); free_memory(logfile); free_memory(escape_hatch_filename); free_memory(i_gene); free_memory(j_gene); free_memory(q_gene); return; } } // end q_res_rot } // end q_resimer } } // end i_resimer } // end i eliminated_rotamers += elimination_flag; ++cycle_number; if(LOGFILE_FLAG == 1) { logfile = (char *)calloc(MAXLINE,sizeof(char)); sprintf(logfile,"%s.log", parameters->output_prefix); logfile_ptr = fopen_file(logfile,"a"); now = time(NULL); fprintf(logfile_ptr,"goldstein cycle %d eliminated %d rotamers\t%s",cycle_number, elimination_flag,ctime(&now)); fclose(logfile_ptr); free_memory(logfile); } } if(LOGFILE_FLAG == 1) { logfile = (char *)calloc(MAXLINE,sizeof(char)); sprintf(logfile,"%s.log", parameters->output_prefix); logfile_ptr = fopen_file(logfile,"a"); now = time(NULL); fprintf(logfile_ptr,"goldstein eliminated %d rotamers in total\t%s",eliminated_rotamers,ctime(&now)); fclose(logfile_ptr); free_memory(logfile); free_memory(escape_hatch_filename); } /* int numrot; parameters->log10_rotamer_combinations=0; i=1; while(varpos[i].seq_position!=ENDFLAG) { numrot=0; if(varpos[i].fixed_flag == 0) if(varpos[i].dead_ended_flag == 0) { for(i_res=1;i_res<=varpos[i].number_of_choices;++i_res) if(varpos[i].choice[i_res].in_use_flag == 1) { for(i_res_rot=1; i_res_rot <= varpos[i].choice[i_res].resparam_ptr->rotamerlib_ptr->number_of_rotamers; ++i_res_rot) if(ERESROT.in_use_flag == 1) ++numrot; } parameters->log10_rotamer_combinations += log10(numrot); } printf("%d\t%d\t%d\t\t%d\t%d\n",varpos[i].seq_position, numrot,varpos[i].number_of_resimers,varpos[i].fixed_flag,varpos[i].dead_ended_flag); ++i; } */ free_memory(i_gene); free_memory(j_gene); free_memory(q_gene); } #define iROTjROT i_gene->lookupRot->lookupX[j_minus_i].lookupResX[j_gene->j_choice_index].lookupRotX[j_res_rot_minus1] /* This function calculates the energy of a CHROMOSOME chr by adding up the pre-calculated variable residue/ rotamer energies stored in the LOOKUP_ENERGY structure. The genes in the chromosome must be linked to the appropriate pointers. fixed_energy is calculated by fixedatoms_energy and adjusted by generate_lookup_table to include internal energies of non-moving sidechains */ void CHROMOSOME_to_lookupEnergy(CHROMOSOME *chr, double *fixed_energy) { int i,j,j_minus_i; int ii,jj,h,k,ii2,jj2,j_res_rot_minus1,x; static bool *hbond_status_array=NULL; GENE i_gene, j_gene; /* marker for position i on the CHROMOSOME */ extern float NON_INTERACT_PTR, FIXED_POSITION_PTR; extern LOOKUP_ENERGY_ROTAMER_X *NON_INTERACT_LOOKUP_ROT_X; extern LOOKUP_ENERGY_RESIDUE_X *NON_INTERACT_LOOKUP_RES_X; extern ATOMRESPARAM **FIXED_ATOM_HBOND_ATOMRESPARAM; extern bool *FIXED_HBOND_SATISF_ARRAY; extern int FIXED_ATOM_HBOND_STATUS_ARRAYLENGTH, HBOND_SPECIFICITY_FLAG, BKBN_HBOND_STATUS_ARRAYLENGTH; chr->energy = *fixed_energy; chr->genes = chr->firstgene; i=1; j=1; i_gene = chr->genes; ii=1; jj=1; ii2=1; jj2=1; if(HBOND_SPECIFICITY_FLAG == 1) { if(hbond_status_array == NULL) hbond_status_array = (bool *)calloc(MAX_ATOMS,sizeof(bool)); falsify_bool_array(hbond_status_array,1,MAX_ATOMS-1); copy_bool_array(hbond_status_array, 1, FIXED_HBOND_SATISF_ARRAY, 1, FIXED_ATOM_HBOND_STATUS_ARRAYLENGTH); ii = FIXED_ATOM_HBOND_STATUS_ARRAYLENGTH + 1; } while(i_gene->seq_position!=ENDFLAG) { if(iROT->energy_var_fix!=FIXED_POSITION_PTR) { chr->energy += iROT->energy_var_fix; if(i_gene->choice_ptr->in_use_flag==0) /* penalize choices not in use */ chr->energy += 1e10; if(HBOND_SPECIFICITY_FLAG == 1) { for(x=0;xnum_var_fix_hbond;++x) hbond_status_array[iROT->bkbn_hbond_indicies[x]] = 1; if(i_gene->choice_ptr->resparam_ptr->num_hbonding_sidechain_atoms!=0) { ii2 = ii + i_gene->choice_ptr->resparam_ptr->numhbondsideatoms_minus1; union_bool_arrays(hbond_status_array, ii, hbond_status_array, ii, ii2, iROT->sidechain_hbond_status, 1, i_gene->choice_ptr->resparam_ptr->num_hbonding_sidechain_atoms); jj = ii2+1; } } j = i+1; j_gene = i_gene->nextgene; while(j_gene->seq_position!=ENDFLAG) { if(jROT->energy_var_fix!=FIXED_POSITION_PTR) { j_minus_i=j-i; if(HBOND_SPECIFICITY_FLAG == 1) if(i_gene->choice_ptr->resparam_ptr->num_hbonding_sidechain_atoms!=0) if(j_gene->choice_ptr->resparam_ptr->num_hbonding_sidechain_atoms!=0) jj2 = jj + j_gene->choice_ptr->resparam_ptr->numhbondsideatoms_minus1; if(iROT->lookupX[j_minus_i].lookupResX!=NON_INTERACT_LOOKUP_RES_X) { if(iROT->lookupX[j_minus_i].lookupResX[j_gene->j_choice_index].lookupRotX!=NON_INTERACT_LOOKUP_ROT_X) { /* this residue pair has not been calculated; calculate it */ if(iROT->lookupX[j_minus_i].lookupResX[j_gene->j_choice_index].lookupRotX==NULL) load_calc_save_lookupResX(i_gene, j_gene); j_res_rot_minus1 = j_gene->lookupRot_index-1; if(iROTjROT.energy_var_var!=NON_INTERACT_PTR) { chr->energy += iROTjROT.energy_var_var; if(HBOND_SPECIFICITY_FLAG == 1) { if(iROT->lookupX[j_minus_i].lookupResX[j_gene->j_choice_index].lookupRotX_hbond!=NULL) { h = find_j_res_rot_minus1_in_hbond_rot_pair(j_res_rot_minus1, iROT->lookupX[j_minus_i].lookupResX[j_gene->j_choice_index].lookupRotX_hbond); if(h != ENDFLAG) { union_bool_arrays(hbond_status_array, ii, hbond_status_array, ii, ii2, iROT->lookupX[j_minus_i].lookupResX[j_gene->j_choice_index].lookupRotX_hbond->hbond_rot_pair[h].i_hbond_status, 0, i_gene->choice_ptr->resparam_ptr->numhbondsideatoms_minus1); union_bool_arrays(hbond_status_array, jj, hbond_status_array, jj, jj2, iROT->lookupX[j_minus_i].lookupResX[j_gene->j_choice_index].lookupRotX_hbond->hbond_rot_pair[h].j_hbond_status, 0, j_gene->choice_ptr->resparam_ptr->numhbondsideatoms_minus1); } } } } } } if(HBOND_SPECIFICITY_FLAG == 1) if(i_gene->choice_ptr->resparam_ptr->num_hbonding_sidechain_atoms!=0) if(j_gene->choice_ptr->resparam_ptr->num_hbonding_sidechain_atoms!=0) jj = jj2+1; } j_gene = j_gene->nextgene; ++j; } if(HBOND_SPECIFICITY_FLAG == 1) if(i_gene->choice_ptr->resparam_ptr->num_hbonding_sidechain_atoms!=0) ii = ii2+1; } i_gene = i_gene->nextgene; /* recover address of chr.genes->nextgene */ ++i; } if(HBOND_SPECIFICITY_FLAG == 1) { k=1; for(h=1;h<=BKBN_HBOND_STATUS_ARRAYLENGTH ;++h) { chr->energy -= hbonding_status_dependent_specificity_energy(hbond_status_array[h],FIXED_ATOM_HBOND_ATOMRESPARAM[k]); ++k; } ii = BKBN_HBOND_STATUS_ARRAYLENGTH + 1; chr->genes = chr->firstgene; i_gene = chr->genes; while(i_gene->seq_position!=ENDFLAG) { if(iROT->energy_var_fix==FIXED_POSITION_PTR) { if(i_gene->choice_ptr->resparam_ptr->num_hbonding_sidechain_atoms!=0) { ii2 = ii + i_gene->choice_ptr->resparam_ptr->numhbondsideatoms_minus1; if(any_false(hbond_status_array,ii,ii2)==1) { chr->energy -= i_gene->choice_ptr->resparam_ptr->E_specificity_core; } if(i_gene->choice_ptr->resparam_ptr->ligand_flag==1) { k=1; for(h=ii;h<=ii2;++h) { chr->energy -= hbonding_status_dependent_specificity_energy(hbond_status_array[h],i_gene->choice_ptr->resparam_ptr->sidechain_hbond_atom_ptr[k]); ++k; } } ii = ii2+1; } } i_gene = i_gene->nextgene; } ii2 = FIXED_ATOM_HBOND_STATUS_ARRAYLENGTH; chr->genes = chr->firstgene; i_gene = chr->genes; ii = FIXED_ATOM_HBOND_STATUS_ARRAYLENGTH + 1; while(i_gene->seq_position!=ENDFLAG) { if(iROT->energy_var_fix!=FIXED_POSITION_PTR) if(i_gene->choice_ptr->resparam_ptr->num_hbonding_sidechain_atoms!=0) { ii2 = ii + i_gene->choice_ptr->resparam_ptr->numhbondsideatoms_minus1; /* printf("%d\t%s\t%d\n",i_gene->seq_position,i_gene->choice_ptr->resparam_ptr->residuetype,count_false(hbond_status_array,ii,ii2)); */ if(any_false(hbond_status_array,ii,ii2)==1) { chr->energy -= i_gene->choice_ptr->resparam_ptr->E_specificity_core; /* printf("%d\t%s\t%lf\n",i_gene->seq_position,i_gene->choice_ptr->resparam_ptr->residuetype,i_gene->choice_ptr->resparam_ptr->E_specificity_core); */ } if(i_gene->choice_ptr->resparam_ptr->ligand_flag == 1) { k=1; for(h=ii;h<=ii2;++h) { chr->energy -= hbonding_status_dependent_specificity_energy(hbond_status_array[h],i_gene->choice_ptr->resparam_ptr->sidechain_hbond_atom_ptr[k]); ++k; } } ii = ii2+1; } i_gene = i_gene->nextgene; } // printf("%d\n",count_false(hbond_status_array,1,ii2)); } if(chr->energy<=-10000) failure_report("chr->energy<=-10000; probably an error in lookup table generation","abort"); chr->genes = chr->firstgene; } #include "MC.h" #include "HQM_rotamers.h" #include "somewhat_FASTER.h" /* protein->final_chr contains the final solution CHROMOSOME. This function allocates memory for protein->final_energy, protein->final_pdb, and protein->final_minipdb, calculates the coordinates, structures, and energy components. Should be called before output_PROTEIN */ void final_chr_to_final_pdb_final_energy(PROTEIN *protein) { int i,j,k,n,num_unsat,m; int solub_flag, specif_flag, hbond_specif_flag, logflag, *num_resimers=NULL; double E_pre_min, E_pre_min2, E_post_min; time_t now; char *logfile=NULL; bool *usage_flag=NULL; FILE *file_ptr; extern int LOCAL_MINIMIZATION_FLAG, MAX_RESIDUES; extern int SOLUBILITY_CUTOFF_FLAG, SPECIFICITY_FLAG, HBOND_SPECIFICITY_FLAG, LOGFILE_FLAG, MINIMIZE_FINAL_SEQUENCE_FLAG; extern SASA_SUM SASA_SUM_TEMP; extern double TEMPERATURE; /* is a rotamer optimization job, so quench the final sequence w/o solubility or specificity in order to get "true" structure of design */ if(MINIMIZE_FINAL_SEQUENCE_FLAG==1) if(protein->final_chr.firstgene->lookupRot != NULL) { if(LOGFILE_FLAG != 0) { logfile = (char *)calloc(MAXLINE,sizeof(char)); sprintf(logfile,"%s.log", protein->parameters.output_prefix); file_ptr = fopen_file(logfile, "a"); now = time(NULL); fprintf(file_ptr, "optimize final sequence w/o solubility/specificity\t%s",ctime(&now)); fclose(file_ptr); free_memory(logfile); } usage_flag = (bool *)calloc(protein->parameters.num_total_choices+2,sizeof(bool)); num_resimers = (int *)calloc(MAX_RESIDUES+10,sizeof(int)); /* set up so only the sequence encoded by protein->final_chr is allowed for optimization */ n=1; m=1; i=1; protein->final_chr.genes = protein->final_chr.firstgene; while(protein->var_pos[i].seq_position!=ENDFLAG) { if(protein->var_pos[i].fixed_flag == 0) { while(protein->final_chr.genes->seq_position!=protein->var_pos[i].seq_position) protein->final_chr.genes = protein->final_chr.genes->nextgene; // set so none of the residuetypes or rotamers are allowed for(k=1;k<=protein->var_pos[i].number_of_choices;++k) { usage_flag[n] = protein->var_pos[i].choice[k].in_use_flag; ++n; protein->var_pos[i].choice[k].in_use_flag = 0; for(j=1;j<=protein->var_pos[i].choice[k].resparam_ptr->rotamerlib_ptr->number_of_rotamers;++j) protein->var_pos[i].choice[k].lookup_res_ptr->lookupRot[j].in_use_flag = 0; } num_resimers[m] = protein->var_pos[i].number_of_resimers; ++m; // set so only the current residuetype and its rotamers are permitted protein->var_pos[i].number_of_resimers = protein->final_chr.genes->choice_ptr->resparam_ptr->rotamerlib_ptr->number_of_rotamers; protein->final_chr.genes->choice_ptr->in_use_flag = 1; for(j=1;j<=protein->final_chr.genes->choice_ptr->resparam_ptr->rotamerlib_ptr->number_of_rotamers;++j) protein->final_chr.genes->choice_ptr->lookup_res_ptr->lookupRot[j].in_use_flag = 1; } ++i; } protein->final_chr.genes = protein->final_chr.firstgene; E_pre_min = protein->final_chr.energy; solub_flag = SOLUBILITY_CUTOFF_FLAG; specif_flag = SPECIFICITY_FLAG; hbond_specif_flag = HBOND_SPECIFICITY_FLAG; logflag = LOGFILE_FLAG; SOLUBILITY_CUTOFF_FLAG = 0; SPECIFICITY_FLAG = 0; HBOND_SPECIFICITY_FLAG = 0; MINIMIZE_FINAL_SEQUENCE_FLAG = 0; CHROMOSOME_to_lookupEnergy(&protein->final_chr, &protein->parameters.fixedatoms_energy); E_pre_min2 = protein->final_chr.energy; k=protein->parameters.number_MC_cycles; protein->parameters.number_MC_cycles = 10; FASTER_rotamers(&protein->parameters, &protein->final_chr, protein->var_pos, protein->lookupEnergy); protein->parameters.number_MC_cycles=k; E_post_min = protein->final_chr.energy; MINIMIZE_FINAL_SEQUENCE_FLAG = 1; SOLUBILITY_CUTOFF_FLAG = solub_flag; SPECIFICITY_FLAG = specif_flag; HBOND_SPECIFICITY_FLAG = hbond_specif_flag; LOGFILE_FLAG = logflag; CHROMOSOME_to_lookupEnergy(&protein->final_chr, &protein->parameters.fixedatoms_energy); if(LOGFILE_FLAG != 0) { logfile = (char *)calloc(MAXLINE,sizeof(char)); sprintf(logfile,"%s.log", protein->parameters.output_prefix); file_ptr = fopen_file(logfile, "a"); now = time(NULL); fprintf(file_ptr, "optimized final sequence w/o solubility/specificity\t%f\t%f\t%f\t%f\t%s", E_pre_min, E_pre_min2, E_post_min, protein->final_chr.energy,ctime(&now)); fclose(file_ptr); free_memory(logfile); } /* reset usage flags */ n=1; m=1; i=1; while(protein->var_pos[i].seq_position!=ENDFLAG) { if(protein->var_pos[i].fixed_flag == 0) { for(k=1;k<=protein->var_pos[i].number_of_choices;++k) { protein->var_pos[i].choice[k].in_use_flag = usage_flag[n]; if(protein->var_pos[i].choice[k].in_use_flag == 1) for(j=1;j<=protein->var_pos[i].choice[k].resparam_ptr->rotamerlib_ptr->number_of_rotamers;++j) protein->var_pos[i].choice[k].lookup_res_ptr->lookupRot[j].in_use_flag = 1; ++n; } protein->var_pos[i].number_of_resimers = num_resimers[m]; ++m; } ++i; } free_memory(usage_flag); free_memory(num_resimers); } if(protein->final_energy==NULL) protein->final_energy = (ENERGY *)malloc(sizeof(ENERGY)); if(protein->final_pdb == NULL) protein->final_pdb = (pdbATOM *)calloc(MAX_ATOMS, sizeof(pdbATOM)); if(protein->final_minipdb == NULL) protein->final_minipdb = (mini_pdbATOM *)calloc(MAX_ATOMS, sizeof(mini_pdbATOM)); CHROMOSOME_to_pdbATOM(&protein->final_chr, protein->fixed_atoms, protein->final_pdb, protein->chain_anchor_bkbn); pdbATOM_to_mini_pdbATOM(protein->final_pdb, protein->final_minipdb); hbond_specif_flag = HBOND_SPECIFICITY_FLAG; HBOND_SPECIFICITY_FLAG = 1; protein->final_energy->E_specificity = 0; *protein->final_energy = energy_calc(protein->final_minipdb, 1, 1); HBOND_SPECIFICITY_FLAG = hbond_specif_flag; /* get the vdw energy of the rotamerlet/methyl relaxation */ if(LOCAL_MINIMIZATION_FLAG==1) { protein->final_energy->E_vdw = relaxed_vdw_energy(&protein->final_chr, protein->mini_fixed_atoms, &protein->parameters); } i=1; while(protein->final_pdb[i].seq_position!=ENDFLAG) { protein->final_pdb[i].born_radius = protein->final_minipdb[i].born_radius; protein->final_pdb[i].sasa = protein->final_minipdb[i].sasa; protein->final_pdb[i].hbond_satisfied = protein->final_minipdb[i].hbond_satisfied; ++i; } protein->sasa_sum = SASA_SUM_TEMP; k=1; protein->final_energy->E_rss=0; protein->final_energy->E_reference=0; protein->final_energy->E_solubility = 0; protein->final_energy->TdS = 0; protein->final_chr.genes = protein->final_chr.firstgene; while(protein->final_chr.genes->seq_position!=ENDFLAG) { protein->final_energy->E_rss += protein->final_chr.genes->choice_ptr->resparam_ptr->E_random; protein->final_energy->TdS += protein->final_chr.genes->choice_ptr->resparam_ptr->S; if(protein->final_chr.genes->lookupRot != NULL) { if(protein->final_chr.genes->varpos_ptr->core_flag == 's') protein->final_energy->E_solubility += protein->final_chr.genes->choice_ptr->resparam_ptr->E_specificity_surface; if(protein->final_chr.genes->varpos_ptr->core_flag == 'i') protein->final_energy->E_solubility += protein->final_chr.genes->choice_ptr->resparam_ptr->E_specificity_interfacial; if(protein->final_chr.genes->choice_ptr->resparam_ptr->num_hbonding_sidechain_atoms != 0) { while(protein->final_pdb[k].seq_position!=protein->final_chr.genes->seq_position) ++k; num_unsat = 0; while(protein->final_pdb[k].seq_position==protein->final_chr.genes->seq_position) { if(protein->final_pdb[k].atom_ptr->donorflag[0] != ' ') if(protein->final_pdb[k].atom_ptr->other_info>0) if(protein->final_pdb[k].hbond_satisfied == 0) ++num_unsat; ++k; } if(num_unsat!=0) protein->final_energy->E_specificity += protein->final_chr.genes->choice_ptr->resparam_ptr->E_specificity_core; } } protein->final_chr.genes = protein->final_chr.genes->nextgene; } protein->final_energy->TdS = TEMPERATURE*protein->final_energy->TdS; protein->final_energy->E_reference = protein->final_energy->E_rss + protein->final_energy->E_solubility + protein->final_energy->E_specificity - protein->final_energy->TdS; protein->final_chr.genes = protein->final_chr.firstgene; protein->final_energy->E_folded = ENERGY_TOTAL_SCALED((*protein->final_energy)); protein->final_energy->E_unfolded = OVERALL_ENERGY_SCALE*(protein->final_energy->E_rss - protein->final_energy->TdS); protein->final_energy->E_structure = ENERGY_TOTAL((*protein->final_energy)); protein->final_energy->E_total = protein->final_energy->E_structure - protein->final_energy->E_reference; protein->pseudo_dG = protein->final_energy->E_folded - protein->final_energy->E_unfolded; }