/*
   EGAD: rotamer_calc_master.cpp

   Navin Pokala and Tracy Handel
   Dept. of Molecular and Cell Biology
   University of California, Berkeley

   Copyright (C) 2003 Regents of the University of California
   GNU Public License
   Aug 12 2003

   Absolutely no warranties are made or are implied with the use of this
   program or its parts.

   This file contains functions that may be useful when writing rotamer
   calculation master programs.
*/

#include "rotamer_calc_master.h"

/*
   Returns the number of slave files listed in the slavefilelist file that
   have yet to be completed.

   For every line of slavefilelist the first whitespace-delimited token is
   taken as a slave input file name.  If that file exists, its output prefix
   is the second token of the line returned by grep_line_from_file() for
   "PREFIX", or the slave file name itself when no usable PREFIX line is
   found.  The slave is counted when "<prefix>.pdb" does not exist.

   Lines with no token are skipped.
*/
int number_of_slaves_left(char slavefilelist[])
{
    /* scratch buffers are allocated once and reused across calls */
    static char *line = NULL, *slavefilename, *output_filename;
    FILE *file_ptr;
    int slaves_left = 0;
    size_t len;

    if (line == NULL) {
        line = (char *)calloc(MAXLINE, sizeof(char));
        slavefilename = (char *)calloc(MAXLINE, sizeof(char));
        output_filename = (char *)calloc(MAXLINE, sizeof(char));
    }

    file_ptr = fopen_file(slavefilelist, "r");

    while (fgets(line, MAXLINE, file_ptr) != NULL) {
        /* a blank line would otherwise re-use the previous slave's name */
        if (sscanf(line, "%s", slavefilename) != 1)
            continue;

        if (does_this_file_exist(slavefilename) != 1)
            continue;

        /* "%*s" skips the keyword token, so no fixed-size dummy buffer is needed;
           fall back to the slave file name if there is no PREFIX value */
        if (grep_line_from_file("PREFIX", line, MAXLINE, slavefilename) != 1
            || sscanf(line, "%*s %s", output_filename) != 1)
            strcpy(output_filename, slavefilename);

        /* append ".pdb" in place; sprintf(buf, "%s...", buf) is undefined behaviour */
        len = strlen(output_filename);
        snprintf(output_filename + len, (size_t)MAXLINE - len, "%s", ".pdb");

        if (does_this_file_exist(output_filename) == 0)
            ++slaves_left;
    }

    fclose(file_ptr);

    return (slaves_left);
}

/*
   Remove the "<name>.working" file, if it exists, for every slave input file
   name listed (first token of each line) in slavefilelist.
*/
void remove_working_files(char *slavefilelist)
{
    FILE *file_ptr;
    char *inputfilename, *line;

    inputfilename = (char *)calloc(MAXLINE, sizeof(char));
    line = (char *)calloc(MAXLINE, sizeof(char));

    file_ptr = fopen_file(slavefilelist, "r");

    while (fgets(line, MAXLINE, file_ptr) != NULL) {
        /* skip blank lines rather than acting on a stale/empty name */
        if (sscanf(line, "%s", inputfilename) != 1)
            continue;

        /* line is re-used as the output buffer; bounded so a long name cannot overflow it */
        snprintf(line, MAXLINE, "%s.working", inputfilename);
        if (does_this_file_exist(line) == 1)
            rm_file(line);
    }

    fclose(file_ptr);

    free_memory(inputfilename);
    free_memory(line);
}

/*
   Poll number_of_slaves_left() every 2 seconds until either no slaves remain
   or more than time_limit seconds have elapsed.  Returns the last observed
   number of remaining slaves (num_jobs_remaining is returned unchanged if it
   is already 0).
*/
static int poll_slaves_until_timeout(char *slavefilelist, int num_jobs_remaining, double time_limit)
{
    time_t start_time, current_time;

    start_time = time(NULL);
    current_time = time(NULL);

    while (difftime(current_time, start_time) <= time_limit && num_jobs_remaining != 0) {
        sleep(2);
        num_jobs_remaining = number_of_slaves_left(slavefilelist);
        current_time = time(NULL);
    }

    return num_jobs_remaining;
}

/*
   Block until all slave jobs listed in slavefilelist have produced their
   output files, then remove their .working files.

   Steps:
     1. wait for "<slavefilelist>.loaded" to exist;
     2. wait until fewer than max(5, initial_remaining/10) jobs remain;
     3. estimate how long the remaining jobs may take and wait that long;
     4. if jobs still remain, remove the .working files (intended to get the
        jobs relaunched), and wait 10x longer;
     5. if jobs still remain after that, report an error via
        failure_report(..., "exit").

   Uses the globals MAX_OPTIMIZATION_TIME and AVAILABLE_PROCESSORS_FILE.
   The processor count is the number of lines in AVAILABLE_PROCESSORS_FILE
   (1 if that pointer is NULL) and is cached across calls.
*/
void wait_for_slaves_to_finish(char slavefilelist[])
{
    time_t start_time, current_time;
    double time_remaining, one_hundred_percent_time, worst_case_time;
    static int num_cpu = 0;
    int number_of_slave_jobs, number_of_jobs_done, num_jobs_remaining, ten_percent_jobs;
    FILE *file_ptr;
    static char *line = NULL;
    extern double MAX_OPTIMIZATION_TIME;
    extern char *AVAILABLE_PROCESSORS_FILE;

    if (line == NULL)
        line = (char *)calloc(MAXLINE, sizeof(char));

    /* make sure that at least one slave has loaded the lookup table before we start waiting */
    snprintf(line, MAXLINE, "%s.loaded", slavefilelist);
    while (does_this_file_exist(line) == 0)
        sleep(2);

    num_jobs_remaining = number_of_slaves_left(slavefilelist);
    if (num_jobs_remaining == 0) {
        remove_working_files(slavefilelist);
        return;
    }

    if (num_cpu == 0) {
        if (AVAILABLE_PROCESSORS_FILE != NULL) {
            file_ptr = fopen_file(AVAILABLE_PROCESSORS_FILE, "r");
            while (fgets(line, MAXLINE, file_ptr) != NULL)
                ++num_cpu;
            fclose(file_ptr);
        }
        /* an empty processors file would otherwise leave num_cpu at 0 and make
           the time estimates below divide by zero (infinite wait on hung jobs) */
        if (num_cpu < 1)
            num_cpu = 1;
    }

    number_of_slave_jobs = 0;
    file_ptr = fopen_file(slavefilelist, "r");
    while (fgets(line, MAXLINE, file_ptr) != NULL)
        ++number_of_slave_jobs;
    fclose(file_ptr);

    ten_percent_jobs = num_jobs_remaining / 10;
    if (ten_percent_jobs < 5)
        ten_percent_jobs = 5;

    num_jobs_remaining = number_of_slaves_left(slavefilelist);
    number_of_jobs_done = num_jobs_remaining;   /* holds the starting count until the subtraction below */

    start_time = time(NULL);

    while (num_jobs_remaining >= ten_percent_jobs) {
        sleep(2);
        num_jobs_remaining = number_of_slaves_left(slavefilelist);
    }

    number_of_jobs_done = number_of_jobs_done - num_jobs_remaining;
    current_time = time(NULL);

    if (num_jobs_remaining == 0) {
        remove_working_files(slavefilelist);
        return;
    }

    // let us assume that, at worst, each of the remaining jobs will require 2-fold more time than the previous jobs
    if (num_jobs_remaining > num_cpu) {
        worst_case_time = number_of_slave_jobs * 2.0 * MAX_OPTIMIZATION_TIME / (double)num_cpu;

        one_hundred_percent_time = worst_case_time;
        if (number_of_jobs_done != 0)
            one_hundred_percent_time = (double)number_of_slave_jobs * difftime(current_time, start_time)
                                       / (double)number_of_jobs_done;

        /* never estimate less than the worst-case figure */
        if (one_hundred_percent_time < worst_case_time)
            one_hundred_percent_time = worst_case_time;

        time_remaining = 2.0 * ((double)num_jobs_remaining / (double)number_of_slave_jobs) * one_hundred_percent_time;
    } else {
        // no speed-up due to parallelization
        one_hundred_percent_time = 2.0 * MAX_OPTIMIZATION_TIME;
        time_remaining = 2.0 * one_hundred_percent_time;
    }

    num_jobs_remaining = poll_slaves_until_timeout(slavefilelist, num_jobs_remaining, time_remaining);

    // probably hung jobs; remove these from the queue and re-insert them; this may fix the problem
    if (num_jobs_remaining != 0) {
        /* relaunch jobs by removing .working files */
        remove_working_files(slavefilelist);
        sleep(2);
        num_jobs_remaining = number_of_slaves_left(slavefilelist);

        /* give it a lot more time before bailing out */
        time_remaining = 10.0 * time_remaining;
        num_jobs_remaining = poll_slaves_until_timeout(slavefilelist, num_jobs_remaining, time_remaining);

        if (num_jobs_remaining != 0) {
            /* one last look before giving up */
            sleep(2);
            num_jobs_remaining = number_of_slaves_left(slavefilelist);
            if (num_jobs_remaining != 0) {
                snprintf(line, MAXLINE, "ERROR slavefiles in master file %s hanging and failing to complete", slavefilelist);
                failure_report(line, "exit");
            }
        }
    }

    remove_working_files(slavefilelist);
}

/* maps a keyword found in an EGAD pdb header line to the double it fills in */
struct egad_double_field {
    const char *key;
    double *dest;
};

/*
   Given an EGAD output pdb file pdbfilename, fill in *energy, *sasa_sum,
   *fraction_sasa_hphob and *transfer_free_energy_density from its header.

   *energy is first reset with ENERGY_0.  Lines after the first are then
   scanned until one whose first token is "ATOM" or "END" (or end of file).
   For each line the first matching keyword (substring match, in the order
   listed below) selects the destination, and the value is read from the
   third whitespace-delimited token onward.  Destinations whose keyword does
   not appear keep their current value; note that sasa_sum and the two double
   outputs are not reset here.
*/
void get_ENERGY_and_SASA_SUM_from_egad_pdb_file(char *pdbfilename, ENERGY *energy, SASA_SUM *sasa_sum,
                                                double *fraction_sasa_hphob, double *transfer_free_energy_density)
{
    /* order matters: the first key found in a line wins */
    struct egad_double_field double_fields[] = {
        { "E_vdw:", &(energy->E_vdw) },
        { "E_coulomb:", &(energy->E_coulomb) },
        { "E_1_4:", &(energy->E_1_4) },
        { "E_born:", &(energy->E_born) },
        { "E_pol:", &(energy->E_pol) },
        { "E_sasa:", &(energy->E_sasa) },
        { "E_hbond:", &(energy->E_hbond) },
        { "E_structure:", &(energy->E_structure) },
        { "E_unfolded:", &(energy->E_unfolded) },
        { "E_total:", &(energy->E_total) },
        { "E_rss:", &(energy->E_rss) },
        { "E_specificity:", &(energy->E_specificity) },
        { "E_solubility:", &(energy->E_solubility) },
        { "TdS:", &(energy->TdS) },
        { "E_folded:", &(energy->E_folded) },
        { "E_reference:", &(energy->E_reference) },
        { "Pseudo_DELTA_G_folding:", &(energy->pseudo_dG) },
        { "fraction_sasa_hphob:", fraction_sasa_hphob },
        { "transfer_free_energy_density:", transfer_free_energy_density },
        { "sasa_sp3_S:", &(sasa_sum->sp3_S) },
        { "sasa_sp2:", &(sasa_sum->sp2) },
        { "sasa_O:", &(sasa_sum->O) },
        { "sasa_N:", &(sasa_sum->N) },
        { "sasa_H:", &(sasa_sum->H) },
        { "sasa_total:", &(sasa_sum->sasa_total) }
    };
    const int num_double_fields = (int)(sizeof double_fields / sizeof double_fields[0]);

    /* scratch buffers are allocated once and reused across calls */
    static char *keyword, *line = NULL;
    FILE *file_ptr;
    int k;

    ENERGY_0((*energy));

    if (line == NULL) {
        line = (char *)calloc(MAXLINE, sizeof(char));
        keyword = (char *)calloc(MAXLINE, sizeof(char));
    }

    file_ptr = fopen_file(pdbfilename, "r");

    if (fgets(line, MAXLINE, file_ptr) == NULL) {
        fclose(file_ptr);
        return;
    }

    /* keyword is static: clear it so a blank first line cannot inherit the
       "ATOM"/"END" left over from the previous call and skip the whole header */
    keyword[0] = '\0';
    sscanf(line, "%s", keyword);

    while (strcmp(keyword, "ATOM") != 0 && strcmp(keyword, "END") != 0) {
        if (fgets(line, MAXLINE, file_ptr) == NULL) {
            fclose(file_ptr);
            return;
        }
        sscanf(line, "%s", keyword);

        /* double-valued fields: skip two tokens, read the third */
        for (k = 0; k < num_double_fields; ++k) {
            if (strstr(line, double_fields[k].key) != NULL) {
                sscanf(line, "%*s %*s %lf", double_fields[k].dest);
                break;
            }
        }
        if (k < num_double_fields)
            continue;

        /* integer-valued fields */
        if (strstr(line, "number_of_clashes:") != NULL) {
            sscanf(line, "%*s %*s %d", &(energy->clashes));
        } else if (strstr(line, "number_of_hbonds:") != NULL) {
            sscanf(line, "%*s %*s %d %d %d %d",
                   &(energy->num_hbond), &(energy->num_ss_hbond),
                   &(energy->num_sb_hbond), &(energy->num_bb_hbond));
        } else if (strstr(line, "number_of_unsatisfied_hbond_groups:") != NULL) {
            sscanf(line, "%*s %*s %d", &(energy->num_unsatisfied_hbond));
        }
    }

    fclose(file_ptr);
}

/* NOTE(review): not referenced by the functions in this file; presumably used
   by other translation units -- confirm before touching */
int BASELINE_FLAG;

/*
   Read the ROTAMER section of the file filename and set each gene of chr to
   the residue type and rotamer listed there.

   Each ROTAMER line is read as
       ROTAMER <seqpos_text> <corecode> <one-letter rescode> <chi 1> ... <chi numChi>
   The genes of chr are walked forward from chr->firstgene to find the one
   whose seqpos_text matches; the residue choice is matched by one-letter
   code and the rotamer by chi angles (each within 0.1 of the library value).
   For the matched gene, choice_ptr, j_choice_index, chi and lookupRot_index
   are set; lookupRot is re-linked only when it was already non-NULL.

   Errors (no ROTAMER section, unknown position/residue/rotamer, chi count
   mismatch, chr longer than the file) are reported through failure_report();
   this code assumes failure_report() does not return for "exit"/"abort".

   On return chr->genes is reset to chr->firstgene.
*/
void pdbfile_to_CHROMOSOME(char *filename, CHROMOSOME *chr)
{
    enum { MAX_WORDS = 50, WORD_LEN = 20, MAX_CHI = 40 };

    FILE *file_ptr;
    char *line, **word, *keyword, rescode[10], corecode[10], seqpos_text[10], *error_mssg;
    double *chi;
    int i, n, i_res, i_res_rot, numChi, flag, offset;

    line = (char *)calloc(MAXLINE, sizeof(char));
    keyword = (char *)calloc(MAXLINE, sizeof(char));
    error_mssg = (char *)calloc(MAXLINE, sizeof(char));
    chi = (double *)calloc(MAX_CHI, sizeof(double));
    /* NOTE(review): extract_words() is given no sizes; it is assumed to stay
       within MAX_WORDS words of WORD_LEN chars -- confirm against its definition */
    word = (char **)calloc(MAX_WORDS, sizeof(char *));
    for (i = 0; i < MAX_WORDS; ++i)
        word[i] = (char *)calloc(WORD_LEN, sizeof(char));

    file_ptr = fopen_file(filename, "r");

    fgets(line, MAXLINE, file_ptr);
    sscanf(line, "%s", keyword);

    /* get to ROTAMER section */
    while (strcmp(keyword, "ROTAMER") != 0) {
        if (fgets(line, MAXLINE, file_ptr) == NULL) {
            snprintf(line, MAXLINE, "ERROR %s does not contain a ROTAMER section", filename);
            failure_report(line, "exit");
        }
        sscanf(line, "%s", keyword);
    }

    chr->genes = chr->firstgene;

    /* for each rotamer, create the appropriate GENE */
    while (strcmp(keyword, "ROTAMER") == 0) {
        i = extract_words(line, word);
        numChi = i - 4;

        /* widths keep over-long tokens from overflowing the 10-byte buffers */
        sscanf(line, "%s %9s %9s %9s", keyword, seqpos_text, corecode, rescode);

        /* chi[] is indexed 1..numChi and has MAX_CHI slots */
        if (numChi >= MAX_CHI) {
            snprintf(error_mssg, MAXLINE, "ERROR too many chi angles on a ROTAMER line of the pdbfile %s in pdbfile_to_CHROMOSOME", filename);
            failure_report(error_mssg, "abort");
        }

        for (n = 1; n <= numChi; ++n)
            sscanf(word[n + 4], "%lf", &chi[n]);

        /* find the seq_position */
        while (strcmp(chr->genes->varpos_ptr->seqpos_text_map_ptr->seqpos_text, seqpos_text) != 0) {
            chr->genes = chr->genes->nextgene;
            if (chr->genes->seq_position == ENDFLAG) {
                snprintf(error_mssg, MAXLINE, "ERROR chr is shorter than it should be for the pdbfile %s in pdbfile_to_CHROMOSOME", filename);
                failure_report(error_mssg, "abort");
            }
        }

        /* find the residuetype */
        i_res = 1;
        while (strcmp(chr->genes->varpos_ptr->choice[i_res].resparam_ptr->one_letter_code, rescode) != 0) {
            ++i_res;
            if (i_res > chr->genes->varpos_ptr->number_of_choices) {
                snprintf(error_mssg, MAXLINE, "ERROR Cannot find %s as a choice for position %s", rescode, seqpos_text);
                failure_report(error_mssg, "abort");
            }
        }

        /* assign residuetype-related things to chr->genes */
        chr->genes->choice_ptr = &chr->genes->varpos_ptr->choice[i_res];
        chr->genes->j_choice_index = i_res - 1;

        /* find the rotamer */
        if (chr->genes->varpos_ptr->choice[i_res].resparam_ptr->rotamerlib_ptr->numChi != numChi) {
            snprintf(error_mssg, MAXLINE, "ERROR Residuetype %s at position %s has different number of chi", rescode, seqpos_text);
            failure_report(error_mssg, "abort");
        }

        i_res_rot = 0;
        flag = 0;
        while (flag == 0) {
            ++i_res_rot;
            if (i_res_rot > chr->genes->choice_ptr->resparam_ptr->rotamerlib_ptr->number_of_rotamers) {
                /* build the message piecewise with a running offset;
                   sprintf(buf, "%s...", buf) is undefined behaviour */
                offset = snprintf(error_mssg, MAXLINE, "ERROR Cannot find rotamer");
                for (n = 1; n <= numChi && offset >= 0 && offset < MAXLINE; ++n)
                    offset += snprintf(error_mssg + offset, MAXLINE - offset, " %.1lf ", chi[n]);
                if (offset >= 0 && offset < MAXLINE)
                    snprintf(error_mssg + offset, MAXLINE - offset, " for residuetype %s at position %s\n",
                             chr->genes->choice_ptr->resparam_ptr->residuetype, seqpos_text);
                failure_report(error_mssg, "abort");
            }

            /* a rotamer matches when every chi is within 0.1 of the requested value */
            flag = 1;
            for (n = 1; n <= numChi; ++n)
                if (fabs(chi[n] - chr->genes->choice_ptr->resparam_ptr->rotamerlib_ptr->rotamer[i_res_rot].chi[n]) > 0.1)
                    flag = 0;
        }

        chr->genes->chi = chr->genes->choice_ptr->resparam_ptr->rotamerlib_ptr->rotamer[i_res_rot].chi;

        /* assign lookuptable elements to chr->genes */
        chr->genes->lookupRot_index = i_res_rot;
        if (chr->genes->lookupRot != NULL)   /* if NULL, no lookup table exists, so don't link up */
            chr->genes->lookupRot = &chr->genes->choice_ptr->lookup_res_ptr->lookupRot[i_res_rot];

        /* stop at end of file or on a blank line; without this check the stale
           "ROTAMER" keyword would make the loop reprocess the last line forever */
        if (fgets(line, MAXLINE, file_ptr) == NULL || sscanf(line, "%s", keyword) != 1)
            break;
    }

    fclose(file_ptr);

    if (chr->genes->nextgene->seq_position != ENDFLAG) {
        snprintf(error_mssg, MAXLINE, "ERROR chr is longer than it should be for the pdbfile %s in pdbfile_to_CHROMOSOME", filename);
        failure_report(error_mssg, "abort");
    }

    chr->genes = chr->firstgene;

    free_memory(line);
    free_memory(keyword);
    free_memory(chi);
    free_memory(error_mssg);
    for (i = 0; i < MAX_WORDS; ++i)
        free_memory(word[i]);
    free_memory(word);
}