/*
   EGAD: rotamer_calc_foreman.cpp

   Navin Pokala and Tracy Handel
   Dept. of Molecular and Cell Biology
   University of California, Berkeley

   Copyright (C) 2003 Regents of the University of California
   GNU Public License
   Aug 12 2003

   Absolutely no warranties are made or are implied with the use of this
   program or its parts.

   This file contains functions required for an EGAD job to act as a
   rotamer calculation slave.
*/

#include "rotamer_calc_foreman.h"

/*
   Normalizes choice[].composition at var_pos[pos] so the values sum to one,
   rebuilds the cumulative residue_freqs[] table from them, and adds
   log10(number_of_resimers) of this position to
   parameters.log10_rotamer_combinations.

   The caller must already have set composition and number_of_resimers.
*/
static void finish_var_pos_statistics(PROTEIN *protein, int pos)
{
    int res;
    double denom;

    denom = 0;
    for(res=1; res<=protein->var_pos[pos].number_of_choices; ++res)
        denom += protein->var_pos[pos].choice[res].composition;

    for(res=1; res<=protein->var_pos[pos].number_of_choices; ++res)
        protein->var_pos[pos].choice[res].composition =
            protein->var_pos[pos].choice[res].composition/denom;

    protein->var_pos[pos].residue_freqs[0]=0.0;
    protein->var_pos[pos].residue_freqs[1]=0.0;
    protein->var_pos[pos].residue_freqs[2]=0.0;

    /* residue_freqs[res] is the running sum of compositions 1..res */
    for(res=1; res<=protein->var_pos[pos].number_of_choices; ++res)
        protein->var_pos[pos].residue_freqs[res] =
            protein->var_pos[pos].residue_freqs[res-1] +
            protein->var_pos[pos].choice[res].composition;

    protein->parameters.log10_rotamer_combinations +=
        log10(protein->var_pos[pos].number_of_resimers);
}

/*
   Position is not listed in the slave file: flag every choice at
   var_pos[pos] as in use with equal composition, and count the rotamers of
   all choices into number_of_resimers.

   NOTE(review): parameters.log10_seq_combinations is not incremented here,
   although every choice is put in use -- confirm this is intended.
*/
static void use_all_choices(PROTEIN *protein, int pos)
{
    int res;

    protein->var_pos[pos].number_of_resimers=0;
    protein->var_pos[pos].dead_ended_flag=0;

    for(res=1; res<=protein->var_pos[pos].number_of_choices; ++res)
    {
        protein->var_pos[pos].choice[res].in_use_flag=1;
        protein->var_pos[pos].choice[res].composition=1.0;
        protein->var_pos[pos].number_of_resimers +=
            protein->var_pos[pos].choice[res].resparam_ptr->rotamerlib_ptr->number_of_rotamers;
    }

    finish_var_pos_statistics(protein, pos);
}

/*
   Position is listed in the slave file: only the choices of
   working_protein->var_pos[o_i] whose residuecode also appears at
   input_protein->var_pos[i] stay in use; all other choices are switched off
   and given composition EPS.

   Reports an error through failure_report(...,"exit") when the slave file
   lists more choices than the master table holds, or when no choice matches.
   The resparam_ptr of every input choice is released with free_memory().
*/
static void restrict_choices_to_input(PROTEIN *working_protein, int o_i,
                                      PROTEIN *input_protein, int i)
{
    int i_res, o_res;
    int error_check, num_choices_in_use;
    char *line;

    error_check=0;
    working_protein->var_pos[o_i].number_of_resimers=0;
    working_protein->var_pos[o_i].dead_ended_flag=0;

    /* this test does not depend on the choice being examined, so it is done
       once here instead of once per choice */
    if(input_protein->var_pos[i].number_of_choices > working_protein->var_pos[o_i].number_of_choices)
    {
        line = (char *)calloc(MAXLINE,sizeof(char));
        sprintf(line,"ERROR Must include all possible choices as choices in the master inputfile for position %s\n",
                input_protein->seqpos_text_map[seqpos_to_inputted_string(input_protein->var_pos[i].seq_position,
                                               input_protein->seqpos_text_map)].seqpos_text);
        failure_report(line,"exit");
    }

    for(o_res=1; o_res<=working_protein->var_pos[o_i].number_of_choices; ++o_res)
    {
        working_protein->var_pos[o_i].choice[o_res].in_use_flag=0;   /* by default, not in use */
        working_protein->var_pos[o_i].choice[o_res].composition = EPS;

        for(i_res=1; i_res<=input_protein->var_pos[i].number_of_choices; ++i_res)
        {
            if(input_protein->var_pos[i].choice[i_res].resparam_ptr->residuecode ==
               working_protein->var_pos[o_i].choice[o_res].resparam_ptr->residuecode)
            {
                error_check=1;
                working_protein->var_pos[o_i].choice[o_res].in_use_flag=1;   /* in the input, so in use */
                working_protein->var_pos[o_i].choice[o_res].composition = 1.0;
                working_protein->var_pos[o_i].number_of_resimers +=
                    working_protein->var_pos[o_i].choice[o_res].resparam_ptr->rotamerlib_ptr->number_of_rotamers;
            }
        }
    }

    for(i_res=1; i_res<=input_protein->var_pos[i].number_of_choices; ++i_res)
        free_memory(input_protein->var_pos[i].choice[i_res].resparam_ptr);

    if(error_check==0)   /* none in use; error */
    {
        line = (char *)calloc(MAXLINE,sizeof(char));
        sprintf(line,"ERROR no choices in use for position %d\n",input_protein->var_pos[i].seq_position);
        failure_report(line,"exit");
    }

    num_choices_in_use = 0;
    for(o_res=1; o_res<=working_protein->var_pos[o_i].number_of_choices; ++o_res)
        if(working_protein->var_pos[o_i].choice[o_res].in_use_flag==1)
            ++num_choices_in_use;

    working_protein->parameters.log10_seq_combinations += log10(num_choices_in_use);

    finish_var_pos_statistics(working_protein, o_i);
}

/* get the var_pos in input_protein to hook up w/ the lookup table in original_protein */
/* define in use flags, etc */
/* working_protein can be used as input for optimization functions, etc */
/*
   original_protein : protein holding the lookup table; copied by struct
                      assignment into *working_protein, so pointer members
                      (var_pos included) are shared, not duplicated
   input_protein    : variable positions read from the slave input file
   working_protein  : receives the copy; in_use flags, compositions,
                      residue_freqs and the combination counters are set here

   The first call saves SOLUBILITY_CUTOFF_FLAG, SPECIFICITY_FLAG,
   HBOND_SPECIFICITY_FLAG and MINIMIZE_FINAL_SEQUENCE_FLAG in static
   variables. Every call then either zeroes those globals (when
   log10_seq_combinations stays 0) or restores the saved values.
*/
void share_lookup_table(PROTEIN *original_protein, PROTEIN *input_protein, PROTEIN *working_protein)
{
    int i, o_i, i_res, i_res_rot;
    extern int SOLUBILITY_CUTOFF_FLAG, SPECIFICITY_FLAG, HBOND_SPECIFICITY_FLAG, MINIMIZE_FINAL_SEQUENCE_FLAG;
    static int solub_flag, specif_flag, min_final_seq_flag, hbond_specif_flag=ENDFLAG;
    int quiet_flag;
    extern int QUIET_FLAG;

    quiet_flag = QUIET_FLAG;

    /* hbond_specif_flag==ENDFLAG marks the first call: remember the flags */
    if(hbond_specif_flag==ENDFLAG)
    {
        solub_flag = SOLUBILITY_CUTOFF_FLAG;
        specif_flag = SPECIFICITY_FLAG;
        hbond_specif_flag = HBOND_SPECIFICITY_FLAG;
        min_final_seq_flag = MINIMIZE_FINAL_SEQUENCE_FLAG;
    }

    *working_protein = *original_protein;

    working_protein->parameters.log10_seq_combinations = 0;
    working_protein->parameters.log10_rotamer_combinations = 0;

    o_i=1;
    for(i=1; i<=input_protein->parameters.numVarPositions; ++i)
    {
        /* advance to a user-defined moving position for this slave job */
        /* if not defined in the slave file, but defined in the foreman input file,
           move using all the choices in the foreman input file */
        while(working_protein->var_pos[o_i].seq_position!=ENDFLAG &&
              working_protein->var_pos[o_i].seq_position!=input_protein->var_pos[i].seq_position)
        {
            use_all_choices(working_protein, o_i);
            ++o_i;
        }

        /* this position is defined in this slave input file; remove other choices */
        if(working_protein->var_pos[o_i].seq_position!=ENDFLAG &&
           working_protein->var_pos[o_i].seq_position==input_protein->var_pos[i].seq_position)
        {
            restrict_choices_to_input(working_protein, o_i, input_protein, i);
            ++o_i;
        }
    }

    /* count the non-fixed positions and flag the lookup rotamers of every
       choice that is in use */
    working_protein->parameters.numMovingPositions = 0;
    i=1;
    while(working_protein->var_pos[i].seq_position!=ENDFLAG)
    {
        if(working_protein->var_pos[i].fixed_flag == 0)
            ++working_protein->parameters.numMovingPositions;

        for(i_res=1; i_res<=working_protein->var_pos[i].number_of_choices; ++i_res)
        {
            if(working_protein->var_pos[i].choice[i_res].in_use_flag == 1)
                for(i_res_rot=1;
                    i_res_rot<=working_protein->var_pos[i].choice[i_res].resparam_ptr->rotamerlib_ptr->number_of_rotamers;
                    ++i_res_rot)
                    working_protein->var_pos[i].choice[i_res].lookup_res_ptr->lookupRot[i_res_rot].in_use_flag = 1;
        }
        ++i;
    }

    /* only one seq, so ignore solubility and specificity */
    if(working_protein->parameters.log10_seq_combinations == 0)
    {
        SOLUBILITY_CUTOFF_FLAG = 0;
        SPECIFICITY_FLAG=0;
        HBOND_SPECIFICITY_FLAG=0;
        MINIMIZE_FINAL_SEQUENCE_FLAG = 0;
    }
    else
    {
        SOLUBILITY_CUTOFF_FLAG = solub_flag;
        SPECIFICITY_FLAG = specif_flag;
        HBOND_SPECIFICITY_FLAG = hbond_specif_flag;
        MINIMIZE_FINAL_SEQUENCE_FLAG = min_final_seq_flag;
    }

    QUIET_FLAG = quiet_flag;
}

/* returns 1 when token opens the variable positions section of an inputfile */
static int is_var_pos_section(char *token)
{
    return (strncmp(token,"SEQUENCE",8)==0 || strncmp(token,"VARIABLE_POSITION",17)==0);
}

/*
   Reads the next line of fp into line (MAXLINE bytes) and stores its first
   whitespace-delimited word, upper-cased, in token.

   Returns 1 when a line was read. Returns 0 at end of file or on a read
   error; token is then set to the empty string so it can never match a
   section keyword. On a blank line token keeps its previous content.
*/
static int next_keyword(FILE *fp, char *line, char *token)
{
    if(fgets(line,MAXLINE,fp)==NULL)
    {
        token[0]='\0';
        return 0;
    }
    sscanf(line,"%s",token);
    convert_string_to_all_caps(token);
    return 1;
}

/*
   Waits for the master to (re)create the slave file list, then opens it for
   reading and returns the stream.

   Calls exit(0) when the list is still missing after 300 polls two seconds
   apart, when kill_file exists, or when the list cannot be opened.
*/
static FILE *wait_for_slave_list(PROTEIN *original_protein, char *kill_file)
{
    FILE *list_ptr;
    int k;

    /* give the master a chance to create and update the file if needed */
    k=0;
    while(does_this_file_exist(original_protein->parameters.slave_file_list_filename)==0)
    {
        if(k>=300)      /* ten min later, kill this process */
            exit(0);
        sleep(2);
        ++k;
        if(does_this_file_exist(kill_file))
            exit(0);
    }
    if(does_this_file_exist(kill_file))
        exit(0);

    /* if this file no longer exists, kill this process */
    list_ptr = fopen_file(original_protein->parameters.slave_file_list_filename,"r");
    if(list_ptr == NULL)
        exit(0);

    return list_ptr;
}

/* opens original_protein->parameters.slave_file_list_filename, goes through the list of files,
   launches slave optimization jobs */
/*
   inputfilename    : foreman inputfile; only searched for a QUIET line here
   original_protein : protein whose lookup table is generated once and then
                      shared with every slave job

   A job is claimed by copying <slave file> to <slave file>.working, and then
   to <slave file>.working.<GET_PID>. The job is skipped when the .working
   file or a "<prefix>.pdb*" file already exists. The process leaves through
   exit(0) when <list>.KILL appears or the list file disappears.
*/
void rotamer_calc_foreman(char *inputfilename, PROTEIN *original_protein)
{
    char *slave_filename;
    PROTEIN *input_protein;
    PROTEIN *working_protein;
    char *line, *slave_output_prefix, *marked_slavefilename;
    char *working_filename, *dummystring, *pdbfilename, *kill_file;
    FILE *slave_input_file_ptr, *filename_list_file_ptr;
    int k, logfile_flag, i, output_coord_flag, have_line;
    CHROMOSOME *dummychr;
    extern int LOGFILE_FLAG, OUTPUT_COORD_FLAG, QUIET_FLAG, GET_PID;
    extern char *INPUTFILENAME;

    line = (char *)calloc(MAXLINE,sizeof(char));
    dummystring = (char *)calloc(MAXLINE,sizeof(char));

    if(grep_line_from_file("QUIET", line, MAXLINE, inputfilename)==1)
    {
        sscanf_flag(line,dummystring, QUIET_FLAG);
    }
    else
        QUIET_FLAG=1;   /* quiet by default */

    /* precalc all var_fix energies and set up the table for var_var energies */
    generate_lookup_table(original_protein);

    /* calc energies between non-sequence-variable residues */
    dummychr = (CHROMOSOME *)malloc(sizeof(CHROMOSOME));
    dummychr->bkbngenes = NULL;
    dummychr->first_bkbngene = NULL;
    inoculate_sidechains(dummychr, original_protein->var_pos,0);
    CHROMOSOME_to_lookupEnergy(dummychr, &original_protein->parameters.fixedatoms_energy);
    free_CHROMOSOME(dummychr);
    free_memory(dummychr);

    /* tell the master that the constant part of the lookup table has been loaded/calc'd */
    sprintf(line,"%s.loaded",original_protein->parameters.slave_file_list_filename);
    touch_file(line);

    input_protein = (PROTEIN *)malloc(sizeof(PROTEIN));
    working_protein = (PROTEIN *)malloc(sizeof(PROTEIN));

    slave_filename = (char *)calloc(MAXLINE,sizeof(char));
    working_filename = (char *)calloc(MAXLINE,sizeof(char));
    slave_output_prefix = (char *)calloc(MAXLINE,sizeof(char));
    pdbfilename = (char *)calloc(MAXLINE,sizeof(char));
    kill_file = (char *)calloc(MAXLINE,sizeof(char));
    marked_slavefilename = (char *)calloc(MAXLINE,sizeof(char));

    logfile_flag = LOGFILE_FLAG;

    filename_list_file_ptr = fopen_file(original_protein->parameters.slave_file_list_filename,"r");

    sprintf(kill_file,"%s.KILL", original_protein->parameters.slave_file_list_filename);

    while(filename_list_file_ptr!=NULL)
    {
        while(fgets(line,MAXLINE,filename_list_file_ptr)!=NULL)
        {
            /* a blank line carries no filename; without this test an empty
               or stale slave_filename would be processed */
            if(sscanf(line,"%s",slave_filename)!=1)
                continue;

            sprintf(working_filename,"%s.working",slave_filename);

            /* if the working_filename exists, it's being worked on */
            if(does_this_file_exist(working_filename)==0)
            {
                sprintf(slave_output_prefix,"%s",slave_filename);
                LOGFILE_FLAG = logfile_flag;

                /* open the slave file; 299 attempts two seconds apart, then give up */
                slave_input_file_ptr = NULL;
                for(k=1; ; ++k)
                {
                    if(k>=300)
                        exit(0);
                    slave_input_file_ptr = fopen_file(slave_filename,"r");
                    if(slave_input_file_ptr != NULL)
                        break;
                    sleep(2);
                }

                output_coord_flag = OUTPUT_COORD_FLAG;

                /* advance to variable positions section, picking up the
                   per-job prefix, logfile and coordinate-output settings */
                /* stop at end of file too: a slave file without a section
                   keyword must not keep this loop spinning */
                have_line = next_keyword(slave_input_file_ptr, line, dummystring);
                while(have_line && !is_var_pos_section(dummystring))
                {
                    if(strstr(dummystring,"PREFIX")!=0)
                        sscanf(line,"%s %s",dummystring,slave_output_prefix);

                    if(strstr(dummystring,"LOGFILE")!=0 && strstr(dummystring,"FLAG")!=0)
                        sscanf_flag(line,dummystring, LOGFILE_FLAG);

                    if(strstr(dummystring,"OUTPUT")!=0 &&
                       (strstr(dummystring,"COORD")!=0 || strstr(dummystring,"STRUCT")!=0) )
                        sscanf_flag(line,dummystring, output_coord_flag);

                    have_line = next_keyword(slave_input_file_ptr, line, dummystring);
                }
                fclose(slave_input_file_ptr);

                sprintf(pdbfilename,"%s.pdb*",slave_output_prefix);

                /* pdb file doesn't exist, and no one else is working on it....go for it */
                if(does_this_file_exist(pdbfilename)==0 && does_this_file_exist(working_filename)==0)
                {
                    /* the list is closed for the duration of the job and
                       reopened afterwards */
                    fclose(filename_list_file_ptr);
                    filename_list_file_ptr=NULL;

                    cp_file(slave_filename,working_filename);   /* copy to working_filename */

                    sprintf(marked_slavefilename,"%s.%d",working_filename,GET_PID);

                    cp_file(working_filename, marked_slavefilename);   /* copy to marked_slavefilename */

                    slave_input_file_ptr = fopen_file(marked_slavefilename,"r");
                    if(slave_input_file_ptr != NULL)
                    {
                        INPUTFILENAME = working_filename;

                        /* at end of file dummystring is empty, so the error
                           branch below reports the missing section */
                        have_line = next_keyword(slave_input_file_ptr, line, dummystring);
                        while(have_line && !is_var_pos_section(dummystring))
                            have_line = next_keyword(slave_input_file_ptr, line, dummystring);

                        *input_protein = *original_protein;
                        input_protein->var_pos=NULL;

                        /* NOTE(review): slave_input_file_ptr is not closed in
                           this function after parsing -- confirm whether the
                           parser called below closes it */
                        if(strncmp(dummystring,"SEQUENCE",8)==0)
                            sequence_to_var_pos_file(slave_input_file_ptr, input_protein, 0);
                        else if(strncmp(dummystring,"VARIABLE_POSITION",17)==0)
                            input_VARIABLE_POSITION(slave_input_file_ptr, input_protein, 0);
                        else
                        {
                            sprintf(line,"ERROR in rotamer_calc_foreman.\nCannot find SEQUENCE or VARIABLE_POSITION in slave inputfile %s\n",working_filename);
                            failure_report(line,"exit");
                        }

                        share_lookup_table(original_protein, input_protein, working_protein);

                        free_memory(input_protein->var_pos);

                        working_protein->parameters.output_prefix = slave_output_prefix;

                        if(strcmp(original_protein->parameters.algorithm, "GA")==0)
                            GA_rotamers_master_control(working_protein);
                        else if(strcmp(original_protein->parameters.algorithm, "SCMF")==0)
                            scmf(working_protein);
                        else if(strncmp(original_protein->parameters.algorithm, "MC",2)==0)
                            MC_rotamer_control(working_protein);
                        else if(strncmp(original_protein->parameters.algorithm, "DEE",3)==0)
                            DeeControl(working_protein);
                        else if(strcmp(original_protein->parameters.algorithm, "FASTER")==0)
                            FASTER_rotamer_control(working_protein);

                        if(does_this_file_exist(working_filename)==1)
                            if(are_these_files_identical(marked_slavefilename, working_filename)==1)
                            {
                                /* make sure that the inputfile that exists now is the correct one;
                                   it may have been updated by the master if this process was
                                   taking a long time */
                                output_PROTEIN(working_protein, output_coord_flag);
                                rm_file(working_filename);   /* remove working filename */
                            }

                        if(working_protein->chr!=NULL)
                        {
                            for(i=1; i<=working_protein->sizeof_chr_array; ++i)
                                free_CHROMOSOME(&working_protein->chr[i]);
                            free_memory(working_protein->chr);
                        }

                        if(working_protein->final_chr.firstgene!=NULL)
                            free_CHROMOSOME(&working_protein->final_chr);

                        if(working_protein->final_energy != NULL)
                            free_memory(working_protein->final_energy);

                        if(working_protein->final_pdb != NULL)
                            free_memory(working_protein->final_pdb);

                        if(working_protein->final_minipdb!=NULL)
                            free_memory(working_protein->final_minipdb);
                    }   /* end slave_input_file_ptr != NULL */

                    rm_file(marked_slavefilename);

                    if(does_this_file_exist(kill_file))
                        exit(0);
                }   /* end if does_this_file_exist(pdbfilename)==0 && does_this_file_exist(working_filename)==0 */
            }   /* end if does_this_file_exist(working_filename)==0 */

            /* the list was closed if a job was run: reopen it, so the next
               fgets reads from the top of the list again */
            if(filename_list_file_ptr == NULL)
                filename_list_file_ptr = fopen_file(original_protein->parameters.slave_file_list_filename,"r");

            if(filename_list_file_ptr == NULL)
                filename_list_file_ptr = wait_for_slave_list(original_protein, kill_file);
        }   /* end while fgets(line,MAXLINE,filename_list_file_ptr)!=NULL */

        if(filename_list_file_ptr != NULL)
            fclose(filename_list_file_ptr);

        sleep(5);

        filename_list_file_ptr = wait_for_slave_list(original_protein, kill_file);
    }   /* end while filename_list_file_ptr!=NULL */

    free_memory(line);
    free_memory(slave_filename);
    free_memory(kill_file);
    free_memory(working_filename);
    free_memory(slave_output_prefix);
    free_memory(marked_slavefilename);
    free_memory(dummystring);
    free_memory(pdbfilename);
    free_memory(input_protein);
    free_memory(working_protein);
}