/*
   EGAD: scanning_mutagenesis.cpp

   Navin Pokala and Tracy Handel
   Dept. of Molecular and Cell Biology
   University of California, Berkeley

   Copyright (C) 2003 Regents of the University of California
   GNU Public License
   Aug 12 2003

   Absolutely no warranties are made or are implied with the use of this
   program or its parts.

   This file contains functions for performing scanning mutagenesis and
   alanine shaving of protein positions
*/

#include "scanning_mutagenesis.h"

/* Appends one character to a NUL-terminated buffer, keeping it terminated.
   The character is silently dropped when it would not fit in `capacity`. */
static void scanmut_append_char(char *buffer, size_t capacity, size_t *length, char c)
{
    if (*length + 1 < capacity) {
        buffer[*length] = c;
        ++(*length);
        buffer[*length] = '\0';
    }
}

/* this function makes all possible single point mutants within
   protein->var_pos[i].fixed_flag==0; inputfiles for rotamer optimization are
   created, launched, and the data printed to
   protein->output_prefix.scanning_mutagenesis

   main_inputfilename: inputfile whose keyword lines between START and
                       SCANNING_POSITIONS/END are copied into every generated
                       inputfile (PREFIX and MAX_MUT lines are dropped).
   protein:            supplies the variable positions, their residue choices,
                       the template structure and the output prefix.

   Side effects visible here: writes <prefix>.master_inputfile,
   <prefix>.inputfile_list, <prefix>.inputfiles/ and <prefix>.structures/,
   launches the master job and waits for it, sets the global
   MAX_OPTIMIZATION_TIME, and finally writes <prefix>.scanning_mutagenesis.
   Exits the program if main_inputfilename contains no START line. */
void scanning_mutagenesis(char *main_inputfilename, PROTEIN *protein)
{
    char *command, *out_filename, *inputfilename, *top_inputfilename;
    char *line, *dummystring, *middle_filename, *lookup_table_inputfilename;
    const char *prefix, *prefix_sans_path, *seqpos_text;
    FILE *output_file_ptr, *input_file_ptr, *main_input_file_ptr,
         *batch_input_file_ptr, *lookup_table_inputfile_ptr;
    ENERGY energy, wt_energy;
    SASA_SUM sasa_sum;
    int lookup_flag, other_res_flag, runtime_flag, i, j, k, logfile_flag,
        jobtype_flag, output_coord_flag;
    int found_start, keep_line;
    size_t line_capacity, line_length;
    double frac_hphob, transfer_energy_density, slave_minutes;

    extern int GET_PID, CLEAN_UP_FLAG;
    extern char *EXECUTABLE_FILENAME, *CURRENT_WORKING_DIRECTORY,
                *DEFAULT_ROTAMER_JOB, *AVAILABLE_PROCESSORS_FILE;
    extern double MAX_OPTIMIZATION_TIME;

    /* `line` is filled by fgets() with a limit of MXLINE_INPUT, so it (and the
       token scanned out of it) must be at least that large */
    line_capacity = (size_t)((MXLINE_INPUT > MAXLINE) ? MXLINE_INPUT : MAXLINE);

    lookup_table_inputfilename = (char *)calloc(MAXLINE, sizeof(char));
    command = (char *)calloc(MAXLINE, sizeof(char));
    inputfilename = (char *)calloc(MAXLINE, sizeof(char));
    line = (char *)calloc(line_capacity, sizeof(char));
    dummystring = (char *)calloc(line_capacity, sizeof(char));
    out_filename = (char *)calloc(MAXLINE, sizeof(char));
    top_inputfilename = (char *)calloc(MAXLINE, sizeof(char));
    middle_filename = (char *)calloc(MAXLINE, sizeof(char));

    prefix = protein->parameters.output_prefix;
    prefix_sans_path = protein->parameters.output_prefix_sans_path;

    /* copy and modify inputfile */
    sprintf(top_inputfilename, "%s.%d.top", prefix, GET_PID);
    input_file_ptr = fopen_file(top_inputfilename, "w");
    main_input_file_ptr = fopen_file(main_inputfilename, "r");

    /* skip everything up to the START line; stop at EOF instead of spinning */
    found_start = 0;
    while (fgets(line, MXLINE_INPUT, main_input_file_ptr) != NULL) {
        if (strncmp(line, "START", 5) == 0) {
            found_start = 1;
            break;
        }
    }
    if (!found_start) {
        fprintf(stderr, "scanning_mutagenesis: no START line found in %s\n",
                main_inputfilename);
        fclose(main_input_file_ptr);
        fclose(input_file_ptr);
        rm_file(top_inputfilename);
        exit(EXIT_FAILURE);
    }
    fprintf(input_file_ptr, "%s", line);

    lookup_flag = 0;
    other_res_flag = 0;
    runtime_flag = 0;
    logfile_flag = 0;
    jobtype_flag = 0;
    output_coord_flag = 0;

    /* copy keyword lines until SCANNING_POSITIONS or END; running out of
       input is treated like END */
    while (fgets(line, MXLINE_INPUT, main_input_file_ptr) != NULL) {
        /* clear the token so a blank line is not handled as the previous keyword */
        dummystring[0] = '\0';
        sscanf(line, "%s", dummystring);
        convert_string_to_all_caps(dummystring);

        if (strcmp(dummystring, "SCANNING_POSITIONS") == 0 ||
            strcmp(dummystring, "END") == 0) {
            break;
        }

        /* the order of these tests matters: the first match wins */
        keep_line = 1;
        if (strcmp(dummystring, "LOOKUP_TABLE_DIRECTORY") == 0) {
            lookup_flag = 1;
        } else if (strstr(dummystring, "OTHER") != NULL) {
            other_res_flag = 1;
        } else if (strstr(dummystring, "LOGFILE") != NULL) {
            logfile_flag = 1;
        } else if (strcmp(dummystring, "JOBTYPE") == 0 ||
                   strcmp(dummystring, "SLAVE_JOBTYPE") == 0) {
            jobtype_flag = 1;
        } else if (strstr(dummystring, "OUTPUT") != NULL &&
                   strstr(dummystring, "COORD") != NULL) {
            output_coord_flag = 1;
        } else if (strstr(dummystring, "TIME") != NULL &&
                   strstr(dummystring, "SLAVE") != NULL) {
            runtime_flag = 1;
            /* the file value is multiplied by 60 before being stored; only
               touch the global when a number was actually read */
            if (sscanf(line, "%*s %lf", &slave_minutes) == 1) {
                MAX_OPTIMIZATION_TIME = slave_minutes * 60;
            }
        } else if (strstr(dummystring, "PREFIX") != NULL ||
                   strstr(dummystring, "MAX_MUT") != NULL) {
            /* this function will define the prefix, so ignore the prefix line */
            keep_line = 0;
        }

        if (keep_line) {
            fprintf(input_file_ptr, "%s", line);
        }
    }
    fclose(main_input_file_ptr);

    /* supply defaults for keywords the inputfile did not mention */
    if (other_res_flag == 0) {
        fprintf(input_file_ptr, "OTHER_RESIDUES none\n");
    }
    if (runtime_flag == 0) {
        fprintf(input_file_ptr, "RUNTIME 1.0\n");
        MAX_OPTIMIZATION_TIME = 60;
    }
    if (jobtype_flag == 0) {
        fprintf(input_file_ptr, "JOBTYPE %s\n", DEFAULT_ROTAMER_JOB);
    }
    if (logfile_flag == 0) {
        fprintf(input_file_ptr, "LOGFILE_FLAG 0\n");
    }
    if (lookup_flag == 0) { /* LOOKUP_TABLE_DIRECTORY not explicitly defined */
        if (make_directory(protein->parameters.lookup_energy_table_directory) == 0) {
            /* if can't touch this, will exit with error */
            touch_file(protein->parameters.lookup_energy_table_directory);
        }
        fprintf(input_file_ptr, "LOOKUP_TABLE_DIRECTORY\t%s\n",
                protein->parameters.lookup_energy_table_directory);
    }
    fclose(input_file_ptr);

    /* list of all floating positions */
    sprintf(middle_filename, "%s.%d.middle", prefix, GET_PID);
    input_file_ptr = fopen_file(middle_filename, "w");
    fprintf(input_file_ptr, "VARIABLE_POSITIONS\n");
    for (i = 1; protein->var_pos[i].seq_position != ENDFLAG; ++i) {
        if (protein->var_pos[i].fixed_flag != 0) {
            continue;
        }
        seqpos_text = protein->seqpos_text_map[seqpos_to_inputted_string(
                          protein->var_pos[i].seq_position,
                          protein->seqpos_text_map)].seqpos_text;
        fprintf(input_file_ptr, "\t%s\n", seqpos_text);
    }
    fclose(input_file_ptr);

    /* master inputfile */
    sprintf(inputfilename, "%s.master_inputfile", prefix);
    cp_file(top_inputfilename, inputfilename);
    input_file_ptr = fopen_file(inputfilename, "a");
    fprintf(input_file_ptr, "SLAVE_FILENAMES %s.inputfile_list\n", prefix);
    fprintf(input_file_ptr, "OUTPUT_PREFIX %s\n", prefix);

    /* lookup table inputfile */
    sprintf(lookup_table_inputfilename, "%s.lookup_inputfile", prefix);
    cp_file(top_inputfilename, lookup_table_inputfilename);
    lookup_table_inputfile_ptr = fopen_file(lookup_table_inputfilename, "a");
    fprintf(lookup_table_inputfile_ptr, "PRECALCULATION_LEVEL 2\n");
    fprintf(lookup_table_inputfile_ptr, "JOBTYPE LOOKUP_TABLE_SLAVE\n");

    if (output_coord_flag == 0) {
        fprintf(input_file_ptr, "OUTPUT_COORD_FLAG 0\n");
    }

    fprintf(lookup_table_inputfile_ptr, "VARIABLE_POSITIONS\n");
    fprintf(input_file_ptr, "VARIABLE_POSITIONS\n");

    for (i = 1; protein->var_pos[i].seq_position != ENDFLAG; ++i) {
        if (protein->var_pos[i].fixed_flag != 0) {
            continue;
        }

        /* locate the template residue at this position to get its wild-type code.
           NOTE(review): none of these three searches is bounded, and the CB
           search will run past a residue that has no CB atom - confirm the
           template always satisfies this. */
        j = 1;
        while (protein->Template[j].seq_position != protein->var_pos[i].seq_position) {
            ++j;
        }
        while (strcmp(protein->Template[j].atom_ptr->atomname, "CB") != 0) {
            ++j;
        }
        k = 1;
        while (strcmp(protein->resparam[k].residuetype,
                      protein->Template[j].residuetype) != 0) {
            ++k;
        }

        /* build "<wt>,<c1>,<c2>,...,<cN>." by appending in place; the buffer
           is never passed to sprintf as both destination and source */
        line[0] = '\0';
        line_length = 0;
        scanmut_append_char(line, line_capacity, &line_length,
                            protein->resparam[k].one_letter_code[0]);
        scanmut_append_char(line, line_capacity, &line_length, ',');
        for (j = 1; j <= protein->var_pos[i].number_of_choices; ++j) {
            scanmut_append_char(line, line_capacity, &line_length,
                                protein->var_pos[i].choice[j].resparam_ptr->one_letter_code[0]);
            scanmut_append_char(line, line_capacity, &line_length,
                                (j == protein->var_pos[i].number_of_choices) ? '.' : ',');
        }

        seqpos_text = protein->seqpos_text_map[seqpos_to_inputted_string(
                          protein->var_pos[i].seq_position,
                          protein->seqpos_text_map)].seqpos_text;
        fprintf(input_file_ptr, "\t%s\t%s\n", seqpos_text, line);
        fprintf(lookup_table_inputfile_ptr, "\t%s\t%s\n", seqpos_text, line);
    }

    fclose(input_file_ptr);
    fclose(lookup_table_inputfile_ptr);

    /* create directories */
    sprintf(inputfilename, "%s.inputfiles", prefix);
    make_directory(inputfilename);
    sprintf(inputfilename, "%s.structures", prefix);
    make_directory(inputfilename);

    /* wt file */
    sprintf(inputfilename, "%s.inputfiles/%s.wt.input", prefix, prefix_sans_path);
    cp_file(top_inputfilename, inputfilename);
    input_file_ptr = fopen_file(inputfilename, "a");
    fprintf(input_file_ptr, "OUTPUT_PREFIX\t%s.structures/%s.wt\n", prefix, prefix_sans_path);
    fclose(input_file_ptr);
    append_file(middle_filename, inputfilename);

    /* the inputfile list starts with the wt job */
    sprintf(inputfilename, "%s.inputfile_list", prefix);
    batch_input_file_ptr = fopen_file(inputfilename, "w");
    sprintf(inputfilename, "%s.inputfiles/%s.wt.input", prefix, prefix_sans_path);
    fprintf(batch_input_file_ptr, "%s\n", inputfilename);

    /* mutant inputfiles: top + OUTPUT_PREFIX + middle + the single mutation */
    for (i = 1; protein->var_pos[i].seq_position != ENDFLAG; ++i) {
        if (protein->var_pos[i].fixed_flag != 0) {
            continue;
        }
        seqpos_text = protein->seqpos_text_map[seqpos_to_inputted_string(
                          protein->var_pos[i].seq_position,
                          protein->seqpos_text_map)].seqpos_text;

        for (j = 1; j <= protein->var_pos[i].number_of_choices; ++j) {
            char mutant_code = protein->var_pos[i].choice[j].resparam_ptr->one_letter_code[0];

            sprintf(inputfilename, "%s.inputfiles/%s.%s.%c.input",
                    prefix, prefix_sans_path, seqpos_text, mutant_code);
            fprintf(batch_input_file_ptr, "%s\n", inputfilename);

            cp_file(top_inputfilename, inputfilename);
            input_file_ptr = fopen_file(inputfilename, "a");
            fprintf(input_file_ptr, "OUTPUT_PREFIX\t%s.structures/%s.%s.%c\n",
                    prefix, prefix_sans_path, seqpos_text, mutant_code);
            fclose(input_file_ptr);

            append_file(middle_filename, inputfilename);

            input_file_ptr = fopen_file(inputfilename, "a");
            fprintf(input_file_ptr, "\t%s\t%c.\n", seqpos_text, mutant_code);
            fclose(input_file_ptr);
        }
    }

    rm_file(top_inputfilename);
    rm_file(middle_filename);
    fclose(batch_input_file_ptr);

    // launch lookup table master job, if we are in parallel mode
    if (AVAILABLE_PROCESSORS_FILE != NULL) {
        lookup_table_master(lookup_table_inputfilename, protein);
    }
    rm_file(lookup_table_inputfilename);

    /* launch jobs */
    sprintf(command, "%s %s.master_inputfile", EXECUTABLE_FILENAME, prefix);
    launch_command(CURRENT_WORKING_DIRECTORY, command);

    /* wait for jobs to finish */
    sprintf(inputfilename, "%s.inputfile_list", prefix);
    wait_for_slaves_to_finish(inputfilename);
    rm_file(inputfilename);
    sprintf(inputfilename, "%s.master_inputfile", prefix);
    rm_file(inputfilename);

    /* wild-type reference values */
    sprintf(out_filename, "%s.structures/%s.wt.pdb", prefix, prefix_sans_path);
    get_ENERGY_and_SASA_SUM_from_egad_pdb_file(out_filename, &wt_energy, &sasa_sum,
                                               &frac_hphob, &transfer_energy_density);

    pdbATOM_to_CHROMOSOME(protein->Template, &protein->final_chr, protein->resparam);

    /* results go to a pid-tagged file that is renamed once complete */
    sprintf(out_filename, "%s.scanning_mutagenesis.%d", prefix, GET_PID);
    output_file_ptr = fopen_file(out_filename, "w");
    fprintf(output_file_ptr,
            "seq_pos\twt_res\tmutant_res\tddG_folded\tclashes\tunsat_hbond\tddG_total\tcoresurfint\n");

    for (i = 1; protein->var_pos[i].seq_position != ENDFLAG; ++i) {
        if (protein->var_pos[i].fixed_flag != 0) {
            continue;
        }

        /* walk to the gene for this position to read its wild-type residue */
        protein->final_chr.genes = protein->final_chr.firstgene;
        while (protein->final_chr.genes->seq_position != protein->var_pos[i].seq_position) {
            protein->final_chr.genes = protein->final_chr.genes->nextgene;
        }

        seqpos_text = protein->seqpos_text_map[seqpos_to_inputted_string(
                          protein->var_pos[i].seq_position,
                          protein->seqpos_text_map)].seqpos_text;

        for (j = 1; j <= protein->var_pos[i].number_of_choices; ++j) {
            char mutant_code = protein->var_pos[i].choice[j].resparam_ptr->one_letter_code[0];

            sprintf(out_filename, "%s.structures/%s.%s.%c.pdb",
                    prefix, prefix_sans_path, seqpos_text, mutant_code);
            get_ENERGY_and_SASA_SUM_from_egad_pdb_file(out_filename, &energy, &sasa_sum,
                                                       &frac_hphob, &transfer_energy_density);

            /* every numeric column is mutant minus wild type */
            fprintf(output_file_ptr, "%s\t%c\t%c\t\t%lf\t%d\t%d\t\t%lf\t%c\n",
                    seqpos_text,
                    protein->final_chr.genes->choice_ptr->resparam_ptr->one_letter_code[0],
                    mutant_code,
                    energy.pseudo_dG - wt_energy.pseudo_dG,
                    energy.clashes - wt_energy.clashes,
                    energy.num_unsatisfied_hbond - wt_energy.num_unsatisfied_hbond,
                    energy.E_total - wt_energy.E_total,
                    protein->var_pos[i].core_flag);
        }

        protein->final_chr.genes = protein->final_chr.firstgene;
    }

    fclose(output_file_ptr);

    /* out_filename was reused for the pdb names above, so rebuild it */
    sprintf(out_filename, "%s.scanning_mutagenesis.%d", prefix, GET_PID);
    sprintf(command, "%s.scanning_mutagenesis", prefix);
    mv_file(out_filename, command);

    if (CLEAN_UP_FLAG == 1) {
        /* NOTE(review): the path is passed to the shell unquoted; a prefix
           containing spaces or shell metacharacters would be misinterpreted */
        sprintf(out_filename, "%s.structures", prefix);
        sprintf(command, "/bin/rm -rf %s", out_filename);
        system(command);

        sprintf(out_filename, "%s.inputfiles", prefix);
        sprintf(command, "/bin/rm -rf %s", out_filename);
        system(command);
    }

    sprintf(inputfilename, "%s.inputfile_list.loaded", prefix);
    rm_file(inputfilename);

    free_memory(middle_filename);
    free_memory(lookup_table_inputfilename);
    free_memory(line);
    free_memory(dummystring);
    free_memory(top_inputfilename);
    free_memory(command);
    free_memory(out_filename);
    free_memory(inputfilename);
}

/* for each protein->var_pos[i].fixed_flag==0, ALA is placed, and the energy
   scored data printed to protein->output_prefix.ala_shave; unlike
   scanning_mutagenesis, no rotamer optimization is performed on the mutants

   Each output row holds the position, the wild-type one-letter code, "A",
   the alanine mutant's energy (scaled total minus the scaled sum of
   unfolded-state energies) and its difference from the wild-type value
   computed the same way. */
void alanine_shave(PROTEIN *protein)
{
    int i, ala_resparam;
    double E_wt, E_unf_ala, E_unf_wt, E_ala, ala_chi[3];
    CHROMOSOME *chr;
    mini_pdbATOM *ala_pdb;
    ENERGY energy, wt_energy;
    FILE *output_file_ptr;
    char *out_filename;
    CHOICE ala_choice;

    ala_pdb = (mini_pdbATOM *)calloc(MAX_ATOMS, sizeof(mini_pdbATOM));
    out_filename = (char *)calloc(MAXLINE, sizeof(char));

    sprintf(out_filename, "%s.ala_shave", protein->parameters.output_prefix);
    output_file_ptr = fopen_file(out_filename, "w");
    fprintf(output_file_ptr, "seq_pos\twt_res\tmutant_res\tdG\tddG\n");

    /* final_chr holds the wild type; chr is the scratch copy that gets mutated */
    pdbATOM_to_CHROMOSOME(protein->Template, &protein->final_chr, protein->resparam);
    chr = (CHROMOSOME *)malloc(sizeof(CHROMOSOME));
    pdbATOM_to_CHROMOSOME(protein->Template, chr, protein->resparam);

    ala_resparam = 1;
    while (strcmp(protein->resparam[ala_resparam].residuetype, "ALA") != 0) {
        ++ala_resparam;
    }

    /* only resparam_ptr and bkbn are assigned below; zero the rest so no
       field is left indeterminate when the choice is handed to the builder */
    memset(&ala_choice, 0, sizeof(ala_choice));
    ala_choice.resparam_ptr = &protein->resparam[ala_resparam];
    ala_chi[0] = 0;
    ala_chi[1] = 0;
    ala_chi[2] = 0;

    wt_energy = energy_calc(protein->mini_Template, GB_FLAG, SASA_FLAG);

    /* sum the unfolded-state energy over every wild-type residue */
    E_unf_wt = 0;
    protein->final_chr.genes = protein->final_chr.firstgene;
    while (protein->final_chr.genes->seq_position != ENDFLAG) {
        E_unf_wt += protein->final_chr.genes->choice_ptr->resparam_ptr->E_unfolded;
        protein->final_chr.genes = protein->final_chr.genes->nextgene;
    }
    protein->final_chr.genes = protein->final_chr.firstgene;

    E_wt = ENERGY_TOTAL_SCALED(wt_energy) - OVERALL_ENERGY_SCALE * E_unf_wt;

    for (i = 1; protein->var_pos[i].seq_position != ENDFLAG; ++i) {
        if (protein->var_pos[i].fixed_flag != 0) {
            continue;
        }

        copyCHROMOSOME((*chr), protein->final_chr); /* reset */

        /* find the gene for this variable position */
        chr->genes = chr->firstgene;
        while (protein->var_pos[i].seq_position != chr->genes->seq_position) {
            chr->genes = chr->genes->nextgene;
        }

        fprintf(output_file_ptr, "%s\t%c\tA\t",
                protein->seqpos_text_map[seqpos_to_inputted_string(
                    protein->var_pos[i].seq_position,
                    protein->seqpos_text_map)].seqpos_text,
                chr->genes->choice_ptr->resparam_ptr->one_letter_code[0]);

        /* swap this residue's unfolded-state term for alanine's */
        E_unf_ala = E_unf_wt
                    - chr->genes->choice_ptr->resparam_ptr->E_unfolded
                    + protein->resparam[ala_resparam].E_unfolded;

        /* mutate to alanine.
           NOTE(review): chi is pointed at a local array and choice_ptr at a
           local struct; confirm copyCHROMOSOME/free_CHROMOSOME neither write
           through nor free these pointers. */
        chr->genes->choice_ptr = &ala_choice;
        chr->genes->choice_ptr->bkbn = &protein->var_pos[i].bkbn;
        chr->genes->chi = ala_chi;

        /* build the structure */
        chr->genes = chr->firstgene;
        CHROMOSOME_to_mini_pdbATOM(chr, protein->mini_fixed_atoms, ala_pdb,
                                   protein->chain_anchor_bkbn);

        energy = energy_calc(ala_pdb, GB_FLAG, SASA_FLAG);
        E_ala = ENERGY_TOTAL_SCALED(energy) - OVERALL_ENERGY_SCALE * E_unf_ala;

        fprintf(output_file_ptr, "%lf\t%lf\n", E_ala, E_ala - E_wt);
    }

    fclose(output_file_ptr);
    free_memory(ala_pdb);
    free_memory(out_filename);
    free_CHROMOSOME(chr);
}