/* EGAD: complex_formation_energy.cpp Navin Pokala and Tracy Handel Dept. of Molecular and Cell Biology University of California, Berkeley Copyright (C) 2003 Regents of the University of California GNU Public License Aug 12 2003 Absolutely no warranties are made or are implied with the use of this program or its parts. This file contains the functions for seperating protein-protein complexes and calculating the complex formation energy, w/ or w/o rotamer optimization. */ #include "complex_formation_energy.h" #define SASA_DIFF_TOL 1.0 #define ENERGY_DIFF_TOL 0.1 void find_interface_residues(mini_pdbATOM *complex_pdb, mini_pdbATOM *free_pdb, int num_res, int *list_of_interface_residues, double *delta_sasa_interface, double *energy_interface_residues, int calc_energy_flag) { int i, current_seq, j,k; double sasa_complex, sasa_free; double **energy_table_complex=NULL, **energy_table_free=NULL, **energy_table_diff=NULL; if(calc_energy_flag == 1) { energy_table_complex = (double **)calloc(num_res+3,sizeof(double *)); energy_table_free = (double **)calloc(num_res+3,sizeof(double *)); energy_table_diff = (double **)calloc(num_res+3,sizeof(double *)); for(i=0;i<=num_res+1;++i) { energy_table_complex[i] = (double *)calloc(num_res+3,sizeof(double)); energy_table_free[i] = (double *)calloc(num_res+3,sizeof(double)); energy_table_diff[i] = (double *)calloc(num_res+3,sizeof(double)); } energy_table_complex[num_res+2]=NULL; energy_table_free[num_res+2]=NULL; energy_table_diff[num_res+2]=NULL; generate_energy_profile_table(&energy_table_complex, complex_pdb, num_res); generate_energy_profile_table(&energy_table_free, free_pdb, num_res); energy_profile_table_difference(num_res, energy_table_diff, energy_table_complex, energy_table_free); } j=1; i=1; k=0; current_seq = complex_pdb[i].seq_position; while(complex_pdb[i].seq_position!=ENDFLAG) { sasa_complex = 0; sasa_free=0; while(complex_pdb[i].seq_position==current_seq) { sasa_complex += complex_pdb[i].sasa; sasa_free += free_pdb[i].sasa; ++i; } ++k; if(fabs(sasa_complex-sasa_free) >= SASA_DIFF_TOL) { list_of_interface_residues[j] = current_seq; delta_sasa_interface[j] = sasa_free - sasa_complex; energy_interface_residues[j] = 0; if(calc_energy_flag == 1) energy_interface_residues[j] = energy_table_diff[num_res+1][k]; ++j; } else { if(calc_energy_flag == 1) if(fabs(energy_table_diff[num_res+1][k])>=ENERGY_DIFF_TOL) { list_of_interface_residues[j] = current_seq; delta_sasa_interface[j] = sasa_free - sasa_complex; energy_interface_residues[j] = energy_table_diff[num_res+1][k]; ++j; } } current_seq = complex_pdb[i].seq_position; } list_of_interface_residues[j] = ENDFLAG; if(calc_energy_flag == 1) { for(i=0;i<=num_res+1;++i) { free_memory(energy_table_diff[i]); free_memory(energy_table_complex[i]); free_memory(energy_table_free[i]); } free_memory(energy_table_complex); free_memory(energy_table_free); free_memory(energy_table_diff); } } /* returns the minimal distance between atoms stored in mini_pdbATOM arrays new_A and free_B */ double distance_between_two_mini_pdbATOM(mini_pdbATOM *new_A, mini_pdbATOM *free_B) { int i,j; double closest_distance,dummydouble; double xmax, ymax, zmax, xmin, ymin, zmin; closest_distance = 1e10; j=1; while(free_B[j].seq_position != ENDFLAG) { xmax=free_B[j].coord.x+closest_distance; xmin=free_B[j].coord.x-closest_distance; ymax=free_B[j].coord.y+closest_distance; ymin=free_B[j].coord.y-closest_distance; zmax=free_B[j].coord.z+closest_distance; zmin=free_B[j].coord.z-closest_distance; i=1; while(new_A[i].seq_position != ENDFLAG) { if(new_A[i].coord.x<=xmax) if(new_A[i].coord.x>=xmin) if(new_A[i].coord.y<=ymax) if(new_A[i].coord.y>=ymin) if(new_A[i].coord.z<=zmax) if(new_A[i].coord.z>=zmin) { dummydouble = Distance(new_A[i].coord, free_B[j].coord); if(dummydouble <= closest_distance) { closest_distance = dummydouble; xmax=free_B[j].coord.x+closest_distance; xmin=free_B[j].coord.x-closest_distance; ymax=free_B[j].coord.y+closest_distance; ymin=free_B[j].coord.y-closest_distance; zmax=free_B[j].coord.z+closest_distance; zmin=free_B[j].coord.z-closest_distance; } } ++i; } ++j; } return(closest_distance); } /* coords of atoms in free_A are translated step angstroms along vector AB_hat; coords placed in new_A */ void translate_mini_pdbATOM_along_vector_AB(mini_pdbATOM *free_A, mini_pdbATOM *new_A, double step, CARTESIAN AB_hat) { int i; i=1; while(new_A[i].seq_position != ENDFLAG) { new_A[i] = free_A[i]; new_A[i].coord = add_CARTESIAN(free_A[i].coord, scalar_multiply_CARTESIAN(step, AB_hat)); ++i; } new_A[i] = free_A[i]; } /* move proteinA away from proteinB so that they are separation_distance +/- tolerance apart move along the inter-centroid vector freeA = coordinates of A freeB = coordinates of B newA = new coordinates of A */ void move_complex_apart(mini_pdbATOM *free_A, mini_pdbATOM *free_B, mini_pdbATOM *new_A, const double separation_distance, double tolerance) { CARTESIAN AB_hat,centroid_A, centroid_B; double step[5]; int i, stopflag; double closest_distance[5]; mini_pdbATOM *working_A; working_A = (mini_pdbATOM *)calloc(MAX_ATOMS,sizeof(mini_pdbATOM)); /* define AB_hat, the vector between the centroids */ centroid_A = get_centroid(free_A); centroid_B = get_centroid(free_B); AB_hat = unitvector(centroid_A, centroid_B); i=1; while(free_A[i].seq_position != ENDFLAG) { new_A[i] = free_A[i]; working_A[i] = free_A[i]; ++i; } new_A[i] = free_A[i]; working_A[i] = free_A[i]; stopflag = 0; step[1] = separation_distance; while(stopflag == 0) { translate_mini_pdbATOM_along_vector_AB(free_A, working_A, step[1], AB_hat); closest_distance[1] = distance_between_two_mini_pdbATOM(working_A, free_B); if(closest_distance[1]separation_distance) stopflag=1; else step[3] += 2.0; } step[2] = (step[1] + step[3])/2.0; translate_mini_pdbATOM_along_vector_AB(free_A, working_A, step[2], AB_hat); closest_distance[2] = distance_between_two_mini_pdbATOM(working_A, free_B); /* bisection search to find optimal step along AB_hat so new_A is separation_distance from free_B */ stopflag = 0; while(stopflag == 0) { /* printf("%lf\t%lf\t%lf\n",step[1],step[2],step[3]); printf("%lf\t%lf\t%lf\n",closest_distance[1],closest_distance[2],closest_distance[3]); */ if(separation_distance > closest_distance[2] && separation_distance < closest_distance[3]) /* between 2 and 3 */ { step[1] = step[2]; step[2] = (step[2] + step[3])/2.0; closest_distance[1] = closest_distance[2]; } else { step[3] = step[2]; step[2] = (step[2] + step[1])/2.0; closest_distance[3] = closest_distance[2]; } translate_mini_pdbATOM_along_vector_AB(free_A, working_A, step[2], AB_hat); closest_distance[2] = distance_between_two_mini_pdbATOM(working_A, free_B); if(fabs(separation_distance - closest_distance[2]) <= tolerance) stopflag = 1; /* printf("%lf\t%lf\n",step[2], closest_distance[2]); */ } translate_mini_pdbATOM_along_vector_AB(free_A, new_A, step[2], AB_hat); free_memory(working_A); } /* this function splits a complex in input_complex and puts the coords of the separated complex in free_A and free_B super_chain A moves away from B */ void split_complex(mini_pdbATOM *input_complex, mini_pdbATOM *free_A, mini_pdbATOM *free_B, char *chain_id_list,SUPER_CHAIN_ID_LIST *super_chain_id_list) { int i,k,a,b,found_flag; div_t chain_id_number; char chain_id; i=1; k=0; a=1;b=1; while(input_complex[i].seq_position != ENDFLAG) { chain_id_number = div(input_complex[i].seq_position, 1000); chain_id = chain_id_list[chain_id_number.quot+1]; found_flag=0; k=1; while(super_chain_id_list[1].chain_id[k]!='\0' && found_flag==0) { if(chain_id == super_chain_id_list[1].chain_id[k]) found_flag=1; ++k; } if(found_flag==1) /* copy into A; these will move as a unit */ { free_A[a] = input_complex[i]; ++a; } else /* copy into B; these will stay fixed */ { free_B[b] = input_complex[i]; ++b; } ++i; } free_A[a] = input_complex[i]; free_B[b] = input_complex[i]; } /* this function splits a complex in input_complex separation_distance apart and puts the coords of the separated complex in seperated_cmplx */ void seperate_the_complex(mini_pdbATOM *input_complex, mini_pdbATOM *seperated_cmplx, char *chain_id_list, SUPER_CHAIN_ID_LIST *super_chain_id_list, double separation_distance) { mini_pdbATOM *free_A, *free_B, *new_A; int i,a,b,found_flag,k,A_flag; div_t chain_id_number; char chain_id; free_A = (mini_pdbATOM *)calloc(MAX_ATOMS,sizeof(mini_pdbATOM)); free_B = (mini_pdbATOM *)calloc(MAX_ATOMS,sizeof(mini_pdbATOM)); new_A = (mini_pdbATOM *)calloc(MAX_ATOMS,sizeof(mini_pdbATOM)); i=1; k=0; a=1;b=1; while(input_complex[i].seq_position != ENDFLAG) { chain_id_number = div(input_complex[i].seq_position, 1000); chain_id = chain_id_list[chain_id_number.quot+1]; found_flag=0; k=1; while(super_chain_id_list[1].chain_id[k]!='\0' && found_flag==0) { if(chain_id == super_chain_id_list[1].chain_id[k]) found_flag=1; ++k; } if(found_flag==1) /* count the two superchains */ ++a; else ++b; ++i; } if(aparameters.algorithm,"STATIC",6)==0, rotamer optimization is not performed */ void complex_formation_energy_master(char *inputfilename, PROTEIN *protein) { ENERGY energy_complex, energy_free; SASA_SUM sasa_sum_complex, sasa_sum_free; char *line, *command, *complex_inputfilename, *free_inputfilename, *dummystring, *prefix; FILE *complex_inputfile_ptr, *free_inputfile_ptr, *input_file_ptr, *out_file_ptr; int prefix_flag,i,j,lookup_flag, *list_of_interface_residues; extern int GET_PID; mini_pdbATOM *seperated_cmplx, *final_cmplx; PROTEIN *protein_complex, *protein_free; extern int GB_FLAG,SASA_FLAG,TORSION_FLAG,COULOMB_FLAG,OUTPUT_COORD_FLAG; extern SASA_SUM SASA_SUM_TEMP; extern char *DEFAULT_ROTAMER_JOB; double *delta_sasa_interface, *energy_interface_residues; complex_inputfilename = (char *)calloc(MXLINE_INPUT,sizeof(char)); free_inputfilename = (char *)calloc(MXLINE_INPUT,sizeof(char)); line = (char *)calloc(MXLINE_INPUT,sizeof(char)); command = (char *)calloc(MXLINE_INPUT,sizeof(char)); dummystring = (char *)calloc(MXLINE_INPUT,sizeof(char)); prefix = (char *)calloc(MXLINE_INPUT,sizeof(char)); strcpy(prefix, protein->parameters.output_prefix); list_of_interface_residues = (int *)calloc(MAX_RESIDUES,sizeof(int)); delta_sasa_interface = (double *)calloc(MAX_RESIDUES,sizeof(double)); energy_interface_residues = (double *)calloc(MAX_RESIDUES,sizeof(double)); seperated_cmplx = (mini_pdbATOM *)calloc(MAX_ATOMS,sizeof(mini_pdbATOM)); final_cmplx = (mini_pdbATOM *)calloc(MAX_ATOMS,sizeof(mini_pdbATOM)); if(strncmp(protein->parameters.algorithm,"STATIC",6)==0) { sprintf(dummystring,"%s.complex_energy.out",protein->parameters.output_prefix); out_file_ptr = fopen_file(dummystring,"w"); energy_complex = energy_calc(protein->mini_Template,1, 1); sasa_sum_complex = SASA_SUM_TEMP; energy_complex.E_1_4 = ((double)TORSION_FLAG)*energy_complex.E_1_4; energy_complex.E_coulomb = ((double)COULOMB_FLAG)*energy_complex.E_coulomb; seperate_the_complex(protein->mini_Template, seperated_cmplx, protein->chain_id_list, protein->super_chain_id_list, 15.0); energy_free = energy_calc(seperated_cmplx,1, 1); sasa_sum_free = SASA_SUM_TEMP; energy_free.E_1_4 = ((double)TORSION_FLAG)*energy_free.E_1_4; energy_free.E_coulomb = ((double)COULOMB_FLAG)*energy_free.E_coulomb; fprintf(out_file_ptr,"energy_complex:\n"); fprintf_energy(out_file_ptr,energy_complex); fprintf(out_file_ptr,"energy_free:\n"); fprintf_energy(out_file_ptr,energy_free); protein->pseudo_dG = ENERGY_TOTAL_SCALED(energy_complex) - ENERGY_TOTAL_SCALED(energy_free); protein->final_energy = (ENERGY *)malloc(sizeof(ENERGY)); *protein->final_energy = ENERGY_SUBTRACT(energy_complex,energy_free); protein->sasa_sum = SASA_SUM_SUBTRACT(sasa_sum_complex,sasa_sum_free); fprintf(out_file_ptr,"energies_of_complex_formation:\n"); fprintf_energy(out_file_ptr,(*protein->final_energy)); fprintf(out_file_ptr,"pseudo_DELTA_G_complex_formation:\t%lf\n",protein->pseudo_dG); find_interface_residues(protein->mini_Template, seperated_cmplx, protein->num_res, list_of_interface_residues, delta_sasa_interface,energy_interface_residues, protein->calc_complex_residue_energy_flag); if(protein->calc_complex_residue_energy_flag == 1) fprintf(out_file_ptr,"INTERFACE_RESIDUES\tdG\tburied_area\n"); else fprintf(out_file_ptr,"INTERFACE_RESIDUES\tdG_not_calcd\tburied_area\n"); free_memory(line); line = (char *)calloc(MXLINE_INPUT,sizeof(char)); line[0]='\0'; i=1; while(list_of_interface_residues[i]!=ENDFLAG) { j=1; while(list_of_interface_residues[i]!=protein->Template[j].seq_position) ++j; fprintf(out_file_ptr,"%s\t%s\t\t%lf\t%lf\t", protein->seqpos_text_map[seqpos_to_inputted_string(list_of_interface_residues[i],protein->seqpos_text_map)].seqpos_text, protein->Template[j].residuetype, energy_interface_residues[i], delta_sasa_interface[i] ); if(fabs(delta_sasa_interface[i])<=EPS) /* change in energy, but not SASA */ { fprintf(out_file_ptr,"level_2\n"); } else /* true interface; SASA changes */ { fprintf(out_file_ptr,"level_1\n"); sprintf(line,"%s%s,",line,protein->seqpos_text_map[seqpos_to_inputted_string(list_of_interface_residues[i],protein->seqpos_text_map)].seqpos_text); } ++i; } fprintf(out_file_ptr,"COMMA_INTERFACE_RESIDUES:\t%s\n",line); protein->final_pdb = (pdbATOM *)calloc(MAX_ATOMS, sizeof(pdbATOM)); protein->final_minipdb = (mini_pdbATOM *)calloc(MAX_ATOMS, sizeof(mini_pdbATOM)); i=1; while(protein->Template[i].seq_position!=ENDFLAG) { protein->final_pdb[i] = protein->Template[i]; protein->final_pdb[i].coord = seperated_cmplx[i].coord; protein->final_pdb[i].born_radius = seperated_cmplx[i].born_radius; protein->final_pdb[i].sasa = seperated_cmplx[i].sasa; ++i; } protein->final_pdb[i] = protein->Template[i]; pdbATOM_to_mini_pdbATOM(protein->final_pdb,protein->final_minipdb); sprintf(protein->parameters.output_prefix,"%s.free",protein->parameters.output_prefix); fprintf(out_file_ptr,"FREE_STRUCTURE:\t%s.pdb\n",protein->parameters.output_prefix); fclose(out_file_ptr); output_PROTEIN(protein, OUTPUT_COORD_FLAG); } else /* rotamer optimize or design the complex, split, use split structure as template for rotamer optimization */ { /* copy and modify inputfile */ sprintf(complex_inputfilename,"%s.complex.%d", inputfilename,GET_PID); complex_inputfile_ptr = fopen_file(complex_inputfilename,"w"); sprintf(free_inputfilename,"%s.free.%d", inputfilename,GET_PID); free_inputfile_ptr = fopen_file(free_inputfilename,"w"); input_file_ptr = fopen_file(inputfilename,"r"); fgets(line,MXLINE_INPUT,input_file_ptr); while(strncmp(line,"START",5)!=0) { fgets(line,MXLINE_INPUT,input_file_ptr); } fprintf(complex_inputfile_ptr,"%s",line); fprintf(free_inputfile_ptr,"%s",line); fgets(line,MXLINE_INPUT,input_file_ptr); prefix_flag=0; lookup_flag=0; sscanf(line,"%s",dummystring); convert_string_to_all_caps(dummystring); while(strcmp(dummystring,"VARIABLE_POSITIONS")!=0 && strcmp(dummystring,"SEQUENCE")!=0) { if(strstr(dummystring,"PREFIX")!=0) { fprintf(complex_inputfile_ptr,"OUTPUT_PREFIX\t%s.optimized_complex\n",protein->parameters.output_prefix); fprintf(free_inputfile_ptr,"OUTPUT_PREFIX\t%s.optimized_free\n",protein->parameters.output_prefix); prefix_flag=1; } else if(strstr(dummystring,"TEMPLATE")!=0 || strstr(dummystring,"TARGET")!=0) { fprintf(complex_inputfile_ptr,"%s",line); fprintf(free_inputfile_ptr,"TEMPLATE_PDB\t%s.free.pdb\n",protein->parameters.output_prefix); } else if(strstr(dummystring,"OTHER")!=0) { fprintf(complex_inputfile_ptr,"%s",line); } else if(strcmp(dummystring, "LOOKUP_TABLE_DIRECTORY")==0) { if(make_directory(protein->parameters.lookup_energy_table_directory) == 0) touch_file(protein->parameters.lookup_energy_table_directory); /* if can't touch this, will exit with error */ fprintf(complex_inputfile_ptr,"LOOKUP_TABLE_DIRECTORY\t%s/complex\n",protein->parameters.lookup_energy_table_directory); fprintf(free_inputfile_ptr,"LOOKUP_TABLE_DIRECTORY\t%s/free\n",protein->parameters.lookup_energy_table_directory); lookup_flag=1; } else if(strcmp(dummystring,"JOBTYPE")==0 || strcmp(dummystring,"SLAVE_JOBTYPE")==0) { fprintf(complex_inputfile_ptr,"JOBTYPE %s\n",DEFAULT_ROTAMER_JOB); fprintf(free_inputfile_ptr,"JOBTYPE %s\n",DEFAULT_ROTAMER_JOB); } else { fprintf(free_inputfile_ptr,"%s",line); fprintf(complex_inputfile_ptr,"%s",line); } fgets(line,MXLINE_INPUT, input_file_ptr); sscanf(line,"%s",dummystring); convert_string_to_all_caps(dummystring); } if(prefix_flag==0) { fprintf(complex_inputfile_ptr,"OUTPUT_PREFIX\t%s.optimized_complex\n",protein->parameters.output_prefix); fprintf(free_inputfile_ptr,"OUTPUT_PREFIX\t%s.optimized_free\n",protein->parameters.output_prefix); } if(lookup_flag==0) { if(make_directory(protein->parameters.lookup_energy_table_directory) == 0) touch_file(protein->parameters.lookup_energy_table_directory); /* if can't touch this, will exit with error */ fprintf(complex_inputfile_ptr,"LOOKUP_TABLE_DIRECTORY\t%s/complex\n",protein->parameters.lookup_energy_table_directory); fprintf(free_inputfile_ptr,"LOOKUP_TABLE_DIRECTORY\t%s/free\n",protein->parameters.lookup_energy_table_directory); } while(strcmp(dummystring,"END")!=0) { fprintf(complex_inputfile_ptr,"%s",line); if(fgets(line,MXLINE_INPUT,input_file_ptr)==NULL) strcpy(dummystring,"END"); else { sscanf(line,"%s",dummystring); convert_string_to_all_caps(dummystring); } } fprintf(free_inputfile_ptr,"VARIABLE_POSITIONS\n"); i=1; while(protein->var_pos[i].seq_position!=ENDFLAG) { if(protein->var_pos[i].fixed_flag==0) fprintf(free_inputfile_ptr,"%s\n", protein->seqpos_text_map[seqpos_to_inputted_string(protein->var_pos[i].seq_position, protein->seqpos_text_map)].seqpos_text); ++i; } fprintf(complex_inputfile_ptr,"END\n"); fprintf(free_inputfile_ptr,"END\n"); fclose(input_file_ptr); fclose(complex_inputfile_ptr); fclose(free_inputfile_ptr); /* launch the job for the complex */ protein_complex = (PROTEIN *)malloc(sizeof(PROTEIN)); input_stuff(complex_inputfilename,protein_complex); generate_lookup_table(protein_complex); FASTER_rotamer_control(protein_complex); output_PROTEIN(protein_complex, OUTPUT_COORD_FLAG); free_lookup_table(protein_complex->lookupEnergy, protein_complex->var_pos); protein_complex->lookupEnergy=NULL; /* split the complex and output it */ pdbATOM_to_mini_pdbATOM(protein_complex->final_pdb,final_cmplx); seperate_the_complex(final_cmplx, seperated_cmplx, protein_complex->chain_id_list, protein_complex->super_chain_id_list, 20.0); i=1; while(seperated_cmplx[i].seq_position!=ENDFLAG) { protein_complex->final_pdb[i].coord = seperated_cmplx[i].coord; ++i; } sprintf(protein_complex->parameters.output_prefix,"%s.free",protein->parameters.output_prefix); output_PROTEIN(protein_complex, OUTPUT_COORD_FLAG); /* launch the job for free complex */ protein_free = (PROTEIN *)malloc(sizeof(PROTEIN)); input_stuff(free_inputfilename,protein_free); generate_lookup_table(protein_free); FASTER_rotamer_control(protein_free); output_PROTEIN(protein_free, OUTPUT_COORD_FLAG); free_lookup_table(protein_free->lookupEnergy, protein_free->var_pos); protein_free->lookupEnergy=NULL; /* calculate the complex formation energy */ protein->pseudo_dG = ENERGY_TOTAL_SCALED((*protein_complex->final_energy)) - ENERGY_TOTAL_SCALED((*protein_free->final_energy)); protein_complex->pseudo_dG = protein->pseudo_dG; *protein_complex->final_energy = ENERGY_SUBTRACT((*protein_complex->final_energy),(*protein_free->final_energy)); protein_complex->sasa_sum = SASA_SUM_SUBTRACT(protein_complex->sasa_sum, protein_free->sasa_sum); sprintf(protein_complex->parameters.output_prefix,"%s.complex_formation_energy",protein->parameters.output_prefix); output_PROTEIN(protein_complex, OUTPUT_COORD_FLAG); sprintf(line,"%s.free.pdb",protein->parameters.output_prefix); rm_file(line); rm_file(complex_inputfilename); rm_file(free_inputfilename); free_PROTEIN(protein_free); free_PROTEIN(protein_complex); } strcpy(protein->parameters.output_prefix, prefix); free_memory(prefix); free_memory(complex_inputfilename); free_memory(free_inputfilename); free_memory(list_of_interface_residues); free_memory(line); free_memory(command); free_memory(dummystring); free_memory(energy_interface_residues); } /* this function identifies the interface residues for a protein-protein complex, and then performs scanning mutagenesis */ void interface_scan(char *main_inputfile, PROTEIN *protein) { char *dummystring, *dummystring2, *line, *prefix, *logfilename, *dummyfilename, *command, *input_algorithm; int lookup_table_flag, complex_done_flag, free_done_flag, clash_complex, clash_free, unsat_complex,unsat_free; FILE *logfile_ptr, *complex_info_file_ptr, *master_file_ptr; FILE *free_input_fileptr, *complex_input_fileptr,*main_inputfile_file_ptr, *scanpos_file_ptr; time_t now; extern int GET_PID, LOGFILE_FLAG, CLEAN_UP_FLAG, OUTPUT_COORD_FLAG, NUM_SYSTEM_CPU; extern char *EXECUTABLE_FILENAME, *AVAILABLE_PROCESSORS_FILE, *CURRENT_WORKING_DIRECTORY; double complex_ddG, free_ddG; dummystring = (char *)calloc(MAXLINE,sizeof(char)); dummystring2 = (char *)calloc(MAXLINE,sizeof(char)); line = (char *)calloc(MAXLINE,sizeof(char)); prefix = (char *)calloc(MAXLINE,sizeof(char)); dummyfilename = (char *)calloc(MAXLINE,sizeof(char)); command = (char *)calloc(MAXLINE,sizeof(char)); input_algorithm = (char *)calloc(MAXLINE,sizeof(char)); logfilename = NULL; sprintf(input_algorithm,"%s",protein->parameters.algorithm); strcpy(prefix,protein->parameters.output_prefix); /* split and get interface residues */ sprintf(protein->parameters.algorithm,"STATIC_SEPARATE_COMPLEX"); complex_formation_energy_master(main_inputfile, protein); sprintf(protein->parameters.algorithm,"%s",input_algorithm); if(strcmp(protein->parameters.algorithm, "INTERFACE_ALANINE_SCAN") == 0) sprintf(protein->parameters.output_prefix,"%s.interface_alascan",protein->parameters.output_prefix); else sprintf(protein->parameters.output_prefix,"%s.interface_satmut",protein->parameters.output_prefix); if(LOGFILE_FLAG!=0) { logfilename = (char *)calloc(MAXLINE,sizeof(char)); sprintf(logfilename,"%s.log",prefix); now = time(NULL); logfile_ptr = fopen_file(logfilename,"a"); fprintf(logfile_ptr,"interface_scan\toutput file prefix is now %s\n",protein->parameters.output_prefix); fprintf(logfile_ptr,"interface_scan\tseparating complex and identifying interface\t%s",ctime(&now)); fclose(logfile_ptr); } if(LOGFILE_FLAG!=0) { now = time(NULL); logfile_ptr = fopen_file(logfilename,"a"); fprintf(logfile_ptr,"interface_scan\twrite master inputfiles\t%s",ctime(&now)); fclose(logfile_ptr); } main_inputfile_file_ptr = fopen_file(main_inputfile,"r"); sprintf(dummystring,"%s.complex.scan.input", protein->parameters.output_prefix); complex_input_fileptr = fopen_file(dummystring,"w"); sprintf(dummystring,"%s.free.scan.input",protein->parameters.output_prefix); free_input_fileptr = fopen_file(dummystring,"w"); lookup_table_flag=0; OUTPUT_COORD_FLAG=0; /* copy stuff from the main inputfile */ fgets(line,MAXLINE,main_inputfile_file_ptr); sscanf(line,"%s",dummystring); convert_string_to_all_caps(dummystring); while(strncmp(dummystring,"END",3)!=0) { if(strstr(dummystring,"OUTPUT")!=0 && (strstr(dummystring,"COORD")!=0 || strstr(dummystring,"STRUCT")!=0) ) sscanf_flag(line,dummystring,OUTPUT_COORD_FLAG); if(strstr(dummystring,"TEMPLATE")!=0 || strstr(dummystring,"TARGET")!=0) { fprintf(complex_input_fileptr,"%s",line); if(protein->num_competitors == 0) fprintf(free_input_fileptr,"TEMPLATE\t%s.free.pdb\n", prefix); else /* free structure explicitly defined */ fprintf(free_input_fileptr,"TEMPLATE\t%s\n",protein->competitor_Template[1]); } else if(strstr(dummystring,"LOOKUP")!=0 && strstr(dummystring,"DIR")!=0 ) { sscanf(line,"%s %s",dummystring,dummystring2); lookup_table_flag=1; make_directory(dummystring2); fprintf(complex_input_fileptr,"LOOKUP_TABLE_DIRECTORY %s/complex\n",dummystring2); fprintf(free_input_fileptr,"LOOKUP_TABLE_DIRECTORY %s/free\n",dummystring2); } else if(strcmp(dummystring,"JOBTYPE")!=0 && strcmp(dummystring,"SLAVE_JOBTYPE")!=0 && strstr(dummystring,"PREFIX")==0 && strstr(dummystring,"MAX_MUT")==0 && !( strstr(dummystring,"COMPET")!=0 || strncmp(dummystring,"FREE",4)==0 ) ) { fprintf(complex_input_fileptr,"%s",line); fprintf(free_input_fileptr,"%s",line); } fgets(line,MAXLINE,main_inputfile_file_ptr); sscanf(line,"%s",dummystring); convert_string_to_all_caps(dummystring); } if(lookup_table_flag==0) { sprintf(dummystring2,"temp_lookup_directory.%d",GET_PID); make_directory(dummystring2); fprintf(complex_input_fileptr,"LOOKUP_TABLE_DIRECTORY %s/complex\n",dummystring2); fprintf(free_input_fileptr,"LOOKUP_TABLE_DIRECTORY %s/free\n",dummystring2); lookup_table_flag=1; } if(NUM_SYSTEM_CPU > 0) { fprintf(complex_input_fileptr,"NUM_CPU\t%d\n",NUM_SYSTEM_CPU); fprintf(free_input_fileptr,"NUM_CPU\t%d\n",NUM_SYSTEM_CPU); } else if(AVAILABLE_PROCESSORS_FILE!=NULL) { fprintf(complex_input_fileptr,"CPU_FILE\t%s\n",AVAILABLE_PROCESSORS_FILE); fprintf(free_input_fileptr,"CPU_FILE\t%s\n",AVAILABLE_PROCESSORS_FILE); } fprintf(complex_input_fileptr,"OUTPUT_PREFIX %s.complex.scan\n",protein->parameters.output_prefix); fprintf(free_input_fileptr,"OUTPUT_PREFIX %s.free.scan\n",protein->parameters.output_prefix); fprintf(complex_input_fileptr,"OUTPUT_COORD_FLAG %d\n",OUTPUT_COORD_FLAG); fprintf(free_input_fileptr,"OUTPUT_COORD_FLAG %d\n",OUTPUT_COORD_FLAG); fclose(free_input_fileptr); fclose(complex_input_fileptr); fclose(main_inputfile_file_ptr); sprintf(dummyfilename,"temp.%d",GET_PID); scanpos_file_ptr = fopen_file(dummyfilename,"w"); fprintf(scanpos_file_ptr,"SCANNING_POSITIONS\n"); sprintf(dummystring,"%s.complex_energy.out",prefix); complex_info_file_ptr = fopen_file(dummystring,"r"); fgets(line,MAXLINE,complex_info_file_ptr); sscanf(line,"%s",dummystring); while(strcmp(dummystring,"INTERFACE_RESIDUES")!=0) { fgets(line,MAXLINE,complex_info_file_ptr); sscanf(line,"%s",dummystring); } fgets(line,MAXLINE,complex_info_file_ptr); sscanf(line,"%s %s",dummystring, dummystring2); while(strstr(dummystring,"COMMA")==0) { /* only level_1 non-PRO/GLY/CYD residues for scan; change in SASA upon complexation */ if(strcmp(dummystring2,"PRO")!=0 && strcmp(dummystring2,"GLY")!=0 && strcmp(dummystring2,"CYD")!=0 && strstr(line,"level_1")!=0) { if(strcmp(protein->parameters.algorithm, "INTERFACE_ALANINE_SCAN") == 0) fprintf(scanpos_file_ptr,"\t%s\tA.\n",dummystring); else fprintf(scanpos_file_ptr,"\t%s\tall\n",dummystring); } fgets(line,MAXLINE,complex_info_file_ptr); sscanf(line,"%s %s",dummystring,dummystring2); } fclose(complex_info_file_ptr); fclose(scanpos_file_ptr); sprintf(dummyfilename,"master.%d",GET_PID); master_file_ptr = fopen_file(dummyfilename,"w"); sprintf(dummyfilename,"temp.%d",GET_PID); sprintf(dummystring,"%s.complex.scan.input",protein->parameters.output_prefix); fprintf(master_file_ptr,"%s\n",dummystring); append_file(dummyfilename, dummystring); sprintf(dummystring,"%s.free.scan.input",protein->parameters.output_prefix); fprintf(master_file_ptr,"%s\n",dummystring); append_file(dummyfilename, dummystring); fclose(master_file_ptr); sprintf(dummyfilename,"temp.%d",GET_PID); rm_file(dummyfilename); /* launch */ if(LOGFILE_FLAG!=0) { now = time(NULL); logfile_ptr = fopen_file(logfilename,"a"); fprintf(logfile_ptr,"interface_scan\tlaunch batch master.%d\t%s",GET_PID, ctime(&now)); fclose(logfile_ptr); } sprintf(command,"cd %s; %s batch master.%d",CURRENT_WORKING_DIRECTORY, EXECUTABLE_FILENAME, GET_PID); ssh_command(getenv("HOST"),command); if(LOGFILE_FLAG!=0) { now = time(NULL); logfile_ptr = fopen_file(logfilename,"a"); fprintf(logfile_ptr,"interface_scan\twait for jobs to complete\t%s",ctime(&now)); fclose(logfile_ptr); } /* wait for completion */ sprintf(dummystring,"%s.complex.scan.scanning_mutagenesis",protein->parameters.output_prefix); sprintf(dummystring2,"%s.free.scan.scanning_mutagenesis",protein->parameters.output_prefix); complex_done_flag=0; free_done_flag=0; while(does_this_file_exist(dummystring)==0 || does_this_file_exist(dummystring2)==0) { sleep(1); if(complex_done_flag==0) if(does_this_file_exist(dummystring)==1) /* mark complex as done */ { sprintf(dummyfilename,"%s.complex.scan.pdb",protein->parameters.output_prefix); touch_file(dummyfilename); complex_done_flag=1; if(LOGFILE_FLAG!=0) { now = time(NULL); logfile_ptr = fopen_file(logfilename,"a"); fprintf(logfile_ptr,"interface_scan\tcomplex scan completed\t%s",ctime(&now)); fclose(logfile_ptr); } } if(free_done_flag==0) if(does_this_file_exist(dummystring2)==1) /* mark free as done */ { sprintf(dummyfilename,"%s.free.scan.pdb",protein->parameters.output_prefix); touch_file(dummyfilename); free_done_flag=1; if(LOGFILE_FLAG!=0) { now = time(NULL); logfile_ptr = fopen_file(logfilename,"a"); fprintf(logfile_ptr,"interface_scan\tfree scan completed\t%s",ctime(&now)); fclose(logfile_ptr); } } } /* these should kill the batch job slaves */ sprintf(dummyfilename,"master.%d",GET_PID); rm_file(dummyfilename); sprintf(dummyfilename,"master.%d.log",GET_PID); rm_file(dummyfilename); if(LOGFILE_FLAG!=0) { now = time(NULL); logfile_ptr = fopen_file(logfilename,"a"); fprintf(logfile_ptr,"interface_scan\tcalculate energy differences\t%s",ctime(&now)); fclose(logfile_ptr); } sprintf(dummystring,"%s.complex.scan.scanning_mutagenesis",protein->parameters.output_prefix); complex_input_fileptr = fopen_file(dummystring,"r"); sprintf(dummystring,"%s.free.scan.scanning_mutagenesis",protein->parameters.output_prefix); free_input_fileptr = fopen_file(dummystring,"r"); sprintf(dummystring,"%s.interface_scan",protein->parameters.output_prefix); complex_info_file_ptr = fopen_file(dummystring,"w"); fprintf(complex_info_file_ptr,"seq_pos\twt_res\tmutant_res\tdddG\t\tddGcomplex\tddGfree\tdd_clashes\tclash_complex\tclash_free\tdd_unsat_hbond\tunsat_hbond_complex\tunsat_hbond_free\n"); /* move past header lines */ fgets(line,MAXLINE,complex_input_fileptr); fgets(line,MAXLINE,free_input_fileptr); while(fgets(line,MAXLINE,complex_input_fileptr)!=NULL) { sscanf(line,"%s %s %s %lf %d %d",dummystring, dummystring2, prefix, &complex_ddG, &clash_complex, &unsat_complex); fgets(line,MAXLINE,free_input_fileptr); sscanf(line,"%s %s %s %lf %d %d",dummystring, dummystring2, prefix, &free_ddG, &clash_free, &unsat_free); fprintf(complex_info_file_ptr,"%s\t%s\t%s\t%lf\t%lf\t%lf\t%d\t%d\t%d\t%d\t%d\t%d\n", dummystring, dummystring2, prefix, complex_ddG-free_ddG, complex_ddG,free_ddG, clash_complex-clash_free,clash_complex,clash_free,unsat_complex-unsat_free, unsat_complex,unsat_free ); } fclose(complex_input_fileptr); fclose(free_input_fileptr); fclose(complex_info_file_ptr); if(CLEAN_UP_FLAG == 1) { if(LOGFILE_FLAG!=0) { now = time(NULL); logfile_ptr = fopen_file(logfilename,"a"); fprintf(logfile_ptr,"interface_scan\tremoving intermediate files\t%s",ctime(&now)); fclose(logfile_ptr); } sprintf(dummyfilename,"%s.complex.scan.input", protein->parameters.output_prefix); if(does_this_file_exist(dummyfilename)==1) rm_file(dummyfilename); sprintf(dummyfilename,"%s.free.scan.input", protein->parameters.output_prefix); if(does_this_file_exist(dummyfilename)==1) rm_file(dummyfilename); sprintf(dummyfilename,"%s.complex.scan.input.working", protein->parameters.output_prefix); if(does_this_file_exist(dummyfilename)==1) rm_file(dummyfilename); sprintf(dummyfilename,"%s.free.scan.input.working", protein->parameters.output_prefix); if(does_this_file_exist(dummyfilename)==1) rm_file(dummyfilename); sprintf(dummyfilename,"%s.complex.scan.scanning_mutagenesis",protein->parameters.output_prefix); rm_file(dummyfilename); sprintf(dummyfilename,"%s.free.scan.scanning_mutagenesis",protein->parameters.output_prefix); rm_file(dummyfilename); sprintf(dummyfilename,"%s.free.scan.log",protein->parameters.output_prefix); rm_file(dummyfilename); sprintf(dummyfilename,"%s.complex.scan.log",protein->parameters.output_prefix); rm_file(dummyfilename); } sprintf(dummyfilename,"/bin/rm -rf *%d*",GET_PID); system(dummyfilename); if(LOGFILE_FLAG!=0) free_memory(logfilename); strcpy(protein->parameters.output_prefix, prefix); sleep(300); // prevent slave processes from inadverantly launching jobs // remove these files; the slaves that use these as 'do not go' markers should be dead by now sprintf(dummyfilename,"%s.free.scan.pdb",protein->parameters.output_prefix); rm_file(dummyfilename); sprintf(dummyfilename,"%s.complex.scan.pdb",protein->parameters.output_prefix); rm_file(dummyfilename); free_memory(dummystring2); free_memory(dummystring); free_memory(line); free_memory(prefix); free_memory(command); free_memory(input_algorithm); } void design_interface(char *main_inputfile, PROTEIN *protein) { char *dummystring, *dummystring2, *line, *logfilename, *dummyfilename, *command, *input_algorithm; int lookup_table_flag, output_prefix_flag, avail_proc_file_flag, other_residues_flag; FILE *logfile_ptr, *complex_info_file_ptr; FILE *multistate_input_fileptr,*main_inputfile_file_ptr; time_t now; extern int GET_PID, LOGFILE_FLAG, CLEAN_UP_FLAG, NUM_SYSTEM_CPU; extern char *EXECUTABLE_FILENAME, *AVAILABLE_PROCESSORS_FILE; dummystring = (char *)calloc(MAXLINE,sizeof(char)); dummystring2 = (char *)calloc(MAXLINE,sizeof(char)); line = (char *)calloc(MAXLINE,sizeof(char)); dummyfilename = (char *)calloc(MAXLINE,sizeof(char)); command = (char *)calloc(MAXLINE,sizeof(char)); input_algorithm = (char *)calloc(MAXLINE,sizeof(char)); logfilename = NULL; sprintf(input_algorithm,"%s",protein->parameters.algorithm); if(LOGFILE_FLAG!=0) { logfilename = (char *)calloc(MAXLINE,sizeof(char)); sprintf(logfilename,"%s.log",protein->parameters.output_prefix); now = time(NULL); logfile_ptr = fopen_file(logfilename,"a"); fprintf(logfile_ptr,"interface_design\tseparating complex and identifying interface\t%s",ctime(&now)); sprintf(dummystring,"%s.complex_energy.out",protein->parameters.output_prefix); fprintf(logfile_ptr,"interface_design\tinterface residues in %s\t%s",dummystring,ctime(&now)); fclose(logfile_ptr); } /* split and get interface residues */ sprintf(protein->parameters.algorithm,"STATIC_SEPARATE_COMPLEX"); complex_formation_energy_master(main_inputfile, protein); sprintf(protein->parameters.algorithm,"%s",input_algorithm); main_inputfile_file_ptr = fopen_file(main_inputfile,"r"); sprintf(dummystring,"%s.%d.multistate.input", protein->parameters.output_prefix,GET_PID); multistate_input_fileptr = fopen_file(dummystring,"w"); lookup_table_flag=0; output_prefix_flag=0; avail_proc_file_flag=0; other_residues_flag=0; /* copy stuff from the main inputfile */ fgets(line,MAXLINE,main_inputfile_file_ptr); sscanf(line,"%s",dummystring); convert_string_to_all_caps(dummystring); while(strncmp(dummystring,"END",3)!=0) { if(strstr(dummystring,"TEMPLATE")!=0) { fprintf(multistate_input_fileptr,"%s",line); /* if not 0 (unbound structure user-defined), will print to file below */ if(protein->num_competitors == 0) fprintf(multistate_input_fileptr,"FREE\t%s.free.pdb\n", protein->parameters.output_prefix); } else if(strstr(dummystring,"LOOKUP")!=0 && strstr(dummystring,"DIR")!=0 ) { lookup_table_flag=1; fprintf(multistate_input_fileptr,"%s",line); } else if(strcmp(dummystring,"JOBTYPE")==0 || strcmp(dummystring,"SLAVE_JOBTYPE")==0) { sscanf(line,"%s %s",dummystring,dummystring2); convert_string_to_all_caps(dummystring2); /* this jobtype line describes the rotamer optimization method, not the INTERFACE_DESIGN */ if(strstr(dummystring2,"DESIGN")==0 && strstr(dummystring2,"INTERFACE")==0) { fprintf(multistate_input_fileptr,"%s",line); } } else fprintf(multistate_input_fileptr,"%s",line); if(strstr(dummystring,"PREFIX")!=0) output_prefix_flag=1; if(strstr(dummystring,"CPU_FILE")!=0) avail_proc_file_flag=1; if(strstr(dummystring,"OTHER")!=0) other_residues_flag=1; fgets(line,MAXLINE,main_inputfile_file_ptr); sscanf(line,"%s",dummystring); convert_string_to_all_caps(dummystring); } if(avail_proc_file_flag==0) { if(NUM_SYSTEM_CPU > 0) fprintf(multistate_input_fileptr,"NUM_CPU\t%d\n",NUM_SYSTEM_CPU); else fprintf(multistate_input_fileptr,"CPU_FILE\t%s\n",AVAILABLE_PROCESSORS_FILE); } if(output_prefix_flag==0) fprintf(multistate_input_fileptr,"OUTPUT_PREFIX %s\n",protein->parameters.output_prefix); if(other_residues_flag==0) fprintf(multistate_input_fileptr,"OTHER_RESIDUES neighbors\n"); fclose(main_inputfile_file_ptr); fprintf(multistate_input_fileptr,"VARIABLE_POSITIONS\n"); sprintf(dummystring,"%s.complex_energy.out",protein->parameters.output_prefix); complex_info_file_ptr = fopen_file(dummystring,"r"); fgets(line,MAXLINE,complex_info_file_ptr); sscanf(line,"%s",dummystring); while(strcmp(dummystring,"INTERFACE_RESIDUES")!=0) { fgets(line,MAXLINE,complex_info_file_ptr); sscanf(line,"%s",dummystring); } fgets(line,MAXLINE,complex_info_file_ptr); sscanf(line,"%s %s",dummystring, dummystring2); while(strstr(dummystring,"COMMA")==0) { /* only level_1 non-PRO/GLY/CYD residues for scan; change in SASA upon complexation */ if(strcmp(dummystring2,"PRO")!=0 && strcmp(dummystring2,"GLY")!=0 && strcmp(dummystring2,"CYD")!=0 && strstr(line,"level_1")!=0) { fprintf(multistate_input_fileptr,"\t%s\tall\n",dummystring); } fgets(line,MAXLINE,complex_info_file_ptr); sscanf(line,"%s %s",dummystring,dummystring2); } fclose(complex_info_file_ptr); fprintf(multistate_input_fileptr,"END\n"); fclose(multistate_input_fileptr); /* launch */ if(LOGFILE_FLAG!=0) { now = time(NULL); logfile_ptr = fopen_file(logfilename,"a"); sprintf(dummystring,"%s.%d.multistate.input", protein->parameters.output_prefix,GET_PID); fprintf(logfile_ptr,"interface_design\tlaunch design job %s\t%s",dummystring, ctime(&now)); fclose(logfile_ptr); sprintf(dummyfilename,"%s.master",logfilename); mv_file(logfilename,dummyfilename); } sprintf(dummystring,"%s.%d.multistate.input", protein->parameters.output_prefix,GET_PID); sprintf(command,"%s %s",EXECUTABLE_FILENAME, dummystring); system(command); if(LOGFILE_FLAG!=0) { sprintf(dummyfilename,"%s.master",logfilename); append_file(logfilename,dummyfilename); mv_file(dummyfilename,logfilename); } if(CLEAN_UP_FLAG == 1) { sprintf(dummyfilename,"*%d*",GET_PID); rm_file(dummyfilename); } if(LOGFILE_FLAG!=0) free_memory(logfilename); free_memory(dummystring2); free_memory(dummystring); free_memory(line); free_memory(command); free_memory(input_algorithm); }