/* EGAD: pK_calculate.cpp Navin Pokala and Tracy Handel Dept. of Molecular and Cell Biology University of California, Berkeley Copyright (C) 2003 Regents of the University of California GNU Public License Aug 12 2003 Absolutely no warranties are made or are implied with the use of this program or its parts. This file contains the pK_calculate function */ #include "pK_calculate.h" /* uses scmf to calculate the pK of ionizable groups in a protein */ void pK_calculate(PROTEIN *protein) { int i, i_res, i_res_rot, num_ionized, q, n; FILE *output_file_ptr; double **pH_dep_energy; double P_prot, P_unprot; double *pKa, *frac_crg_previous; int *ionized_group_index; double *charged_pH2_flag; double bracket, charge; char *outputfilename, *command, *dummystring; extern double PH, RT; outputfilename = (char *)calloc(MAXLINE,sizeof(char)); command = (char *)calloc(MAXLINE,sizeof(char)); dummystring = (char *)calloc(MAXLINE,sizeof(char)); ionized_group_index = (int *)calloc(MAX_RESIDUES,sizeof(int)); charged_pH2_flag = (double *)calloc(MAX_RESIDUES,sizeof(double)); /* remove reference state energy for ionizable residues */ for(i=1;i<=protein->parameters.numVarPositions;++i) for(i_res=1; i_res<=protein->var_pos[i].number_of_choices; ++i_res) for(i_res_rot=1; i_res_rot<= protein->var_pos[i].choice[i_res].resparam_ptr->rotamerlib_ptr->number_of_rotamers; ++i_res_rot) protein->ERESROT.energy_var_fix = protein->ERESROT.energy_var_fix + protein->ERESROT.E_reference; /* intialize some variables, set up titr file */ sprintf(outputfilename,"%s.titr",protein->parameters.output_prefix); output_file_ptr = fopen(outputfilename, "w"); fprintf(output_file_ptr, "pH\t"); num_ionized = 0; pH_dep_energy = (double **)calloc(protein->parameters.numVarPositions+2, sizeof(double *)); for(i=1;i<=protein->parameters.numVarPositions;++i) { pH_dep_energy[i] = (double *)calloc(3, sizeof(double)); pH_dep_energy[i][1] = 0; pH_dep_energy[i][2] = 0; if(protein->var_pos[i].choice[1].resparam_ptr->pKa != 20) { ++num_ionized; ionized_group_index[num_ionized] = i; /* neutral at pH 2.0 */ if(strcmp(protein->var_pos[i].choice[1].resparam_ptr->residuetype, "ASP")==0 || strcmp(protein->var_pos[i].choice[1].resparam_ptr->residuetype, "GLU")==0 || strcmp(protein->var_pos[i].choice[1].resparam_ptr->residuetype, "TYR")==0 || strcmp(protein->var_pos[i].choice[1].resparam_ptr->residuetype, "ASH")==0 || strcmp(protein->var_pos[i].choice[1].resparam_ptr->residuetype, "GLH")==0 || strcmp(protein->var_pos[i].choice[1].resparam_ptr->residuetype, "TYC")==0 || strcmp(protein->var_pos[i].choice[1].resparam_ptr->residuetype, "CYS")==0 || strcmp(protein->var_pos[i].choice[1].resparam_ptr->residuetype, "CYH")==0) charged_pH2_flag[num_ionized] = 0; else charged_pH2_flag[num_ionized] = 1; /* charged at pH 2.0; his, lys */ n=1; while( protein->var_pos[i].seq_position != protein->seqpos_text_map[n].seq_position) ++n; if(fabs(protein->var_pos[i].choice[1].resparam_ptr->overall_charge)>1e-6) { fprintf(output_file_ptr, "%c%s\t", protein->var_pos[i].choice[1].resparam_ptr->one_letter_code[0], protein->seqpos_text_map[n].seqpos_text); } else fprintf(output_file_ptr, "%c%s\t", protein->var_pos[i].choice[2].resparam_ptr->one_letter_code[0], protein->seqpos_text_map[n].seqpos_text); } } fprintf(output_file_ptr,"\n"); fclose(output_file_ptr); pKa = (double *)calloc(num_ionized+2, sizeof(double)); frac_crg_previous = (double *)calloc(num_ionized+2, sizeof(double)); for(i=1;i<=num_ionized;++i) { if(charged_pH2_flag[i] == 0) pKa[i] = 0.5; else pKa[i] = 14.0; frac_crg_previous[i] = charged_pH2_flag[i]; } /* do scmf as a function of pH; pH dependency implemented by altering the reference state energy */ for(PH = 1.0; PH <= 13.5; PH = PH + 0.25) { protein->pH = PH; charge=0; for(i=1;i<=protein->parameters.numVarPositions; ++i) { if(protein->var_pos[i].choice[1].resparam_ptr->pKa != 20) { pH_dep_energy[i][1] = -protein->var_pos[i].choice[1].resparam_ptr->E_tripeptide; pH_dep_energy[i][2] = -protein->var_pos[i].choice[2].resparam_ptr->E_tripeptide; /* calculate the probabs of protonated and unprotonated forms at this pH for the model cmpds */ P_prot = exp(-(PH - protein->var_pos[i].choice[1].resparam_ptr->pKa)*LN10); P_prot = P_prot/(1.0 + P_prot); /* probability of the protonated form */ P_unprot = 1.0 - P_prot; /* probability of the unprotonated form */ /* penalize the rarer form with a pH-dependent energy RT(delta_pK)ln10 */ if(protein->var_pos[i].choice[1].resparam_ptr->protonation_flag == 1) /* 1 is the protonated form */ { if(P_prot < P_unprot) /* this form is unfavorable at this pH; penalize it */ pH_dep_energy[i][1] += RT*(PH - protein->var_pos[i].choice[1].resparam_ptr->pKa)*LN10; else pH_dep_energy[i][2] += RT*(protein->var_pos[i].choice[2].resparam_ptr->pKa - PH)*LN10; } else /* 1 is the deprotonated form */ { if(P_unprot < P_prot) /* this form is unfavorable at this pH; penalize it */ pH_dep_energy[i][1] += RT*(protein->var_pos[i].choice[1].resparam_ptr->pKa - PH)*LN10; else pH_dep_energy[i][2] += RT*(PH - protein->var_pos[i].choice[2].resparam_ptr->pKa)*LN10; } i_res = 1; for(i_res_rot=1; i_res_rot<= protein->var_pos[i].choice[i_res].resparam_ptr->rotamerlib_ptr->number_of_rotamers; ++i_res_rot) protein->ERESROT.energy_var_fix += pH_dep_energy[i][i_res]; i_res = 2; for(i_res_rot=1; i_res_rot<= protein->var_pos[i].choice[i_res].resparam_ptr->rotamerlib_ptr->number_of_rotamers; ++i_res_rot) protein->ERESROT.energy_var_fix += pH_dep_energy[i][i_res]; } } scmf(protein); free_memory(protein->final_energy); free_memory(protein->final_pdb); free_CHROMOSOME(&protein->final_chr); output_file_ptr = fopen(outputfilename, "a"); fprintf(output_file_ptr, "%.2lf\t", PH); /* print probab of charged form to .titr output file */ q=1; for(i=1;i<=protein->parameters.numVarPositions; ++i) { for(i_res=1; i_res<=protein->var_pos[i].number_of_choices; ++i_res) { if(protein->var_pos[i].choice[i_res].resparam_ptr->pKa != 20) { if(fabs(protein->var_pos[i].choice[i_res].resparam_ptr->overall_charge)>1e-6) { fprintf(output_file_ptr, "%.2lf\t", protein->lookupEnergy[i].lookupRes[i_res].P); charge += protein->lookupEnergy[i].lookupRes[i_res].P*protein->var_pos[i].choice[i_res].resparam_ptr->overall_charge; /* pK defined as the pH at which Probab(prot)=Probab(unprot)=0.5 */ /* if we just crossed the transition at the current pH, use linear-extrapolation with the previous pH in order to estimate the true pK_1/2 */ if(charged_pH2_flag[q] == 0) { if(protein->lookupEnergy[i].lookupRes[i_res].P >= 0.5 && pKa[q] == 0.5) { bracket = fabs(frac_crg_previous[q] - protein->lookupEnergy[i].lookupRes[i_res].P); pKa[q] = (fabs(bracket - fabs(frac_crg_previous[q] - 0.5))*(PH - 0.25) + fabs(bracket - fabs(protein->lookupEnergy[i].lookupRes[i_res].P - 0.5))*PH)/bracket; } } else { if(protein->lookupEnergy[i].lookupRes[i_res].P <= 0.5 && pKa[q] == 14.0) { bracket = fabs(frac_crg_previous[q] - protein->lookupEnergy[i].lookupRes[i_res].P); pKa[q] = (fabs(bracket - fabs(frac_crg_previous[q] - 0.5))*(PH - 0.25) + fabs(bracket - fabs(protein->lookupEnergy[i].lookupRes[i_res].P - 0.5))*PH)/bracket; } } frac_crg_previous[q] = protein->lookupEnergy[i].lookupRes[i_res].P; ++q; } for(i_res_rot=1; i_res_rot<= protein->var_pos[i].choice[i_res].resparam_ptr->rotamerlib_ptr->number_of_rotamers; ++i_res_rot) { protein->ERESROT.energy_var_fix = protein->ERESROT.energy_var_fix - pH_dep_energy[i][i_res]; } } } } fprintf(output_file_ptr,"%lf\t%lf\n", charge, protein->E_scmf); /* print mean-field-energy for the protein */ fclose(output_file_ptr); } /* print out final pKs */ sprintf(outputfilename,"%s.pK",protein->parameters.output_prefix); output_file_ptr = fopen(outputfilename, "w"); fprintf(output_file_ptr, "aa#\tres\tpK\tdpK\n"); for(q=1;q<=num_ionized;++q) { if(fabs(frac_crg_previous[q] - charged_pH2_flag[q])<=0.01) pKa[q] = 14.0; n=1; while(protein->var_pos[ionized_group_index[q]].seq_position != protein->seqpos_text_map[n].seq_position) ++n; if(pKa[q]<=1.0 || pKa[q]>=14.0) fprintf(output_file_ptr, "%s\t%c\tnot_titr - \n", protein->seqpos_text_map[n].seqpos_text, protein->var_pos[ionized_group_index[q]].choice[1].resparam_ptr->one_letter_code[0]); else fprintf(output_file_ptr, "%s\t%c\t%.2lf\t%.2lf\n", protein->seqpos_text_map[n].seqpos_text, protein->var_pos[ionized_group_index[q]].choice[1].resparam_ptr->one_letter_code[0], pKa[q], pKa[q] - protein->var_pos[ionized_group_index[q]].choice[1].resparam_ptr->pKa ); } fclose(output_file_ptr); free_memory(ionized_group_index); free_memory(charged_pH2_flag); free_memory(outputfilename); free_memory(command); free_memory(dummystring); }