/* EGAD: readpdbfile.cpp

   Navin Pokala and Tracy Handel
   Dept. of Molecular and Cell Biology
   University of California, Berkeley

   Copyright (C) 2003 Regents of the University of California
   GNU Public License
   Aug 12 2003

   Absolutely no warranties are made or are implied with the use of this
   program or its parts.

   This file contains the readpdbfile function.
*/

#include "readpdbfile.h"

INVARIABLE_POSITIONS *INVAR_POS;

/*
 * is_this_a_ligand
 *
 * Looks residuetype up in the resparam table and returns the ligand_flag
 * of the matching entry.
 *
 *   residuetype  residue name to look up; compared with strcmp, so the
 *                match is exact and case-sensitive
 *   resparam     residue parameter table; the scan starts at index 1 and
 *                stops at the first matching entry or at the entry whose
 *                residuetype is "zzz"
 *
 * "MSE" is answered with 0 without consulting the table.
 *
 * When the "zzz" entry is reached before a match, the residue type is
 * unknown and the problem is handed to failure_report(..., "exit").
 * NOTE(review): failure_report is presumably fatal when given "exit" --
 * confirm.  Should it ever return, 0 is returned here so the scan cannot
 * walk past the end of the table.
 */
int is_this_a_ligand(char *residuetype, RESPARAM *resparam)
{
    /* Fixed local buffer: nothing to allocate, check or free on the error
       path, and snprintf keeps an over-long residue name from overflowing. */
    char message[128];
    int i;

    if (strcmp(residuetype, "MSE") == 0)
        return 0;

    /* The match test runs before the "zzz" test, so a caller that really
       asks for "zzz" still receives that entry's ligand_flag. */
    for (i = 1; strcmp(resparam[i].residuetype, residuetype) != 0; ++i)
    {
        if (strcmp(resparam[i].residuetype, "zzz") == 0)
        {
            snprintf(message, sizeof message,
                     "Do not recognize residuetype %s", residuetype);
            failure_report(message, "exit");
            return 0;
        }
    }

    return resparam[i].ligand_flag;
}

void optimize_asn_gln_his_rotamers(pdbATOM *input_pdb, CHROMOSOME *full_structure_chr, RESPARAM *resparam) { mini_pdbATOM *non_NQH_minipdb; CHROMOSOME chr_NQH; int i,j,k,num_asn_gln_his, *soln_vector, *final_soln_vector; int flag; double score, current_score; double **fixed_energies, ****pair_energies; mini_pdbATOM ***NQH_minipdb, *sideAtoms; var_fix_ENERGY var_fix_energy; COULOMBIC *first_coulombic, *coulombic; ENERGY side_internal_energy; int gb_flag, sasa_flag; extern int GB_FLAG, SASA_FLAG; double vdw_attr_factor, coul_const_diel; double **vdw_linear_switchpoint; // int hb_func_flag,hb_func_type; // extern int HBOND_FUNCTION_FLAG, HBOND_FUNCTION_TYPE; extern double VDW_ATTRACTIVE_FACTOR, COULOMB_CONST_OVER_INTERNAL_DIELECTRIC; extern double **VDW_LINEAR_SWITCHPOINT; full_structure_chr->genes = full_structure_chr->firstgene; num_asn_gln_his=0; while(full_structure_chr->genes->seq_position!=ENDFLAG) { if(strcmp(full_structure_chr->genes->choice_ptr->resparam_ptr->residuetype,"ASN")==0 || strcmp(full_structure_chr->genes->choice_ptr->resparam_ptr->residuetype,"GLN")==0 || strcmp(full_structure_chr->genes->choice_ptr->resparam_ptr->residuetype,"HIS")==0) 
++num_asn_gln_his; full_structure_chr->genes = full_structure_chr->genes->nextgene; } full_structure_chr->genes = full_structure_chr->firstgene; if(num_asn_gln_his==0) return; gb_flag=GB_FLAG; sasa_flag=SASA_FLAG; vdw_attr_factor=VDW_ATTRACTIVE_FACTOR; coul_const_diel=COULOMB_CONST_OVER_INTERNAL_DIELECTRIC; vdw_linear_switchpoint = VDW_LINEAR_SWITCHPOINT; GB_FLAG=0; SASA_FLAG=0; // VDW_LINEAR_SWITCHPOINT = NULL; VDW_ATTRACTIVE_FACTOR=1; COULOMB_CONST_OVER_INTERNAL_DIELECTRIC=COULOMB_CONST; // hb_func_flag = HBOND_FUNCTION_FLAG; hb_func_type = HBOND_FUNCTION_TYPE; // COULOMB_CONST_OVER_INTERNAL_DIELECTRIC=0; HBOND_FUNCTION_FLAG = 1; HBOND_FUNCTION_TYPE = 3; ENERGY_0(side_internal_energy); first_coulombic=NULL; coulombic = NULL; non_NQH_minipdb = (mini_pdbATOM *)calloc(MAX_ATOMS,sizeof(mini_pdbATOM)); sideAtoms = (mini_pdbATOM *)calloc(MAX_RES_SIZE,sizeof(mini_pdbATOM)); chr_NQH.genes = (MENDEL *)malloc(sizeof(MENDEL)); chr_NQH.firstgene = chr_NQH.genes; chr_NQH.bkbngenes = NULL; chr_NQH.first_bkbngene = NULL; full_structure_chr->genes = full_structure_chr->firstgene; while(full_structure_chr->genes->seq_position!=ENDFLAG) { if(strcmp(full_structure_chr->genes->choice_ptr->resparam_ptr->residuetype,"ASN")==0 || strcmp(full_structure_chr->genes->choice_ptr->resparam_ptr->residuetype,"GLN")==0 || strcmp(full_structure_chr->genes->choice_ptr->resparam_ptr->residuetype,"HIS")==0) { copyGENE(chr_NQH.genes,full_structure_chr->genes); chr_NQH.genes->nextgene = (MENDEL *)malloc(sizeof(MENDEL)); chr_NQH.genes = chr_NQH.genes->nextgene; chr_NQH.genes->seq_position=ENDFLAG; chr_NQH.genes->nextgene=NULL; } full_structure_chr->genes = full_structure_chr->genes->nextgene; } full_structure_chr->genes = full_structure_chr->firstgene; chr_NQH.genes = chr_NQH.firstgene; soln_vector = (int *)calloc(num_asn_gln_his+2,sizeof(int)); final_soln_vector = (int *)calloc(num_asn_gln_his+2,sizeof(int)); fixed_energies = (double **)calloc(num_asn_gln_his+2,sizeof(double *)); NQH_minipdb = 
(mini_pdbATOM ***)calloc(num_asn_gln_his+2,sizeof(mini_pdbATOM **)); for(i=1;i<=num_asn_gln_his;++i) { fixed_energies[i] = (double *)calloc(2,sizeof(double)); NQH_minipdb[i] = (mini_pdbATOM **)calloc(2,sizeof(mini_pdbATOM *)); NQH_minipdb[i][0] = (mini_pdbATOM *)calloc(MAX_RES_SIZE,sizeof(mini_pdbATOM)); NQH_minipdb[i][1] = (mini_pdbATOM *)calloc(MAX_RES_SIZE,sizeof(mini_pdbATOM)); } i=1; j=1; k=0; while(input_pdb[i].seq_position!=ENDFLAG) { if(strcmp(input_pdb[i].residuetype,"ASN")!=0 && strcmp(input_pdb[i].residuetype,"GLN")!=0 && strcmp(input_pdb[i].residuetype,"HIS")!=0) { non_NQH_minipdb[j].seq_position = input_pdb[i].seq_position; non_NQH_minipdb[j].coord = input_pdb[i].coord; non_NQH_minipdb[j].atom_ptr = input_pdb[i].atom_ptr; ++j; } else { if(input_pdb[i].atom_ptr->other_info<0 && strcmp(input_pdb[i].atom_ptr->atomname,"CB")!=0) /* bkbn for NHQ */ { non_NQH_minipdb[j].seq_position = input_pdb[i].seq_position; non_NQH_minipdb[j].coord = input_pdb[i].coord; non_NQH_minipdb[j].atom_ptr = input_pdb[i].atom_ptr; ++j; } else if(strcmp(input_pdb[i].atom_ptr->atomname,"CB")==0) /* CB for NHQ into sidechain arrays */ { ++k; NQH_minipdb[k][0][1].seq_position = input_pdb[i].seq_position; NQH_minipdb[k][0][1].coord = input_pdb[i].coord; NQH_minipdb[k][0][1].atom_ptr = input_pdb[i].atom_ptr; NQH_minipdb[k][1][1].seq_position = input_pdb[i].seq_position; NQH_minipdb[k][1][1].coord = input_pdb[i].coord; NQH_minipdb[k][1][1].atom_ptr = input_pdb[i].atom_ptr; } } ++i; } non_NQH_minipdb[j].seq_position = input_pdb[i].seq_position; non_NQH_minipdb[j].coord = input_pdb[i].coord; non_NQH_minipdb[j].atom_ptr = input_pdb[i].atom_ptr; /* calculate structures and fixed energies for each choice */ chr_NQH.genes = chr_NQH.firstgene; i=1; while(chr_NQH.genes->seq_position!=ENDFLAG) { GENE_to_mini_pdbATOM(&chr_NQH.genes, sideAtoms); j=2; while(sideAtoms[j-1].seq_position!=ENDFLAG) { NQH_minipdb[i][0][j] = sideAtoms[j-1]; ++j; } NQH_minipdb[i][0][j] = sideAtoms[j-1]; var_fix_energy = 
var_fixed_energy_calc(non_NQH_minipdb, NQH_minipdb[i][0], 0); side_internal_energy = sidechain_internal_energy(NQH_minipdb[i][0], first_coulombic, coulombic); fixed_energies[i][0] = var_fix_ENERGY_TOTAL(var_fix_energy) + ENERGY_TOTAL(side_internal_energy); chr_NQH.genes->chi[chr_NQH.genes->choice_ptr->resparam_ptr->rotamerlib_ptr->numChi] += 180.0; GENE_to_mini_pdbATOM(&chr_NQH.genes, sideAtoms); j=2; while(sideAtoms[j-1].seq_position!=ENDFLAG) { NQH_minipdb[i][1][j] = sideAtoms[j-1]; ++j; } NQH_minipdb[i][1][j] = sideAtoms[j-1]; var_fix_energy = var_fixed_energy_calc(non_NQH_minipdb, NQH_minipdb[i][1], 0); side_internal_energy = sidechain_internal_energy(NQH_minipdb[i][1], first_coulombic, coulombic); fixed_energies[i][1] = var_fix_ENERGY_TOTAL(var_fix_energy) + ENERGY_TOTAL(side_internal_energy); chr_NQH.genes->chi[chr_NQH.genes->choice_ptr->resparam_ptr->rotamerlib_ptr->numChi] -= 180.0; chr_NQH.genes = chr_NQH.genes->nextgene; ++i; } chr_NQH.genes = chr_NQH.firstgene; pair_energies = (double ****)calloc(num_asn_gln_his+2,sizeof(double ***)); for(i=1;i<=num_asn_gln_his;++i) { pair_energies[i] = (double ***)calloc(2,sizeof(double **)); pair_energies[i][0] = (double **)calloc(num_asn_gln_his+2,sizeof(double *)); pair_energies[i][1] = (double **)calloc(num_asn_gln_his+2,sizeof(double *)); for(j=i+1;j<=num_asn_gln_his;++j) { pair_energies[i][0][j] = (double *)calloc(2,sizeof(double)); var_fix_energy = var_fixed_energy_calc(NQH_minipdb[i][0], NQH_minipdb[j][0], 0); pair_energies[i][0][j][0] = var_fix_ENERGY_TOTAL(var_fix_energy); var_fix_energy = var_fixed_energy_calc(NQH_minipdb[i][0], NQH_minipdb[j][1], 0); pair_energies[i][0][j][1] = var_fix_ENERGY_TOTAL(var_fix_energy); pair_energies[i][1][j] = (double *)calloc(2,sizeof(double)); var_fix_energy = var_fixed_energy_calc(NQH_minipdb[i][1], NQH_minipdb[j][0], 0); pair_energies[i][1][j][0] = var_fix_ENERGY_TOTAL(var_fix_energy); var_fix_energy = var_fixed_energy_calc(NQH_minipdb[i][1], NQH_minipdb[j][1], 0); 
pair_energies[i][1][j][1] = var_fix_ENERGY_TOTAL(var_fix_energy); } } for(i=1;i<=num_asn_gln_his;++i) { soln_vector[i]=0; final_soln_vector[i]=0; } flag=0; while(flag==0) { flag=1; for(i=1;i<=num_asn_gln_his;++i) { current_score=fixed_energies[i][final_soln_vector[i]]; for(j=i+1;j<=num_asn_gln_his;++j) current_score += pair_energies[i][final_soln_vector[i]][j][final_soln_vector[j]]; for(j=1;jseq_position!=ENDFLAG) { if(final_soln_vector[i]==1) if(QUIET_FLAG==0) fprintf(stderr,"flip %d %s chi%d 180.0\n", chr_NQH.genes->seq_position, chr_NQH.genes->choice_ptr->resparam_ptr->residuetype, chr_NQH.genes->choice_ptr->resparam_ptr->rotamerlib_ptr->numChi); chr_NQH.genes->chi[chr_NQH.genes->choice_ptr->resparam_ptr->rotamerlib_ptr->numChi] += 180.0*final_soln_vector[i]; chr_NQH.genes = chr_NQH.genes->nextgene; ++i; } free_CHROMOSOME(&chr_NQH); free_memory(non_NQH_minipdb); for(i=1;i<=num_asn_gln_his;++i) { for(j=i+1;j<=num_asn_gln_his;++j) { free_memory(pair_energies[i][0][j]); free_memory(pair_energies[i][1][j]); } free_memory(pair_energies[i][0]); free_memory(pair_energies[i][1]); free_memory(pair_energies[i]); free_memory(fixed_energies[i]); free_memory(NQH_minipdb[i][0]); free_memory(NQH_minipdb[i][1]); free_memory(NQH_minipdb[i]); } free_memory(pair_energies); free_memory(fixed_energies); free_memory(NQH_minipdb); free_memory(sideAtoms); free_memory(soln_vector); free_memory(final_soln_vector); GB_FLAG=gb_flag; SASA_FLAG=sasa_flag; VDW_ATTRACTIVE_FACTOR=vdw_attr_factor; COULOMB_CONST_OVER_INTERNAL_DIELECTRIC=coul_const_diel; VDW_LINEAR_SWITCHPOINT = vdw_linear_switchpoint; // HBOND_FUNCTION_FLAG = hb_func_flag; HBOND_FUNCTION_TYPE = hb_func_type; } /* re-assign CYS to CYD for disulfide-bonds */ void cys_to_cyd_disulfides(pdbATOM *pdb, int num_cys, int *index_sg, char *chain_id_list, SUPER_CHAIN_ID_LIST **super_chain_id_list) { int i, j, k, superchain_index, chain_id_index,stopflag,i_found,j_found; div_t chain_id_i,chain_id_j; superchain_index=1; chain_id_index=1; 
for(i=1;i<=num_cys;++i) { for(j=i+1;j<=num_cys;++j) { if(Distance(pdb[index_sg[i]].coord, pdb[index_sg[j]].coord) <= MAX_DISULFIDE_BOND_LENGTH) /* probably disulfide; change both to CYD */ { if(QUIET_FLAG==0) fprintf(stderr,"disulfide found between %d & %d\n", pdb[index_sg[i]].seq_position, pdb[index_sg[j]].seq_position); if(chain_id_list==NULL) { if(QUIET_FLAG==0) fprintf(stderr,"disulfide found between %d & %d\n", pdb[index_sg[i]].seq_position, pdb[index_sg[j]].seq_position); } else { chain_id_i = div(pdb[index_sg[i]].seq_position, 1000); chain_id_j = div(pdb[index_sg[j]].seq_position, 1000); /* not same chain ids; therefore this is a disulfide-linked dimer */ if(chain_id_list[chain_id_i.quot+1] != chain_id_list[chain_id_j.quot+1]) { if(QUIET_FLAG==0) fprintf(stderr,"chains %c and %c are linked by a disulfide between %d%c & %d%c\n", chain_id_list[chain_id_i.quot+1],chain_id_list[chain_id_j.quot+1], chain_id_i.rem, chain_id_list[chain_id_i.quot+1], chain_id_j.rem, chain_id_list[chain_id_j.quot+1]); if(*super_chain_id_list==NULL) { *super_chain_id_list = (SUPER_CHAIN_ID_LIST *)calloc(MAX_CHAINS,sizeof(SUPER_CHAIN_ID_LIST)); for(superchain_index=1;superchain_indexwacky_numbering_list == NULL) { protein->wacky_numbering_list = (char *)calloc(MAX_RESIDUES, sizeof(char)); a=0; while(awacky_numbering_list[a] = ' '; ++a; } } } if(dummy != x) { ++num_residues; dummy = x; } } } fclose(input); /* return 0 if MAX_RESIDUES is too small; will reset here; 0 means try again */ if(MAX_RESIDUES <= num_residues) { MAX_RESIDUES = num_residues + MAX_LIGAND + 4; if(protein->wacky_numbering_list != NULL) { free_memory(protein->wacky_numbering_list); protein->wacky_numbering_list = (char *)calloc(MAX_RESIDUES, sizeof(char)); a=0; while(awacky_numbering_list[a] = ' '; ++a; } } MAX_ATOMS = MAX_PROT_RES_SIZE*MAX_RESIDUES + 10; free_memory(line); free_memory(lineflag); return(0); } /* return 0 if resparam hasn't been read yet */ if(protein->resparam==NULL) return(0); /* read the file for 
real */ input=fopen_file(pdbfilename,"r"); index_sg = (int *)calloc(MAX_RESIDUES,sizeof(int)); num_cys=0; num_invar=0; protein->seqpos_text_map = (SEQPOS_TEXT_MAPPING_LIST *)calloc(MAX_RESIDUES,sizeof(SEQPOS_TEXT_MAPPING_LIST)); ligand_tempfilename = (char *)calloc(MAXLINE, sizeof(char)); sprintf(ligand_tempfilename,"temp.%d.lig",GET_PID); i=1; num_res = 0; j=0; chain_ctr = 0; missing_atom_flag = 0; current_seq_pos=0; chain_id = 'A'; chain_id_current = ' '; worry_about_backbone=0; N_found = 0; C_found=0; O_found=0; CA_found=0; previous_seq=0; true_multichain_flag=0; while(fgets(line,MAXLINE,input)!=NULL /* && (strncmp(line,"TER",3)!=0 && strncmp(line,"END",3)!=0) */ ) { if(strstr(line,"SUPER_CHAIN")!=0) { if(protein->super_chain_id_list==NULL) /* use the pdbfile def iff not defined in inputfile */ { protein->super_chain_id_list = (SUPER_CHAIN_ID_LIST *)calloc(MAX_CHAINS,sizeof(SUPER_CHAIN_ID_LIST)); for(b=1;bsuper_chain_id_list[b].chain_id=NULL; x = extract_words(line, word); /* number of chains listed is num_words - 1 */ b=1; while(protein->super_chain_id_list[b].chain_id!=NULL) ++b; protein->super_chain_id_list[b].chain_id = (char *)calloc(MAX_CHAINS,sizeof(char)); for(a=2;a<=x;++a) protein->super_chain_id_list[b].chain_id[a-1] = word[a][0]; protein->super_chain_id_list[b].chain_id[x] = '\0'; } } if(strncmp(line,"ATOM",4)==0) { for(a=0;a<20;++a) dummystring[a] = '\0'; a=0; b = 17; while(b<=19) { dummystring[a] = line[b]; ++a; ++b; } if(is_this_a_ligand(dummystring, protein->resparam)==1) { line[0] = 'L'; line[1] = 'I'; line[2] = 'G'; } } /* put ligands into temp files; read later in initialize_stuff_for_ligands */ if(strncmp(line,"LIG",3)==0) { for(a=0;a<20;++a) dummystring[a] = '\0'; a=0; b = 17; while(b<=19) { dummystring[a] = line[b]; ++a; ++b; } sscanf(dummystring, "%s", dumbAtom.residuetype); sprintf(ligand_tempfilename,"temp.%d.%s.lig",GET_PID,dumbAtom.residuetype); if(does_this_file_exist(ligand_tempfilename)==0) { ligand_file_ptr = 
fopen_file(ligand_tempfilename,"w"); fprintf(ligand_file_ptr,"START\n"); fclose(ligand_file_ptr); } ligand_file_ptr = fopen_file(ligand_tempfilename,"a"); fprintf(ligand_file_ptr,"%s",line); fclose(ligand_file_ptr); } else if(strncmp(line,"ATOM",4)==0) { if(does_this_file_exist(ligand_tempfilename)==1) { ligand_file_ptr = fopen_file(ligand_tempfilename,"a"); fprintf(ligand_file_ptr,"END\n"); fclose(ligand_file_ptr); } if(NEW_CHAIN_ID != OLD_CHAIN_ID) if(line[21] == OLD_CHAIN_ID) line[21] = NEW_CHAIN_ID; for(a=0;a<20;++a) dummystring[a] = '\0'; a=0; b = 6; while(b<=10) { dummystring[a] = line[b]; ++a; ++b; } sscanf(dummystring, "%d", &dumbAtom.atom_number); for(a=0;a<20;++a) dummystring[a] = '\0'; a=0; b = 12; while(b<=15) { dummystring[a] = line[b]; ++a; ++b; } sscanf(dummystring, "%s", dumbAtom.atomname); for(a=0;a<20;++a) dummystring[a] = '\0'; a=0; b = 16; while(b<=16) { dummystring[a] = line[b]; ++a; ++b; } sscanf(dummystring, "%c", &altconf); if(altconf == '1') altconf = 'A'; for(a=0;a<20;++a) dummystring[a] = '\0'; a=0; b = 17; while(b<=19) { dummystring[a] = line[b]; ++a; ++b; } sscanf(dummystring, "%s", dumbAtom.residuetype); /* selenomethionine - substitute w/ met */ if(strcmp(dumbAtom.residuetype,"MSE")==0) { strcpy(dumbAtom.residuetype,"MET"); if(strcmp(dumbAtom.atomname,"SE")==0) strcpy(dumbAtom.atomname,"SD"); } if(strcmp(dumbAtom.residuetype,"CYS")==0 && strcmp(dumbAtom.atomname,"SG")==0) if(IGNORE_DISULFIDE_FLAG==0) /* don't ignore disulfides, by default */ { ++num_cys; index_sg[num_cys] = i; } for(a=0;a<20;++a) dummystring[a] = '\0'; a=0; b = 21; while(b<=21) { dummystring[a] = line[b]; ++a; ++b; } sscanf(dummystring, "%c", &chain_id); dumbAtom.chain_id = chain_id; for(a=0;a<20;++a) dummystring[a] = '\0'; a=0; b = 22; while(b<=25) { dummystring[a] = line[b]; ++a; ++b; } sscanf(dummystring, "%d", &seq_pos_text); dumbAtom.seq_position = seq_pos_text; for(a=0;a<20;++a) dummystring[a] = '\0'; a=0; b = 26; while(b<=26) { dummystring[a] = line[b]; ++a; 
++b; } sscanf(dummystring, "%c", &wacky_numbering_char); if(wacky_numbering_char != ' ') { if(QUIET_FLAG==0) fprintf(stderr, "wacky numbering %c %d%c\n", chain_id, dumbAtom.seq_position, wacky_numbering_char); dumbAtom.seq_position = dumbAtom.seq_position + wacky_numbering_char; } if(previous_seq !=0 && chain_id == chain_id_current) { if(abs(dumbAtom.seq_position - previous_seq) > 1 && chain_id == chain_id_current) { if(QUIET_FLAG==0) fprintf(stderr, "WARNING Missing atoms for %c %d to %d....Will ignore\n", chain_id, previous_seq, dumbAtom.seq_position); missing_atom_flag = 1; } else { if(dumbAtom.seq_position != previous_seq) { previous_seq = dumbAtom.seq_position; } } } else previous_seq = dumbAtom.seq_position; if(current_seq_pos == 0 ) { ++num_res; if(chain_id != ' ') { if(protein->wacky_numbering_list != NULL) { protein->wacky_numbering_list[num_res] = wacky_numbering_char; if(wacky_numbering_char != ' ') sprintf(protein->seqpos_text_map[num_res].seqpos_text,"%d%c%c",seq_pos_text, wacky_numbering_char, chain_id); else sprintf(protein->seqpos_text_map[num_res].seqpos_text,"%d%c",seq_pos_text, chain_id); } else sprintf(protein->seqpos_text_map[num_res].seqpos_text,"%d%c",seq_pos_text, chain_id); } else { if(protein->wacky_numbering_list != NULL) { protein->wacky_numbering_list[num_res] = wacky_numbering_char; if(wacky_numbering_char != ' ') sprintf(protein->seqpos_text_map[num_res].seqpos_text,"%d%c",seq_pos_text, wacky_numbering_char); else sprintf(protein->seqpos_text_map[num_res].seqpos_text,"%d",seq_pos_text); } else sprintf(protein->seqpos_text_map[num_res].seqpos_text,"%d",seq_pos_text); } current_seq_pos = dumbAtom.seq_position; } if(current_seq_pos != dumbAtom.seq_position) { if(worry_about_backbone == 1) { if(N_found == 0) { fprintf(stderr, "ERROR N for residue %d not found\n", current_seq_pos); exit(1); } if(C_found == 0) { fprintf(stderr, "ERROR C for residue %d not found\n", current_seq_pos); exit(1); } if(O_found == 0) { fprintf(stderr, "ERROR O for 
residue %d not found\n", current_seq_pos); exit(1); } if(CA_found == 0) { fprintf(stderr, "ERROR CA for residue %d not found\n", current_seq_pos); exit(1); } } ++num_res; if(chain_id != ' ') { if(protein->wacky_numbering_list != NULL) { protein->wacky_numbering_list[num_res] = wacky_numbering_char; if(wacky_numbering_char != ' ') sprintf(protein->seqpos_text_map[num_res].seqpos_text,"%d%c%c",seq_pos_text, wacky_numbering_char, chain_id); else sprintf(protein->seqpos_text_map[num_res].seqpos_text,"%d%c",seq_pos_text, chain_id); } else sprintf(protein->seqpos_text_map[num_res].seqpos_text,"%d%c",seq_pos_text, chain_id); } else { if(protein->wacky_numbering_list != NULL) { protein->wacky_numbering_list[num_res] = wacky_numbering_char; if(wacky_numbering_char != ' ') sprintf(protein->seqpos_text_map[num_res].seqpos_text,"%d%c",seq_pos_text, wacky_numbering_char); else sprintf(protein->seqpos_text_map[num_res].seqpos_text,"%d",seq_pos_text); } else sprintf(protein->seqpos_text_map[num_res].seqpos_text,"%d",seq_pos_text); } current_seq_pos = dumbAtom.seq_position; worry_about_backbone=0; N_found = 0; C_found=0; O_found=0; CA_found=0; } if(strcmp(dumbAtom.atomname, "N")==0) { N_found=1; worry_about_backbone=1; if(strcmp(dumbAtom.residuetype,"PRO")==0 || strcmp(dumbAtom.residuetype,"GLY")==0) ++num_invar; } else if(strcmp(dumbAtom.atomname, "C")==0) { C_found=1; worry_about_backbone=1; } else if( strcmp(dumbAtom.atomname, "O")==0 || strcmp(dumbAtom.atomname, "OT")==0 || strcmp(dumbAtom.atomname, "OXT")==0 ) { O_found=1; worry_about_backbone=1; } else if(strcmp(dumbAtom.atomname, "CA")==0) { CA_found=1; worry_about_backbone=1; } if(dumbAtom.seq_position<=0) { fprintf(stderr, "ERROR Sequence position less than or equal to zero!!\n"); exit(1); } for(a=0;a<20;++a) dummystring[a] = '\0'; a=0; b = 30; while(b<=37) { dummystring[a] = line[b]; ++a; ++b; } sscanf(dummystring, "%lf", &dumbAtom.coord.x); for(a=0;a<20;++a) dummystring[a] = '\0'; a=0; b = 38; while(b<=45) { 
dummystring[a] = line[b]; ++a; ++b; } sscanf(dummystring, "%lf", &dumbAtom.coord.y); for(a=0;a<20;++a) dummystring[a] = '\0'; a=0; b = 46; while(b<=53) { dummystring[a] = line[b]; ++a; ++b; } sscanf(dummystring, "%lf", &dumbAtom.coord.z); dumbAtom.born_radius = 1; dumbAtom.sasa = 0; if(chain_id != ' ' || missing_atom_flag==1 ) /* this file has multiple chains or missing residues */ { if(protein->chain_id_list == NULL) /* allocate memory */ { protein->chain_id_list = (char *)calloc(MAX_CHAINS, sizeof(char)); a=0; while(achain_id_list[a] = ' '; ++a; } protein->chain_id_list[MAX_CHAINS] = '\0'; } if(chain_id != chain_id_current || abs(dumbAtom.seq_position - previous_seq) > 1 ) /* new chain or missing residue */ { if(chain_ctr == 0 && abs(dumbAtom.seq_position - previous_seq) > 1) { chain_ctr = 1; chain_id_current = chain_id; protein->chain_id_list[chain_ctr] = chain_id; } if(chain_id!=' ' && chain_id != chain_id_current) { if(QUIET_FLAG==0) fprintf(stderr,"Reading chain %c\n",chain_id); ++true_multichain_flag; } ++chain_ctr; previous_seq = 0; chain_id_current = chain_id; protein->chain_id_list[chain_ctr] = chain_id; } dumbAtom.seq_position = dumbAtom.seq_position + (chain_ctr-1)*1000; } protein->seqpos_text_map[num_res].seq_position = dumbAtom.seq_position; if(altconf == 'A') /* alternate conformation; take A by default */ { if(QUIET_FLAG==0) fprintf(stderr, "WARNING alternate conformer detected for %d %s; will take A \n", dumbAtom.seq_position,dumbAtom.residuetype); } if(altconf == 'A' || altconf == ' ') { if(strcmp(dumbAtom.atomname, "HN")==0) strcpy(dumbAtom.atomname, "H"); if(strcmp(dumbAtom.residuetype, "GLY")==0 && strcmp(dumbAtom.atomname, "HA")==0) strcpy(dumbAtom.atomname, "1HA"); if(CTE_FLAG == 1) { if(strcmp(dumbAtom.atomname, "OXT")==0) strcpy(dumbAtom.atomname, "OT"); } else /* we are ignoring charged c-term */ if(strcmp(dumbAtom.atomname, "OXT")==0 || strcmp(dumbAtom.atomname, "OT")==0) { if(QUIET_FLAG==0) fprintf(stderr,"WARNING Converting C-term OXT 
to O since CTE_FLAG = 0\n"); strcpy(dumbAtom.atomname, "O"); } if(NTE_FLAG == 0) /* we are ignoring charged N-term */ { if(strcmp(dumbAtom.atomname, "2H")==0 || strcmp(dumbAtom.atomname, "3H")==0) { if(QUIET_FLAG==0) fprintf(stderr,"WARNING Ignoring charged N-term %s since NTE_FLAG = 0\n",dumbAtom.atomname); strcpy(dumbAtom.atomname, "H"); strcpy(dumbAtom.residuetype,"UUU"); } if(strcmp(dumbAtom.atomname, "1H")==0) { if(QUIET_FLAG==0) fprintf(stderr,"WARNING Converting N-term 1H to H since NTE_FLAG = 0\n"); strcpy(dumbAtom.atomname, "H"); } } strcpy(dumbAtom.seqpos_text, protein->seqpos_text_map[num_res].seqpos_text); dumbAtom.atom_number = i; dumbAtom.sasa=0; protein->Template[i]=dumbAtom; ++i; } } } if(does_this_file_exist(ligand_tempfilename)==1) { ligand_file_ptr = fopen_file(ligand_tempfilename,"a"); fprintf(ligand_file_ptr,"END\n"); fclose(ligand_file_ptr); } if(worry_about_backbone == 1) /* for last residue */ { if(N_found == 0) { fprintf(stderr, "ERROR N for residue %d not found\n", current_seq_pos); exit(1); } if(C_found == 0) { fprintf(stderr, "ERROR C for residue %d not found\n", current_seq_pos); exit(1); } if(O_found == 0) { fprintf(stderr, "ERROR O for residue %d not found\n", current_seq_pos); exit(1); } if(CA_found == 0) { fprintf(stderr, "ERROR CA for residue %d not found\n", current_seq_pos); exit(1); } } if(i==1) { if(does_this_file_exist(ligand_tempfilename)==1) // use a UUU tripeptide for ligands if no backbone atoms were assigned { num_res=1; sprintf(protein->seqpos_text_map[num_res].seqpos_text,"1"); protein->seqpos_text_map[num_res].seq_position = 1; ++num_res; sprintf(protein->seqpos_text_map[num_res].seqpos_text,"2"); protein->seqpos_text_map[num_res].seq_position = 2; ++num_res; sprintf(protein->seqpos_text_map[num_res].seqpos_text,"3"); protein->seqpos_text_map[num_res].seq_position = 3; num_res=1; for(i=1;i<=21;++i) { sprintf(protein->Template[i].residuetype,"UUU"); protein->Template[i].sasa = 0; protein->Template[i].born_radius = 1; 
protein->Template[i].chain_id = ' '; strcpy(protein->Template[i].seqpos_text,protein->seqpos_text_map[num_res].seqpos_text); protein->Template[i].atom_number = i; protein->Template[i].seq_position = protein->seqpos_text_map[num_res].seq_position; if(i%7==0) ++num_res; } i=1; for(num_res=1;num_res<=3;++num_res) { sprintf(protein->Template[i].atomname,"N"); protein->Template[i].coord = DUMMY_BKBN.N; protein->Template[i].coord.x += num_res; ++i; sprintf(protein->Template[i].atomname,"H"); protein->Template[i].coord = DUMMY_BKBN.H; protein->Template[i].coord.x += num_res; ++i; sprintf(protein->Template[i].atomname,"CA"); protein->Template[i].coord = DUMMY_BKBN.CA; protein->Template[i].coord.x += num_res; ++i; sprintf(protein->Template[i].atomname,"HA"); protein->Template[i].coord = DUMMY_BKBN.HA; protein->Template[i].coord.x += num_res; ++i; sprintf(protein->Template[i].atomname,"C"); protein->Template[i].coord = DUMMY_BKBN.C; protein->Template[i].coord.x += num_res; ++i; sprintf(protein->Template[i].atomname,"O"); protein->Template[i].coord = DUMMY_BKBN.O; protein->Template[i].coord.x += num_res; ++i; sprintf(protein->Template[i].atomname,"CB"); protein->Template[i].coord = DUMMY_BKBN.CB; protein->Template[i].coord.x += num_res; ++i; } num_res = 4; i=22; } else { sprintf(line, "%s does not contain atoms!\n", pdbfilename); failure_report(line,"exit"); } } if(protein->seqpos_text_map[num_res].seq_position > MAX_SEQ_POSITION) MAX_SEQ_POSITION = protein->seqpos_text_map[num_res].seq_position; protein->seqpos_text_map[num_res+1].seq_position = ENDFLAG; strcpy(protein->Template[i].residuetype,"END"); protein->Template[i].seq_position = ENDFLAG; protein->Template[i].atom_number = ENDFLAG; protein->Template[i].atom_ptr=NULL; protein->Template[i].sasa = ENDFLAG; protein->Template[0].seq_position = 0; protein->Template[0].atom_number = 0; protein->Template[0].atom_ptr=NULL; protein->Template[0].sasa = 0; protein->chain_gap_flag = missing_atom_flag; /* find disulfides; rename as 
CYD so they won't move */ if(num_cys>1) cys_to_cyd_disulfides(protein->Template, num_cys, index_sg, protein->chain_id_list, &protein->super_chain_id_list); if(true_multichain_flag>1) { if(protein->super_chain_id_list==NULL) /* if super_chain_id_list not defined, first chain is a super_chain */ { protein->super_chain_id_list = (SUPER_CHAIN_ID_LIST *)calloc(MAX_CHAINS,sizeof(SUPER_CHAIN_ID_LIST)); for(b=1;bsuper_chain_id_list[b].chain_id=NULL; protein->super_chain_id_list[1].chain_id = (char *)calloc(MAX_CHAINS,sizeof(char)); protein->super_chain_id_list[1].chain_id[1] = protein->chain_id_list[1]; protein->super_chain_id_list[1].chain_id[2] = '\0'; } i=1; while(protein->super_chain_id_list[i].chain_id!=NULL) { b=1; while(protein->super_chain_id_list[i].chain_id[b]!='\0') { k=1; while(protein->super_chain_id_list[i].chain_id[b] != protein->chain_id_list[k] && protein->chain_id_list[k]!='\0') ++k; if(protein->chain_id_list[k]=='\0') { fprintf(stderr,"ERROR Cannot find SUPER_CHAIN member %c\n",protein->super_chain_id_list[i].chain_id[b]); exit(1); } ++b; } ++i; } } fclose(input); free_memory(line); free_memory(lineflag); free_memory(index_sg); free_memory(ligand_tempfilename); for(i=0;i<50;++i) free_memory(word[i]); free_memory(word); /* attach pointers and flags to the Template structure and convert to standard arrangement */ attach_ptr_pdbATOM(protein->Template, protein->resparam); /* transform coordinates with user-defined translate_rotate_array or transform_matrix */ if(protein->transform_matrix!=NULL) { if(protein->translate_rotate_array!=NULL) { rotation_vector_start.x = protein->translate_rotate_array[1]; rotation_vector_start.y = protein->translate_rotate_array[2]; rotation_vector_start.z = protein->translate_rotate_array[3]; rotation_vector_end.x = protein->translate_rotate_array[4]; rotation_vector_end.y = protein->translate_rotate_array[5]; rotation_vector_end.z = protein->translate_rotate_array[6]; /* rotate about defined vector */ 
protein->translate_rotate_array[1] = 0; protein->translate_rotate_array[2] = 0; protein->translate_rotate_array[3] = 0; protein->translate_rotate_array[4] = rotation_vector_end.x - rotation_vector_start.x; protein->translate_rotate_array[5] = rotation_vector_end.y - rotation_vector_start.y; protein->translate_rotate_array[6] = rotation_vector_end.z - rotation_vector_start.z; translate_rotate_array_to_transform_matrix(protein->translate_rotate_array, protein->transform_matrix); rotation_vector_midpoint = midpoint(rotation_vector_start, rotation_vector_end); rotated_start = tranform_coordinates(rotation_vector_start,protein->transform_matrix); rotated_end = tranform_coordinates(rotation_vector_end,protein->transform_matrix); rotated_midpoint = tranform_coordinates(rotation_vector_midpoint,protein->transform_matrix); /* bring rotated structure back to the original location */ protein->translate_rotate_array[1] = ((rotation_vector_midpoint.x - rotated_midpoint.x) + (rotation_vector_start.x - rotated_start.x) + (rotation_vector_end.x - rotated_end.x))/3.0; protein->translate_rotate_array[2] = ((rotation_vector_midpoint.y - rotated_midpoint.y) + (rotation_vector_start.y - rotated_start.y) + (rotation_vector_end.y - rotated_end.y))/3.0; protein->translate_rotate_array[3] = ((rotation_vector_midpoint.z - rotated_midpoint.z) + (rotation_vector_start.z - rotated_start.z) + (rotation_vector_end.z - rotated_end.z))/3.0; translate_rotate_array_to_transform_matrix(protein->translate_rotate_array, protein->transform_matrix); } check_transform_matrix(protein->transform_matrix); transform_pdb(protein->Template, protein->transform_matrix); } /* double check for weird sequences */ i=1; previous_seq=0; while(protein->Template[i].atom_ptr!=NULL) { if(strcmp(protein->Template[i].atomname,"CB")==0) { current_seq_pos = protein->Template[i].seq_position; if(previous_seq == current_seq_pos) { fprintf(stderr,"ERROR Same sequence number id for two consecutive residues %s\n", 
protein->seqpos_text_map[seqpos_to_inputted_string(protein->Template[i].seq_position, protein->seqpos_text_map)].seqpos_text); exit(1); } previous_seq = protein->Template[i].seq_position; } ++i; } i=1; previous_seq=0; while(protein->Template[i].atom_ptr!=NULL) { if(strcmp(protein->Template[i].atomname,"N")==0) { current_seq_pos = protein->Template[i].seq_position; if(previous_seq == current_seq_pos) { fprintf(stderr,"ERROR Same sequence number id for two consecutive residues %s\n", protein->seqpos_text_map[seqpos_to_inputted_string(protein->Template[i].seq_position, protein->seqpos_text_map)].seqpos_text); exit(1); } previous_seq = protein->Template[i].seq_position; } ++i; } i=1; previous_seq=0; while(protein->Template[i].atom_ptr!=NULL) { if(strcmp(protein->Template[i].atomname,"CG")==0 || strcmp(protein->Template[i].atomname,"OG")==0 || strcmp(protein->Template[i].atomname,"SG")==0) { current_seq_pos = protein->Template[i].seq_position; if(previous_seq == current_seq_pos) { fprintf(stderr,"ERROR Same sequence number id for two consecutive residues %s\n", protein->seqpos_text_map[seqpos_to_inputted_string(protein->Template[i].seq_position, protein->seqpos_text_map)].seqpos_text); exit(1); } previous_seq = protein->Template[i].seq_position; } ++i; } /* pro/gly seq_positions are stored in invar_pos; needed converting CHROMOSOMES to seqs and vice versa */ protein->invar_pos = (INVARIABLE_POSITIONS *)calloc(num_invar+2,sizeof(INVARIABLE_POSITIONS)); INVAR_POS = protein->invar_pos; i=1; j=1; while(protein->Template[i].seq_position!=ENDFLAG) { if(strcmp(protein->Template[i].atom_ptr->atomname,"N")==0) { if(strcmp(protein->Template[i].residuetype,"PRO")==0) { protein->invar_pos[j].seq_position = protein->Template[i].seq_position; protein->invar_pos[j].restype = 'P'; ++j; } else if(strcmp(protein->Template[i].residuetype,"GLY")==0) { protein->invar_pos[j].seq_position = protein->Template[i].seq_position; protein->invar_pos[j].restype = 'G'; ++j; } } a = 
protein->Template[i].seq_position; while(a==protein->Template[i].seq_position) ++i; } protein->invar_pos[j].seq_position=ENDFLAG; if(QUIET_FLAG == 0) if(rearrange_flag == 0) { fprintf(stderr,"\nWARNING The structure is not being rebuilt.\n"); fprintf(stderr,"\tThe energies will NOT be accurate if atoms are missing!\n"); if(CHARGES_PH_INDEPENDENT_FLAG==0) { fprintf(stderr,"\tSince CHARGES_PH_INDEPENDENT_FLAG 0 , the energies for\n"); fprintf(stderr,"\tionizable residues will NOT be correct!!!\n"); fprintf(stderr,"\tTry REBUILD_FLAG 1 \n"); } fprintf(stderr,"\n"); } /* extract BACKBONE structures */ protein->bkbn = (BACKBONE *)calloc(MAX_RESIDUES,sizeof(BACKBONE)); extract_bkbn(protein->Template,protein->bkbn); protein->chain_anchor_bkbn = (BACKBONE *)calloc(MAX_CHAINS,sizeof(BACKBONE)); i=1; chain_number = 0; while(protein->bkbn[i].seq_position!=ENDFLAG) { chain_id_div = div(protein->bkbn[i].seq_position, 1000); if(chain_number != chain_id_div.quot + 1) { chain_number = chain_id_div.quot + 1; protein->chain_anchor_bkbn[chain_number] = protein->bkbn[i]; } ++i; } /* rebuild with ideal bondlengths and angles */ if(rearrange_flag != 0) { chr = (CHROMOSOME *)malloc(sizeof(CHROMOSOME)); pdbATOM_to_CHROMOSOME(protein->Template, chr, protein->resparam); if(rearrange_flag==1) /* rebuild sidechains only; h-build backbone */ { bkbn_atoms = (pdbATOM *)calloc(MAX_ATOMS, sizeof(pdbATOM)); dummy_varpos = (VARIABLE_POSITION *)calloc(MAX_RESIDUES, sizeof(VARIABLE_POSITION)); i=1; j=0; while(protein->Template[i].seq_position!=ENDFLAG) { ++j; dummy_varpos[j].seq_position = protein->Template[i].seq_position; while(protein->Template[i].seq_position == dummy_varpos[j].seq_position) ++i; } dummy_varpos[j+1].seq_position = ENDFLAG; make_fixed_pdbATOM(protein->Template, dummy_varpos, bkbn_atoms); } else if(rearrange_flag==2) /* rebuild everything w/ ideal geometry, including backbone */ { bkbn_atoms = NULL; dummy_varpos = NULL; } CHROMOSOME_to_pdbATOM(chr, bkbn_atoms, protein->Template, 
protein->chain_anchor_bkbn); if(FLIP_ASN_GLN_HIS_FLAG==1) optimize_asn_gln_his_rotamers(protein->Template, chr, protein->resparam); CHROMOSOME_to_pdbATOM(chr, bkbn_atoms, protein->Template, protein->chain_anchor_bkbn); chr->genes = chr->firstgene; while(chr->genes->nextgene!=NULL) { free_memory(chr->genes->choice_ptr->resparam_ptr); free_memory(chr->genes->choice_ptr->bkbn); free_memory(chr->genes->choice_ptr); free_memory(chr->genes->chi); chr->genes = chr->genes->nextgene; } chr->genes = chr->firstgene; free_CHROMOSOME(chr); free_memory(chr); if(bkbn_atoms != NULL) free_memory(bkbn_atoms); if(dummy_varpos != NULL) free_memory(dummy_varpos); } pdbATOM_to_mini_pdbATOM(protein->Template, protein->mini_Template); i=1; protein->num_res=0; while(protein->mini_Template[i].seq_position!=ENDFLAG) { if(strcmp(protein->mini_Template[i].atom_ptr->atomname,"N")==0) ++protein->num_res; ++i; } protein->template_sequence = (char *)calloc(MAX_RESIDUES,sizeof(char)); protein->final_sequence = (char *)calloc(MAX_RESIDUES,sizeof(char)); pdbATOM_to_sequence(protein->Template, protein->template_sequence, protein->resparam); return(1); } void rotamer_build(PROTEIN *protein, char *custom_rotamer_file) { FILE *file_ptr; char *line, **word, *keyword, one_letter_code[3]; int i,j, k,q,stopflag,num_words; CHROMOSOME *chr; pdbATOM *bkbn_atoms; VARIABLE_POSITION *dummy_varpos; word = (char **)calloc(50,sizeof(char *)); for(i=0;i<50;++i) word[i] = (char *)calloc(50,sizeof(char)); line = (char *)calloc(MAXLINE,sizeof(char)); keyword = (char *)calloc(MAXLINE,sizeof(char)); chr = (CHROMOSOME *)malloc(sizeof(CHROMOSOME)); pdbATOM_to_CHROMOSOME(protein->Template, chr, protein->resparam); file_ptr = fopen_file(custom_rotamer_file,"r"); fgets(line,MAXLINE,file_ptr); sscanf(line,"%s",keyword); while(strcmp(keyword,"ROTAMER")!=0) { fgets(line,MAXLINE,file_ptr); sscanf(line,"%s",keyword); } stopflag=0; while(stopflag==0) { num_words = extract_words(line, word); /* this rotamer entry is from a pdb or .out 
file */ /* ROTAMER seqpos core/surf/int one_letter_code chi1 chi2 .... */ if(word[3][0] == 'i' || word[3][0] == 's' || word[3][0] == 'c' || word[3][0] == 'l') { strcpy(one_letter_code,word[4]); k=4; } else /* ROTAMER seqpos one_letter_code chi1 chi2 .... */ { strcpy(one_letter_code,word[3]); k=3; } chr->genes = chr->firstgene; while(strcmp(chr->genes->varpos_ptr->seqpos_text_map_ptr->seqpos_text,word[2])!=0) { chr->genes = chr->genes->nextgene; if(chr->genes->seq_position==ENDFLAG) { sprintf(line,"ERROR Residue %s in ROTAMER_FILE %s is not found in the template pdb", word[2], custom_rotamer_file); failure_report(line,"exit"); } } q=1; while(strcmp(one_letter_code,protein->resparam[q].one_letter_code)!=0) { ++q; if(strcmp(protein->resparam[q].residuetype,"zzz")==0) { sprintf(line,"ERROR Residue %s %s in ROTAMER_FILE %s not found in resparam file", word[2], one_letter_code, custom_rotamer_file); failure_report(line,"exit"); } } *chr->genes->choice_ptr->resparam_ptr = protein->resparam[q]; chr->genes->varpos_ptr->choice[1] = *chr->genes->choice_ptr; if(num_wordsgenes->choice_ptr->resparam_ptr->rotamerlib_ptr->numChi) { sprintf(line,"ERROR Residue %s %s in ROTAMER_FILE %s does not define enough dihedrals", word[2], one_letter_code,custom_rotamer_file); failure_report(line,"exit"); } for(q=1;q<=chr->genes->choice_ptr->resparam_ptr->rotamerlib_ptr->numChi;++q) { sscanf(word[k+q],"%lf",&chr->genes->chi[q]); } if(fgets(line,MAXLINE,file_ptr)==NULL) stopflag=1; else { sscanf(line,"%s",keyword); if(strcmp(keyword,"ROTAMER")!=0) stopflag=1; } } fclose(file_ptr); bkbn_atoms = (pdbATOM *)calloc(MAX_ATOMS, sizeof(pdbATOM)); dummy_varpos = (VARIABLE_POSITION *)calloc(MAX_RESIDUES, sizeof(VARIABLE_POSITION)); i=1; j=0; while(protein->Template[i].seq_position!=ENDFLAG) { ++j; dummy_varpos[j].seq_position = protein->Template[i].seq_position; while(protein->Template[i].seq_position == dummy_varpos[j].seq_position) ++i; } dummy_varpos[j+1].seq_position = ENDFLAG; 
make_fixed_pdbATOM(protein->Template, dummy_varpos, bkbn_atoms); CHROMOSOME_to_pdbATOM(chr, bkbn_atoms, protein->Template, protein->chain_anchor_bkbn); pdbATOM_to_mini_pdbATOM(protein->Template, protein->mini_Template); pdbATOM_to_sequence(protein->Template, protein->template_sequence, protein->resparam); free_memory(dummy_varpos); free_memory(bkbn_atoms); chr->genes = chr->firstgene; while(chr->genes->nextgene!=NULL) { free_memory(chr->genes->choice_ptr->resparam_ptr); free_memory(chr->genes->choice_ptr->bkbn); free_memory(chr->genes->choice_ptr); free_memory(chr->genes->chi); chr->genes = chr->genes->nextgene; } chr->genes = chr->firstgene; free_CHROMOSOME(chr); free_memory(chr); free_memory(line); free_memory(keyword); for(i=0;i<50;++i) free_memory(word[i]); free_memory(word); }