/* EGAD: GA_utilities.cpp Navin Pokala and Tracy Handel Dept. of Molecular and Cell Biology University of California, Berkeley Copyright (C) 2003 Regents of the University of California GNU Public License Aug 12 2003 Absolutely no warranties are made or are implied with the use of this program or its parts. This file contains functions for initializing, randomizing, mating, and freeing GENE's and CHROMOSOME's, the datastructures used inside EGAD for representing protein structures in dihedral space. */ #include "GA_utilities.h" /* This function calculates the boltzmann distribution for the first pop_size members of a sorted CHROMOSOME array at a given temperature T. The probabilities are stored in chr[i].boltzProbab (for energy-based probabilties) and in chr[i[.rankProbab for rank-based probabilties */ void boltzmann_CHROMOSOME(CHROMOSOME *chr, double *T, int *pop_size) { int i; double sigma_energy, sigma_energy_rank; double under; extern double R; sigma_energy = 0; sigma_energy_rank=0; under = -1.0*(*T)*R; for(i=1;i<=(*pop_size);++i) { (chr[i]).boltzProbab = exp((((chr[i]).pop_energy))/under); sigma_energy += (chr[i]).boltzProbab; (chr[i]).rankProbab = exp((((chr[i]).deltaRank))/under); sigma_energy_rank += (chr[i]).rankProbab; } for(i=1;i<=(*pop_size);++i) { (chr[i]).boltzProbab = (chr[i]).boltzProbab/sigma_energy; (chr[i]).rankProbab = (chr[i]).rankProbab/sigma_energy_rank; } } /* This function de-allocates memory for a GENE linked-list */ void free_GENE(GENE gene) { GENE temp; while(gene!=NULL) { temp = gene->nextgene; free_memory(gene); gene=temp; } } /* This function de-allocates memory for a BKBNGENE linked-list */ void free_BKBNGENE(bkbnGENE gene) { bkbnGENE temp; while(gene!=NULL) { temp = gene->nextbkbngene; free_memory(gene); gene=temp; } } /* This function de-allocates memory for a CHROMOSOME */ void free_CHROMOSOME(CHROMOSOME *chr) { if(chr->firstgene!=NULL) { chr->genes = chr->firstgene; if(chr->genes!=NULL) free_GENE(chr->genes); chr->genes = NULL; chr->firstgene = NULL; } if(chr->first_bkbngene!=NULL) { chr->bkbngenes = chr->first_bkbngene; if(chr->bkbngenes!=NULL) free_BKBNGENE(chr->bkbngenes); chr->bkbngenes = NULL; chr->first_bkbngene = NULL; } } /* This function allocates memory for the GENE linked-list in CHROMOSOME, and seeds it with residues and sidechain rotamers.If check_solubility_flag=1, chr is sent to solubilize_CHROMOSOME to generate a soluble chromosome. */ void inoculate_sidechains(CHROMOSOME *chr, VARIABLE_POSITION *var_pos, int check_solubility_flag) { int j; chr->genes = (MENDEL *)malloc(sizeof(MENDEL)); /* defining the gene locations */ chr->firstgene = chr->genes; /* initializing firstgene to first gene */ j=1; while(var_pos[j].seq_position!=ENDFLAG) /* gene loop */ { chr->genes->seq_position = var_pos[j].seq_position; /* assign the sequence position */ mutate_sidechain(chr->genes, var_pos[j]); /* assign random residuetype and rotamer */ chr->genes->varpos_ptr = &var_pos[j]; chr->genes->nextgene = (MENDEL *)malloc(sizeof(MENDEL)); /* assign nextgene pointer */ chr->genes = chr->genes->nextgene; /* ptr to next gene is nextgene */ ++j; } /* end gene loop */ chr->genes->seq_position = ENDFLAG; /* terminate the linked-list */ chr->genes->nextgene=NULL; chr->genes = chr->firstgene; if(check_solubility_flag==1) { solubilize_CHROMOSOME(chr); } chr->genes = chr->firstgene; chr->energy = DBL_MAX; } /* This function randomizes GENEs for a CHROMOSOME; it assumes that memory has already been allocated by inoculate_sidechains. If check_solubility_flag=1, chr is sent to solubilize_CHROMOSOME to generate a soluble chromosome. */ void randomize_sidechains(CHROMOSOME *chr, int check_solubility_flag) { chr->firstgene = chr->genes; /* initializing firstgene to first gene */ while(chr->genes->seq_position!=ENDFLAG) /* gene loop */ { if(chr->genes->varpos_ptr->fixed_flag==0) /* assign random residuetype and rotamer for moving positions */ mutate_sidechain(chr->genes, *chr->genes->varpos_ptr); chr->genes = chr->genes->nextgene; } /* end gene loop */ chr->genes = chr->firstgene; if(check_solubility_flag==1) { solubilize_CHROMOSOME(chr); } chr->genes = chr->firstgene; chr->energy = DBL_MAX; generate_custom_sequence_CHROMOSOME(chr, 0); } /* This function mates an array of CHROMOSOME of size 2*pop_size (allocated by inoculate_sidechains). Members 1->pop_size are mated, based on their mating frequencies chr[i].boltzProbab and chr[i].rankProbab. Crossover occurs with recombination_freq probability. Mutations are permitted to occur on each child with mutation_freq probability. The children are stored in members pop_size+1 -> 2*pop_size and are solubilized w/ solubilize_CHROMOSOME */ void mate_sidechains(CHROMOSOME *chr, int pop_size, double recombination_freq, double mutation_freq) { int i; static double *mate_freq=NULL; /* arrays of boltzmann freqs; index = chr index */ static double *mate_freq_rank=NULL; static int current_popsize = 0; int A,B; /* indexes for mating freq arrays and parent CHROMOSOMEs */ int a,b; /* indexes for children CHROMOSOMEs */ int newpopsize, pop_size2; GENE dummyNextgene; pop_size2 = pop_size*2; /* allocate memory if needed */ if(current_popsize <= pop_size2 ) { if(mate_freq!=NULL) { free_memory(mate_freq); free_memory(mate_freq_rank); } current_popsize = pop_size*4; mate_freq = (double *)calloc(current_popsize + 2,sizeof(double)); mate_freq_rank = (double *)calloc(current_popsize + 2,sizeof(double)); } mate_freq[0] = 0; mate_freq[1] = 0; mate_freq[2] = 0; for(i=1;i<=pop_size;++i) /* loading mating frequency arrays */ mate_freq[i] = (chr[i]).boltzProbab + mate_freq[i-1]; mate_freq_rank[0] = 0; mate_freq_rank[1] = 0; mate_freq_rank[2] = 0; for(i=1;i<=pop_size;++i) /* loading mating frequency arrays */ mate_freq_rank[i] = (chr[i]).rankProbab + mate_freq_rank[i-1]; newpopsize = pop_size; while(newpopsize < pop_size2 ) { if(newpopsize%4==0) /* mating based on boltzmann probability */ { A = roll_loaded_dice(mate_freq, pop_size); /* selecting parent A */ B = roll_loaded_dice(mate_freq, pop_size); /* selecting parent B */ } else /* mating based on rank */ { A = roll_loaded_dice(mate_freq_rank, pop_size); /* selecting parent A */ B = roll_loaded_dice(mate_freq_rank, pop_size); /* selecting parent B */ } if((chr[A]).energy!=(chr[B]).energy) /* no incest or hermaphrodites */ { ++newpopsize; a=newpopsize; ++newpopsize; b=newpopsize; copyCHROMOSOME(chr[a],chr[A]); copyCHROMOSOME(chr[b],chr[B]); (chr[a]).genes = (chr[a]).firstgene; (chr[b]).genes = (chr[b]).firstgene; while((chr[a]).genes->seq_position!=ENDFLAG) { if(dice()<=recombination_freq) /* crossover?? */ { dummyNextgene = (chr[a]).genes->nextgene; (chr[a]).genes->nextgene = (chr[b]).genes->nextgene; (chr[b]).genes->nextgene = dummyNextgene; } if((chr[a]).genes->varpos_ptr->fixed_flag == 0) /* mutation only at moving positions */ { if(dice()<=mutation_freq) /* mutate chr[a].genes?? */ mutate_sidechain(chr[a].genes, *(chr[a]).genes->varpos_ptr); if(dice()<=mutation_freq) /* mutate chr[b].genes?? */ mutate_sidechain(chr[b].genes, *(chr[b]).genes->varpos_ptr); } (chr[a]).genes = (chr[a]).genes->nextgene; (chr[b]).genes = (chr[b]).genes->nextgene; } (chr[a]).genes = (chr[a]).firstgene; (chr[b]).genes = (chr[b]).firstgene; generate_custom_sequence_CHROMOSOME(&chr[a], 0); generate_custom_sequence_CHROMOSOME(&chr[b], 0); /* make children soluble */ solubilize_CHROMOSOME(&chr[a]); solubilize_CHROMOSOME(&chr[b]); } } } /* if chr[i] is isoenergetic with any chr[1]->chr[i-1], returns 1; else 0; */ int is_chr_i_isoenergetic_with_others(int i, CHROMOSOME *chr, int popsize) { int n; for(n=1;n<=popsize;++n) if(i!=n) if(fabs(chr[i].energy - chr[n].energy)<=EPS) return(1); return(0); } /* This function mutagenizes the GENEs in chr w/ mut_freq probability */ void mutagenize_sidechains_CHROMOSOME(CHROMOSOME *chr, double mut_freq) { chr->genes = chr->firstgene; while(chr->genes->seq_position!=ENDFLAG) { if(chr->genes->varpos_ptr->fixed_flag==0) /* mutagenize moving positions */ if(dice()<=mut_freq) mutate_sidechain(chr->genes, (*chr->genes->varpos_ptr) ); chr->genes = chr->genes->nextgene; } generate_custom_sequence_CHROMOSOME(chr, 0); chr->genes = chr->firstgene; } /* This function identifies isoenergetic CHROMOSOMEs and mutates them until they are no longer isoenergetic; solubility is also checked and insoluble chrs solubilized w/ solubilize_CHROMOSOME */ void get_rid_of_isoenergetic_chrs(CHROMOSOME *chr, double mut_freq, int pop_size, double fixedatoms_energy) { int i; int flag; int ctr, local_ctr; flag=0; ctr=0; while(flag==0 && ctr<=pop_size) { flag=1; for(i=1;i<=pop_size;++i) { local_ctr=0; while(is_chr_i_isoenergetic_with_others(i, chr, pop_size)==1 && local_ctr<=pop_size) { flag=0; mutagenize_sidechains_CHROMOSOME(&chr[i], mut_freq); solubilize_CHROMOSOME(&chr[i]); CHROMOSOME_to_lookupEnergy(&chr[i], &fixedatoms_energy); ++local_ctr; } } ++ctr; } } /* returns 1 if successful, 0 if not (ie: chr[1].energy = chr[pop_size].energy */ int calculate_mating_frequencies(CHROMOSOME *chrs, int pop_size, double T) { double max_energy; int i; extern double R; /* for calculating mating frequencies of the parameters->pop_size best solns */ for(i=1;i<=pop_size;++i) /* get deltaE */ { chrs[i].pop_energy = chrs[i].energy - chrs[1].energy; /* for energy-based mating freqs */ chrs[i].deltaRank = i - 1.0; /* for rank-based mating freqs */ } max_energy = chrs[pop_size].deltaRank; if(chrs[pop_size].deltaRank > chrs[pop_size].pop_energy) max_energy = chrs[pop_size].deltaRank; else max_energy = chrs[pop_size].pop_energy; /* there must be a difference between the best and worst chr */ if(fabs(chrs[pop_size].energy - chrs[1].energy)>EPS) { /* if max_energy/(R*T) is too big, we get an overflow when calculating boltzmann probability */ R=R_univ; if(max_energy/(R*T)>=10) R=max_energy/(10*T); /* calculate boltzmann distribution aka mating freqs for pop_size best structures */ boltzmann_CHROMOSOME(chrs,&T,&pop_size); return(1); } else return(0); } /* linear cooling schedule for GA and MC temperatures */ double update_simulation_temperature(double T, double dT, double Tf) { if(T > Tf) return(T + dT); else return(Tf); }