/* EGAD: input_stuff.cpp

   Navin Pokala and Tracy Handel
   Dept. of Molecular and Cell Biology
   University of California, Berkeley

   Copyright (C) 2003 Regents of the University of California
   GNU Public License
   Aug 12 2003

   Absolutely no warranties are made or are implied with the use of this
   program or its parts.

   This file contains functions for parsing inputfiles, reading forcefield
   info, and initializing PROTEIN structures. */

#include "input_stuff.h"

/* File-scope settings shared with the rest of the program.
   input_stuff() (below) resets many of them to the defaults shown here at
   the start of every call, then overrides them from keywords found in the
   input file.
   NOTE(review): ENDFLAG and R_univ are not defined in the visible part of
   this file; ENDFLAG appears to be used as a "not set" sentinel -- confirm
   against input_stuff.h. */

/* set by an input keyword containing both "NUM" and "CPU" */
int NUM_SYSTEM_CPU = 0;
/* input_stuff() allocates this and fills it with "FASTER" when it is still NULL */
char *DEFAULT_ROTAMER_JOB=NULL;
/* allocated and filled by the BATCH_QUEUE_PREFIX keyword */
char *BATCH_QUEUE_PREFIX=NULL;
int MAXLINE=300; /* max string size */
/* line-buffer size used for reading the input file; input_stuff() resets it to MAXLINE */
int MXLINE_INPUT=300;
/* set by a keyword containing "QUIET"; also forced to 1 when a slave file list is given */
int QUIET_FLAG = 0;
/* set by a keyword containing both "MAX" and "MUT" */
int MAX_MUTATIONS=0;

/* declare/allocate/initialize lots of extern variables; mostly constants and flags */
double R = R_univ; /* gas constant for GA */
double RT = 0.595404;
double TEMPERATURE = 298.0;
double SCMF_TEMPERATURE = ENDFLAG;
double KAPPA = 0;
double IONIC_STRENGTH = 0;
double PH = 7.0;
double CORESURF_MEA_SASA = ENDFLAG;
double CORESURF_MEA_BORN = ENDFLAG;
double UNSAT_HBOND_REFSTATE=ENDFLAG;
double MAX_OPTIMIZATION_TIME = 1e10; /* no limit unless specified */
double MASTER_OPTIMIZATION_TIME = 1e10;
int MAX_RESIDUES=0;
int MAX_SEQ_POSITION=0;
int MAX_ATOMS=0;
int IGNORE_DISULFIDE_FLAG=0;
int *MISSING_SIDECHAIN_LIST=NULL;
/* the GBSA_FLAG / SOLVATION_FLAG keywords set GB_FLAG and copy it into SASA_FLAG;
   the GB_FLAG keyword sets GB_FLAG alone */
int SASA_FLAG=1;
int GB_FLAG=1;
int HBOND_FUNCTION_FLAG=ENDFLAG;
/* the HBOND_FUNCTION_TYPE keyword sets this to 2 when its value contains both
   "10" and "12", and always sets HBOND_FUNCTION_FLAG to 1 */
int HBOND_FUNCTION_TYPE = 1;
int PAIR_ENERGY_TABLE_FLAG=0;
int COULOMB_FLAG = 1;
int TORSION_FLAG = 1;
/* LOGFILE_FLAG is cleared (with FLIP_ASN_GLN_HIS_FLAG) when an "ALIGN" keyword is present */
int LOGFILE_FLAG = 1;
int LOGFILE_PERIOD=10;
int LOCAL_MINIMIZATION_FLAG=ENDFLAG;
int MINIMIZE_METHYL_FLAG=0;
int LOTS_OF_ROTAMERLETS_FLAG=0;
int RESTRAIN_MINIMIZATION_FLAG=0;
int CLEAN_UP_FLAG=1; /* remove intermediate files, by default; used for scanning mutagenesis and interface scan */
int FLIP_ASN_GLN_HIS_FLAG=1;
/* set to 1 by the FRACTION_HYDROPHOBIC_SASA_CUTOFF, TRANSFER_FREE_ENERGY_DENSITY_CUTOFF
   and OVERALL_CHARGE keywords */
int SOLUBILITY_CUTOFF_FLAG = ENDFLAG;
int SPECIFICITY_FLAG = ENDFLAG;
int HBOND_SPECIFICITY_FLAG = ENDFLAG;
double OVERALL_CHARGE = 0;
/* how OVERALL_CHARGE is compared, as assigned by the OVERALL_CHARGE keyword parser:
      1      "+n" or "-n"    (signed value)
      0      "|n|"           (absolute value)
      2, -2  ">+n" / "<+n"   (also with "-n")
      3, -3  ">|n|" / "<|n|"                                    */
int CHARGE_EQUALITY_FLAG = 3; /* default |overall_charge| >=0 */
double OVERALL_CHARGE_TOLERANCE = 0.5; /* when
 designing for a particular charge, this is the tolerance */
int CHARGES_PH_INDEPENDENT_FLAG=0;
int CTE_FLAG = 0;
int NTE_FLAG = 0;
/* assigned from getpid() in input_stuff(); the PID / PROCESS_ID keyword can override it.
   Used to build per-process temporary file and directory names. */
int GET_PID;
int OUTPUT_COORD_FLAG=1;

/* The three MULTISTATE_INDEX_* values hold the index (order of appearance in the
   input file) of a competitor structure; 0 is the value they are reset to. */
/* state for which the fold stability is calc'd; default target unless FREE is specified; then FREE */
int MULTISTATE_INDEX_STABILITY = 0;
/* state for which specificity is calc'd; default target */
int MULTISTATE_INDEX_SPECIFICITY = 0;
/* state for which solubility is calc'd; default target unless FREE is specified; then FREE */
int MULTISTATE_INDEX_SOLUBILITY = 0;

/* both reset to ' ' by input_stuff(); filled by a keyword containing "CHAIN_ID"
   (one value -> new id only, otherwise old id then new id) */
char NEW_CHAIN_ID,OLD_CHAIN_ID;
/* allocated in input_stuff() and set to "temp_lookup_directory.<GET_PID>" */
char *TEMP_LOOKUP_DIRECTORY;
/* allocated on first use in input_stuff() and filled from getenv("PWD") */
char *CURRENT_WORKING_DIRECTORY=NULL;
/* allocated (MAXLINE bytes) in input_stuff() */
char *LOOKUP_TABLE_DIRECTORY;
/* "temp.avail_processors.<GET_PID>" when more than one CPU is requested, or the
   file named by a keyword containing "FILE" and "PROCESSOR"/"CPU" */
char *AVAILABLE_PROCESSORS_FILE=NULL;
char *RESPARAMFILE = NULL;
char *ROTAMERFILE = NULL;
char *SPECIFICITY_FILE=NULL;
char *EXECUTABLE_FILENAME=NULL;
int MINIMIZE_FINAL_SEQUENCE_FLAG=1;
int OUTPUT_ENERGY_PER_ATOM_FLAG=1;
/* starting sequence for multistate optimization, as set by the parser:
   -1 template sequence, 1 target optimal sequence, 2 scmf probabilities, 3 random sequence */
int FILTER_SEQUENCE_SPACE_FLAG=0;
int OPTIMIZE_TARGET_STRUCTURE_ONLY_FLAG=0;
char *LOOKUP_TABLE_SERVER = NULL;
double DEBYE_SCALE_FACTOR = 0.000140;
int NICE_SLAVE_FLAG=1;
double VDW_CUTOFF = ENDFLAG;
double VDW_CLASH_ENERGY = ENDFLAG;
/* the VDW_FLAG keyword sets WEIGHT_VDW to 0 when its flag value is 0 */
double WEIGHT_VDW = ENDFLAG;
double WEIGHT_ELECTROSTATICS = ENDFLAG;
/* set by either the WEIGHT_1_4 or the WEIGHT_TORSION keyword */
double WEIGHT_1_4 = ENDFLAG;
double GENERAL_ASP = ENDFLAG;
double HYDROPHOBIC_ASP = ENDFLAG;
int INTRA_ROTAMER_FLAG=ENDFLAG;
int INTRA_ROTAMER_FLAG_14=ENDFLAG;
double VDW_ATTRACTIVE_FACTOR=ENDFLAG;
double VDW_REPULSIVE_FACTOR=ENDFLAG;
double VDW_LINEAR_START_POINT=ENDFLAG;
double OVERALL_ENERGY_SCALE=ENDFLAG;
/* ligand parameter file names; input_stuff() allocates MAX_LIGAND slots on the
   first LIGAND keyword */
char **LIGANDFILENAME;
/* points at the inputfilename argument of the most recent input_stuff() call */
char *INPUTFILENAME=NULL;
FILE *MESSAGE_FILE_PTR;
char *MULTISTATE_OBJ_FUNCT_INPUTFILENAME=NULL;
char *SEQ_FILTER_INPUTFILENAME=NULL;
int LOOKUP_TABLE_SLAVE_FLAG=0;
int KEEP_CYS_FLAG=0;
/* input_stuff() sets this to (number of input lines whose keyword contains "LIGAND") + 2 */
int MAX_LIGAND; /* max number of freely moving ligands */

/* reads an input file, parses variable positions, reads Template, forcefield files. Initializes and sets lots of variables. See manual for file formats. 
This is akin to a "constructor" for PROTEIN structures */ void input_stuff(char *inputfilename, PROTEIN *protein) { FILE *input, *logfile_ptr, *file_ptr; char *pdbfile,**word, *forcefieldfile,*custom_rotamer_file, *line, *dummystring, *dummystring2, *keyword, *seqpos_string, *pos_string; int i,j,k,p,n,q, stopflag, rebuild_flag,linenum, *incld_exld_positions, num_positions, first, last, *tempseqVarpos1, *tempseqVarpos, competitor_ctr; extern double WATER_DIELECTRIC, VDW_RADII_SCALE, INTERNAL_DIELECTRIC, HBOND_DIELECTRIC; extern double FRACTION_HYDROPHOBIC_SASA_CUTOFF, HYDROPHOBIC_ASP, GENERAL_ASP,TRANSFER_FREE_ENERGY_DENSITY_CUTOFF, ENERGY_ERROR; long fp; int random_seed_flag=ENDFLAG; extern char *EXECUTABLE_FILENAME; int flag, flag2, neighbors_define_flag=0; time_t now; /* re-initialize the externs declared above; reset each time this function is called; this prevents information from old PROTEIN's interfereing with new PROTEIN's */ custom_rotamer_file=NULL; R = R_univ; RT = 0.595404; TEMPERATURE = 298.0; SCMF_TEMPERATURE = ENDFLAG; KAPPA = 0; IONIC_STRENGTH = 0; PH = 7.0; CORESURF_MEA_SASA = ENDFLAG; CORESURF_MEA_BORN = ENDFLAG; MAX_OPTIMIZATION_TIME = 1e10; MASTER_OPTIMIZATION_TIME = 1e10; IGNORE_DISULFIDE_FLAG=0; SASA_FLAG=1; GB_FLAG=1; PAIR_ENERGY_TABLE_FLAG=0; COULOMB_FLAG = 1; TORSION_FLAG = 1; LOGFILE_FLAG = 1; LOGFILE_PERIOD=10; LOCAL_MINIMIZATION_FLAG=ENDFLAG; MINIMIZE_METHYL_FLAG=0; LOTS_OF_ROTAMERLETS_FLAG=0; RESTRAIN_MINIMIZATION_FLAG=0; SOLUBILITY_CUTOFF_FLAG = ENDFLAG; OVERALL_CHARGE = 0; CHARGE_EQUALITY_FLAG = 3; OVERALL_CHARGE_TOLERANCE = 0.5; CHARGES_PH_INDEPENDENT_FLAG=0; CTE_FLAG = 0; NTE_FLAG = 0; LOOKUP_TABLE_SLAVE_FLAG=0; SPECIFICITY_FLAG = ENDFLAG; HBOND_SPECIFICITY_FLAG = ENDFLAG; UNSAT_HBOND_REFSTATE=ENDFLAG; RESPARAMFILE = NULL; ROTAMERFILE = NULL; SPECIFICITY_FILE = NULL; LOOKUP_TABLE_SERVER = NULL; DEBYE_SCALE_FACTOR = 0.000140; NICE_SLAVE_FLAG=1; VDW_CUTOFF = ENDFLAG; VDW_CLASH_ENERGY = ENDFLAG; WEIGHT_VDW = ENDFLAG; 
WEIGHT_ELECTROSTATICS = ENDFLAG; WEIGHT_1_4 = ENDFLAG; GENERAL_ASP = ENDFLAG; HYDROPHOBIC_ASP = ENDFLAG; INTRA_ROTAMER_FLAG=ENDFLAG; INTRA_ROTAMER_FLAG_14=ENDFLAG; FILTER_SEQUENCE_SPACE_FLAG=0; VDW_ATTRACTIVE_FACTOR=ENDFLAG; VDW_REPULSIVE_FACTOR=ENDFLAG; OVERALL_ENERGY_SCALE=ENDFLAG; LIGANDFILENAME = NULL; MINIMIZE_FINAL_SEQUENCE_FLAG=1; OUTPUT_COORD_FLAG=1; KEEP_CYS_FLAG=0; OPTIMIZE_TARGET_STRUCTURE_ONLY_FLAG=0; MXLINE_INPUT=MAXLINE; INPUTFILENAME=NULL; if(DEFAULT_ROTAMER_JOB == NULL) { DEFAULT_ROTAMER_JOB = (char *)calloc(20,sizeof(char)); sprintf(DEFAULT_ROTAMER_JOB,"FASTER"); } OUTPUT_ENERGY_PER_ATOM_FLAG=1; NEW_CHAIN_ID = ' '; OLD_CHAIN_ID = ' '; /* allocate memory and set some default values */ first=1; tempseqVarpos1 = NULL; tempseqVarpos = NULL; word = (char **)calloc(50,sizeof(char *)); for(i=0;i<50;++i) word[i] = (char *)calloc(10,sizeof(char)); pdbfile= (char *)calloc(MXLINE_INPUT, sizeof(char)); line= (char *)calloc(MXLINE_INPUT, sizeof(char)); dummystring = (char *)calloc(MXLINE_INPUT, sizeof(char)); dummystring2 = (char *)calloc(MXLINE_INPUT, sizeof(char)); keyword = (char *)calloc(MXLINE_INPUT, sizeof(char)); seqpos_string = (char *)calloc(MXLINE_INPUT, sizeof(char)); pos_string = (char *)calloc(MXLINE_INPUT, sizeof(char)); forcefieldfile= NULL; protein->energy_profile_table=NULL; protein->atomparam = NULL; protein->resparam = NULL; protein->rotamerlib = NULL; protein->parameters.recomb_freq = 0.5; protein->parameters.mutation_freq = 0.05; protein->parameters.ga_convergence = 300; protein->parameters.pop_size = 500; protein->parameters.number_GA_cycles = 10; protein->parameters.num_GA_solns_per_cycle = 10; protein->parameters.hqm_convergence = 0; protein->parameters.MC_convergence = 0; protein->parameters.ga_T0 = 5000; protein->parameters.ga_Tf = 100; protein->parameters.ga_dT = -10; protein->parameters.mc_T0 = 5000; protein->parameters.mc_Tf = 100; protein->parameters.mc_dT = -10; protein->parameters.disk_lookup_table_flag = 1; 
protein->parameters.neighbordist = 1000; protein->fixed_atoms = NULL; protein->mini_fixed_atoms = NULL; protein->parameters.algorithm = (char *)calloc(MAXLINE,sizeof(char)); strcpy(protein->parameters.algorithm, "POINT_ENERGY"); protein->parameters.max_iterations = 0; protein->parameters.max_function_calls = 0; protein->parameters.rebuild_backbone_flag = 0; protein->align_def = NULL; protein->first_align_def = NULL; protein->chain_gap_flag = 0; protein->parameters.scmf_lambda = 0.25; strcpy(protein->parameters.scmf_seed, "unbiased"); protein->parameters.max_scmf_iterations = 5000; protein->parameters.number_scmf_cycles = 1; protein->parameters.scmf_quench_flag = 0; protein->parameters.number_scmf_solns = 10; protein->parameters.dee_max_nodes = 100000; protein->parameters.dee_E_bounding = 0.0; protein->parameters.dee_E_bounding_ceiling=TOL; protein->E_working = 0; protein->E_scmf = 0; protein->parameters.number_MC_cycles = 2500; protein->parameters.number_MC_solns = 0; protein->calc_complex_residue_energy_flag = 0; protein->chain_id_list = NULL; protein->min_fixed_res = NULL; protein->min_float_res = NULL; protein->seqpos_text_map = NULL; protein->torsion_include_res = NULL; protein->torsion_exclude_res = NULL; protein->wacky_numbering_list = NULL; protein->final_energy = NULL; protein->invar_pos=NULL; protein->super_chain_id_list=NULL; protein->template_sequence = NULL; protein->final_sequence = NULL; protein->bkbn = NULL; protein->chain_anchor_bkbn = NULL; protein->parameters.output_prefix = NULL; protein->parameters.output_prefix_sans_path = NULL; ENERGY_0(protein->fixedatoms_energy); protein->lookupEnergy = NULL; protein->var_pos = NULL; protein->sizeof_chr_array = 0; protein->chr = NULL; protein->final_pdb = NULL; protein->final_minipdb = NULL; protein->final_chr.genes = NULL; protein->final_chr.firstgene = NULL; protein->final_chr.bkbngenes = NULL; protein->final_chr.first_bkbngene = NULL; protein->parameters.avoid_wt_rotamer_flag=0; 
protein->parameters.slave_file_list_filename=NULL; protein->parameters.ln_exhaustive_search_max_combo=ENDFLAG; protein->parameters.sequence_algorithm=NULL; protein->parameters.log10_seq_combinations=0; protein->parameters.log10_rotamer_combinations=0; protein->parameters.dock_local_flag = 0; protein->transform_matrix = NULL; protein->translate_rotate_array = NULL; protein->parameters.max_bkbn_delta_dihedral = 5; protein->inputfilename = inputfilename; INPUTFILENAME = inputfilename; GET_PID = getpid(); protein->parameters.lookup_energy_table_directory = (char *)calloc(MAXLINE,sizeof(char)); TEMP_LOOKUP_DIRECTORY = (char *)calloc(MAXLINE,sizeof(char)); LOOKUP_TABLE_DIRECTORY = (char *)calloc(MAXLINE,sizeof(char)); sprintf(TEMP_LOOKUP_DIRECTORY,"temp_lookup_directory.%d",GET_PID); strcpy(protein->parameters.lookup_energy_table_directory, TEMP_LOOKUP_DIRECTORY); if(CURRENT_WORKING_DIRECTORY==NULL) CURRENT_WORKING_DIRECTORY = (char *)calloc(MAXLINE,sizeof(char)); sprintf(CURRENT_WORKING_DIRECTORY,"%s",getenv("PWD")); rebuild_flag=ENDFLAG; /* check for competitor structure definitions */ protein->competitor_Template = NULL; protein->num_competitors = 0; input = fopen_file(inputfilename, "r"); if(input == NULL) exit(1); fgets(line,MXLINE_INPUT,input); while(strncmp(line,"START",5)!=0) fgets(line,MXLINE_INPUT,input); while(fgets(line,MXLINE_INPUT,input)!=NULL) { sscanf(line,"%s", keyword); convert_string_to_all_caps(keyword); if( strstr(keyword,"COMPET")!=0 || strncmp(keyword,"FREE",4)==0 || ( strstr(keyword, "TARGET")!=0 && (strstr(keyword, "ALIGN")!=0 || strstr(keyword, "SUPER")!=0 || strstr(keyword, "OVERLAY")!=0) && strstr(keyword, "OPT")==0 ) ) ++protein->num_competitors; if(strstr(keyword, "ALIGN")!=0) { FLIP_ASN_GLN_HIS_FLAG = 0; LOGFILE_FLAG = 0; sprintf(protein->parameters.algorithm,"ALIGN_STRUCTURES"); } } fclose(input); if(protein->num_competitors>0) { protein->parameters.sequence_algorithm = (char *)calloc(50,sizeof(char)); 
strcpy(protein->parameters.sequence_algorithm,"GA"); protein->competitor_Template = (char **)calloc(protein->num_competitors+2,sizeof(char *)); for(i=1;i<=protein->num_competitors;++i) protein->competitor_Template[i] = (char *)calloc(MXLINE_INPUT,sizeof(char)); MULTISTATE_INDEX_STABILITY = 0; MULTISTATE_INDEX_SPECIFICITY = 0; MULTISTATE_INDEX_SOLUBILITY = 0; } // count the number of ligands MAX_LIGAND = 0; input = fopen_file(inputfilename, "r"); if(input == NULL) exit(1); fgets(line,MXLINE_INPUT,input); while(strncmp(line,"START",5)!=0) fgets(line,MXLINE_INPUT,input); while(fgets(line,MXLINE_INPUT,input)!=NULL) { sscanf(line,"%s", keyword); convert_string_to_all_caps(keyword); if(strstr(keyword,"LIGAND")!=0) ++MAX_LIGAND; } MAX_LIGAND=MAX_LIGAND+2; fclose(input); input = fopen_file(inputfilename, "r"); linenum = 1; fgets(line,MXLINE_INPUT,input); while(strncmp(line,"START",5)!=0) { fgets(line,MXLINE_INPUT,input); ++linenum; } protein->num_ligands=0; competitor_ctr=0; /* parse each line in the inputfile until we get to residue-specific info */ fgets(line,MXLINE_INPUT,input); while(strncmp(line,"VAR",3)!=0 && strncmp(line,"var",3)!=0 && strncmp(line,"END",3)!=0 && strncmp(line,"SEQUENCE",8)!=0 && strncmp(line,"sequence",8)!=0 && strncmp(line, "INCLUDE",7)!=0 && strncmp(line, "include",7)!=0 && strncmp(line, "EXCLUDE",7)!=0 && strncmp(line, "exclude",7)!=0 && strncmp(line, "FIX",3)!=0 && strncmp(line, "fix",3)!=0 && strncmp(line, "FLOAT",5)!=0 && strncmp(line, "float",5)!=0 && strncmp(line, "SCAN",4)!=0 && strncmp(line, "scan",4)!=0) { ++linenum; /* this line in the input file is a comment; ignore */ if(line[0] == '\0' || line[0] == '!' 
|| line[0] == '#' || line[0] == 0 || line[1] == '\0') { fgets(line,MXLINE_INPUT,input); } else { sscanf(line,"%s", keyword); convert_string_to_all_caps(keyword); if(strstr(keyword, "TEMPLATE")!=0 || ( strstr(keyword, "TARGET")!=0 && strstr(keyword, "ALIGN")==0 && strstr(keyword, "SUPERI")==0 && strstr(keyword, "OVERLAY")==0 && strstr(keyword, "OPT")==0 ) ) { sscanf(line, "%s %s",dummystring, pdbfile); protein->Template_filename = (char *)calloc(MXLINE_INPUT,sizeof(char)); sprintf(protein->Template_filename,"%s",pdbfile); /* get the prefix of the Template pdb file; the default output prefix for blah.pdb is blah.egad */ if(protein->parameters.output_prefix == NULL) { protein->parameters.output_prefix = (char *)calloc(MXLINE_INPUT, sizeof(char)); if(strstr(pdbfile,".pdb") != 0) { i=0; stopflag=0; while(stopflag==0 && pdbfile[i] != '\0') { if(pdbfile[i+3] != '\0') { if(pdbfile[i] == '.' && pdbfile[i+1] == 'p' && pdbfile[i+2] == 'd' && pdbfile[i+3] == 'b') stopflag=1; else ++i; } } j = i-1; while(pdbfile[i] != '/' && i!=0) --i; if(pdbfile[i] == '/') ++i; k=0; while(i<=j) { dummystring[k] = pdbfile[i]; ++i; ++k; } dummystring[k] = '\0'; } else strcpy(dummystring,pdbfile); sprintf(protein->parameters.output_prefix, "%s.egad", dummystring); protein->parameters.output_prefix_sans_path = protein->parameters.output_prefix; } fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "FORCEFIELD")!=0) { forcefieldfile= (char *)calloc(MXLINE_INPUT, sizeof(char)); sscanf(line, "%s %s",dummystring, forcefieldfile); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "ROTAMER")!=0 && strstr(keyword, "LIB")!=0) { ROTAMERFILE = (char *)calloc(MXLINE_INPUT,sizeof(char)); sscanf(line, "%s %s",dummystring, ROTAMERFILE); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "SPECIFICITY_FILE")==0) { SPECIFICITY_FILE = (char *)calloc(MXLINE_INPUT,sizeof(char)); sscanf(line, "%s %s",dummystring, SPECIFICITY_FILE); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, 
"TEMPERATURE")==0) { sscanf(line, "%s %lf", dummystring, &TEMPERATURE); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "SCMF_TEMPERATURE")==0) { sscanf(line, "%s %lf", dummystring, &SCMF_TEMPERATURE); fgets(line,MXLINE_INPUT,input); } else if( strstr(keyword,"COMPET")!=0 || strncmp(keyword,"FREE",4)==0 || ( strstr(keyword, "TARGET")!=0 && (strstr(keyword, "ALIGN")!=0 || strstr(keyword, "SUPER")!=0 || strstr(keyword, "OVERLAY")!=0) && strstr(keyword, "OPT")==0 ) ) { ++competitor_ctr; sscanf(line, "%s %s",dummystring, protein->competitor_Template[competitor_ctr]); if(strncmp(keyword,"FREE",4)==0) { if(MULTISTATE_INDEX_STABILITY == 0) MULTISTATE_INDEX_STABILITY = competitor_ctr; if(MULTISTATE_INDEX_SOLUBILITY == 0) MULTISTATE_INDEX_SOLUBILITY = competitor_ctr; } if(word_count(line)>2) { sscanf(line, "%s %s %s",dummystring, protein->competitor_Template[competitor_ctr], dummystring); convert_string_to_all_caps(dummystring); if(strstr(dummystring,"RSS")!=0 || strstr(dummystring,"UNF")!=0) MULTISTATE_INDEX_STABILITY = competitor_ctr; else if(strstr(dummystring,"SPECIF")!=0) MULTISTATE_INDEX_SPECIFICITY = competitor_ctr; else if(strstr(dummystring,"SOLUB")!=0) MULTISTATE_INDEX_SOLUBILITY = competitor_ctr; } if(does_this_file_exist(protein->competitor_Template[competitor_ctr])==0) { fprintf(stderr,"ERROR COMPETITOR structure file %s does not exist\n", protein->competitor_Template[competitor_ctr]); exit(1); } fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "LIGAND")!=0) /* ligand param files will be read by read_forcefield later */ { if(LIGANDFILENAME==NULL) { LIGANDFILENAME = (char **)calloc(MAX_LIGAND,sizeof(char *)); for(i=0;inum_ligands; LIGANDFILENAME[protein->num_ligands] = (char *)calloc(MXLINE_INPUT,sizeof(char)); sscanf(line, "%s %s", dummystring, LIGANDFILENAME[protein->num_ligands]); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "OVERALL_CHARGE_TOLERANCE")==0) { sscanf(line, "%s %lf", dummystring, &OVERALL_CHARGE_TOLERANCE); 
fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "CHAIN_ID")!=0) { if(word_count(line)==2) sscanf(line, "%s %c", dummystring, &NEW_CHAIN_ID); else sscanf(line, "%s %c %c", dummystring, &OLD_CHAIN_ID, &NEW_CHAIN_ID); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "DEBYE_SCALE_FACTOR")==0) { sscanf(line, "%s %lf", dummystring, &DEBYE_SCALE_FACTOR); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "HYDROPHOBIC_ASP")==0) { sscanf(line, "%s %lf", dummystring, &HYDROPHOBIC_ASP); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "ENERGY")!=0 && (strstr(keyword, "ERROR")!=0 || strstr(keyword, "UNCERT")!=0 ) ) { sscanf(line, "%s %lf", dummystring, &ENERGY_ERROR); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "GENERAL_ASP")==0) { sscanf(line, "%s %lf", dummystring, &GENERAL_ASP); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "WEIGHT_VDW")==0) { sscanf(line, "%s %lf", dummystring, &WEIGHT_VDW); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "TRANSFORM")!=0 && strstr(keyword, "MATRIX")!=0) { protein->transform_matrix = (double *)calloc(14,sizeof(double)); sscanf(line, "%s %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf", dummystring, &protein->transform_matrix[1], &protein->transform_matrix[2], &protein->transform_matrix[3], &protein->transform_matrix[4], &protein->transform_matrix[5], &protein->transform_matrix[6], &protein->transform_matrix[7], &protein->transform_matrix[8], &protein->transform_matrix[9], &protein->transform_matrix[10], &protein->transform_matrix[11], &protein->transform_matrix[12] ); fgets(line,MXLINE_INPUT,input); } else if(strncmp(keyword, "ROTATE",6)==0) { protein->translate_rotate_array = (double *)calloc(9,sizeof(double)); protein->transform_matrix = (double *)calloc(14,sizeof(double)); sscanf(line, "%s %lf %lf %lf %lf %lf %lf %lf", dummystring, &protein->translate_rotate_array[1], &protein->translate_rotate_array[2], &protein->translate_rotate_array[3], 
&protein->translate_rotate_array[4],&protein->translate_rotate_array[5], &protein->translate_rotate_array[6], &protein->translate_rotate_array[7]); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword,"SUPER_CHAIN")==0) { if(protein->super_chain_id_list==NULL) { protein->super_chain_id_list = (SUPER_CHAIN_ID_LIST *)calloc(MAX_CHAINS,sizeof(SUPER_CHAIN_ID_LIST)); for(i=1;isuper_chain_id_list[i].chain_id=NULL; } j = extract_words(line, word); /* number of chains listed is num_words - 1 */ i=1; while(protein->super_chain_id_list[i].chain_id!=NULL) ++i; protein->super_chain_id_list[i].chain_id = (char *)calloc(MAX_CHAINS,sizeof(char)); for(n=2;n<=j;++n) protein->super_chain_id_list[i].chain_id[n-1] = word[n][0]; protein->super_chain_id_list[i].chain_id[j] = '\0'; fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "WEIGHT_ELECTROSTATICS")==0) { sscanf(line, "%s %lf", dummystring, &WEIGHT_ELECTROSTATICS); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "REBUILD_FLAG")==0) { sscanf(line, "%s %d", dummystring, &rebuild_flag); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "WEIGHT_1_4")==0 || strcmp(keyword, "WEIGHT_TORSION")==0) { sscanf(line, "%s %lf", dummystring, &WEIGHT_1_4); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "PID")==0 || strstr(keyword, "PROCESS_ID")!=0) { sscanf(line, "%s %d", dummystring, &GET_PID); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "PRECALC")!=0 && strstr(keyword, "LEVEL")!=0) { sscanf(line, "%s %d", dummystring, &protein->parameters.disk_lookup_table_flag); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "OVERALL_ENERGY_SCALE")==0) { sscanf(line, "%s %lf", dummystring, &OVERALL_ENERGY_SCALE); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "CTE_FLAG")==0) { sscanf_flag(line,dummystring,CTE_FLAG); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "NTE_FLAG")==0) { sscanf_flag(line,dummystring,NTE_FLAG); fgets(line,MXLINE_INPUT,input); } else 
if(strstr(keyword, "NUM")!=0 && strstr(keyword, "CPU")!=0) { sscanf(line, "%s %d", dummystring, &NUM_SYSTEM_CPU); if(NUM_SYSTEM_CPU > 1) { if(AVAILABLE_PROCESSORS_FILE == NULL) AVAILABLE_PROCESSORS_FILE = (char *)calloc(MAXLINE,sizeof(char)); sprintf(AVAILABLE_PROCESSORS_FILE,"temp.avail_processors.%d",GET_PID); FILE *num_system_cpu_file_ptr; num_system_cpu_file_ptr = fopen_file(AVAILABLE_PROCESSORS_FILE,"w"); for(i=1;i<=NUM_SYSTEM_CPU;++i) fprintf(num_system_cpu_file_ptr,"cpu_%d\n",i); fclose(num_system_cpu_file_ptr); } fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "OUTPUT")!=0 && (strstr(keyword, "COORD")!=0 || strstr(keyword, "STRUCT")!=0)) { sscanf_flag(line,dummystring,OUTPUT_COORD_FLAG); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "MULITSTATE")!=0 && strstr(keyword, "INIT")!=0) { sscanf(line, "%s %s", dummystring, keyword); convert_string_to_all_caps(keyword); if(strstr(keyword,"SCMF")!=0) // use scmf probabs with the target optimal seq FILTER_SEQUENCE_SPACE_FLAG = 2; if(strstr(keyword,"TEMPL")!=0) // use the template seq FILTER_SEQUENCE_SPACE_FLAG = -1; if(strstr(keyword,"OPT")!=0) // use the target optimal seq FILTER_SEQUENCE_SPACE_FLAG = 1; if(strstr(keyword,"RAND")!=0) // use a random seq FILTER_SEQUENCE_SPACE_FLAG = 3; fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "MULITSTATE")!=0 && strstr(keyword, "FUNC")!=0) { if(MULTISTATE_OBJ_FUNCT_INPUTFILENAME==NULL) MULTISTATE_OBJ_FUNCT_INPUTFILENAME = (char *)calloc(MAXLINE,sizeof(char)); sscanf(line, "%s %s", dummystring, MULTISTATE_OBJ_FUNCT_INPUTFILENAME); if(does_this_file_exist(MULTISTATE_OBJ_FUNCT_INPUTFILENAME)==0) { sprintf(line,"ERROR %s does not exist",MULTISTATE_OBJ_FUNCT_INPUTFILENAME); failure_report(line,"exit"); } } else if(strstr(keyword, "SEQ")!=0 && (strstr(keyword, "FILTER")!=0 || strstr(keyword, "REST")!=0) ) { if(SEQ_FILTER_INPUTFILENAME==NULL) SEQ_FILTER_INPUTFILENAME = (char *)calloc(MAXLINE,sizeof(char)); sscanf(line, "%s %s", dummystring, 
SEQ_FILTER_INPUTFILENAME); if(does_this_file_exist(SEQ_FILTER_INPUTFILENAME)==0) { sprintf(line,"ERROR %s does not exist",SEQ_FILTER_INPUTFILENAME); failure_report(line,"exit"); } } else if(strstr(keyword, "TARGET")!=0 && strstr(keyword, "OPT")!=0) { sscanf_flag(line,dummystring,OPTIMIZE_TARGET_STRUCTURE_ONLY_FLAG); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "FLIP")!=0) { sscanf_flag(line,dummystring,FLIP_ASN_GLN_HIS_FLAG); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "FINAL")!=0 && strstr(keyword, "SEQ")!=0 && (strstr(keyword, "OPT")!=0 || strstr(keyword, "QUENCH")!=0 || strstr(keyword, "MIN")!=0)) { sscanf_flag(line,dummystring,MINIMIZE_FINAL_SEQUENCE_FLAG); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "CLEAN_UP_FLAG")==0 || (strstr(keyword, "DELETE")!=0 && strstr(keyword, "FLAG")!=0)) { sscanf_flag(line,dummystring,CLEAN_UP_FLAG); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "PH")==0) { sscanf(line, "%s %lf", dummystring, &PH); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "CORESURF_MEA_SASA")==0) { sscanf(line, "%s %lf", dummystring, &CORESURF_MEA_SASA); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "CORESURF_MEA_BORN")==0) { sscanf(line, "%s %lf", dummystring, &CORESURF_MEA_BORN); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword,"MULTISTATE")!=0 && strstr(keyword,"JOBTYPE")!=0) { if(protein->parameters.sequence_algorithm!=NULL) { sscanf(line, "%s %s", dummystring, protein->parameters.sequence_algorithm); convert_string_to_all_caps(protein->parameters.sequence_algorithm); if(strstr(protein->parameters.sequence_algorithm,"GENE")!=0 || strstr(protein->parameters.sequence_algorithm,"GA")!=0) sprintf(protein->parameters.sequence_algorithm,"GA"); else if(strstr(protein->parameters.sequence_algorithm,"QUENCH")!=0 || strstr(protein->parameters.sequence_algorithm,"MIN")!=0 || strstr(protein->parameters.sequence_algorithm,"HQM")!=0) 
sprintf(protein->parameters.sequence_algorithm,"HQM"); else if(strstr(protein->parameters.sequence_algorithm,"SCAN")!=0) sprintf(protein->parameters.sequence_algorithm,"SCAN"); if(word_count(line)==3) { sscanf(line, "%s %s %s", dummystring, dummystring, keyword); convert_string_to_all_caps(keyword); /* use the sequence of the template/target structure as a starting point for multistate optimization */ if(strstr(keyword,"TEMPL")!=0) FILTER_SEQUENCE_SPACE_FLAG = -1; if(strstr(keyword,"SCMF")!=0) // use scmf probabs FILTER_SEQUENCE_SPACE_FLAG = 2; if(strstr(keyword,"OPT")!=0) // use the optimal seq for the target state FILTER_SEQUENCE_SPACE_FLAG = 1; if(strstr(keyword,"RAND")!=0) // use a random seq for the target state FILTER_SEQUENCE_SPACE_FLAG = 3; } } fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "IONIC_STRENGTH")==0) { sscanf(line, "%s %lf", dummystring, &IONIC_STRENGTH); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "MC_TF")==0) { sscanf(line, "%s %lf", dummystring, &protein->parameters.mc_Tf); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "MC_T0")==0) { sscanf(line, "%s %lf", dummystring, &protein->parameters.mc_T0); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "MC_DT")==0) { sscanf(line, "%s %lf", dummystring, &protein->parameters.mc_dT); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "GA_TF")==0) { sscanf(line, "%s %lf", dummystring, &protein->parameters.ga_Tf); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "GA_T0")==0) { sscanf(line, "%s %lf", dummystring, &protein->parameters.ga_T0); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "GA_DT")==0) { sscanf(line, "%s %lf", dummystring, &protein->parameters.ga_dT); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "VDW_RADII_SCALE")==0) { sscanf(line, "%s %lf", dummystring, &VDW_RADII_SCALE); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "INTERNAL_DIELECTRIC")==0 || strcmp(keyword, "EP")==0) { sscanf(line, "%s 
%lf", dummystring, &INTERNAL_DIELECTRIC); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "HBOND_DIELECTRIC")==0 || strcmp(keyword, "EHB")==0) { sscanf(line, "%s %lf", dummystring, &HBOND_DIELECTRIC); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "VDW_ATTRACTIVE_FACTOR")==0) { sscanf(line, "%s %lf", dummystring, &VDW_ATTRACTIVE_FACTOR); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "VDW_REPULSIVE_FACTOR")==0) { sscanf(line, "%s %lf", dummystring, &VDW_REPULSIVE_FACTOR); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "VDW_LINEAR_START_POINT")==0) { sscanf(line, "%s %lf", dummystring, &VDW_LINEAR_START_POINT); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "NICE")!=0) { sscanf_flag(line,dummystring,NICE_SLAVE_FLAG); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "TIME")!=0 && strstr(keyword, "RUN")!=0 && strstr(keyword, "SLAVE")==0) { sscanf(line, "%s %lf", dummystring, &MAX_OPTIMIZATION_TIME); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "TIME")!=0 && strstr(keyword, "RUN")!=0 && strstr(keyword, "SLAVE")!=0) { // will parse SLAVE_RUNTIME in the appropriate functions as needed fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "TIME")!=0 && strstr(keyword, "MASTER")!=0) { sscanf(line, "%s %lf", dummystring, &MASTER_OPTIMIZATION_TIME); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "MAX_NODES")!=0) { sscanf(line, "%s %d", dummystring, &protein->parameters.dee_max_nodes); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "E_BOUNDING")==0) { sscanf(line, "%s %lf", dummystring, &protein->parameters.dee_E_bounding); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "E_BOUNDING_CEILING")!=0) { sscanf(line, "%s %lf", dummystring, &protein->parameters.dee_E_bounding_ceiling); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "BOUNDING_FLAG")!=0) { sscanf_flag(line,dummystring,n); if(n==0) protein->parameters.dee_E_bounding=ENDFLAG; 
fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "COMB")!=0 && strstr(keyword, "EXHA")!=0 && strstr(keyword, "MAX")!=0 ) { sscanf(line, "%s %lf", dummystring, &protein->parameters.ln_exhaustive_search_max_combo); protein->parameters.ln_exhaustive_search_max_combo = log(protein->parameters.ln_exhaustive_search_max_combo); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "FILE")!=0 && (strstr(keyword, "PROCESSOR")!=0 || strstr(keyword, "CPU")!=0) ) { if(AVAILABLE_PROCESSORS_FILE==NULL) AVAILABLE_PROCESSORS_FILE=(char *)calloc(MXLINE_INPUT,sizeof(char)); sscanf(line, "%s %s", dummystring, AVAILABLE_PROCESSORS_FILE); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "BATCH_QUEUE_PREFIX")==0) { if(BATCH_QUEUE_PREFIX==NULL) BATCH_QUEUE_PREFIX=(char *)calloc(MAXLINE,sizeof(char)); sscanf(line, "%s %s", dummystring, BATCH_QUEUE_PREFIX); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "ROTAMER")!=0 && strstr(keyword, "LIB")==0) { if(custom_rotamer_file == NULL) custom_rotamer_file = (char *)calloc(MAXLINE,sizeof(char)); if(strstr(keyword, "FILE")!=0) sscanf(line,"%s %s",dummystring,custom_rotamer_file); else { sprintf(custom_rotamer_file,"temp.rotamer.%d",GET_PID); file_ptr = fopen_file(custom_rotamer_file,"a"); fprintf(file_ptr,"%s",line); fclose(file_ptr); } fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "RESPARAM")!=0) { RESPARAMFILE = (char *)calloc(MXLINE_INPUT,sizeof(char)); sscanf(line, "%s %s", dummystring, RESPARAMFILE); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "VDW_CUTOFF")==0) { sscanf(line, "%s %lf", dummystring, &VDW_CUTOFF); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "VDW_CLASH")!=0) { sscanf(line, "%s %lf", dummystring, &VDW_CLASH_ENERGY); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "MAX")!=0 && strstr(keyword, "MUT")!=0) { sscanf(line, "%s %d", dummystring, &MAX_MUTATIONS); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "UNSAT")!=0 && strstr(keyword, 
"HBOND")!=0 && (strstr(keyword, "REF")!=0 || strstr(keyword, "SPECIF")!=0) ) { sscanf(line, "%s %lf", dummystring, &UNSAT_HBOND_REFSTATE); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "EXECUTABLE_FILENAME")==0) { if(EXECUTABLE_FILENAME==NULL) EXECUTABLE_FILENAME = (char *)calloc(MXLINE_INPUT,sizeof(char)); sscanf(line, "%s %s", dummystring, EXECUTABLE_FILENAME); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "SLAVE")!=0 && strstr(keyword, "FILE")!=0) { protein->parameters.slave_file_list_filename = (char *)calloc(MXLINE_INPUT,sizeof(char)); sscanf(line, "%s %s", dummystring, protein->parameters.slave_file_list_filename); protein->parameters.disk_lookup_table_flag=2; /* build up lookup table as needed */ QUIET_FLAG=1; fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "EW")==0) { sscanf(line, "%s %lf", dummystring, &WATER_DIELECTRIC); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "GBSA_FLAG")==0 || strcmp(keyword, "SOLVATION_FLAG")==0) { sscanf_flag(line,dummystring,GB_FLAG); SASA_FLAG = GB_FLAG; fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "GB_FLAG")==0) { sscanf_flag(line,dummystring,GB_FLAG); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "HBOND_FUNCTION_FLAG")==0) { sscanf_flag(line,dummystring,HBOND_FUNCTION_FLAG); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "HBOND_FUNCTION_TYPE")==0) { sscanf(line,"%s %s", dummystring,dummystring); convert_string_to_all_caps(dummystring); if(strstr(dummystring,"10")!=0 && strstr(dummystring,"12")!=0) HBOND_FUNCTION_TYPE = 2; HBOND_FUNCTION_FLAG=1; fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "VDW_FLAG")==0) { sscanf_flag(line,dummystring,p); fgets(line,MXLINE_INPUT,input); if(p==0) WEIGHT_VDW = 0; } else if(strcmp(keyword, "IGNORE_DISULFIDE_FLAG")==0) { sscanf_flag(line,dummystring,IGNORE_DISULFIDE_FLAG); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "KEEP_CYS_FLAG")==0) { sscanf_flag(line,dummystring,KEEP_CYS_FLAG); 
fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "AVOID_NATIVE_ROTAMER_FLAG")==0) { sscanf_flag(line,dummystring,protein->parameters.avoid_wt_rotamer_flag); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "INTRA_ROTAMER_FLAG")==0) { sscanf_flag(line,dummystring,INTRA_ROTAMER_FLAG); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "ENERGY")!=0 && strstr(keyword, "PER")!=0 && strstr(keyword, "ATOM")!=0) { sscanf_flag(line,dummystring,OUTPUT_ENERGY_PER_ATOM_FLAG); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "QUIET")!=0) { sscanf_flag(line,dummystring,QUIET_FLAG); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "SCMF_QUENCH_FLAG")==0) { sscanf_flag(line,dummystring,protein->parameters.scmf_quench_flag); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "RANDOM_SEED")==0) { sscanf(line, "%s %d",dummystring, &random_seed_flag); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "FRACTION_HYDROPHOBIC_SASA_CUTOFF")==0) { sscanf(line, "%s %lf",dummystring, &FRACTION_HYDROPHOBIC_SASA_CUTOFF); SOLUBILITY_CUTOFF_FLAG = 1; fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "MAX")!=0 && strstr(keyword, "DIHED")!=0 && (strstr(keyword, "BKBN")!=0 || strstr(keyword, "BACKBONE")!=0 )) { sscanf(line, "%s %lf",dummystring, &protein->parameters.max_bkbn_delta_dihedral); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "TRANSFER_FREE_ENERGY_DENSITY_CUTOFF")==0) { sscanf(line, "%s %lf",dummystring, &TRANSFER_FREE_ENERGY_DENSITY_CUTOFF); SOLUBILITY_CUTOFF_FLAG = 1; fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "OVERALL_CHARGE")==0) { sscanf(line, "%s %s",dummystring, keyword); /* overall charge must be equal to this */ if(keyword[0] == '+' || keyword[0] == '-' || keyword[0] == '|') { if(keyword[0] == '+' || keyword[0] == '-') { i=1; while(keyword[i]!='\0') { dummystring[i-1] = keyword[i]; ++i; } dummystring[i-1] = '\0'; sscanf(dummystring,"%lf",&OVERALL_CHARGE); if(keyword[0] == '-') 
OVERALL_CHARGE = -OVERALL_CHARGE; CHARGE_EQUALITY_FLAG = 1; } else if(keyword[0] == '|') /* absolute value */ { i=1; while(keyword[i]!='|') { dummystring[i-1] = keyword[i]; ++i; } dummystring[i-1] = '\0'; CHARGE_EQUALITY_FLAG = 0; sscanf(dummystring,"%lf",&OVERALL_CHARGE); } } else if(keyword[0] == '>' || keyword[0] == '<') /* overall charge is > or < the value */ { if(keyword[1] == '+' || keyword[1] == '-') { i=2; while(keyword[i]!='\0') { dummystring[i-2] = keyword[i]; ++i; } dummystring[i-2] = '\0'; sscanf(dummystring,"%lf",&OVERALL_CHARGE); if(keyword[1] == '-') OVERALL_CHARGE = -OVERALL_CHARGE; if(keyword[0] == '>') CHARGE_EQUALITY_FLAG = 2; else CHARGE_EQUALITY_FLAG = -2; } else if(keyword[1] == '|') /* absolute value */ { i=2; while(keyword[i]!='|') { dummystring[i-2] = keyword[i]; ++i; } dummystring[i-2] = '\0'; sscanf(dummystring,"%lf",&OVERALL_CHARGE); if(keyword[0] == '>') CHARGE_EQUALITY_FLAG = 3; else CHARGE_EQUALITY_FLAG = -3; } else { fprintf(stderr,"ERROR overall_charge must have a sign or absolute value; ie:\n OVERALL_CHARGE >+5 or |5| or <|5| or >-5\n"); exit(1); } } else { fprintf(stderr,"ERROR overall_charge must have a sign or absolute value; ie:\n OVERALL_CHARGE >+5 or |5| or <|5| or >-5\n"); exit(1); } SOLUBILITY_CUTOFF_FLAG = 1; fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "SCMF_SOL")!=0) { sscanf(line, "%s %d",dummystring, &protein->parameters.number_scmf_solns); protein->parameters.scmf_quench_flag=1; fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "SCMF_SEED")==0) { sscanf(line, "%s %s", keyword, protein->parameters.scmf_seed); /* unbiased, random, weighted */ fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "SCMF_LAMBDA")==0) { sscanf(line, "%s %lf",dummystring, &protein->parameters.scmf_lambda); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "SCMF_CYCLES")!=0) { sscanf(line, "%s %d",dummystring, &protein->parameters.number_scmf_cycles); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, 
"MC_CYCLES")!=0 || strstr(keyword, "SA_CYCLES")!=0) { sscanf(line, "%s %d",dummystring, &protein->parameters.number_MC_cycles); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "MC_SOL")!=0 || strstr(keyword, "SA_SOL")!=0) { sscanf(line, "%s %d",dummystring, &protein->parameters.number_MC_solns); if(protein->parameters.number_MC_solns%2 != 0) protein->parameters.number_MC_solns = protein->parameters.number_MC_solns + 1; fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "POPULATION")!=0 || strstr(keyword, "POPSIZE")!=0 || strstr(keyword, "POP_SIZE")!=0) { sscanf(line, "%s %d", dummystring, &protein->parameters.pop_size); if(protein->parameters.pop_size%2!=0) /* popsize must be even; if it's odd, make it even */ protein->parameters.pop_size = protein->parameters.pop_size+1; fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "GA_CYCLES")!=0) { sscanf(line, "%s %d",dummystring, &protein->parameters.number_GA_cycles); if(protein->parameters.number_GA_cycles%2!=0) protein->parameters.number_GA_cycles=protein->parameters.number_GA_cycles+1; fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "SOL")!=0 && strstr(keyword, "GA")!=0) { sscanf(line, "%s %d",dummystring, &protein->parameters.num_GA_solns_per_cycle); if(protein->parameters.pop_size < protein->parameters.num_GA_solns_per_cycle) protein->parameters.pop_size = protein->parameters.num_GA_solns_per_cycle; fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "CONVERGENCE")!=0 && strstr(keyword, "GA")!=0) { sscanf(line, "%s %d",dummystring, &protein->parameters.ga_convergence); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "CONVERGENCE")!=0 && strstr(keyword, "MC")!=0) { sscanf(line, "%s %d",dummystring, &protein->parameters.MC_convergence); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "SASA_FLAG")==0) { sscanf_flag(line,dummystring,SASA_FLAG); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "COULOMB_FLAG")==0) { 
sscanf_flag(line,dummystring,COULOMB_FLAG); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "TORSION_FLAG")==0) { sscanf_flag(line,dummystring,TORSION_FLAG); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "LOGFILE_FLAG")==0) { sscanf_flag(line,dummystring,LOGFILE_FLAG); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "LOGFILE_PERIOD")!=0) { sscanf(line,"%s %d",dummystring,&LOGFILE_PERIOD); LOGFILE_FLAG=1; fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "LIST")!=0 && strstr(keyword, "POS")!=0) { strcpy(protein->parameters.algorithm, "LIST_FLOATING_POSITIONS"); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "LOCAL_MINIMIZATION_FLAG")==0) { sscanf_flag(line,dummystring,LOCAL_MINIMIZATION_FLAG); if(LOCAL_MINIMIZATION_FLAG == 1) // for local min, don't use the modified LJ { VDW_LINEAR_START_POINT = 1000; VDW_ATTRACTIVE_FACTOR = 1.0; VDW_REPULSIVE_FACTOR = 1.0; MINIMIZE_METHYL_FLAG = 1; } if(LOCAL_MINIMIZATION_FLAG == 0) MINIMIZE_METHYL_FLAG = 0; fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "MINIMIZE_ROTAMERS_FLAG")==0) { sscanf_flag(line,dummystring,LOCAL_MINIMIZATION_FLAG); if(LOCAL_MINIMIZATION_FLAG==1) LOCAL_MINIMIZATION_FLAG=2; fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "MINIMIZE_METHYL_FLAG")==0) { sscanf_flag(line,dummystring,MINIMIZE_METHYL_FLAG); if(MINIMIZE_METHYL_FLAG == 1) LOCAL_MINIMIZATION_FLAG = 1; fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "LOTS_OF_ROTAMERLETS_FLAG")==0) { sscanf_flag(line,dummystring,LOTS_OF_ROTAMERLETS_FLAG); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "CHARGES_PH_INDEPENDENT_FLAG")==0) { sscanf_flag(line,dummystring,CHARGES_PH_INDEPENDENT_FLAG); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "CHARGES_PH_DEPENDENT_FLAG")==0) { sscanf_flag(line,dummystring,CHARGES_PH_INDEPENDENT_FLAG); if(CHARGES_PH_INDEPENDENT_FLAG==1) CHARGES_PH_INDEPENDENT_FLAG=0; else CHARGES_PH_INDEPENDENT_FLAG=1; fgets(line,MXLINE_INPUT,input); } 
else if(strstr(keyword, "SOLUB")!=0) { sscanf_flag(line,dummystring,SOLUBILITY_CUTOFF_FLAG); if(SOLUBILITY_CUTOFF_FLAG == 1) SASA_FLAG = 1; fgets(line,MXLINE_INPUT,input); } else if(strncmp(keyword, "SPECIF",6)==0) { sscanf_flag(line,dummystring,SPECIFICITY_FLAG); if(SPECIFICITY_FLAG == 0) HBOND_SPECIFICITY_FLAG = 0; if(SPECIFICITY_FLAG == 1) HBOND_SPECIFICITY_FLAG = 1; fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "SPECIF")!=0 && (strstr(keyword, "HBOND")!=0 || strstr(keyword, "HYDROGEN")!=0 )) { sscanf_flag(line,dummystring,HBOND_SPECIFICITY_FLAG); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "LOOKUP_TABLE_DIRECTORY")==0) { sscanf(line, "%s %s",dummystring, protein->parameters.lookup_energy_table_directory); flag = 0; i=0; while(protein->parameters.lookup_energy_table_directory[i] != 0) { if(protein->parameters.lookup_energy_table_directory[i] == ':') flag = 1; ++i; } if(flag == 1) /* the lookup table server is specified, so extract it */ { LOOKUP_TABLE_SERVER = (char *)calloc(MAXLINE,sizeof(char)); i=0; while(protein->parameters.lookup_energy_table_directory[i] != ':') { LOOKUP_TABLE_SERVER[i] = protein->parameters.lookup_energy_table_directory[i]; ++i; } LOOKUP_TABLE_SERVER[i] = 0; ++i; /* move past ':' */ j=0; while(protein->parameters.lookup_energy_table_directory[i] != 0) { dummystring[j] = protein->parameters.lookup_energy_table_directory[i]; ++i; ++j; } dummystring[j] = 0; i=j; strcpy(protein->parameters.lookup_energy_table_directory, dummystring); } if(protein->parameters.lookup_energy_table_directory[i-1] == '/') { protein->parameters.lookup_energy_table_directory[i-1] = 0; } fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "OTHER")!=0) { sscanf(line, "%s %s", keyword, dummystring); convert_string_to_all_caps(dummystring); if(strcmp(dummystring, "ALL")==0) protein->parameters.neighbordist = 1000; else if(strcmp(dummystring, "NEIGHBORS")==0) protein->parameters.neighbordist = 0; else if(strcmp(dummystring, "NONE")==0) 
protein->parameters.neighbordist = TINY; else if(strstr(dummystring, "NEAR")!=0) protein->parameters.neighbordist = PI_PLUS_LN10_PLUS_SQRT2_MINUS_E_1e5; neighbors_define_flag=1; fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "PREFIX") != 0) { if(protein->parameters.output_prefix == NULL) protein->parameters.output_prefix = (char *)calloc(MXLINE_INPUT, sizeof(char)); sscanf(line, "%s %s",dummystring, protein->parameters.output_prefix); /* the prefix is a path; need to get output_prefix_sans_path */ if(strstr(protein->parameters.output_prefix,"/")!=0) { protein->parameters.output_prefix_sans_path = (char *)calloc(MXLINE_INPUT,sizeof(char)); i=0; while(protein->parameters.output_prefix[i]!='\0') ++i; while(protein->parameters.output_prefix[i]!='/') --i; ++i; j=0; while(protein->parameters.output_prefix[i]!='\0') { protein->parameters.output_prefix_sans_path[j] = protein->parameters.output_prefix[i]; ++i; ++j; } protein->parameters.output_prefix_sans_path[j] = protein->parameters.output_prefix[i]; } else protein->parameters.output_prefix_sans_path = protein->parameters.output_prefix; fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "GA") == 0 || strcmp(keyword, "GENETIC_ALGORITHM") == 0) { strcpy(protein->parameters.algorithm, "GA"); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "SCMF")==0 || strstr(keyword, "MEAN_FIELD_OPTIMIZATION")!=0) { strcpy(protein->parameters.algorithm, "SCMF"); SOLUBILITY_CUTOFF_FLAG = 0; fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "FASTER")==0) { strcpy(protein->parameters.algorithm, "FASTER"); if(protein->parameters.number_MC_cycles == 2500) protein->parameters.number_MC_cycles = 10; fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "DOCK")!=0 && strstr(keyword, "RIGID")!=0) { protein->parameters.mutation_freq = 0.025; if(protein->parameters.pop_size == 500) protein->parameters.pop_size = 100; protein->parameters.ga_convergence = 100; strcpy(protein->parameters.algorithm, "RIGID_DOCK"); 
if(strstr(keyword, "MIN")!=0 || strstr(keyword, "LOCAL")!=0) { if(strstr(keyword, "LOCAL")!=0) protein->parameters.dock_local_flag = 1; else protein->parameters.dock_local_flag = 2; } fgets(line,MXLINE_INPUT,input); } else if(strncmp(keyword, "PK",2)==0) { strcpy(protein->parameters.algorithm, "SCMF_PK"); SOLUBILITY_CUTOFF_FLAG = 0; SPECIFICITY_FLAG=0; HBOND_SPECIFICITY_FLAG=0; CHARGES_PH_INDEPENDENT_FLAG=1; PH = ENDFLAG; LOGFILE_FLAG = 0; MINIMIZE_FINAL_SEQUENCE_FLAG=0; fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "MC")==0 || strcmp(keyword, "MONTE_CARLO")==0 || strcmp(keyword, "SA")==0 || strcmp(keyword, "SIMULATED_ANNEALLING")==0) { strcpy(protein->parameters.algorithm, "MC"); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "MC_GA")==0 || strcmp(keyword, "SA_GA")==0 ) { strcpy(protein->parameters.algorithm, "MC_GA"); fgets(line,MXLINE_INPUT,input); protein->parameters.number_GA_cycles = 2; } else if(strcmp(keyword, "DEE")==0 || strcmp(keyword, "DEAD_END_ELIMINATION")==0 ) { strcpy(protein->parameters.algorithm, "DEE"); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "LOOKUP_TABLE_SLAVE")==0) { strcpy(protein->parameters.algorithm, "LOOKUP_TABLE_SLAVE"); fgets(line,MXLINE_INPUT,input); LOOKUP_TABLE_SLAVE_FLAG=1; QUIET_FLAG=1; } else if(strcmp(keyword, "POINT_ENERGY")==0 || ( (strstr(keyword,"SASA")!=0 || strstr(keyword,"AREA")!=0) && strstr(keyword,"CALC")!=0 ) ) { strcpy(protein->parameters.algorithm, "POINT_ENERGY"); if((strstr(keyword,"SASA")!=0 || strstr(keyword,"AREA")!=0) && strstr(keyword,"CALC")!=0) strcpy(protein->parameters.algorithm, "POINT_ENERGY_SASA"); fgets(line,MXLINE_INPUT,input); LOGFILE_FLAG=0; } else if(strstr(keyword, "ENERGY_PROFILE")!=0) { strcpy(protein->parameters.algorithm, "ENERGY_PROFILE_TABLE"); fgets(line,MXLINE_INPUT,input); LOGFILE_FLAG=0; } else if(strstr(keyword, "COMPLEX_FORMATION_ENERGY")!=0 || strstr(keyword, "SPLIT_COMPLEX")!=0 || strstr(keyword, "SEPARATE_COMPLEX")!=0 ) { /* optimization 
algorithm not specificied */ if(strstr(keyword, "COMPLEX_FORMATION_ENERGY") == &keyword[0] || strstr(keyword, "SPLIT_COMPLEX") == &keyword[0] || strstr(keyword, "SEPARATE_COMPLEX") == &keyword[0]) strcpy(protein->parameters.algorithm,"COMPLEX_FORMATION_ENERGY"); else strcpy(protein->parameters.algorithm,keyword); if(strstr(keyword, "COMPLEX_FORMATION_ENERGY")!=0) protein->calc_complex_residue_energy_flag = 1; fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "HBUILD")==0) { strcpy(protein->parameters.algorithm, "HBUILD"); protein->parameters.max_iterations = ENDFLAG; /* just build hydrogens and return structure */ fgets(line,MXLINE_INPUT,input); LOGFILE_FLAG=0; } else if((strstr(keyword, "ADJUST")!=0 || strstr(keyword, "IDEAL")!=0) && (strstr(keyword,"TORS")!=0 || strstr(keyword,"DIHED")!=0 || strstr(keyword,"GEO")!=0) && strstr(keyword,"BKBN")!=0 || strstr(keyword,"BACKBONE")!=0 ) { strcpy(protein->parameters.algorithm, "ADJUST_BACKBONE_TORSIONS"); protein->parameters.rebuild_backbone_flag = 1; protein->parameters.max_function_calls = INT_MAX-10; protein->parameters.max_iterations = INT_MAX-10; fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "MINIMIZE")==0) { strcpy(protein->parameters.algorithm, "MINIMIZE"); protein->parameters.rebuild_backbone_flag = 0; stopflag=0; while(stopflag==0) { ++linenum; fgets(line,MXLINE_INPUT,input); sscanf(line, "%s", keyword); convert_string_to_all_caps(keyword); if(strcmp(keyword, "MAX_ITERATIONS")==0) sscanf(line, "%s %d", dummystring, &protein->parameters.max_iterations); else if(strstr(keyword, "BACKBONE")!=0) { protein->parameters.rebuild_backbone_flag=1; } else if(strstr(keyword, "MAX_FUNCTION_CALLS")!=0) sscanf(line, "%s %d", dummystring, &protein->parameters.max_function_calls); else if(strstr(keyword, "RESTRAIN")!=0) { if(word_count(line)==2) sscanf(line,"%s %s",keyword, dummystring); else { sscanf(line,"%s",keyword); strcpy(keyword,dummystring); } convert_string_to_all_caps(keyword); 
convert_string_to_all_caps(dummystring); if(strstr(dummystring,"UN")!=0) RESTRAIN_MINIMIZATION_FLAG=0; if(strstr(dummystring,"DIHED")!=0) RESTRAIN_MINIMIZATION_FLAG=1; if(strstr(dummystring,"RMSD")!=0) RESTRAIN_MINIMIZATION_FLAG=2; if(RESTRAIN_MINIMIZATION_FLAG == 0) RESTRAIN_MINIMIZATION_FLAG = ENDFLAG; } else stopflag=1; } SASA_FLAG = 0; GB_FLAG = 0; } else if(strcmp(keyword, "JOBTYPE")==0 || strcmp(keyword, "SLAVE_JOBTYPE")==0) { sscanf(line, "%s %s", dummystring, keyword); convert_string_to_all_caps(keyword); if(strcmp(keyword, "GA") == 0 || strcmp(keyword, "GENETIC_ALGORITHM") == 0) { strcpy(protein->parameters.algorithm, "GA"); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "SCMF")==0 || strstr(keyword, "MEAN_FIELD_OPTIMIZATION")!=0) { strcpy(protein->parameters.algorithm, "SCMF"); SOLUBILITY_CUTOFF_FLAG = 0; fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "FASTER")==0) { strcpy(protein->parameters.algorithm, "FASTER"); if(protein->parameters.number_MC_cycles == 2500) protein->parameters.number_MC_cycles = 10; fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "DOCK")!=0 && strstr(keyword, "RIGID")!=0) { protein->parameters.mutation_freq = 0.025; if(protein->parameters.pop_size == 500) protein->parameters.pop_size = 100; protein->parameters.ga_convergence = 100; strcpy(protein->parameters.algorithm, "RIGID_DOCK"); if(strstr(keyword, "MIN")!=0 || strstr(keyword, "LOCAL")!=0) { if(strstr(keyword, "LOCAL")!=0) protein->parameters.dock_local_flag = 1; else protein->parameters.dock_local_flag = 2; } fgets(line,MXLINE_INPUT,input); } else if(strncmp(keyword, "PK",2)==0) { strcpy(protein->parameters.algorithm, "SCMF_PK"); SOLUBILITY_CUTOFF_FLAG = 0; SPECIFICITY_FLAG=0; HBOND_SPECIFICITY_FLAG=0; CHARGES_PH_INDEPENDENT_FLAG=1; PH = ENDFLAG; LOGFILE_FLAG = 0; MINIMIZE_FINAL_SEQUENCE_FLAG=0; fgets(line,MXLINE_INPUT,input); } else if( strstr(keyword, "ALA")!=0 && strstr(keyword, "SHAV")!=0 ) { strcpy(protein->parameters.algorithm, 
"ALANINE_SHAVE"); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "INTERFACE")!=0 && strstr(keyword, "ALA")!=0) { strcpy(protein->parameters.algorithm, "INTERFACE_ALANINE_SCAN"); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "INTERFACE")!=0 && strstr(keyword, "SAT")!=0) { strcpy(protein->parameters.algorithm, "INTERFACE_SATURATION_MUTAGENESIS"); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "INTERFACE")!=0 && strstr(keyword, "DESIGN")!=0) { strcpy(protein->parameters.algorithm, "INTERFACE_DESIGN"); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "ALIGN")!=0 || strstr(keyword, "SUPERIMPOSE")!=0 || strstr(keyword, "OVERLAY")!=0) { strcpy(protein->parameters.algorithm, "ALIGN_STRUCTURES"); fgets(line,MXLINE_INPUT,input); LOGFILE_FLAG = 0; FLIP_ASN_GLN_HIS_FLAG = 0; } else if(strcmp(keyword, "MC")==0 || strcmp(keyword, "MONTE_CARLO")==0 || strcmp(keyword, "SA")==0 || strcmp(keyword, "SIMULATED_ANNEALLING")==0) { strcpy(protein->parameters.algorithm, "MC"); fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "MC_GA")==0 || strcmp(keyword, "SA_GA")==0) { strcpy(protein->parameters.algorithm, "MC_GA"); fgets(line,MXLINE_INPUT,input); protein->parameters.number_GA_cycles = 2; } else if(strcmp(keyword, "DEE")==0 || strcmp(keyword, "DEAD_END_ELIMINATION")==0 ) { strcpy(protein->parameters.algorithm, "DEE"); fgets(line,MXLINE_INPUT,input); } else if(strstr(keyword, "LIST")!=0 && strstr(keyword, "POS")!=0) { strcpy(protein->parameters.algorithm, "LIST_FLOATING_POSITIONS"); fgets(line,MXLINE_INPUT,input); LOGFILE_FLAG=0; } else if(strcmp(keyword, "LOOKUP_TABLE_SLAVE")==0) { strcpy(protein->parameters.algorithm, "LOOKUP_TABLE_SLAVE"); fgets(line,MXLINE_INPUT,input); LOOKUP_TABLE_SLAVE_FLAG=1; QUIET_FLAG=1; } else if(strcmp(keyword, "POINT_ENERGY")==0 || ( (strstr(keyword,"SASA")!=0 || strstr(keyword,"AREA")!=0) && strstr(keyword,"CALC")!=0) ) { strcpy(protein->parameters.algorithm, "POINT_ENERGY"); if((strstr(keyword,"SASA")!=0 
|| strstr(keyword,"AREA")!=0) && strstr(keyword,"CALC")!=0) strcpy(protein->parameters.algorithm, "POINT_ENERGY_SASA"); fgets(line,MXLINE_INPUT,input); LOGFILE_FLAG=0; } else if(strstr(keyword, "ENERGY_PROFILE")!=0) { strcpy(protein->parameters.algorithm, "ENERGY_PROFILE_TABLE"); fgets(line,MXLINE_INPUT,input); LOGFILE_FLAG=0; } else if(strstr(keyword, "COMPLEX_FORMATION_ENERGY")!=0 || strstr(keyword, "SPLIT_COMPLEX")!=0 || strstr(keyword, "SEPARATE_COMPLEX")!=0 ) { /* optimization algorithm not specificied */ if(strstr(keyword, "COMPLEX_FORMATION_ENERGY") == &keyword[0] || strstr(keyword, "SPLIT_COMPLEX") == &keyword[0] || strstr(keyword, "SEPARATE_COMPLEX") == &keyword[0]) strcpy(protein->parameters.algorithm,"COMPLEX_FORMATION_ENERGY"); else strcpy(protein->parameters.algorithm,keyword); if(strstr(keyword, "COMPLEX_FORMATION_ENERGY")!=0) protein->calc_complex_residue_energy_flag = 1; fgets(line,MXLINE_INPUT,input); } else if(strcmp(keyword, "MINIMIZE")==0) { strcpy(protein->parameters.algorithm, "MINIMIZE"); protein->parameters.rebuild_backbone_flag = 0; stopflag=0; while(stopflag==0) { ++linenum; fgets(line,MXLINE_INPUT,input); sscanf(line, "%s", keyword); convert_string_to_all_caps(keyword); if(strcmp(keyword, "MAX_ITERATIONS")==0) sscanf(line, "%s %d", dummystring, &protein->parameters.max_iterations); else if(strstr(keyword, "BACKBONE")!=0) { protein->parameters.rebuild_backbone_flag=1; } else if(strstr(keyword, "MAX_FUNCTION_CALLS")!=0) sscanf(line, "%s %d", dummystring, &protein->parameters.max_function_calls); else if(strstr(keyword, "RESTRAIN")!=0) { if(word_count(line)==2) sscanf(line,"%s %s",keyword, dummystring); else { sscanf(line,"%s",keyword); strcpy(keyword,dummystring); } convert_string_to_all_caps(keyword); convert_string_to_all_caps(dummystring); if(strstr(dummystring,"UN")!=0) RESTRAIN_MINIMIZATION_FLAG=0; if(strstr(dummystring,"DIHED")!=0) RESTRAIN_MINIMIZATION_FLAG=1; if(strstr(dummystring,"RMSD")!=0) RESTRAIN_MINIMIZATION_FLAG=2; 
if(RESTRAIN_MINIMIZATION_FLAG == 0) RESTRAIN_MINIMIZATION_FLAG = ENDFLAG; } else stopflag=1; } SASA_FLAG = 0; GB_FLAG = 0; } else if(strcmp(keyword, "HBUILD")==0) { strcpy(protein->parameters.algorithm, "HBUILD"); protein->parameters.max_iterations = ENDFLAG; /* just build hydrogens and return structure */ fgets(line,MXLINE_INPUT,input); LOGFILE_FLAG=0; } else if((strstr(keyword, "ADJUST")!=0 || strstr(keyword, "IDEAL")!=0) && (strstr(keyword,"TORS")!=0 || strstr(keyword,"DIHED")!=0 || strstr(keyword,"GEO")!=0) && strstr(keyword,"BKBN")!=0 || strstr(keyword,"BACKBONE")!=0 ) { strcpy(protein->parameters.algorithm, "ADJUST_BACKBONE_TORSIONS"); protein->parameters.rebuild_backbone_flag = 1; protein->parameters.max_function_calls = INT_MAX-10; protein->parameters.max_iterations = INT_MAX-10; fgets(line,MXLINE_INPUT,input); } else { sprintf(dummystring, "ERROR line %d: Cannot recognize %s", linenum, line); failure_report(dummystring,"exit"); } } else { sprintf(dummystring, "ERROR line %d: Cannot recognize %s", linenum, line); failure_report(dummystring,"exit"); } } } /* END reading file and parameter block */ if(SCMF_TEMPERATURE==ENDFLAG) SCMF_TEMPERATURE = TEMPERATURE; /* check output prefix; if already in use, modify protein->parameters.output_prefix */ sprintf(dummystring,"%s.pdb",protein->parameters.output_prefix); if(does_this_file_exist(dummystring)==1) { fprintf(stderr,"WARNING OUTPUT_PREFIX %s in use; will rename to ",protein->parameters.output_prefix); k=0; while(does_this_file_exist(dummystring)==1) { ++k; sprintf(dummystring,"%s_%d.pdb",protein->parameters.output_prefix,k); } sprintf(protein->parameters.output_prefix,"%s_%d",protein->parameters.output_prefix,k); fprintf(stderr,"%s\n",protein->parameters.output_prefix); } /* the backbone is fixed, so the minimization is already restrained */ if(protein->parameters.rebuild_backbone_flag == 0) RESTRAIN_MINIMIZATION_FLAG=ENDFLAG; /* seed random number generator w/ time if not specified */ if(random_seed_flag == 
ENDFLAG) random_seed_flag = time(NULL); srand(random_seed_flag); IDUM = -1*rand(); rand2(); protein->temperature = TEMPERATURE; protein->pH = PH; protein->ionic_strength = IONIC_STRENGTH; if(protein->num_competitors>0) { for(i=1;i<=protein->num_competitors;++i) if(strcmp(protein->Template_filename,protein->competitor_Template[i])==0) { fprintf(stderr,"ERROR COMPETITOR structure file %s is identical to the TARGET or TEMPLATE structure file\n", protein->competitor_Template[i]); exit(1); } } /* define MAX_ATOMS, MAX_RESIDUES etc from the size of the size of the pdb file */ /* these variables are needed by read_forcefield for initializing SASA structures */ /* won't actually read in the file until later */ readpdbfile(pdbfile, protein, 0); if(MAXLINE<=MAX_RESIDUES+50) MAXLINE = MAX_RESIDUES+50; MXLINE_INPUT = MAXLINE; protein->atomparam = (ATOMPARAM *)calloc(MAXATOMTYPES, sizeof(ATOMPARAM)); protein->resparam = (RESPARAM *)calloc(MAXRESTYPES, sizeof(RESPARAM)); protein->rotamerlib = (ROTAMERLIB *)calloc(MAXRESTYPES, sizeof(ROTAMERLIB)); read_forcefield(forcefieldfile, protein->atomparam, protein->resparam, protein->rotamerlib); MAX_ATOMS += MAX_RES_SIZE*protein->num_ligands; flag2=move_past_comments_and_empty_lines(line, input); if(strncmp(line,"END",3)==0) /* formal end of file and parameter block; advance to next block or EOF */ { if(fgets(line, MXLINE_INPUT, input)!=NULL) { flag2=move_past_comments_and_empty_lines(line, input); } } if(flag2!=0) /* not end of file */ convert_string_to_all_caps(line); else /* at the end of file; but shouldn't be! 
exit */ { fprintf(stderr,"ERROR Must end main block with END or must specify one of the following:\n"); fprintf(stderr,"\tVARIABLE_POSITIONS, SCANNING_POSITIONS, SEQUENCE, INCLUDE, EXCLUDE, FIXED, FLOAT\n"); exit(1); } // slaves have this turned off if(strcmp(protein->parameters.algorithm, "LOOKUP_TABLE_SLAVE")==0) MAX_MUTATIONS = 0; if(protein->parameters.slave_file_list_filename!=NULL) MAX_MUTATIONS = 0; if(protein->num_competitors == 0) { // MAX_MUTATIONS only for GA if(strncmp(protein->parameters.algorithm, "GA",2)!=0) if(strcmp(protein->parameters.algorithm, "LIST_FLOATING_POSITIONS")!=0) { if(MAX_MUTATIONS > 0) { MAX_MUTATIONS = 0; failure_report("WARNING MAX_MUTATIONS > 0 allowed only for GA jobs; switching MAX_MUTATIONS off","warn"); } } } else { if(strncmp(protein->parameters.sequence_algorithm, "GA",2)!=0) if(strcmp(protein->parameters.algorithm, "LIST_FLOATING_POSITIONS")!=0) if(MAX_MUTATIONS > 0) { failure_report("ERROR MAX_MUTATIONS > 0 allowed only for GA jobs","exit"); } } /* implies a rotamer optimization job of some sort */ if(strncmp(line,"VAR",3)==0 || strncmp(line,"SEQUENCE",8)==0) { /* default to FASTER if rotamer optimization jobtype is not defined explicitly */ if(strcmp(protein->parameters.algorithm, "POINT_ENERGY")==0) { strcpy(protein->parameters.algorithm, DEFAULT_ROTAMER_JOB); if(strcmp(protein->parameters.algorithm,"FASTER")==0) if(protein->parameters.number_MC_cycles == 2500) protein->parameters.number_MC_cycles = 10; } else if(strcmp(protein->parameters.algorithm, "ENERGY_PROFILE_TABLE")==0) /* spit out the energy profile table */ strcpy(protein->parameters.algorithm, "MC_GA_ENERGY_PROFILE_TABLE"); else if(strstr(protein->parameters.algorithm,"COMPLEX_FORMATION_ENERGY")!=0) { if(strcmp(protein->parameters.algorithm,"COMPLEX_FORMATION_ENERGY")==0) /* no algorithm specified, so use default */ { strcpy(protein->parameters.algorithm, "MC_GA_COMPLEX_FORMATION_ENERGY"); protein->parameters.number_GA_cycles = 2; } } } else 
if(strncmp(line,"SCAN",4)==0) { /* if not alanine_shave, then optimize rotamers for each position */ if(strcmp(protein->parameters.algorithm, "ALANINE_SHAVE")!=0) sprintf(protein->parameters.algorithm, "SCANNING_MUTAGENESIS"); } else if(strcmp(protein->parameters.algorithm,"COMPLEX_FORMATION_ENERGY")==0) strcpy(protein->parameters.algorithm, "STATIC_COMPLEX_FORMATION_ENERGY"); if(protein->transform_matrix!=NULL) if(strcmp(protein->parameters.algorithm, "POINT_ENERGY")==0) { strcpy(protein->parameters.algorithm,"HBUILD"); LOGFILE_FLAG=0; rebuild_flag=0; QUIET_FLAG=1; OUTPUT_ENERGY_PER_ATOM_FLAG = 0; } if(OLD_CHAIN_ID != NEW_CHAIN_ID) if(strcmp(protein->parameters.algorithm, "POINT_ENERGY")==0) { strcpy(protein->parameters.algorithm,"HBUILD"); LOGFILE_FLAG=0; rebuild_flag=0; QUIET_FLAG=1; OUTPUT_ENERGY_PER_ATOM_FLAG = 0; } /* only for point-energy, set partial atomic charges set to the pH-dependent weighted average charge state */ /* for other jobtypes, the charges will be set later after resparam has been hooked up w/ variable positions */ if(strstr(protein->parameters.algorithm, "POINT_ENERGY")!=0 || strcmp(protein->parameters.algorithm, "ENERGY_PROFILE_TABLE")==0 || strcmp(protein->parameters.algorithm, "STATIC_COMPLEX_FORMATION_ENERGY")==0) LOGFILE_FLAG=0; if(rebuild_flag==ENDFLAG) /* rebuild_flag has not been defined by the user explicitly; use defaults */ { if(strstr(protein->parameters.algorithm, "POINT_ENERGY")!=0 || strcmp(protein->parameters.algorithm, "ENERGY_PROFILE_TABLE")==0 || strcmp(protein->parameters.algorithm, "STATIC_COMPLEX_FORMATION_ENERGY")==0) rebuild_flag=0; /* point energy default is not to rebuild */ else rebuild_flag=1; } protein->Template = (pdbATOM *)calloc(MAX_ATOMS, sizeof(pdbATOM)); protein->mini_Template = (mini_pdbATOM *)calloc(MAX_ATOMS, sizeof(mini_pdbATOM)); /* read template pdb file for real this time */ if(readpdbfile(pdbfile, protein, rebuild_flag) == 0) { fprintf(stderr,"ERROR Problem w/ MAX_ATOMS or MAX_RESIDUES when reading 
%s\n",pdbfile); exit(1); } /* we have ligands.....take care of some book-keeping */ if(protein->num_ligands!=0) initialize_stuff_for_ligands(protein); if(custom_rotamer_file != NULL) { rotamer_build(protein,custom_rotamer_file); sprintf(dummystring,"temp.rotamer.%d",GET_PID); if(strcmp(dummystring,custom_rotamer_file)==0) rm_file(custom_rotamer_file); free_memory(custom_rotamer_file); } if(LOGFILE_FLAG!=0) { sprintf(dummystring,"%s.log",protein->parameters.output_prefix); logfile_ptr = fopen_file(dummystring,"w"); now = time(NULL); fprintf(logfile_ptr,"LOGFILE for inputfile %s\t%sRANDOM_SEED %d\n",inputfilename, ctime(&now),random_seed_flag); fclose(logfile_ptr); } /* list of included or excluded residues for adjusting backbone torsions or minimization */ /* include = score rmsd only for these positions exclude = don't include these positions in the rmsd scoring for minimization, fixed = keep these positions fixed; ie: don't allow dihedrals for these positions to move float = only allow these positions to move */ /* neighboring residues are permitted to float as defined under "OTHER" */ stopflag = 0; if(strstr(line, "INCLUDE")!=0 || strstr(line, "EXCLUDE")!=0 || strstr(line, "FIX")!=0 || strstr(line, "FLOAT")!=0) { if(strcmp(protein->parameters.algorithm,"MINIMIZE")!=0 && strcmp(protein->parameters.algorithm,"HBUILD")!=0 && strcmp(protein->parameters.algorithm,"ADJUST_BACKBONE_TORSIONS")!=0) { sscanf(line,"%s",keyword); fprintf(stderr,"ERROR %s is not an option for JOBTYPE %s\n",keyword, protein->parameters.algorithm); exit(1); } if(RESTRAIN_MINIMIZATION_FLAG == 0) /* since we are explicitly defining floating or fixed residues, it is */ RESTRAIN_MINIMIZATION_FLAG = 1; /* implied that we want things restrained; */ /* if the user defines RESTRAIN_MINIMIZATION_FLAG 0, then */ /* RESTRAIN_MINIMIZATION_FLAG = ENDFLAG here; changed to 0 below */ while(stopflag == 0) { sscanf(line,"%s",keyword); convert_string_to_all_caps(keyword); incld_exld_positions = (int 
*)calloc(MAX_RESIDUES, sizeof(int)); fp = ftell(input); /* count the number of positions listed by scrolling down the file; reset and then actually read them */ fseek(input, fp, 0); num_positions = 0; flag=0; strcpy(dummystring," "); fgets(line, MXLINE_INPUT, input); /* move past keyword */ while(flag==0) /* read until END */ { if(strncmp(line,"END",3)!=0) ++num_positions; else flag=1; fgets(line, MXLINE_INPUT, input); } fseek(input, fp, 0); /* reset pointer */ i=1; for(p=1;p<=num_positions;++p) { fgets(line, MXLINE_INPUT, input); sscanf(line, "%s",seqpos_string); incld_exld_positions[i] = parse_seqpos_string(seqpos_string, protein->seqpos_text_map); ++i; } last = i-1; sort_int(&first,&last,incld_exld_positions); incld_exld_positions[i] = ENDFLAG; // copy incld_exld_positions to the appropriate structure in PROTEIN if(strstr(keyword, "INCLUDE")!=0) protein->torsion_include_res = incld_exld_positions; else if(strstr(keyword, "FLOAT")!=0) { protein->min_float_res = incld_exld_positions; /* neighbor movement relevant only for minimization not torsion adjustment w/ floating residues specified */ /* if FLOAT defined, then moving residues must be explicitly defined */ if(neighbors_define_flag == 1) if(strcmp(protein->parameters.algorithm,"MINIMIZE")==0) { tempseqVarpos = (int *)calloc(MAX_RESIDUES,sizeof(int)); tempseqVarpos1 = (int *)calloc(MAX_RESIDUES,sizeof(int)); find_neighbors(protein->min_float_res, protein->Template, tempseqVarpos1, &protein->parameters.neighbordist, 1.5); last = find_neighbors(tempseqVarpos1, protein->Template, tempseqVarpos, &protein->parameters.neighbordist, 1.0); free_memory(protein->min_float_res); protein->min_float_res = tempseqVarpos; protein->min_float_res[last+1] = 0; sort_int(&first,&last,protein->min_float_res); protein->min_float_res[last+1] = ENDFLAG; free_memory(tempseqVarpos1); } } else if(strstr(keyword, "EXCLUDE")!=0) protein->torsion_exclude_res = incld_exld_positions; else if(strstr(keyword, "FIX")!=0) protein->min_fixed_res = 
incld_exld_positions; if(fgets(line, MXLINE_INPUT, input)!=NULL) { if(strncmp(line,"END",3)==0) /* formal end of this block; advance to next block or EOF */ { if(fgets(line, MXLINE_INPUT, input)!=NULL) { flag2 = move_past_comments_and_empty_lines(line, input); if(flag2==0) stopflag=1; } else stopflag=1; } else { stopflag=1; } } else stopflag=1; } fclose(input); /* if a residue is not defined as both fixed and floating, fix it */ if(protein->min_float_res!=NULL && protein->min_fixed_res!=NULL) { i=1; k=0; while(protein->min_float_res[i]!=ENDFLAG) { j=1; while(protein->min_fixed_res[j]!=ENDFLAG) { if(protein->min_float_res[i] == protein->min_fixed_res[j]) { fprintf(stderr,"Residue %d is defined as both fixed and floating for minimization; will fix\n", protein->min_float_res[i]); protein->min_float_res[i]=ENDFLAG2; k=1; /* fprintf(stderr,"ERROR Residue %d is defined as both fixed and floating for minimization\n", protein->min_float_res[i]); exit(1); */ } ++j; } ++i; } if(k==1) { i=1; while(protein->min_float_res[i]!=ENDFLAG) { if(protein->min_float_res[i]==ENDFLAG2) { j=i+1; k=i; while(protein->min_float_res[j]!=ENDFLAG) { protein->min_float_res[k] = protein->min_float_res[j]; ++j; ++k; } protein->min_float_res[k] = protein->min_float_res[j]; } ++i; } } } /* if a residue is not defined as both included and excluded, exclude it */ if(protein->torsion_include_res!=NULL && protein->torsion_exclude_res!=NULL) { i=1; k=0; while(protein->torsion_include_res[i]!=ENDFLAG) { j=1; while(protein->torsion_exclude_res[j]!=ENDFLAG) { if(protein->torsion_include_res[i] == protein->torsion_exclude_res[j]) { fprintf(stderr,"Residue %d is defined as both included and excluded for rmsd; will exclude\n", protein->torsion_include_res[i]); protein->torsion_include_res[i]=ENDFLAG2; k=1; /* fprintf(stderr,"ERROR Residue %d is defined as both included and excluded for rmsd\n", protein->torsion_include_res[i]); exit(1); */ } ++j; } ++i; } if(k==1) { i=1; 
while(protein->torsion_include_res[i]!=ENDFLAG) { if(protein->torsion_include_res[i]==ENDFLAG2) { j=i+1; k=i; while(protein->torsion_include_res[j]!=ENDFLAG) { protein->torsion_include_res[k] = protein->torsion_include_res[j]; ++j; ++k; } protein->torsion_include_res[k] = protein->torsion_include_res[j]; } ++i; } } } } else if(strncmp(line,"ALIGN",5)==0) { if(strcmp(protein->parameters.algorithm,"ALIGN_STRUCTURES")!=0) { fprintf(stderr,"ERROR ALIGN_DEFINITION is an option only for JOBTYPE ALIGN_STRUCTURES\n"); exit(1); } protein->align_def = (ALIGNMENT_DEFINITION *)malloc(sizeof(ALIGNMENT_DEFINITION)); protein->first_align_def = protein->align_def; protein->align_def->next = NULL; stopflag=0; while(stopflag==0) { if(fgets(line,MAXLINE,input)!=NULL) { if(strncmp(line,"END",3)!=0) { sscanf(line,"%s",keyword); convert_string_to_all_caps(keyword); if(strstr(keyword,"TRACE")!=0 || strstr(keyword,"BACK")!=0 || strstr(keyword,"BKBN")!=0) { sscanf(line,"%s %s %s %s",dummystring, dummystring2,dummystring,dummystring); // advance to the initial position q = parse_seqpos_string(dummystring2, protein->seqpos_text_map); i=1; while(protein->Template[i].seq_position != q) { ++i; if(protein->Template[i].seq_position == ENDFLAG) { sprintf(line,"ERROR Cannot find initial residue %s in template file for alignment",dummystring2); failure_report(line,"exit"); } } sscanf(line,"%s %s %s %s",dummystring, dummystring,dummystring,dummystring2); // keep going until the target is reached q = parse_seqpos_string(dummystring2, protein->seqpos_text_map); while(protein->Template[i].seq_position != q) { if(protein->Template[i].seq_position == ENDFLAG) { sprintf(line,"ERROR Cannot find final residue %s in template file for alignment",dummystring2); failure_report(line,"exit"); } k = protein->Template[i].seq_position; while(protein->Template[i].seq_position == k) { if(strstr(keyword,"TRACE")!=0) // only CA trace { if(strcmp(protein->Template[i].atomname,"CA")==0) { 
protein->align_def->seqpos_text_initial = (char *)calloc(10,sizeof(char)); protein->align_def->residuetype_initial = NULL; protein->align_def->atomname_initial = (char *)calloc(10,sizeof(char)); protein->align_def->seqpos_text_target = (char *)calloc(10,sizeof(char)); protein->align_def->residuetype_target = NULL; protein->align_def->atomname_target = (char *)calloc(10,sizeof(char)); strcpy(protein->align_def->seqpos_text_initial,protein->Template[i].seqpos_text); strcpy(protein->align_def->atomname_initial,protein->Template[i].atomname); strcpy(protein->align_def->seqpos_text_target,protein->Template[i].seqpos_text); strcpy(protein->align_def->atomname_target,protein->Template[i].atomname); protein->align_def->next = (ALIGNMENT_DEFINITION *)malloc(sizeof(ALIGNMENT_DEFINITION)); protein->align_def = protein->align_def->next; protein->align_def->next = NULL; } } else // backbone CA, N, C { if(strcmp(protein->Template[i].atomname,"CA")==0 || strcmp(protein->Template[i].atomname,"C")==0 || strcmp(protein->Template[i].atomname,"N")==0) { protein->align_def->seqpos_text_initial = (char *)calloc(10,sizeof(char)); protein->align_def->residuetype_initial = NULL; protein->align_def->atomname_initial = (char *)calloc(10,sizeof(char)); protein->align_def->seqpos_text_target = (char *)calloc(10,sizeof(char)); protein->align_def->residuetype_target = NULL; protein->align_def->atomname_target = (char *)calloc(10,sizeof(char)); strcpy(protein->align_def->seqpos_text_initial,protein->Template[i].seqpos_text); strcpy(protein->align_def->atomname_initial,protein->Template[i].atomname); strcpy(protein->align_def->seqpos_text_target,protein->Template[i].seqpos_text); strcpy(protein->align_def->atomname_target,protein->Template[i].atomname); protein->align_def->next = (ALIGNMENT_DEFINITION *)malloc(sizeof(ALIGNMENT_DEFINITION)); protein->align_def = protein->align_def->next; protein->align_def->next = NULL; } } ++i; } } } else { protein->align_def->seqpos_text_initial = (char 
*)calloc(10,sizeof(char)); protein->align_def->residuetype_initial = (char *)calloc(10,sizeof(char)); protein->align_def->atomname_initial = (char *)calloc(10,sizeof(char)); protein->align_def->seqpos_text_target = (char *)calloc(10,sizeof(char)); protein->align_def->residuetype_target = (char *)calloc(10,sizeof(char)); protein->align_def->atomname_target = (char *)calloc(10,sizeof(char)); sscanf(line,"%s %s %s %s %s %s %s", protein->align_def->seqpos_text_initial, protein->align_def->residuetype_initial, protein->align_def->atomname_initial, dummystring, protein->align_def->seqpos_text_target, protein->align_def->residuetype_target, protein->align_def->atomname_target); protein->align_def->next = (ALIGNMENT_DEFINITION *)malloc(sizeof(ALIGNMENT_DEFINITION)); protein->align_def = protein->align_def->next; protein->align_def->next = NULL; } } else stopflag=1; } else stopflag=1; } fclose(input); } else if(strncmp(line,"VAR",3)==0 || strncmp(line,"SEQUENCE",8)==0 || strncmp(line,"SCAN",4)==0) { if(strncmp(line,"SEQUENCE",8)==0) { if(strncmp(protein->parameters.algorithm, "LOOKUP_TABLE_SLAVE",18)==0 || strncmp(protein->parameters.algorithm, "GA",2)==0 || strncmp(protein->parameters.algorithm, "DEE",3)==0 || strncmp(protein->parameters.algorithm, "SCMF",4)==0 || strncmp(protein->parameters.algorithm, "LIST_FLOATING_POSITIONS",23)==0 || strncmp(protein->parameters.algorithm, "MC",2)==0 || strcmp(protein->parameters.algorithm, "FASTER")==0) sequence_to_var_pos_file(input, protein, 1); else { fprintf(stderr,"ERROR SEQUENCE is not an option for JOBTYPE %s\n",protein->parameters.algorithm); exit(1); } } else if(strncmp(line,"VAR",3)==0) { if(strncmp(protein->parameters.algorithm, "LOOKUP_TABLE_SLAVE",18)==0 || strncmp(protein->parameters.algorithm, "GA",2)==0 || strncmp(protein->parameters.algorithm, "DEE",3)==0 || strncmp(protein->parameters.algorithm, "SCMF",4)==0 || strncmp(protein->parameters.algorithm, "LIST_FLOATING_POSITIONS",23)==0 || 
strncmp(protein->parameters.algorithm, "MC",2)==0 || strcmp(protein->parameters.algorithm, "FASTER")==0 || strstr(protein->parameters.algorithm, "DOCK")!=0) { if(FILTER_SEQUENCE_SPACE_FLAG == -1) input_VARIABLE_POSITION(input, protein, 2); else input_VARIABLE_POSITION(input, protein, 1); } else { fprintf(stderr,"ERROR VARIABLE_POSITIONS is not an option for JOBTYPE %s\n",protein->parameters.algorithm); exit(1); } } else if(strncmp(line,"SCAN",4)==0) { if(strcmp(protein->parameters.algorithm,"ALANINE_SHAVE")==0 || strcmp(protein->parameters.algorithm,"SCANNING_MUTAGENESIS")==0) { /* for multistate scannning, append the wt residue; for other scanning jobtypes, the master will deal w/ including the wt energy */ if(protein->parameters.sequence_algorithm!=NULL) input_VARIABLE_POSITION(input, protein, 2); else { protein->parameters.neighbordist = TINY; input_VARIABLE_POSITION(input, protein, 1); } } else { fprintf(stderr,"ERROR SCANNING_POSITIONS is not an option for JOBTYPE %s\n",protein->parameters.algorithm); exit(1); } } } else { fclose(input); if(strncmp(protein->parameters.algorithm, "LOOKUP_TABLE_SLAVE",18)==0 || strncmp(protein->parameters.algorithm, "GA",2)==0 || strncmp(protein->parameters.algorithm, "DEE",3)==0 || strncmp(protein->parameters.algorithm, "SCMF",4)==0 || strcmp(protein->parameters.algorithm, "FASTER")==0 || strncmp(protein->parameters.algorithm, "LIST_FLOATING_POSITIONS",23)==0 || strncmp(protein->parameters.algorithm, "MC",2)==0) { if(strcmp(protein->parameters.algorithm, "SCMF_PK")==0) { sprintf(dummystring,"temp.%d",GET_PID); input = fopen_file(dummystring,"w"); fprintf(input,"VARIABLE_POSITIONS\n"); fprintf(input,"optimize_all_rotamers\n"); fclose(input); input = fopen_file(dummystring,"r"); fgets(line,MAXLINE,input); input_VARIABLE_POSITION(input, protein, 1); rm_file(dummystring); } else { fprintf(stderr,"ERROR Must define VARIABLE_POSITIONS or SEQUENCE for JOBTYPE %s\n",protein->parameters.algorithm); exit(1); } } } 
strcpy(LOOKUP_TABLE_DIRECTORY,protein->parameters.lookup_energy_table_directory); sprintf(dummystring,"temp.%d",GET_PID); if(does_this_file_exist(dummystring)==1) rm_file(dummystring); if(RESTRAIN_MINIMIZATION_FLAG == ENDFLAG) RESTRAIN_MINIMIZATION_FLAG = 0; if(OPTIMIZE_TARGET_STRUCTURE_ONLY_FLAG==1) FILTER_SEQUENCE_SPACE_FLAG=1; /* only one sequence, so ignore solubility and specificity */ if(protein->parameters.log10_seq_combinations==0) { OPTIMIZE_TARGET_STRUCTURE_ONLY_FLAG=1; FILTER_SEQUENCE_SPACE_FLAG=1; SOLUBILITY_CUTOFF_FLAG = 0; if(HBOND_SPECIFICITY_FLAG != 1) { HBOND_SPECIFICITY_FLAG=0; MINIMIZE_FINAL_SEQUENCE_FLAG = 0; } if(SPECIFICITY_FLAG == ENDFLAG) { SPECIFICITY_FLAG=0; MINIMIZE_FINAL_SEQUENCE_FLAG = 0; } } if(HBOND_SPECIFICITY_FLAG == ENDFLAG) HBOND_SPECIFICITY_FLAG = 1; if(SPECIFICITY_FLAG == ENDFLAG) SPECIFICITY_FLAG = 1; if(SOLUBILITY_CUTOFF_FLAG==ENDFLAG) SOLUBILITY_CUTOFF_FLAG=0; for(i=0;i<50;++i) free_memory(word[i]); free_memory(word); MAX_OPTIMIZATION_TIME = 60.0*MAX_OPTIMIZATION_TIME; /* convert to seconds */ MASTER_OPTIMIZATION_TIME = 60.0*MASTER_OPTIMIZATION_TIME; free_memory(pdbfile); free_memory(forcefieldfile); free_memory(line); free_memory(dummystring); free_memory(keyword); free_memory(dummystring2); free_memory(seqpos_string); free_memory(pos_string); } /* parses file pointer *input set at the line following the "VARIABLE_POSITIONS" line in an input file. 
Assumes that protein has gone through input_stuff (or has variables from input_stuff set). This function is called by input_stuff, sequence_to_var_pos_file, and rotamer_calc_foreman. If modify_varpos_flag == 0, then VARIABLE_POSITIONS:modify_VARIABLE_POSITION is not called. If modify_varpos_flag == 2 (SCANNING_POSITIONS), the wt residue is automatically included as a choice (this is also done whenever MAX_MUTATIONS > 0). */ void input_VARIABLE_POSITION(FILE *input, PROTEIN *protein, int modify_varpos_flag) { char *resline, *line, *seqpos_string, *keyword, *one_letter_code, *dummystring; long fp; int i, n, last, num_rot, resline_index, q, p, j, k, g, first, flag, num_fixed_positions,*fixed_positions,dummy_numvarpos; int *surf_positions, *core_positions, *inter_positions, num_coresurfint_positions, saturation_stepsize; FILE *floating_pos_file_ptr, *detailed_floating_pos_file_ptr, *logfile_ptr; double number_of_combinations; extern double **VDW_WELLDEPTH; int alanine_scan_flag; surf_positions = NULL; core_positions = NULL; inter_positions = NULL; alanine_scan_flag =0; saturation_stepsize=1; floating_pos_file_ptr=NULL; detailed_floating_pos_file_ptr=NULL; line = (char *)calloc(MXLINE_INPUT, sizeof(char)); keyword = (char *)calloc(MAXLINE, sizeof(char)); fp = ftell(input); /* move past VARIABLE_POSITIONS or SCANNING_POSITIONS line */ if(fgets(line, MXLINE_INPUT, input)==NULL) exit(0); sscanf(line, "%s", keyword); convert_string_to_all_caps(keyword); /* HP patterning for total design */ if(strncmp(keyword,"HP",2)==0) { free_memory(line); free_memory(keyword); hp_pattern_design(input, protein); } resline = (char *)calloc(MAXLINE, sizeof(char)); seqpos_string = (char *)calloc(MAXLINE, sizeof(char)); dummystring = (char *)calloc(MAXLINE, sizeof(char)); one_letter_code = (char *)calloc(5, sizeof(char)); fixed_positions = NULL; if(strcmp(keyword, "OPTIMIZE_ALL_ROTAMERS")==0 || strcmp(keyword, "RELAX_ALL_ROTAMERS")==0 || strcmp(keyword, "PAIR_ENERGY_TABLE")==0 || strstr(keyword, "ALA")!=0 || strstr(keyword, "ALL")!=0) { /* optimize rotamers 
or ala-scan on all positions */ if(strcmp(keyword, "OPTIMIZE_ALL_ROTAMERS")==0 || strstr(keyword, "ALA")!=0) { protein->parameters.neighbordist = 1000; if(strstr(keyword, "ALA")!=0) alanine_scan_flag=1; } else { if(strcmp(keyword, "PAIR_ENERGY_TABLE")==0) { fprintf(stderr,"WARNING you should use JOBTYPE ENERGY_PROFILE instead of PAIR_ENERGY_TABLE!!!!\n"); fprintf(stderr,"\tENERGY_PROFILE is faster, and uses the actual coordinates of the inputted pdb\n"); fprintf(stderr,"\tSee the current README for details\n"); strcpy(protein->parameters.lookup_energy_table_directory, TEMP_LOOKUP_DIRECTORY); PAIR_ENERGY_TABLE_FLAG = 1; if(word_count(line) == 2) { PAIR_ENERGY_TABLE_FLAG = 2; sscanf(line,"%s %s",keyword, dummystring); convert_string_to_all_caps(dummystring); if(strcmp(dummystring,"VDW")==0) /* vdw and torsion only */ { SASA_FLAG = 0; COULOMB_FLAG = 0; GB_FLAG = 0; } else if(strstr(dummystring,"ELEC")!=0) /* no vdw */ { TORSION_FLAG = 0; /* we want E_vdw to be zero, but this will cause problems, so just make them really really small */ for(i=1;iatomparam[i].welldepth*protein->atomparam[j].welldepth); } } } } } protein->parameters.neighbordist = TINY; if(strncmp(protein->parameters.algorithm,"LIST_FLOATING_POSITIONS",23)!=0) if(strstr(protein->parameters.algorithm,"PK")==0) sprintf(protein->parameters.algorithm,"FASTER"); protein->parameters.number_MC_cycles = 2; LOGFILE_FLAG = 0; sprintf(dummystring,"%s.log",protein->parameters.output_prefix); if(does_this_file_exist(dummystring)==1) rm_file(dummystring); } protein->parameters.numVarPositions=1; protein->var_pos = (VARIABLE_POSITION *)calloc((protein->parameters.numVarPositions+2), sizeof(VARIABLE_POSITION)); /* find some small residue to act as a "user-defined" variable position */ i=1; while(strcmp(protein->Template[i].residuetype, "NTE")==0 || strcmp(protein->Template[i].residuetype, "CYD")==0 || strcmp(protein->Template[i].residuetype, "PRO")==0 || strcmp(protein->Template[i].residuetype, "GLY")==0) ++i; 
while(strcmp(protein->Template[i].residuetype, "ALA")!=0 && protein->Template[i].seq_position!=ENDFLAG) ++i; if(protein->Template[i].seq_position == ENDFLAG) { i=1; while(strcmp(protein->Template[i].residuetype, "NTE")==0 || strcmp(protein->Template[i].residuetype, "CYD")==0 || strcmp(protein->Template[i].residuetype, "PRO")==0 || strcmp(protein->Template[i].residuetype, "GLY")==0) ++i; while(strcmp(protein->Template[i].residuetype, "SER")!=0 && protein->Template[i].seq_position!=ENDFLAG) ++i; } if(protein->Template[i].seq_position == ENDFLAG) { i=1; while(strcmp(protein->Template[i].residuetype, "NTE")==0 || strcmp(protein->Template[i].residuetype, "CYD")==0 || strcmp(protein->Template[i].residuetype, "PRO")==0 || strcmp(protein->Template[i].residuetype, "GLY")==0) ++i; while(strcmp(protein->Template[i].residuetype, "THR")!=0 && protein->Template[i].seq_position!=ENDFLAG) ++i; } if(protein->Template[i].seq_position == ENDFLAG) { i=1; while(strcmp(protein->Template[i].residuetype, "NTE")==0 || strcmp(protein->Template[i].residuetype, "CYD")==0 || strcmp(protein->Template[i].residuetype, "PRO")==0 || strcmp(protein->Template[i].residuetype, "GLY")==0) ++i; while(strcmp(protein->Template[i].residuetype, "VAL")!=0 && protein->Template[i].seq_position!=ENDFLAG) ++i; } if(protein->Template[i].seq_position == ENDFLAG) { i=1; while(strcmp(protein->Template[i].residuetype, "NTE")==0 || strcmp(protein->Template[i].residuetype, "CYD")==0 || strcmp(protein->Template[i].residuetype, "PRO")==0 || strcmp(protein->Template[i].residuetype, "GLY")==0) ++i; while(strcmp(protein->Template[i].residuetype, "LEU")!=0 && protein->Template[i].seq_position!=ENDFLAG) ++i; } if(protein->Template[i].seq_position == ENDFLAG) { i=1; while(strcmp(protein->Template[i].residuetype, "NTE")==0 || strcmp(protein->Template[i].residuetype, "CYD")==0 || strcmp(protein->Template[i].residuetype, "PRO")==0 || strcmp(protein->Template[i].residuetype, "GLY")==0) ++i; 
while(strcmp(protein->Template[i].residuetype, "ILE")!=0 && protein->Template[i].seq_position!=ENDFLAG) ++i; } if(protein->Template[i].seq_position == ENDFLAG) { i=1; while(strcmp(protein->Template[i].residuetype, "NTE")==0 || strcmp(protein->Template[i].residuetype, "CYD")==0 || strcmp(protein->Template[i].residuetype, "PRO")==0 || strcmp(protein->Template[i].residuetype, "GLY")==0) ++i; while(strcmp(protein->Template[i].residuetype, "ASN")!=0 && protein->Template[i].seq_position!=ENDFLAG) ++i; } if(protein->Template[i].seq_position == ENDFLAG) { i=1; while(strcmp(protein->Template[i].residuetype, "NTE")==0 || strcmp(protein->Template[i].residuetype, "CYD")==0 || strcmp(protein->Template[i].residuetype, "PRO")==0 || strcmp(protein->Template[i].residuetype, "GLY")==0) ++i; while(strcmp(protein->Template[i].residuetype, "ASP")!=0 && protein->Template[i].seq_position!=ENDFLAG) ++i; } protein->var_pos[1].seq_position = protein->Template[i].seq_position; protein->var_pos[2].seq_position = ENDFLAG; /* get resparam pointer */ q=1; while(strcmp(protein->Template[i].residuetype, protein->resparam[q].residuetype)!=0) ++q; strcpy(one_letter_code, protein->resparam[q].one_letter_code); protein->var_pos[1].choice[1].resparam_ptr = (RESPARAM *)malloc(sizeof(RESPARAM)); *protein->var_pos[1].choice[1].resparam_ptr = protein->resparam[q]; protein->var_pos[1].choice[1].composition = 1; protein->var_pos[1].number_of_choices = 1; protein->var_pos[1].residue_freqs[0]=0; protein->var_pos[1].residue_freqs[1]=1; protein->var_pos[1].residue_freqs[2]=ENDFLAG; } else /* float all residues at all positions */ if( (strstr(keyword, "TOTAL")!=0 || strstr(keyword, "COMPLETE")!=0 || strstr(keyword, "FULL")!=0) || strstr(keyword, "SATURATION")!=0) { if(strstr(keyword, "SATURATION")!=0) { if(strstr(keyword, "2")!=0) saturation_stepsize=2; if(strstr(keyword, "3")!=0) saturation_stepsize=3; } i=1; protein->parameters.numVarPositions=0; while(protein->Template[i].seq_position!=ENDFLAG) { k= 
protein->Template[i].seq_position; while(protein->Template[i].seq_position == k && protein->Template[i].atom_ptr->other_info<0 && protein->Template[i].seq_position!=ENDFLAG) ++i; if(protein->Template[i].seq_position!=ENDFLAG) if(protein->Template[i].atom_ptr->other_info>0 && protein->Template[i].seq_position == k) { if(strcmp(protein->Template[i].residuetype,"CYD")!=0 && strcmp(protein->Template[i].residuetype,"PRO")!=0 && strcmp(protein->Template[i].residuetype,"GLY")!=0) { if(protein->Template[i].seq_position%saturation_stepsize == 0) { if(KEEP_CYS_FLAG == 0) ++protein->parameters.numVarPositions; else { if(strcmp(protein->Template[i].residuetype,"CYS")!=0) ++protein->parameters.numVarPositions; } } } while(protein->Template[i].seq_position == k && protein->Template[i].seq_position!=ENDFLAG) ++i; } } protein->var_pos = (VARIABLE_POSITION *)calloc((protein->parameters.numVarPositions+2), sizeof(VARIABLE_POSITION)); protein->parameters.neighbordist = 1000; i=1; q=0; while(protein->Template[i].seq_position!=ENDFLAG) { k= protein->Template[i].seq_position; while(protein->Template[i].seq_position == k && protein->Template[i].atom_ptr->other_info<0 && protein->Template[i].seq_position!=ENDFLAG) ++i; if(protein->Template[i].seq_position!=ENDFLAG) if(protein->Template[i].atom_ptr->other_info>0 && protein->Template[i].seq_position == k) { if(strcmp(protein->Template[i].residuetype,"CYD")!=0 && strcmp(protein->Template[i].residuetype,"PRO")!=0 && strcmp(protein->Template[i].residuetype,"GLY")!=0) { if(protein->Template[i].seq_position%saturation_stepsize == 0) /* DUDE */ { if(KEEP_CYS_FLAG == 0) { ++q; protein->var_pos[q].seq_position = k; } else { if(strcmp(protein->Template[i].residuetype,"CYS")!=0) { ++q; protein->var_pos[q].seq_position = k; } } } } while(protein->Template[i].seq_position == k && protein->Template[i].seq_position!=ENDFLAG) ++i; } } protein->var_pos[q+1].seq_position = ENDFLAG; strcpy(resline, "AADEFHIKLMNQRSTVWY"); 
for(i=1;i<=protein->parameters.numVarPositions;++i) { protein->var_pos[i].number_of_choices = 17; strcpy(resline, "AADEFHIKLMNQRSTVWY"); /* // for SCANNING jobtype or MAX_MUTATIONS!=0, don't mess w/ CYS positions if(modify_varpos_flag == 2 || MAX_MUTATIONS >0 ) { if(find_one_letter_code_pdbATOM_seq_position(protein->Template, protein->var_pos[i].seq_position, protein->resparam) == 'C') { protein->var_pos[i].number_of_choices = 1; strcpy(resline, "AC"); } } */ for(k=1;k<=protein->var_pos[i].number_of_choices;++k) { q=1; while(protein->resparam[q].one_letter_code[0] != resline[k]) ++q; protein->var_pos[i].choice[k].resparam_ptr = (RESPARAM *)malloc(sizeof(RESPARAM)); *protein->var_pos[i].choice[k].resparam_ptr = protein->resparam[q]; protein->var_pos[i].choice[k].composition = 0; } protein->var_pos[i].residue_freqs[protein->var_pos[i].number_of_choices+1]=ENDFLAG; /* generate residue_freq array */ protein->var_pos[i].residue_freqs[0]=0; protein->var_pos[i].residue_freqs[1]=0; protein->var_pos[i].residue_freqs[2]=0; for(q=1;q<=protein->var_pos[i].number_of_choices;++q) { protein->var_pos[i].choice[q].composition = 1.0/((double)protein->var_pos[i].number_of_choices); protein->var_pos[i].residue_freqs[q] = protein->var_pos[i].residue_freqs[q-1] + protein->var_pos[i].choice[q].composition; } } } else /* define floating positions explicitly */ { fseek(input, fp, 0); /* reset file pointer to position of first listed seq_position/resline line */ /* count the number of user-defined variable positions */ protein->parameters.numVarPositions = 0; flag=0; strcpy(dummystring," "); while(flag == 0) { if(fgets(line, MXLINE_INPUT, input)!=NULL) { sscanf(line,"%s",dummystring); if(word_count(line)==0) flag = 1; else if(strcmp(dummystring,"END")==0) flag = 1; else ++protein->parameters.numVarPositions; } else flag = 1; } fseek(input, fp, 0); /* reset the file pointer */ // if neighbordist == TINY -> other_residues "none"; move only the user-defined residues 
if(protein->parameters.neighbordist != TINY) protein->parameters.numVarPositions += protein->num_ligands; protein->var_pos = (VARIABLE_POSITION *)calloc((protein->parameters.numVarPositions+2), sizeof(VARIABLE_POSITION)); g=1; i=1; while(i<=protein->parameters.numVarPositions) { // move ligands, unless other_residues "none" if(protein->num_ligands!=0) if(i<=protein->num_ligands) if(protein->parameters.neighbordist != TINY) { while(protein->resparam[g].ligand_flag!=1) ++g; n=1; while(strcmp(protein->Template[n].residuetype,protein->resparam[g].residuetype)!=0) ++n; protein->var_pos[i].seq_position = protein->Template[n].seq_position; sprintf(resline,"%s.",protein->resparam[g].one_letter_code); ++g; } // read user-defined positions from file // user-defined ligand positions will superceede the automatically attached ligands later if(i>protein->num_ligands || protein->parameters.neighbordist == TINY) { fgets(line, MXLINE_INPUT, input); sscanf(line, "%s",seqpos_string); if(strcmp(seqpos_string,"l")==0) sprintf(seqpos_string,"1l"); protein->var_pos[i].seq_position = parse_seqpos_string(seqpos_string, protein->seqpos_text_map); sprintf(resline,"."); if(word_count(line)==2) { sscanf(line, "%s %s",seqpos_string,resline); if(strncmp(resline,"all",3)==0 || strncmp(resline,"ALL",3)==0) strcpy(resline,"A,D,E,F,H,I,K,L,M,N,Q,R,S,T,V,W,Y."); else if(strncmp(resline,"polar",5)==0 || strncmp(resline,"POLAR",5)==0) strcpy(resline,"A,D,E,H,K,N,Q,R,S,T."); else if(strncmp(resline,"hydrophobic",11)==0 || strncmp(resline,"HYDROPHOBIC",11)==0 || strncmp(resline,"hphob",5)==0 || strncmp(resline,"HPHOB",5)==0) strcpy(resline,"A,F,I,L,V,W,Y,M."); if(strncmp(resline,"aromatic",8)==0 || strncmp(resline,"AROMATIC",8)==0) strcpy(resline,"F,W,Y."); if(strncmp(resline,"aliphatic",9)==0 || strncmp(resline,"ALIPHATIC",9)==0) strcpy(resline,"A,I,L,V."); if(strncmp(resline,"neutral",7)==0 || strncmp(resline,"NEUTRAL",7)==0) strcpy(resline,"N,Q,S,T."); if(strncmp(resline,"charged",7)==0 || 
strncmp(resline,"CHARGED",7)==0) strcpy(resline,"D,E,H,K,R."); if(strncmp(resline,"acidic",6)==0 || strncmp(resline,"ACIDIC",6)==0) strcpy(resline,"D,E."); if(strncmp(resline,"basic",5)==0 || strncmp(resline,"BASIC",5)==0) strcpy(resline,"R,K."); } resline_index=0; while(resline[resline_index] != '.' && resline[resline_index]!='\0' && resline[resline_index]!=' ') ++resline_index; resline[resline_index] = '.'; resline[resline_index+1]='\0'; if(strcmp(resline,".")==0) /* resline not defined or is simply a '.', so use wt residuetype */ { /* default is to float rotamers at all positions.... however if the user is defining positions whose rotamers are allowed to float, it implies that the user does not want others to float (unless otherwise specified) */ if(protein->parameters.neighbordist != 0 && protein->parameters.neighbordist != PI_PLUS_LN10_PLUS_SQRT2_MINUS_E_1e5) /* protein->parameters.neighbordist == 0 if neighbors allowed to float */ protein->parameters.neighbordist = TINY; n=1; flag=0; while(flag == 0) { if(protein->var_pos[i].seq_position == protein->Template[n].seq_position) { flag=1; while(strcmp(protein->Template[n].residuetype,"NTN")==0 || strcmp(protein->Template[n].residuetype,"NTE")==0 || strcmp(protein->Template[n].residuetype,"CTE")==0 || strcmp(protein->Template[n].residuetype,"CTN")==0 ) /* advance to sidechain */ { ++n; } if(strcmp(protein->Template[n].residuetype,"PRO")!=0 && strcmp(protein->Template[n].residuetype,"GLY")!=0 && strcmp(protein->Template[n].residuetype,"CYD")!=0) { if(QUIET_FLAG==0) fprintf(stderr,"WARNING Residuetypes not explicitly defined for position %s....will assume template structure identity of %s\n", seqpos_string,protein->Template[n].residuetype); } q=1; while(strcmp(protein->Template[n].residuetype, protein->resparam[q].residuetype)!=0) ++q; sprintf(resline,"%s.",protein->resparam[q].one_letter_code); } else { ++n; if(protein->Template[n].seq_position == ENDFLAG) flag=1; } } } } /* include the wt residue if this is a 
SCANNING jobtype or MAX_MUTATIONS!=0 */ if(modify_varpos_flag == 2 || MAX_MUTATIONS >0 ) { resline_index=0; while(resline[resline_index] != '.' && resline[resline_index]!='\0' && resline[resline_index]!=' ') ++resline_index; resline[resline_index] = ','; ++resline_index; resline[resline_index] = find_one_letter_code_pdbATOM_seq_position(protein->Template, protein->var_pos[i].seq_position, protein->resparam); ++resline_index; resline[resline_index] = '.'; ++resline_index; resline[resline_index] = '\0'; /* if(find_one_letter_code_pdbATOM_seq_position(protein->Template, protein->var_pos[i].seq_position, protein->resparam) == 'C') { strcpy(resline, "C."); if(QUIET_FLAG==0) fprintf(stderr,"WARNING CYS position %s has been listed as a variable position for SCANNING OR MAX_MUTATIONS > 0; will ignore\n",seqpos_string); } */ } /* parse resline into requested residuetypes */ resline_index=0; protein->var_pos[i].number_of_choices=0; while(strncmp(&resline[resline_index], ".", 1)!=0 && resline[resline_index]!='\0' && resline[resline_index]!=' ') { ++protein->var_pos[i].number_of_choices; strncpy(one_letter_code, &resline[resline_index], 1); /* get residuetype */ /* get resparam pointer */ q=1; while(strcmp(one_letter_code, protein->resparam[q].one_letter_code)!=0 && protein->resparam[q].one_letter_code[0]!='Z') ++q; if(protein->resparam[q].one_letter_code[0]=='Z') { fprintf(stderr,"ERROR unknown residuecode %s defined in inputfile for position %s\n", one_letter_code, seqpos_string); exit(1); } if(strcmp(protein->resparam[q].residuetype,"PRO")==0 || strcmp(protein->resparam[q].residuetype,"GLY")==0 || strcmp(protein->resparam[q].residuetype,"CYD")==0) { if(QUIET_FLAG==0) fprintf(stderr,"WARNING cannot use %s as a choice for position %s; violates fixed-bkbn assumption...will ignore\n", protein->resparam[q].residuetype, seqpos_string); protein->var_pos[i].seq_position = MAX_SEQ_POSITION + protein->var_pos[i].seq_position; --protein->var_pos[i].number_of_choices; } else { 
n=protein->var_pos[i].number_of_choices-1; while(n>=1) { if(strcmp(protein->resparam[q].residuetype,protein->var_pos[i].choice[n].resparam_ptr->residuetype)==0) n=ENDFLAG; --n; } if(n>=0) { protein->var_pos[i].choice[protein->var_pos[i].number_of_choices].resparam_ptr = (RESPARAM *)malloc(sizeof(RESPARAM)); *protein->var_pos[i].choice[protein->var_pos[i].number_of_choices].resparam_ptr = protein->resparam[q]; protein->var_pos[i].choice[protein->var_pos[i].number_of_choices].composition = 0; } else { --protein->var_pos[i].number_of_choices; } } ++resline_index; if(strncmp(&resline[resline_index], ".", 1)!=0) while(strncmp(&resline[resline_index], ",", 1)==0) ++resline_index; } protein->var_pos[i].residue_freqs[protein->var_pos[i].number_of_choices+1]=ENDFLAG; /* generate residue_freq array */ protein->var_pos[i].residue_freqs[0]=0; protein->var_pos[i].residue_freqs[1]=0; protein->var_pos[i].residue_freqs[2]=0; for(q=1;q<=protein->var_pos[i].number_of_choices;++q) { protein->var_pos[i].choice[q].composition = 1.0/((double)protein->var_pos[i].number_of_choices); protein->var_pos[i].residue_freqs[q] = protein->var_pos[i].residue_freqs[q-1] + protein->var_pos[i].choice[q].composition; } ++i; } } dummy_numvarpos = protein->parameters.numVarPositions; protein->var_pos[0].seq_position = 0; for(i=1;i<=dummy_numvarpos;++i) { /* PRO/GLY/CYD */ if(protein->var_pos[i].seq_position > MAX_SEQ_POSITION) { protein->parameters.numVarPositions = protein->parameters.numVarPositions - 1; /* one fewer varpos */ } else { q=1; while(qvar_pos[q].seq_position == protein->var_pos[i].seq_position) /* yes; delete previous ones */ { if(QUIET_FLAG==0) fprintf(stderr,"WARNING Position %s defined multiple times in input file; using the final definition\n", protein->seqpos_text_map[seqpos_to_inputted_string(protein->var_pos[q].seq_position, protein->seqpos_text_map)].seqpos_text); /* when sorted, this will go to end of array */ protein->var_pos[q].seq_position = MAX_SEQ_POSITION + 
protein->var_pos[q].seq_position; protein->parameters.numVarPositions = protein->parameters.numVarPositions - 1; /* one fewer varpos */ } ++q; } } } first = 1; sort_VARIABLE_POSITION(&first, &dummy_numvarpos, protein->var_pos); protein->var_pos[protein->parameters.numVarPositions+1].seq_position = ENDFLAG; /* mark the end of the array */ fgets(line, MXLINE_INPUT, input); /* find user-defined fixed positions and user-defined core/surf/interface positions (for location-specific refstates) */ if(strncmp(line,"END",3)==0) { num_fixed_positions = 0; while(fgets(line, MXLINE_INPUT, input)!=NULL) { flag=0; if(line[0]=='!' || line[0]=='#' || line[0]==' ' || line[0]=='\n') /* move past comments */ { while(flag==0) { if(fgets(line, MXLINE_INPUT, input)!=NULL) { if(line[0]=='!' || line[0]=='#') /* comment line; keep reading */ flag=0; else { flag=1; /* not a comment line; break out of here */ if(line[0]==' ' || line[0]=='\n') /* empty line - user left an empty line by mistake */ { flag=2; /* EOF */ } } } else { flag=2; /* end of file */ } } } if(flag!=2) /* not end of file */ { convert_string_to_all_caps(line); if(strncmp(line,"FIX",3)==0) { fixed_positions = (int *)calloc(MAX_RESIDUES, sizeof(int)); fp = ftell(input); fseek(input, fp, 0); num_fixed_positions = 0; flag=0; strcpy(dummystring," "); while(flag == 0) { if(fgets(line, MXLINE_INPUT, input)!=NULL) { sscanf(line,"%s",dummystring); if(word_count(line)==0) flag = 1; else if(strcmp(dummystring,"END")==0) flag = 1; else ++num_fixed_positions; } else flag = 1; } fseek(input, fp, 0); i=1; for(p=1;p<=num_fixed_positions;++p) { fgets(line, MXLINE_INPUT, input); sscanf(line, "%s",seqpos_string); fixed_positions[i] = parse_seqpos_string(seqpos_string, protein->seqpos_text_map); ++i; } last = i-1; sort_int(&first,&last,fixed_positions); fixed_positions[i] = ENDFLAG; } else { if(strncmp(line,"SURF",4)==0) /* user-defined surface residues */ { surf_positions = (int *)calloc(MAX_RESIDUES, sizeof(int)); fp = ftell(input); 
fseek(input, fp, 0); num_coresurfint_positions = 0; flag=0; strcpy(dummystring," "); while(flag == 0) { if(fgets(line, MXLINE_INPUT, input)!=NULL) { sscanf(line,"%s",dummystring); if(word_count(line)==0) flag = 1; else if(strcmp(dummystring,"END")==0) flag = 1; else ++num_coresurfint_positions; } else flag = 1; } fseek(input, fp, 0); i=1; for(p=1;p<=num_coresurfint_positions;++p) { fgets(line, MXLINE_INPUT, input); sscanf(line, "%s",seqpos_string); surf_positions[i] = parse_seqpos_string(seqpos_string, protein->seqpos_text_map); ++i; } last = i-1; sort_int(&first,&last,surf_positions); surf_positions[i] = ENDFLAG; } else if(strncmp(line,"CORE",4)==0) /* user-defined core residues */ { core_positions = (int *)calloc(MAX_RESIDUES, sizeof(int)); fp = ftell(input); fseek(input, fp, 0); num_coresurfint_positions = 0; flag=0; strcpy(dummystring," "); while(flag == 0) { if(fgets(line, MXLINE_INPUT, input)!=NULL) { sscanf(line,"%s",dummystring); if(word_count(line)==0) flag = 1; else if(strcmp(dummystring,"END")==0) flag = 1; else ++num_coresurfint_positions; } else flag = 1; } fseek(input, fp, 0); i=1; for(p=1;p<=num_coresurfint_positions;++p) { fgets(line, MXLINE_INPUT, input); sscanf(line, "%s",seqpos_string); core_positions[i] = parse_seqpos_string(seqpos_string, protein->seqpos_text_map); ++i; } last = i-1; sort_int(&first,&last,core_positions); core_positions[i] = ENDFLAG; } else if(strncmp(line,"INTER",5)==0) /* user-defined interfacials */ { inter_positions = (int *)calloc(MAX_RESIDUES, sizeof(int)); fp = ftell(input); fseek(input, fp, 0); num_coresurfint_positions = 0; flag=0; strcpy(dummystring," "); while(flag == 0) { if(fgets(line, MXLINE_INPUT, input)!=NULL) { sscanf(line,"%s",dummystring); if(word_count(line)==0) flag = 1; else if(strcmp(dummystring,"END")==0) flag = 1; else ++num_coresurfint_positions; } else flag = 1; } fseek(input, fp, 0); i=1; for(p=1;p<=num_coresurfint_positions;++p) { fgets(line, MXLINE_INPUT, input); sscanf(line, "%s",seqpos_string); 
inter_positions[i] = parse_seqpos_string(seqpos_string, protein->seqpos_text_map); ++i; } last = i-1; sort_int(&first,&last,inter_positions); inter_positions[i] = ENDFLAG; } } } } } i=1; while(protein->var_pos[i].seq_position!=ENDFLAG) { protein->var_pos[i].neighbor_level = 0; first=1; last=protein->var_pos[i].number_of_choices; sort_CHOICE(&first, &last, protein->var_pos[i].choice); ++i; } /* this function finds the neighbors, hooks up lots of pointers, extracts fixed atoms, etc */ if(modify_varpos_flag!=0) modify_VARIABLE_POSITION(protein, fixed_positions); if(inter_positions !=NULL) { j=1; while(inter_positions[j] != ENDFLAG) { i=1; while(protein->var_pos[i].seq_position!=inter_positions[j]) ++i; protein->var_pos[i].core_flag = 'i'; ++j; } free_memory(inter_positions); } if(core_positions !=NULL) { j=1; while(core_positions[j] != ENDFLAG) { i=1; while(protein->var_pos[i].seq_position!=core_positions[j]) ++i; protein->var_pos[i].core_flag = 'c'; ++j; } free_memory(core_positions); } if(surf_positions !=NULL) { j=1; while(surf_positions[j] != ENDFLAG) { i=1; while(protein->var_pos[i].seq_position!=surf_positions[j]) ++i; protein->var_pos[i].core_flag = 's'; ++j; } free_memory(surf_positions); } if(fixed_positions != NULL) free_memory(fixed_positions); /* For JOBTYPE LIST_FLOATING_POSITIONS */ if(strncmp(protein->parameters.algorithm,"LIST_FLOATING_POSITIONS",23)==0) { sprintf(dummystring,"%s.variable_positions",protein->parameters.output_prefix); floating_pos_file_ptr = fopen_file(dummystring,"w"); sprintf(dummystring,"%s.detailed_floating_positions_list",protein->parameters.output_prefix); detailed_floating_pos_file_ptr = fopen_file(dummystring,"w"); free_memory(resline); resline = (char *)calloc(protein->parameters.numVarPositions*5,sizeof(char)); resline[0] = '\0'; } i=1; number_of_combinations=0; protein->parameters.log10_seq_combinations=0; while(protein->var_pos[i].seq_position!=ENDFLAG) { 
if(strncmp(protein->parameters.algorithm,"LIST_FLOATING_POSITIONS",23)==0) { if(protein->var_pos[i].fixed_flag == 0) { if(protein->var_pos[i].neighbor_level == ENDFLAG) fprintf(detailed_floating_pos_file_ptr,"user\t%s\t", protein->seqpos_text_map[seqpos_to_inputted_string(protein->var_pos[i].seq_position, protein->seqpos_text_map)].seqpos_text ); else if(protein->var_pos[i].neighbor_level == 1) fprintf(detailed_floating_pos_file_ptr,"nearest_neighbor\t%s\t", protein->seqpos_text_map[seqpos_to_inputted_string(protein->var_pos[i].seq_position, protein->seqpos_text_map)].seqpos_text ); else fprintf(detailed_floating_pos_file_ptr,"neighbor\t%s\t", protein->seqpos_text_map[seqpos_to_inputted_string(protein->var_pos[i].seq_position, protein->seqpos_text_map)].seqpos_text ); sprintf(resline,"%s%s,",resline, protein->seqpos_text_map[seqpos_to_inputted_string(protein->var_pos[i].seq_position, protein->seqpos_text_map)].seqpos_text ); fprintf(floating_pos_file_ptr,"\t%s\t",protein->seqpos_text_map[seqpos_to_inputted_string(protein->var_pos[i].seq_position, protein->seqpos_text_map)].seqpos_text); } else fprintf(detailed_floating_pos_file_ptr,"fixed\t%s\t", protein->seqpos_text_map[seqpos_to_inputted_string(protein->var_pos[i].seq_position, protein->seqpos_text_map)].seqpos_text ); } protein->parameters.log10_seq_combinations += log10(protein->var_pos[i].number_of_choices); num_rot=0; for(k=1;k<=protein->var_pos[i].number_of_choices;++k) { if(strncmp(protein->parameters.algorithm,"LIST_FLOATING_POSITIONS",23)==0) { fprintf(detailed_floating_pos_file_ptr,"%s ", protein->var_pos[i].choice[k].resparam_ptr->one_letter_code); if(protein->var_pos[i].fixed_flag == 0) { fprintf(floating_pos_file_ptr,"%s", protein->var_pos[i].choice[k].resparam_ptr->one_letter_code); if(k==protein->var_pos[i].number_of_choices) fprintf(floating_pos_file_ptr,".\n"); else fprintf(floating_pos_file_ptr,","); } } num_rot += protein->var_pos[i].choice[k].resparam_ptr->rotamerlib_ptr->number_of_rotamers; } 
number_of_combinations += log10(num_rot); if(strncmp(protein->parameters.algorithm,"LIST_FLOATING_POSITIONS",23)==0) { fprintf(detailed_floating_pos_file_ptr,"\t%c\t%d\t%f\t%f\n", protein->var_pos[i].core_flag, num_rot,protein->var_pos[i].mea_sasa,protein->var_pos[i].mea_born); } ++i; } if(strncmp(protein->parameters.algorithm,"LIST_FLOATING_POSITIONS",23)==0) { sprintf(resline,"%s\n",resline); fprintf(detailed_floating_pos_file_ptr,"\nCOMMA_DELIMITED_LIST %s\n",resline); fprintf(detailed_floating_pos_file_ptr,"NUMBER_OF_SEQUENCE_COMBINATIONS 10^%lf\n",protein->parameters.log10_seq_combinations); fprintf(detailed_floating_pos_file_ptr,"NUMBER_OF_ROTAMER_COMBINATIONS 10^%lf\n",number_of_combinations); fprintf(detailed_floating_pos_file_ptr,"ESTIMATED_MEMORY_USAGE <= %lf MB\n",0.30536*pow(number_of_combinations,1.339)); fprintf(detailed_floating_pos_file_ptr,"ESTIMATED_DISK_USAGE %lf GB\n",0.00073*pow(number_of_combinations,1.45725)); fclose(floating_pos_file_ptr); fclose(detailed_floating_pos_file_ptr); exit(0); } /* calculate the number of rotamer combinations before filtering */ if(LOGFILE_FLAG!=0) { sprintf(dummystring,"%s.log",protein->parameters.output_prefix); logfile_ptr = fopen_file(dummystring,"a"); fprintf(logfile_ptr,"NUMBER_OF_SEQUENCE_COMBINATIONS 10^%lf\n",protein->parameters.log10_seq_combinations); fprintf(logfile_ptr,"NUMBER_ROTAMER_COMBINATIONS 10^%lf\n",number_of_combinations); fprintf(logfile_ptr,"ESTIMATED_MEMORY_USAGE <= %lf MB\n", 0.30536*pow(number_of_combinations,1.339)); fprintf(logfile_ptr,"ESTIMATED_DISK_USAGE %lf GB\n",0.00073*pow(number_of_combinations,1.45725)); fclose(logfile_ptr); } protein->parameters.log10_rotamer_combinations = number_of_combinations; fclose(input); /* generate file w/ ala defined as a choice at each position; free VARIABLE_POSITION; send file to input_VARIABLE_POSITION */ if(alanine_scan_flag==1) { sprintf(dummystring,"temp.%d",GET_PID); input = fopen_file(dummystring,"w"); fprintf(input,"SCANNING_POSITIONS\n"); 
i=1; while(protein->var_pos[i].seq_position!=ENDFLAG) { if(protein->var_pos[i].fixed_flag == 0) /* only the bona-fide floating positions */ fprintf(input,"\t%s\tA.\n",protein->seqpos_text_map[seqpos_to_inputted_string(protein->var_pos[i].seq_position, protein->seqpos_text_map)].seqpos_text); ++i; } fclose(input); free_memory(protein->var_pos); input = fopen_file(dummystring,"r"); fgets(line,MAXLINE,input); protein->parameters.neighbordist = TINY; input_VARIABLE_POSITION(input, protein,1); /* re-generate protein->var_pos and close input */ rm_file(dummystring); } free_memory(resline); free_memory(line); free_memory(seqpos_string); free_memory(keyword); free_memory(one_letter_code); free_memory(dummystring); } /* parses file pointer *input set at the line following the "SEQUENCE" line in an input file. Assumes that protein has gone through input_stuff (or has variables from input_stuff set) This function is called by input_stuff and by rotamer_calc_foreman if modify_varpos_flag=0, then VARIABLE_POSITIONS:modify_VARIABLE_POSITION is not called */ void sequence_to_var_pos_file(FILE *input, PROTEIN *protein, int modify_varpos_flag) { char *line, *sequence, *dummystring, *seqpos_string, *pos_string, *temp_filename; int start_seq_pos, i,k; FILE *temp_file_ptr; extern int GET_PID; if(protein->chain_gap_flag != 0) { fprintf(stderr,"ERROR Cannot use SEQUENCE for Templates w/ missing backbone atoms\n"); exit(1); } line = (char *)calloc(MAX_RESIDUES + MXLINE_INPUT, sizeof(char)); sequence = (char *)calloc(MAX_RESIDUES, sizeof(char)); seqpos_string = (char *)calloc(10, sizeof(char)); pos_string = (char *)calloc(10, sizeof(char)); dummystring = (char *)calloc(MXLINE_INPUT, sizeof(char)); temp_filename = (char *)calloc(MXLINE_INPUT, sizeof(char)); sprintf(temp_filename,"temp.%d.input",GET_PID); temp_file_ptr = fopen_file(temp_filename,"w"); /* Converts the inputed sequence into a VARIABLE_POSITIONS style inputfile, and sends it to input_VARIABLE_POSITION for parsing */ 
while(fgets(line,MXLINE_INPUT, input)!=NULL && strncmp(line,"END",3)!=0) { if(word_count(line) == 1) { sscanf(line,"%s",sequence); start_seq_pos = protein->Template[1].seq_position; } else { sscanf(line,"%s %s",seqpos_string, sequence); start_seq_pos = parse_seqpos_string(seqpos_string, protein->seqpos_text_map); } k = seqpos_to_inputted_string(start_seq_pos, protein->seqpos_text_map); i=0; while(sequence[i]!=0) { if(sequence[i] != 'P' && sequence[i] != 'G' && sequence[i] != 'c') { fprintf(temp_file_ptr,"%s\t%c.\n",protein->seqpos_text_map[k].seqpos_text, sequence[i]); } ++i; ++k; } } /* copy anything after to the temp file */ if(strncmp(line,"END",3)==0) { fprintf(temp_file_ptr,"%s",line); while(fgets(line,MXLINE_INPUT, input)!=NULL) fprintf(temp_file_ptr,"%s",line); } fclose(input); fclose(temp_file_ptr); temp_file_ptr = fopen_file(temp_filename,"r"); input_VARIABLE_POSITION(temp_file_ptr, protein, modify_varpos_flag); rm_file(temp_filename); free_memory(line); free_memory(sequence); free_memory(seqpos_string); free_memory(dummystring); free_memory(pos_string); free_memory(temp_filename); } /* this function sets up total sequence design, using hp patterning. 
called by input_VARIABLE_POSITION when it sees "HP_pattern" following the "VARIABLE_POSITIONS" line in the input file */ void hp_pattern_design(FILE *original_input_ptr, PROTEIN *protein) { FILE *header_file_ptr, *footer_file_ptr, *file_ptr, *pdb_file_ptr; int flag; char *seq_pos_text; char *command, *line, *keyword, *dummystring, *coresurf; extern char *EXECUTABLE_FILENAME; if(EXECUTABLE_FILENAME==NULL) { fprintf(stderr,"ERROR For hp_pattern design, EXECUTABLE_FILENAME must be defined\n"); exit(1); } else { if(does_this_file_exist(EXECUTABLE_FILENAME)==0) { fprintf(stderr,"ERROR EXECUTABLE_FILENAME %s does not exist.\n",EXECUTABLE_FILENAME); exit(1); } } coresurf = (char *)calloc(10,sizeof(char)); command = (char *)calloc(MXLINE_INPUT,sizeof(char)); line = (char *)calloc(MXLINE_INPUT,sizeof(char)); keyword = (char *)calloc(MAXLINE,sizeof(char)); dummystring = (char *)calloc(MAXLINE,sizeof(char)); seq_pos_text = (char *)calloc(MAXLINE,sizeof(char)); fseek(original_input_ptr, 0, 0); /* reset to start of the input file */ /* copy everything up to and including "VARIABLE_POSITIONS" to temp.PID.header */ sprintf(dummystring,"temp.%d.header",GET_PID); header_file_ptr = fopen_file(dummystring,"w"); fgets(line,MXLINE_INPUT,original_input_ptr); sscanf(line,"%s",keyword); convert_string_to_all_caps(keyword); while(strncmp(keyword,"HP",2)!=0) { fprintf(header_file_ptr,"%s",line); fgets(line,MXLINE_INPUT,original_input_ptr); sscanf(line,"%s",keyword); convert_string_to_all_caps(keyword); } fclose(header_file_ptr); /* move to "FIXED_POSITIONS" if any */ flag=0; strcpy(dummystring," "); while(flag == 0) { if(fgets(line, MXLINE_INPUT, original_input_ptr)!=NULL) { sscanf(line,"%s",dummystring); if(word_count(line)==0) flag = 1; else if(strcmp(dummystring,"END")==0) flag = 1; } else flag = 1; } /* copy fixed positions to temp.PID.footer */ sprintf(dummystring,"temp.%d.footer",GET_PID); footer_file_ptr = fopen_file(dummystring,"w"); while(fgets(line, MXLINE_INPUT, 
original_input_ptr)!=NULL) { fprintf(footer_file_ptr,"%s",line); } fclose(footer_file_ptr); fclose(original_input_ptr); /* generate pdb file w/ core/surf info (stored in ROTAMER entries) */ sprintf(dummystring,"dummy.%d.input",GET_PID); file_ptr = fopen_file(dummystring,"w"); sprintf(dummystring,"temp.%d.header",GET_PID); header_file_ptr = fopen_file(dummystring,"r"); flag = 0; while(fgets(line,MXLINE_INPUT,header_file_ptr)!=NULL) /* copy header to dummy.PID.input */ { if(strstr(line,"OUTPUT_PREFIX")!=0 || strstr(line,"output_prefix")!=0) /* change output prefix to dummy.PID */ { fprintf(file_ptr,"OUTPUT_PREFIX dummy.%d\n",GET_PID); flag = 1; } else if(strstr(line,"VARIABLE")!=0 || strstr(line,"variable")!=0) { if(flag == 0) /* the master input file is using the default output prefix; need to insert dummy.PID as the prefix */ { fprintf(file_ptr,"OUTPUT_PREFIX dummy.%d\n",GET_PID); flag = 1; if(strstr(line,"JOBTYPE")==0 && strstr(line,"jobtype")==0) fprintf(file_ptr,"%s",line); } else if(strstr(line,"JOBTYPE")==0 && strstr(line,"jobtype")==0) fprintf(file_ptr,"%s",line); } else if(strstr(line,"JOBTYPE")==0 && strstr(line,"jobtype")==0) fprintf(file_ptr,"%s",line); } fclose(header_file_ptr); fprintf(file_ptr,"relax_all_rotamers\n"); fprintf(file_ptr,"END\n"); fclose(file_ptr); sprintf(command,"%s dummy.%d.input",EXECUTABLE_FILENAME,GET_PID); system(command); /* parse HP pattern from the pdb file created above, put HP pattern into temp.PID.middle (ie: below VARIABLE_POSITIONS in an inputfile) */ sprintf(dummystring,"temp.%d.middle",GET_PID); file_ptr = fopen_file(dummystring,"w"); sprintf(dummystring,"dummy.%d.pdb",GET_PID); pdb_file_ptr = fopen_file(dummystring,"r"); fgets(line,MXLINE_INPUT,pdb_file_ptr); sscanf(line,"%s",keyword); while(strcmp(keyword,"ROTAMER")!=0) { fgets(line,MXLINE_INPUT,pdb_file_ptr); sscanf(line,"%s",keyword); } while(strcmp(keyword,"ROTAMER")==0) { sscanf(line,"%s %s %s",keyword, seq_pos_text, coresurf); if(coresurf[0] == 'c') { 
fprintf(file_ptr,"%s hydrophobic\n",seq_pos_text); } else if(coresurf[0] == 's') { fprintf(file_ptr,"%s polar\n",seq_pos_text); } else { fprintf(file_ptr,"%s all\n",seq_pos_text); } fgets(line,MXLINE_INPUT,pdb_file_ptr); sscanf(line,"%s",keyword); } fprintf(file_ptr,"END\n"); free_memory(coresurf); fclose(pdb_file_ptr); fclose(file_ptr); /* assemble the inputfile with the HP pattern */ sprintf(command,"/bin/cat temp.%d.header temp.%d.middle temp.%d.footer > %s.hp.%d.input",GET_PID,GET_PID, GET_PID, protein->parameters.output_prefix,GET_PID); system(command); sprintf(command,"/bin/rm -f temp.%d.* dummy.%d.*",GET_PID, GET_PID); system(command); /* launch an independent job with the new inputfile */ sprintf(command,"%s %s.hp.%d.input",EXECUTABLE_FILENAME,protein->parameters.output_prefix,GET_PID); system(command); free_memory(command); free_memory(line); free_memory(keyword); free_memory(dummystring); free_memory(seq_pos_text); exit(0); /* exit */ } /* this function frees memory from PROTEIN structures, including the lookup table */ void free_PROTEIN(PROTEIN *protein) { int i; char *command; extern int GET_PID; extern char *AVAILABLE_PROCESSORS_FILE; command=NULL; if(protein!=NULL) { if(protein->energy_profile_table!=NULL) { for(i=0;i<=protein->num_res+1;++i) free_memory(protein->energy_profile_table[i]); free_memory(protein->energy_profile_table); protein->energy_profile_table=NULL; } if(protein->chr != NULL) { for(i=1;i<=protein->sizeof_chr_array;++i) free_CHROMOSOME(&protein->chr[i]); free_memory(protein->chr); protein->chr = NULL; } /* if(protein->lookupEnergy != NULL) { free_lookup_table(protein->lookupEnergy, protein->var_pos); free_memory(protein->lookupEnergy); protein->lookupEnergy = NULL; } */ if(strcmp(protein->parameters.lookup_energy_table_directory, TEMP_LOOKUP_DIRECTORY)==0) { command = (char *)calloc(MXLINE_INPUT,sizeof(char)); sprintf(command, "/bin/rm -rf %s", TEMP_LOOKUP_DIRECTORY); system(command); free_memory(command); } if(protein->var_pos!=NULL) 
{ free_memory(protein->var_pos); protein->var_pos=NULL; } if(protein->Template != NULL) { free_memory(protein->Template); protein->Template = NULL; } if(protein->parameters.sequence_algorithm != NULL) { free_memory(protein->parameters.sequence_algorithm); protein->parameters.sequence_algorithm = NULL; } if(protein->mini_Template!=NULL) { free_memory(protein->mini_Template); protein->mini_Template = NULL; } if(protein->fixed_atoms != NULL) { free_memory(protein->fixed_atoms); protein->fixed_atoms = NULL; } if(protein->mini_fixed_atoms != NULL) { free_memory(protein->mini_fixed_atoms); protein->mini_fixed_atoms = NULL; } if(protein->final_pdb != NULL) { free_memory(protein->final_pdb); protein->final_pdb = NULL; } if(protein->final_minipdb != NULL) { free_memory(protein->final_minipdb); protein->final_minipdb = NULL; } if(protein->final_energy != NULL) { free_memory(protein->final_energy); protein->final_energy = NULL; } if(protein->invar_pos != NULL) { free_memory(protein->invar_pos); protein->invar_pos = NULL; } if(protein->template_sequence != NULL) { free_memory(protein->template_sequence); protein->template_sequence = NULL; } if(protein->final_sequence != NULL) { free_memory(protein->final_sequence); protein->final_sequence = NULL; } if(protein->torsion_include_res != NULL) { free_memory(protein->torsion_include_res); protein->torsion_include_res = NULL; } if(protein->torsion_exclude_res != NULL) { free_memory(protein->torsion_exclude_res); protein->torsion_exclude_res = NULL; } if(protein->min_fixed_res != NULL) { free_memory(protein->min_fixed_res); protein->min_fixed_res = NULL; } if(protein->min_float_res != NULL) { free_memory(protein->min_float_res); protein->min_float_res = NULL; } if(protein->chain_id_list != NULL) { free_memory(protein->chain_id_list); protein->chain_id_list = NULL; } if(protein->wacky_numbering_list != NULL) { free_memory(protein->wacky_numbering_list); protein->wacky_numbering_list = NULL; } if(protein->seqpos_text_map != NULL) { 
free_memory(protein->seqpos_text_map); protein->seqpos_text_map = NULL; } if(protein->parameters.output_prefix != NULL) { if(protein->parameters.output_prefix_sans_path != protein->parameters.output_prefix) free_memory(protein->parameters.output_prefix_sans_path); free_memory(protein->parameters.output_prefix); } if(protein->transform_matrix != NULL) free_memory(protein->transform_matrix); if(protein->translate_rotate_array != NULL) free_memory(protein->translate_rotate_array); if(protein->super_chain_id_list != NULL) { i=1; while(protein->super_chain_id_list[i].chain_id!=NULL) { free_memory(protein->super_chain_id_list[i].chain_id); protein->super_chain_id_list[i].chain_id=NULL; ++i; } free_memory(protein->super_chain_id_list); protein->super_chain_id_list = NULL; } // free_memory(protein); } if(AVAILABLE_PROCESSORS_FILE!=NULL) { command = (char *)calloc(MXLINE_INPUT,sizeof(char)); sprintf(command,"temp.avail_processors.%d",GET_PID); if(does_this_file_exist(command)==1) rm_file(command); free_memory(command); } } /* moves the file_ptr past comments and empty lines to the start of the next command block in the inputfile. Upon return, line and file_ptr are at the line just past the comments. if an empty line or end of file (EOF) is reached, returns 0; else returns 1. line must be allocated by the calling function */ int move_past_comments_and_empty_lines(char *line, FILE *file_ptr) { int flag; flag=0; if(line[0]=='!' || line[0]=='#' || line[0]==' ' || line[0]=='\n') /* move past comments or empty lines */ { while(flag==0) { if(fgets(line, MXLINE_INPUT, file_ptr)!=NULL) { if(line[0]=='!' || line[0]=='#') /* comment line; keep reading */ flag=0; else { flag=1; /* not a comment line; break out of here */ if(line[0]==' ' || line[0]=='\n') /* empty line - user left an empty line by mistake */ { flag=2; /* EOF */ } } } else { flag=2; /* end of file */ } } } if(flag==2) flag=0; else flag=1; return(flag); }