/* This code was based on eval_nh.c in NHML (Galtier & Gouy, 1998; Galtier, 2001) */ #include "mynhmlg.h" #define MAXNTREE 100 tree* s_tree; compute_option compute_o; noeud root; int gamma_nbcl; char infileName[100], intreeName[100]; double pi, covar, covar1, covar2, covar3; void err_mess(char* option){ printf("Missing or wrong line specifying option %s in option file\nBy-default option used\n", option); } void *check_alloc(int nbrelt, int sizelt) { void *retval; if( (retval=calloc(nbrelt,sizelt)) != NULL ) { return retval; } printf("Not enough memory\n"); exit(0); } void normalString (char *str) { char *p; for (p = str; *p; ++p) if (isalpha(*p)) { *str = toupper(*p); ++str; } *str = 0; } /* refresh: remove all sites with gaps only from sequence alignment seq. If option==0, all sites with at least one gap or with ambiguous amino acids B, X, Z are removed. */ void refresh(char** seq, int nbseq, int option){ int lgseq, l=-1, drapeau, i, j, k; char **seqref ; lgseq=(int)strlen(seq[0]); seqref=(char**)check_alloc(nbseq, sizeof(char*)); for(i=0;inb_var_param=0; if(comp->OPTIMIZE_ROOT) s_tree->nb_var_param++; if(comp->OPTIMIZE_LENGTH) s_tree->nb_var_param+=s_tree->nbalivebr; if(comp->OPTIMIZE_COV1) s_tree->nb_var_param++; if(comp->OPTIMIZE_COV2) s_tree->nb_var_param++; if(comp->OPTIMIZE_COV3) s_tree->nb_var_param++; if(comp->OPTIMIZE_PI) s_tree->nb_var_param++; s_tree->var_param=(double**)check_alloc(s_tree->nb_var_param, sizeof(double*)); s_tree->param_nature=(int*)check_alloc(s_tree->nb_var_param, sizeof(int)); for(i=0;iwhich_var_param[i]=-1; // nb_param_type defined in mynhml.h i=0; if(comp->OPTIMIZE_ROOT){ s_tree->var_param[i]=&(s_tree->froot); s_tree->param_nature[i]=ROOT; s_tree->which_var_param[ROOT]=i; i++; } if(comp->OPTIMIZE_LENGTH){ k=0; for(j=0;j<2*s_tree->nbseq-3;j++){ if(s_tree->alivebr[j]){ s_tree->var_param[i+k]=&(s_tree->listbr[j]->length); s_tree->param_nature[i+k]=LENGTH; k++; } } s_tree->which_var_param[LENGTH]=i; i+=s_tree->nbalivebr; } if(comp->OPTIMIZE_COV1){ s_tree->var_param[i]=&covar1; s_tree->param_nature[i]=COVA; s_tree->which_var_param[COVA]=i; i++; } if(comp->OPTIMIZE_COV2){ s_tree->var_param[i]=&covar2; s_tree->param_nature[i]=COVB; s_tree->which_var_param[COVB]=i; i++; } if(comp->OPTIMIZE_COV3){ s_tree->var_param[i]=&covar3; s_tree->param_nature[i]=COVC; s_tree->which_var_param[COVC]=i; i++; } if(comp->OPTIMIZE_PI){ s_tree->var_param[i]=π /* if we change s_tree->var_param[i] pi will be changed */ s_tree->param_nature[i]=GaltierPI; s_tree->which_var_param[GaltierPI]=i; } } void getoptions(options* opt_arg, FILE* in_opt) { char* line, *text, *prov; int i, a, ret; options opt; /* by-default options */ opt=(struct options*)check_alloc(1, sizeof(struct options)); opt->init=(struct init_option*)check_alloc(1, sizeof(struct init_option)); opt->compute=(struct compute_option*)check_alloc(1, sizeof(struct compute_option)); opt->converge=(struct converge_option*)check_alloc(1, sizeof(struct converge_option)); opt->print=(struct print_option*)check_alloc(1, sizeof(struct print_option)); opt->compute->OPTIMIZE_LENGTH=TRUE; opt->compute->OPTIMIZE_ROOT=TRUE; opt->compute->OPTIMIZE_GAMMA=TRUE; opt->compute->OPTIMIZE_COV1=FALSE; opt->compute->OPTIMIZE_COV2=FALSE; opt->compute->OPTIMIZE_COV3=FALSE; opt->compute->OPTIMIZE_PI=FALSE; sprintf(opt->init->INIT_LENGTH, "REDO"); opt->init->INIT_ROOT=-1.; opt->init->NBRANDOM=0; opt->init->NOISE=0; opt->init->INIT_GAMMA=-1.; opt->init->GAMMA_NCL=4; opt->init->INIT_COV1=-1.; opt->init->INIT_COV2=-1.; opt->init->INIT_COV3=-1.; opt->init->INIT_PI=-1.; opt->converge->PRECISION=4; opt->converge->SIMPLEX=TRUE; opt->print->PRINT1=TRUE; opt->print->PRINT2=FALSE; opt->print->PRINT3=FALSE; opt->print->EVAL_OUT=FALSE; opt->print->OUTPUT_COMP=FALSE; opt->print->COV_SITE_PATTERN=FALSE; opt->SH_G=-1; opt->SH_MAXLCROSSED=lmax; opt->SH_MAXBOOTCROSSED=INT_MAX; opt->SH_RESTART=0; opt->ALLCOUPLES=0; line=(char*)check_alloc(100, sizeof(char)); text=(char*)check_alloc(5000, sizeof(char)); while(fgets(line, 100, in_opt)) strcat(text, line); prov=strstr(text, "ALLCOUPLES"); if(prov) prov=strchr(prov, '='); else { err_mess("ALLCOUPLES"); goto finac; } if(prov) prov++; else { err_mess("ALLCOUPLES"); goto finac; } while(*prov==' ') prov++; sscanf(prov, "%d", &(opt->ALLCOUPLES)); if(opt->ALLCOUPLES!=0 && opt->ALLCOUPLES!=1) { err_mess("ALLCOUPLES"); opt->ALLCOUPLES=1; } finac: prov=strstr(text, "INIT_LENGTH"); if(prov) prov=strchr(prov, '='); else { err_mess("INIT_LENGTH"); goto fininitlg; } if(prov) prov++; else { err_mess("INIT_LENGTH"); goto fininitlg; } while(*prov==' ') prov++; i=0; while(*prov!=' ' && *prov!='\t' && *prov!='\n'){ opt->init->INIT_LENGTH[i]=*prov; i++; prov++; } opt->init->INIT_LENGTH[i]=0; if(strcmp(opt->init->INIT_LENGTH, "KEEP") && strcmp(opt->init->INIT_LENGTH, "REDO")){ err_mess("INIT_LENGTH"); sprintf(opt->init->INIT_LENGTH, "REDO"); } fininitlg: prov=strstr(text, "OPTIMIZE_LENGTH"); if(prov) prov=strchr(prov, '='); else { err_mess("OPTIMIZE_LENGTH"); goto finol; } if(prov) prov++; else { err_mess("OPTIMIZE_LENGTH"); goto finol; } while(*prov==' ') prov++; sscanf(prov, "%d", &(opt->compute->OPTIMIZE_LENGTH)); if(opt->compute->OPTIMIZE_LENGTH!=0 && opt->compute->OPTIMIZE_LENGTH!=1) { err_mess("OPTIMIZE_LENGTH"); opt->compute->OPTIMIZE_LENGTH=1; } finol: prov=strstr(text, "OPTIMIZE_ROOT"); if(prov) prov=strchr(prov, '='); else { err_mess("OPTIMIZE_ROOT"); goto finor; } if(prov) prov++; else { err_mess("OPTIMIZE_ROOT"); goto finor; } while(*prov==' ') prov++; sscanf(prov, "%d", &(opt->compute->OPTIMIZE_ROOT)); if(opt->compute->OPTIMIZE_ROOT!=0 && opt->compute->OPTIMIZE_ROOT!=1) { err_mess("OPTIMIZE_ROOT"); opt->compute->OPTIMIZE_ROOT=1; } finor: prov=strstr(text, "OPTIMIZE_GAMMA"); if(prov) prov=strchr(prov, '='); else { err_mess("OPTIMIZE_GAMMA"); goto finog; } if(prov) prov++; else { err_mess("OPTIMIZE_GAMMA"); goto finog; } while(*prov==' ') prov++; sscanf(prov, "%d", &(opt->compute->OPTIMIZE_GAMMA)); if(opt->compute->OPTIMIZE_GAMMA!=0 && opt->compute->OPTIMIZE_GAMMA!=1) { err_mess("OPTIMIZE_GAMMA"); opt->compute->OPTIMIZE_GAMMA=1; } finog: prov=strstr(text, "OPTIMIZE_COV1"); if(prov) prov=strchr(prov, '='); else { err_mess("OPTIMIZE_COV_off/on"); goto finocov1; } if(prov) prov++; else { err_mess("OPTIMIZE_COV_off/on"); goto finocov1; } while(*prov==' ') prov++; sscanf(prov, "%d", &(opt->compute->OPTIMIZE_COV1)); if(opt->compute->OPTIMIZE_COV1!=0 && opt->compute->OPTIMIZE_COV1!=1) { err_mess("OPTIMIZE_COV_off/on"); opt->compute->OPTIMIZE_COV1=0; } finocov1: prov=strstr(text, "OPTIMIZE_COV2"); if(prov) prov=strchr(prov, '='); else { err_mess("OPTIMIZE_COV_on/off"); goto finocov2; } if(prov) prov++; else { err_mess("OPTIMIZE_COV_on/off"); goto finocov2; } while(*prov==' ') prov++; sscanf(prov, "%d", &(opt->compute->OPTIMIZE_COV2)); if(opt->compute->OPTIMIZE_COV2!=0 && opt->compute->OPTIMIZE_COV2!=1) { err_mess("OPTIMIZE_COV_on/off"); opt->compute->OPTIMIZE_COV2=0; } finocov2: prov=strstr(text, "OPTIMIZE_COV3"); if(prov) prov=strchr(prov, '='); else { err_mess("OPTIMIZE_COV_on/on"); goto finocov3; } if(prov) prov++; else { err_mess("OPTIMIZE_COV_on/on"); goto finocov3; } while(*prov==' ') prov++; sscanf(prov, "%d", &(opt->compute->OPTIMIZE_COV3)); if(opt->compute->OPTIMIZE_COV3!=0 && opt->compute->OPTIMIZE_COV3!=1) { err_mess("OPTIMIZE_COV_on/on"); opt->compute->OPTIMIZE_COV3=0; } finocov3: prov=strstr(text, "OPTIMIZE_PI"); if(prov) prov=strchr(prov, '='); else { err_mess("OPTIMIZE_PI"); goto finopi; } if(prov) prov++; else { err_mess("OPTIMIZE_PI"); goto finopi; } while(*prov==' ') prov++; sscanf(prov, "%d", &(opt->compute->OPTIMIZE_PI)); if(opt->compute->OPTIMIZE_PI!=0 && opt->compute->OPTIMIZE_PI!=1) { err_mess("OPTIMIZE_PI"); opt->compute->OPTIMIZE_PI=0; } finopi: prov=strstr(text, "INIT_ROOT"); if(prov) prov=strchr(prov, '='); else { err_mess("INIT_ROOT"); goto finkrl; } if(prov) prov++; else { err_mess("INIT_ROOT"); goto finkrl; } while(*prov==' ') prov++; a=sscanf(prov, "%le", &(opt->init->INIT_ROOT)); if( opt->init->INIT_ROOT>1. || a<1){ err_mess("INIT_ROOT"); opt->init->INIT_ROOT=-1.; } finkrl: prov=strstr(text, "INIT_COV1"); if(prov) prov=strchr(prov, '='); else { err_mess("INIT_COV1"); goto finico1; } if(prov) prov++; else { err_mess("INIT_COV1"); goto finico1; } while(*prov==' ') prov++; a=sscanf(prov, "%le", &(opt->init->INIT_COV1)); if(a<1){ err_mess("INIT_COV1"); opt->init->INIT_COV1=-1.; } finico1: prov=strstr(text, "INIT_COV2"); if(prov) prov=strchr(prov, '='); else { err_mess("INIT_COV2"); goto finico2; } if(prov) prov++; else { err_mess("INIT_COV2"); goto finico2; } while(*prov==' ') prov++; a=sscanf(prov, "%le", &(opt->init->INIT_COV2)); if(a<1){ err_mess("INIT_COV2"); opt->init->INIT_COV2=-1.; } finico2: prov=strstr(text, "INIT_COV3"); if(prov) prov=strchr(prov, '='); else { err_mess("INIT_COV3"); goto finico3; } if(prov) prov++; else { err_mess("INIT_COV3"); goto finico3; } while(*prov==' ') prov++; a=sscanf(prov, "%le", &(opt->init->INIT_COV3)); if(a<1){ err_mess("INIT_COV3"); opt->init->INIT_COV3=-1.; } finico3: prov=strstr(text, "INIT_PI"); if(prov) prov=strchr(prov, '='); else { err_mess("INIT_PI"); goto finipi; } if(prov) prov++; else { err_mess("INIT_PI"); goto finipi; } while(*prov==' ') prov++; a=sscanf(prov, "%le", &(opt->init->INIT_PI)); if(a<1){ err_mess("INIT_PI"); opt->init->INIT_PI=-1.; } finipi: prov=strstr(text, "PRECISION"); if(prov) prov=strchr(prov, '='); else { err_mess("PRECISION"); goto finpr; } if(prov) prov++; else { err_mess("PRECISION"); goto finpr; } while(*prov==' ') prov++; sscanf(prov, "%d", &(opt->converge->PRECISION)); if(opt->converge->PRECISION<=-5) { err_mess("PRECISION"); opt->converge->PRECISION=4; } finpr: prov=strstr(text, "SIMPLEX"); if(prov) prov=strchr(prov, '='); else { err_mess("SIMPLEX"); goto finsp; } if(prov) prov++; else { err_mess("SIMPLEX"); goto finsp; } while(*prov==' ') prov++; sscanf(prov, "%d", &(opt->converge->SIMPLEX)); if(opt->converge->SIMPLEX!=0 && opt->converge->SIMPLEX!=1) { err_mess("SIMPLEX"); opt->converge->SIMPLEX=1; } finsp: prov=strstr(text, "PRINT1"); if(prov) prov=strchr(prov, '='); else { err_mess("PRINT1"); goto finp1; } if(prov) prov++; else { err_mess("PRINT1"); goto finp1; } while(*prov==' ') prov++; sscanf(prov, "%d", &(opt->print->PRINT1)); if(opt->print->PRINT1!=0 && opt->print->PRINT1!=1) { err_mess("PRINT1"); opt->print->PRINT1=1; } finp1: prov=strstr(text, "PRINT2"); if(prov) prov=strchr(prov, '='); else { err_mess("PRINT2"); goto finp2; } if(prov) prov++; else { err_mess("PRINT2"); goto finp2; } while(*prov==' ') prov++; sscanf(prov, "%d", &(opt->print->PRINT2)); if(opt->print->PRINT2!=0 && opt->print->PRINT2!=1) { err_mess("PRINT2"); opt->print->PRINT2=1; } finp2: prov=strstr(text, "PRINT3"); if(prov) prov=strchr(prov, '='); else { err_mess("PRINT3"); goto finp3; } if(prov) prov++; else { err_mess("PRINT3"); goto finp3; } while(*prov==' ') prov++; sscanf(prov, "%d", &(opt->print->PRINT3)); if(opt->print->PRINT3!=0 && opt->print->PRINT3!=1) { err_mess("PRINT3"); opt->print->PRINT3=1; } finp3: prov=strstr(text, "EVAL_OUT"); if(prov) prov=strchr(prov, '='); else { err_mess("EVAL_OUT"); goto finevo; } if(prov) prov++; else { err_mess("EVAL_OUT"); goto finevo; } while(*prov==' ') prov++; sscanf(prov, "%d", &(opt->print->EVAL_OUT)); if(opt->print->EVAL_OUT!=0 && opt->print->EVAL_OUT!=1) { err_mess("EVAL_OUT"); opt->print->EVAL_OUT=1; } finevo: prov=strstr(text, "COV_SITE_PATTERN"); if(prov) prov=strchr(prov, '='); else { err_mess("COV_SITE_PATTERN"); goto fincsp; } if(prov) prov++; else { err_mess("COV_SITE_PATTERN"); goto fincsp; } while(*prov==' ') prov++; sscanf(prov, "%d", &(opt->print->COV_SITE_PATTERN)); if(opt->print->COV_SITE_PATTERN!=0 && opt->print->COV_SITE_PATTERN!=1) { err_mess("COV_SITE_PATTERN"); opt->print->COV_SITE_PATTERN=1; } fincsp: prov=strstr(text, "SH_G"); if(prov) prov=strchr(prov, '='); else { err_mess("SH_G"); goto finshg; } if(prov) prov++; else { err_mess("SH_G"); goto finshg; } while(*prov==' ') prov++; sscanf(prov, "%d", &(opt->SH_G)); finshg: prov=strstr(text, "SH_MAXLCROSSED"); if(prov) prov=strchr(prov, '='); else { err_mess("SH_MAXLCROSSED"); goto finshm; } if(prov) prov++; else { err_mess("SH_MAXLCROSSED"); goto finshm; } while(*prov==' ') prov++; sscanf(prov, "%le", &(opt->SH_MAXLCROSSED)); if(opt->SH_MAXLCROSSED<=0) opt->SH_MAXLCROSSED=lmax; finshm: prov=strstr(text, "SH_MAXBOOTCROSSED"); if(prov) prov=strchr(prov, '='); else { err_mess("SH_MAXBOOTCROSSED"); goto finshb; } if(prov) prov++; else { err_mess("SH_MAXBOOTCROSSED"); goto finshb; } while(*prov==' ') prov++; sscanf(prov, "%d", &(opt->SH_MAXBOOTCROSSED)); if(opt->SH_MAXBOOTCROSSED<=0) opt->SH_MAXBOOTCROSSED=INT_MAX; finshb: prov=strstr(text, "SH_RESTART"); if(prov) prov=strchr(prov, '='); else { err_mess("SH_RESTART"); goto finshr; } if(prov) prov++; else { err_mess("SH_RESTART"); goto finshr; } while(*prov==' ') prov++; sscanf(prov, "%d", &(opt->SH_RESTART)); if(opt->SH_RESTART<=0) opt->SH_RESTART=0; finshr: prov=strstr(text, "INIT_GAMMA"); if(prov) prov=strchr(prov, '='); else { err_mess("INIT_GAMMA"); goto fingam; } if(prov) prov++; else { err_mess("INIT_GAMMA"); goto fingam; } while(*prov==' ') prov++; ret=sscanf(prov, "%le", &(opt->init->INIT_GAMMA)); if(ret==0) { err_mess("INIT_GAMMA"); opt->init->INIT_GAMMA=-1.;} fingam: prov=strstr(text, "GAMMA_NBCLASS"); if(prov) prov=strchr(prov, '='); else { err_mess("GAMMA_NBCLASS"); goto fingac; } if(prov) prov++; else { err_mess("GAMMA_NBCLASS"); goto fingac; } while(*prov==' ') prov++; ret=sscanf(prov, "%d", &(opt->init->GAMMA_NCL)); // if(ret==0 || opt->init->GAMMA_NCL<2){ if(ret==0 || opt->init->GAMMA_NCL<1){ err_mess("GAMMA_NBCLASS less than 1 or not defined - using as default 4 gamma rate categories"); opt->init->GAMMA_NCL=4; } fingac: if(opt->init->INIT_GAMMA<0. && opt->compute->OPTIMIZE_GAMMA==0) opt->init->GAMMA_NCL=1; *opt_arg=opt; free(line); free(text); return; } void restore_lengths(tree* s_tree){ int i, nbseq; noeud nd1, nd2; branche br; double l; nbseq=s_tree->nbseq; for(i=0;i<2*nbseq-3;i++){ br=s_tree->listbr[i]; l=s_tree->lgbr_init[i]; br->length=l; nd1=br->bout1; nd2=br->bout2; if(nd2==nd1->v3){ nd1->l3=l; if(nd1==nd2->v1) nd2->l1=l; else nd2->l2=l; } else if(nd1==nd2->v3){ nd2->l3=l; if(nd2==nd1->v1) nd1->l1=l; else nd1->l2=l; } else if(nd1->v3==root && nd2->v3==root){ nd1->l3=l; nd2->l3=l; root->l1=l*s_tree->froot; root->l2=l*(1.-s_tree->froot); } else { printf("unexpected unrooted tree\n"); exit(0); } } } void free_node(noeud nd, int lgseq, int nbparam, int leaf, int nbcl){ int j; nbcl ++; // add in a class of OFF sites free(nd->underlyingbr); free(nd->underlyinggc); free(nd->seq); if(!leaf) free(nd->patternson1); if(!leaf) free(nd->patternson2); for(j=0;jx[j]); free(nd->x); for(j=0;jdx[j]); free(nd->dx); for(j=0;jd2x[j]); free(nd->d2x); for(j=0;jp1[j]); free(nd->p1); for(j=0;jp2[j]); free(nd->p2); for(j=0;jdp1[j]); free(nd->dp1); for(j=0;jdp2[j]); free(nd->dp2); for(j=0;jd2p1[j]); free(nd->d2p1); for(j=0;jd2p2[j]); free(nd->d2p2); for(j=0;jrr1[j]); free(nd->rr1); for(j=0;jrr2[j]); free(nd->rr2); for(j=0;jdrr1[j]); free(nd->drr1); for(j=0;jdrr2[j]); free(nd->drr2); for(j=0;jd2rr1[j]); free(nd->d2rr1); for(j=0;jd2rr2[j]); free(nd->d2rr2); free(nd); } void free_tree(tree* s_tree, int nbcl){ int i, j, nbseq, nbparam, lgseq; nbseq=s_tree->nbseq; lgseq=s_tree->lgseq; nbparam=s_tree->nb_var_param; for(i=0;i<2*nbseq-2;i++) free_node(s_tree->node[i], lgseq, nbparam, (ilistbr[i]); free(s_tree->listbr); free(s_tree->listgc); free(s_tree->lgbr_init); free(s_tree->deriv2); free(s_tree->deriv); for(i=0;iclasslike_s[i]); free(s_tree->classlike_s); free(s_tree->like_s); free(s_tree->likeasrv_s); for(j=0;jclassdlike_s[j][i]); free(s_tree->classd2like_s[j][i]); } free(s_tree->classdlike_s[j]); free(s_tree->classd2like_s[j]); } free(s_tree->classdlike_s); free(s_tree->classd2like_s); for(i=0;idlike_s[i]); free(s_tree->dlikeasrv_s[i]); free(s_tree->d2like_s[i]); free(s_tree->d2likeasrv_s[i]); } free(s_tree->alivebr); free(s_tree->dlike_s); free(s_tree->dlikeasrv_s); free(s_tree->d2like_s); free(s_tree->d2likeasrv_s); free(s_tree->weight); free(s_tree->var_param); free(s_tree->param_nature); for(i=0;idist[i]); free(s_tree->dist); free(s_tree->sitetopatt); } void alloc_node(noeud nd, int lgseq, int nbparam, int nbseq, int leaf, int cross, int nbcl){ int j, i; // nbcl ++; // add in a class of OFF sites nd->underlyingbr=(int*)check_alloc(2*nbseq-3, sizeof(int)); nd->underlyinggc=(int*)check_alloc(2*nbseq-2, sizeof(int)); nd->seq=(char*)check_alloc(lgseq, sizeof(char)); if(!leaf) nd->patternson1=(int*)check_alloc(lgseq, sizeof(int)); if(!leaf) nd->patternson2=(int*)check_alloc(lgseq, sizeof(int)); nd->x=(like**)check_alloc(nbcl, sizeof(like*)); nd->dx=(like**)check_alloc(nbcl, sizeof(like*)); nd->d2x=(like**)check_alloc(nbcl, sizeof(like*)); for(j=0;jx[j]=(like*)check_alloc(lgseq, sizeof(like)); nd->dx[j]=(like*)check_alloc(lgseq, sizeof(like)); /* nd->d2x[j]=(like*)check_alloc(lgseq, sizeof(like)); // d2x is not used in procov */ } nd->x2sites=check_alloc(nbcl, sizeof(double**)); for(j=0;jx2sites[j]=check_alloc(20, sizeof(double*)); for(i=0;i<20;i++) nd->x2sites[j][i]=check_alloc(20, sizeof(double)); } nd->p1=(proba**)check_alloc(nbcl, sizeof(proba*)); nd->p2=(proba**)check_alloc(nbcl, sizeof(proba*)); nd->dp1=(dproba**)check_alloc(nbcl, sizeof(dproba*)); nd->dp2=(dproba**)check_alloc(nbcl, sizeof(dproba*)); nd->d2p1=(dproba**)check_alloc(nbcl, sizeof(dproba*)); nd->d2p2=(dproba**)check_alloc(nbcl, sizeof(dproba*)); for(j=0;jp1[j]=(proba*)check_alloc(nbcl, sizeof(proba)); nd->p2[j]=(proba*)check_alloc(nbcl, sizeof(proba)); nd->dp1[j]=(dproba*)check_alloc(nbcl, sizeof(dproba)); nd->dp2[j]=(dproba*)check_alloc(nbcl, sizeof(dproba)); nd->d2p1[j]=(dproba*)check_alloc(nbcl, sizeof(dproba)); nd->d2p2[j]=(dproba*)check_alloc(nbcl, sizeof(dproba)); } nd->rr1=check_alloc(nbcl, sizeof(double*)); nd->rr2=check_alloc(nbcl, sizeof(double*)); nd->drr1=check_alloc(nbcl, sizeof(double*)); nd->drr2=check_alloc(nbcl, sizeof(double*)); nd->d2rr1=check_alloc(nbcl, sizeof(double*)); nd->d2rr2=check_alloc(nbcl, sizeof(double*)); for(i=0;irr1[i]=check_alloc(nbcl, sizeof(double)); nd->rr2[i]=check_alloc(nbcl, sizeof(double)); nd->drr1[i]=check_alloc(nbcl, sizeof(double)); nd->drr2[i]=check_alloc(nbcl, sizeof(double)); nd->d2rr1[i]=check_alloc(nbcl, sizeof(double)); nd->d2rr2[i]=check_alloc(nbcl, sizeof(double)); } } double maxlike(int nbseq, char** seq, char** seqname, char* c_tree, options opt) { int i, j, k, l, ii, nbseq2, lgseq, lgsite, *drapeau, *drapeau2, cont=0, tree_is_rooted, nbparam, nbcompute, bl; char **orderedseq, *site; double *weight, lkh, maxlkh=-1.e100, rem_root; // static int nbtree=0; // FILE* outfile; /* printmemory("deb maxlike"); */ gamma_nbcl=opt->init->GAMMA_NCL; /* if (gamma_nbcl==1) */ gamma_nbcl *=2; // add in a class of OFF sites /* READ TREE, GET NB OF TAXA */ s_tree=(tree*)check_alloc(1, sizeof(tree)); lgseq=(int)strlen(seq[0]); s_tree->alivebr=(int*)check_alloc(2*nbseq, sizeof(int)); s_tree->node=ctos(c_tree, &nbseq2, &tree_is_rooted); if(strchr(c_tree, ':')) bl=1; else bl=0; if (nbseq2>nbseq) { printf("More taxa in tree file than in sequence file\n"); exit(0); } /* ALLOCATE AND INIT S_TREE */ s_tree->nbseq=nbseq=nbseq2; s_tree->seq=seq; s_tree->names=seqname; s_tree->listgc=(double*)check_alloc(2*nbseq, sizeof(double)); s_tree->listbr=(branche*)check_alloc(2*nbseq, sizeof(branche)); s_tree->lgbr_init=(double*)check_alloc(2*nbseq-3, sizeof(double)); for(i=0;i<2*nbseq-2;i++){ s_tree->listbr[i]=(struct branche*)check_alloc(1, sizeof(struct branche)); } makelistbr_unrooted(NULL, s_tree->node[0], s_tree->listbr, s_tree->alivebr, 2*nbseq-3); for(i=0;i<2*nbseq-3;i++) s_tree->lgbr_init[i]=s_tree->listbr[i]->length; for(ii=0;ii<2*nbseq-3;ii++) s_tree->alivebr[ii]=1; s_tree->nbalivebr=0; for(i=0;i<2*nbseq-3;i++) s_tree->nbalivebr+=s_tree->alivebr[i]; set_var_param(s_tree, opt->compute); nbparam=s_tree->nb_var_param; s_tree->classlike_s=(double**)check_alloc(gamma_nbcl, sizeof(double)); for(i=0;iclasslike_s[i]=(double*)check_alloc(lgseq, sizeof(double)); s_tree->like_s=(double*)check_alloc(lgseq, sizeof(double)); s_tree->likeasrv_s=(double*)check_alloc(lgseq, sizeof(double)); s_tree->dlike_s=(double**)check_alloc(nbparam, sizeof(double*)); s_tree->dlikeasrv_s=(double**)check_alloc(nbparam, sizeof(double*)); for(i=0;idlike_s[i]=(double*)check_alloc(lgseq, sizeof(double)); s_tree->dlikeasrv_s[i]=(double*)check_alloc(lgseq, sizeof(double)); } s_tree->d2like_s=(double**)check_alloc(nbparam, sizeof(double*)); s_tree->d2likeasrv_s=(double**)check_alloc(nbparam, sizeof(double*)); for(i=0;id2like_s[i]=(double*)check_alloc(lgseq, sizeof(double)); s_tree->d2likeasrv_s[i]=(double*)check_alloc(lgseq, sizeof(double)); } s_tree->classdlike_s=(double***)check_alloc(gamma_nbcl, sizeof(double**)); s_tree->classd2like_s=(double***)check_alloc(gamma_nbcl, sizeof(double**)); for(i=0;iclassdlike_s[i]=(double**)check_alloc(nbparam, sizeof(double*)); s_tree->classd2like_s[i]=(double**)check_alloc(nbparam, sizeof(double*)); for(j=0;jclassdlike_s[i][j]=(double*)check_alloc(lgseq, sizeof(double)); s_tree->classd2like_s[i][j]=(double*)check_alloc(lgseq, sizeof(double)); } } s_tree->deriv=(double*)check_alloc(nbparam, sizeof(double)); s_tree->deriv2=(double*)check_alloc(nbparam, sizeof(double)); /* printmemory("fin tree alloc"); */ for(i=0;i<2*nbseq-2;i++) alloc_node(s_tree->node[i], lgseq, nbparam, nbseq, (inode[i]->x[l][j][k]=1.; else s_tree->node[i]->x[l][j][k]=0.; } } } } for(i=0;inode[i]->x[l][j][k]=1.; else s_tree->node[i]->x[l][j][k]=0.; } } } for(i=0;inode[i]->x[l][j][k]=1.; else s_tree->node[i]->x[l][j][k]=0.; } } } for(i=0;inode[i]->x[l][j][k]=1.; } } } /* CHECK SEQUENCE FILE */ for(i=nbseq2;i<2*nbseq2-2;i++) sprintf(s_tree->node[i]->nom, "int%d", i-nbseq2+1); drapeau=(int*)check_alloc(nbseq, sizeof(int)); drapeau2=(int*)check_alloc(nbseq, sizeof(double)); for(i=0;inode[i]->nom, seqname[j], MAXLNAME)==0){ drapeau[j]=drapeau2[i]=1; orderedseq[i]=seq[j]; break; } } if(drapeau2[i]==0){ printf("Taxon %s not found in sequence file.\n", s_tree->node[i]->nom); exit(0); } } s_tree->nbseq=nbseq=nbseq2; /* SET "PATTERNS" (relationships between sites) */ s_tree->sitetopatt=check_alloc(lgseq, sizeof(int)); weight=(double*)check_alloc(lgseq, sizeof(double)); for(i=0;isitetopatt[i]=k; continue; } for(j=0;jnode[j]->seq[lgsite]=site[j]; weight[lgsite]=1.; s_tree->sitetopatt[i]=lgsite; lgsite++; } free(drapeau); free(drapeau2); free(site); s_tree->lgseq=lgsite; /* lgseq holds the number of difference site patterns */ s_tree->weight=weight; s_tree->weightsum=lgseq; /* s_tree->weightsum holds the sequence length */ /* printf("%d species, %d sites, %d patterns\n", nbseq, lgseq, lgsite); */ /* AMMEND INPUT */ /* root */ rem_root=opt->init->INIT_ROOT; if(opt->init->INIT_ROOT<=0.){ opt->init->INIT_ROOT=root->l1/(root->l1+root->l2); if(!bl) opt->init->INIT_ROOT=0.5; } /* branch lengths */ s_tree->dist=(double**)check_alloc(nbseq, sizeof(double*)); for(i=0;idist[i]=(double*)check_alloc(nbseq, sizeof(double)); /* if(strcmp(opt->init->INIT_LENGTH, "REDO")==0){ for(i=0;idist[i][j]=0.; else s_tree->dist[i][j]=s_tree->dist[j][i]=gg95(seq[i], seq[j], opt->init->INIT_TITV); } } } */ /* ROOT TREE AND ORGANIZE */ organize_tree(NULL, root); organize_listbr(s_tree); setpattern(s_tree); for(ii=0;ii<2*nbseq-3;ii++) setunderlyingbr(root, s_tree->listbr[ii], ii); setunderlyinggc(root, s_tree->nbseq); // ??? set_numerobr(root, s_tree->listbr, 2*nbseq-3); // ??? for(i=0;inode[i]->alive1=0; s_tree->node[i]->alive2=0; s_tree->node[i]->alive3=1; } for(i=nbseq;i<2*nbseq-2;i++){ s_tree->node[i]->alive1=1; s_tree->node[i]->alive2=1; s_tree->node[i]->alive3=1; } root->alive1=root->alive2=1; root->alive3=-1; for(ii=0;ii<2*nbseq-3;ii++) s_tree->alivebr[ii]=1; /* CALL COMPUTE */ if(opt->init->NBRANDOM>0) nbcompute=opt->init->NBRANDOM; else nbcompute=1; for(l=0;linit, opt->compute, opt->converge, opt->print); if(lkh>maxlkh) maxlkh=lkh; if(!opt->print->PRINT1 && !opt->print->PRINT2) printf("%f\n", lkh); } /* if(ctree1) stoc(s_tree->node, nbseq, 1, ctree, 0); if(ctree2) stoc(s_tree->node, nbseq, 1, ctree2, 1); if(opt->print->EVAL_OUT){ outfile=fopen("detailed_out", "a"); nbtree++; fprintf(outfile, "%d\t%s\t%f\t%f\t%f\t%f\n", nbtree, ctree, maxlkh, s_tree->gamma_shp, s_tree->covar, s_tree->pi); fclose(outfile); } */ opt->init->INIT_ROOT=rem_root; /* FREE */ free_tree(s_tree, gamma_nbcl); free(s_tree); free_node(root, lgseq, nbparam, 0, gamma_nbcl); free(orderedseq); return maxlkh; } int main(int argc, char** argv) { int i, j, nbseq, nbtree, nbdataset, nbseqvrai, maxtreesize, allcouples=0; int interleaved; char nomfopt[100], **seq, **seqname, print1; char* alltrees, **c_tree, *prov, *prov2 /* *ctree1, *ctree2 */; char ch, ch_for_input_format; double **maxl; FILE *treefile, *optfile, *in; FILE* dutheil; options opt; /* printmemory_init(); */ init_cas(); srand48(seed); /** input sequences **/ if(argc<2){ while(1){ printf("Input sequence file? "); gets(infileName); in=fopen(infileName, "r"); if(in) break; printf("Cannot find file: %s\n", infileName); } } else{ in=fopen(argv[1], "r"); if(!in){ printf("Cannot find sequence file: %s\n", argv[1]); exit(0); } sprintf(infileName, "%s", argv[1]); } fclose(in); for (;;) { printf("Input sequences interleaved?[y/n]"); scanf("%c%*[^\n]", &ch_for_input_format); getchar(); if (ch_for_input_format == '\n') ch_for_input_format = ' '; uppercase(&ch_for_input_format); if (ch_for_input_format == 'Y'){ interleaved = 1; break; } if (ch_for_input_format == 'N'){ interleaved = 0; break; } } nbseq=readSequencefile(infileName, &seq, &seqname, interleaved); nbdataset = 1; refresh(seq, nbseq, 1); /** input trees **/ maxtreesize=50*nbseq*MAXNTREE; alltrees=(char*)check_alloc(maxtreesize+1, sizeof(char)); c_tree=(char**)check_alloc(MAXNTREE, sizeof(char*)); if(argc<3){ while(1){ printf("Input tree file ? "); gets(intreeName); treefile=fopen(intreeName, "r"); if(treefile) break; printf("Cannot find file: %s\n", intreeName); } } else{ treefile=fopen(argv[2], "r"); if(!treefile){ printf("Cannot find tree file : %s\n", argv[2]); exit(0); } } i=0; /* while(iprint->PRINT1; allcouples=opt->ALLCOUPLES; /* if(opt->print->EVAL_OUT){ dutheil=fopen("detailed_out", "w"); fprintf(dutheil, "numero\tarbre\tlnL\talpha\tcovar\tpi\n"); fclose(dutheil); } */ if(print1) printf("\n%d sequence data sets and %d trees found : ", nbdataset, nbtree); if(!allcouples){ nbdataset=mini(nbdataset, nbtree); nbtree=nbdataset; } if(print1) printf("%d evaluations processed\n\n", allcouples?nbdataset*nbtree:nbdataset); maxl=(double**)check_alloc(nbdataset, sizeof(double*)); for(i=0;i