/* This header file was based on nhmlg.h in NHML (Galtier & Gouy, 1998; Galtier, 2001) */ #include #include #include #include #include #include #include #include #include #define TRUE 1 #define FALSE 0 #define MAXNSP 100 #define MAXDATASET 100 #define MAXLNAME 20 #define MAXLLINE 10000 #define lmot (8*sizeof(int)) #define seed 1234 #define deltax 1.e-4 #define nb_param_type 6 // nb of params for optimization enum optimParam{ROOT, LENGTH, COVA, COVB, COVC, GaltierPI}; #define lmin 1.e-6 #define lmax 9999. #define frmin 1.e-6 #define frmax 0.999999 #define gammamin 0.005 #define gammamax 100. #define covmin 1.e-6 #define covmax 100. #define pimin 1.e-6 #define pimax 0.999999 #define GAMMA_RANGE_ITER 5 #define GAMMA_RANGE_SIZE 9 #define ERROR_THRESHOLD -1.e-9 typedef double proba[20][20]; typedef double dproba[4][20][20]; /* param type, nt1, nt2 */ typedef double like[20]; /********* STRUCTURES *********/ typedef struct noeud{ struct noeud *v1, *v2, *v3; /* neighbors */ struct noeud *remain; /* unchanged neighbor */ double l1, l2, l3; /* branch lengths */ double lremain; /* unchanged length */ int alremain; /* unchanged status */ double b1, b2, b3; /* bootstrap values */ int alive1, alive2, alive3; int numerobr; double aeq, ceq, geq, teq; /* eq. base comp. in parent branch */ int nbpattern; int *patternson1, *patternson2; int *underlyingbr, *underlyinggc; int depth; like **x; /* likelihood (rate class, pattern) */ like **dx; /* lkh 1st derivative (rate class, pattern) */ like **d2x; /* lkh 2nd derivative (rate class, pattern) */ /* dx and d2x are for the parameter currently derived */ proba** p1; /* subst. proba matrix toward v1 */ proba** p2; /* subst. proba matrix toward v2 */ dproba** dp1; /* p1 first derivative */ dproba** dp2; /* p2 first derivative */ dproba** d2p1; /* p1 second derivative */ dproba** d2p2; /* p2 second derivative */ /* these probas have two additional dimensions with respct to nhml1: initial rate class and final rate class */ double pdif1, pdif2; /* proba switch rate class i to rate class j */ double pide1, pide2; /* proba no switch */ double** rr1, **rr2; /* rij */ double** drr1, **drr2; /* rij derivatives with respect */ double** d2rr1, **d2rr2; /* to x=l.covar */ double ***x2sites; /* lkh of the current two sites (rate class, state1, state2) */ char nom[MAXLNAME+1]; /* name */ char* seq; /* data if tip */ double a, c, g, t; /* base composition */ } *noeud; typedef struct branche{ noeud bout1; noeud bout2; double length; double bootstrap; } *branche; typedef struct tree{ noeud* node; /* array of nodes */ int nbseq; /* number of compared sequences */ int lgseq; /* length of compared sequences (patterns) */ char** seq; /* sequences */ char** names; /* sequence names */ int* sitetopatt; /* site to "pattern" correspondence */ double** dist; /* distances between sequences */ double* weight; /* weights at each site */ double weightsum; /* sum of weights */ double titv; /* transitions / transversions ratio */ double froot; /* root->l1 / root->l2 ratio */ double GCanc; /* ancestral G+C content */ double gamma_shp; double covar; double pi; double omega1, omega2; branche* listbr; /* list of branches */ int* alivebr; /* status of branches */ int nbalivebr; double* lgbr_init; /* list of initial branch lengths */ double* listgc; /* list of G+C contents */ int nb_var_param; /* number of optimized parameters */ int which_var_param[6]; /* 0:fr, 1:length, 2:cov1, 3:cov2, 4.cov3,5.pi */ int* param_nature; /* 0:fr, 1:length, 2:cov1, 3:cov2, 4.cov3,5.pi */ double** var_param; /* array of addresses of optimized parameters */ double** classlike_s; /* class-dependant likelihood at each site */ double* like_s; /* likelihood at each site (site) */ double* likeasrv_s; double*** classdlike_s; /* class-dependant derivative at each site */ double** dlike_s; /* derivatives of likelihood at each site (param, site) */ double** dlikeasrv_s; double*** classd2like_s; /* class-dependant second derivative at each site */ double** d2like_s; /* second deriv. of like. at each site (param, site) */ double** d2likeasrv_s; double lkh; /* total log likelihood ln(L) */ double lssrv, lasrv; double* deriv; /* ln(L) derivatives (param) */ double* deriv2; /* ln(L) second and cross derivatives (param) */ } tree; typedef struct upgmanoeud{ struct upgmanoeud *v1, *v2, *v3; /* neighbors */ double l1, l2, l3; double b1, b2, b3; int num; int alive; double depth; char* nom; } *upgmanoeud; typedef struct print_option{ int PRINT0; /* more */ int PRINT1; /* and */ int PRINT2; /* more */ int PRINT3; /* output */ int COV_SITE_PATTERN; /* details about site-specific covariation */ int OUTPUT_COMP; int EVAL_OUT; /* detailed results in file detailed_out */ } *print_option; typedef struct compute_option{ // char MODEL[20]; int OPTIMIZE_LENGTH; int OPTIMIZE_COMP; int OPTIMIZE_TITV; int OPTIMIZE_ROOT; int OPTIMIZE_ANC; int OPTIMIZE_GAMMA; int OPTIMIZE_COV1; int OPTIMIZE_COV2; int OPTIMIZE_COV3; int OPTIMIZE_PI; } *compute_option; typedef struct init_option{ char INIT_LENGTH[10]; char INIT_COMP[10]; double INIT_TITV; double INIT_ROOT; double INIT_ANC; double INIT_GAMMA; double INIT_COV; double INIT_COV1; double INIT_COV2; double INIT_COV3; double INIT_PI; int GAMMA_NCL; int NBRANDOM; int NOISE; }*init_option; typedef struct converge_option{ int PRECISION; int MAXITERATION; int SIMPLEX; }* converge_option; typedef struct options{ init_option init; compute_option compute; converge_option converge; print_option print; int ALLCOUPLES; /* for program EVAL */ int SH_G; /* for program SHAKE */ double SH_MAXLCROSSED; /* for program SHAKE */ int SH_MAXBOOTCROSSED; /* for program SHAKE */ int SH_RESTART; /* for program SHAKE */ } *options; /***** Functions ******/ void printmemory_init(); void printmemory(char []); void init_cas(); void *check_alloc(int nbrelt, int sizelt); void init_bl(tree* stree); void set_listgc(tree* s_tree, noeud nd); void reset_like(tree* s_tree); void boundaries(tree* s_tree, int* bounded); void reset_tree(tree* s_tree); double log_like_total(tree* s_tree, int nbcl); double minus_log_likelihood(double *paramprov); void num_derivatives (tree* s_tree, double *paramvect); void derivatives(tree* s_tree, double *paramvect); double simplex(double* initpoint, double bound, double* finalvect); int invmat(double** mat, int n, double** invmat); int mindepth (noeud comesfrom, noeud node); void makelistbr_unrooted(noeud from, noeud nd, branche* br, int* alivebr, int nbbranch); void set_var_param(tree* s_tree, compute_option comp); void organize_tree(noeud from, noeud nd); void organize_listbr(tree* s_tree); void setpattern(tree* s_tree); int setunderlyingbr(noeud nd, branche br, int i); void setunderlyinggc(noeud nd, int nb); void set_numerobr(noeud nd, branche* listbr, int nbbr); void set_node_deduced_comp(noeud root); noeud* ctos(char* ctree, int* notu, int *tree_is_rooted); void stoc(noeud* arbre_s, int racine, int notu, char* ctree, int nodegc); void refresh(char** seq, int nbseq, int option); double compute(tree* s_tree, init_option init_o, compute_option compute_opt, converge_option converge_o, print_option print_o); char* readtree(FILE* treef, int nbsp); void getoptions(options* opt_arg, FILE* in_opt); double *vector(long nl, long nh); void free_vector(double *v, long nl, long nh); void amoeba_cont(double **p, double y[], int ndim, double ftol, double (*funk)(double []), int *nfunk, double** contraintes); void ASRVlike_node(noeud nd); void Covarionlike_node(noeud nd); int rtop(int n, double t, double** P, double **vr, double **invvr, double *rr); int samesite(char* site, tree* s_tree, int i); int readSequencefile(char *, char ***seqp, char ***seqnamep, int interleaved); void uppercase(char *ch); double GeneralCovarionSiteLikelihood (tree* s_tree); int callMyEigen(int n, double **R, double **vr, double *rr); int my_eigen(int job, double** A, int n, double *rr, double *ri , double** vr, double** vi); int compute_exact_probacov(noeud nd, double **vr, double **invvr, double *rr); void setR (); /* See the gcc info page, (gcc)Typeof */ #define maxi(a,b) \ ({ typeof (a) _a = (a); \ typeof (b) _b = (b); \ _a > _b ? _a : _b; }) #define mini(a,b) \ ({ typeof (a) _a = (a); \ typeof (b) _b = (b); \ _a < _b ? _a : _b; })