# Parametric Estimation of the Expected Number of Genes # The first two functions are the main ones of interest. They give # the parameter estimates for the parametric model (dnb.ml) # the expected number of new genes (deltat) #= Obtain the fitted parameters for the truncated negative binomial model dnb.ml <- function(k, Nk, x0){ # arguments: # k, Nk - vectors # Nk[i] gives the number of genes that had k[i] hits # # x0 - scalar truncation parameter # the model is fit to the counts for k=1,...,x0 hits. # value: # a list with elements # alpha - the estimated alpha parameter # beta - the estimated beta parameter (gamma = beta/(1+beta)) # example: # k <- c(1,2, 3, 4, 5, 6, 7, 9, 14) # Nk <- c(200, 21, 14, 4, 3, 3, 1, 1, 1) # params <- dnb.ml(k, Nk, 10) f <- function(v){ return(-dnb.lik(k, Nk, x0, v[1], v[2])) } v <- optim(c(0,1), f, lower = c(-1.0+1.0e-8,1.0e-8), method = "L-BFGS-B") return(list(alpha = v\$par[1], beta = v\$par[2])) } #= The expected number of new genes for the trucated negative binomial model deltat <- function(k, Nk, alpha, beta, tval = seq(0.01, 1.0, 0.01), x0 = 10){ # arguments: # k, Nk - vectors # Nk[i] gives the number of genes that had k[i] hits # # alpha - the estimated alpha parameter # # beta - the estimated beta parameter (gamma = beta/(1+beta)) # # tval - vector: estimates are of the expected numbers of new genes in # a sample of size tval * n, where n is the original sample size # value: # dtp - the expected numbers of new genes in # a sample of size tval * n, where n is the original sample size # example: # k <- c(1,2, 3, 4, 5, 6, 7, 9, 14) # Nk <- c(200, 21, 14, 4, 3, 3, 1, 1, 1) # params <- dnb.ml(k, Nk, 10) # tval <- seq(0.01, 2, 0.01) # dt <- deltat(k, Nk, params\$alpha, params\$beta, tval = tval) gam <- beta/(beta+1) if (alpha != 0) dtp <- Nk[1] * (1 - (1 + gam*tval)^(-alpha))/(alpha*gam) else dtp <- Nk[1] * log(1 + gam*tval)/gam return(dtp) } #= likelihood for the truncated negative binomial model dnb.lik <- function(k, Nk, x0, alpha, beta){ # arguments: # k, Nk - vectors # Nk[i] gives the number of genes that had k[i] hits # # x0 - scalar truncation parameter # the model is fit to the counts for k=1,...,x0 hits. # # alpha - alpha parameter # # beta - beta parameter (gamma = beta/(1+beta)) # value: # likelihood Nk <- Nk[k <= x0] N0 <- sum(Nk) k <- k[k <= x0] p <- dnb.ext(x0, alpha, beta) lik <- sum( Nk*log(p[k]) ) return(lik) } #= vector of multinomial probs for negative binomial dnb.ext <- function(x0, alpha, beta){ # arguments: # x0 - scalar truncation parameter # the model is fit to the counts for k=1,...,x0 hits. # # alpha - alpha parameter # # beta - beta parameter (gamma = beta/(1+beta)) # value: # a vector giving the probabilities for x=1,...x_0 y <- c(1:x0) gam <- beta/(beta+1) p <- gamma(y+alpha)*gam^(y-1)/(gamma(y+1)*gamma(1+alpha)) p <- p/sum(p) return(p) }