
Nonlinear model approximation

See the method vignette for details of the general inlabru method. Here, we’ll take a look at a small toy example to see the difference between a few posterior approximation methods.

Note: This vignette is not completed yet.

A small toy problem

Hierarchical model: $\lambda \sim \text{Exp}(\gamma)$ and $(y_i \mid \lambda) \sim \text{Po}(\lambda)$, independent across $i = 1, \dots, n$. With $\bar{y} = \frac{1}{n}\sum_{i=1}^n y_i$, the posterior density is
$$p(\lambda \mid \{y_i\}) \propto p(\lambda, y_1, \dots, y_n) \propto \exp(-\gamma\lambda)\,\exp(-n\lambda)\,\lambda^{n\bar{y}} = \exp\{-(\gamma + n)\lambda\}\,\lambda^{n\bar{y}},$$
which is the density of a $\text{Ga}(\alpha = 1 + n\bar{y},\ \beta = \gamma + n)$ distribution.

Latent Gaussian predictor version

Introducing a latent Gaussian variable $u \sim N(0, 1)$, the model can be reformulated as
$$\lambda(u) = -\ln\{1 - \Phi(u)\}/\gamma, \qquad (y_i \mid u) \sim \text{Po}(\lambda(u)).$$
We will need some derivatives of $\lambda$ with respect to $u$:
$$\frac{\partial \lambda(u)}{\partial u} = \frac{1}{\gamma}\,\frac{\phi(u)}{1 - \Phi(u)} = \lambda'(u),$$
$$\frac{\partial^2 \lambda(u)}{\partial u^2} = -\frac{1}{\gamma}\,\frac{\phi(u)}{1 - \Phi(u)}\left(u - \frac{\phi(u)}{1 - \Phi(u)}\right) = -\lambda'(u)\,\{u - \gamma\lambda'(u)\},$$
$$\frac{\partial \ln\lambda(u)}{\partial u} = \frac{1}{\lambda(u)}\,\frac{\partial \lambda(u)}{\partial u} = \frac{\lambda'(u)}{\lambda(u)},$$
$$\frac{\partial^2 \ln\lambda(u)}{\partial u^2} = \frac{1}{\lambda(u)}\,\frac{\partial^2 \lambda(u)}{\partial u^2} - \frac{1}{\lambda(u)^2}\left(\frac{\partial \lambda(u)}{\partial u}\right)^2 = -\frac{\lambda'(u)}{\lambda(u)}\left\{u - \gamma\lambda'(u) + \frac{\lambda'(u)}{\lambda(u)}\right\}.$$

# Map the latent Gaussian predictor u to the Poisson rate:
#   lambda(u) = -log{1 - Phi(u)} / gamma.
# The upper-tail log-probability is computed directly by pnorm() for
# numerical stability far out in the tail. Vectorised over `u`.
lambda <- function(u, gamma) {
  log_tail <- pnorm(u, lower.tail = FALSE, log.p = TRUE)
  -log_tail / gamma
}
# Inverse of the predictor map: given a rate `lambda`, recover u such that
# -log{1 - Phi(u)} / gamma == lambda. Since log{1 - Phi(u)} = -gamma * lambda,
# qnorm() on the log scale inverts the upper tail directly. Vectorised.
lambda_inv <- function(lambda, gamma) {
  log_tail <- -gamma * lambda
  qnorm(log_tail, lower.tail = FALSE, log.p = TRUE)
}
# First derivative of lambda(u):
#   lambda'(u) = phi(u) / [gamma * {1 - Phi(u)}].
# The hazard ratio phi/(1 - Phi) is formed on the log scale and then
# exponentiated, avoiding overflow/underflow for large |u|. Vectorised.
D1lambda <- function(u, gamma) {
  log_hazard <- dnorm(u, log = TRUE) -
    pnorm(u, lower.tail = FALSE, log.p = TRUE)
  exp(log_hazard) / gamma
}
# Second derivative of lambda(u):
#   lambda''(u) = -lambda'(u) * {u - gamma * lambda'(u)}.
D2lambda <- function(u, gamma) {
  d1 <- D1lambda(u, gamma)
  (gamma * d1 - u) * d1
}
# First derivative of log(lambda(u)):
#   d log(lambda)/du = lambda'(u) / lambda(u).
D1log_lambda <- function(u, gamma) {
  D1lambda(u, gamma = gamma) / lambda(u, gamma = gamma)
}
# Second derivative of log(lambda(u)):
#   d^2 log(lambda)/du^2
#     = -{lambda'/lambda} * {u - gamma * lambda' + lambda'/lambda}.
D2log_lambda <- function(u, gamma) {
  dlog <- D1log_lambda(u, gamma)
  -(u - gamma * D1lambda(u, gamma = gamma) + dlog) * dlog
}

Latent Gaussian posterior approximations

A basic approximation of the posterior distribution for $\lambda$ given $y$ can be defined as a deterministic transformation of a Gaussian distribution obtained from a 2nd order Taylor approximation of $\ln p(u \mid \{y_i\})$ at the posterior mode $u_0$ of $p(u \mid \{y_i\})$. The needed derivatives are
$$\frac{\partial \ln p(u \mid \{y_i\})}{\partial u} = \frac{\partial \ln\phi(u)}{\partial u} - n\lambda'(u) + n\bar{y}\,\frac{\lambda'(u)}{\lambda(u)} = -u + n\,\frac{\lambda'(u)}{\lambda(u)}\,\{\bar{y} - \lambda(u)\},$$
$$\frac{\partial^2 \ln p(u \mid \{y_i\})}{\partial u^2} = -1 - n\,\frac{\lambda'(u)}{\lambda(u)}\left\{u - \gamma\lambda'(u) + \frac{\lambda'(u)}{\lambda(u)}\right\}\{\bar{y} - \lambda(u)\} - n\,\frac{\lambda'(u)^2}{\lambda(u)}.$$
At the mode $u_0$, the first order derivative is zero, and
$$\left.\frac{\partial^2 \ln p(u \mid \{y_i\})}{\partial u^2}\right|_{u=u_0} = -1 - \left\{u_0 - \gamma\lambda'(u_0) + \frac{\lambda'(u_0)}{\lambda(u_0)}\right\} u_0 - n\,\frac{\lambda'(u_0)^2}{\lambda(u_0)}.$$
The quadratic approximation of the log-posterior density at the mode $u_0$ is then
$$\ln\breve{p}(u \mid \{y_i\}) = \text{const} - \frac{(u - u_0)^2}{2}\left[-\left.\frac{\partial^2 \ln p(u \mid \{y_i\})}{\partial u^2}\right|_{u=u_0}\right].$$
In inlabru, the approximation first linearises $\ln\lambda(u)$ at $u_0$ before applying the Taylor approximation of $\ln p(u \mid \{y_i\})$. The linearised log-predictor is
$$\ln\overline{\lambda}(u) = \ln\lambda(u_0) + \frac{\lambda'(u_0)}{\lambda(u_0)}(u - u_0),$$
so that $\overline{\lambda}{}'(u) = \frac{\lambda'(u_0)}{\lambda(u_0)}\,\overline{\lambda}(u)$, and the second order derivative of the linearised log-posterior density is
$$\left.\frac{\partial^2 \ln\overline{p}(u \mid \{y_i\})}{\partial u^2}\right|_{u=u_0} = -1 - n\,\frac{\lambda'(u_0)^2}{\lambda(u_0)}.$$

# Joint log-density log p(u, y): standard normal log-prior for u plus the
# iid Poisson log-likelihood with rate lambda(u), including the
# normalising -sum(log(y!)) term. Vectorised over `u` for fixed data `y`.
log_p <- function(u, y, gamma) {
  rate <- lambda(u, gamma)
  n <- length(y)
  log_prior <- dnorm(u, log = TRUE)
  log_lik <- sum(y) * log(rate) - n * rate - sum(lgamma(y + 1))
  log_prior + log_lik
}
# Score function: d log p(u | y)/du
#   = -u + n * {lambda'(u)/lambda(u)} * {ybar - lambda(u)}.
D1log_p <- function(u, y, gamma) {
  resid <- mean(y) - lambda(u, gamma)
  -u + length(y) * D1log_lambda(u, gamma) * resid
}
# Hessian of the log-posterior (second derivative w.r.t. u):
#   -1 + n * D2log_lambda * {ybar - lambda} - n * D1log_lambda * lambda'.
D2log_p <- function(u, y, gamma) {
  n <- length(y)
  resid <- mean(y) - lambda(u, gamma)
  -1 + n * (D2log_lambda(u, gamma) * resid -
    D1log_lambda(u, gamma) * D1lambda(u, gamma))
}
# Data and posterior-mode computations.
g <- 1 # gamma: rate parameter of the Exp(gamma) prior for lambda
# Alternative fixed data sets, kept for experimentation:
# y <- c(0, 1, 2)
# y <- c(0, 0, 0, 0, 0)
set.seed(1) # seed the RNG so the simulated data (and vignette) reproduce
y <- rpois(5, 5)
# Posterior mode of u: root of the score D1log_p, bracketed around the
# u-value corresponding to the theoretical posterior mean of lambda,
# (1 + sum(y)) / (g + n), widened by a factor 10 on each side.
mu_quad <- uniroot(
  D1log_p,
  lambda_inv((1 + sum(y)) / (g + length(y)) * c(1 / 10, 10), gamma = g),
  y = y, gamma = g
)$root
# Quadratic (Laplace) approximation: sd from the exact negated Hessian
# at the mode.
sd_quad <- (-D2log_p(mu_quad, y = y, gamma = g))^-0.5
# Linearised approximation: sd from the negated Hessian of the model with
# log(lambda) linearised at the mode: 1 + n * lambda'(u0)^2 / lambda(u0).
sd_lin <- (1 + length(y) * D1log_lambda(mu_quad, gamma = g)^2 *
  lambda(mu_quad, gamma = g))^-0.5
# Posterior mode of lambda (plug-in transformation of the u-mode).
lambda0 <- lambda(mu_quad, gamma = g)

Posterior densities

# Posterior densities on the lambda scale. Gaussian approximations in u are
# mapped to lambda via the change-of-variables factor 1 / |d lambda/d u|.
# The two "Theory" curves are computed by different routes and should
# coincide, providing a consistency check.
ggplot() +
  xlim(
    lambda(mu_quad - 4 * sd_quad, gamma = g),
    lambda(mu_quad + 4 * sd_quad, gamma = g)
  ) +
  xlab("lambda") +
  ylab("Density") +
  # Theory, route 1: unnormalised posterior exp(log_p) transformed to the
  # lambda scale, rescaled to match dgamma at the mode lambda0.
  geom_function(
    fun = function(x) {
      exp(log_p(lambda_inv(x, gamma = g), y = y, gamma = g)) /
        D1lambda(lambda_inv(x, gamma = g), gamma = g) / (
          exp(log_p(lambda_inv(lambda0, gamma = g), y = y, gamma = g)) /
            D1lambda(lambda_inv(lambda0, gamma = g), gamma = g)
        ) *
        dgamma(lambda0, shape = 1 + sum(y), rate = g + length(y))
    },
    mapping = aes(col = "Theory"),
    n = 1000
  ) +
  # Theory, route 2: the exact conjugate Gamma posterior density.
  geom_function(
    fun = dgamma,
    args = list(shape = 1 + sum(y), rate = g + length(y)),
    mapping = aes(col = "Theory"),
    n = 1000
  ) +
  # Quadratic (Laplace) approximation, transformed to the lambda scale.
  geom_function(
    fun = function(x) {
      dnorm(lambda_inv(x, gamma = g), mean = mu_quad, sd = sd_quad) /
        D1lambda(lambda_inv(x, gamma = g), gamma = g)
    },
    mapping = aes(col = "Quadratic"),
    n = 1000
  ) +
  # Linearised (inlabru-style) approximation, transformed to lambda scale.
  geom_function(
    fun = function(x) {
      dnorm(lambda_inv(x, gamma = g), mean = mu_quad, sd = sd_lin) /
        D1lambda(lambda_inv(x, gamma = g), gamma = g)
    },
    mapping = aes(col = "Linearised"),
    n = 1000
  ) +
  # Reference verticals: exact posterior mean, plug-in mode, and sample mean.
  geom_vline(mapping = aes(
    xintercept = (1 + sum(y)) / (g + length(y)),
    lty = "Bayes mean"
  )) +
  geom_vline(mapping = aes(xintercept = lambda0, lty = "Bayes mode")) +
  geom_vline(mapping = aes(xintercept = mean(y), lty = "Plain mean"))

# Posterior densities on the latent u scale. Here the Gaussian
# approximations are plotted directly, while the exact Gamma posterior is
# mapped to u via the factor |d lambda/d u|. Again, the two "Theory"
# curves should coincide.
ggplot() +
  xlim(
    lambda_inv(lambda0, gamma = g) - 4 * sd_quad,
    lambda_inv(lambda0, gamma = g) + 4 * sd_quad
  ) +
  xlab("u") +
  ylab("Density") +
  # Theory, route 1: exp(log_p) rescaled so it matches the exact density
  # at the mode u0 = lambda_inv(lambda0).
  geom_function(
    fun = function(x) {
      exp(log_p(x, y = y, gamma = g) -
        log_p(lambda_inv(lambda0, gamma = g), y = y, gamma = g)) *
        (dgamma(lambda0, shape = 1 + sum(y), rate = g + length(y)) *
          D1lambda(lambda_inv(lambda0, gamma = g), gamma = g))
    },
    mapping = aes(col = "Theory"),
    n = 1000
  ) +
  # Theory, route 2: exact Gamma posterior transformed to the u scale.
  geom_function(
    fun = function(x) {
      dgamma(lambda(x, gamma = g), shape = 1 + sum(y), rate = g + length(y)) *
        D1lambda(x, gamma = g)
    },
    mapping = aes(col = "Theory"),
    n = 1000
  ) +
  # Quadratic (Laplace) Gaussian approximation in u.
  geom_function(
    fun = dnorm,
    args = list(mean = mu_quad, sd = sd_quad),
    mapping = aes(col = "Quadratic"),
    n = 1000
  ) +
  # Linearised Gaussian approximation in u.
  geom_function(
    fun = dnorm,
    args = list(mean = mu_quad, sd = sd_lin),
    mapping = aes(col = "Linearised"),
    n = 1000
  ) +
  # Reference verticals, mapped from the lambda scale to u.
  geom_vline(
    mapping = aes(
      xintercept = lambda_inv((1 + sum(y)) / (g + length(y)),
        gamma = g
      ),
      lty = "Bayes mean"
    )
  ) +
  geom_vline(
    mapping = aes(
      xintercept = lambda_inv(lambda0, gamma = g),
      lty = "Bayes mode"
    )
  ) +
  geom_vline(
    mapping = aes(
      xintercept = lambda_inv(mean(y), gamma = g),
      lty = "Plain mean"
    )
  )

Posterior CDFs

# Posterior CDFs on the lambda scale. CDFs need no Jacobian: for monotone
# lambda(u), P(lambda <= x) = P(u <= lambda_inv(x)), so the Gaussian
# approximations transform by plain composition with lambda_inv.
ggplot() +
  xlim(
    lambda(mu_quad - 4 * sd_quad, gamma = g),
    lambda(mu_quad + 4 * sd_quad, gamma = g)
  ) +
  xlab("lambda") +
  ylab("CDF") +
  # Exact conjugate Gamma posterior CDF.
  geom_function(
    fun = pgamma,
    args = list(shape = 1 + sum(y), rate = g + length(y)),
    mapping = aes(col = "Theory"),
    n = 1000
  ) +
  # Quadratic (Laplace) approximation CDF.
  geom_function(
    fun = function(x) {
      pnorm(lambda_inv(x, gamma = g), mean = mu_quad, sd = sd_quad)
    },
    mapping = aes(col = "Quadratic"),
    n = 1000
  ) +
  # Linearised approximation CDF.
  geom_function(
    fun = function(x) {
      pnorm(lambda_inv(x, gamma = g), mean = mu_quad, sd = sd_lin)
    },
    mapping = aes(col = "Linearised"),
    n = 1000
  ) +
  # Reference verticals: exact posterior mean, plug-in mode, sample mean.
  geom_vline(mapping = aes(
    xintercept = (1 + sum(y)) / (g + length(y)),
    lty = "Bayes mean"
  )) +
  geom_vline(mapping = aes(xintercept = lambda0, lty = "Bayes mode")) +
  geom_vline(mapping = aes(xintercept = mean(y), lty = "Plain mean"))

# Posterior CDFs on the latent u scale: the exact Gamma posterior is
# composed with lambda(u), while the Gaussian approximations are plotted
# directly via pnorm.
ggplot() +
  xlim(
    lambda_inv(lambda0, gamma = g) - 4 * sd_quad,
    lambda_inv(lambda0, gamma = g) + 4 * sd_quad
  ) +
  xlab("u") +
  ylab("CDF") +
  # Exact posterior CDF transformed to the u scale.
  geom_function(
    fun = function(x) {
      pgamma(lambda(x, gamma = g), shape = 1 + sum(y), rate = g + length(y))
    },
    mapping = aes(col = "Theory"),
    n = 1000
  ) +
  # Quadratic (Laplace) Gaussian approximation CDF in u.
  geom_function(
    fun = pnorm,
    args = list(mean = mu_quad, sd = sd_quad),
    mapping = aes(col = "Quadratic"),
    n = 1000
  ) +
  # Linearised Gaussian approximation CDF in u.
  geom_function(
    fun = pnorm,
    args = list(mean = mu_quad, sd = sd_lin),
    mapping = aes(col = "Linearised"),
    n = 1000
  ) +
  # Reference verticals, mapped from the lambda scale to u.
  geom_vline(mapping = aes(
    xintercept = lambda_inv((1 + sum(y)) / (g + length(y)),
      gamma = g
    ),
    lty = "Bayes mean"
  )) +
  geom_vline(mapping = aes(
    xintercept = lambda_inv(lambda0, gamma = g),
    lty = "Bayes mode"
  )) +
  geom_vline(mapping = aes(
    xintercept = lambda_inv(mean(y), gamma = g),
    lty = "Plain mean"
  ))