## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(ggplot2)
library(magrittr)
library(dplyr)
library(tidyr)

## ----eval=FALSE---------------------------------------------------------------
# install.packages('devtools') # skip this line if `devtools` is already installed.

## ----eval=FALSE---------------------------------------------------------------
# devtools::install_github("joonhap/sbi")

## -----------------------------------------------------------------------------
library(sbim)  

## -----------------------------------------------------------------------------
n <- 1000 # number of iid observations
gamma_t <- 1 # true shape parameter 
theta_t <- 1 # true rate parameter
set.seed(98475)
## Marginally, observations are distributed following the negative binomial distribution.
y_o <- rnbinom(n=n, size=gamma_t, prob=theta_t/(theta_t+1)) # observed data (y_1,...,y_n). 
MESLE <- gamma_t*n/sum(y_o) # MESLE for theta (= MLE)
sims_at <- seq(.8, 1.2, by=.001) # theta values at which simulations are made
llest <- sapply(sims_at, function(tht) {dpois(y_o, rgamma(n, gamma_t, rate=tht), log=TRUE)})

## -----------------------------------------------------------------------------
s <- simll(llest, params=sims_at)

## -----------------------------------------------------------------------------
nulls_theta <- c(seq(.8, 1.2, by=.05), MESLE) # null values to be tested

## -----------------------------------------------------------------------------
ht_result <- ht(s, null.value=as.list(nulls_theta), test="MESLE")

## -----------------------------------------------------------------------------
ht_result

## -----------------------------------------------------------------------------
cci <- ci(s, level=c(.9, .95), ci="MESLE") # constructed confidence intervals

## ----echo=FALSE, fig.width=6--------------------------------------------------
coef_est <- ht_result$regression_estimates 
est_quad <- function(x) coef_est[[1]] + c(coef_est[[2]])*x + c(coef_est[[3]])*x^2
## exact log-likelihood
ll_exact <- function(x) { dnbinom(sum(y_o), gamma_t*n, x/(x+1), log=TRUE) }
## exact log-likelihood, shifted in y-direction
ll_sub <- function(x) { ll_exact(x) - ll_exact(MESLE) + est_quad(MESLE) } 
sll <- apply(unclass(s), 2, sum) # simulation log likelihood (for all observations)
gg_gp_simll <- ggplot(data.frame(theta=sims_at, sll=sll), aes(x=theta, y=sll)) +
    geom_point(size=.5) 
gg_gp_simll + geom_function(fun=est_quad, linetype="longdash", color='blue') +
    geom_function(fun=ll_sub, linetype="longdash", color='red') + xlab('theta') +
    ylab('simulation log-likelihood') +
    geom_vline(data=data.frame(
        kind=factor(c("MESLE",rep("CI90",2),rep("CI95",2)), levels=c("MESLE","CI90","CI95")),
        value=c(MESLE, cci$confidence_interval[1,2:3], cci$confidence_interval[2,2:3])),
        aes(xintercept=value, linetype=kind)) +
    scale_linetype_manual(name="", labels=c("MESLE", "90% CI", "95% CI"),
        values=c(MESLE="dashed",CI90="dotdash",CI95="dotted")) +
    theme(legend.position="top", panel.grid.major=element_blank(),
        panel.grid.minor=element_blank())

## -----------------------------------------------------------------------------
ht_result <- ht(s, null.value=as.list(nulls_theta), test="parameter", case="iid")
ht_result

## ----message=FALSE------------------------------------------------------------
library(pomp)

## -----------------------------------------------------------------------------
# Stochastic volatility model
SV_pomp <- function(t0, times, kappa, tau, sim_seed=NULL) {

    rinit <- pomp::Csnippet("
        s = tau*rnorm(0,1);
    ")

    rproc <- pomp::Csnippet("
        s = kappa*s + tau*sqrt(1-kappa*kappa)*rnorm(0,1);
    ")

    rmeas <- pomp::Csnippet("
        r = exp(s)*rt(5);
    ")
    
    dmeas <- pomp::Csnippet("
        double logdmeas = dt(r*exp(-s), 5, 1) - s;
        lik = (give_log) ? logdmeas : exp(logdmeas);
    ")

    if (!is.null(sim_seed)) {
        set.seed(sim_seed)
    }
    
    pomp::simulate(t0=t0, times=times,
        rinit=rinit,
        rprocess=pomp::discrete_time(rproc,delta.t=1),
        rmeasure=rmeas,
        dmeasure=dmeas,
        params=c(kappa=kappa, tau=tau),
        statenames = "s",
        obsnames = "r",
        paramnames = c("kappa", "tau")
    )
}

## -----------------------------------------------------------------------------
t0 <- 0
T <- 500
times <- 1:T
kappa_t <- 0.8
tau_t <- 1
theta_t <- c(kappa=kappa_t, tau=tau_t) # true parameter value
theta_Est <- c(kappa=logit(kappa_t), tau=log(tau_t)) # true parameter on the estimation scale
# simulate the process and generate data
SV_t <- SV_pomp(t0, times, kappa_t, tau_t, sim_seed=1554654) 
r_o <- obs(SV_t) # generated observation sequence
dat_raw <- data.frame(time=time(SV_t), latent=c(states(SV_t)), obs=c(r_o))
dat <- pivot_longer(dat_raw, -time)
parnames <- c("kappa", "tau") 
trans <- list(logit, log) # functions to use for transformations to the estimation scale
btrans <- list(plogis, exp) # functions for transformations back to the original scale
transnames <- c("logit(kappa)", "log(tau)")

## ----warning=FALSE, fig.width=3.4, fig.height=2.7-----------------------------
sim_widths <- c(1, .4) 
inc <- c("kappa") # parameter being estimated
parid <- which(parnames%in%inc)
M <- 100 # number of simulation points
Np <- 100 # number of particles used by the bootstrap particle filter
# simulation points are uniformly spaced between the true value +-1
sims_incl_Est <- cbind(seq(theta_Est[inc] - sim_widths[parid], theta_Est[inc] + sim_widths[parid],
    length.out=M)) |> `colnames<-`(inc)
sims_at_Nat <- outer(rep(1,M), theta_t)
sims_at_Nat[,inc] <- sapply(1:length(inc), function(i) btrans[[parid[i]]](sims_incl_Est[,i]))
set.seed(729875)
llp_est <- simplify2array(lapply(1:nrow(sims_at_Nat), function(i) {
    sp <- sims_at_Nat[i,]
    ## run the particle filter for each simulation point
    pfSV <- pfilter(pomp(SV_t, params=sp), Np=Np, save.states="prediction") 
    ## estimate the simulation log-likelihood for each r_t
    lme <- sapply(saved_states(pfSV)$weights, logmeanexp)
    lme
}))
ll_est <- apply(llp_est, 2, sum)

## -----------------------------------------------------------------------------
s <- simll(llp_est, params=sims_incl_Est)
null_values_Est <- cbind(theta_Est[inc]+sim_widths[parid]/5*(-10:10))
colnames(null_values_Est) <- inc
ht_result <- ht(s, null.value=null_values_Est, test="parameter", case="stationary")

## -----------------------------------------------------------------------------
ci_result <- ci(s, level=c(.9,.95), ci="parameter", case="stationary")
regcoef_k <- ci_result$regression_estimates
# estimated expected simulation log-likelihood
eesl <- function(x) { regcoef_k$a + regcoef_k$b*x + regcoef_k$c*x^2 } 
vline_names <- c("true","CI90","CI95")
dat <- data.frame(simll=ll_est)
dat[inc] <- sims_incl_Est
g1 <- ggplot(dat, aes(x=!!sym(inc), y=simll)) + geom_point() +
    geom_function(fun=eesl, linetype="longdash") +
    geom_vline(data=data.frame(
        kind=factor(c("true",rep("CI90",2),rep("CI95",2)), levels=vline_names),
        value=c(theta_Est[inc], ci_result$confidence_interval[1,2:3],
            ci_result$confidence_interval[2,2:3])),
        aes(xintercept=value, linetype=kind)) +
    xlab(transnames[parid]) + ylab('simulation log-likelihood') +
    scale_linetype_manual(name="", labels=c("true", "90%CI", "95%CI"),
        values=c(true="dashed", CI90="dotdash", CI95="dotted")) +
    theme(legend.position="bottom")
g1

## ----warning=FALSE, fig.width=3.4, fig.height=2.7-----------------------------
sim_widths <- c(1, .4) 
inc <- c("tau") # parameter being estimated
parid <- which(parnames%in%inc)
M <- 100
Np <- 100
# simulation points are uniformly spaced between the true value +-.4
sims_incl_Est <- cbind(seq(theta_Est[inc] - sim_widths[parid], theta_Est[inc] + sim_widths[parid],
    length.out=M)) |> `colnames<-`(inc)
sims_at_Nat <- outer(rep(1,M), theta_t)
sims_at_Nat[,inc] <- sapply(1:length(inc), function(i) btrans[[parid[i]]](sims_incl_Est[,i]))
set.seed(729875)
llp_est <- simplify2array(lapply(1:nrow(sims_at_Nat), function(i) {
    sp <- sims_at_Nat[i,]
    pfSV <- pfilter(pomp(SV_t, params=sp), Np=Np, save.states="prediction")
    lme <- sapply(saved_states(pfSV)$weights, logmeanexp)
    lme
}))
ll_est <- apply(llp_est, 2, sum)
s <- simll(llp_est, params=sims_incl_Est)
null_values_Est <- cbind(theta_Est[inc]+sim_widths[parid]/5*(-10:10))
colnames(null_values_Est) <- inc
ht_result <- ht(s, null.value=null_values_Est, test="parameter", case="stationary")
dat <- data.frame(simll=ll_est)
dat[inc] <- sims_incl_Est
ci_result <- ci(s, level=c(.9,.95), ci="parameter", case="stationary")
regcoef_t <- ci_result$regression_estimates
eesl <- function(x) { regcoef_t$a + regcoef_t$b*x + regcoef_t$c*x^2 } # estimated expected simlation log-likelihood
vline_names <- c("true","CI90","CI95")
g2 <- ggplot(dat, aes(x=!!sym(inc), y=simll)) + geom_point() +
    geom_function(fun=eesl, linetype="longdash") +
    geom_vline(data=data.frame(kind=factor(c("true",rep("CI90",2),rep("CI95",2)), levels=vline_names),
        value=c(theta_Est[inc], ci_result$confidence_interval[1,2:3],
            ci_result$confidence_interval[2,2:3])),
        aes(xintercept=value, linetype=kind)) +
    scale_linetype_manual(name=NULL, labels=c("true", "90%CI", "95%CI"),
        values=c(true="dashed",CI90="dotdash",CI95="dotted")) +
    ylab('simulation log-likelihood') + xlab(transnames[parid]) + theme(legend.position="bottom")
g2

## ----warning=FALSE, fig.width=2.4, fig.height=2.7-----------------------------
sim_widths <- c(.5, .2) 
inc <- c("kappa", "tau") # parameters being estimated
parid <- which(parnames%in%inc)
M <- 400
Np <- 100
sims_incl_Est <- expand.grid(seq(theta_Est[inc[1]]-sim_widths[parid[1]], theta_Est[inc[1]]+sim_widths[parid[1]], length.out=sqrt(M)), seq(theta_Est[inc[2]]-sim_widths[parid[2]], theta_Est[inc[2]]+sim_widths[parid[2]], length.out=sqrt(M))) |> as.matrix() |> `colnames<-`(inc)
sims_at_Nat <- outer(rep(1,M), theta_t)
sims_at_Nat[,inc] <- sapply(1:length(inc), function(i) btrans[[parid[i]]](sims_incl_Est[,i]))
set.seed(729875)
llp_est <- simplify2array(lapply(1:nrow(sims_at_Nat), function(i) {
    sp <- sims_at_Nat[i,]
    pfSV <- pfilter(pomp(SV_t, params=sp), Np=Np, save.states="prediction")
    lme <- sapply(saved_states(pfSV)$weights, logmeanexp)
    lme
}))
ll_est <- apply(llp_est, 2, sum)
s <- simll(llp_est, params=sims_incl_Est)
null_values_Est <- expand.grid(theta_Est[inc[1]]+sim_widths[parid[1]]/5*(-10:10), theta_Est[inc[2]]+sim_widths[parid[2]]/5*(-10:10)) |> as.matrix() |> `colnames<-`(inc)
ht_result <- ht(s, null.value=null_values_Est, test="parameter", case="stationary")
dat <- data.frame(null_values_Est)
g3 <- ggplot(dat %>% mutate(pval=ht_result$Hypothesis_Tests[,"pvalue"]) %>%
             mutate(conflvl=cut(pval, breaks=c(0,0.05,0.1,0.2,1), right=FALSE)),
    aes(x=!!sym(inc[1]), y=!!sym(inc[2]))) +
    geom_point(aes(color=conflvl), size=0.6) +
    xlab(transnames[parid[1]]) + ylab(transnames[parid[2]]) +
    scale_color_manual(name="", labels=c("100%", "95%", "90%", "80%"),
        values=rgb(0,0,0,c(0.05,0.25,0.5,1))) +
    geom_point(aes(x=theta_Est[inc[1]], y=theta_Est[inc[2]]), shape=4, size=2) +
    theme(legend.position="bottom")
g3

## -----------------------------------------------------------------------------
null_moments <- list(a=-984, b=c(25.8,-23.7), c=matrix(c(-8.25,8,8,-93),2,2), sigsq=5)
ht(s, null.value=null_moments, test="moments")

## -----------------------------------------------------------------------------
set.seed(1098204)
s <- simll(rnorm(50, 0, 1))

## -----------------------------------------------------------------------------
null_values <- rbind(c(0,1), c(0,0.8), c(0.2, 1))
ht(s, null.value=null_values, test="moments", type="point")