require("lgpr")
# Loading required package: lgpr
# Attached lgpr 1.2.4, using rstan 2.26.23. Type ?lgpr to get started.
require("ggplot2")
# Loading required package: ggplot2
require("rstan")
# Loading required package: rstan
# Loading required package: StanHeaders
#
# rstan version 2.26.23 (Stan version 2.26.1)
# For execution on a local, multicore CPU with excess RAM we recommend calling
# options(mc.cores = parallel::detectCores()).
# To avoid recompilation of unchanged Stan programs, we recommend calling
# rstan_options(auto_write = TRUE)
# For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
# change `threads_per_chain` option:
# rstan_options(threads_per_chain = 1)
In this tutorial we simulate and analyse count data.
We first show how to generate binomial data
set.seed(131)
simData <- simulate_data(N = 12,
t_data = seq(12, 72, by = 12),
covariates = c( 2),
noise_type = "binomial",
relevances = c(0,1,1),
lengthscales = c(18,24,18),
N_trials = 100,
t_jitter = 1)
plot_sim(simData)
# - Dots are noisy observations of the response var.
# - Line is the true signal mapped through inv. link fun.
# (and multiplied by number of trials)
The noise level in the previous example was rather low. To generate more noisy observations, we can draw from the beta-binomial distribution. The overdispersion parameter \(\gamma \in (0,1)\) controls the noise level, so that \(\gamma \rightarrow 0\) would be equivalent to binomial noise (no overdispersion) and larger values mean more noise. Here we set gamma = 0.3
.
set.seed(131)
simData <- simulate_data(N = 12,
t_data = seq(12, 72, by = 12),
covariates = c( 2),
noise_type = "bb",
gamma = 0.3,
relevances = c(0,1,1),
lengthscales = c(18,24,18),
N_trials = 100,
t_jitter = 1)
plot_sim(simData)
# - Dots are noisy observations of the response var.
# - Line is the true signal mapped through inv. link fun.
# (and multiplied by number of trials)
dat <- simData@data
str(dat)
# 'data.frame': 72 obs. of 4 variables:
# $ id : Factor w/ 12 levels "1","2","3","4",..: 1 1 1 1 1 1 2 2 2 2 ...
# $ age: num 11.2 23.5 37 48 59.5 ...
# $ z : Factor w/ 2 levels "1","2": 2 2 2 2 2 2 1 1 1 1 ...
# $ y : int 29 34 30 45 100 84 1 55 96 77 ...
To use beta-binomial observation model in our analysis, we use the likelihood
argument of lgp()
. Here we define a \(\text{Beta}(2,2)\) prior for the gamma
parameter.
my_prior <- list(
alpha = student_t(20),
gamma = bet(2, 2)
)
We fit a model using the generated beta-binomial data.
fit <- lgp(y ~ age + age|id + age|z,
data = dat,
likelihood = "bb",
num_trials = 100,
prior = my_prior,
refresh = 300,
chains = 3,
cores = 3,
control = list(adapt_delta = 0.95),
iter = 3000)
print(fit)
# An object of class lgpfit. See ?lgpfit for more info.
# Inference for Stan model: lgp_latent.
# 3 chains, each with iter=3000; warmup=1500; thin=1;
# post-warmup draws per chain=1500, total post-warmup draws=4500.
#
# mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
# alpha[1] 1.13 0.01 0.48 0.48 0.79 1.04 1.39 2.30 2001 1
# alpha[2] 0.34 0.01 0.26 0.01 0.14 0.29 0.48 0.99 609 1
# alpha[3] 1.17 0.01 0.45 0.53 0.85 1.09 1.40 2.24 2011 1
# ell[1] 0.83 0.01 0.42 0.21 0.52 0.77 1.09 1.77 1398 1
# ell[2] 1.41 0.04 2.10 0.11 0.37 0.78 1.69 6.39 2312 1
# ell[3] 0.73 0.01 0.29 0.24 0.53 0.71 0.89 1.37 1320 1
# gamma[1] 0.30 0.00 0.05 0.18 0.27 0.31 0.34 0.40 793 1
#
# Samples were drawn using NUTS(diag_e) at Sun Sep 24 10:17:54 2023.
# For each parameter, n_eff is a crude measure of effective sample size,
# and Rhat is the potential scale reduction factor on split chains (at
# convergence, Rhat=1).
We see that the posterior mean of the gamma
parameter is close to the value used in simulation (0.3). We visualize its posterior draws for each MCMC chain:
plot_draws(fit, type = "trace", regex_pars = "gamma")
relevances(fit)
# gp(age) gp(age)*zs(id) gp(age)*zs(z) noise
# 0.1751555 0.0460864 0.2219434 0.5568147
We subsample 50 draws from the posterior of each component \(f^{(j)}\), \(j=1,2,3\)
draws <- sample.int(1000, 50)
and visualize them
p <- pred(fit, draws = draws)
plot_components(fit, pred = p, alpha = 0.1, color_by = c(NA, NA, "z", "z"))
as well as the sum \(f = f^{(1)} + f^{(2)} + f^{(3)}\)
plot_pred(fit, pred = p, alpha = 0.3)
t <- seq(2, 100, by = 2)
x_pred <- new_x(dat, t)
p <- pred(fit, x_pred, draws = draws)
# Extrapolating function posterior draws...
# | 10%| 20%| 30%| 40%| 50%| 60%| 70%| 80%| 90%| 100%|
# ======================================================================
# c_hat_pred not given, using constant c_hat_pred = 0.0155558692386378
# Done.
plot_pred(fit, p, alpha = 0.3)
plot_components(fit, pred = p, alpha = 0.1, color_by = c(NA, NA, "z", "z"))
sessionInfo()
# R version 4.1.2 (2021-11-01)
# Platform: x86_64-pc-linux-gnu (64-bit)
# Running under: Linux Mint 21.1
#
# Matrix products: default
# BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
# LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
#
# locale:
# [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
# [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
# [5] LC_MONETARY=fi_FI.UTF-8 LC_MESSAGES=en_US.UTF-8
# [7] LC_PAPER=fi_FI.UTF-8 LC_NAME=C
# [9] LC_ADDRESS=C LC_TELEPHONE=C
# [11] LC_MEASUREMENT=fi_FI.UTF-8 LC_IDENTIFICATION=C
#
# attached base packages:
# [1] stats graphics grDevices utils datasets methods base
#
# other attached packages:
# [1] rstan_2.26.23 StanHeaders_2.26.28 ggplot2_3.4.3
# [4] lgpr_1.2.4 rmarkdown_2.23
#
# loaded via a namespace (and not attached):
# [1] tidyselect_1.2.0 xfun_0.39 QuickJSR_1.0.6
# [4] bslib_0.5.0 reshape2_1.4.4 colorspace_2.1-0
# [7] vctrs_0.6.3 generics_0.1.3 htmltools_0.5.5
# [10] stats4_4.1.2 loo_2.6.0 yaml_2.3.7
# [13] utf8_1.2.3 rlang_1.1.1 pkgbuild_1.4.2
# [16] jquerylib_0.1.4 pillar_1.9.0 glue_1.6.2
# [19] withr_2.5.0 distributional_0.3.2 plyr_1.8.8
# [22] matrixStats_1.0.0 lifecycle_1.0.3 stringr_1.5.0
# [25] posterior_1.4.1 munsell_0.5.0 gtable_0.3.4
# [28] codetools_0.2-18 evaluate_0.21 labeling_0.4.3
# [31] inline_0.3.19 knitr_1.43 callr_3.7.3
# [34] fastmap_1.1.1 ps_1.7.5 parallel_4.1.2
# [37] fansi_1.0.4 bayesplot_1.10.0 highr_0.10
# [40] rstantools_2.3.1.1 Rcpp_1.0.11 backports_1.4.1
# [43] checkmate_2.2.0 scales_1.2.1 cachem_1.0.8
# [46] RcppParallel_5.1.7 jsonlite_1.8.7 abind_1.4-5
# [49] farver_2.1.1 gridExtra_2.3 tensorA_0.36.2
# [52] digest_0.6.33 stringi_1.7.12 processx_3.8.2
# [55] dplyr_1.1.3 grid_4.1.2 cli_3.6.1
# [58] tools_4.1.2 magrittr_2.0.3 sass_0.4.6
# [61] tibble_3.2.1 crayon_1.5.2 pkgconfig_2.0.3
# [64] MASS_7.3-55 prettyunits_1.1.1 R6_2.5.1
# [67] compiler_4.1.2