require("lgpr")
# Loading required package: lgpr
# Attached lgpr 1.2.5, using rstan 2.32.7. Type ?lgpr to get started.
require("ggplot2")
# Loading required package: ggplot2
require("rstan")
# Loading required package: rstan
# Loading required package: StanHeaders
#
# rstan version 2.32.7 (Stan version 2.32.2)
# For execution on a local, multicore CPU with excess RAM we recommend calling
# options(mc.cores = parallel::detectCores()).
# To avoid recompilation of unchanged Stan programs, we recommend calling
# rstan_options(auto_write = TRUE)
# For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
# change `threads_per_chain` option:
# rstan_options(threads_per_chain = 1)
In this tutorial we simulate and analyse count data.
We first show how to generate binomial data
set.seed(131)
simData <- simulate_data(
N = 12,
t_data = seq(12, 72, by = 12),
covariates = c(2),
noise_type = "binomial",
relevances = c(0, 1, 1),
lengthscales = c(18, 24, 18),
N_trials = 100,
t_jitter = 1
)
plot_sim(simData)
# Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
# ℹ Please use tidy evaluation idioms with `aes()`.
# ℹ See also `vignette("ggplot2-in-packages")` for more information.
# ℹ The deprecated feature was likely used in the lgpr package.
# Please report the issue at <https://github.com/jtimonen/lgpr/issues>.
# This warning is displayed once every 8 hours.
# Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
# - Dots are noisy observations of the response var.
# - Line is the true signal mapped through inv. link fun.
# (and multiplied by number of trials)
The noise level in the previous example was rather low. To generate
more noisy observations, we can draw from the beta-binomial
distribution. The overdispersion parameter \(\gamma \in (0,1)\) controls the noise
level, so that \(\gamma \rightarrow 0\)
would be equivalent to binomial noise (no overdispersion) and larger
values mean more noise. Here we set gamma = 0.3.
set.seed(131)
simData <- simulate_data(
N = 12,
t_data = seq(12, 72, by = 12),
covariates = c(2),
noise_type = "bb",
gamma = 0.3,
relevances = c(0, 1, 1),
lengthscales = c(18, 24, 18),
N_trials = 100,
t_jitter = 1
)
plot_sim(simData)
# - Dots are noisy observations of the response var.
# - Line is the true signal mapped through inv. link fun.
# (and multiplied by number of trials)
dat <- simData@data
str(dat)
# 'data.frame': 72 obs. of 4 variables:
# $ id : Factor w/ 12 levels "1","2","3","4",..: 1 1 1 1 1 1 2 2 2 2 ...
# $ age: num 11.2 23.5 37 48 59.5 ...
# $ z : Factor w/ 2 levels "1","2": 2 2 2 2 2 2 1 1 1 1 ...
# $ y : int 32 26 36 52 99 80 0 21 96 80 ...
To use beta-binomial observation model in our analysis, we use the
likelihood argument of lgp(). Here we define a
\(\text{Beta}(2,2)\) prior for the
gamma parameter.
my_prior <- list(
alpha = student_t(20),
gamma = bet(2, 2)
)
We fit a model using the generated beta-binomial data.
fit <- lgp(y ~ age + age | id + age | z,
data = dat,
likelihood = "bb",
num_trials = 100,
prior = my_prior,
refresh = 300,
chains = 3,
cores = 3,
control = list(adapt_delta = 0.95),
iter = 3000
)
print(fit)
# An object of class lgpfit. See ?lgpfit for more info.
# Inference for Stan model: lgp_latent.
# 3 chains, each with iter=3000; warmup=1500; thin=1;
# post-warmup draws per chain=1500, total post-warmup draws=4500.
#
# mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
# alpha[1] 1.11 0.01 0.49 0.44 0.75 1.01 1.37 2.31 2026 1
# alpha[2] 0.37 0.01 0.28 0.01 0.15 0.31 0.53 1.04 530 1
# alpha[3] 1.30 0.01 0.47 0.63 0.97 1.22 1.55 2.40 1868 1
# ell[1] 0.85 0.01 0.46 0.21 0.51 0.75 1.12 1.88 1119 1
# ell[2] 1.10 0.03 1.49 0.13 0.44 0.69 1.18 4.54 2023 1
# ell[3] 0.74 0.01 0.28 0.26 0.55 0.73 0.91 1.34 1326 1
# gamma[1] 0.27 0.00 0.05 0.14 0.24 0.27 0.30 0.36 631 1
#
# Samples were drawn using NUTS(diag_e) at Thu Oct 30 23:09:01 2025.
# For each parameter, n_eff is a crude measure of effective sample size,
# and Rhat is the potential scale reduction factor on split chains (at
# convergence, Rhat=1).
We see that the posterior mean of the gamma parameter is
close to the value used in simulation (0.3). We visualize its posterior
draws for each MCMC chain:
plot_draws(fit, type = "trace", regex_pars = "gamma")
relevances(fit)
# gp(age) gp(age)*zs(id) gp(age)*zs(z) noise
# 0.14125061 0.04760887 0.25839685 0.55274366
We subsample 50 draws from the posterior of each component \(f^{(j)}\), \(j=1,2,3\)
draws <- sample.int(1000, 50)
and visualize them
p <- pred(fit, draws = draws)
plot_components(fit, pred = p, alpha = 0.1, color_by = c(NA, NA, "z", "z"))
as well as the sum \(f = f^{(1)} + f^{(2)} + f^{(3)}\)
plot_pred(fit, pred = p, alpha = 0.3)
t <- seq(2, 100, by = 2)
x_pred <- new_x(dat, t)
p <- pred(fit, x_pred, draws = draws)
# Extrapolating function posterior draws...
# | 10%| 20%| 30%| 40%| 50%| 60%| 70%| 80%| 90%| 100%|
# ======================================================================
# c_hat_pred not given, using constant c_hat_pred = 0.14581335876668
# Done.
plot_pred(fit, p, alpha = 0.3)
plot_components(fit, pred = p, alpha = 0.1, color_by = c(NA, NA, "z", "z"))
sessionInfo()
# R version 4.4.1 (2024-06-14)
# Platform: aarch64-apple-darwin20
# Running under: macOS Sonoma 14.6.1
#
# Matrix products: default
# BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
# LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
#
# locale:
# [1] C/UTF-8/C/C/C/C
#
# time zone: Europe/Helsinki
# tzcode source: internal
#
# attached base packages:
# [1] stats graphics grDevices utils datasets methods base
#
# other attached packages:
# [1] rstan_2.32.7 StanHeaders_2.32.10 ggplot2_4.0.0
# [4] lgpr_1.2.5 rmarkdown_2.30
#
# loaded via a namespace (and not attached):
# [1] tensorA_0.36.2.1 sass_0.4.10 generics_0.1.4
# [4] stringi_1.8.7 digest_0.6.37 magrittr_2.0.4
# [7] evaluate_1.0.5 grid_4.4.1 RColorBrewer_1.1-3
# [10] fastmap_1.2.0 plyr_1.8.9 jsonlite_2.0.0
# [13] pkgbuild_1.4.8 backports_1.5.0 gridExtra_2.3
# [16] QuickJSR_1.8.1 scales_1.4.0 codetools_0.2-20
# [19] jquerylib_0.1.4 abind_1.4-8 cli_3.6.5
# [22] crayon_1.5.3 rlang_1.1.6 withr_3.0.2
# [25] cachem_1.1.0 yaml_2.3.10 tools_4.4.1
# [28] inline_0.3.21 parallel_4.4.1 reshape2_1.4.4
# [31] checkmate_2.3.3 rstantools_2.5.0 dplyr_1.1.4
# [34] colorspace_2.1-2 curl_6.2.3 posterior_1.6.1
# [37] vctrs_0.6.5 R6_2.6.1 matrixStats_1.5.0
# [40] stats4_4.4.1 lifecycle_1.0.4 stringr_1.5.2
# [43] V8_6.0.0 MASS_7.3-60.2 pkgconfig_2.0.3
# [46] RcppParallel_5.1.11-1 pillar_1.11.1 bslib_0.9.0
# [49] gtable_0.3.6 loo_2.8.0 glue_1.8.0
# [52] Rcpp_1.1.0 xfun_0.53 tibble_3.3.0
# [55] tidyselect_1.2.1 knitr_1.50 farver_2.1.2
# [58] bayesplot_1.14.0 htmltools_0.5.8.1 labeling_0.4.3
# [61] compiler_4.4.1 S7_0.2.0 distributional_0.5.0