--- title: "Forecasting Swiss ILI counts using `prophet::prophet`" author: "Sebastian Meyer" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: fig_width: 6 fig_height: 4 toc: TRUE vignette: > %\VignetteIndexEntry{Forecasting Swiss ILI counts using `prophet::prophet`} %\VignetteEngine{knitr::rmarkdown} %\VignetteDepends{ggplot2, dplyr, surveillance, fanplot, prophet} --- ```{r setup_knitr, include = FALSE} knitr::opts_chunk$set(message = FALSE, warning = FALSE, error = FALSE, fig.align = "center", dev.args = list(pointsize = 10)) ``` ```{r setup} options(digits = 4) # for more compact numerical outputs library("HIDDA.forecasting") library("ggplot2") source("setup.R", local = TRUE) # define test periods (OWA, TEST) ``` In this vignette, we use forecasting methods provided by: ```{r} library("prophet") ``` The corresponding software reference is: ```{r, echo = FALSE, results = "asis"} cat("
") print(citation(package = "prophet", auto = TRUE), style = "html") cat("
\n") ``` ## Modelling **prophet** uses a Gaussian likelihood, so we will work with log-counts as in ARIMA. ```{r} ## which dates correspond to ISO week 52? index(CHILI)[strftime(index(CHILI), "%V") == "52"] ``` So for consistency with the other models we define holidays from 21 to 28 December each year. ```{r, include = FALSE} if (packageVersion("prophet") < "0.4") { ## workaround a bug in make_holiday_features(), which needs row_number() library("dplyr") } ``` ```{r prophetfit, cache = TRUE} set.seed(1411171) christmas <- data.frame( holiday = "Christmas", ds = as.Date(paste0(2000:2016, "-12-21")), lower_window = 0, upper_window = 7 ) prophetfit_control <- prophet( yearly.seasonality = TRUE, weekly.seasonality = FALSE, daily.seasonality = FALSE, holidays = christmas, mcmc.samples = 0, # invokes rstan::optimizing (fast MAP estimation) interval.width = 0.95, fit = FALSE) prophetfit <- fit.prophet( m = prophetfit_control, df = data.frame(ds = index(CHILI), y = log(CHILI)) ) prophetfitted <- predict(prophetfit) ``` ```{r prophetfitted_components} prophet_plot_components(prophetfit, prophetfitted) ``` ```{r} CHILIdat <- fortify(CHILI) CHILIdat[c("prophetfitted","prophetlower","prophetupper")] <- exp(prophetfitted[c("yhat", "yhat_lower", "yhat_upper")]) ``` ```{r prophetfitted, fig.width = 7, fig.height = 4} ##plot(prophetfit, prophetfitted) ggplot(CHILIdat, aes(x=Index, ymin=prophetlower, y=prophetfitted, ymax=prophetupper)) + geom_ribbon(fill="orange") + geom_line(col="darkred") + geom_point(aes(y=CHILI), pch=20) + scale_y_sqrt(expand = c(0,0), limits = c(0,NA)) ``` ## One-week-ahead forecasts We compute `r length(OWA)` one-week-ahead forecasts from `r format_period(OWA)` (the `OWA` period). For each time point, forecasting with `prophet` takes about 3 seconds, i.e., computing all one-week-ahead forecasts takes approx. `r sprintf("%.1f", length(OWA) * 3/60)` minutes ... ```{r prophetowa, eval = !file.exists("prophetowa.RData"), results = "hide"} set.seed(1411172) prophetowa <- cross_validation( model = prophetfit, horizon = 1, period = 1, initial = OWA[1] - 1, units = "weeks" ) ## add forecast variance (calculated from prediction interval) prophetowa$sigma <- with(prophetowa, (yhat_upper-yhat_lower)/2/qnorm(0.975)) ## save results save(prophetowa, file = "prophetowa.RData") ``` ```{r, include = FALSE} load("prophetowa.RData") ``` ```{r, include = FALSE} ## BTW, the error SD can be obtained from a prophet fit as prophetfit$y.scale * prophetfit$params$sigma_obs ``` `prophet` forecasts for the log-counts are approximately Gaussian with mean `yhat` and variance `sigma^2` => back-transformation via exp() is log-normal ```{r prophetowa_pit, fig.width = 3, fig.height = 3, echo = -1} par(mar = c(5,5,1,1), las = 1) .PIT <- plnorm(exp(prophetowa$y), meanlog = prophetowa$yhat, sdlog = prophetowa$sigma) hist(.PIT, breaks = seq(0, 1, 0.1), freq = FALSE, main = "", xlab = "PIT") abline(h = 1, lty = 2, col = "grey") ``` ```{r prophetowa_scores} prophetowa_scores <- scores_lnorm( x = exp(prophetowa$y), meanlog = prophetowa$yhat, sdlog = prophetowa$sigma, which = c("dss", "logs")) summary(prophetowa_scores) ``` Note that discretized forecast distributions yield almost identical scores (essentially due to the large counts): ```{r prophetowa_scores_discretized} prophetowa_scores_discretized <- scores_lnorm_discrete( x = exp(prophetowa$y), meanlog = prophetowa$yhat, sdlog = prophetowa$sigma, which = c("dss", "logs")) summary(prophetowa_scores_discretized) ``` ```{r, include = FALSE} stopifnot( all.equal(prophetowa_scores_discretized[,"dss"], prophetowa_scores[,"dss"], tolerance = 1e-5), all.equal(prophetowa_scores_discretized[,"logs"], prophetowa_scores[,"logs"], tolerance = 1e-5) ) ``` ```{r prophetowa_plot, echo = -2} prophetowa_quantiles <- sapply(X = 1:99/100, FUN = qlnorm, meanlog = prophetowa$yhat, sdlog = prophetowa$sigma) par(mar = c(5,5,1,1)) osaplot( quantiles = prophetowa_quantiles, probs = 1:99/100, observed = exp(prophetowa$y), scores = prophetowa_scores, start = OWA[1]+1, xlab = "Week", ylim = c(0,60000), fan.args = list(ln = c(0.1,0.9), rlab = NULL) ) ``` ## Long-term forecasts ```{r prophetfor1, include = FALSE, eval = FALSE} ## try with the first test period TEST1 <- TEST[[1]] fit1 <- fit.prophet( m = prophetfit_control, df = data.frame(ds = index(CHILI), y = log(CHILI))[1:(TEST1[1]-1),] ) prophetfor1 <- predict(fit1, data.frame(ds = index(CHILI)[TEST1])) ## add forecast variance (calculated from prediction interval) prophetfor1$sigma <- with(prophetfor1, (yhat_upper-yhat_lower)/2/qnorm(0.975)) ``` ```{r prophetfor, results = "hide"} set.seed(1411173) prophetfor <- lapply(TEST, function (testperiod) { t0 <- testperiod[1] - 1 fit0 <- fit.prophet( m = prophetfit_control, df = data.frame(ds = index(CHILI), y = log(CHILI))[1:t0,] ) fc <- predict(fit0, data.frame(ds = index(CHILI)[testperiod])) ## add forecast variance (calculated from prediction interval) fc$sigma <- with(fc, (yhat_upper-yhat_lower)/2/qnorm(0.975)) list(testperiod = testperiod, observed = as.vector(CHILI[testperiod]), yhat = fc$yhat, sigma = fc$sigma) }) ``` ```{r prophetfor_pit, echo = -1} par(mar = c(5,5,1,1), mfrow = sort(n2mfrow(length(prophetfor))), las = 1) invisible(lapply(prophetfor, function (x) { PIT <- plnorm(x$observed, meanlog = x$yhat, sdlog = x$sigma) hist(PIT, breaks = seq(0, 1, 0.1), freq = FALSE, main = format_period(x$testperiod, fmt = "%Y", collapse = "/")) abline(h = 1, lty = 2, col = "grey") })) ``` ```{r prophetfor_plot, echo = -1, fig.show = "hold"} par(mar = c(5,5,1,1)) t(sapply(prophetfor, function (x) { quantiles <- sapply(X = 1:99/100, FUN = qlnorm, meanlog = x$yhat, sdlog = x$sigma) scores <- scores_lnorm(x = x$observed, meanlog = x$yhat, sdlog = x$sigma, which = c("dss", "logs")) osaplot(quantiles = quantiles, probs = 1:99/100, observed = x$observed, scores = scores, start = x$testperiod[1], xlab = "Week", ylim = c(0,60000), fan.args = list(ln = c(0.1,0.9), rlab = NULL)) colMeans(scores) })) ```