În #182, am prezentat un punct de referință bazat pe 30.000 de serii de timp, comparând metoda conformă a împărțirii secvențiale cu stadiul tehnicii în Predicția conformă pentru Prognoză.
În această postare, voi prezenta un benchmark bazat pe 1311 serii cronologice din competiția de Turism, comparând metoda conformă a split secvențial cu stadiul tehnicii. Metoda Theta din forecast pachetul este folosit ca model de bază, împreună cu pachetul meu R ahead pentru conformarea modelului de bază. Sunt luate în considerare 2 niveluri de acoperire țintă: 80% și 95%. Erorile de evaluare comparativă sunt măsurate prin:
utils::install.packages(c('foreach', 'forecast', 'fpp', 'fpp2', 'remotes', 'Tcomp'),
repos="https://cran.r-project.org")
remotes::install_github("Techtonique/ahead")
remotes::install_github("thierrymoudiki/simulatetimeseries")
remotes::install_github("herbps10/AdaptiveConformal", force=TRUE)
remotes::install_github("thierrymoudiki/misc")
suppressWarnings(library(datasets))
suppressWarnings(library(forecast))
suppressWarnings(library(foreach))
suppressWarnings(library(fpp2))
suppressWarnings(library(ahead))
suppressWarnings(library(AdaptiveConformal))
suppressWarnings(library(misc))
suppressWarnings(library(Tcomp))
coverage_score <- function(obj, actual) {
if (is.null(obj$lower))
{
return(mean((obj$intervals(, 1) <= actual)*(actual <= obj$intervals(, 2)))*100)
}
return(mean((obj$lower <= actual)*(actual <= obj$upper))*100)
}
winkler_score <- function(obj, actual, level = 95) {
alpha <- 1 - level / 100
lt <- try(obj$lower, silent = TRUE)
ut <- try(obj$upper, silent = TRUE)
actual <- as.numeric(actual)
if (is.null(lt) || is.null(ut))
{
lt <- as.numeric(obj$intervals(, 1))
ut <- as.numeric(obj$intervals(, 2))
}
n_points <- length(actual)
stopifnot((n_points == length(lt)) && (n_points == length(ut)))
diff_lt <- lt - actual
diff_bounds <- ut - lt
diff_ut <- actual - ut
score <- diff_bounds
score <- score + (2 / alpha) * (pmax(diff_lt, 0) + pmax(diff_ut, 0))
return(mean(score))
}
get_error <- function(obj, actual, level = 95)
{
actual <- as.numeric(actual)
mean_prediction <- as.numeric(obj$mean)
me <- mean(mean_prediction - actual)
rmse <- sqrt(mean((mean_prediction - actual)**2))
mae <- mean(abs(mean_prediction - actual))
mpe <- mean(mean_prediction/actual-1)
mape <- mean(abs(mean_prediction/actual-1))
coverage <- as.numeric(coverage_score(obj, actual))
winkler <- winkler_score(obj, actual, level = level)
res <- c(me, rmse, mae, mpe,
mape, coverage, winkler)
names(res) <- c("me", "rmse", "mae", "mpe",
"mape", "coverage", "winkler")
return(res)
}
ahead_methods <- c("block-bootstrap", "surrogate",
"kde", "bootstrap")
aci_methods <- c('ACI', 'SCP', 'AgACI',
'DtACI', 'SF-OGD', 'SAOCP')
i <- 3
level <- 95
nsim <- 250
ahead_method <- ahead_methods(1)
aci_method <- aci_methods(1)
# base model
obj <- forecast::thetaf(y=Tcomp::tourism((i))$x,
h=Tcomp::tourism((i))$h,
level=level)
print(get_error(obj, Tcomp::tourism((i))$xx))
# conformalized ahead
obj_ahead <- ahead::conformalize(FUN=forecast::thetaf,
y=Tcomp::tourism((i))$x,
h=Tcomp::tourism((i))$h,
level=level,
nsim = nsim,
method=ahead_method)
print(get_error(obj_ahead, Tcomp::tourism((i))$xx))
# AdaptiveConformal
(obj_aci <- AdaptiveConformal::aci(Y = as.vector(Tcomp::tourism((i))$xx),
predictions = as.vector(obj$mean),
method = "ACI",
alpha = level/100))
obj_aci$mean <- as.vector(obj$mean)
print(get_error(obj_aci, Tcomp::tourism((i))$xx))
## -----------------------------------------------------------------------------------
ahead_methods <- c("block-bootstrap", "surrogate",
"kde", "bootstrap")
aci_methods <- c('ACI', 'SCP', 'AgACI',
'DtACI', 'SF-OGD', 'SAOCP')
n_series <- length(tourism)
for (level in c(80, 95))
{
pb <- utils::txtProgressBar(min=0, max=n_series, style = 3)
benchmark <- foreach::foreach(i=1:n_series, .combine = rbind)%do%
{
results <- matrix(NA,
nrow= 1 + length(ahead_methods) + length(aci_methods),
ncol=9)
colnames(results) <- c("series", "method",
"me", "rmse", "mae", "mpe", "mape",
"coverage", "winkler")
results_index <- 1
# base model
obj <- forecast::thetaf(y=Tcomp::tourism((i))$x,
h=Tcomp::tourism((i))$h,
level=level)
results(results_index, ) <- c(i, "none",
get_error(obj, Tcomp::tourism((i))$xx))
results_index <- results_index + 1
# conformalized ahead
for (j in 1:length(ahead_methods))
{
obj_ahead <- try(ahead::conformalize(FUN=forecast::thetaf,
y=Tcomp::tourism((i))$x,
h=Tcomp::tourism((i))$h,
level=level,
nsim = nsim,
method=ahead_methods(j)), silent = TRUE)
if (inherits(obj_ahead, "try-error"))
{
results(results_index, ) <- c(i, paste0("conformal-", ahead_methods(j)), rep(NA, 7))
} else {
results(results_index, ) <- c(i, paste0("conformal-", ahead_methods(j)),
get_error(obj_ahead, Tcomp::tourism((i))$xx))
}
results_index <- results_index + 1
}
# AdaptiveConformal
for (j in 1:length(aci_methods))
{
obj_aci <- try(AdaptiveConformal::aci(Y = as.vector(Tcomp::tourism((i))$xx),
predictions = as.vector(obj$mean),
method = aci_methods(j),
alpha = level/100), silent = TRUE)
if (inherits(obj_ahead, "try-error"))
{
results(results_index, ) <- c(i, paste0("conformal-", aci_methods(j)),
rep(NA, 7))
} else {
obj_aci$mean <- as.vector(obj$mean)
results(results_index, ) <- c(i, paste0("conformal-", aci_methods(j)),
get_error(obj_aci, Tcomp::tourism((i))$xx))
}
results_index <- results_index + 1
}
utils::setTxtProgressBar(pb, i)
results
}
close(pb)
benchmark <- cbind.data.frame(benchmark(, c(1, 2)),
apply(benchmark(, -c(1, 2)), c(1, 2), as.numeric))
benchmark$method <- sapply(1:length(benchmark$method),
function(i) gsub(pattern = "conformal-",
replacement = "",
x=benchmark$method(i)))
saveRDS(benchmark, paste0("2025-01-20-tourism-benchmark", level, ".rds"))
}
Sunt prezentate ratele de acoperire și scorurile Winkler pentru intervalele de predicție de 80% și 95%, împreună cu diagrame cu casete ale ratelor de eroare logaritmică (cu cât mai mici, cu atât mai bine).
tourism_benchmark80 <- readRDS("2025-01-20-tourism-benchmark80.rds")
tourism_benchmark95 <- readRDS("2025-01-20-tourism-benchmark95.rds")
benchmark_medians80 <- cbind.data.frame(tapply(tourism_benchmark80$coverage,
tourism_benchmark80$method, median),
tapply(tourism_benchmark80$winkler,
tourism_benchmark80$method, median))
colnames(benchmark_medians80) <- c("coverage", "winkler_score")
misc::sort_df(benchmark_medians80, by="winkler_score")
benchmark_medians95 <- cbind.data.frame(tapply(tourism_benchmark95$coverage,
tourism_benchmark95$method, median),
tapply(tourism_benchmark95$winkler,
tourism_benchmark95$method, median))
colnames(benchmark_medians95) <- c("coverage", "winkler_score")
misc::sort_df(benchmark_medians95, by="winkler_score")
par(mfrow=c(2, 1))
boxplot(log(100-coverage) ~ method, data = tourism_benchmark80,
main="log-error rates")
boxplot(log(100-coverage) ~ method, data = tourism_benchmark95,
main="log-error rates")
coverage winkler_score
surrogate 83.33333 9339.212
block-bootstrap 83.33333 9372.207
kde 87.50000 9419.847
bootstrap 79.16667 10110.819
none 75.00000 11582.445
AgACI 62.50000 17010.164
DtACI 62.50000 17031.145
ACI 62.50000 17165.020
SCP 50.00000 18008.067
SAOCP 0.00000 47472.648
SF-OGD 0.00000 47535.205
coverage winkler_score
surrogate 95.83333 9453.619
block-bootstrap 95.83333 9625.191
kde 100.00000 9705.674
none 87.50000 9845.460
bootstrap 91.66667 10042.757
AgACI 62.50000 16564.417
ACI 62.50000 16649.422
DtACI 62.50000 16649.422
SCP 62.50000 16649.422
SAOCP 0.00000 47472.648
SF-OGD 0.00000 47535.205



Deci, pe baza acestor experimente extinse împotriva stadiului tehnicii (și presupunând că implementările metodelor de ultimă generație sunt corecte, ceea ce sunt sigur că sunt, consultați https://computo.sfds.asso.fr/published -202407-susmann-adaptive-conformal/ și presupunând că le folosesc bine), cât de tare este această contribuție la știința prognozei?
