Simple plots first, to get started.
library(tidyverse)
set.seed(42)
n_sample <- 5000
rho <- 0.95
sd_X <- 0.2
sd_C <- 0.1
sd_r <- 0.3
b <- 1
a <- 0
X <- arima.sim(n = n_sample, list(ar = rho), sd = sd_X) + 1
C <- arima.sim(n = n_sample, list(ar = rho), sd = sd_C)
r <- a + b * C * lead(X) + rnorm(n_sample) * sd_r
data <- data.frame(time = 1:n_sample, X, C, r)
data[1:100,] %>%
gather(key = process, value = value, -time) %>%
ggplot(aes(x = time, y = value, color = process)) + geom_line()
Then, ugly loops. Could be accelerated elegantly via the package slider (see other notebook). The execution is QUITE LONG (~hours).
library(tidyverse)
library(reshape2)
test <- function(t){
fit <- lm(r ~ C, data = data[(t-j):t,])
return( (predict(fit, data[t+1,])-data$r[t+1])^2)
}
n_sample <- 10^6
sd_X <- 0.2
sd_C <- 0.1
sd_r <- 0.3
b <- 1
a <- 0
rho <- c(0.8,0.9,0.95,0.99)
wind_max <- 405
wind_min <- 5
wind_by <- 5
err <- array(0, dim= c(length(rho), length(rho), (wind_max-wind_min)/wind_by + 1))
err_inf <- array(0, dim= c(length(rho), length(rho), (wind_max-wind_min)/wind_by + 1))
err_star <- array(0, dim= c(length(rho), length(rho), (wind_max-wind_min)/wind_by + 1))
for(i in 1:length(rho)){
for(k in 1:length(rho)){
#if(t%%250==0){print(t)}
for(j in seq(wind_min, wind_max, by = wind_by)){
loc <- 1+(j-wind_min)/wind_by
err_inf[i,k,loc] <- sd_r^2 + sd_X^2 /(1-rho[i]^2) * (a^2 + b^2*sd_C^2/(1-rho[k]^2))
err_star[i,k,loc] <- sd_r^2 + sd_X^2 * (a^2 + b^2*sd_C^2/(1-rho[k]^2))
}
}
}
for(i in 1:length(rho)){
print(i)
for(k in 1:length(rho)){
print(k)
X <- arima.sim(n = n_sample, list(ar = rho[i]), sd = sd_X) + 1
C <- arima.sim(n = n_sample, list(ar = rho[k]), sd = sd_C)
r <- a + b * C * lead(X) + rnorm(n_sample) * sd_r
data <- data.frame(time = 1:n_sample, X, C, r)
for(j in seq(wind_min, wind_max, by = wind_by)){ # T
loc <- 1+(j-wind_min)/wind_by
err[i,k,loc] <- map(seq(wind_max+1,n_sample-2, by = 100), test) %>% unlist() %>% mean()
}
}
}
err <- melt(err)
err_inf <- melt(err_inf)
err_star <- melt(err_star)
err_full <- cbind(err, err_inf$value, err_star$value)
colnames(err_full) <- c("rho_X", "rho_C", "Window", "Error", "E_inf", "E_star")
for(m in 1:length(rho)){
err_full$rho_X[err_full$rho_X == m] <- rho[m]
err_full$rho_C[err_full$rho_C == m] <- rho[m]
}
err_full <- err_full %>%
mutate(Window = Window * wind_by - 1 + wind_min)
save(err_full, file = "err_full.RData")
# err_full %>%
# mutate(rho = paste("rho_X", rho_X, "rho_C", rho_C)) %>%
# ggplot(aes(x = Window)) + geom_line(aes(y = Error, color = rho)) +
# xlim(9,100) + ylim(0.08,0.4)
Finally, the plots.
lev <- levels(as.factor(err_full$Window))
lev <- c(lev[1:2], lev[3*(1:(length(lev)/3))])
err_full %>%
filter(Window %in% lev) %>%
mutate(rho_X = as.factor(rho_X),
rho_C = as.factor(rho_C)) %>%
ggplot(aes(x = Window, y = Error, color = rho_X, shape = rho_C)) +
geom_point(size = 1.1) + xlim(8,200) +
facet_wrap(rho_X ~ rho_C ,
scales = "free",
labeller = label_wrap_gen(multi_line=FALSE)) + # + ylim(0.08,0.4)
geom_line(aes(y = E_inf), color = "black", linetype = 2) +
xlab("Window (T)") + ylab("Average Squared Error") +
scale_color_manual(values = c("#FA7F04", "#2FCD2A", "#0D6DF6", "#F30B58"))
ggsave("err_full.pdf")
LS0tCnRpdGxlOiBTaW1wbGUgcmVncmVzc2lvbnMgb24gc2xpZGluZyB3aW5kb3dzCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KClNpbXBsZSBwbG90cyBmaXJzdCwgdG8gZ2V0IHN0YXJ0ZWQuCgpgYGB7ciwgbWVzc2FnZSA9IEYsIHdhcm5pbmcgPSBGfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKc2V0LnNlZWQoNDIpCm5fc2FtcGxlIDwtIDUwMDAKcmhvIDwtIDAuOTUKc2RfWCA8LSAwLjIKc2RfQyA8LSAwLjEKc2RfciA8LSAwLjMKYiA8LSAxCmEgPC0gMApYIDwtIGFyaW1hLnNpbShuID0gbl9zYW1wbGUsIGxpc3QoYXIgPSByaG8pLCBzZCA9IHNkX1gpICsgMQpDIDwtIGFyaW1hLnNpbShuID0gbl9zYW1wbGUsIGxpc3QoYXIgPSByaG8pLCBzZCA9IHNkX0MpCnIgPC0gYSArIGIgKiBDICogbGVhZChYKSArIHJub3JtKG5fc2FtcGxlKSAqIHNkX3IKZGF0YSA8LSBkYXRhLmZyYW1lKHRpbWUgPSAxOm5fc2FtcGxlLCBYLCBDLCByKSAKZGF0YVsxOjEwMCxdICU+JQogICAgZ2F0aGVyKGtleSA9IHByb2Nlc3MsIHZhbHVlID0gdmFsdWUsIC10aW1lKSAlPiUKICAgIGdncGxvdChhZXMoeCA9IHRpbWUsIHkgPSB2YWx1ZSwgY29sb3IgPSBwcm9jZXNzKSkgKyBnZW9tX2xpbmUoKQpgYGAKCgpUaGVuLCB1Z2x5IGxvb3BzLiBDb3VsZCBiZSBhY2NlbGVyYXRlZCBlbGVnYW50bHkgdmlhIHRoZSBwYWNrYWdlIHNsaWRlciAoc2VlIG90aGVyIG5vdGVib29rKS4gVGhlIGV4ZWN1dGlvbiBpcyAqKlFVSVRFIExPTkcqKiAofmhvdXJzKS4KCmBgYHtyfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShyZXNoYXBlMikKCnRlc3QgPC0gZnVuY3Rpb24odCl7CiAgICBmaXQgPC0gbG0ociB+IEMsIGRhdGEgPSBkYXRhWyh0LWopOnQsXSkKICAgIHJldHVybiggKHByZWRpY3QoZml0LCBkYXRhW3QrMSxdKS1kYXRhJHJbdCsxXSleMikKfQoKbl9zYW1wbGUgPC0gMTBeNgpzZF9YIDwtIDAuMgpzZF9DIDwtIDAuMQpzZF9yIDwtIDAuMwpiIDwtIDEKYSA8LSAwCnJobyA8LSBjKDAuOCwwLjksMC45NSwwLjk5KQp3aW5kX21heCA8LSA0MDUKd2luZF9taW4gPC0gNQp3aW5kX2J5IDwtIDUKCmVyciA8LSBhcnJheSgwLCBkaW09IGMobGVuZ3RoKHJobyksIGxlbmd0aChyaG8pLCAod2luZF9tYXgtd2luZF9taW4pL3dpbmRfYnkgKyAxKSkKZXJyX2luZiA8LSBhcnJheSgwLCBkaW09IGMobGVuZ3RoKHJobyksIGxlbmd0aChyaG8pLCAod2luZF9tYXgtd2luZF9taW4pL3dpbmRfYnkgKyAxKSkKZXJyX3N0YXIgPC0gYXJyYXkoMCwgZGltPSBjKGxlbmd0aChyaG8pLCBsZW5ndGgocmhvKSwgKHdpbmRfbWF4LXdpbmRfbWluKS93aW5kX2J5ICsgMSkpCgpmb3IoaSBpbiAxOmxlbmd0aChyaG8pKXsKICAgIGZvcihrIGluIDE6bGVuZ3RoKHJobykpewogICAgICAgICNpZih0JSUyNTA9PTApe3ByaW50KHQpfQogICAgICAgIGZvcihqIGluIHNlcSh3aW5kX21pbiwgd2luZF9tYXgsIGJ5ID0gd2luZF9ieSkpewogICAgICAgICAgICBsb2MgPC0gMSsoai13aW5kX21pbikvd2luZF9ieQogICAgICAgICAgICBlcnJfaW5mW2ksayxsb2NdIDwtIHNkX3JeMiArIHNkX1heMiAvKDEtcmhvW2ldXjIpICogKGFeMiArIGJeMipzZF9DXjIvKDEtcmhvW2tdXjIpKQogICAgICAgICAgICBlcnJfc3RhcltpLGssbG9jXSA8LSBzZF9yXjIgKyBzZF9YXjIgKiAoYV4yICsgYl4yKnNkX0NeMi8oMS1yaG9ba11eMikpCiAgICAgICAgfQogICAgfQp9Cgpmb3IoaSBpbiAxOmxlbmd0aChyaG8pKXsKICAgIHByaW50KGkpCiAgICBmb3IoayBpbiAxOmxlbmd0aChyaG8pKXsKICAgICAgICBwcmludChrKQogICAgICAgIFggPC0gYXJpbWEuc2ltKG4gPSBuX3NhbXBsZSwgbGlzdChhciA9IHJob1tpXSksIHNkID0gc2RfWCkgKyAxCiAgICAgICAgQyA8LSBhcmltYS5zaW0obiA9IG5fc2FtcGxlLCBsaXN0KGFyID0gcmhvW2tdKSwgc2QgPSBzZF9DKQogICAgICAgIHIgPC0gYSArIGIgKiBDICogbGVhZChYKSArIHJub3JtKG5fc2FtcGxlKSAqIHNkX3IKICAgICAgICBkYXRhIDwtIGRhdGEuZnJhbWUodGltZSA9IDE6bl9zYW1wbGUsIFgsIEMsIHIpIAogICAgICAgIGZvcihqIGluIHNlcSh3aW5kX21pbiwgd2luZF9tYXgsIGJ5ID0gd2luZF9ieSkpeyAjIFQKICAgICAgICAgICAgbG9jIDwtIDErKGotd2luZF9taW4pL3dpbmRfYnkKICAgICAgICAgICAgZXJyW2ksayxsb2NdIDwtIG1hcChzZXEod2luZF9tYXgrMSxuX3NhbXBsZS0yLCBieSA9IDEwMCksIHRlc3QpICU+JSB1bmxpc3QoKSAlPiUgbWVhbigpCiAgICAgICAgfQogICAgfQp9CgplcnIgPC0gbWVsdChlcnIpCmVycl9pbmYgPC0gbWVsdChlcnJfaW5mKQplcnJfc3RhciA8LSBtZWx0KGVycl9zdGFyKQplcnJfZnVsbCA8LSBjYmluZChlcnIsIGVycl9pbmYkdmFsdWUsIGVycl9zdGFyJHZhbHVlKQpjb2xuYW1lcyhlcnJfZnVsbCkgPC0gYygicmhvX1giLCAicmhvX0MiLCAiV2luZG93IiwgIkVycm9yIiwgIkVfaW5mIiwgIkVfc3RhciIpCmZvcihtIGluIDE6bGVuZ3RoKHJobykpewogICAgZXJyX2Z1bGwkcmhvX1hbZXJyX2Z1bGwkcmhvX1ggPT0gbV0gPC0gcmhvW21dCiAgICBlcnJfZnVsbCRyaG9fQ1tlcnJfZnVsbCRyaG9fQyA9PSBtXSA8LSByaG9bbV0KfQplcnJfZnVsbCA8LSBlcnJfZnVsbCAlPiUKICAgIG11dGF0ZShXaW5kb3cgPSBXaW5kb3cgKiB3aW5kX2J5IC0gMSArIHdpbmRfbWluKQpzYXZlKGVycl9mdWxsLCBmaWxlID0gImVycl9mdWxsLlJEYXRhIikKIyBlcnJfZnVsbCAlPiUgCiMgICAgIG11dGF0ZShyaG8gPSBwYXN0ZSgicmhvX1giLCByaG9fWCwgInJob19DIiwgcmhvX0MpKSAlPiUKIyAgICAgZ2dwbG90KGFlcyh4ID0gV2luZG93KSkgKyBnZW9tX2xpbmUoYWVzKHkgPSBFcnJvciwgY29sb3IgPSByaG8pKSArCiMgICAgIHhsaW0oOSwxMDApICsgeWxpbSgwLjA4LDAuNCkKCmBgYAoKRmluYWxseSwgdGhlIHBsb3RzLgoKYGBge3IsIG1lc3NhZ2U9Riwgd2FybmluZz1GfQpsZXYgPC0gbGV2ZWxzKGFzLmZhY3RvcihlcnJfZnVsbCRXaW5kb3cpKQpsZXYgPC0gYyhsZXZbMToyXSwgbGV2WzMqKDE6KGxlbmd0aChsZXYpLzMpKV0pCmVycl9mdWxsICU+JQogICAgZmlsdGVyKFdpbmRvdyAlaW4lIGxldikgJT4lCiAgICBtdXRhdGUocmhvX1ggPSBhcy5mYWN0b3IocmhvX1gpLAogICAgICAgICAgIHJob19DID0gYXMuZmFjdG9yKHJob19DKSkgJT4lCiAgICBnZ3Bsb3QoYWVzKHggPSBXaW5kb3csIHkgPSBFcnJvciwgY29sb3IgPSByaG9fWCwgc2hhcGUgPSByaG9fQykpICsKICAgIGdlb21fcG9pbnQoc2l6ZSA9IDEuMSkgKyB4bGltKDgsMjAwKSArIAogICAgZmFjZXRfd3JhcChyaG9fWCB+IHJob19DICwgCiAgICAgICAgICAgICAgIHNjYWxlcyA9ICJmcmVlIiwKICAgICAgICAgICAgICAgbGFiZWxsZXIgPSBsYWJlbF93cmFwX2dlbihtdWx0aV9saW5lPUZBTFNFKSkgKyAjICsgeWxpbSgwLjA4LDAuNCkKICAgIGdlb21fbGluZShhZXMoeSA9IEVfaW5mKSwgY29sb3IgPSAiYmxhY2siLCBsaW5ldHlwZSA9IDIpICsgCiAgICB4bGFiKCJXaW5kb3cgKFQpIikgKyB5bGFiKCJBdmVyYWdlIFNxdWFyZWQgRXJyb3IiKSArCiAgICBzY2FsZV9jb2xvcl9tYW51YWwodmFsdWVzID0gYygiI0ZBN0YwNCIsICIjMkZDRDJBIiwgIiMwRDZERjYiLCAiI0YzMEI1OCIpKQpnZ3NhdmUoImVycl9mdWxsLnBkZiIpCmBgYAoKCgo=