This notebook is companion material to the article Dynamic decision making with predictive panels by Guillaume Coqueret and Bertrand Tavin.
The code is folded to lighten the presentation, but can be accessed by clicking on the corresponding unfolding buttons.
It corresponds to the second exercise in the paper, for which the data is public.
Packages
First, we load the required packages.
libs <- c("tidyverse", "lubridate", "patchwork", "nlshrink", "broom", "texreg", "latex2exp", "gt")
missing_libs <- setdiff(libs, rownames(installed.packages())) # Install any missing packages
if(length(missing_libs) > 0) install.packages(missing_libs)
library(tidyverse) # Data handling
library(lubridate) # Date handling
library(patchwork) # Graph layout
library(nlshrink) # Shrinkage estimators
library(broom) # Regression output (tidy)
library(texreg) # LaTeX regression output
library(latex2exp) # LaTeX in ggplot
library(gt) # Neat tables
Data
Second, we download & format the data, which comes from Ken French’s Data Library.
We work with the 10 industry portfolios, sampled at the daily frequency.
NOTE: depending on the file, the number of rows to skip when reading the .csv changes; it typically varies between 2 and 10.
We drop the data before 1980, but it is easy to extend the sample back to 1950.
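Since the number of header lines changes across files, one option (a sketch, not part of the original notebook; the hypothetical guess_skip helper assumes data rows start with an 8-digit YYYYMMDD date) is to locate the first data row automatically:
guess_skip <- function(file, max_lines = 50){ # Hypothetical helper: guess the number of lines to skip
first_lines <- readLines(file, n = max_lines) # Peek at the top of the file
first_data <- which(grepl("^\\s*\\d{8},", first_lines))[1] # First row starting with a YYYYMMDD date
first_data - 2 # Skip everything above the column-name row (assumed to sit just above the data)
}
# e.g., skip = guess_skip(unz(temp, "10_Industry_Portfolios_Daily.csv")) instead of a hard-coded skip = 9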
temp <- tempfile()
link <- "http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/10_Industry_Portfolios_daily_CSV.zip"
download.file(link, temp, quiet = TRUE) # Download!
df_assets <- read_csv(unz(temp, "10_Industry_Portfolios_Daily.csv"), skip = 9) %>%
# Check the nb of lines to skip
rename(Date = `...1`) %>% # Change name of the first column
mutate_at(vars(-Date), as.numeric) %>% # Convert to number
mutate_at(vars(-Date), function(x){x/100}) %>% # Divide by 100
mutate(Date = ymd(parse_date_time(Date, "%Y%m%d"))) # Parse YYYYMMDD strings into dates
split <- which(is.na(df_assets$Date))[1] # Split between portfolio types!
df_assets <- df_assets[1:(split-1),]
df_assets <- df_assets %>% filter(year(Date) >= 1980) # Choose the period (data after 1950 is considered reliable)
colnames(df_assets)[1] <- "date"
stock_ids <- colnames(df_assets[,c(2:ncol(df_assets))]) # List of assets
nb_assets <- length(stock_ids) # Number of assets
list_years <- c(1980:2022) # List of years considered
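A quick sanity check (not in the original notebook) confirms the period and the number of industries:
range(df_assets$date) # Should start in January 1980
nb_assets # Should be 10
head(stock_ids) # Industry names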
Main function
Note: the function allows for sample versus shrinkage covariance estimators (see its last argument).
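To see what the toggle changes, here is a minimal illustration on simulated data (our own sketch): linshrink_cov() computes the linear (Ledoit-Wolf-type) shrinkage estimator, which pulls the dispersed eigenvalues of the sample covariance matrix toward one another and thus stabilizes its inverse.
set.seed(42)
X_sim <- matrix(rnorm(60 * 10), nrow = 60) # 60 days of simulated returns for 10 assets
range(eigen(var(X_sim))$values) # Sample covariance: dispersed eigenvalues
range(eigen(linshrink_cov(X_sim))$values) # Shrinkage: eigenvalues pulled together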
time_series_output <- function(nb_assets, df_assets, sample_size, shrinkage){ # The main function
  one_n <- as.matrix(rep(1, nb_assets))            # A vector of ones
  nb_dates <- length(df_assets$date)               # The number of dates
  list_daily_dates <- df_assets$date               # List of these dates
  Sigma_is <- list()                               # Initializing the variables
  Sigma_oos <- list()
  inv_Sigma_is <- list()
  inv_Sigma_oos <- list()
  vec_mu_is <- list()
  vec_mu_oos <- list()
  nb_t <- sample_size * 21                         # Sample length (IS & OOS) in trading days; 252 or 500 are alternatives
  t0 <- nb_t                                       # Index of the first estimation date
  step_lgth <- 21                                  # One-month (21-day) steps
  nb_points <- (nb_dates - 2*nb_t) %/% step_lgth   # Max number of monthly steps in the dataset
  tend <- t0 + step_lgth*nb_points
  one_t <- as.matrix(rep(1, nb_t))
  date_list <- as.Date(df_assets[t0+nb_t,]$date)
  for(j in 1:nb_points){
    n_start_is <- t0 + (j-1)*step_lgth - nb_t + 1
    n_end_is <- t0 + (j-1)*step_lgth
    n_start_oos <- t0 + (j-1)*step_lgth + 1        # Out-of-sample starts the day after
    n_end_oos <- t0 + (j-1)*step_lgth + nb_t
    date_list[j] <- as.Date(df_assets[n_end_is,]$date)
    # In-sample
    data_temp_is <- df_assets[c(n_start_is:n_end_is),]
    if(shrinkage == TRUE){
      Sigma_is[[j]] <- linshrink_cov(X = as.matrix(data_temp_is[stock_ids]))
    } else {
      Sigma_is[[j]] <- var(data_temp_is[stock_ids])                  # Sample covariance matrix
    }
    vec_mu_is[[j]] <- as.matrix(colMeans(data_temp_is[stock_ids]))   # In-sample mean vector
    inv_Sigma_is[[j]] <- solve(Sigma_is[[j]])                        # Sigma_is^{-1}
    # Out-of-sample
    data_temp_oos <- df_assets[c(n_start_oos:n_end_oos),]
    if(shrinkage == TRUE){
      Sigma_oos[[j]] <- linshrink_cov(X = as.matrix(data_temp_oos[stock_ids]))
    } else {
      Sigma_oos[[j]] <- var(data_temp_oos[stock_ids])
    }
    vec_mu_oos[[j]] <- as.matrix(colMeans(data_temp_oos[stock_ids]))
    inv_Sigma_oos[[j]] <- solve(Sigma_oos[[j]])
  }
  loss_series <- 0                                 # Loss (sum of squared residuals)
  dt_series <- 0
  Deltat_series <- 0
  for(j in 1:nb_points){
    loss_series[j] <- t(vec_mu_is[[j]]) %*% inv_Sigma_is[[j]] %*% Sigma_oos[[j]] %*% inv_Sigma_is[[j]] %*% vec_mu_is[[j]] -
      2 * t(vec_mu_is[[j]]) %*% inv_Sigma_is[[j]] %*% vec_mu_oos[[j]] +
      t(vec_mu_oos[[j]]) %*% inv_Sigma_oos[[j]] %*% vec_mu_oos[[j]]
    Deltat_series[j] <- 0.00001*norm(inv_Sigma_is[[j]] - inv_Sigma_oos[[j]], type = "2") # Covariate shift (spectral norm, scaled by 1e-5)
    dt_series[j] <- 10*sum(abs(vec_mu_is[[j]] - vec_mu_oos[[j]]))                        # Concept drift (L1 norm, scaled by 10)
  }
  output <- list(date_list = date_list, L = loss_series, d_t = dt_series, Delta_t = Deltat_series)
  return(output)
} # End of function
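In the notation of the paper, the loop above computes, at each date \(t\) (a direct transcription of the code, including its scaling constants):
\[L_t = \hat\mu_{is}'\hat\Sigma_{is}^{-1}\hat\Sigma_{oos}\hat\Sigma_{is}^{-1}\hat\mu_{is} - 2\,\hat\mu_{is}'\hat\Sigma_{is}^{-1}\hat\mu_{oos} + \hat\mu_{oos}'\hat\Sigma_{oos}^{-1}\hat\mu_{oos},\]
\[d_t = 10 \sum_{i}\left|\hat\mu_{is,i} - \hat\mu_{oos,i}\right|, \qquad \Delta_t = 10^{-5}\left\Vert \hat\Sigma_{is}^{-1} - \hat\Sigma_{oos}^{-1} \right\Vert_2,\]
where the hats denote the in-sample (is) and out-of-sample (oos) estimates of the mean vector and covariance matrix.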
Then, we gather the estimates for the loss \(L\), the drift \(d_t\), and the shift \(\Delta_t\) (stored in the column D_t).
shrink_k <- TRUE # With shrinkage
fct_output1m <- time_series_output(nb_assets, df_assets, 1, shrink_k)
output1m <- data.frame(date = fct_output1m$date_list,
L = fct_output1m$L,
d_t = fct_output1m$d_t,
D_t = fct_output1m$Delta_t)
fct_output3m <- time_series_output(nb_assets, df_assets, 3, shrink_k)
output3m <- data.frame(date = fct_output3m$date_list,
L = fct_output3m$L,
d_t = fct_output3m$d_t,
D_t = fct_output3m$Delta_t)
fct_output6m <- time_series_output(nb_assets, df_assets, 6, shrink_k)
output6m <- data.frame(date = fct_output6m$date_list,
L = fct_output6m$L,
d_t = fct_output6m$d_t,
D_t = fct_output6m$Delta_t)
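The three blocks above repeat the same pattern; an equivalent, more compact alternative (a sketch, not the authors' code) maps over the three sample sizes:
output_all <- purrr::map_dfr(c(`1 month` = 1, `3 month` = 3, `6 month` = 6), function(s){
out <- time_series_output(nb_assets, df_assets, s, shrink_k) # Same call as above
data.frame(date = out$date_list, L = out$L, d_t = out$d_t, D_t = out$Delta_t)
}, .id = "sample") # The names of the input vector label the rows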
Graph
Finally, we plot the results.
bind_rows(bind_cols(output1m, sample ="1 month"),
bind_cols(output3m, sample ="3 month"),
bind_cols(output6m, sample ="6 month")) %>%
pivot_longer(-c(date, sample), names_to = "indicator", values_to = "value") %>%
ggplot(aes(x = date, y = value, color = indicator)) + geom_line() + theme_light() +
facet_grid(indicator~sample, scales = "free",
labeller = labeller(indicator = c(D_t = "Covariate shift", d_t = "Concept drift", L = "Loss"),
sample = c(`1 month` = "1-month samples",
`3 month` = "3-month samples",
`6 month` = "6-month samples"))) +
scale_color_manual(values = c("#0D70CD", "#FC8B02", "#FC0255"),
labels = unname(TeX(c("$\\delta_t$", "$10^{-2} \\times\\Delta_t$", "$10\\times loss$"))),
name = "Metric") +
theme(legend.position = c(0.89,0.19),
strip.background = element_rect(fill = "#777777"),
legend.spacing = unit(-0.6, "cm"), legend.key.size = unit(0.5, "cm"),
legend.margin = unit(-0.6, "cm"), legend.title = element_text(size = 11, face = "bold"),
legend.box.spacing = unit(-0.6, "cm"))
# ggsave("ts2.pdf", width = 8, height = 5)
Regression analysis
We regress the loss (a sum of squared residuals) on the covariate shift and the concept drift, both squared, plus their cross term.
One month samples
First, we build the dataset where everything is stored. We start with one-month samples for the estimators.
data_reg1m <- data.frame(date = fct_output1m$date_list,
D_t2 = (fct_output1m$Delta_t*100)^2, # Scaled
d_t2 = (fct_output1m$d_t)^2,
D_t_dt = fct_output1m$Delta_t*fct_output1m$d_t,
D_t = fct_output1m$Delta_t,
d_t = fct_output1m$d_t,
loss = fct_output1m$L)
Next, the regressions. The output is gathered in the table shown in the section below.
fit_m1_all <- lm(loss ~ D_t2 + d_t2 + D_t_dt, data_reg1m)
m1_all <- fit_m1_all %>% tidy() %>% bind_cols(sample = "ALL")
fit_m1_1980 <- lm(loss ~ D_t2 + d_t2 + D_t_dt,
data_reg1m %>% dplyr::filter(date < "1990-01-01"))
m1_1980 <- fit_m1_1980 %>% tidy() %>% bind_cols(sample = "1980s")
fit_m1_1990 <- lm(loss ~ D_t2 + d_t2 + D_t_dt,
data_reg1m %>% dplyr::filter(date < "2000-01-01", date >= "1990-01-01"))
m1_1990 <- fit_m1_1990 %>% tidy() %>% bind_cols(sample = "1990s")
fit_m1_2000 <- lm(loss ~ D_t2 + d_t2 + D_t_dt,
data_reg1m %>% dplyr::filter(date < "2010-01-01", date >= "2000-01-01"))
m1_2000 <- fit_m1_2000 %>% tidy() %>% bind_cols(sample = "2000s")
fit_m1_2010 <- lm(loss ~ D_t2 + d_t2 + D_t_dt,
data_reg1m %>% dplyr::filter(date >= "2010-01-01"))
m1_2010 <-fit_m1_2010 %>% tidy() %>% bind_cols(sample = "2010+")
Three month samples
Then, the 3-month samples.
data_reg3m <- data.frame(date = fct_output3m$date_list,
D_t2 = (fct_output3m$Delta_t*100)^2, # Scaled
d_t2 = (fct_output3m$d_t)^2,
D_t_dt = fct_output3m$Delta_t*fct_output3m$d_t,
D_t = fct_output3m$Delta_t,
d_t = fct_output3m$d_t,
loss = fct_output3m$L)
fit_m3_all <- lm(loss ~ D_t2 + d_t2 + D_t_dt, data_reg3m)
m3_all <- fit_m3_all %>% tidy() %>% bind_cols(sample = "ALL")
fit_m3_1980 <- lm(loss ~ D_t2 + d_t2 + D_t_dt,
data_reg3m %>% dplyr::filter(date < "1990-01-01"))
m3_1980 <- fit_m3_1980 %>% tidy() %>% bind_cols(sample = "1980s")
fit_m3_1990 <- lm(loss ~ D_t2 + d_t2 + D_t_dt,
data_reg3m %>% dplyr::filter(date < "2000-01-01", date >= "1990-01-01"))
m3_1990 <- fit_m3_1990 %>% tidy() %>% bind_cols(sample = "1990s")
fit_m3_2000 <- lm(loss ~ D_t2 + d_t2 + D_t_dt,
data_reg3m %>% dplyr::filter(date < "2010-01-01", date >= "2000-01-01"))
m3_2000 <- fit_m3_2000 %>% tidy() %>% bind_cols(sample = "2000s")
fit_m3_2010 <- lm(loss ~ D_t2 + d_t2 + D_t_dt,
data_reg3m %>% dplyr::filter(date >= "2010-01-01"))
m3_2010 <- fit_m3_2010 %>% tidy() %>% bind_cols(sample = "2010+")
Six month samples
Finally, the 6-month samples.
data_reg6m <- data.frame(date = fct_output6m$date_list,
D_t2 = (fct_output6m$Delta_t*100)^2, # Scaled
d_t2 = (fct_output6m$d_t)^2,
D_t_dt = fct_output6m$Delta_t*fct_output6m$d_t,
D_t = fct_output6m$Delta_t,
d_t = fct_output6m$d_t,
loss = fct_output6m$L)
fit_m6_all <- lm(loss ~ D_t2 + d_t2 + D_t_dt, data_reg6m)
m6_all <- fit_m6_all %>% tidy() %>% bind_cols(sample = "ALL")
fit_m6_1980 <- lm(loss ~ D_t2 + d_t2 + D_t_dt,
data_reg6m %>% dplyr::filter(date < "1990-01-01"))
m6_1980 <- fit_m6_1980 %>% tidy() %>% bind_cols(sample = "1980s")
fit_m6_1990 <- lm(loss ~ D_t2 + d_t2 + D_t_dt,
data_reg6m %>% dplyr::filter(date < "2000-01-01", date >= "1990-01-01"))
m6_1990 <- fit_m6_1990 %>% tidy() %>% bind_cols(sample = "1990s")
fit_m6_2000 <- lm(loss ~ D_t2 + d_t2 + D_t_dt,
data_reg6m %>% dplyr::filter(date < "2010-01-01", date >= "2000-01-01"))
m6_2000 <- fit_m6_2000 %>% tidy() %>% bind_cols(sample = "2000s")
fit_m6_2010 <- lm(loss ~ D_t2 + d_t2 + D_t_dt,
data_reg6m %>% dplyr::filter(date >= "2010-01-01"))
m6_2010 <- fit_m6_2010 %>% tidy() %>% bind_cols(sample = "2010+")
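The fifteen fits above share a single specification; as a sketch (the fit_window helper is ours, not the authors'), one function could run any sample/decade combination:
fit_window <- function(df, label, from = "1900-01-01", to = "2100-01-01"){
lm(loss ~ D_t2 + d_t2 + D_t_dt, # Same specification as above
df %>% dplyr::filter(date >= from, date < to)) %>%
tidy() %>%
bind_cols(sample = label)
}
# e.g., m1_1990 <- fit_window(data_reg1m, "1990s", "1990-01-01", "2000-01-01")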
The table
The long code for the table is located below.
The p-values are color-coded: the lower the value, the redder the cell.
The t-statistics with an absolute value larger than 3 are highlighted in blue.
We use a threshold of three following the conclusions of Harvey et al. (2016), "… and the cross-section of expected returns".
No highly significant \(t\)-statistic is negative.
We recall that the regression equation is:
\[L_t = a + b_D D_t^2 + b_d d_t^2 + b_{dD} d_t D_t + e_t\]
| sample | term | Estimate (1m) | Std. Err. (1m) | t-stat (1m) | p-val (1m) | Estimate (3m) | Std. Err. (3m) | t-stat (3m) | p-val (3m) | Estimate (6m) | Std. Err. (6m) | t-stat (6m) | p-val (6m) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ALL | (Intercept) | 0.842 | 0.085 | 9.854 | 0.000 | 0.108 | 0.062 | 1.751 | 0.081 | 0.102 | 0.012 | 8.622 | 0.000 |
| ALL | D_t2 | 0.000 | 0.000 | −2.026 | 0.043 | 0.000 | 0.000 | −0.860 | 0.390 | 0.000 | 0.000 | −2.654 | 0.008 |
| ALL | d_t2 | 0.172 | 0.110 | 1.568 | 0.118 | −0.338 | 0.587 | −0.577 | 0.564 | −0.195 | 0.299 | −0.654 | 0.514 |
| ALL | D_t_dt | 6.458 | 0.770 | 8.391 | 0.000 | 6.178 | 0.694 | 8.909 | 0.000 | 2.664 | 0.194 | 13.706 | 0.000 |
| 1980s | (Intercept) | 0.851 | 0.261 | 3.260 | 0.001 | 0.136 | 0.078 | 1.748 | 0.083 | 0.078 | 0.041 | 1.879 | 0.063 |
| 1980s | D_t2 | 0.000 | 0.000 | −0.133 | 0.894 | 0.000 | 0.000 | −0.501 | 0.617 | 0.000 | 0.000 | −0.979 | 0.330 |
| 1980s | d_t2 | 5.086 | 2.476 | 2.054 | 0.042 | −2.174 | 1.297 | −1.676 | 0.096 | 1.508 | 3.444 | 0.438 | 0.662 |
| 1980s | D_t_dt | 2.776 | 2.310 | 1.202 | 0.232 | 5.473 | 0.807 | 6.783 | 0.000 | 2.528 | 0.804 | 3.143 | 0.002 |
| 1990s | (Intercept) | 0.965 | 0.164 | 5.879 | 0.000 | 0.297 | 0.048 | 6.177 | 0.000 | 0.103 | 0.027 | 3.818 | 0.000 |
| 1990s | D_t2 | 0.000 | 0.000 | −0.120 | 0.905 | 0.000 | 0.000 | −0.315 | 0.754 | 0.000 | 0.000 | −1.350 | 0.180 |
| 1990s | d_t2 | 1.683 | 0.827 | 2.036 | 0.044 | 0.683 | 0.673 | 1.016 | 0.312 | 1.550 | 1.027 | 1.510 | 0.134 |
| 1990s | D_t_dt | 4.345 | 2.105 | 2.064 | 0.041 | 3.360 | 0.795 | 4.227 | 0.000 | 3.593 | 0.838 | 4.290 | 0.000 |
| 2000s | (Intercept) | 0.743 | 0.167 | 4.461 | 0.000 | 0.143 | 0.039 | 3.693 | 0.000 | 0.150 | 0.018 | 8.472 | 0.000 |
| 2000s | D_t2 | 0.000 | 0.000 | 2.038 | 0.044 | 0.000 | 0.000 | 1.358 | 0.177 | 0.000 | 0.000 | 1.019 | 0.310 |
| 2000s | d_t2 | 0.568 | 0.425 | 1.334 | 0.185 | 1.241 | 0.566 | 2.193 | 0.030 | 0.195 | 0.368 | 0.531 | 0.596 |
| 2000s | D_t_dt | 3.439 | 2.549 | 1.349 | 0.180 | 2.380 | 0.774 | 3.073 | 0.003 | 0.439 | 0.508 | 0.866 | 0.388 |
| 2010+ | (Intercept) | 0.654 | 0.144 | 4.524 | 0.000 | −0.142 | 0.196 | −0.723 | 0.471 | 0.073 | 0.023 | 3.143 | 0.002 |
| 2010+ | D_t2 | 0.000 | 0.000 | −0.427 | 0.670 | 0.000 | 0.000 | −0.284 | 0.777 | 0.000 | 0.000 | 1.583 | 0.116 |
| 2010+ | d_t2 | 0.148 | 0.101 | 1.464 | 0.145 | −0.679 | 1.135 | −0.598 | 0.551 | 0.240 | 0.391 | 0.613 | 0.541 |
| 2010+ | D_t_dt | 6.576 | 1.308 | 5.026 | 0.000 | 11.848 | 2.193 | 5.401 | 0.000 | 1.801 | 0.495 | 3.641 | 0.000 |
The code for the table (using the gt package):
bind_cols(
bind_rows(m1_all, m1_1980, m1_1990, m1_2000, m1_2010),
bind_rows(m3_all, m3_1980, m3_1990, m3_2000, m3_2010) %>% select(-term, -sample),
bind_rows(m6_all, m6_1980, m6_1990, m6_2000, m6_2010) %>% select(-term, -sample)
) %>%
group_by(sample) %>%
gt() %>%
cols_label(
`estimate...2` = "Estimate",
`estimate...7` = "Estimate",
`estimate...11` = "Estimate",
`std.error...3` = "Std. Err.",
`std.error...8` = "Std. Err.",
`std.error...12` = "Std. Err.",
`statistic...4` = "t-stat",
`statistic...9` = "t-stat",
`statistic...13` = "t-stat",
`p.value...5` = "p-val",
`p.value...10` = "p-val",
`p.value...14` = "p-val"
) %>%
tab_spanner(
label = "1 month sample",
columns = 2:5
) %>%
tab_spanner(
label = "3 month sample",
columns = 7:10
) %>%
tab_spanner(
label = "6 month sample",
columns = 11:14
) %>%
fmt_number(
columns = 2:14,
decimals = 3,
use_seps = FALSE
) %>%
tab_style(
style = list(
cell_fill(color = "#DDDDDD"),
cell_text(weight = "bold")
),
locations = cells_row_groups()
) %>%
data_color(
columns = c("p.value...5","p.value...10", "p.value...14"),
colors = scales::col_numeric(
palette = c(
"#FE756A", "#FFF97B"),
domain = c(0, 1),
alpha = 0.5)
) %>%
tab_style(
style = list(
cell_fill(color = "#A2BAE4"),
cell_text(style = "italic")
),
locations = cells_body(
columns = "statistic...4",
rows = abs(`statistic...4`) > 3
)
) %>%
tab_style(
style = list(
cell_fill(color = "#A2BAE4"),
cell_text(style = "italic")
),
locations = cells_body(
columns = "statistic...9",
rows = abs(`statistic...9`) > 3
)
) %>%
tab_style(
style = list(
cell_fill(color = "#A2BAE4"),
cell_text(style = "italic")
),
locations = cells_body(
columns = "statistic...13",
rows = abs(`statistic...13`) > 3
)
)
Positive auto-regressive models for shifts
We now estimate the models
\[X_{t+1} = bX_t + e_{t+1},\]
where \(X\) stands in turn for the loss and the two shift metrics.
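The loop below does not use least squares. Assuming the series is positive and the innovations are nonnegative (the "positive" in the section title), each observed ratio bounds the coefficient from above:
\[\frac{X_{t+1}}{X_t} = b + \frac{e_{t+1}}{X_t} \geq b,\]
so the minimum observed ratio \(\hat b = \min_t X_{t+1}/X_t\) is a natural estimator of \(b\); this is the min(...) computation in the code, applied at lags of 1, 3, and 6 months.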
shift <- c(1, 3, 6) # Lags (in months), matching the three sample sizes
L <- cbind(data_reg1m$loss, # Reorganize the data
data_reg3m$loss,
data_reg6m$loss)
D_t <- cbind(data_reg1m$D_t,
data_reg3m$D_t,
data_reg6m$D_t)
d_t <- cbind(data_reg1m$d_t,
data_reg3m$d_t,
data_reg6m$d_t)
ar_pred_L <- matrix(0, nrow = nrow(data_reg1m), ncol = 3) # Initialization of the variables below
ar_pred_D <- matrix(0, nrow = nrow(data_reg1m), ncol = 3)
ar_pred_d <- matrix(0, nrow = nrow(data_reg1m), ncol = 3)
rho_L <- matrix(0, nrow = nrow(data_reg1m), ncol = 3)
rho_D <- matrix(0, nrow = nrow(data_reg1m), ncol = 3)
rho_d <- matrix(0, nrow = nrow(data_reg1m), ncol = 3)
m_er_L <- matrix(0, nrow = nrow(data_reg1m), ncol = 3)
m_er_D <- matrix(0, nrow = nrow(data_reg1m), ncol = 3)
m_er_d <- matrix(0, nrow = nrow(data_reg1m), ncol = 3)
ar_sample <- 60 # Rolling estimation window (in months)
for(t in (ar_sample+7):nrow(data_reg1m)){ # Start late enough that all window indices are positive (largest shift is 6)
  for(j in 1:3){
    # Loss
    L_sample <- L[(t-ar_sample-shift[j]+1):(t-shift[j]), j]
    rho_L[t,j] <- min(L_sample/lag(L_sample, shift[j]), na.rm = TRUE)           # Min-ratio estimate of b
    m_er <- mean(L_sample - rho_L[t,j] * lag(L_sample, shift[j]), na.rm = TRUE) # Average residual
    m_er_L[t,j] <- m_er
    ar_pred_L[t,j] <- rho_L[t,j] * L_sample[length(L_sample)] + m_er            # One-step-ahead prediction
    # Covariate shift
    D_sample <- D_t[(t-ar_sample-shift[j]+1):(t-shift[j]), j]
    rho_D[t,j] <- min(D_sample/lag(D_sample, shift[j]), na.rm = TRUE)
    m_er <- mean(D_sample - rho_D[t,j] * lag(D_sample, shift[j]), na.rm = TRUE)
    m_er_D[t,j] <- m_er
    ar_pred_D[t,j] <- rho_D[t,j] * D_sample[length(D_sample)] + m_er
    # Concept drift
    d_sample <- d_t[(t-ar_sample-shift[j]+1):(t-shift[j]), j]
    rho_d[t,j] <- min(d_sample/lag(d_sample, shift[j]), na.rm = TRUE)
    m_er <- mean(d_sample - rho_d[t,j] * lag(d_sample, shift[j]), na.rm = TRUE)
    m_er_d[t,j] <- m_er
    ar_pred_d[t,j] <- rho_d[t,j] * d_sample[length(d_sample)] + m_er
  }
}
Next, some wrangling & the plots.
rho_L <- data_reg1m$date %>%
bind_cols(data.frame(rho_L)) %>% mutate(type = "Loss (L)")
colnames(rho_L) <- c("date", "1m", "3m", "6m", "type")
rho_D <- data_reg1m$date %>%
bind_cols(data.frame(rho_D)) %>% mutate(type = "Covariate shift (D)")
colnames(rho_D) <- c("date", "1m", "3m", "6m", "type")
rho_d <- data_reg1m$date %>%
bind_cols(data.frame(rho_d)) %>% mutate(type = "Concept drift (d)")
colnames(rho_d) <- c("date", "1m", "3m", "6m", "type")
ar1 <- bind_rows(rho_L, rho_D, rho_d) %>%
filter(date > "1985-12-01") %>% # There is a buffer period for the first estimation...
pivot_longer(c("1m", "3m", "6m"), names_to = "sample", values_to = "value") %>%
ggplot(aes(x = date, y = value, color = sample)) + geom_line() + facet_wrap(vars(type)) + theme_light() +
theme(legend.position = c(0.8,0.83)) + ylab("Autoregressive coefficient") + xlab(element_blank())
d_t <- bind_cols(data_reg1m$date, data.frame(d_t))
D_t <- bind_cols(data_reg1m$date, data.frame(D_t))
colnames(d_t) <- c("date", "1m", "3m", "6m")
colnames(D_t) <- c("date", "1m", "3m", "6m")
d_t <- d_t %>% pivot_longer(-date, names_to = "sample", values_to = "value")
D_t <- D_t %>% pivot_longer(-date, names_to = "sample", values_to = "value")
er_L <- data_reg1m$date %>%
bind_cols(data.frame(m_er_L)) %>% mutate(type = "Loss (L)")
colnames(er_L) <- c("date", "1m", "3m", "6m", "type")
er_D <- data_reg1m$date %>%
bind_cols(data.frame(m_er_D)) %>% mutate(type = "Covariate shift (D)")
colnames(er_D) <- c("date", "1m", "3m", "6m", "type")
er_d <- data_reg1m$date %>%
bind_cols(data.frame(m_er_d)) %>% mutate(type = "Concept drift (d)")
colnames(er_d) <- c("date", "1m", "3m", "6m", "type")
ar2 <- bind_rows(er_L, er_D, er_d) %>%
filter(date > "1985-12-01") %>%
pivot_longer(c("1m", "3m", "6m"), names_to = "sample", values_to = "value") %>%
ggplot(aes(x = date, y = value, color = sample)) + geom_line() + facet_wrap(vars(type), scales = "free") + theme_light() +
theme(legend.position = c(0.53,0.83)) + ylab("Average error") + xlab(element_blank())
ar1 / ar2
# ggsave("ar2.pdf", width = 7, height = 4)