I am trying to do a rolling multiple regression of a dataset grouped by stock. A sample of the dataset can be seen below. It goes from 1991 to 2019 and contains information on stocks like returns, etc. What I intend to do is regress dependant variable ExcessReturn on EPU_Paper for each stock using a 36 month rolling regression window. I also want the stocks to have at least 18 monthly return observations in the 36 month rolling windows. ISIN is the identifier of each stock in this case. I also want to include SIZE, INVEST, BM and OP as controls in the regression. The dataset does not contain any NAs except for the beta_monthly column which is all NAs. .
This is the code I have tried to run. I am able to get the for-loop working, but do not get the desired result. Optimally I would want the coefficient of EPU_Paper to be appended in df_Final in the correct row.
I am open for both completely new solutions and variants of my current attempt.
# Create date sequence
date <- seq(as.Date("1991-01-01"),as.Date("2019-12-31"), by = "month")
## Model
v <- 36 # No. of observations in rolling regression
w <- 18 # observations of stocks requred in period
df_Final$beta_monthly <- NA
for (i in 1:(length(date)-v)) {
beta.tab <- df_Final %>% filter(Date >= date[i] & Date < date[i+v]) %>%
group_by(ISIN) %>% filter(n() >= w) %>%
do(ols.model = lm(formula = ExcessReturn ~ EPU_Paper + SIZE, .)) %>%
mutate(beta_monthly = coefficients(ols.model)[2],
Date = date[v+1])
}
This is the dataframe df_Final. It contains all the data I need.
structure(list(Year = c(2002, 2004, 2011, 2011, 2012, 1993, 2005,
2019, 2005, 1998), Month = c(5, 4, 12, 11, 4, 11, 7, 2, 12, 12
), ISIN = c("NO0003172207", "NO0003072803", "NO0010001118", "NO0010096985",
"NO0010052350", "NO0004031303", "NO0003733800", "NO0003049405",
"NO0003028904", "NO0004684408"), SIZE = c(1143750000, 894618192,
257727844.92, 293346266180.2, 104014912.25, 1312826651.5, 51164845865,
535492777.6, 1.2465e+10, 8815671800), BM = c(2.69336652499494e-06,
6.25913195949328e-07, 3.0680673824874e-07, 9.99841307356348e-07,
3.99901247813628e-06, 3.6136784151303e-06, 6.27009692475242e-07,
6.397720392755e-07, 1.985559566787e-07, 9.2518383241951e-07),
OP = c(-0.0259646808923766, 0.197313839816668, 0.136649432305334,
0.594948150836374, -0.0018535993529254, -0.0801364023870418,
0.130539826349566, 0.0244477246423, 0.620295983086681, 0.103857566765579
), INVEST = c(0.129154816408376, 0.0321275661230328, -0.092547902189399,
0.142434794968375, -0.121033439243494, -0.00124744840099796,
-0.240237999927217, 0.0376008757633188, 0.060294968189705,
0.112664489390554), MonthlyReturn = c(-0.039797852179406,
-0.066030013642565, 0.019230769230769, 0.049271412097704,
-0.12516823687752, -0.02219755826859, 0.057851239669421,
-0.043636363636364, 0.05232436939754, 0.32743529766845),
RiskFreeRate = c(0.00558, 0.00163, 0.00209, 0.00251, 0.00163,
0.00467, 0.00181, 0.00086, 0.00208, 0.00726), ShareTurnover = c(69750L,
5250L, 369135L, 183793926L, 54869L, 2879656L, 7957362L, 367551L,
2478662L, 2245928L), MarketExcessReturn = c(-2.7155, -3.0781,
1.0322, -0.3552, -0.9447, -4.9307, 6.0359, 3.8371, 6.932,
-0.7896), ExcessReturn = c(-4.5377852179406, -6.7660013642565,
1.7140769230769, 4.6761412097704, -12.679823687752, -2.686755826859,
5.6041239669421, -4.4496363636364, 5.024436939754, 32.017529766845
), TradeDate = structure(c(11838, 12538, 15338, 15308, 15460,
8734, 12993, 17955, 13147, 10590), class = "Date"), GR_SIZE = structure(c(3L,
2L, 1L, 3L, 1L, 2L, 3L, 1L, 3L, 3L), .Label = c("1", "2",
"3"), class = "factor"), GR_OP = structure(c(1L, 2L, 2L,
3L, 1L, 1L, 2L, 1L, 3L, 1L), .Label = c("1", "2", "3"), class = "factor"),
GR_BM = structure(c(3L, 2L, 1L, 3L, 3L, 3L, 2L, 2L, 1L, 3L
), .Label = c("1", "2", "3"), class = "factor"), GR_INVEST = structure(c(3L,
2L, 1L, 3L, 1L, 1L, 1L, 2L, 2L, 2L), .Label = c("1", "2",
"3"), class = "factor"), SIZE_BM = structure(c(9L, 5L, 1L,
9L, 3L, 6L, 8L, 2L, 7L, 9L), .Label = c("11", "12", "13",
"21", "22", "23", "31", "32", "33"), class = "factor"), SIZE_OP = structure(c(7L,
5L, 2L, 9L, 1L, 4L, 8L, 1L, 9L, 7L), .Label = c("11", "12",
"13", "21", "22", "23", "31", "32", "33"), class = "factor"),
SIZE_INVEST = structure(c(9L, 5L, 1L, 9L, 1L, 4L, 7L, 2L,
8L, 8L), .Label = c("11", "12", "13", "21", "22", "23", "31",
"32", "33"), class = "factor"), Date = structure(c(11808,
12509, 15309, 15279, 15431, 8705, 12965, 17928, 13118, 10561
), class = "Date"), EPU_Paper = c(53.995111032374, 68.0510031873012,
150.261825109363, 124.78265498286, 47.2994312059608, 164.273390295025,
27.168222382902, 181.297305839429, 29.292072793154, 139.423199892468
), beta_monthly = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
)), row.names = c(NA, -10L), class = "data.frame")
You could try a split-apply-combine approach.
Data: