I have a sample dataset that looks like this:
library(dplyr)
# Reproducible sample data: 200 rows, three columns.
#   test  - integer outcome alternating 0, 1, 0, 1, ...
#   test2 - numeric covariate; a run of 100 values that appears twice
#   test3 - integer id cycling through 1..10
test_df <- data.frame(
  test = rep(0:1, times = 100L),
  test2 = rep(
    c(
      34.6, 53.3, 53.9, 71.5, 40.6, 15.3, 34, 62.7, 5.7, 85.2,
      21.3, 53.9, 13.7, 32.5, 62.1, 25.6, 63.5, 48.6, 93.8, 85.7,
      37.1, 31.4, 82.8, 45.2, 31.6, 9.8, 6.5, 68.9, 66.8, 90.4,
      30.2, 93.3, 20.2, 79.2, 22.5, 3.1, 86.2, 68.5, 94.2, 67.6,
      84.3, 36.2, 39.2, 56.8, 9.5, 19.4, 58.8, 75.1, 86.7, 37.2,
      79.9, 5.8, 62.3, 35.7, 58.8, 91.4, 20, 36.9, 67.1, 76.8,
      52.2, 82.8, 52.7, 50.2, 42, 36.2, 12.4, 29.8, 27.7, 77,
      77.8, 14.4, 51.6, 59.7, 50.6, 38.6, 42.6, 1.2, 91.9, 8,
      50.7, 82, 59.8, 42.4, 55.9, 78.9, 16.8, 97, 47.4, 93,
      90.1, 75.1, 67.7, 64.8, 7.3, 42.4, 53.1, 94.3, 71.2, 72.4
    ),
    times = 2L
  ),
  test3 = rep(1:10, times = 20L)
)
I am now trying to compute probit estimates with `glm()`. I want to fit the same model several times, each time on a different sample of the dataset chosen by `test3`. So far I have:
# Attempt: fit the same probit model five times, each time on the rows whose
# test3 value falls in a random draw of three ids, and keep the coefficients.
# NOTE(review): `random`, `data` and `results` are assigned by index below but
# are never created in this snippet - presumably defined earlier; confirm.

# Distinct values of test3 (1 to 10 in the sample data).
id <- unique(test_df$test3)
for (i in 1:5)
{
# sample() draws 3 ids, but random[i] addresses a single element, so the
# three values cannot all be stored in that one slot.
random[i] <- sample(id,3)
# Inside filter(), `id` is the length-10 vector defined above, not a column of
# test_df (its columns are test, test2, test3). The condition is therefore
# length 10 rather than 200 - this is the expression named in the error below.
data[i] <- test_df %>% filter(id %in% random[i])
# Probit regression of test on test2 and test3.
# NOTE(review): glm() needs a data frame for `data`; whether data[i] yields one
# depends on how `data` was created - confirm.
model <- glm(test~test2+test3,family = binomial(link = "probit"),data = data[i])
res <- summary(model)
# res$coefficients is the coefficient table from summary(); `[i]` addresses one
# element, whereas `[[i]]` on a list would keep the whole table per iteration.
results[i] <- res$coefficients
}
However, this returns the following error:
Error: Problem with `filter()` input `..1`.
x Input `..1` must be of size 200 or 1, not size 10.
ℹ Input `..1` is `id %in% random[i]`.
Does anyone know how to fix this so that in the end I get 5 different coefficients for every variable? Thanks a lot!
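Does this help? Filter on the column itself (`test3 %in% sampled_ids`) rather than on the external vector `id`, and store each iteration's sample and coefficient table in a list using `[[i]]`.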
Created on 2020-12-10 by the reprex package (v0.3.0)