I have a sample dataset that looks like this:
library(dplyr)
test_df <- structure(list(test = c(0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L), test2 = c(34.6,
53.3, 53.9, 71.5, 40.6, 15.3, 34, 62.7, 5.7, 85.2, 21.3, 53.9,
13.7, 32.5, 62.1, 25.6, 63.5, 48.6, 93.8, 85.7, 37.1, 31.4, 82.8,
45.2, 31.6, 9.8, 6.5, 68.9, 66.8, 90.4, 30.2, 93.3, 20.2, 79.2,
22.5, 3.1, 86.2, 68.5, 94.2, 67.6, 84.3, 36.2, 39.2, 56.8, 9.5,
19.4, 58.8, 75.1, 86.7, 37.2, 79.9, 5.8, 62.3, 35.7, 58.8, 91.4,
20, 36.9, 67.1, 76.8, 52.2, 82.8, 52.7, 50.2, 42, 36.2, 12.4,
29.8, 27.7, 77, 77.8, 14.4, 51.6, 59.7, 50.6, 38.6, 42.6, 1.2,
91.9, 8, 50.7, 82, 59.8, 42.4, 55.9, 78.9, 16.8, 97, 47.4, 93,
90.1, 75.1, 67.7, 64.8, 7.3, 42.4, 53.1, 94.3, 71.2, 72.4, 34.6,
53.3, 53.9, 71.5, 40.6, 15.3, 34, 62.7, 5.7, 85.2, 21.3, 53.9,
13.7, 32.5, 62.1, 25.6, 63.5, 48.6, 93.8, 85.7, 37.1, 31.4, 82.8,
45.2, 31.6, 9.8, 6.5, 68.9, 66.8, 90.4, 30.2, 93.3, 20.2, 79.2,
22.5, 3.1, 86.2, 68.5, 94.2, 67.6, 84.3, 36.2, 39.2, 56.8, 9.5,
19.4, 58.8, 75.1, 86.7, 37.2, 79.9, 5.8, 62.3, 35.7, 58.8, 91.4,
20, 36.9, 67.1, 76.8, 52.2, 82.8, 52.7, 50.2, 42, 36.2, 12.4,
29.8, 27.7, 77, 77.8, 14.4, 51.6, 59.7, 50.6, 38.6, 42.6, 1.2,
91.9, 8, 50.7, 82, 59.8, 42.4, 55.9, 78.9, 16.8, 97, 47.4, 93,
90.1, 75.1, 67.7, 64.8, 7.3, 42.4, 53.1, 94.3, 71.2, 72.4), test3 = c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L)), class = "data.frame", row.names = c(NA, -200L
))
I am now trying to calculate Probit-Estimates via glm
. I want to calculate the same model a few times always choosing a different sample from the dataset based on test3
. So far I got
id <- unique(test_df$test3)
for (i in 1:5)
{
random[i] <- sample(id,3)
data[i] <- test_df %>% filter(id %in% random[i])
model <- glm(test~test2+test3,family = binomial(link = "probit"),data = data[i])
res <- summary(model)
results[i] <- res$coefficients
}
However, this returns
Error: Problem with `filter()` input `..1`.
x Input `..1` must be of size 200 or 1, not size 10.
ℹ Input `..1` is `id %in% random[i]`.
Does anyone know how to fix this so that in the end I get 5 different coefficients for every variable? Thanks a lot!
Normally loops that repeat analytic processes fail because the inner logic does not have indexing on the LHS of assignments. Here it fails because of those indices which are not initialized or might have been but perhaps were indexed improperly: Try this modification: