Calculate Probit Estimates for samples via Loop

Question

Calculate Probit Estimates for samples via Loop

80 Views Asked by Maximilian At 07 June 2025 at 09:46

I have a sample dataset that looks like this:

library(dplyr)
test_df <- structure(list(test = c(0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L), test2 = c(34.6, 
53.3, 53.9, 71.5, 40.6, 15.3, 34, 62.7, 5.7, 85.2, 21.3, 53.9, 
13.7, 32.5, 62.1, 25.6, 63.5, 48.6, 93.8, 85.7, 37.1, 31.4, 82.8, 
45.2, 31.6, 9.8, 6.5, 68.9, 66.8, 90.4, 30.2, 93.3, 20.2, 79.2, 
22.5, 3.1, 86.2, 68.5, 94.2, 67.6, 84.3, 36.2, 39.2, 56.8, 9.5, 
19.4, 58.8, 75.1, 86.7, 37.2, 79.9, 5.8, 62.3, 35.7, 58.8, 91.4, 
20, 36.9, 67.1, 76.8, 52.2, 82.8, 52.7, 50.2, 42, 36.2, 12.4, 
29.8, 27.7, 77, 77.8, 14.4, 51.6, 59.7, 50.6, 38.6, 42.6, 1.2, 
91.9, 8, 50.7, 82, 59.8, 42.4, 55.9, 78.9, 16.8, 97, 47.4, 93, 
90.1, 75.1, 67.7, 64.8, 7.3, 42.4, 53.1, 94.3, 71.2, 72.4, 34.6, 
53.3, 53.9, 71.5, 40.6, 15.3, 34, 62.7, 5.7, 85.2, 21.3, 53.9, 
13.7, 32.5, 62.1, 25.6, 63.5, 48.6, 93.8, 85.7, 37.1, 31.4, 82.8, 
45.2, 31.6, 9.8, 6.5, 68.9, 66.8, 90.4, 30.2, 93.3, 20.2, 79.2, 
22.5, 3.1, 86.2, 68.5, 94.2, 67.6, 84.3, 36.2, 39.2, 56.8, 9.5, 
19.4, 58.8, 75.1, 86.7, 37.2, 79.9, 5.8, 62.3, 35.7, 58.8, 91.4, 
20, 36.9, 67.1, 76.8, 52.2, 82.8, 52.7, 50.2, 42, 36.2, 12.4, 
29.8, 27.7, 77, 77.8, 14.4, 51.6, 59.7, 50.6, 38.6, 42.6, 1.2, 
91.9, 8, 50.7, 82, 59.8, 42.4, 55.9, 78.9, 16.8, 97, 47.4, 93, 
90.1, 75.1, 67.7, 64.8, 7.3, 42.4, 53.1, 94.3, 71.2, 72.4), test3 = c(1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 
7L, 8L, 9L, 10L)), class = "data.frame", row.names = c(NA, -200L
))

I am now trying to calculate Probit-Estimates via glm. I want to calculate the same model a few times always choosing a different sample from the dataset based on test3. So far I got

id <- unique(test_df$test3)
for (i in 1:5)
{
  random[i] <- sample(id,3)
  data[i] <- test_df %>% filter(id %in% random[i])
  model <- glm(test~test2+test3,family = binomial(link = "probit"),data = data[i])
  res <- summary(model)
  results[i] <- res$coefficients
}

However, this returns

Error: Problem with `filter()` input `..1`.
x Input `..1` must be of size 200 or 1, not size 10.
ℹ Input `..1` is `id %in% random[i]`.

Does anyone know how to fix this so that in the end I get 5 different coefficients for every variable? Thanks a lot!

Original Q&A

There are 2 best solutions below

**IRTFM** · Answer 1

Normally loops that repeat analytic processes fail because the inner logic does not have indexing on the LHS of assignments. Here it fails because of those indices which are not initialized or might have been but perhaps were indexed improperly: Try this modification:

id <- unique(test_df$test3); results <- list()
for (i in 1:5)
{
    random <- sample(id,3)
    data1 <- test_df %>% filter(test3 %in% random) #note the need to use a column name
    model <- glm(test~test2+test3,family = binomial(link = "probit"),data = data1)
    res <- summary(model)
    results[[i]] <- res$coefficients # note need to use `[[` rather than `[`
}

str(results)
List of 5
 $ : num [1:3, 1:4] 6.85 8.02e-12 5.50e-10 2.18e+04 2.37e+02 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:3] "(Intercept)" "test2" "test3"
  .. ..$ : chr [1:4] "Estimate" "Std. Error" "z value" "Pr(>|z|)"
 $ : num [1:3, 1:4] 3.35e+01 -3.41e-04 -1.34e+01 2.49e+04 1.73e+02 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:3] "(Intercept)" "test2" "test3"
  .. ..$ : chr [1:4] "Estimate" "Std. Error" "z value" "Pr(>|z|)"
 $ : num [1:3, 1:4] -0.414455 -0.000279 -0.000755 0.59573 0.006827 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:3] "(Intercept)" "test2" "test3"
  .. ..$ : chr [1:4] "Estimate" "Std. Error" "z value" "Pr(>|z|)"
 $ : num [1:3, 1:4] -1.54385 -0.00582 0.24211 0.7596 0.00707 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:3] "(Intercept)" "test2" "test3"
  .. ..$ : chr [1:4] "Estimate" "Std. Error" "z value" "Pr(>|z|)"
 $ : num [1:3, 1:4] -0.62463 0.00318 0.23255 0.48835 0.00699 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:3] "(Intercept)" "test2" "test3"
  .. ..$ : chr [1:4] "Estimate" "Std. Error" "z value" "Pr(>|z|)"

**zoowalk** · Answer 2

Does this help?

library(tidyverse)
library(broom)


id <- unique(test_df$test3)

results <- tibble(i=NA, results=NA)

for (i in 1:5)
{ 

  random <- sample(id, 3)

  
  data <- test_df %>% filter(test3 %in% random)
  model <- glm(test~test2+test3,family = binomial(link = "probit"), data = data)
  
  res <- tibble(i=i, results=tidy(model))
  
  results <- bind_rows(results, res)
  
}
#> Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

results
#> # A tibble: 16 x 2
#>        i results$term $estimate  $std.error $statistic $p.value
#>    <int> <chr>            <dbl>       <dbl>      <dbl>    <dbl>
#>  1    NA <NA>         NA           NA        NA        NA      
#>  2     1 (Intercept)  -2.95e+ 1 29910.       -9.88e- 4  0.999  
#>  3     1 test2         2.37e- 3   282.        8.41e- 6  1.00   
#>  4     1 test3         4.54e+ 0  4455.        1.02e- 3  0.999  
#>  5     2 (Intercept)  -6.85e+ 0 15834.       -4.32e- 4  1.00   
#>  6     2 test2        -3.83e-17   211.       -1.82e-19  1      
#>  7     2 test3        -5.52e-16  1732.       -3.19e-19  1      
#>  8     3 (Intercept)  -1.34e+ 0     0.610    -2.20e+ 0  0.0277 
#>  9     3 test2        -5.49e- 3     0.00704  -7.80e- 1  0.436  
#> 10     3 test3         2.20e- 1     0.0834    2.64e+ 0  0.00827
#> 11     4 (Intercept)  -6.85e+ 0 15061.       -4.55e- 4  1.00   
#> 12     4 test2        -2.94e-18   224.       -1.32e-20  1      
#> 13     4 test3         4.95e-16  1821.        2.72e-19  1      
#> 14     5 (Intercept)  -3.52e- 1     0.585    -6.01e- 1  0.548  
#> 15     5 test2        -1.68e- 3     0.00678  -2.48e- 1  0.804  
#> 16     5 test3         2.52e- 1     0.114     2.21e+ 0  0.0272

^{Created on 2020-12-10 by the reprex package (v0.3.0)}

Calculate Probit Estimates for samples via Loop

There are 2 best solutions below

Related Questions in R

Related Questions in LOOPS

Related Questions in GLM

Related Questions in BINOMIAL-COEFFICIENTS

Trending Questions

Popular # Hahtags

Popular Questions