Replicating the T-Test in R

78 Views Asked by At

I tried to replicate the following simple t-test and this calculates t = -3.5985:

t.test(gktlists ~ small, data=star, var.equal=FALSE)

Here is what I did:

mean_small <- mean(star$gktlists[star$small == 1], na.rm = TRUE)
mean_large <- mean(star$gktlists[star$small == 0], na.rm = TRUE)
print(mean_small - mean_large)

var_small <- var(star$gktlists[star$small == 1], na.rm=TRUE)
var_large <- var(star$gktlists[star$small == 0], na.rm=TRUE)

len_small <- length(star$gktlists[star$small == 1])
len_large <- length(star$gktlists[star$small == 0])

(mean_large - mean_small) / sqrt(var_small / len_small + var_large / len_large)

But I get a different t-statistic, t = -3.746202. Is that only due to rounding errors or is the t.test() doing something different than I expect. What am I doing wrong?

2

There are 2 best solutions below

3
On BEST ANSWER

Using sleep data, your code works actually fine for me.

> mean_small <- mean(sleep$extra[sleep$group == 1], na.rm = TRUE)
> mean_large <- mean(sleep$extra[sleep$group == 2], na.rm = TRUE)
> print(mean_small - mean_large)
[1] -1.58
> var_small <- var(sleep$extra[sleep$group == 1], na.rm=TRUE)
> var_large <- var(sleep$extra[sleep$group == 2], na.rm=TRUE)
> len_small <- length(sleep$extra[sleep$group == 1])
> len_large <- length(sleep$extra[sleep$group == 2])
> (mean_large - mean_small)/sqrt(var_small/len_small + var_large/len_large)
[1] 1.860813
> (tt <- t.test(extra ~ group, data = sleep))

    Welch Two Sample t-test

data:  extra by group
t = -1.8608, df = 17.776, p-value = 0.07939
alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
95 percent confidence interval:
 -3.3654832  0.2054832
sample estimates:
mean in group 1 mean in group 2 
           0.75            2.33 

> diff(tt$estimate)
mean in group 2 
           1.58 
0
On

This is the textbook calculation in R code.

# to get the groups in two vectors simplifies the code below
A <- sleep$extra[sleep$group == 1]
B <- sleep$extra[sleep$group == 2]

# different variances, Welch test
var(A)
#> [1] 3.200556
var(B)
#> [1] 4.009

# compute group means, variances, etc
Abar <- mean(A); Bbar <- mean(B)
vA <- var(A); vB <- var(B)
nA <- length(A); nB <- length(B)
sA <- vA/nA; sB <- vB/nB
seA <- sqrt(vA/nA); seB <- sqrt(vB/nB)
se <- sqrt(seA^2 + seB^2)

# with Welch degrees of freedom
df <- se^4/(seA^4/(nA - 1) + seB^4/(nB - 1))

# test statistic
t.stat <- (Abar - Bbar)/sqrt(sA + sB)
# and p-value
pval <- 2*pt(abs(t.stat), df = df, lower.tail = FALSE)

# these values are exactly R's t.test results
t.stat
#> [1] -1.860813
df
#> [1] 17.77647
pval
#> [1] 0.07939414

(tt <- t.test(extra ~ group, data = sleep))
#> 
#>  Welch Two Sample t-test
#> 
#> data:  extra by group
#> t = -1.8608, df = 17.776, p-value = 0.07939
#> alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
#> 95 percent confidence interval:
#>  -3.3654832  0.2054832
#> sample estimates:
#> mean in group 1 mean in group 2 
#>            0.75            2.33

Created on 2023-11-05 with reprex v2.0.2