Compute a t-test in R from CSV data and visualize it in a boxplot

672 Views Asked by At

I am trying to import some data from a CSV file and visualize it in a boxplot so that I can display it online using a platform. I am using R to compute a t-test and then display the results in a plot. I am getting this error when I try to compute the t-test: Error

This is the code I wrote:

# Script from the question: read two groups of measurements from a CSV file,
# compare them with a t-test, and draw a boxplot of both groups.

# Display names for the two groups.
labels <- list('non-failing heart (NF)', 'failing heart (F)')

# Read the measurements; the first row of data.csv holds the column names.
data <- read.csv("data.csv", header=T)
# Cells holding the literal text 'NA_integer_' are replaced with a real NA.
data[data == 'NA_integer_'] <- NA
# NOTE(review): replacing strings with NA does not change a column's type. If
# NF was read as character (as in the sample data), it is still character here
# and t.test() needs numeric input -- presumably the reported error; confirm.
t.test(data$NF, data$F)

# For each column d of `data`, data.frame(d[1], d[2]) keeps only its first two
# values as a single row; the rows are stacked with rbind and the two resulting
# columns are named with `labels`.
# NOTE(review): this gives one row per group and only two values per group,
# which looks unintended for a per-group comparison -- confirm.
df <- setNames(do.call(rbind.data.frame, 
                       lapply(data, function(d) data.frame(d[1], d[2]))),
              labels)    

                           
                           
# NOTE(review): `data` presumably still has the columns NF and F (see the
# sample data), so these label-named columns may not exist in it -- confirm.
results <- t.test(data$`non-failing heart (NF)`, data$`failing heart (F)`)


                           
# Print the t statistic, the two group means, and the p-value.
results$statistic
results$estimate
results$p.value
                           

# Add short-named copies of the two label columns; they become columns 3 and 4.
df$NF <- df$`non-failing heart (NF)`
df$F <- df$`failing heart (F)`
# Draw one box per short-named column (columns 3 and 4 of df).
boxplot(df[3:4],
        data=df,
        cex.lab=0.65,
        xlab="Fig. 1 IRE binding activity for non-failing (NF) and failing (F) hearts.",
        ylab="IRE binding activity (%)",
        col="orange",
        border="brown",
        ylim = c(0, 120)
)

Sample Data

structure(list(NF = c("99", "96", "100", "105", "NA_integer_", "NA_integer_", "NA_integer_", "NA_integer_", "NA_integer_", "NA_integer_" ), F = c(52L, 40L, 38L, 18L, 11L, 5L, 42L, 55L, 53L, 39L)), row.names = c(NA, 10L), class = "data.frame")

Update

After converting the data to numeric as mentioned in the answer, I am getting this error: Error 2

1

There is 1 best solution below

2
On BEST ANSWER

First, you need to convert all the columns to numeric type:

# Insert this right after the data[data == 'NA_integer_'] <- NA step:
# every column of `data` is passed through as.numeric().
library(tidyverse)
data <- data %>%
  mutate_all(as.numeric)

Then change the data frame you are using to calculate `results`:

# Run the test on `df` instead of `data`: the label-named columns are looked
# up in `df`.
# NOTE(review): the test is only meaningful if `df` holds every observation of
# both groups -- confirm how `df` is built.
results <- t.test(
  x = df$`non-failing heart (NF)`,
  y = df$`failing heart (F)`
)

Full code:

library(tidyverse)
#> Warning: package 'tibble' was built under R version 3.6.2
#> Warning: package 'purrr' was built under R version 3.6.2
#> Warning: package 'dplyr' was built under R version 3.6.2

# Reproducible example: Welch two-sample t-test and boxplot comparing the
# non-failing (NF) and failing (F) groups.

# Display names for the two groups, in the same order as the columns of `data`.
labels <- c("non-failing heart (NF)", "failing heart (F)")

# Sample data: 4 NF readings and 10 F readings. The missing NF readings are
# stored as the literal text "NA_integer_", which makes NF a character column.
data <- structure(
  list(
    NF = c(
      "99", "96", "100", "105", "NA_integer_", "NA_integer_",
      "NA_integer_", "NA_integer_", "NA_integer_", "NA_integer_"
    ),
    F = c(52L, 40L, 38L, 18L, 11L, 5L, 42L, 55L, 53L, 39L)
  ),
  row.names = c(NA, 10L),
  class = "data.frame"
)

# Turn the placeholder text into real missing values, then make every column
# numeric so that t.test() accepts it.
data[data == "NA_integer_"] <- NA
data <- data %>% mutate_all(as.numeric)

t.test(data$NF, data$F)
#> 
#>  Welch Two Sample t-test
#> 
#> data:  data$NF and data$F
#> t = 10.866, df = 10.695, p-value = 4.118e-07
#> alternative hypothesis: true difference in means is not equal to 0
#> 95 percent confidence interval:
#>  51.54831 77.85169
#> sample estimates:
#> mean of x mean of y 
#>     100.0      35.3

# Keep every observation and only rename the columns with the display labels.
# Building `df` from d[1] and d[2] of each column would keep just the first two
# values per group and swap rows with columns, so the test below would compare
# the wrong numbers.
df <- setNames(data, labels)

results <- t.test(df$`non-failing heart (NF)`, df$`failing heart (F)`)

# `results` is the same Welch test as the one printed above:
# t = 10.866, group means 100.0 and 35.3, p-value = 4.118e-07.
results$statistic
results$estimate
results$p.value

# One box per group, labelled with the short column names NF and F.
# Missing NF values are dropped when the box statistics are computed.
boxplot(
  data[c("NF", "F")],
  cex.lab = 0.65,
  xlab = "Fig. 1 IRE binding activity for non-failing (NF) and failing (F) hearts.",
  ylab = "IRE binding activity (%)",
  col = "orange",
  border = "brown",
  ylim = c(0, 120)
)