How to calculate Standardized Mean Difference for Table1 Package in R?

919 Views Asked by At

I am using the "table1" package to create a polished Table 1 with an extra column containing the standardized mean difference (SMD) of the continuous variables in my dataset.

For each variable in the table, the SMD should compare the treatment and control groups (the strata of the table), i.e. the difference between the two group means divided by a pooled standard deviation.

I am struggling to figure out a good way of doing this and would love some help creating the function to calculate SMD.

Here is some sample code:

# Draw `n` values from `x` with replacement and return them as a factor whose
# levels are the elements of `x`, in the order given.
#   x   : vector of candidate values (also used as the factor levels)
#   n   : number of values to draw
#   ... : further arguments passed on to sample(), e.g. `prob`
f <- function(x, n, ...) {
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  factor(sample(x, n, replace = TRUE, ...), levels = x)
}
set.seed(427)  # fixed seed so the simulated data set is reproducible

n <- 146
dat <- data.frame(id = seq_len(n))

# The random draws below are kept in this order so the seed reproduces the
# same data: treatment, age, sex, weight, then the missing-value positions.
dat[["treat"]] <- f(c("Placebo", "Treated"), n, prob = c(1, 2))  # 2:1 randomization
dat[["age"]]   <- sample(18:65, n, replace = TRUE)
dat[["sex"]]   <- f(c("Female", "Male"), n, prob = c(0.6, 0.4))  # 60% female
dat[["wt"]]    <- round(exp(rnorm(n, mean = log(70), sd = 0.23)), 1)

# Set five randomly chosen weights to missing
dat[sample.int(n, 5), "wt"] <- NA

# Display labels and units used when the table is rendered
label(dat$treat) <- "Treatment Group"
label(dat$sex)   <- "Sex"
label(dat$age)   <- "Age"
units(dat$age)   <- "years"
label(dat$wt)    <- "Weight"
units(dat$wt)    <- "kg"

# Render a continuous variable as a single "Mean (SD)" row.
# The summary statistics come from stats.default() and are rounded to two
# significant digits by stats.apply.rounding(); the leading "" is the empty
# cell shown on the variable's heading row.
my.render.cont <- function(x) {
    rounded <- stats.apply.rounding(stats.default(x), digits=2)
    mean_sd <- sprintf("%s (&plusmn; %s)", rounded[["MEAN"]], rounded[["SD"]])
    c("", "Mean (SD)"=mean_sd)
}
# Render a categorical variable as one "count (percent %)" row per level.
# stats.default() supplies FREQ and PCT for each level; the leading "" is the
# empty cell shown on the variable's heading row.
my.render.cat <- function(x) {
    format_level <- function(y) {
        sprintf("%d (%0.0f %%)", y[["FREQ"]], y[["PCT"]])
    }
    per_level <- sapply(stats.default(x), format_level)
    c("", per_level)
}

    #My attempt at an SMD function
     # Standardized mean difference (SMD) for use as a table1 `extra.col`.
     #
     #   x   : list of vectors, one per stratum, with the "overall" column last
     #   ... : ignored; accepted so table1 can pass additional arguments
     #
     # Returns c("", value): an empty cell for the heading row followed by the
     # SMD formatted to two decimals. The SMD is
     #   (mean of 2nd group - mean of 1st group) / sqrt((var1 + var2) / 2).
     # "--" is returned when the variable is not numeric, when there are not
     # exactly two groups, or when the pooled SD cannot be computed.
     smd_value <- function(x, ...) {
       x <- x[-length(x)]  # Remove "overall" group
       not_available <- c("", "--")

       if (length(x) != 2L) {
         return(not_available)
       }
       # `if` needs a single TRUE/FALSE, so reduce the per-group checks with all()
       if (!all(vapply(x, is.numeric, logical(1)))) {
         return(not_available)
       }

       # Drop missing values and any label/units attributes before summarising
       groups <- lapply(x, function(v) as.numeric(v[!is.na(v)]))
       if (any(lengths(groups) < 2L)) {
         return(not_available)
       }

       pooled_sd <- sqrt((var(groups[[1L]]) + var(groups[[2L]])) / 2)
       if (!is.finite(pooled_sd) || pooled_sd == 0) {
         return(not_available)
       }

       smd <- (mean(groups[[2L]]) - mean(groups[[1L]])) / pooled_sd
       c("", sprintf("%.2f", smd))
     }

# Table 1 stratified by treatment group, with the SMD as an extra column
table1(
  ~ age + sex + wt | treat,
  data = dat,
  render.continuous = my.render.cont,
  render.categorical = my.render.cat,
  extra.col = list(SMD = smd_value)
)

I get the following error:

"Error in if (is.numeric(y) & g == 1) { : the condition has length > 1"

Any insight into a potential solution?

Thanks!

1

There is 1 best solution below

1
On BEST ANSWER

Here you go!

# Load packages------------------------------------------------------
library(stddiff)
library(cobalt)
library(table1)
library(Hmisc)

# Work on a copy of the built-in 'mtcars' data set
my_data <- mtcars

# Recode and label variables ---------------------------------------------------

# am: transmission type (0 = automatic, 1 = manual)
my_data$am <- factor(
  my_data$am,
  levels = c(0, 1),
  labels = c("Automatic", "Manual")
)
label(my_data$am) <- "Transmission Type"  # label shown in the table

# vs: engine shape (0 = V-shaped, 1 = straight)
my_data$vs <- factor(
  my_data$vs,
  levels = c(0, 1),
  labels = c("V-shaped", "Straight")
)
label(my_data$vs) <- "Engine"

# Labels for the numeric variables
label(my_data$mpg) <- "Miles per gallon"
label(my_data$hp) <- "Horsepower"

# SMD FUNCTION------------------------------------------------------------------
# Standardized mean difference for use as a table1 `extra.col`.
#
#   x   : list of vectors, one per stratum (the table is built with
#         overall = FALSE, so no "overall" element is dropped here)
#   ... : ignored; accepted so table1 can pass additional arguments
#
# Numeric variables are passed to stddiff::stddiff.numeric(); all other
# variables go through cobalt::bal.tab() and the absolute unadjusted
# standardized difference(s) are used.
#
# Returns c("", value(s)): an empty cell for the heading row followed by the
# SMD formatted to two significant digits. If the computation fails, a warning
# is raised and "NA" is shown instead of aborting the whole table.
SMD_value <- function(x, ...) {
  # Construct vectors of data y, and groups (strata) g
  y <- unlist(x)
  g <- factor(rep(seq_along(x), times = lengths(x)))

  smd <- tryCatch({
    a <- data.frame(y)
    a$g <- g
    if (is.numeric(y)) {
      # For numeric variables
      as.data.frame(stddiff.numeric(data = a, gcol = "g", vcol = "y"))$stddiff
    } else {
      # For categorical variables
      abs(bal.tab(a, treat = "g", data = a, binary = "std",
                  continuous = "std", s.d.denom = "pooled",
                  stats = c("mean.diffs"))$Balance$Diff.Un)
    }
  }, error = function(e) {
    # Without a fallback value `smd` would be undefined below and the
    # function would fail with an unrelated "object 'smd' not found" error.
    warning("SMD could not be computed: ", conditionMessage(e), call. = FALSE)
    NA_real_
  })

  c("", format(smd, digits = 2)) # Formatting number of digits
}

# CONTINUOUS VARIABLES FORMATTING-----------------------------------------------
# Render a continuous variable as two rows: "Mean (SD)" and "Median (IQR)".
# Statistics come from stats.default() and are padded/rounded to one decimal
# with round_pad(); the median row shows the first and third quartiles in
# parentheses. The leading "" is the empty cell on the variable's heading row.
my.render.cont <- function(x) {
  s <- stats.default(x)

  mean_sd <- sprintf(
    "%s (%s)",
    round_pad(s[["MEAN"]], 1),
    round_pad(s[["SD"]], 1)
  )
  median_iqr <- sprintf(
    "%s (%s, %s)",
    round_pad(s[["MEDIAN"]], 1),
    round_pad(s[["Q1"]], 1),
    round_pad(s[["Q3"]], 1)
  )

  c("", "Mean (SD)" = mean_sd, "Median (IQR)" = median_iqr)
}

# Creating the final table-----------------------------------------------------
# Stratify by transmission type, omit the overall column, and append the SMD
Table1 <- table1(
  ~ vs + mpg + hp | am,
  data = my_data,
  overall = FALSE,
  render.continuous = my.render.cont,
  extra.col = list(SMD = SMD_value)
)

# Print the finished table
Table1