How to shade the area under a curve ok Kaplan-Meier (step function)

74 Views Asked by At

I like to obtain graphically the area under a curve de Kaplan-Meier. I have done the previous recommendation in this stack overflow but it doesn't work correctly

Example

library(survival)
data (cancer)
veteran 

km <- survfit(Surv(time,status)~1, data=leukemia)
plot(km, conf.int=F, xlab="time until relapse (in weeks)", mark.time = T,
     ylab="proportion without relapse",lab=c(10,10,7),col='blue')
abline(h=0)
abline(v=0)

time <- km$time[km$time <= 30]
surv <- km$surv[km$time <= 30]
polygon(
  c(min(time), time, max(time)),
  c(0, surv, 0),
  col = "blue", border = F
)

library(ggplot2)
dat <- data.frame(
  time = km$time,
  surv = km$surv
)
ggplot(dat, aes(time, surv)) +
  geom_line() +
  geom_vline(xintercept = 40, color = "blue") +
  geom_area(data = subset(dat, time <= 40), fill = "blue") +
  theme_minimal()

I would like to obtain graphically the area under curve between 0 and 40, AND between 20 and 60.

enter image description here

enter image description here

enter image description here

Thanks in advance

2

There are 2 best solutions below

2
On

The issue is that your data lacks the exact time points. Hence, to achieve your desired result you need an estimate or prediction for these time points. As your KM curve is piecewise-linear you could use linear interpolation e.g. using approx():

library(survival)
data(cancer)

km <- survfit(Surv(time, status) ~ 1, data = leukemia)

library(ggplot2)

km1 <- approx(km$time, km$surv, c(km$time, 40, 20, 60))

dat <- data.frame(
  time = km1$x,
  surv = km1$y
)

ggplot(dat, aes(time, surv)) +
  geom_line() +
  geom_vline(xintercept = 40, color = "blue") +
  geom_area(data = subset(dat, time <= 40), fill = "blue", alpha = .5) +
  geom_area(data = subset(dat, time >= 20 & time <= 60), fill = "green", alpha = .5) +
  theme_minimal()

0
On
library(survival)
library(ggplot2)
library(data.table)
library(zoo) 

km <- survfit(Surv(time,status)~1, data=leukemia)
plot(km, conf.int=F, xlab="time until relapse (in weeks)", mark.time = T,
     ylab="proportion without relapse",lab=c(10,10,7),col='blue')
abline(h=0)
abline(v=0)


fill_data=function(this_km){
  out=rbind(data.table(time=0,surv=1),data.table(time=this_km$time,surv=this_km$surv))
  out=rbind(na.omit(out[,.(time,surv=shift(surv))]),out)
  setkeyv(out,"time")
  return(out)
}

data=fill_data(km)
data[,surv:=na.locf(surv)]

ggplot()+
  geom_step(aes(time,surv), col='blue', lwd=1.5, data=data) + 
  geom_area(data=data, aes(x=time,y=surv), fill='pink', alpha=1, lwd=0.1)  +
  labs(x = "Time (months)", y = "Survival", title = "")+
  theme_classic() +
  scale_x_continuous(breaks = seq(from= 0, to=160, by = 10))+
  scale_y_continuous(breaks = seq(from= 0, to=1, by = 0.2))+
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  theme(legend.position = c(0.8, 0.8), legend.direction = "vertical", 
        legend.background = element_rect(fill = "white"),
        legend.title = element_text(family = "Courier", color = "black", size = 14,  face = 1), 
        legend.text = element_text (size = 20)) + 
  theme(axis.text = element_text(size = 14))+
  theme(axis.title = element_text(size = 18)) +
  theme(axis.title.x = element_text(margin = margin(t = 14)),
        axis.title.y = element_text(margin = margin(r = 14))) +
  theme(legend.title = element_blank(), legend.key.height = unit(2, "lines"))

summary(km)
summary(km, times = c(20,60))
fix(data) # time 20 - surv 0.646, time 60  surv 0.0828

data4 = subset(data, time >= 20 & time <= 60)
ggplot()+
  geom_step(aes(time,surv), col='blue', lwd=1, data=data) + 
  geom_area(data=data4, aes(x=time,y=surv), fill='pink', alpha=0.5, lwd=0.75)  +
  labs(x = "Time (months)", y = "Survival", title = "")+
  theme_classic() +
  scale_x_continuous(breaks = seq(from= 0, to=160, by = 10))+
  scale_y_continuous(breaks = seq(from= 0, to=1, by = 0.2))

enter image description here

enter image description here

Thanks ¡¡¡