ggplot2 barplot multiple overlapping confidence interval

43 Views Asked by At

I am trying to make a barplot, and came across an issue with having multiple overlapping error bar when using geom_errorbar in R.

## Use ggplot for bargraph
dat3$EDU_B <- as.factor(dplyr::recode(dat3$EDU_B,
                                      "-0.5" = "No BA",
                                      "0.5" = "BA or Higher"))

dat3$DISC_LB <- as.factor(dplyr::recode(dat3$DISC_LB,
                                      "-0.5" = "None",
                                      "0.5" = "Once or More"))

m_graph <- glm(Vac~AGE+SEX+R_Ethgp+hea+SUPP+EDU_B*DISC_LB, data=dat3, family="binomial"(link="logit"))

d_graph<- expand.grid(EDU_B=c("No BA", "BA or Higher"), 
                      DISC_LB = c("None", "Once or More"),
                      AGE = mean(dat3$AGE, na.rm=TRUE), SEX = as.factor(c("1", "2")), R_Ethgp = as.factor(c("1", "2", "3")),
                      hea=mean(dat3$hea, na.rm=TRUE),
                      SUPP=mean(dat3$SUPP, na.rm=TRUE))


d_graph <- ggplotPredict(m_graph, d_graph)

colours <- setNames(c("gray", "black"), 
                    c("None", "Once or More"))

ggplot(d_graph, aes(x = EDU_B, y = Predicted, fill = DISC_LB)) +
geom_bar(stat = 'identity', position = position_dodge(width = .5), width = .5) +
  geom_errorbar(aes(ymin = CILo, ymax = CIHi), position = position_dodge(width = .5), width = .25) +
  labs(x = 'EDU_B',
       y = 'Vac',
       fill = "DISC_LB") +
  theme_bw(base_size = 12) +
  theme(axis.line = element_line(color="black"),
        axis.ticks = element_line(color="black"), panel.border = element_blank(), 
        legend.background = element_blank(),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank())+
  scale_fill_manual(values=colours)

This code shows the below barplot:

barplot outcome

dput(d_graph) outcome is as below:

structure(list(Predicted = c(0.741733907298891, 0.893756128438394, 
0.753034460658107, 0.844328967902538, 0.690516055803563, 0.867292483919938, 
0.703160805745206, 0.808195916427015, 0.72652210305617, 0.886123699935912, 
0.738253376025301, 0.83380602202825, 0.673614979456173, 0.858060859719625, 
0.686636893380215, 0.795821126990505, 0.756261079495159, 0.900874986909593, 
0.767125555910493, 0.8542194901226, 0.706785307496624, 0.875938400246907, 
0.719035804605874, 0.819892436521564), CILo = c(0.715042531267134, 
0.879798887274131, 0.723169362160149, 0.824203005032667, 0.662243435508124, 
0.850948845952362, 0.671328969582244, 0.78632684692288, 0.683373949583467, 
0.861484206821811, 0.696028073246912, 0.801850170598328, 0.629603044714273, 
0.830165337690498, 0.64352199113624, 0.761701452826876, 0.717667779348929, 
0.881149420462295, 0.730439261597285, 0.827893111224002, 0.666379378466323, 
0.853326521810591, 0.680743413133101, 0.791428074348007), CIHi = c(0.766740741785983, 
0.906265380398855, 0.780654887628222, 0.862535118114526, 0.717431853735625, 
0.882092336078731, 0.733135236691968, 0.828315421381777, 0.765805349072594, 
0.906854159078416, 0.776496600435928, 0.861498477136563, 0.714766483640827, 
0.882025639872601, 0.726751381800773, 0.826170411398003, 0.791114377817812, 
0.917632740850175, 0.800184797380489, 0.877116574262168, 0.744176233516029, 
0.895491256713494, 0.754392395912261, 0.84523184833694), SE = c(0.0266913760317568, 
0.0139572411642636, 0.0298650984979582, 0.0201259628698712, 0.0282726202954383, 
0.0163436379675761, 0.0318318361629623, 0.021869069504135, 0.0431481534727024, 
0.0246394931141003, 0.0422253027783889, 0.0319558514299214, 0.0440119347418992, 
0.0278955220291265, 0.0431149022439746, 0.0341196741636288, 0.0385933001462304, 
0.0197255664472985, 0.0366862943132076, 0.0263263788985977, 0.0404059290303008, 
0.0226118784363158, 0.0382923914727727, 0.0284643621735564), 
    EDU_B = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
    1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), levels = c("No BA", 
    "BA or Higher"), class = "factor"), DISC_LB = structure(c(1L, 
    1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 
    1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), levels = c("None", "Once or More"
    ), class = "factor"), AGE = c(1.53750183316577e-14, 1.53750183316577e-14, 
    1.53750183316577e-14, 1.53750183316577e-14, 1.53750183316577e-14, 
    1.53750183316577e-14, 1.53750183316577e-14, 1.53750183316577e-14, 
    1.53750183316577e-14, 1.53750183316577e-14, 1.53750183316577e-14, 
    1.53750183316577e-14, 1.53750183316577e-14, 1.53750183316577e-14, 
    1.53750183316577e-14, 1.53750183316577e-14, 1.53750183316577e-14, 
    1.53750183316577e-14, 1.53750183316577e-14, 1.53750183316577e-14, 
    1.53750183316577e-14, 1.53750183316577e-14, 1.53750183316577e-14, 
    1.53750183316577e-14), SEX = structure(c(1L, 1L, 1L, 1L, 
    2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 
    1L, 2L, 2L, 2L, 2L), levels = c("1", "2"), class = "factor"), 
    R_Ethgp = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L
    ), levels = c("1", "2", "3"), class = "factor"), hea = c(-2.95655099318715e-15, 
    -2.95655099318715e-15, -2.95655099318715e-15, -2.95655099318715e-15, 
    -2.95655099318715e-15, -2.95655099318715e-15, -2.95655099318715e-15, 
    -2.95655099318715e-15, -2.95655099318715e-15, -2.95655099318715e-15, 
    -2.95655099318715e-15, -2.95655099318715e-15, -2.95655099318715e-15, 
    -2.95655099318715e-15, -2.95655099318715e-15, -2.95655099318715e-15, 
    -2.95655099318715e-15, -2.95655099318715e-15, -2.95655099318715e-15, 
    -2.95655099318715e-15, -2.95655099318715e-15, -2.95655099318715e-15, 
    -2.95655099318715e-15, -2.95655099318715e-15), SUPP = c(3.04891640608375e-16, 
    3.04891640608375e-16, 3.04891640608375e-16, 3.04891640608375e-16, 
    3.04891640608375e-16, 3.04891640608375e-16, 3.04891640608375e-16, 
    3.04891640608375e-16, 3.04891640608375e-16, 3.04891640608375e-16, 
    3.04891640608375e-16, 3.04891640608375e-16, 3.04891640608375e-16, 
    3.04891640608375e-16, 3.04891640608375e-16, 3.04891640608375e-16, 
    3.04891640608375e-16, 3.04891640608375e-16, 3.04891640608375e-16, 
    3.04891640608375e-16, 3.04891640608375e-16, 3.04891640608375e-16, 
    3.04891640608375e-16, 3.04891640608375e-16)), class = "data.frame", row.names = c("1", 
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", 
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24"
))

How can I fix the problem?

Thank you!

I want to have only one error bar for each bar, but the code that I run shows multiple overlapping bar.

1

There are 1 best solutions below

2
stefan On

As I already guessed in my comment the issue is that you multiple rows for each combo of EDU_B and DISC_LB in your dataset, i.e. 6 rows in total reflecting the combos of SEX and R_Ethgp.

One option to fix that would be to use facetting as I do below or filter your dataset or ...

library(ggplot2)

ggplot(d_graph, aes(x = EDU_B, y = Predicted, fill = DISC_LB)) +
  geom_bar(stat = "identity", position = position_dodge(width = .5), width = .5) +
  geom_errorbar(aes(ymin = CILo, ymax = CIHi),
    position = position_dodge(width = .5), width = .25
  ) +
  labs(
    x = "EDU_B",
    y = "Vac",
    fill = "DISC_LB"
  ) +
  theme_bw(base_size = 12) +
  theme(
    axis.line = element_line(color = "black"),
    axis.ticks = element_line(color = "black"),
    panel.border = element_blank(),
    legend.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  scale_fill_manual(values = colours) +
  facet_grid(SEX ~ R_Ethgp, labeller = label_both)

enter image description here

EDIT And if you just want to show the effect for two variables holding all other covariates fixed at some average value, then you you have to set up your data accordingly.

Wasn't able to find a function ggplotPredict when googling. Instead I use ggeffects::ggpredict in the code below and some random fake example data:

library(ggeffects)
library(ggplot2)

### Example data
set.seed(1)

dat3 <- expand.grid(
  EDU_B = c("No BA", "BA or Higher"),
  DISC_LB = c("None", "Once or More"),
  AGE = c(1:10),
  SEX = factor(c("1", "2")),
  R_Ethgp = factor(c("1", "2", "3")),
  hea = c(1:10),
  SUPP = c(1:10),
  id = 1:10
)
dat3$Vac <- sample(0:1, nrow(dat3), replace = TRUE)
###

m_graph <- glm(Vac ~ AGE + SEX + R_Ethgp + hea + SUPP + EDU_B * DISC_LB,
  data = dat3, family = "binomial"(link = "logit")
)

d_graph <- ggeffects::ggpredict(m_graph, terms = c("EDU_B", "DISC_LB"))

colours <- setNames(
  c("gray", "black"),
  c("None", "Once or More")
)

ggplot(d_graph, aes(x = x, y = predicted, fill = group)) +
  geom_bar(stat = "identity", position = position_dodge(width = .5), width = .5) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
    position = position_dodge(width = .5), width = .25
  ) +
  labs(
    x = "EDU_B",
    y = "Vac",
    fill = "DISC_LB"
  ) +
  theme_bw(base_size = 12) +
  theme(
    axis.line = element_line(color = "black"),
    axis.ticks = element_line(color = "black"), panel.border = element_blank(),
    legend.background = element_blank(),
    panel.grid.major = element_blank(), panel.grid.minor = element_blank()
  ) +
  scale_fill_manual(values = colours)