Create ggplot that is Both A Grouped and Stacked Barplot, Using 2 different Columns

38 Views Asked by At

I am trying to create a combined grouped and stacked barplot, where the grouped bars correspond to two different columns, `Length_Groups` and `Depth_groups`, but I also want the bars to be stacked, using the classifications identified within these 2 columns. I have created 2 separate graphs for the 2 columns using facet wrap, which you can see here: enter image description here

But I want to combine these graphs, given that they have the same panels, y-axis, and x-axis. So therefore, I want the camera deployment bars (blues) to be grouped next to the species length (oranges/purples) based on the abbreviated x-axis that they share. Does anyone know how to do this?

Here is a sample of my dataframe (note that it is assigned to `df` here, while the plotting code below reads from `SDC`):

# Sample data in dput()-style form: a grouped tibble with 51 rows and the
# columns Length_Groups, Depth_groups, Water_Type, Abbreviation (device
# pairing label) and total (integer count). The `groups` attribute records
# grouping by Length_Groups, Depth_groups and Water_Type (22 groups).
# NOTE(review): this is assigned to `df`, but the plotting code reads `SDC`;
# presumably the same data under a different name -- confirm before running.
df = structure(list(Length_Groups = c("Large", "Large", "Large", "Large", 
"Large", "Large", "Large", "Large", "Large", "Large", "Large", 
"Medium", "Medium", "Medium", "Medium", "Medium", "Medium", "Medium", 
"Medium", "Medium", "Medium", "Medium", "Medium", "Medium", "Medium", 
"Medium", "Medium", "Medium", "Medium", "Medium", "Medium", "Medium", 
"Medium", "Medium", "Medium", "Not Reported", "Not Reported", 
"Not Reported", "Not Reported", "Not Reported", "Not Reported", 
"Not Reported", "Not Reported", "Not Reported", "Not Reported", 
"Not Reported", "Not Reported", "Not Reported", "Not Reported", 
"Not Reported", "Not Reported"), Depth_groups = c("3m to 40m", 
"3m to 40m", "3m to 40m", ">40m", ">40m", "Not Reported", "Not Reported", 
"Not Reported", "Surface to 3m", "Surface to 3m", "Surface to 3m", 
"3m to 40m", "3m to 40m", "3m to 40m", "3m to 40m", "3m to 40m", 
"3m to 40m", "3m to 40m", "3m to 40m", "3m to 40m", "3m to 40m", 
">40m", ">40m", ">40m", ">40m", "Not Reported", "Not Reported", 
"Not Reported", "Not Reported", "Surface to 3m", "Surface to 3m", 
"Surface to 3m", "Surface to 3m", "Surface to 3m", "Surface to 3m", 
"3m to 40m", "3m to 40m", "3m to 40m", "3m to 40m", ">40m", ">40m", 
">40m", ">40m", ">40m", "Not Reported", "Not Reported", "Not Reported", 
"Surface to 3m", "Surface to 3m", "Surface to 3m", "Surface to 3m"
), Water_Type = c("Freshwater", "Saltwater", "Saltwater", "Saltwater", 
"Saltwater", "Saltwater", "Saltwater", "Saltwater", "Saltwater", 
"Saltwater", "Saltwater", "Freshwater", "Freshwater", "Saltwater", 
"Saltwater", "Saltwater", "Saltwater", "Saltwater", "Saltwater", 
"Saltwater", "Saltwater", "Saltwater", "Saltwater", "Saltwater", 
"Saltwater", "Brackish", "Freshwater", "Saltwater", "Saltwater", 
"Freshwater", "Freshwater", "Freshwater", "Saltwater", "Saltwater", 
"Saltwater", "Freshwater", "Saltwater", "Saltwater", "Saltwater", 
"Freshwater", "Saltwater", "Saltwater", "Saltwater", "Saltwater", 
"Brackish", "Freshwater", "Saltwater", "Freshwater", "Freshwater", 
"Saltwater", "Saltwater"), Abbreviation = c("DD + AB", "AC + DD + AB", 
"DD + AB", "DD + AB", "SA + AB", "AC + B", "AC + Mobile", "SA + AB", 
"AC + Mobile + Stationary", "DD + AB + Mobile", "DD + Mobile", 
"AC + Mobile", "DD + Stationary", "AC + AB", "AC + B", "AC + DD + AB", 
"AC + Mobile", "AC + SA + B", "AC + Stationary", "DD + AB", "DD + Stationary", 
"AC + B", "AC + Mobile", "AC + Stationary", "DD + AB", "AC + Stationary", 
"DD + AB", "DD + AB", "DD + SA + AB", "AC + DD + Stationary", 
"AC + PIT + Stationary", "PIT + R + Stationary", "AC + DD + Stationary", 
"DD + AB", "SA + Stationary", "AC + PIT + R + Stationary", "DD + AB", 
"DD + PIT + AB", "DD + PIT + Mobile", "DD + AB", "AC + Mobile", 
"DD + AB", "DD + SA + AB", "SA + Mobile", "AC + Stationary", 
"AC + Stationary", "AC + Stationary", "PIT + Stationary", "R + Stationary", 
"DD + AB", "SA + Stationary"), total = c(1L, 1L, 4L, 3L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 3L, 2L, 2L, 
1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 
1L, 1L, 1L, 1L, 4L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -51L), groups = structure(list(
    Length_Groups = c("Large", "Large", "Large", "Large", "Large", 
    "Medium", "Medium", "Medium", "Medium", "Medium", "Medium", 
    "Medium", "Medium", "Not Reported", "Not Reported", "Not Reported", 
    "Not Reported", "Not Reported", "Not Reported", "Not Reported", 
    "Not Reported", "Not Reported"), Depth_groups = c("3m to 40m", 
    "3m to 40m", ">40m", "Not Reported", "Surface to 3m", "3m to 40m", 
    "3m to 40m", ">40m", "Not Reported", "Not Reported", "Not Reported", 
    "Surface to 3m", "Surface to 3m", "3m to 40m", "3m to 40m", 
    ">40m", ">40m", "Not Reported", "Not Reported", "Not Reported", 
    "Surface to 3m", "Surface to 3m"), Water_Type = c("Freshwater", 
    "Saltwater", "Saltwater", "Saltwater", "Saltwater", "Freshwater", 
    "Saltwater", "Saltwater", "Brackish", "Freshwater", "Saltwater", 
    "Freshwater", "Saltwater", "Freshwater", "Saltwater", "Freshwater", 
    "Saltwater", "Brackish", "Freshwater", "Saltwater", "Freshwater", 
    "Saltwater"), .rows = structure(list(1L, 2:3, 4:5, 6:8, 9:11, 
        12:13, 14:21, 22:25, 26L, 27L, 28:29, 30:32, 33:35, 36L, 
        37:39, 40L, 41:44, 45L, 46L, 47L, 48:49, 50:51), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -22L), .drop = TRUE))

I have tried using pivot tables, but it doesn't work because I have multiple groups within my column.

To make the graph above, this is the code I am using:

library(dplyr)
library(ggplot2)
library(stringr)
library("ggh4x")
library(forcats)
library(ggpubr)
library(grid)
library(tidyr)
library(viridis)

# Stacked bars of `total` per device pairing (Abbreviation), filled by the
# tagged-species length class, with one vertically stacked panel per Water_Type.
Size_plot <- SDC |>
  ungroup() |>
  mutate(
    # Order the pairings by their summed Saltwater totals; rows from any other
    # water type contribute 0 to that sum.
    Abbreviation = fct_reorder(
      Abbreviation,
      ifelse(Water_Type %in% "Saltwater", total, 0),
      .fun = sum
    )
  ) |>
  ggplot(
    aes(
      x = Abbreviation,
      y = total,
      fill = factor(
        Length_Groups,
        levels = c("Small", "Medium", "Large", "Not Reported")
      )
    )
  ) +
  geom_col(width = 0.5) +
  scale_fill_viridis(
    discrete = TRUE,
    option = "plasma",
    direction = -1,
    end = 0.75,
    breaks = c("Small", "Medium", "Large", "Not Reported"),
    labels = c(
      "Small (<24cm)", "Medium (24-140cm)", "Large (>140cm)", "Not Reported"
    )
  ) +
  # Bars start directly on the x axis (no padding on the y scale)
  scale_y_continuous(expand = c(0, 0)) +
  # One column of panels; draw axes on every panel but drop repeated labels
  facet_wrap2(~Water_Type, ncol = 1, axes = "all", remove_labels = "all") +
  theme_classic() +
  theme(
    # Axis text: larger, with rotated x labels
    axis.text.x = element_text(size = 12, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12),
    # No axis titles on this plot
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    # Legend text sizes
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 12),
    # Panel strips: no background box, larger text
    strip.background = element_blank(),
    strip.text.x = element_text(size = 12),
    # Extra space between the stacked panels
    panel.spacing = unit(1.5, "lines"),
    # Outer margin around the whole plot
    plot.margin = margin(t = 1, r = 0, b = 1.5, l = 4, unit = "cm")
  ) +
  # Legend title
  labs(fill = "Tagged Species \nMin. Length")


# Stacked bars of `total` per device pairing (Abbreviation), filled by the
# camera deployment depth class, with one vertically stacked panel per
# Water_Type.
depth_graph <- SDC |>
  ungroup() |>
  mutate(
    # Order the pairings by their summed Saltwater totals; rows from any other
    # water type contribute 0 to that sum.
    Abbreviation = fct_reorder(
      Abbreviation,
      ifelse(Water_Type %in% "Saltwater", total, 0),
      .fun = sum
    )
  ) |>
  ggplot(
    aes(
      x = Abbreviation,
      y = total,
      fill = factor(
        Depth_groups,
        levels = c("Surface to 3m", "3m to 40m", ">40m", "Not Reported")
      )
    )
  ) +
  geom_col(width = 0.5) +
  scale_fill_viridis(
    discrete = TRUE,
    direction = -1,
    end = 0.75,
    breaks = c("Surface to 3m", "3m to 40m", ">40m", "Not Reported")
  ) +
  # Bars start directly on the x axis (no padding on the y scale)
  scale_y_continuous(expand = c(0, 0)) +
  # One column of panels; draw axes on every panel but drop repeated labels
  facet_wrap2(~Water_Type, ncol = 1, axes = "all", remove_labels = "all") +
  theme_classic() +
  theme(
    # Axis text: larger, with rotated x labels
    axis.text.x = element_text(size = 12, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12),
    # Only the y axis keeps a title; push it away from the axis text
    axis.title.x = element_blank(),
    axis.title.y = element_text(
      size = 12,
      margin = margin(t = 0, r = 25, b = 0, l = 0)
    ),
    # Legend text sizes
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 12),
    # Panel strips: no background box, larger text
    strip.background = element_blank(),
    strip.text.x = element_text(size = 12),
    # Extra space between the stacked panels
    panel.spacing = unit(1.5, "lines"),
    # Outer margin around the whole plot
    plot.margin = margin(t = 1, r = 0, b = 1.5, l = 4, unit = "cm")
  ) +
  # y axis title and legend title
  labs(
    y = "Total Number of Manuscripts",
    fill = "Camera Deployment \nMax. Depth"
  )


# Place the depth and length plots side by side, then add a shared caption
# underneath both of them.
figure <- ggarrange(depth_graph, Size_plot, ncol = 2, nrow = 1) |>
  annotate_figure(
    bottom = textGrob("Medium-Scale Device Pairings", gp = gpar(cex = 1.2))
  )
figure
1

There is 1 best solution below

0
stefan On

In general the approach to achieve your desired result is pretty "standard" and already documented in several answers:

  1. To put the bars side by side in one plot you could use sneaky facets, which means to map your depth and length groups on x and facet by Abbreviation. Afterwards get rid of the facet look using theme options.

  2. To still have two individual fill scales for both groups you could use the ggnewscale package, which allows for multiple scales for the same aesthetic.

In your case, two things complicated applying the approach and required two hacks:

  1. As you already facet by Water_Type, I switched to facet_grid2, which however puts the label for Water_Type as the strip text in the y direction. To fix that, I used a geom_label layer to fake horizontal strip texts for Water_Type.

  2. When using sneaky facets, the strip text is used to "fake" the axis text. However, as you have rotated axis text, there is no option (or at least I haven't found one) to still center the labels in the horizontal direction. Instead, I used another hack by adding the axis text using ggh4x::facetted_pos_scales, where I added the Abbreviations as an "empty" x axis category to the x scale and only show this category on the x axis.

Finally note that I reshaped your data to long and summarized the data afterwards.

library(dplyr)
library(ggplot2)
library(ggh4x)
library(forcats)
library(ggnewscale)

# Long-format plotting data: one row per Water_Type x Abbreviation x
# (variable name, class) combination with the summed `total`.
dat <- SDC |>
  ungroup() |>
  mutate(
    # Order the pairings by their summed Saltwater totals; rows from any other
    # water type contribute 0 to that sum.
    Abbreviation = fct_reorder(
      Abbreviation,
      ifelse(Water_Type %in% "Saltwater", total, 0),
      .fun = sum
    )
  ) |>
  # Stack every remaining column into `name` (column name) / `value` (class)
  tidyr::pivot_longer(cols = -c(Abbreviation, Water_Type, total)) |>
  summarise(
    total = sum(total),
    .by = c(Water_Type, Abbreviation, name, value)
  ) |>
  mutate(
    # Length classes first, then depth classes; "Not Reported" is shared
    value = factor(
      value,
      levels = c(
        "Small", "Medium", "Large",
        "Surface to 3m", "3m to 40m", ">40m", "Not Reported"
      )
    )
  )

# Build one discrete x scale per Abbreviation level. Each scale has three
# positions (the two variable names with the Abbreviation between them), and
# only the Abbreviation is listed in `breaks`.
scale_x <- lapply(
  levels(dat$Abbreviation),
  function(abbr) {
    scale_x_discrete(
      limits = c("Depth_groups", abbr, "Length_Groups"),
      breaks = abbr,
      expand = c(0, 1.6)
    )
  }
)

# Make dataset for the Water_Type labels: one row per Water_Type, assigned to
# the middle Abbreviation panel so the text sits in the centre of each facet
# row.
dat_label <- dat |>
  summarise(
    # x = 2 is the middle of the three discrete x positions defined by the
    # per-panel scales in `scale_x`.
    x = 2,
    # Use the middle factor level rather than a hard-coded index, so the label
    # stays centred when the number of Abbreviation levels changes (for the
    # 23 levels in the sample data this is still level 12).
    Abbreviation = levels(Abbreviation)[ceiling(nlevels(Abbreviation) / 2)],
    .by = Water_Type
  ) |>
  mutate(
    # Restore the full level set so the row is matched to the same facet
    # column as in `dat`.
    Abbreviation = factor(Abbreviation, levels(dat$Abbreviation))
  )

# Combined plot: `name` (Length_Groups / Depth_groups) is mapped to x and the
# panels are faceted by Abbreviation, so the two stacked bars for each device
# pairing are drawn next to each other. Each bar type gets its own fill scale.
# NOTE(review): the order of layers and scales around new_scale_fill() matters;
# keep it as is when editing.
dat |>
  ggplot(aes(
    x = name,
    y = total
  )) +
  # Layer 1: rows whose `name` does not contain "Depth" (the length bars),
  # stacked by `value`.
  geom_col(
    data = ~ filter(.x, !grepl("Depth", name)),
    aes(
      fill = value
    ),
    width = 1.5
  ) +
  # Fill scale for the length bars (plasma palette).
  scale_fill_viridis_d(
    breaks = c("Small", "Medium", "Large", "Not Reported"),
    labels = c("Small (<24cm)", "Medium (24-140cm)", "Large (>140cm)", "Not Reported"),
    option = "plasma",
    direction = -1,
    end = 0.75,
    name = "Tagged Species\nMin. Length"
  ) +
  # Start a second, independent fill scale for the layers added below.
  ggnewscale::new_scale_fill() +
  # Layer 2: rows whose `name` does not contain "Length" (the depth bars),
  # stacked by `value`.
  geom_col(
    data = ~ filter(.x, !grepl("Length", name)),
    aes(
      fill = value
    ),
    width = 1.5
  ) +
  # Water_Type text anchored at the top edge of the panel (y = Inf) with no
  # fill and no border; it stands in for the strip text blanked in theme().
  geom_label(
    data = dat_label,
    aes(
      x = x, label = Water_Type
    ),
    y = Inf,
    vjust = 0,
    fill = NA,
    label.size = 0
  ) +
  # Fill scale for the depth bars (default viridis option).
  scale_fill_viridis_d(
    breaks = c("Surface to 3m", "3m to 40m", ">40m", "Not Reported"),
    direction = -1, end = .75,
    name = "Camera Deployment\nMax. Depth"
  ) +
  # No padding on the y scale, so bars start directly on the x axis.
  scale_y_continuous(expand = c(0, 0)) +
  # Turn clipping off so the labels placed at y = Inf are not cut off.
  coord_cartesian(clip = "off") +
  # Rows = Water_Type, columns = Abbreviation, with a free x scale per panel.
  facet_grid2(
    Water_Type ~ Abbreviation,
    switch = "x",
    axes = "x",
    remove_labels = "x",
    scales = "free_x"
  ) +
  # Apply the per-panel x scales built in `scale_x`.
  ggh4x::facetted_pos_scales(x = scale_x) +
  theme_classic() +
  # Remove the facet look: no strips, no x ticks, no horizontal gap between
  # panels; the top and left margins are enlarged.
  theme(
    axis.text.x = element_text(
      angle = 45,
      hjust = 1,
      vjust = 1
    ),
    axis.ticks.x = element_blank(),
    panel.spacing.y = unit(20, "pt"),
    panel.spacing.x = unit(0, "lines"),
    strip.text = element_blank(),
    strip.background = element_blank(),
    plot.margin = margin(
      t = 20, r = 5.5, l = 30, b = 5.5
    )
  ) +
  labs(
    y = "Total Number of Manuscripts",
    x = "Medium-Scale Device Pairings"
  )

enter image description here