Manually change order of stacked bar charts in ggplot

1.1k Views Asked by At

I want to manually change the stacking order of my stacked bar charts so that the labels stay readable: some groups have so few observations that their labels end up on top of each other. My goal is to set the order to "unclassified", "Fungi", "Viridiplantae" so that the bars with few observations are separated from each other.

I tried the proposed solution here but it would not work. Maybe I am missing something?

# Inspect the current level order of kingdom; the line below is the console output.
levels(as.factor(totaltibble$kingdom))
[1] "Fungi"         "unclassified"  "Viridiplantae"

# Melt the phyloseq object into a long-format data frame.
phytibble <- psmelt(physeq_comp)

# Total abundance for every Sample / superkingdom / kingdom combination.
# The summary column is named `sum(Abundance)` explicitly, which is the same
# name dplyr would generate automatically and the one the plot code refers to.
totaltibble <- summarize(
  group_by(phytibble, Sample, superkingdom, kingdom),
  `sum(Abundance)` = sum(Abundance)
)

        # Stacked bar chart of the summed abundance per superkingdom, filled by
        # kingdom and faceted by Sample. The plot-level fill mapping wraps
        # kingdom in factor() with the desired level order
        # ("unclassified", "Fungi", "Viridiplantae").
        ggplot(totaltibble, aes(superkingdom, `sum(Abundance)`, fill=factor(kingdom, levels=c("unclassified", "Fungi", "Viridiplantae"))))+
      # NOTE(review): this layer-level aes(fill=kingdom) takes precedence over
      # the plot-level fill above, so the bars use the original kingdom column
      # and the custom level order is not applied.
      geom_col(aes(fill=kingdom))+
      # Y axis title plus number formatting with "." as thousands separator
      # and "," as decimal mark.
      scale_y_continuous("Anzahl der Reads", labels = comma_format(big.mark = ".", decimal.mark = ","))+
      # NOTE(review): the unnamed labels are matched to the fill levels by
      # position; this order fits the alphabetical levels (Fungi, unclassified,
      # Viridiplantae) and would need reordering if the level order changes.
      scale_fill_manual("Reich", labels = c("Fungi", "unklassifiziert", "Viridiplantae"), values = wes_palette("Darjeeling1") )+
      # X axis title and replacement labels for the four superkingdom values.
      scale_x_discrete("Domäne", labels = c("Backteria", "Eukaryota", "unklassifiziert", "Viren"))+
      ggtitle("Absolute Häufigkeit nach Reich und Domäne")+
       # One panel per Sample; sample_labeller is defined elsewhere.
       facet_grid(~Sample, labeller=(Sample=sample_labeller))+
      # Print the summed abundance as a text label.
      # NOTE(review): geom_text uses its default position here while geom_col
      # stacks, so labels are presumably not aligned with the stacked
      # segments - confirm.
      geom_text(aes(label=`sum(Abundance)`), vjust=1.6)+
      theme_bw()

the Plot

The object I am creating the plot from:

# dput() output of the grouped tibble (class grouped_df, 24 rows) used for the plot.
# NOTE(review): the columns in this dump are Sample, superkingdom, totalreads,
# kingdom, abundance and percent, whereas the plotting code refers to
# `sum(Abundance)` - presumably the dump comes from a different version of the
# object; confirm before reusing it with the code above.
structure(list(Sample = c("MB5_2020_nano", "MB5_2020_nano", "MB6_2020_nano", 
"MB6_2020_nano", "MB5_2020_ill", "MB5_2020_ill", "MB6_2020_ill", 
"MB6_2020_ill", "MB5_2020_nano", "MB6_2020_nano", "MB5_2020_ill", 
"MB5_2020_nano", "MB5_2020_nano", "MB6_2020_ill", "MB6_2020_nano", 
"MB6_2020_nano", "MB6_2020_ill", "MB5_2020_nano", "MB6_2020_nano", 
"MB5_2020_ill", "MB6_2020_ill", "MB5_2020_ill", "MB5_2020_ill", 
"MB6_2020_ill"), superkingdom = c("Eukaryota", "unclassified", 
"Eukaryota", "unclassified", "unclassified", "Eukaryota", "Eukaryota", 
"unclassified", "Bacteria", "Bacteria", "Eukaryota", "Eukaryota", 
"Eukaryota", "Eukaryota", "Eukaryota", "Eukaryota", "Bacteria", 
"Viruses", "Viruses", "Bacteria", "Eukaryota", "Eukaryota", "Viruses", 
"Viruses"), totalreads = c(740180, 740180, 220406, 220406, 122691, 
122691, 41791, 41791, 740180, 220406, 122691, 740180, 740180, 
41791, 220406, 220406, 41791, 740180, 220406, 122691, 41791, 
122691, 122691, 41791), kingdom = c("Fungi", "unclassified", 
"Fungi", "unclassified", "unclassified", "Fungi", "Fungi", "unclassified", 
"unclassified", "unclassified", "unclassified", "unclassified", 
"Viridiplantae", "unclassified", "unclassified", "Viridiplantae", 
"unclassified", "unclassified", "unclassified", "unclassified", 
"Viridiplantae", "Viridiplantae", "unclassified", "unclassified"
), abundance = c(440891, 295055, 126035, 93059, 61774, 60325, 
28618, 12905, 3548, 1021, 591, 437, 224, 220, 191, 93, 47, 25, 
7, 1, 1, 0, 0, 0), percent = c(59.5653759896241, 39.8626009889486, 
57.1831075379073, 42.2216273604167, 50.3492513713312, 49.1682356489066, 
68.4788590844919, 30.8798545141298, 0.479342862546948, 0.463236028057312, 
0.481697924053109, 0.0590396930476371, 0.0302629090221298, 0.526429135459788, 
0.0866582579421613, 0.0421948585791675, 0.112464406211864, 0.00337755681050555, 
0.0031759570973567, 0.000815055709057714, 0.0023928597066354, 
0, 0, 0)), row.names = c(NA, -24L), groups = structure(list(Sample = c("MB5_2020_ill", 
"MB5_2020_ill", "MB5_2020_ill", "MB5_2020_ill", "MB5_2020_nano", 
"MB5_2020_nano", "MB5_2020_nano", "MB5_2020_nano", "MB6_2020_ill", 
"MB6_2020_ill", "MB6_2020_ill", "MB6_2020_ill", "MB6_2020_nano", 
"MB6_2020_nano", "MB6_2020_nano", "MB6_2020_nano"), superkingdom = c("Bacteria", 
"Eukaryota", "unclassified", "Viruses", "Bacteria", "Eukaryota", 
"unclassified", "Viruses", "Bacteria", "Eukaryota", "unclassified", 
"Viruses", "Bacteria", "Eukaryota", "unclassified", "Viruses"
), totalreads = c(122691, 122691, 122691, 122691, 740180, 740180, 
740180, 740180, 41791, 41791, 41791, 41791, 220406, 220406, 220406, 
220406), .rows = structure(list(20L, c(6L, 11L, 22L), 5L, 23L, 
    9L, c(1L, 12L, 13L), 2L, 18L, 17L, c(7L, 14L, 21L), 8L, 24L, 
    10L, c(3L, 15L, 16L), 4L, 19L), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, -16L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))
1

There is 1 best solution below

3
On

The issue appears to be that you have two separate places where you map fill to kingdom:

  1. In the top call within ggplot(... aes(fill=factor(kingdom...
  2. In the call to the geom: geom_col(aes(fill=kingdom))

When ggplot2 resolves aesthetic mappings, a mapping given in the geom call takes precedence over the plot-level mapping given in ggplot(). What's happening here is that geom_col(aes(fill=kingdom)) overrides what you state in the top call: the bars are filled by the original kingdom column instead of your re-leveled factor.

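The easy fix is to use geom_col() in place of geom_col(aes(fill=kingdom)). Note that the unnamed labels in scale_fill_manual() are matched to the factor levels by position, so once the level order changes they must be reordered to match, e.g. labels = c("unklassifiziert", "Fungi", "Viridiplantae").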

Alternative fixes if you would prefer:

  • Change kingdom before plotting: totaltibble$kingdom <- factor(totaltibble$kingdom, levels=c("unclassified", "Fungi", "Viridiplantae"))

  • Duplicate the aes(fill=... statement used in ggplot() to use in the geom_col() statement.

  • Remove the fill= statement in ggplot() and move it to geom_col().

In the top-level ggplot() call you reference kingdom and re-level it, but you also map it again in the second line: geom_col(aes(fill=kingdom)). Because the mapping is restated in geom_col(), ggplot uses the one from the geom call instead of the plot-level mapping. Just remove it from geom_col() - it is not needed there and is causing the problem - so the layer becomes plain geom_col() without the aes(). Alternatively, change kingdom before you plot, or use the same factor(... code inside aes() for geom_col().