Making an alluvial/sankey diagram using the first axis as the fill

246 Views Asked by At

I have this data:

https://docs.google.com/spreadsheets/d/18sTSOzVEmSEI2KGfGSvRT_0BbzQ9n87kCGZH-cSULCs/edit?usp=sharing (the code to reproduce it is included below, at the end of the question)

I use this code:

# Attempt from the question: a two-axis alluvial plot of mode choice,
# with the "before" column on axis1 and the "after" column on axis2,
# and flow/stratum heights given by Freq.
library(ggplot2)
library(ggalluvial)
ggplot(data = modechoice,
       aes(y = Freq, axis1 = s2_vervoermiddel_werkstudie_voor, axis2 = s2_vervoermiddel_werkstudie_na)) +
  # NOTE: "black" is inside aes(), so it is mapped like a data value with a
  # single level rather than used as a literal colour; the fill scale added
  # below then picks the colour for that one level.
  geom_alluvium(aes(fill = "black"), width = 1/12) +
  # Here fill/color are set outside aes(), so they are literal colours.
  geom_stratum(width = 1/12, fill = "black", color = "grey") +
  # Label each stratum box with its category name as computed by the stratum stat.
  geom_label(stat = "stratum", aes(label = after_stat(stratum))) +
  # Name the two axis positions on the x axis.
  scale_x_discrete(limits = c("Before", "After"), expand = c(.05, .05)) +
  scale_fill_brewer(type = "qual", palette = "Set1") +
  ggtitle("Mode choice before corona and expected after")

And I get this result:

enter image description here

Not really what you would hope for.

First of all, I want the fill to be coloured by axis1, so s2_vervoermiddel_werkstudie_voor should dictate the colour. However, I get an error when I try to do that (i.e. when I replace "black" with s2_vervoermiddel_werkstudie_voor inside aes(fill = ...)):

(Error: Continuous value supplied to discrete scale)

Second, how do I change the labels to not look like we are a decade behind?

Third, how do I change the ordering?

# dput() output for the `modechoice` data used by the plotting code:
# a 75-row tibble with two character columns (mode before / mode after,
# both containing NA for some rows) and a numeric Freq column.
# The expression is not assigned here; to reproduce the plots, assign it
# first, e.g. `modechoice <- structure(...)`.
structure(list(s2_vervoermiddel_werkstudie_voor = c("Auto (bestuurder)", 
"Auto (bestuurder)", "Auto (bestuurder)", "Auto (bestuurder)", 
"Auto (bestuurder)", "Auto (bestuurder)", "Auto (bestuurder)", 
"Auto (bestuurder)", "Auto (bestuurder)", "Auto (bestuurder)", 
"Auto (passagier)", "Auto (passagier)", "Auto (passagier)", "Auto (passagier)", 
"Auto (passagier)", "Auto (passagier)", "Auto (passagier)", "Auto (passagier)", 
"Trein", "Trein", "Trein", "Trein", "Trein", "Trein", "Trein", 
"Trein", "Trein", "Trein", "Bus/Tran/Metro", "Bus/Tran/Metro", 
"Bus/Tran/Metro", "Bus/Tran/Metro", "Bus/Tran/Metro", "Bus/Tran/Metro", 
"Bus/Tran/Metro", "Bus/Tran/Metro", "Fiets", "Fiets", "Fiets", 
"Fiets", "Fiets", "Fiets", "Fiets", "Fiets", "Fiets", "Fiets", 
"E-bike/speed pedelec", "E-bike/speed pedelec", "E-bike/speed pedelec", 
"E-bike/speed pedelec", "Scooter/brommer/motor", "Scooter/brommer/motor", 
"Scooter/brommer/motor", "Scooter/brommer/motor", "Scooter/brommer/motor", 
"Lopen", "Lopen", "Lopen", "Lopen", "Lopen", "Lopen", "Lopen", 
"NVA", "NVA", "NVA", "NVA", "NVA", NA, NA, NA, NA, NA, NA, NA, 
NA), s2_vervoermiddel_werkstudie_na = c("Auto (bestuurder)", 
"Auto (passagier)", "Trein", "Bus/Tran/Metro", "Fiets", "E-bike/speed pedelec", 
"Scooter/brommer/motor", "Lopen", "NVA", NA, "Auto (bestuurder)", 
"Auto (passagier)", "Trein", "Bus/Tran/Metro", "Fiets", "Lopen", 
"NVA", NA, "Auto (bestuurder)", "Auto (passagier)", "Trein", 
"Bus/Tran/Metro", "Fiets", "E-bike/speed pedelec", "Scooter/brommer/motor", 
"Lopen", "NVA", NA, "Auto (bestuurder)", "Auto (passagier)", 
"Trein", "Bus/Tran/Metro", "Fiets", "E-bike/speed pedelec", "NVA", 
NA, "Auto (bestuurder)", "Auto (passagier)", "Trein", "Bus/Tran/Metro", 
"Fiets", "E-bike/speed pedelec", "Scooter/brommer/motor", "Lopen", 
"NVA", NA, "Auto (bestuurder)", "Fiets", "E-bike/speed pedelec", 
NA, "Auto (bestuurder)", "Fiets", "E-bike/speed pedelec", "Scooter/brommer/motor", 
NA, "Auto (bestuurder)", "Trein", "Bus/Tran/Metro", "Fiets", 
"Lopen", "NVA", NA, "Auto (bestuurder)", "Auto (passagier)", 
"Fiets", "NVA", NA, "Auto (bestuurder)", "Trein", "Bus/Tran/Metro", 
"Fiets", "E-bike/speed pedelec", "Lopen", "NVA", NA), Freq = c(441, 
2, 11, 1, 21, 12, 3, 3, 3, 46, 4, 9, 1, 1, 2, 1, 1, 1, 25, 3, 
156, 1, 22, 4, 2, 2, 6, 18, 10, 1, 7, 49, 17, 3, 2, 10, 30, 1, 
28, 10, 348, 11, 4, 4, 6, 39, 2, 2, 53, 11, 4, 1, 1, 13, 2, 2, 
5, 2, 8, 26, 1, 4, 1, 1, 1, 22, 1, 11, 4, 2, 15, 2, 1, 2, 379
)), row.names = c(NA, -75L), class = c("tbl_df", "tbl", "data.frame"
))

classes:

tibble [75 x 3] (S3: tbl_df/tbl/data.frame)
 $ s2_vervoermiddel_werkstudie_voor: chr [1:75] "Auto (bestuurder)" "Auto (bestuurder)" "Auto (bestuurder)" "Auto (bestuurder)" ...
 $ s2_vervoermiddel_werkstudie_na  : chr [1:75] "Auto (bestuurder)" "Auto (passagier)" "Trein" "Bus/Tran/Metro" ...
 $ Freq                            : num [1:75] 441 2 11 1 21 12 3 3 3 46 ...
2

There are 2 best solutions below

0
On BEST ANSWER

example:


## Alluvial plot of mode choice before vs. after, with flows coloured by the
## "before" mode and plain-text stratum labels.
## Load the plotting packages so the snippet runs on its own:
library(ggplot2)
library(ggalluvial)

## remove NAs: keep only rows where both the "before" and the "after" mode
## are known (base R subsetting, so dplyr/magrittr do not need to be loaded)
mode_cols <- c("s2_vervoermiddel_werkstudie_voor",
               "s2_vervoermiddel_werkstudie_na")
modechoice_complete <- modechoice[complete.cases(modechoice[mode_cols]), ]

ggplot(data = modechoice_complete,
       aes(y = Freq,
           axis1 = s2_vervoermiddel_werkstudie_voor,
           axis2 = s2_vervoermiddel_werkstudie_na)) +
    geom_alluvium(width = 1/12,
                  ## link alluvium colors to variable:
                  aes(fill = s2_vervoermiddel_werkstudie_voor)
                  ) +
    geom_stratum(width = 1/12,
                 ## change black stratum color here:
                 fill = "red",
                 color = "grey") +
    ## change geom_label to geom_text for leaner appearance:
    geom_text(stat = "stratum",
              aes(label = after_stat(stratum),
                  ## left-adjust labels on the first axis and right-adjust
                  ## them on the second; derived from the computed axis
                  ## position, so it does not depend on the number of strata
                  hjust = ifelse(after_stat(x) == 1, 0, 1)),
              ## rotate labels if desired:
              angle = 0
              ) +
    scale_x_discrete(limits = c("Before", "After"), expand = c(.05, .05)) +
    scale_fill_brewer(type = "qual", palette = "Set1",
                      ## skip color legend:
                      guide = "none"
                      ) +
    ggtitle("Mode choice before corona and expected after")

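For the ordering: convert the mode columns to factors and reorder their levels with fct_reorder (from forcats), using the cumulative sum (cumsum) of Freq as the sorting value. I'm in a rush, so sorry for not working out the code for that part.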

1
On

You need to specify the fill color according to the first axis variable. The labels are going to be difficult to place in view of their size and the limited space of some of the nodes, but something like this might work for you:

library(ggplot2)
library(ggalluvial)

# Two-axis alluvial plot: flows are coloured by the "before" mode, strata are
# drawn as black boxes, and the category names are written outside the boxes.
ggplot(
  modechoice,
  mapping = aes(
    y = Freq,
    axis1 = s2_vervoermiddel_werkstudie_voor,
    axis2 = s2_vervoermiddel_werkstudie_na
  )
) +
  geom_alluvium(
    mapping = aes(fill = s2_vervoermiddel_werkstudie_voor),
    width = 1 / 12
  ) +
  geom_stratum(fill = "black", colour = "grey", width = 1 / 12) +
  # "Before" names: right-aligned, just left of the first axis.
  geom_text(
    mapping = aes(label = s2_vervoermiddel_werkstudie_voor),
    stat = "stratum",
    x = 0.95,
    hjust = 1,
    colour = "black"
  ) +
  # "After" names: left-aligned, just right of the second axis.
  geom_text(
    mapping = aes(label = s2_vervoermiddel_werkstudie_na),
    stat = "stratum",
    x = 2.05,
    hjust = 0,
    colour = "black"
  ) +
  # Wider expansion than the default leaves room for the outside labels.
  scale_x_discrete(limits = c("Before", "After"), expand = c(0.2, 0.2)) +
  labs(title = "Mode choice before corona and expected after") +
  theme_void() +
  theme(legend.position = "none")

enter image description here