ggplot color axis labels based on variable

2.3k Views Asked by At

I have a dataframe called df_plot, and I am trying to create a dumbbell plot sorted by a column (median age 2020), with the axis labels colored according to another column (color). This is very close, but can someone explain to me why the colors of the axis labels do not match? As you can see from the dataframe, the Solomon Islands are supposed to be red, but they are drawn in green.

Really grateful for any tips

# Sort the rows by 2020 median age.
df_plot <- arrange(df_plot, Median.Age.2020)

# One dumbbell per country running from the 2020 value (x) to the 2030 value
# (xend), with the 2025 value overlaid as an extra point. Countries are
# ordered on the y axis by their 2020 median age.
ggplot(
  df_plot,
  aes(
    x = Median.Age.2020,
    xend = Median.Age.2030,
    y = reorder(Country, Median.Age.2020)
  )
) +
  geom_dumbbell(
    size = 1.4,
    color = "#5E5E5E",
    colour_x = "#6E0019",
    colour_xend = "#FF5179",
    dot_guide = TRUE,
    dot_guide_size = 0.4,
    size_x = 3.5,
    size_xend = 3.5
  ) +
  geom_point(
    aes(x = Median.age.2025, y = Country),
    size = 3.5,
    color = "#A50026"
  ) +
  xlim(14, 26) +
  labs(x = "Median Age", y = "") +
  theme_classic() +
  theme(
    # This is the call the question is about: fct_reorder() hands
    # element_text() a factor rather than the raw colour strings.
    axis.text.y = element_text(
      colour = fct_reorder(df_plot$color, df_plot$Median.Age.2020)
    ),
    axis.ticks.y = element_blank()
  )

Image with wrong colors

Dataframe called df_plot

(I am unsure how you add a dataframe into stackoverflow, so copied the header here - picture explains the structure. Only difference is that I have changed color to hex codes)

"Country"   "HMTC.Region"   "Median.Age.2020"   "Median.age.2025"   "Median.Age.2030"   "Population_2020"   "Population_2030"   "id"    "color"
"Niger" "Africa"    15.2    15.6    16.1    24075000    34994000    1   "#00B0F0"
"Mali"  "Africa"    16.3    17  17.8    20284000    27057000    2   "#00B0F0"
"Chad"  "Africa"    16.6    17.2    18  16285000    21460000    3   "#00B0F0"
"Uganda"    "Africa"    16.7    17.8    19.1    47188000    63842000    4   "#00B0F0"
"Angola"    "Africa"    16.7    17.2    17.9    32827000    44712000    5   "#00B0F0"
"Burundi"   "Africa"    17.3    17.8    18.7    11939000    15799000    6   "#00B0F0"
2

There are 2 best solutions below

2
On

You should really provide a reproducible example and name the required packages.

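You turn your colors into a factor (via fct_reorder()), so they are no longer read as color names; the factor's integer codes are used instead, which is why you end up with black, red, green. If you just pass the color column as is, the values will be interpreted correctly: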

library(forestplot)
library(dplyr)
library(ggalt)

# Ten-row example data: one row per country with its region, the median age
# for 2020 / 2025 / 2030, a grouping label and the colour wanted for its
# axis label. Row names are the row numbers carried over from the source table.
df_plot <- structure(
  list(
    Country = c(
      "Solomon Islands", "Afghanistan", "Niger", "Mali", "Chad",
      "Uganda", "Angola", "Democratic Republic of Congo", "Burundi",
      "Mozambique"
    ),
    HMTC.Region = c("Asia Pacific", "Middle East", rep("Africa", 8)),
    Median.Age.2020 = c(
      19.9, 18.4, 15.2, 16.3, 16.6, 16.7, 16.7, 17, 17.3, 17.6
    ),
    Median.age.2025 = c(
      20.4, 19.9, 15.6, 17, 17.2, 17.8, 17.2, 17.5, 17.8, 18.3
    ),
    Median.Age.2030 = c(
      21.1, 21.5, 16.1, 17.8, 18, 19.1, 17.9, 18.2, 18.7, 19
    ),
    age_group = rep("box1", 10),
    color = c("Red", "Orange", rep("Blue", 8))
  ),
  class = "data.frame",
  row.names = c(28L, 15L, 1:8)
)

# Sort the rows by 2020 median age.
df_plot <- df_plot %>% arrange(Median.Age.2020)

# The colour vector given to element_text() is applied to the axis labels by
# position, i.e. in the order of the y-axis factor levels. reorder() breaks
# ties between equal median ages by sorted country name, whereas arrange()
# keeps tied rows in their incoming order, so plain row order can pair a
# country with another country's colour. Look the colours up by country in
# the exact level order that reorder() produces instead.
country_by_age <- reorder(df_plot$Country, df_plot$Median.Age.2020)
axis_colours <- df_plot$color[match(levels(country_by_age), df_plot$Country)]

# One dumbbell per country from the 2020 value (x) to the 2030 value (xend),
# plus an extra point for 2025.
ggplot(
  df_plot,
  aes(
    x = Median.Age.2020,
    xend = Median.Age.2030,
    y = reorder(Country, Median.Age.2020)
  )
) +
  geom_dumbbell(
    size = 1.4, color = "#5E5E5E",
    colour_x = "#6E0019", colour_xend = "#FF5179",
    dot_guide = TRUE,
    dot_guide_size = 0.4,
    size_x = 3.5,
    size_xend = 3.5
  ) +
  geom_point(
    aes(x = Median.age.2025, y = Country),
    size = 3.5, color = "#A50026"
  ) +
  xlab("Median Age") +
  xlim(14, 26) +
  theme_classic() +
  theme(
    # Raw colour strings (not a factor), ordered like the axis labels.
    axis.text.y = element_text(colour = axis_colours),
    axis.ticks.y = element_blank()
  ) +
  ylab("")
#> Warning: Vectorized input to `element_text()` is not officially supported.
#> Results may be unexpected or may change in future versions of ggplot2.

Created on 2020-05-23 by the reprex package (v0.3.0)

0
On

Touch wood, I have figured it out. The above code worked perfectly but did not cope with ties, i.e. countries that share the same Median.Age.2020 value. So I needed to add a second sort key (Country) to the arrange: df<-df %>% arrange(Median.Age.2020,Country) %>% mutate(id=row_number())

library(dplyr)
library(ggalt)
library(ggplot2)
library(tidyverse)
library(ggalt)
library(ggrepel)
library(RColorBrewer)

# Population threshold: 10 million.
limit <- 10E6

# Load the raw table, then:
#   * drop rows whose HMTC region is recorded as "N/A",
#   * keep only rows with at least `limit` people in both 2020 and 2030,
#   * strip surrounding whitespace from the country names,
#   * sort by 2020 median age (ties broken by country name) and number the
#     rows in that order.
df <- read.csv("..Median_age.csv", stringsAsFactors = FALSE) %>%
  filter(
    HMTC.Region != "N/A",
    Population_2020 >= limit,
    Population_2030 >= limit
  ) %>%
  mutate(Country = str_trim(Country)) %>%
  arrange(Median.Age.2020, Country) %>%
  mutate(id = row_number())




# Axis-label colour for each HMTC region. Keys are whitespace-trimmed region
# names; any region not listed here falls back to `default_colour`.
region_colours <- c(
  "Africa" = "#00B0F0",
  "Europe" = "#FFBD33",
  "Asia Pacific" = "#7030A0",
  "Eastern Europe & Central Asia" = "#5F589E",
  "South Asia" = "#F6003B",
  "Latin America" = "#82C836",
  "Middle East" = "#A19F57",
  "North America" = "#002060"
)
default_colour <- "#4D4D4D"

# Keep only the columns the plot needs. Selecting from `df` (rather than
# rebuilding the data frame by hand with a fixed row count) stays valid
# however many rows survive the earlier filtering.
df_plot <- df %>%
  select(
    Country, HMTC.Region,
    Median.Age.2020, Median.age.2025, Median.Age.2030
  ) %>%
  mutate(
    # Trim before the lookup so stray leading/trailing spaces in the region
    # column (e.g. "Asia Pacific ") still match their key.
    color = coalesce(
      unname(region_colours[str_trim(HMTC.Region)]),
      default_colour
    )
  )


# df_plot <- df_plot %>% arrange(Median.Age.2020)

# The colour vector given to element_text() is applied to the axis labels by
# position, i.e. in the order of the y-axis factor levels. Rather than relying
# on the row order of df_plot happening to agree with how reorder() breaks
# ties, look the colours up by country in reorder()'s own level order.
country_by_age <- reorder(df_plot$Country, df_plot$Median.Age.2020)
axis_colours <- df_plot$color[match(levels(country_by_age), df_plot$Country)]

# One dumbbell per country from the 2020 value (x) to the 2030 value (xend),
# plus an extra point for 2025.
ggplot(
  df_plot,
  aes(
    x = Median.Age.2020,
    xend = Median.Age.2030,
    y = reorder(Country, Median.Age.2020)
  )
) +
  geom_dumbbell(
    size = 1.4, color = "#5E5E5E",
    colour_x = "#6E0019", colour_xend = "#FF5179",
    dot_guide = TRUE,
    dot_guide_size = 0.4,
    size_x = 3.5,
    size_xend = 3.5
  ) +
  geom_point(
    aes(x = Median.age.2025, y = Country),
    size = 3.5, color = "#A50026"
  ) +
  xlab("Median Age") +
  xlim(14, 70) +
  theme_classic() +
  theme(
    # Raw colour strings, ordered like the axis labels.
    axis.text.y = element_text(colour = axis_colours),
    axis.ticks.y = element_blank()
  ) +
  ylab("")