It's the first time I'm using R, so sorry for any stupid mistakes. I need to make a Sankey chart to display flows between cities in a metropolitan area, but I am not satisfied with how the graph looks. I would like the labels to be outside the Sankey and the cities to be ordered from biggest to smallest flow.
This is my code (sorry, it's messy; I am afraid of erasing steps):
library(lodown)
library(magrittr)
library(dplyr)
library(stringr)
library(fs)
library(SAScii)
library(readr)
library(purrr)
library(survey)
library(tidyr)
library(ggplot2)
library(scales)
library(ggtext)
library(questionr)
library(networkD3)
# Fetch the "censo" catalog, keep the 2010 rows whose state matches "al",
# and hand that subset to lodown(); output goes under "data".
censo_catalog <- lodown::get_catalog(data_name = "censo", output_dir = "data")
censo_al_2010 <- dplyr::filter(
  censo_catalog,
  year == 2010,
  stringr::str_detect(state, "al")
)
catalog <- lodown::lodown(censo_al_2010, data_name = "censo")

# List what is now on disk
fs::dir_tree(path = "data")
# Columns to keep when reading the fixed-width person file
vars_censo <- c(
  "v0001", "v0002", "v1004", "v1006", "v6529", "v6530", "v6531", "v6532",
  "v0010", "v0606", "v0636", "v6362", "v6364", "v0660", "v6602", "v6604",
  "v0661", "v0662", "v0601"
)

# Parse the SAS import script into a column layout; lower-case the variable
# names so they can be compared with vars_censo
sas_input <- dplyr::mutate(
  SAScii::parse.SAScii(catalog$pes_sas),
  varname = stringr::str_to_lower(varname)
)

# One readr type code per column: "c" (character) or "d" (double) for the
# wanted columns, "_" (skip) for everything else
col_codes <- ifelse(sas_input$char, "c", "d")
col_codes[!(sas_input$varname %in% vars_censo)] <- "_"

# Read the fixed-width text file using the widths from the SAS layout
raw_censo <- readr::read_fwf(
  file = catalog$pes_file,
  col_positions = readr::fwf_widths(
    widths = abs(sas_input$width),
    col_names = sas_input$varname
  ),
  col_types = paste0(col_codes, collapse = "")
)
# Keep only the rows whose v0002 code is one of the 13 codes below
origin_codes <- c(
  "00409", "00508", "00607", "02207", "04302", "04708", "05200",
  "05507", "06448", "06901", "07701", "07909", "08907"
)
df.rmm <- data.frame(
  dplyr::filter(raw_censo, v0002 %in% origin_codes)
)
# Give the census variables readable column names.
# UFesc is taken from v6362: the original mapped v6364 to both UFesc and
# mun.esc, while v6362 (read in via vars_censo) was never used. This mirrors
# the uf.trab = v6602 / mun.trab = v6604 pair below.
# NOTE(review): confirm against the census data dictionary that v6362 is the
# state-of-school code.
df.rmm <- rename(
  df.rmm,
  UF = v0001,
  municipio = v0002,
  RM = v1004,
  urbrur = v1006,
  rend.dom = v6529,
  rend.dom.sm = v6530,
  rend.pc = v6531,
  rend.pc.sm = v6532,
  peso = v0010,
  cor = v0606,
  est.no.mun = v0636,
  UFesc = v6362,
  mun.esc = v6364,
  trab.no.mun = v0660,
  uf.trab = v6602,
  mun.trab = v6604,
  commute = v0661,
  tempo.desl = v0662,
  sexo = v0601
)
# Replace the origin municipality codes with display labels
# ("<city>, <share>%"). All shares use a decimal comma; the original label
# for Barra de São Miguel used "0.8%", which did not match the "0,8%"
# spelling used for the same city elsewhere in this script.
origin_labels <- c(
  "00409" = "Atalaia, 5,6%",
  "00508" = "Barra de Santo Antônio, 2,9%",
  "00607" = "Barra de São Miguel, 0,8%",
  "02207" = "Coqueiro Seco, 3,1%",
  "04302" = "Maceió, 22,3%",
  "04708" = "Marechal Deodoro, 10,5%",
  "05200" = "Messias, 5,4%",
  "05507" = "Murici, 3,2%",
  "06448" = "Paripueira, 3,6%",
  "06901" = "Pilar, 5,3%",
  "07701" = "Rio Largo, 26,3%",
  "07909" = "Santa Luzia do Norte, 3%",
  "08907" = "Satuba, 8,2%"
)
# Only rows whose code is in the table are relabelled; anything else is
# left untouched, exactly as the one-assignment-per-code version did.
origin_idx <- match(df.rmm$municipio, names(origin_labels))
origin_known <- !is.na(origin_idx)
df.rmm$municipio[origin_known] <- unname(origin_labels[origin_idx[origin_known]])
# Drop the rows with a missing commute value
has_commute <- !is.na(df.rmm$commute)
df.rmm2 <- df.rmm[has_commute, ]

# Keep only the rows whose workplace code (mun.trab) is one of the 13 codes
# below
dest_codes <- c(
  "2700409", "2700508", "2700607", "2702207", "2704302", "2704708",
  "2705200", "2705507", "2706448", "2706901", "2707701", "2707909",
  "2708907"
)
df.rmm3 <- data.frame(
  dplyr::filter(df.rmm2, mun.trab %in% dest_codes)
)
# Replace the destination (workplace) codes with display labels
dest_labels <- c(
  "2700409" = "Atalaia, 3%",
  "2700508" = "Barra de Santo Antônio, 0,6%",
  "2700607" = "Barra de São Miguel, 1,5%",
  "2702207" = "Coqueiro Seco, 0,2%",
  "2704302" = "Maceió, 63,8%",
  "2704708" = "Marechal Deodoro, 7%",
  "2705200" = "Messias, 1,9%",
  "2705507" = "Murici, 1%",
  "2706448" = "Paripueira, 1,3%",
  "2706901" = "Pilar, 5%",
  "2707701" = "Rio Largo, 12,3%",
  "2707909" = "Santa Luzia do Norte, 0,7%",
  "2708907" = "Satuba, 1,8%"
)
# match() compares the codes the same way `==` does, so this relabels the
# same rows as one assignment per code would
dest_idx <- match(df.rmm3$mun.trab, names(dest_labels))
dest_known <- !is.na(dest_idx)
df.rmm3$mun.trab[dest_known] <- unname(dest_labels[dest_idx[dest_known]])
# Unweighted origin-destination table (disabled)
#od_table<-table(df.rmm3$municipio,df.rmm3$mun.trab)
#od_table

# Adjusted weight: pesoajust = peso / 10000000000000
# NOTE(review): presumably the raw weight is stored with 13 implied decimal
# places - confirm against the file layout.
df.rmm3$pesoajust <- df.rmm3$peso / 10000000000000

# Weighted origin (rows) x destination (columns) table, rounded to whole
# numbers
tabelaod <- round(
  wtd.table(
    x = df.rmm3$municipio,
    y = df.rmm3$mun.trab,
    weights = df.rmm3$pesoajust
  )
)

# Show the table in wide form, then write it to the file "tabelaod"
as.data.frame.matrix(tabelaod)
write.table(
  tabelaod,
  file = "tabelaod",
  sep = " ",
  na = "",
  quote = TRUE,
  row.names = TRUE,
  eol = "\r\n"
)
# Sankey diagram of the weighted origin -> destination flows.
# Earlier attempt, kept for reference:
#dataForSankey <- df.rmm3%>%dplyr::select(mun.trab, municipio)
#hchart(data_to_sankey(dataForSankey), "sankey", name = "OD")

# Long-format OD table: one row per (origin, destination) pair, with the
# columns Var1 (origin), Var2 (destination) and Freq (rounded weighted count)
od <- data.frame(
  round(
    wtd.table(
      x = df.rmm3$municipio,
      y = df.rmm3$mun.trab,
      weights = df.rmm3$pesoajust
    )
  )
)

# A connection data frame is a list of flows with an intensity for each flow.
# as.character() is required: c() on a factor only keeps the labels on
# R >= 4.1; older versions return the integer codes, which would turn every
# node name into "1", "2", ... and merge origins with destinations.
links <- data.frame(
  source = as.character(od$Var1),
  target = as.character(od$Var2),
  value = od$Freq,
  stringsAsFactors = FALSE
)

# Colour key for a label: the city name without the ", <share>%" suffix and
# with spaces replaced by underscores.
# NOTE(review): networkD3 appears to cut group names at the first space, which
# would give both "Barra de ..." cities the same colour - confirm against the
# installed version.
city_key <- function(label) {
  gsub(" ", "_", sub(",.*$", "", label))
}

# Each flow is coloured by its origin city. Deriving the group from the
# label replaces a hard-coded 13-element vector that relied on silent
# recycling and contained a label ("Rio largo") that matched no node.
links$group <- as.factor(city_key(links$source))

# The node data frame lists every entity involved in a flow: the origin
# labels first, then the destination labels
nodes <- data.frame(
  name = unique(c(links$source, links$target)),
  stringsAsFactors = FALSE
)

# An origin node and the destination node of the same city share one group,
# so they get the same colour
nodes$group <- as.factor(city_key(nodes$name))

# networkD3 wants zero-based node ids in the links, not the node names
links$IDsource <- match(links$source, nodes$name) - 1
links$IDtarget <- match(links$target, nodes$name) - 1

# Make the network
p <- sankeyNetwork(
  Links = links, Nodes = nodes,
  Source = "IDsource", Target = "IDtarget",
  Value = "value", NodeID = "name", LinkGroup = "group",
  fontSize = 10, dragY = TRUE, NodeGroup = "group",
  showNodeValues = FALSE
)
p
This is the order I would like:

And I would like to move the labels to the outside, so it's easier to read.

Starting with your `od` object, you could sort the nodes data.frame by the total value of the nodes and set `iterations = 0` to disable the algorithmic placement of nodes that `sankeyNetwork()` does. And, based on these previous answers...
https://stackoverflow.com/a/45495841/4389763
https://stackoverflow.com/a/36213873/4389763
You could use `htmlwidgets::onRender()` and some custom JavaScript to move the source nodes' labels to the left and the target nodes' labels to the right...