I have a ggplot of bar graphs for each cluster.
The axis labels ("Path") for the bar plot are all unique and long, but they are grouped by "PathTypes" and "Cluster" - info which I want to represent on the bar graphs. I use a texture (stripes, dots, etc) from the ggpattern package to represent the "PathType" and I use colors to represent the "Cluster".
The resulting graph I produce is illegible bc it's just too large. I've been butting heads with facet_grid and facet_wrap. I am fine with using 2-3 pages to represent all the clusters, but I'm unsure about how to split the data smartly to accomplish that.
Example code follows:
library(data.table)
library(ggpattern)
library(gridExtra)
library(ggpubr)
library(truncnorm)
library(ggplot2)
library(stringi)
# generating sample data for data table called all.cluster.dt
PathType <- sample(x = c("Type1", "Type2", "Type3", "Type4", "Type5"), # create the PathType column
size = 400,
replace = T)
Score <- rtruncnorm(n = 400, a = 15, b = 90, mean = 55, sd = 15) # create the Score Column
Path <- NA # initialize the Path column
Path.generator <- function() { # function to write unique Paths
a <- do.call(paste0, replicate(10, sample(LETTERS, 15, TRUE), FALSE))
single.Name <- paste(a, collapse = ' ')
return(single.Name)
}
cluster <- sample(x = c(1:14), # create the Cluster column
size = 400,
replace = T)
all.cluster.dt <- data.table( # create the data table with desired columns
PathType,
Score,
Path,
cluster
)
for(i in 1:length(all.cluster.dt$Path)){ # loop down Path column calling function to generate unique Path name for each row
all.cluster.dt$Path[i] <- Path.generator()
}
wrap.it <- function(x, len) # function to try and wrap long Path label text
{
sapply(x, function(y) paste(strwrap(y, len),
collapse = "\n"),
USE.NAMES = FALSE)
}
# Call this function with a list or vector
wrap.labels <- function(x, len)
{
if (is.list(x))
{
lapply(x, wrap.it, len)
} else {
wrap.it(x, len)
}
}
wr.lap <- wrap.labels(all.cluster.dt$Path, 40) # wrap Path labels to 40 characters long
all.cluster.dt$Path <- wr.lap
all.cluster.dt$Path <- factor(all.cluster.dt$Path, # group and factorize the data by PathType and Score
levels = unique(all.cluster.dt$Path[order(all.cluster.dt$PathType, all.cluster.dt$Score)]))
cluster.color.df <- data.frame("cluster" = c(1:14), # add custom colors to represent which Cluster the Path belongs to
"color" = c("#F5F2D4", "#CAD8F2", "#8FB6FF", "#FFFDD7", "#DADADA", "#DAEB9B", "#EED1F2", "#C9E2D0", "#FFDFA2", "#DFFFD6", "#F6DFDE", "#E2DEF5", "#F0B8BC", "#CAF3EF"))
setDT(all.cluster.dt)[cluster.color.df, color := i.color, on = .(cluster)] # match color to cluster in all.cluster.dt
bar.plots <- ggplot(all.cluster.dt, aes(x=Score, y=Path)) +
ggpattern::geom_col_pattern( # adds texture/patterns to the bars based on the PathType column
aes(pattern = `PathType`),
fill = all.cluster.dt$color,
colour = "black",
pattern_density = 0.2, # how dense the pattern should be
pattern_fill = "black",
pattern_spacing = 0.1) +
scale_x_continuous(expand = c(0, 0), limits = c(0, 90)) +
theme_bw() +
theme(axis.title.y = element_blank()) +
theme(legend.position = "none",
text = element_text(size = 8))
bar.plots + facet_grid(rows = vars(cluster), scales = "fixed") # draw the bar graph
I now attempt to use grid.arrange and ggsave to arrange the plots by Cluster on a page, but get an error: "replacement has 17 rows, data has 400"...
pdf("bar_graphs.pdf", wi=8.1,hei=10.6)
do.call(grid.arrange, bar.plots)
ggsave("bar_graphs.pdf", marrangeGrob(bar.plots, nrow=4, ncol=2))
dev.off()
Any answers that provide the overall solution to my goal (getting my bar graphs into a legible figure) or elucidate why I get an error arranging grobs is much appreciated.