How to change cluster names in silhouette plot in R

307 Views Asked by At

I am trying to see how well some known labels explain, or fit, the 2D representation of my data that I get from UMAP.

I tried to use the silhouette() function, but the cluster information must be provided as a numeric vector, and those numbers are what plot(sil) shows. Is there a way to use the cluster names, or at least to show them in the plot instead of the numbers? (This is similar to the question here, but instead of the sample labels I want to change the cluster labels.)

For example:

# Reproducible example: single-linkage hierarchical clustering of 12 samples
# (A-L) from a hand-written distance matrix, followed by a silhouette plot.

# Install 'cluster' only when it is missing, then attach it with library() so
# that a failed load stops the script here instead of later at silhouette().
if (!requireNamespace("cluster", quietly = TRUE)) {
  install.packages("cluster")
}
library(cluster)

# 12 x 12 matrix of pairwise distances; rows and columns are named A-L.
tmp <- matrix(
  c(
      0,  20,  20,  20,  40,  60,  60,  60, 100, 120, 120, 120,
     20,   0,  30,  50,  60,  80,  40,  80, 120, 100, 140, 120,
     20,  30,   0,  40,  60,  80,  80,  80, 120, 140, 140,  80,
     20,  50,  40,   0,  60,  80,  80,  80, 120, 140, 140, 140,
     40,  60,  60,  60,   0,  20,  20,  20,  60,  80,  80,  80,
     60,  80,  80,  80,  20,   0,  20,  20,  40,  60,  60,  60,
     60,  40,  80,  80,  20,  20,   0,  20,  60,  80,  80,  80,
     60,  80,  80,  80,  20,  20,  20,   0,  60,  80,  80,  80,
    100, 120, 120, 120,  60,  40,  60,  60,   0,  20,  20,  20,
    120, 100, 140, 140,  80,  60,  80,  80,  20,   0,  20,  20,
    120, 140, 140, 140,  80,  60,  80,  80,  20,  20,   0,  20,
    120, 120,  80, 140,  80,  60,  80,  80,  20,  20,  20,   0
  ),
  nrow = 12, dimnames = list(LETTERS[1:12], LETTERS[1:12])
)

# Run hierarchical clustering and cut the tree at height 25.
cl <- hclust(as.dist(tmp, diag = TRUE, upper = TRUE), method = "single")
cluster_labels <- cutree(cl, h = 25)

# Here I would like to change the cluster labels from numbers to letters,
# for example:
# cluster_labels <- LETTERS[seq_along(unique(cluster_labels))][cluster_labels]

# silhouette() takes no 'title' argument (anything extra falls into '...'),
# so the plot title is set through plot()'s 'main' argument instead.
sil_cl <- silhouette(cluster_labels, as.dist(tmp))

# The plot should show the cluster labels instead of the numbers.
plot(sil_cl, main = "Good")
1

There is 1 best solution below

0
On BEST ANSWER

I figured out that this can be done using the factoextra package. It would still be nice if someone found a way to do it with the regular plot() function.

# Silhouette plot whose legend shows cluster names (letters) instead of the
# numeric cluster ids, drawn with fviz_silhouette() from factoextra.

# Install each package only when it is missing, then attach it with library()
# so that a failed load stops the script immediately.
if (!requireNamespace("cluster", quietly = TRUE)) {
  install.packages("cluster")
}
if (!requireNamespace("factoextra", quietly = TRUE)) {
  install.packages("factoextra")
}
library(cluster)
library(factoextra)

# 12 x 12 matrix of pairwise distances; rows and columns are named A-L.
tmp <- matrix(
  c(
      0,  20,  20,  20,  40,  60,  60,  60, 100, 120, 120, 120,
     20,   0,  30,  50,  60,  80,  40,  80, 120, 100, 140, 120,
     20,  30,   0,  40,  60,  80,  80,  80, 120, 140, 140,  80,
     20,  50,  40,   0,  60,  80,  80,  80, 120, 140, 140, 140,
     40,  60,  60,  60,   0,  20,  20,  20,  60,  80,  80,  80,
     60,  80,  80,  80,  20,   0,  20,  20,  40,  60,  60,  60,
     60,  40,  80,  80,  20,  20,   0,  20,  60,  80,  80,  80,
     60,  80,  80,  80,  20,  20,  20,   0,  60,  80,  80,  80,
    100, 120, 120, 120,  60,  40,  60,  60,   0,  20,  20,  20,
    120, 100, 140, 140,  80,  60,  80,  80,  20,   0,  20,  20,
    120, 140, 140, 140,  80,  60,  80,  80,  20,  20,   0,  20,
    120, 120,  80, 140,  80,  60,  80,  80,  20,  20,  20,   0
  ),
  nrow = 12, dimnames = list(LETTERS[1:12], LETTERS[1:12])
)

# Single-linkage clustering, cut at height 25.
cl <- hclust(as.dist(tmp, diag = TRUE, upper = TRUE), method = "single")
cluster_labels <- cutree(cl, h = 25)

# silhouette() takes no 'title' argument (anything extra falls into '...'),
# so none is passed here.
sil_cl <- silhouette(cluster_labels, as.dist(tmp))

# One display name per cluster, in cluster-id order. The label vector must
# have as many entries as there are clusters, not as many as there are samples.
cluster_names <- LETTERS[seq_len(max(cluster_labels))]

# Relabel the fill legend with the cluster names and hide the duplicate
# colour legend ("none" replaces the deprecated FALSE).
fviz_silhouette(sil_cl) +
  scale_fill_discrete(labels = cluster_names) +
  guides(colour = "none")

enter image description here