# Exploratory clustering of the observations in "Data.csv":
#   1. a two-cluster k-means fit,
#   2. dendrograms for four hierarchical linkage methods,
#   3. an elbow plot of total within-cluster sum of squares for k = 1..max_clusters,
#   4. silhouette plots and the average silhouette width for k = 2..6.
#
# NOTE: kmeans() starts from randomly chosen centers, so fits (and cluster
# label numbering) can differ between runs; call set.seed() before running
# this script if reproducible output is required.

# ---------- K-Means ------------
Data <- read.csv("Data.csv", header = FALSE)
plot(Data)

kData <- kmeans(Data, centers = 2)
plot(Data, col = kData$cluster)

# The pairwise distance matrix is needed by every hierarchical fit and every
# silhouette computation below, so compute it once instead of per call.
data_dist <- dist(Data)

# ---------- Hierarchical Clustering ------------
# Names are the hclust() method identifiers, values are the plot titles.
linkage_titles <- c(
  single = "Single Linkage",
  complete = "Complete Linkage",
  average = "Average Linkage",
  centroid = "Centroid Linkage"
)

# Draw the four dendrograms on a 2 x 2 grid, then restore the previous layout.
# Restoring par() (rather than closing the device) keeps the plots already
# drawn and avoids a later plot replacing them in non-interactive runs.
old_par <- par(mfrow = c(2, 2))
for (linkage in names(linkage_titles)) {
  plot(
    hclust(data_dist, method = linkage),
    main = linkage_titles[[linkage]],
    labels = FALSE
  )
}
par(old_par)

# ---------- K-Means, Again ------------
# Elbow plot: total within-cluster sum of squares as a function of k.
max_clusters <- 20
cluster_counts <- seq_len(max_clusters)
sum_dist <- numeric(max_clusters)

for (k in cluster_counts) {
  kData <- kmeans(Data, centers = k)
  sum_dist[k] <- kData$tot.withinss
}

# The x values are derived from max_clusters so that changing it above keeps
# the plot, the connecting line and the axis ticks in sync.
plot(
  x = cluster_counts, y = sum_dist, col = "blue",
  xlab = "num of clusters",
  ylab = "sum of within-group distances"
)
lines(x = cluster_counts, y = sum_dist, col = "blue")
axis(1, labels = cluster_counts, at = cluster_counts)

# ---------- Silhouette analysis ------------
# silhouette() comes from the 'cluster' package; install it if it is missing.
if (!requireNamespace("cluster", quietly = TRUE)) {
  install.packages("cluster")
}
library(cluster)

# Candidate numbers of clusters to evaluate.
sil_ks <- 2:6

# One row per candidate k: column 1 = k, column 2 = average silhouette width.
silAvg <- matrix(NA_real_, nrow = length(sil_ks), ncol = 2)

# Each k is fitted exactly once and that same fit feeds both the silhouette
# plot and the average, so the summary curve describes the clusterings that
# were actually plotted (a second kmeans() run could converge differently).
old_par <- par(mfrow = c(3, 2))
for (idx in seq_along(sil_ks)) {
  i <- sil_ks[idx]
  kData <- kmeans(Data, centers = i)
  sil <- silhouette(kData$cluster, data_dist)

  plot(
    sil,
    col = 1:i, border = NA,
    do.n.k = FALSE, do.clus.stat = TRUE,
    cex.lab = 1.5, cex.axis = 1.5, cex.sub = 1.5, cex.names = 1.5,
    main = ""
  )

  silAvg[idx, 1] <- i
  silAvg[idx, 2] <- mean(sil[, "sil_width"])
}
par(old_par)

plot(silAvg, xlab = "Number of clusters", ylab = "Average silhouette value")
lines(silAvg)