#title K-Medoids Clustering
[[TableOfContents]]
==== k-means와 k-medoids의 차이점 ====
 * k-means : 임의의 점을 중심으로 잡음
 * k-medoids: 실제 값을 중심으로 잡음

==== 예제 ====
데이터
{{{
n = 100
g = 6
set.seed(g)
d <- data.frame(x = unlist(lapply(1:g, function(i) rnorm(n/g, runif(1)*i^2))),
                y = unlist(lapply(1:g, function(i) rnorm(n/g, runif(1)*i^2))))
}}}

{{{
library(fpc)
library("cluster")

#최적의 k값을 찾음
#k <- pamk(d)$nc
k <- pamk(d)$nc+1
cl <- pam(d, k)
result <- data.frame(d, clusterid=cl$clustering)

par(mfrow=c(1,2))
plot(d, cex=0.2)
plot(d, cex=0.2)
colour <- c("red", "blue", "black", "green")
points(result$x, result$y, col=c(colour[result$clusterid]))
par(mfrow=c(1,1))
}}}
attachment:K-MedoidsClustering/kmedoids.png

==== 최적의 k를 찾는 다른 방법 ====
참고: http://stackoverflow.com/questions/15376075/cluster-analysis-in-r-determine-the-optimal-number-of-clusters
{{{
n = 100
g = 6
set.seed(g)
d <- data.frame(x = unlist(lapply(1:g, function(i) rnorm(n/g, runif(1)*i^2))),
                y = unlist(lapply(1:g, function(i) rnorm(n/g, runif(1)*i^2))))
}}}

{{{
sim <- clusGap(d, FUN = pam, K.max = 20, B = 100)
k <- with(sim, maxSE(Tab[,"gap"],Tab[,"SE.sim"]))
plot(sim)
abline(v=k, lty=2, col="blue")
}}}
attachment:K-MedoidsClustering/k.png

k-means 예제
{{{
wss <- (nrow(d)-1)*sum(apply(d,2,var))
for (i in 2:15) wss[i] <- sum(kmeans(d, centers=i)$withinss)
plot(1:15, wss, type="b", xlab="Number of Clusters", ylab="Within groups sum of squares")
}}}
attachment:K-MedoidsClustering/k001.png

==== 참고자료 ====
 * http://glorymind.tistory.com/83