_覓 | 覦覈襦 | 豕蠏手 | 殊螳 | 譯殊碁 |
FrontPage › K-MeansClustering
|
|
[edit]
1 k螳 蟆一 #n = 100 g = 6 set.seed(g) d <- data.frame(x = unlist(lapply(1:g, function(i) rnorm(n/g, runif(1)*i^2))), y = unlist(lapply(1:g, function(i) rnorm(n/g, runif(1)*i^2)))) plot(d) library(fpc) pamk.best <- pamk(d) pamk.best$nc るジ 覦覯:
NbClust(d, min.nc=2, max.nc=15, method="kmeans") [edit]
2 #library("RODBC") conn <- odbcConnect("sql_server",uid="id", pwd="pw") x <- sqlQuery(conn, " select play_rate1h t01 , play_rate2h t02 , play_rate3h t03 , play_rate4h t04 , play_rate5h t05 , play_rate6h t06 , play_rate7h t07 , play_rate8h t08 , play_rate9h t09 , play_rate10h t10 , play_rate11h t11 , play_rate12h t12 , play_rate13h t13 , play_rate14h t14 , play_rate15h t15 , play_rate16h t16 , play_rate17h t17 , play_rate18h t18 , play_rate19h t19 , play_rate20h t20 , play_rate21h t21 , play_rate22h t22 , play_rate23h t23 , play_rate0h t24 from plays tablesample(10 percent) ") #kmeans clustering (cl <- kmeans(x, 8)) summary(cl) tmp <- data.frame(cl$centers, cluster=rownames(cl$centers)) #企ろ磯 pdf襯 蠏碁る慨. library(reshape) library(ggplot2) tmp <- melt(tmp, id=c("cluster")) tmp$variable <- as.numeric(gsub("t","", tmp$variable)) head(tmp) p <- ggplot(tmp, aes(x=variable, y=value, colour=factor(cluster))) p + geom_line() + geom_text(data=tmp, aes(x=variable, y=value, label=factor(cluster))) #pdf螳 豎 100企? library(sqldf) sqldf(" select cluster , sum(value) pdf , count(*) cnt from tmp group by cluster order by 1 ") clusters <- data.frame(cl$cluster) colnames(clusters) <- c("cl") head(clusters) cnt <- sqldf(" select cl , count(*) cnt from clusters group by cl order by 1 ") data.frame(cnt=cnt$cnt, prop=cnt$cnt / sum(cnt$cnt)) #test, predic x1 <- sqlQuery(conn, " select top 10 play_rate1h t01 , play_rate2h t02 , play_rate3h t03 , play_rate4h t04 , play_rate5h t05 , play_rate6h t06 , play_rate7h t07 , play_rate8h t08 , play_rate9h t09 , play_rate10h t10 , play_rate11h t11 , play_rate12h t12 , play_rate13h t13 , play_rate14h t14 , play_rate15h t15 , play_rate16h t16 , play_rate17h t17 , play_rate18h t18 , play_rate19h t19 , play_rate20h t20 , play_rate21h t21 , play_rate22h t22 , play_rate23h t23 , play_rate0h t24 from plays tablesample(1 percent) ") #install.packages("DeducerExtras") library("DeducerExtras") predict(cl, x1) [edit]
3 覃伎(r code) #library("animation") kmeans.ani(x = cbind(X1 = runif(50), X2 = runif(50)), centers = 3, hints = c("Move centers!", "Find cluster?"), pch = 1:3, col = 1:3)
鏤
|
觜 覿 觜襦 讌 觜襦. |