Contents

1 k螳 蟆一
2
3 覃伎(r code)
4 覃伎(gif)


1 k螳 蟆一 #

n = 100
g = 6 
set.seed(g)
d <- data.frame(x = unlist(lapply(1:g, function(i) rnorm(n/g, runif(1)*i^2))), 
                y = unlist(lapply(1:g, function(i) rnorm(n/g, runif(1)*i^2))))
plot(d)

library(fpc)
pamk.best <- pamk(d)
pamk.best$nc

るジ 覦覯:
NbClust(d, min.nc=2, max.nc=15, method="kmeans")

2 #

library("RODBC")
conn <- odbcConnect("sql_server",uid="id", pwd="pw")

x <- sqlQuery(conn, "
select
     play_rate1h	t01
,	play_rate2h	t02
,	play_rate3h	t03
,	play_rate4h	t04
,	play_rate5h	t05
,	play_rate6h	t06
,	play_rate7h	t07
,	play_rate8h	t08
,	play_rate9h	t09
,	play_rate10h	t10
,	play_rate11h	t11
,	play_rate12h	t12
,	play_rate13h	t13
,	play_rate14h	t14
,	play_rate15h	t15
,	play_rate16h	t16
,	play_rate17h	t17
,	play_rate18h	t18
,	play_rate19h	t19
,	play_rate20h	t20
,	play_rate21h	t21
,	play_rate22h	t22
,	play_rate23h	t23
,	play_rate0h	t24
from plays tablesample(10 percent)
")

#kmeans clustering
(cl <- kmeans(x, 8))
summary(cl)
tmp <- data.frame(cl$centers, cluster=rownames(cl$centers))


#企ろ磯 pdf襯 蠏碁る慨.
library(reshape)
library(ggplot2)

tmp <- melt(tmp, id=c("cluster"))
tmp$variable <- as.numeric(gsub("t","", tmp$variable))
head(tmp)

p <- ggplot(tmp, aes(x=variable, y=value, colour=factor(cluster))) 
p + geom_line() + geom_text(data=tmp, aes(x=variable, y=value, label=factor(cluster)))


#pdf螳 豎 100企?
library(sqldf)
sqldf("
    select
        cluster
    ,   sum(value) pdf
    ,   count(*) cnt
    from tmp
    group by
        cluster
    order by 1
")


clusters <- data.frame(cl$cluster)
colnames(clusters) <- c("cl")
head(clusters)

cnt <- sqldf("
    select
        cl
    ,   count(*) cnt
    from clusters
    group by
        cl
    order by 1
")
data.frame(cnt=cnt$cnt, prop=cnt$cnt / sum(cnt$cnt))

#test, predic
x1 <- sqlQuery(conn, "
select top 10
    play_rate1h    t01
,	play_rate2h	t02
,	play_rate3h	t03
,	play_rate4h	t04
,	play_rate5h	t05
,	play_rate6h	t06
,	play_rate7h	t07
,	play_rate8h	t08
,	play_rate9h	t09
,	play_rate10h	t10
,	play_rate11h	t11
,	play_rate12h	t12
,	play_rate13h	t13
,	play_rate14h	t14
,	play_rate15h	t15
,	play_rate16h	t16
,	play_rate17h	t17
,	play_rate18h	t18
,	play_rate19h	t19
,	play_rate20h	t20
,	play_rate21h	t21
,	play_rate22h	t22
,	play_rate23h	t23
,	play_rate0h	t24
from plays tablesample(1 percent)
")

#install.packages("DeducerExtras")
library("DeducerExtras")
predict(cl, x1)

3 覃伎(r code) #

library("animation")
kmeans.ani(x = cbind(X1 = runif(50), X2 = runif(50)), 
           centers = 3, hints = c("Move centers!", "Find cluster?"), pch = 1:3, col = 1:3)

4 覃伎(gif) #

豢豌: http://shabal.in/visuals/kmeans/1.html

Starting with 4 left-most points
left.gif

Starting with 4 right-most points
right.gif

Starting with 4 top points
top.gif