Contents

1 sampling
2 ctree(conditional inference tree)
3 SVM
4 kNN
5 randomForest
6 neural networks
7 logistic regression
8 ada boosting
9 naive bayes
10 gam
11 som
12 2
13 train
14 참고자료


覲旧″ 覲伎願 螻蠍 覲伎企 螻襴讀 企り 蟆郁骸螳 譬讌讌 .
중요한 것은 '변수'다.

1 sampling #

7:3으로 데이터를 나눈다
# Split iris into a stratified 70% training set and 30% test set.
nrow(iris)
# 150

# training set sampling, test set sampling
library("caret")
# `p` is the fraction of rows that goes INTO the partition (the training set),
# so a 7:3 train/test split needs p = 0.7. With p = 0.3 the training set would
# hold only 45 rows and the test set 105.
partition_idx <- createDataPartition(iris$Species, p = 0.7)$Resample1
training <- iris[partition_idx, ]
test <- iris[-partition_idx, ]

nrow(training); nrow(test)
# 105; 45

over/under sampling
library("DMwR")

# Rebalance the classes with SMOTE: perc.over controls how many synthetic
# minority rows are generated, perc.under how many majority rows are kept.
resample.df <- SMOTE(
  Species ~ .,
  data = iris,
  perc.over = 100,
  perc.under = 50
)

# Class counts after resampling.
table(resample.df$Species)
觜 谿願 2:1 谿企り , 一危郁 讓曙 over sampling(perc.over=100)螻, 一危郁 襷 讓曙 under sampling(perc.under=50) . 豕蠏殊 伎 螻襴讀 class襯 蟆一 れ 0~1伎 random 螳 螻燕 一危磯ゼ 豢螳り .

2 ctree(conditional inference tree) #

결정트리(rpart 패키지)는 다음 2가지 문제를 가지고 있다.
  • over fitting
  • 糾 煙 覲伎
이런 문제를 해결한 방법이 ctree다.

library(party)
library(caret)

# Fit a conditional inference tree on the training split and predict the
# class of every test row.
model <- ctree(Species ~ ., data = training)
pred <- predict(model, newdata = test, type = "response")

# Reuse the stored predictions instead of running predict() a second time.
confusionMatrix(pred, test$Species)

3 SVM #

library(e1071)
library(caret)

# Fit an SVM classifier (e1071 defaults) on the training split.
model <- svm(Species ~ ., data = training)
pred <- predict(model, newdata = test, type = "response")

confusionMatrix(pred, test$Species)
# Plain cross-tabulation of predicted vs. actual class. The outcome column of
# this data set is Species; iris has no is_out column, so test$is_out would be
# NULL and table() would fail.
table(pred, test$Species)

# Grid search over gamma and cost for an SVM on the is_out outcome.
# NOTE(review): this snippet expects `training` to contain an is_out column,
# i.e. a different data set than the iris split used above. Confirm.
obj <- tune.svm(
  factor(is_out) ~ .,
  data = training,
  sampling = "fix",
  gamma = 2^c(-8, -4, 0, 4),
  cost = 2^c(-8, -4, -2, 0)
)

# Tuning results with both parameter axes on a log2 scale.
plot(obj, transform.x = log2, transform.y = log2)
# Same results as a 3-D perspective plot.
plot(obj, type = "perspective", theta = 120, phi = 45)

library("kernlab")

# Kernel SVM with a radial basis ("rbfdot") kernel on the is_out outcome.
# NOTE(review): `test2` is not defined in this part of the file. Confirm where
# it comes from.
model <- ksvm(
  factor(is_out) ~ .,
  data = training,
  kernel = "rbfdot"
)
pred <- predict(model, newdata = test2, type = "response")
confusionMatrix(pred, test2$is_out)

4 kNN #

library("class")
library(caret)

# 5-nearest-neighbour classification on the four measurement columns; the
# labels are taken from the training rows.
pred <- knn(
  train = training[, 1:4],
  test = test[, 1:4],
  cl = training$Species,
  k = 5,
  prob = TRUE
)

confusionMatrix(pred, test$Species)

5 randomForest #

library("randomForest")
library(caret)

# Random forest on the training split, keeping variable-importance measures.
model <- randomForest(
  Species ~ .,
  data = training,
  type = "classification",
  importance = TRUE
)
pred <- predict(model, newdata = test)

confusionMatrix(pred, test$Species)

6 neural networks #

library(nnet)
library(caret)

# Single-hidden-layer neural network with 5 hidden units.
model <- nnet(Species ~ ., data = training, size = 5)

# predict(type = "class") returns a character vector, while confusionMatrix()
# needs factors that share the same levels. Convert with the reference levels
# so classes that were never predicted are still represented.
pred <- factor(
  predict(model, newdata = test, type = "class"),
  levels = levels(test$Species)
)

confusionMatrix(pred, test$Species)

7 logistic regression #

library(nnet)
library(caret)

# Multinomial logistic regression of Species on all other columns.
model <- multinom(Species ~ ., data = training)
# head(fitted(model))  # fitted class probabilities
pred <- predict(model, newdata = test, type = "class")

confusionMatrix(pred, test$Species)

# Coefficients, standard errors, deviance and AIC of the fit.
summary(model)
> summary(model)
Call:
multinom(formula = Species ~ ., data = training)

Coefficients:
           (Intercept) Sepal.Length Sepal.Width Petal.Length Petal.Width
versicolor    157.4558    -45.15929   -27.99315     79.58368   -58.51255
virginica    -157.1791     13.78356   -82.12478     54.58758    80.74923

Std. Errors:
           (Intercept) Sepal.Length Sepal.Width Petal.Length Petal.Width
versicolor    17101.14     9088.735    10475.50     4091.601    7197.552
virginica     17101.14     9088.740    10475.49     4091.600    7197.552

Residual Deviance: 0.0001210002 
AIC: 20.00012 
> 

8 ada boosting #

library(ada)
library(caret)

# ada() fits two-class problems only and stops on a response with more than
# two levels, so restrict the three-class iris splits to two species and drop
# the unused factor level.
training_2c <- droplevels(training[training$Species != "setosa", ])
test_2c <- droplevels(test[test$Species != "setosa", ])

# Boosted classifier on the two-class training rows.
model <- ada(Species ~ ., data = training_2c)
pred <- predict(model, newdata = test_2c)

confusionMatrix(pred, test_2c$Species)

9 naive bayes #

library(e1071)
library(caret)

# Naive Bayes classifier of Species from all other columns.
model <- naiveBayes(Species ~ ., data = training)
pred <- predict(model, newdata = test)

confusionMatrix(pred, test$Species)

10 gam #

# Template: binomial GAM with smooth terms on two predictors.
# NOTE(review): the outcome/predictor names and the two class labels below are
# mis-encoded (they look like Korean placeholder names damaged by a charset
# conversion). Replace them with the real column names and labels before
# running. The code itself is left unchanged here.
library("mgcv")
model <- gam(危覿 ~ s(覲1) + s(覲2),family=binomial, data=training)
summary(model)
# type="response" returns predicted probabilities on the 0-1 scale.
pred <- predict(model, test, type="response")
# Probabilities below 0.5 are mapped to the first label, the rest to the second.
# NOTE(review): ifelse() yields a character vector; current caret versions
# expect factors in confusionMatrix(). Confirm and wrap in factor() if needed.
confusionMatrix(ifelse(pred < 0.5, "危", "譟"), test$危覿)


11 som #

library("kohonen")

# Keep the class labels, then replace training/test by their scaled numeric
# feature matrices (columns 1-4).
training.class <- training$Species
test.class <- test$Species
training <- scale(training[, 1:4])
# Scale the test rows with the TRAINING mean and sd so both sets live in the
# same feature space; scaling the test set by its own statistics would shift
# it relative to the map learned from the training data.
test <- scale(
  test[, 1:4],
  center = attr(training, "scaled:center"),
  scale = attr(training, "scaled:scale")
)

# Self-organising map on a 5 x 5 hexagonal grid.
model <- som(training, grid = somgrid(5, 5, "hexagonal"))
# NOTE(review): trainX/trainY looks like the kohonen 2.x predict() interface.
# Confirm against the installed kohonen version.
pred <- predict(model, newdata = test,
                trainX = training,
                trainY = factor(training.class))

confusionMatrix(pred$prediction, test.class)

12 2 #

# ctree: conditional inference tree on the is_out outcome
library(party)
library(caret)
model <- ctree(factor(is_out) ~ ., data = training)
# Weighted variant, kept as in the original notes (the column name is mis-encoded):
#model <- ctree(factor(is_out)~., data=training, weights=ifelse(training$蟲襷り唄 >= 0, 100, 1))
pred <- predict(model, newdata = test, type = "response")
# Reuse the stored predictions instead of running predict() a second time.
confusionMatrix(pred, test$is_out)

# randomForest: forest on the is_out outcome plus variable importance
library(randomForest)
model <- randomForest(
  factor(is_out) ~ .,
  data = training,
  type = "classification",
  importance = TRUE,
  proximity = TRUE
)
pred <- predict(model, newdata = test)
confusionMatrix(pred, test$is_out)

# Importance table sorted by mean decrease in Gini, largest first.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
imp <- data.frame(importance(model))
imp[order(imp$MeanDecreaseGini, decreasing = TRUE), ]
varImpPlot(model)

# CART: classification tree on the is_out outcome
library(rpart)
model <- rpart(
  factor(is_out) ~ .,
  data = training,
  method = "class"
)
pred <- predict(model, newdata = test, type = "class")
confusionMatrix(pred, test$is_out)

# SVM: support vector classifier on the is_out outcome
library(e1071)
model <- svm(
  factor(is_out) ~ .,
  data = training,
  method = "class"
)
pred <- predict(model, newdata = test, type = "class")
confusionMatrix(pred, test$is_out)

# NN: single-hidden-layer network (40 hidden units) on the is_out outcome
library(nnet)
model <- nnet(factor(is_out) ~ ., data = training, size = 40, method = "class")

# predict(type = "class") returns a character vector, while confusionMatrix()
# needs two factors with identical levels. Build both from the class levels
# seen in training.
pred <- factor(
  predict(model, newdata = test, type = "class"),
  levels = levels(factor(training$is_out))
)
confusionMatrix(
  pred,
  factor(test$is_out, levels = levels(factor(training$is_out)))
)

# kNN: 7-nearest-neighbour vote; columns 2..ncol(training) are used as
# features and is_out supplies the labels
library("class")
pred <- knn(
  train = training[, 2:ncol(training)],
  test = test[, 2:ncol(training)],
  cl = training$is_out,
  k = 7,
  prob = TRUE
)
confusionMatrix(pred, test$is_out)

# logistic regression: multinom() fit of is_out on all other columns
library(nnet)
model <- multinom(is_out ~ ., data = training)
pred <- predict(
  model,
  newdata = test,
  type = "class"
)
confusionMatrix(pred, test$is_out)

13 train #

library(caret)
library(rpart)
library(e1071)
library(rattle)
library("rpart.plot")

data(iris)
formula <- as.formula(Species ~ .)

# Tune an rpart tree with caret. caret builds its own rpart.control() for each
# candidate cp and hands it to rpart() as `control`, which takes precedence
# over bare cp = / maxdepth = arguments, so those would be silently ignored.
# Tree-shape settings therefore go through `control =`. cp is chosen by
# caret's tuning grid; to pin it use tuneGrid = data.frame(cp = 0.002), in
# which case plot(t) has no tuning profile to draw.
t <- train(
  formula,
  iris,
  method = "rpart",
  control = rpart.control(maxdepth = 8)
)
# Resampled accuracy across the candidate cp values.
plot(t)

# Base-graphics drawing of the selected tree.
plot(t$finalModel)
text(t$finalModel)

# Same tree drawn with rattle / rpart.plot.
fancyRpartPlot(t$finalModel)

14 참고자료 #