#title sqldf [[TableOfContents]] ==== 예제 ==== data frame을 sql로 쪼물쪼물 할 수 있다. {{{ install.packages("sqldf") library("sqldf") x <- head(warpbreaks) sqldf("select * from warpbreaks limit 5") sqldf("select * from warpbreaks where breaks >= 50") sqldf("select wool, sum(breaks) from warpbreaks group by wool") }}} {{{ > sqldf("select * from warpbreaks limit 5") Loading required package: tcltk breaks wool tension 1 26 A L 2 30 A L 3 54 A L 4 25 A L 5 70 A L > sqldf("select * from warpbreaks where breaks >= 50") breaks wool tension 1 54 A L 2 70 A L 3 52 A L 4 51 A L 5 67 A L > sqldf("select wool, sum(breaks) from warpbreaks group by wool") wool sum(breaks) 1 A 838 2 B 682 }}} 또 다른 예제 {{{ install.packages("sqldf") library("sqldf") cname <- c("x1", "x2", "x3","x4", "x5", "x6", "y") german = read.table("c:\\data\\german1.txt", col.names = cname) head(german) sqldf("select x1, sum(x2) sum_x2, count(distinct x4) distinct_x4 from german group by x1") }}} {{{ #사용자 정의함수 사용? DF <- data.frame(a_date = 0:1, c = 0:1) processDates <- function(data, ...) { ix <- grepl("_date$", names(data)) names(data)[ix] <- sub("_date$", "", names(data)[ix]) data[ix] <- lapply(data[ix], as.Date, origin = "1970-01-01") data } sqldf("select * from DF", method = processDates) #파라미터 처리 p1 <- 7 fn$sqldf("select * from iris where Sepal_Length > $p1") #Update DF <- data.frame(a = 1:3, b = c(3, NA, 5)) sqldf(c("update DF set b = a where b is null", "select * from main.DF")) DF <- data.frame(a = 1:3, b = factor(c(3, NA, 5))); DF sqldf(c("update DF set b = a where b is null", "select * from main.DF"), method = "raw") #PostgreSQL이 설치되고, 서비스가 시작되어 있어야 함. #https://code.google.com/p/sqldf/#12._How_does_one_use_sqldf_with_PostgreSQL? library("RPostgreSQL") }}} ==== sqldf 에러 ==== {{{ > sqldf("select * from d order by cid"); Error in .local(drv, ...) : Failed to connect to database: Error: Can't connect to MySQL server on 'localhost' (0) Error in !dbPreExists : invalid argument type }}} {{{library("RMySQL")}}}와 같이 mysql 라이브러리를 로드한 경우 sqldf 사용시 위와 같이 에러가 날 수 있다. 이런 경우에는 아래와 같이 해주자. {{{ sqldf("select * from d order by cid", drv="SQLite"); }}} ==== 참고자료 ==== * https://code.google.com/p/sqldf/ * http://cran.r-project.org/web/packages/sqldf/sqldf.pdf