#title Data Frame [[TableOfContents]] ==== 개요 ==== 데이터 프레임은 변수(필드)와 관찰치(레코드)로 구성된 2차원 자료 객체이다. ==== 문자열로 데이터 프레임 만들기 ==== 방법1 {{{ x <- read.table(header=T, text=" factorLevel characteristicValue A1 8.44 A1 8.36 A1 8.28 A2 8.59 A2 8.91 A2 8.6 A3 9.34 A3 9.41 A3 9.69 A4 8.92 A4 8.92 A4 8.74") head(x) }}} 방법2 {{{ tmp <- textConnection( "day A B 0 10.0 10.0 7 9.0 9.1 14 8.0 8.2 21 7.0 7.3 28 6.0 6.4 35 5.0 5.5 42 4.0 4.5 49 3.0 3.6 56 2.0 2.7") x <- read.table(tmp, header=TRUE) close.connection(tmp) head(x) }}} ==== cube ==== {{{ as.data.frame(addmargins(xtabs(uptake ~ Plant + Type + Treatment, CO2))) }}} ==== join ==== {{{ df1 = data.frame(CustomerId=c(1:6),Product=c(rep("Toaster",3),rep("Radio",3))) df2 = data.frame(CustomerId=c(2,4,6),State=c(rep("Alabama",2),rep("Ohio",1))) df1;df2 #outer join: merge(x = df1, y = df2, by = "CustomerId", all = TRUE) #left outer: merge(x = df1, y = df2, by = "CustomerId", all.x=TRUE) #right outer: merge(x = df1, y = df2, by = "CustomerId", all.y=TRUE) #cross join: merge(x = df1, y = df2, by = NULL) }}} 결과 {{{ > #outer join: > merge(x = df1, y = df2, by = "CustomerId", all = TRUE) CustomerId Product State 1 1 Toaster 2 2 Toaster Alabama 3 3 Toaster 4 4 Radio Alabama 5 5 Radio 6 6 Radio Ohio > > #left outer: > merge(x = df1, y = df2, by = "CustomerId", all.x=TRUE) CustomerId Product State 1 1 Toaster 2 2 Toaster Alabama 3 3 Toaster 4 4 Radio Alabama 5 5 Radio 6 6 Radio Ohio > > #right outer: > merge(x = df1, y = df2, by = "CustomerId", all.y=TRUE) CustomerId Product State 1 2 Toaster Alabama 2 4 Radio Alabama 3 6 Radio Ohio > > #cross join: > merge(x = df1, y = df2, by = NULL) CustomerId.x Product CustomerId.y State 1 1 Toaster 2 Alabama 2 2 Toaster 2 Alabama 3 3 Toaster 2 Alabama 4 4 Radio 2 Alabama 5 5 Radio 2 Alabama 6 6 Radio 2 Alabama 7 1 Toaster 4 Alabama 8 2 Toaster 4 Alabama 9 3 Toaster 4 Alabama 10 4 Radio 4 Alabama 11 5 Radio 4 Alabama 12 6 Radio 4 Alabama 13 1 Toaster 6 Ohio 14 2 Toaster 6 Ohio 15 3 Toaster 6 Ohio 16 4 Radio 
6 Ohio 17 5 Radio 6 Ohio 18 6 Radio 6 Ohio > }}} ==== na가 있는 컬럼 삭제 ==== {{{ tmp[, colSums(is.na(tmp)) == 0] }}} ==== 조건절 ==== {{{ ex1[ex1$hw==1,] }}} ==== alter column ==== 데이터형 변경 {{{ x$일자 <- as.Date(x$일자) }}} 컬럼 삭제 {{{ x$일자 <- NULL }}} ==== 가로를 세로로 ==== {{{ tmp <- textConnection( "청결상태 음식량 대기시간 음식맛 친절 6 4 7 6 5 5 7 5 6 6 5 3 4 5 6 3 3 2 3 4 4 3 3 3 2") x <- read.table(tmp, header=TRUE) close.connection(tmp) stack(x) }}} 결과 {{{ > stack(x) values ind 1 6 청결상태 2 5 청결상태 3 5 청결상태 4 3 청결상태 5 4 청결상태 6 4 음식량 7 7 음식량 8 3 음식량 9 3 음식량 10 3 음식량 11 7 대기시간 12 5 대기시간 13 4 대기시간 14 2 대기시간 15 3 대기시간 16 6 음식맛 17 6 음식맛 18 5 음식맛 19 3 음식맛 20 3 음식맛 21 5 친절 22 6 친절 23 6 친절 24 4 친절 25 2 친절 }}} ==== 정렬하기 ==== {{{ tmp <- textConnection( "청결상태 음식량 대기시간 음식맛 친절 6 4 7 6 5 5 7 5 6 6 5 3 4 5 6 3 3 2 3 4 4 3 3 3 2") x <- read.table(tmp, header=TRUE) close.connection(tmp) head(x) }}} 결과 {{{ > x[order(x$친절), ] 청결상태 음식량 대기시간 음식맛 친절 5 4 3 3 3 2 4 3 3 2 3 4 1 6 4 7 6 5 2 5 7 5 6 6 3 5 3 4 5 6 > x[order(x$친절, x$음식맛), ] 청결상태 음식량 대기시간 음식맛 친절 5 4 3 3 3 2 4 3 3 2 3 4 1 6 4 7 6 5 3 5 3 4 5 6 2 5 7 5 6 6 }}} ==== subset ==== {{{ subset(x, select=c(친절, 대기시간), subset= (청결상태 > 5)) subset(x, select=-c(친절, 대기시간)) # 마이너스(-) 부호는 해당 컬럼을 제외한 나머지 컬럼을 말함. 
}}} ==== 열(컬럼)이름 바꾸기 ==== {{{ > colnames(x) <- c("청결도", "음식량", "대기시간", "음식맛", "친절도") > x 청결도 음식량 대기시간 음식맛 친절도 1 6 4 7 6 5 2 5 7 5 6 6 3 5 3 4 5 6 4 3 3 2 3 4 5 4 3 3 3 2 }}} ==== na값 제거하기 ==== {{{ clean <- na.omit(x) }}} ==== 데이터프레임 합치기 ==== {{{ all.cols <- rbind(x1, x2) all.rows <- cbind(x1, x2) inner.join <- merge(x1, x2, by="join_key") }}} ==== 특정 열만 조회하기 ==== {{{ iris[, names(iris) %in% c ("Species" , "Petal.Width")] }}} ==== 특정 열만 빼고 조회하기 ==== {{{ iris[, !names(iris) %in% c ("Species" , "Petal.Width")] }}} or {{{ `%notin%` <- Negate(`%in%`) iris[, names(iris) %notin% c ("Species" , "Petal.Width")] }}} --https://www.r-bloggers.com/the-notin-operator/ ==== 특정 행만 빼고 조회하기 ==== {{{ iris[!rownames(iris) %in% c(1, 2, 3),] }}} ==== 자료의 입력: data.frame() 함수 ==== data.frame() 함수를 이용하여 자료를 입력하는데, 구조 특성상 결합되는 자료 객체의 수(레코드)는 같아야 한다. 데이터 유형은 달라도 된다. 문자형 변수의 경우는 결합되는 과정에서 범주형(factor)으로 변환된다. 이를 방지하기 위해서 함수 I()를 사용하면 된다. (R 4.0.0부터는 stringsAsFactors의 기본값이 FALSE이므로 문자형 변수가 자동으로 범주형으로 변환되지 않는다.) {{{ > mat <- matrix(1:10, nrow=5) > mat [,1] [,2] [1,] 1 6 [2,] 2 7 [3,] 3 8 [4,] 4 9 [5,] 5 10 > dimnames(mat) <- list(c(1:5), c(paste("Var", 1:2, sep="."))) > mat Var.1 Var.2 1 1 6 2 2 7 3 3 8 4 4 9 5 5 10 > vec1 <- LETTERS[1:5] > vec2 <- letters[1:5] > vec1 [1] "A" "B" "C" "D" "E" > vec2 [1] "a" "b" "c" "d" "e" > df <- data.frame(mat, vec1, name=I(vec2)) > df Var.1 Var.2 vec1 name 1 1 6 A a 2 2 7 B b 3 3 8 C c 4 4 9 D d 5 5 10 E e > df <- data.frame(mat, vec1, 컬럼명=I(vec2)) > df Var.1 Var.2 vec1 컬럼명 1 1 6 A a 2 2 7 B b 3 3 8 C c 4 4 9 D d 5 5 10 E e > > df <- data.frame(mat, vec1, name=I(vec2), row.names=1) > df Var.2 vec1 name 1 6 A a 2 7 B b 3 8 C c 4 9 D d 5 10 E e > df <- data.frame(mat, vec1, name=I(vec2), row.names=vec2) > df Var.1 Var.2 vec1 name a 1 6 A a b 2 7 B b c 3 8 C c d 4 9 D d e 5 10 E e > }}} 벡터로 만들 수도 있다. 
{{{ > colA <- c(1:5) > colB <- c(6:10) > data.frame(colA, colB) colA colB 1 1 6 2 2 7 3 3 8 4 4 9 5 5 10 }}} ==== 자료의 입력: read.table() 함수 ==== * attachment:example2.txt * attachment:example3.txt example2.txt {{{ x y id1 1 2 id2 3 4 id3 5 NA id4 miss 7 }}} example3.txt {{{ id x y id1 1 2 id2 3 4 id3 5 6 }}} 이렇게 하면 에러가 난다. {{{ > data.f <- read.table(file="c:\\example3.txt", header=TRUE) 이하에 에러read.table(file = "c:\\example3.txt", header = TRUE) : more columns than column names }}} 이렇게 해야 한다. {{{ > #flush=FALSE 인 경우 > data.f <- read.table(file="c:\\example3.txt", skip=1, col.names=c("id", "x", "y"), flush=FALSE) > data.f id x y 1 id1 1 2 2 id2 3 4 3 id3 5 6 > #flush=TRUE 인 경우 > data.f <- read.table(file="c:\\example3.txt", skip=1, col.names=c("id", "x", "y"), flush=TRUE) > data.f id x y 1 id1 1 2 2 id3 5 6 }}} example2.txt 파일을 읽어보자. {{{ > data.f <- read.table(file="c:\\example2.txt", header=TRUE) > data.f x y id1 1 2 id2 3 4 id3 5 NA id4 miss 7 > }}} 문자열 'miss'가 걸린다. 이것을 missing value로 처리해보자. {{{ > data.f <- read.table(file="c:\\example2.txt", header=TRUE, na.strings=("miss")) > data.f x y id1 1 2 id2 3 4 id3 5 NA id4 NA 7 > }}} col.names뿐만 아니라 row.names도 줄 수 있다. {{{ > data.f <- read.table(file="c:\\example2.txt", header=TRUE, na.strings=("miss"), row.names=1) > data.f x y id1 1 2 id2 3 4 id3 5 NA id4 NA 7 > }}} ==== 데이터 프레임 sql server에 빠르게 입력하기 ==== {{{ df <- rs[1:10,] tname <- "dbo.predict_result" library(RODBC) fast_dbinsert <- function(df, tname){ #테이블 삭제 query <- paste0( "if object_id('", tname, "') is not null drop table ", tname) sqlQuery(conn, query) sqlSave(conn, df[0:0,], tablename = tname, rownames=FALSE) #데이터 입력 tmp_filename <- tempfile() write.table(df, tmp_filename, na = "\\N", row.names = FALSE, col.names = FALSE, quote = FALSE, sep = "\t") sqlQuery(conn, query) unlink(tmp_filename) } }}}