#title R그래픽스-유용한 예제들 [[TableOfContents]] ==== 3d surface ==== https://stackoverflow.com/questions/41700400/smoothing-3d-plot-in-r {{{ library(mgcv) x<- rnorm(200) y<- rnorm(200) z<-rnorm(200) tab<-data.frame(x,y,z) tab #surface wireframe: mod <- gam(z ~ te(x, y), data = tab) library(rgl) library(deldir) zfit <- fitted(mod) col <- cm.colors(20)[1 + round(19*(zfit - min(zfit))/diff(range(zfit)))] persp3d(deldir(x, y, z = zfit), col = col) aspect3d(1, 2, 1) }}} attachment:R그래픽스-유용한예제들/3d_surface.png ==== magick ==== 이미지 처리 패키지 * https://cran.r-project.org/web/packages/magick/vignettes/intro.html ==== 그룹별 분포를 3차원스럽게 보이기(ggjoy) ==== * http://rpubs.com/ianrmcdonald/293304 * http://mran.microsoft.com/web/packages/ggjoy/vignettes/gallery.html 대략 이런 모양 attachment:R그래픽스-유용한예제들/ggjoy.png --출처: http://rpubs.com/ianrmcdonald/293304 더 이쁘게.. * https://www.r-bloggers.com/joyplot-for-gsea-result/ ==== ggplot x축 비틀기 ==== {{{ ggplot(df1, aes(x=grp, y=val)) + geom_boxplot(outlier.shape = NA) + ylim(0, 30) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) }}} * 45도 비틀기: theme(axis.text.x = element_text(angle = 45, hjust = 1)) * 90도 비틀기: theme(axis.text.x = element_text(angle = 90, hjust = 1)) ==== 잔차 plot ==== --Practical Data Science with R, ISBN 978-1-61729-156-2 {{{ d <- data.frame(y=(1:10)^2,x=1:10) model <- lm(y~x,data=d) d$prediction <- predict(model,newdata=d) library('ggplot2') ggplot(data=d) + geom_point(aes(x=x,y=y)) + geom_line(aes(x=x,y=prediction),color='blue') + geom_segment(aes(x=x,y=prediction,yend=y,xend=x)) + scale_y_continuous('') }}} ==== 버블차트(bubble chart) ==== {{{ dfx = data.frame(ev1=1:10, ev2=sample(10:99, 10), ev3=10:1) symbols(x=dfx$ev1, y=dfx$ev2, circles=dfx$ev3, inches=1/3, ann=F, bg="steelblue2", fg=NULL) }}} ==== area chart 아니고, 범위(상한~하한) 영역 칠하기 ==== {{{ polygon(c(x, rev(x)), c(y$upr, rev(y$lwr)), col = "gray", border = NA) }}} ==== 이중축 ==== plotrix 패키지 참고 및 출처 --> http://rpubs.com/cardiomoon/19042 {{{ library("plotrix") going_up <- seq(3, 7, by = 0.5) + rnorm(9) going_down <- rev(60:74) + rnorm(15) twoord.plot(2:10, going_up, 1:15, going_down, xlab = "Sequence", ylab = "Ascending values", rylab = "Descending values", lcol = 4, main = "Plot with two ordinates - points and lines", do.first = "plot_bg();grid(col=\"white\",lty=1)") twoord.plot(2:10, going_up, 1:15, going_down, xlab = "Sequence", lylim = range(going_up) + c(-1, 10), rylim = range(going_down) + c(-10, 2), ylab = "Ascending values", ylab.at = 5, rylab = "Descending values", rylab.at = 65, lcol = 4, main = "Plot with two ordinates - separated lines", lytickpos = 3:7, rytickpos = seq(55, 75, by = 5), do.first = "plot_bg();grid(col=\"white\",lty=1)") }}} 다른 예제 {{{ set.seed(2015-04-13) d = data.frame(x =seq(1,10), n = c(0,0,1,2,3,4,4,5,6,6), logp = signif(-log10(runif(10)), 2)) #좌 par(mar = c(5,5,2,5)) with(d, plot(x, logp, type="l", col="red3", ylab=expression(-log[10](italic(p))), ylim=c(0,3))) #우 par(new = T) with(d, plot(x, n, pch=16, axes=F, xlab=NA, ylab=NA, cex=1.2)) axis(side = 4) mtext(side = 4, line = 3, 'Number genes selected') legend("topleft", legend=c(expression(-log[10](italic(p))), "N genes"), lty=c(1,0), pch=c(NA, 16), col=c("red3", "black")) }}} --출처: http://www.r-bloggers.com/r-single-plot-with-two-different-y-axes/ ==== abline, legend ==== {{{ library(party) library(caret) gtree <- ctree(Species ~ ., data = iris) plot(gtree) attach(iris) colour <- c("black", "red", "blue") plot(Petal.Width, Petal.Length, pch=20, col=c(colour[Species])) abline(h=4.8, col="blue", lty=2);text(0.75,5,"Petal.Length > 4.8", col="blue") abline(h=1.9, col="black", lty=2);text(1,2.2,"Petal.Length > 1.9", col="black") abline(v=1.7, col="red", lty=2);text(2,4,"Petal.Width > 1.7", col="red") legend(0.1, 6.5, c("setosa","versicolor", "virginica"), pch=20, col=colour) }}} ==== 인터랙티브 플롯팅 ==== * [https://support.rstudio.com/hc/en-us/articles/200551906-Interactive-Plotting-with-Manipulate Interactive Plotting with Manipulate] ==== xlab, ylab, title 조정 ==== {{{ for(i in 1:max(cl$cluster)){ p_cluster <- ggplot(tmp[tmp$cluster == i, ], aes(x=variable, y=value, colour=factor(cluster))) p_cluster + geom_line() + ggtitle(paste0("cluster", i))+theme(axis.text=element_text(size=20), axis.title=element_text(size=20,face="bold"),plot.title=element_text(family="Times", face="bold", size=20)) ggsave(file=paste0("c:\\plot\\cluster", i, ".png")) } }}} ==== text ==== {{{ prior <- seq(0, 1, 0.1) posterior <- (piror*1/2)/(piror*1/2+(1-piror)*0.09) plot(prior, posterior, main="친자확인소송", type="l") abline(0,1) abline(h=0.5, v=0.5) point_label <- paste0("(", prior,", ", round(posterior, 3), ")") text(prior, posterior, point_label, cex=0.8, pos=4, col="red") }}} ==== parallel plot? ==== * http://stat.ethz.ch/R-manual/R-patched/library/MASS/html/parcoord.html * http://www.r-bloggers.com/alluvial-diagrams/ --> Alluvial diagrams * http://rgm3.lab.nig.ac.jp/RGM/R_rdfile?f=ggparallel/man/ggparallel.Rd&d=R_CC ==== 범례의 그림크기 조절 ==== {{{ p<-ggplot(df, aes(x=일자, y=게임수, colour=장르)) + geom_point(size=4) + facet_wrap( ~ 국가, nrow=3) p + theme( strip.text.x = element_text(size=20), legend.title = element_text(size=20, face="bold"), legend.text = element_text(size = 20, face = "bold")) + guides(size=10,colour = guide_legend(override.aes = list(size=7))) }}} ==== x축 라벨바꾸기 ==== {{{ plot(tmp1$stage_no, tmp1$md, axes = FALSE, xlab="스테이지", ylab="정체판수") axis(1, tmp1$stage_no,tmp1$stage_nm, las=2) #las=2는 라벨을 세로로 axis(2);box() }}} ==== x축 날짜형식 ==== {{{ library(ggplot2) library("scales") #date_format() ggplot(loess.df, aes(x=dt, y=s)) + geom_point() + geom_smooth() + labs(x = "일자", y = "일 시장규모(억)") + scale_x_date(labels = date_format("%Y-%m")) }}} {{{ #xaxt='n' 옵션을 줘야한다. plot(df$dt, df$amt, type="l", xaxt='n') axis.Date(side=1, df$dt, format = "%Y-%m-%d") #strptime(x1, "%Y-%m-%d %H:%M:%OS") }}} ==== ggplot2::stat_density2d() ==== 2차원 그래프로 산점도로는 잘 보이지 않는 데이터들의 그룹을 식별하기 좋다. {{{ rs <- data.frame() for(i in 1:4){ x <- c(rnorm(200,0,i),rnorm(200,4,i)) y <- c(rnorm(200,0,i),rnorm(200,4,i)) grp <- replicate(length(x), i) rs <- rbind(rs, data.frame(grp, x, y)) } library(ggplot2) p <- ggplot(rs, aes(x=x, y=y)) p + stat_density2d() + facet_wrap( ~ grp, nrow=2) }}} attachment:R그래픽스-유용한예제들/stat_density2d.png ==== boxplot.2d ==== {{{ #install.packages("hdrcde") library("hdrcde") x <- c(rnorm(200,0,1),rnorm(200,4,1)) y <- c(rnorm(200,0,1),rnorm(200,4,1)) par(mfrow=c(2,2)) plot(x,y, pch="+", cex=.5) hdr.boxplot.2d(x,y) plot(hdr.2d(x, y), pointcol="red", show.points=TRUE, pch=3) par(mfrow=c(1,1)) }}} attachment:R그래픽스-유용한예제들/hdr.png ==== 기본 scatter plot ==== {{{ colour <- c("red", "blue", "black") plot(iris$Sepal.Length, iris$Sepal.Width, col=c(colour[iris$Species])) }}} attachment:R그래픽스-유용한예제들/basic_scatter_plot.png ==== 마우스로 클릭하여 값 알아내기 ==== {{{ plot(dau ~ dau_pred, data=grossing) }}} identify() 함수를 사용하면 알고자 하는 값들을 클릭 후 esc를 누르면 차트에 클릭된 곳의 값을 인식한다. {{{ #rowname()이 차트에 찍힌다. identify(grossing$dau_pred, grossing$dau, labels=row.names(grossing)) #x,y값이 찍힌다. 물론 esc를 눌렀을 때에 순서대로 rowname도 출력된다. identify(grossing$dau_pred, grossing$dau, labels=paste0("x=",grossing$dau_pred, "\ny=", grossing$dau)) }}} 아래의 코드는 identify() 함수와 달리 클릭하자마자 값을 알 수 있다. {{{ repeat { click.loc <- locator(1) if(!is.null(click.loc)){ text( x = click.loc$x, y = click.loc$y, labels = paste0("x=",round(click.loc$x,0), "\ny=", round(click.loc$y)), cex = 0.8, col = "blue" ) } else break } }}} ==== smoothScatter ==== 일반적으로 산점도(scatter plot)를 그리면 다음과 같다. {{{ plot(x,y) }}} attachment:R그래픽스-유용한예제들/smoothScatter01.png 그런데, 데이터의 양이 많으면 위와 같이 데이터의 분포를 파악하기 어렵다. smoothScatter()는 이런 어려움을 극복할 수 있게 시각화 해준다. 아래의 그림을 보면 3개 정도의 군집이 있는 것을 확인 할 수 있다. {{{ library(graphics) smoothScatter(x, y) }}} attachment:R그래픽스-유용한예제들/smoothScatter02.png ==== ggpairs ==== pairs()함수는 data.frame에 대해 산점도를 그려준다. 이 자체로도 정보가 많지만, 더욱 많은 정보를 줄 수 있는 산점도도 있다. 그게 ggpairs()다. 단점은 느리다는 것. {{{ library("GGally") ggpairs(iris) }}} attachment:R그래픽스-유용한예제들/ggpairs.png ==== pairs(커스터마이징된) ==== {{{ panel.cor <- function(x, y, digits=2, prefix="", cex.cor, ...) { usr <- par("usr") on.exit(par(usr)) par(usr = c(0, 1, 0, 1)) r <- abs(cor(x, y, use="complete.obs")) txt <- format(c(r, 0.123456789), digits=digits)[1] txt <- paste(prefix, txt, sep="") if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt) text(0.5, 0.5, txt, cex = cex.cor * (1 + r) / 2) } panel.hist <- function(x, ...) { usr <- par("usr") on.exit(par(usr)) par(usr = c(usr[1:2], 0, 1.5) ) opar=par(ps=30) #font-size h <- hist(x, plot = FALSE) breaks <- h$breaks nB <- length(breaks) y <- h$counts y <- y/max(y) rect(breaks[-nB], 0, breaks[-1], y, col="white", ...) } panel.lm <- function (x, y, col = par("col"), bg = NA, pch = par("pch"), cex = 1, col.smooth = "black", ...) { points(x, y, pch = pch, col = col, bg = bg, cex = cex) abline(stats::lm(y ~ x), col = col.smooth, ...) } pairs(iris, pch=".", upper.panel = panel.cor, diag.panel = panel.hist, lower.panel = panel.lm) #또는 비모수 panel.lowess <- function (x, y, col = par("col"), bg = NA, pch = par("pch"), cex = 1, col.smooth = "black", ...) { points(x, y, pch = pch, col = col, bg = bg, cex = cex) lines(lowess(x,y)) } pairs(iris, pch=".", upper.panel = panel.cor, diag.panel = panel.hist, lower.panel = panel.lowess) }}} attachment:R그래픽스-유용한예제들/pairs.png ==== Spider(Radar) Chart ==== {{{ tmp <- df[df$job_id == i, 4:7] title <- head(df[df$job_id == i, 2],1) tmp <- rbind(rep(12000,4) , rep(0,4), tmp) #max, min값을 세팅해야 함. rownames(tmp) <- c("1", "2", "Active", "Churn") colors_border <- c( rgb(0.2,0.5,0.5,0.9), rgb(0.8,0.2,0.5,0.9) , rgb(0.7,0.5,0.1,0.9) ) colors_in <- c( rgb(0.2,0.5,0.5,0.4), rgb(0.8,0.2,0.5,0.4) , rgb(0.7,0.5,0.1,0.4)) #radarchart(tmp, axistype=1, pcol=colors_border, plwd=2, plty=1, axislabcol="grey", cglcol="gray", caxislabels=seq(0,12000,2000), cglwd=0.8, vlcex=0.8, seg=4, title=title) radarchart(tmp, axistype=1, pcol=colors_border, plwd=2, plty=1, axislabcol="grey", cglcol="gray", caxislabels=seq(0,12000,2000), cglwd=0.8, vlcex=0.8, seg=4, title="") legend(x=0.7, y=1, legend = rownames(tmp[-c(1,2),]), bty = "n", pch=20 , col=colors_in , text.col = "black", cex=1.2, pt.cex=3) }}} ---- 대박 정보네요. 감사합니다. -- shanmdphd 2017-05-25 14:42:48