library("TeachingDemos") y <- rlnorm(500, 3, 2) par(mfrow=c(2,2)) qqnorm(y) qqnorm(bct(y,1/2)) qqnorm(bct(y,0)) hist(bct(y,0))
library("car") y <- rlnorm(500, 3, 2) hist(box.cox(y,0))
#x: 蠏殊 x <- c(2,3,4,6,8,9,10,12,13,15,17,19,20,23,25,26,27,29,30,33) #y: 磯 y <- c(1500,1640,1950,2700,2580,3200,3750,2960,4200,3960,4310,3990,4010,4560,4800,5300,5050,5450,5950,5650) result <- lm(y~x) summary(result)
Call: lm(formula = y ~ x) Residuals: Min 1Q Median 3Q Max -470.0 -290.8 -168.0 293.9 789.4 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 1708.117 173.489 9.846 1.13e-08 *** x 130.960 9.089 14.409 2.52e-11 *** --- Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1 Residual standard error: 386.6 on 18 degrees of freedom Multiple R-squared: 0.9202, Adjusted R-squared: 0.9158 F-statistic: 207.6 on 1 and 18 DF, p-value: 2.522e-11
#x1:, x2:, x3:豌伎, y1:螻給讌蠍 x1 <- c(28,46,39,25,34,29,38,23,42,27,35,39,38,32,25) x2 <- c(146,169,160,156,161,168,154,153,160,152,155,154,157,162,142) x3 <- c(34,57,48,38,47,50,54,40,62,39,46,54,57,53,32) y1 <- c(22,36,24,22,27,29,26,23,31,24,23,27,31,25,23) data <- data.frame(y1,x1,x2,x3)
#螳 蟯谿(壱) 覃 pairs(data)覃 . cov.wt(data, cor=T) > cov.wt(data, cor=T) $cov y1 x1 x2 x3 y1 16.31429 21.07143 20.01429 28.91429 x1 21.07143 48.66667 26.71429 53.64286 x2 20.01429 26.71429 52.25714 45.60000 x3 28.91429 53.64286 45.60000 82.54286 $center y1 x1 x2 x3 26.20000 33.33333 156.60000 47.40000 $n.obs [1] 15 $cor y1 x1 x2 x3 y1 1.0000000 0.7478150 0.6854618 0.7879319 x1 0.7478150 1.0000000 0.5297304 0.8463624 x2 0.6854618 0.5297304 1.0000000 0.6943082 x3 0.7879319 0.8463624 0.6943082 1.0000000 >蟆郁骸伎
summary(lm(y1~x1+x2+x3)) > summary(lm(y1~x1+x2+x3)) Call: lm(formula = y1 ~ x1 + x2 + x3) Residuals: Min 1Q Median 3Q Max -3.9976 -1.3822 0.3611 1.1549 3.9291 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -13.2173 17.6038 -0.751 0.469 x1 0.2014 0.1842 1.093 0.298 x2 0.1710 0.1316 1.300 0.220 x3 0.1249 0.1667 0.750 0.469 Residual standard error: 2.532 on 11 degrees of freedom Multiple R-squared: 0.6913, Adjusted R-squared: 0.6072 F-statistic: 8.213 on 3 and 11 DF, p-value: 0.003774 >蟆郁骸伎
library("MASS") fit <- lm(y1~x1+x2+x3,data=mydata) step <- stepAIC(fit, direction="both") step$anova # display results
x1 x2 x3 x4 x5 x6 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 覈 0 0 1 0 0 0 蠍 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1
m1 <- lm(formula=Height~Girth + Volume, data=trees) summary(m1)
> summary(m1) Call: lm(formula = Height ~ Girth + Volume, data = trees) Residuals: Min 1Q Median 3Q Max -9.7855 -3.3649 0.5683 2.3747 11.6910 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 83.2958 9.0866 9.167 6.33e-10 *** Girth -1.8615 1.1567 -1.609 0.1188 Volume 0.5756 0.2208 2.607 0.0145 * --- Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1 Residual standard error: 5.056 on 28 degrees of freedom Multiple R-squared: 0.4123, Adjusted R-squared: 0.3703 F-statistic: 9.82 on 2 and 28 DF, p-value: 0.0005868
m2 <- lm(formula=Height~Girth + Volume + Girth:Volume, data=trees) summary(m2)
> summary(m2) Call: lm(formula = Height ~ Girth + Volume + Girth:Volume, data = trees) Residuals: Min 1Q Median 3Q Max -6.7781 -3.5574 -0.1512 2.3631 10.5879 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 75.40148 8.49147 8.880 1.7e-09 *** Girth -2.29632 1.03601 -2.217 0.035270 * Volume 1.86095 0.47932 3.882 0.000604 *** Girth:Volume -0.05608 0.01909 -2.938 0.006689 ** --- Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1 Residual standard error: 4.482 on 27 degrees of freedom Multiple R-squared: 0.5546, Adjusted R-squared: 0.5051 F-statistic: 11.21 on 3 and 27 DF, p-value: 5.898e-05
fitted(m2)
coef(m2)[1] + coef(m2)[2]*trees$Girth + coef(m2)[3]*trees$Volume + coef(m2)[4]*trees$Girth*trees$Volume
#install.packages("Design",dependencies=T) library("Design") result <- lm(y1~x1+x2+x3) vif(result) > library("Design") 蟲 れ Hmisc襯 襦譴 れ れ襯 豌螳: 'Hmisc' The following object(s) are masked from package:base : format.pval, round.POSIXt, trunc.POSIXt, units 蟲 れ survival襯 襦譴 蟲 れ splines襯 襦譴 れ れ襯 豌螳: 'survival' The following object(s) are masked from package:Hmisc : untangle.specials Design library by Frank E Harrell Jr Type library(help='Design'), ?Overview, or ?Design.Overview') to see overall documentation. れ れ襯 豌螳: 'Design' The following object(s) are masked from package:survival : Surv, survfit > result <- lm(y1~x1+x2+x3) > vif(result) x1 x2 x3 3.607546 1.975832 5.010688 >蟆郁骸伎
par(mfrow=c(2,2)) plot(lm(formula = y1 ~ x1 + x2 + x3 + x1:x2))
> out <- lm(formula = y1 ~ x1 + x2 + x3 + x1:x2) > shapiro.test(resid(out)) Shapiro-Wilk normality test data: resid(out) W = 0.9669, p-value = 0.8099 >蟆郁骸 覲 p-value = 0.8099襦 '蠏覿 谿願 '朱 蠏覓願れ 譟磯 讌讌. 讀, 谿 蠏覿襯 磯ジ.
> library("lmtest") > bptest(out) studentized Breusch-Pagan test data: out BP = 5.6201, df = 4, p-value = 0.2294 >p-value螳 0.2294襦 譴 0.05 蠏覓願 讌讌. 讀, 焔一企. 襷 企(heterogenous variance) 蟆曙 螳譴豕螻(weighted least squares; nls()) . 覲覲 牛 蟲 .
> out <- lm(formula = y1 ~ x1 + x2 + x3 + x1:x2, weights=I(x1^(2))) > summary(out) Call: lm(formula = y1 ~ x1 + x2 + x3 + x1:x2, weights = I(x1^(2))) Residuals: Min 1Q Median 3Q Max -106.194 -34.272 4.358 30.456 85.645 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 206.98239 62.41049 3.316 0.00779 ** x1 -6.74461 1.84209 -3.661 0.00438 ** x2 -1.25865 0.40492 -3.108 0.01109 * x3 0.44112 0.13692 3.222 0.00915 ** x1:x2 0.04199 0.01105 3.800 0.00349 ** --- Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1 Residual standard error: 61.32 on 10 degrees of freedom Multiple R-squared: 0.8834, Adjusted R-squared: 0.8368 F-statistic: 18.94 on 4 and 10 DF, p-value: 0.0001167
> library("lmtest") > dwtest(out) Durbin-Watson test data: out DW = 2.3524, p-value = 0.7083 alternative hypothesis: true autocorrelation is greater than 0 >
data(airquality) lm1<-lm(Ozone~.,airquality) # full model lm2<-lm(Ozone~Solar.R+Wind +Month+Day,airquality) # reduced model anova(lm2,lm1)
y2 <- x1 #x1 譬覲 螳螻... summary(lm(cbind(y1,y2)~x2+x3)) > y2 <- x1 > summary(lm(cbind(y1,y2)~x2+x3)) Response y1 : Call: lm(formula = y1 ~ x2 + x3) Residuals: Min 1Q Median 3Q Max -3.5060 -1.5862 0.2502 0.8539 5.3777 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -9.8745 17.4765 -0.565 0.5825 x2 0.1493 0.1311 1.139 0.2770 x3 0.2678 0.1043 2.567 0.0247 * --- Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1 Residual standard error: 2.552 on 12 degrees of freedom Multiple R-squared: 0.6578, Adjusted R-squared: 0.6008 F-statistic: 11.53 on 2 and 12 DF, p-value: 0.001605 Response y2 : Call: lm(formula = y2 ~ x2 + x3) Residuals: Min 1Q Median 3Q Max -5.4716 -1.9151 -0.2964 1.9562 7.1935 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 16.5997 27.1675 0.611 0.552589 x2 -0.1079 0.2038 -0.529 0.606186 x3 0.7095 0.1622 4.375 0.000904 *** --- Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1 Residual standard error: 3.967 on 12 degrees of freedom Multiple R-squared: 0.7228, Adjusted R-squared: 0.6766 F-statistic: 15.65 on 2 and 12 DF, p-value: 0.0004537 >
> summary(model) Call: lm(formula = 糾 ~ + 蟆一, data = result) Residuals: Min 1Q Median 3Q Max -5.348 -2.274 -1.276 2.954 5.673 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 53.6832 14.1808 3.786 0.00431 ** 0.6073 0.1984 3.062 0.01353 * 蟆一 -1.9346 0.9144 -2.116 0.06348 . --- Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1 Residual standard error: 3.721 on 9 degrees of freedom Multiple R-squared: 0.8289, Adjusted R-squared: 0.7909 F-statistic: 21.8 on 2 and 9 DF, p-value: 0.0003544
> anova(model) #, 螻燕, 螻燕蠏 F螳, Analysis of Variance Table Response: 糾 Df Sum Sq Mean Sq F value Pr(>F) 1 541.69 541.69 39.1297 0.0001487 *** 蟆一 1 61.97 61.97 4.4762 0.0634803 . Residuals 9 124.59 13.84 --- Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1
螻燕 | 螻燕蠏 | F觜 | ||
蠏 | 603.66 | 11 | 301.83 | 21.802950 |
谿 | 124.59 | 9 | 13.84 | |
螻 | 728.25 | 11 |
fit <- lm(Volume ~ Girth + Height, data=trees) f <- summary(fit)$fstatistic pf(f[1],f[2],f[3],lower.tail=F)
confint(model, level=0.95) 2.5 % 97.5 % (Intercept) 502.5676 524.66261 poly(q, 3)1 1919.2739 2232.52494 poly(q, 3)2 -264.6292 48.62188 poly(q, 3)3 707.3999 1020.65097
model <- lm(Girth ~ Height + Volume, data=trees) summary(model)
> summary(model) Call: lm(formula = Girth ~ Height + Volume, data = trees) Residuals: Min 1Q Median 3Q Max -1.34288 -0.56696 -0.08628 0.80283 1.11642 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 10.81637 1.97320 5.482 7.45e-06 *** Height -0.04548 0.02826 -1.609 0.119 Volume 0.19518 0.01096 17.816 < 2e-16 *** --- Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1 Residual standard error: 0.7904 on 28 degrees of freedom Multiple R-squared: 0.9408, Adjusted R-squared: 0.9366 F-statistic: 222.5 on 2 and 28 DF, p-value: < 2.2e-16
#install.packages("QuantPsyc") library("QuantPsyc") lm.beta(model)
> lm.beta(model) Height Volume -0.09235166 1.02236871Volume 蟇一 覈 レ 殊海. 覓朱 beta襯 覲願鍵 Height 覲 覈語 蟇 蟆企.
y <- seq(1: nrow(x1)) dau <- x1$dau fit <- lm(dau ~ poly(y, 3)) summary(fit) plot(dau~y) lines(y, predict(fit, data.frame(y=y)), col="purple") newdata <- data.frame(y = seq(139,145)) predict(fit, newdata) out <- lm(dau~y + I(y^2) + I(y^3)) summary(out) p <- 145 3.131e+06 - p*2.637e+04 + (p^2) * 2.438e+02 - (p^3)*1.138e+00
plot(y ~ x, data=df) notin <- identify(df$x, df$y, labels=row.names(df)) fit <- lm(y ~ x - 1, data=df[!rownames(df) %in% notin,]) summary(fit) plot(y ~ x, data=df[!rownames(df) %in% notin,]) abline(fit)
se <- sqrt(diag(vcov(bu.model)))[2] ci.upr <- coef(bu.model)[2] + 1.96*se ci.lwr <- coef(bu.model)[2] - 1.96*se
# Make up some data x <- seq(0, 1, length.out = 20) y <- x^2 + rnorm(20) # fit a quadratic model <- lm(y ~ I(x^2)) fitted <- predict(model, interval = "confidence", level=.95) # plot the data and the fitted line plot(x, y) lines(x, fitted[, "fit"]) # now the confidence bands lines(x, fitted[, "lwr"], lty = "dotted") lines(x, fitted[, "upr"], lty = "dotted")
d <- data.frame(y=(1:10)^2,x=1:10) model <- lm(y~x,data=d) d$prediction <- predict(model,newdata=d) #R-squared 1-sum((d$prediction-d$y)^2)/sum((mean(d$y)- d$y)^2)
m <- lm(Girth~Height, trees) summary(m)
> summary(m) Call: lm(formula = Girth ~ Height, data = trees) Residuals: Min 1Q Median 3Q Max -4.2386 -1.9205 -0.0714 2.7450 4.5384 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -6.18839 5.96020 -1.038 0.30772 Height 0.25575 0.07816 3.272 0.00276 ** --- Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1 Residual standard error: 2.728 on 29 degrees of freedom Multiple R-squared: 0.2697, Adjusted R-squared: 0.2445 F-statistic: 10.71 on 1 and 29 DF, p-value: 0.002758
n <- length(trees$Girth) #覲語 p <- 1 #襴暑 R2 <- cor(trees$Girth, fitted(m))^2 #R-squared 1 - ((n-1)*(1-R2)/(n-p-1)) #Adjusted R-squared
as.lm.nls <- function(object, ...) { if (!inherits(object, "nls")) { w <- paste("expected object of class nls but got object of class:", paste(class(object), collapse = " ")) warning(w) } gradient <- object$m$gradient() if (is.null(colnames(gradient))) { colnames(gradient) <- names(object$m$getPars()) } response.name <- if (length(formula(object)) == 2) "0" else as.character(formula(object)[[2]]) lhs <- object$m$lhs() L <- data.frame(lhs, gradient) names(L)[1] <- response.name fo <- sprintf("%s ~ %s - 1", response.name, paste(colnames(gradient), collapse = "+")) fo <- as.formula(fo, env = as.proto.list(L)) do.call("lm", list(fo, offset = substitute(fitted(object)))) }
predict(as.lm.nls(fit), data.frame(x=xx), interval="prediction", level=.99)
x <- c(3,4,5,6,7) y <- c(4.8, 6, 7.2, 7.8, 8.2) fit1 <- lm(y ~ x) summary(fit1) plot(x,y) abline(fit1) library("zyp") fit2 <- zyp.sen(y ~ x) fit2 abline(coef=coef(fit2), col="red")
y <- c(4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14) x <- c(4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69) model <- lm(y ~ x) library("DMwR") regr.eval(y, fitted(model)) plot(x,y) abline(model)
> regr.eval(y, fitted(model)) mae mse rmse mape 0.41276110 0.24190200 0.49183534 0.08399855
select (count(*)*sum(x*y)-(sum(x)*sum(y)))/(count(*)*sum(x*x)-(sum(x)*sum(x))) slope , avg(y)-((count(*)*sum(x*y))-(sum(x)*sum(y)))/((count(*)*sum(power(x,2)))-power(sum(x),2))*avg(x) intercept from tbl_xy
create view v_test as select 98 x, 1 y union all select 90, 2 union all select 90, 3 union all select 65, 4 union all select 66, 5 union all select 67, 6 union all select 13, 7 union all select 44, 8 union all select 27, 9 go declare @x nvarchar(255) , @y nvarchar(255) , @tablenm nvarchar(255) , @where nvarchar(1000) , @a nvarchar(255) , @b nvarchar(255) , @sql nvarchar(max); set @x = 'x'; set @y = 'y'; set @tablenm = 'select * from v_test'; set @where = ''; set @sql = ' declare @cnt int , @a decimal(38,5) , @b decimal(38,5) , @c decimal(38,5) , @d decimal(38,5) , @xavg decimal(38,5) , @yavg decimal(38,5) , @a1 decimal(38,5) , @b1 decimal(38,5); select @cnt = count(*) , @a = sum(convert(decimal(38,5), ' + @x + ')) , @b = sum(convert(decimal(38,5), ' + @y + ')) , @c = sum(convert(decimal(38,5), power(' + @x + ', 2))) , @d = sum(convert(decimal(38,5), ' + @x + '*' + @y + ')) , @xavg = avg(convert(decimal(38,5), ' + @x + ')) , @yavg = avg(convert(decimal(38,5), ' + @y + ')) from (' + @tablenm + ') as t; set @a1 = ((@b*@c) - (@a*@d)) / (@cnt*@c - power(@a, 2)); set @b1 = ((@cnt*@d) - (@a*@b)) / (@cnt*@c - power(@a, 2)); select 蠏 , round(r2, 4) [R-squared] , round(1-(1-r2)*(@cnt-1)/(@cnt-2),4) [Adjusted R-squared] , round(corr, 4) 蟯螻 from ( select ''y = ''+ convert(nvarchar, @a1) + '' + '' + convert(nvarchar, @b1) + ''x'' 蠏 , 1 - var(' + @y + '-(@a1 + @b1*' + @x + ')) / var(' + @y + ') r2 , ((1.00/(@cnt - 1)*sum((' + @x + '-@xavg) * (' + @y + '-@yavg))) / sqrt(var(' + @x + ') * var(' + @y + '))) corr from (' + @tablenm + ') as t where 1=1 ' + @where + ' ) t select row_number() over(order by ' + @x + ') seq , ' + @x + ' x , ' + @y + ' y , power(' + @x + ', 2) x2 , ' + @x + '*' + @y + ' xy , @a1 + @b1*' + @x + ' estimatedvalue_linear --, @a1 * log(@a) + @b1 *' + @x + ' estimatedvalue_log , ' + @y + '-(@a1 + @b1*' + @x + ') diffvalue from (' + @tablenm + ') as t where 1=1 ' + @where + ' order by 2; '; print @sql; exec(@sql);