본문 바로가기

깜신의 통계 이야기

2017년 10월 10일 통계워크샵 7차 사전배포 자료

통계워크샵 7차에서는 회귀분석(Part 1)을 알아봅니다.

실습할 내용을 R 스크립트 파일로 미리 배포해드립니다.


김종엽 드림.


L_reg.R


위 파일에는 아래 내용이 담겨있습니다.


###### Simple linear regression ######

# Built-in `women` data set: print it, then plot weight against height.
women

plot(weight ~ height, data = women)

# Fit weight on height with lm() and overlay the fitted line in blue.
fit <- lm(weight ~ height, data = women)
abline(fit, col = "blue")

summary(fit)

# Correlation test between the two variables.
cor.test(women$weight, women$height)

# Squared correlation, computed from the data rather than from a hand-copied
# constant. For a single-predictor model this equals the R-squared reported
# by summary(fit).
cor(women$weight, women$height)^2

# Diagnostic plots, first one at a time ...
plot(fit)

# ... then all four on a 2 x 2 grid. par() returns the previous settings, so
# the layout that was active before is restored afterwards.
old_par <- par(mfrow = c(2, 2))
plot(fit)
par(old_par)


### Polynomial regression

# Quadratic model. I() makes ^ mean arithmetic squaring inside the formula.
fit2 <- lm(weight ~ height + I(height^2), data = women)
summary(fit2)

# Redraw the scatter plot before adding the fitted curve: lines() adds to
# whichever plot is current, and any diagnostic plots drawn earlier would
# otherwise be the target (or, with no plot open, lines() would fail).
plot(weight ~ height, data = women)
lines(women$height, fitted(fit2), col = "red")

# Diagnostic plots for the quadratic model.
plot(fit2)

# Cubic model and its diagnostic plots.
fit3 <- lm(weight ~ height + I(height^2) + I(height^3), data = women)
plot(fit3)


# Assumption checks via the gvlma package.
# Install the package only when it is missing, so that re-running the script
# does not reinstall it (or require network access) every time.
if (!requireNamespace("gvlma", quietly = TRUE)) {
  install.packages("gvlma")
}
library(gvlma)

# `fit` is the model object created earlier in the script.
gvmodel <- gvlma(fit)
summary(gvmodel)


######## Multiple regression ########

# Print the built-in state.x77 matrix.
state.x77

# Convert to a data frame and keep the response (Murder) plus four predictors.
states <- as.data.frame(state.x77)[
  c("Murder", "Population", "Illiteracy", "Income", "Frost")
]
states

# Regress Murder on the four predictors; note this replaces any earlier `fit`.
fit <- lm(
  formula = Murder ~ Population + Illiteracy + Income + Frost,
  data = states
)

plot(fit)
summary(fit)


# The car package provides vif(). Install it (with its dependencies) only
# when it is missing, so re-running the script does not reinstall it.
if (!requireNamespace("car", quietly = TRUE)) {
  install.packages("car", dependencies = TRUE)
}
library(car)

# Variance inflation factors for the predictors in `fit`, then their square
# roots.
vif(fit)
sqrt(vif(fit))


###### Unusual observations ######

# Influence plot with interactive point labelling. Since car 3.0 the
# point-identification options are passed as a list through `id`; the older
# `id.method` argument is no longer part of the interface.
influencePlot(fit, id = list(method = "identify"))

# Inspect one observation: its data row, fitted value and residual.
states["Nevada", ]
fitted(fit)["Nevada"]
residuals(fit)["Nevada"]


##### Corrective measures for the regression model #####

# Print the working data frame.
states

# Power-transformation summary for the response variable.
summary(powerTransform(states$Murder))

# Box-Tidwell procedure for the two listed predictors.
boxTidwell(Murder ~ Population + Illiteracy, data = states)

# Non-constant error variance: score test, then spread-level plot.
ncvTest(fit)
spreadLevelPlot(fit)


##### Choosing predictors #####

# Candidate 1: Murder regressed on every other column of `states`.
fit1 <- lm(Murder ~ ., data = states)
summary(fit1)

# Candidate 2: only Population and Illiteracy as predictors.
fit2 <- lm(Murder ~ Population + Illiteracy, data = states)
summary(fit2)

# AIC (Akaike's An Information Criterion) for both candidates side by side.
AIC(fit1, fit2)


###### Stepwise regression (backward and forward) ######

# Backward: start from the model that uses every column of `states` as a
# predictor and let step() work in the "backward" direction.
full.model <- lm(Murder ~ ., data = states)
reduced.model <- step(full.model, direction = "backward")
summary(reduced.model)

# Forward: start from the intercept-only model; `scope` names the terms
# step() is allowed to add. trace = 0 turns off the per-step printout.
min.model <- lm(Murder ~ 1, data = states)
fwd.model <- step(
  min.model,
  direction = "forward",
  scope = Murder ~ Population + Illiteracy + Income + Frost,
  trace = 0
)
summary(fwd.model)


## All-subsets regression

# Install leaps only when it is missing, matching how the other add-on
# packages in this script are made available before library() is called.
if (!requireNamespace("leaps", quietly = TRUE)) {
  install.packages("leaps")
}
library(leaps)

# Search over subsets of the four predictors, keeping up to nbest = 4 models
# per subset size, then plot them ranked by adjusted R-squared.
leaps <- regsubsets(Murder ~ Population + Illiteracy + Income + Frost,
                    data = states, nbest = 4)
plot(leaps, scale = "adjr2")