2018. 6. 21. 12:23ㆍ깜신의 통계 이야기
2018.6.21일자 한림대학교 동탄성심병원 통계워크샵 자료
data("mtcars")
View(mtcars)
table(mtcars$cyl, mtcars$am)
##### 0,1로 필업된 데이터를 원래 의미로 바꾸는 방법 하나. #####
mtcars$tm = ifelse(mtcars$am==0,"automatic","manual")
result = table(mtcars$cyl, mtcars$tm)
##### 0,1로 필업된 데이터를 원래 의미로 바꾸는 방법 둘. #####
mtcars$sm = factor(mtcars$am, labels=c("automatic", "manual"))
table(mtcars$cyl, mtcars$sm)
###### 행과 열의 끝에 총합을 추가하는 방법 #######
addmargins(result)
##### 카이제곱 검정과 피셔의 정확한 검정 ########
chisq.test(result)
fisher.test(result)
##### Cochran Armitage Trend Test ###############
require(moonBook)
data(acs)
View(acs)
table(acs$HBP, acs$smoking)
acs$smoking = factor(acs$smoking, levels=c("Never", "Ex-smoker","Smoker"))
result = table(acs$HBP, acs$smoking)
prop.trend.test(x,n) #x는 number of events, n은 number of trials
result[2,]
colSums(result)
prop.trend.test(result[2,],colSums(result))
######### 모자이크 플롯을 그리는 방법 ###############
mosaicplot(result)
mosaicplot(result, color=c("tan1","firebrick2"))
t(result)
mosaicplot(t(result), color=c("tan1","firebrick2"), ylab="Hypertension", xlab="Smoking")
######### R스튜디오에 있는 색상 열람표 ##########
colors()
demo("colors")
result = table(mtcars$cyl, mtcars$sm)
barplot(result)
barplot(result, ylim=c(0,20), legend=rownames(result))
mylegend = paste(rownames(result),"cyl")
mylegend
barplot(result, ylim=c(0,20), legend=mylegend)
barplot(result, ylim=c(0,20), legend=mylegend, beside=TRUE)
barplot(result, ylim=c(0,20), legend=mylegend, beside=TRUE, horiz=TRUE)
mycol = c("tan1", "coral2", "firebrick2")
barplot(result, ylim=c(0,20), legend=mylegend, beside=TRUE, horiz=TRUE,col=mycol)
#######################################################
##################### 상관분석 ##########################
################# Corelation test #########################
#######################################################
install.packages("UsingR")
library(UsingR)
data("galton")
str(galton)
View(galton)
plot(child~parent, data =galton)
cor.test(galton$child, galton$parent)
out = lm(child~parent, data= galton)
summary(out)
# y = 23.94153 + 0.64629 x X
abline(out, col ="red")
plot(jitter(child,5)~jitter(parent,5), galton)
sunflowerplot(galton)
install.packages("SwissAir")
library(SwissAir)
data("AirQual")
View(AirQual)
str(AirQual)
Ox <- AirQual [ ,c("ad.O3","lu.O3","sz.O3")] +
AirQual[, c("ad.NOx","lu.NOx","sz.NOx")]-
AirQual[, c("ad.NO", "lu.NO","sz.NO")]
names(Ox) <- c("ad","lu","sz")
plot(lu~sz, data= Ox)
install.packages("hexbin")
library(hexbin)
bin = hexbin(Ox$lu, Ox$sz, xbins = 50)
plot(bin)
smoothScatter(Ox$lu, Ox$sz)
install.packages("IDPmisc")
library(IDPmisc)
iplot(Ox$lu, Ox$sz)
par(mfrow=c(1,1))
#################################################################
###################### 단순선형회귀 ################################
women
plot(weight~height, data=women)
plot(weight~height, data=women)
fit <- lm(weight~height, data=women)
fit <- lm(weight~height, data=women)
abline(fit, col="blue")
abline(fit, col="blue")
summary(fit) #-87.51667은 y절편이라고함.
cor.test(women$height, women$weight) # 상관계수 구하는 함수
cor.test(women$weight, women$height)
0.9954948^2
plot(fit)
par(mfrow=c(2,2))
plot(fit)
par(mfrow=c(1,1))
### 다항회귀(polynomial regression)
fit2 <- lm(weight~ height + I(height^2) , data=women)
fit2 <- lm(weight~height+ I(height^2), data = women)
summary(fit2)
lines(women$height, fitted(fit2), col="red")
plot(fit2)
fit3 <- lm(weight~height + I(height^2) + I(height^3), data = women)
plot(fit3)
fit4 <- lm(weight~ height + I(height^2), data=women[-c(13,15),])
plot(fit4)
install.packages("gvlma")
library(gvlma)
gvmodel <- gvlma(fit4)
summary(gvmodel)
######## 다중회귀분석 ##########
state.x77
View(state.x77)
states <- as.data.frame(state.x77[ , c("Murder","Population","Illiteracy","Income","Frost")])
states
View(states)
fit = lm(Murder~Population +Illiteracy + Income + Frost , data=states)
shapiro.test(states$Population)
fit = lm(Murder~Population + Illiteracy, data=states)
plot(fit)
summary(fit)
#install.packages("car", dependencies = TRUE)
library(car)
vif(fit)
sqrt(vif(fit))
###### 이상관측치 #######
influencePlot(fit, id.method = "identify")
states["Nevada", ]
fitted(fit)["Nevada"]
residuals(fit)["Nevada"]
##### 회귀모형의 교정 ######
states
summary(powerTransform(states$Murder))
boxTidwell(Murder~ Population + Illiteracy , data= states)
ncvTest(fit)
spreadLevelPlot(fit)
##### 예측 변수 선택 #####
fit1 <- lm(Murder~ ., data=states)
summary(fit1)
fit2 <- lm(Murder ~ Population + Illiteracy , data=states)
summary(fit2)
#AIC (Akaike's An information Criterion)
AIC(fit1, fit2)
###### stepwise regression (Backward stepwise regression, Forward stepwise regression)
#Backward stepwise regression
full.model = lm(Murder~. , data = states)
reduced.model = step(full.model, direction = "backward")
summary(reduced.model)
#Forward stepwise regression
min.model = lm(Murder~1, data = states)
fwd.model <- step(min.model, direction="forward", scope = (Murder~Population+ Illiteracy + Income
+ Frost), trace=0)
summary(fwd.model)
## all subset regression
library(leaps)
leaps <- regsubsets(Murder~ Population + Illiteracy + Income + Frost, data= states, nbest=4)
plot(leaps, scale="adjr2")
########## Logistic Regression ########
require(survival)
str(colon)
colon1 <- na.omit(colon)
View(colon)
View(colon1)
result <- glm(status ~ rx+sex+age+obstruct+perfor + adhere + nodes + differ + extent + surg, family = binomial, data=colon1)
summary(result)
reduced.model = step(result)
summary(reduced.model)
require(moonBook)
extractOR(reduced.model)
fit = glm(formula = status ~ rx + obstruct + adhere + nodes + extent + surg, family = binomial, data = colon1)
fit.od = glm(formula = status ~ rx + obstruct + adhere + nodes + extent + surg, family = quasibinomial, data = colon1)
pchisq(summary(fit.od)$dispersion*fit$df.residual, fit$df.residual, lower = F)
#0.2803691이 값이 0.05보다 크다면 과산포는 없다고 확신할 수 있습니다.
?ORplot()
plot()
ORplot(fit, main = "Plot for Odds Ratios")
ORplot(fit, type=2, show.OR=FALSE, show.CI=TRUE, pch=15, lwd=2, col=c("darkblue", "red"), main="Plot of OR" )
ORplot(fit, type=3, show.OR=FALSE, show.CI=TRUE, pch=15, lwd=2, col=c("darkblue", "red"), main="Plot of OR")
##### Poisson Regression #######
install.packages("robust")
library(robust)
data(breslow.dat, package = "robust")
summary(breslow.dat)
install.packages("qcc")
library(qcc)
qcc.overdispersion.test(breslow.dat$sumY, type="poisson")
fit = glm(sumY ~ Base + Age + Trt, family = quasipoisson, data= breslow.dat)
summary(fit)
install.packages("moonBook")
library(moonBook)
extractOR(fit)
extractOR(fit, digits = 3)
ORplot(fit, type = 2, show.CI=TRUE, main="Plot for Quasipoisson")
#### Survival Analysis #####
require(survival)
data("colon")
View(colon)
colon <- na.omit(colon)
rm(colon1)
str(colon)
colon$TS <- Surv(colon$time,colon$status==1)
fit = survfit(TS ~ rx, data = colon)
plot(fit)
plot(fit,col=1:3, lty=1:3)
legend("topright", legend=levels(colon$rx), col=1:3, lty = 1:3)
###### Cumulative hazard Start #####
plot(fit, col=1:3, lty=1:3, fun ="cumhaz", mark.time=FALSE, ylab="Cumulative hazard")
legend("topleft", legend = levels(colon$rx), col=1:3, lty =1:3)
###### Cumulative hazard End #######
#### Log-rank test ####
survdiff(Surv(time, status ==1)~rx, data=colon)
#### Cox Regression #####
out = coxph(Surv(time, status ==1)~rx, data=colon)
summary(out)
###### Hazard ratios of all individual variables #####
colon$TS <- Surv(colon$time,colon$status==1)
out = coxph(colon$TS~rx, data=colon )
install.packages("moonBook")
require(moonBook)
attach(colon)
out = mycph(TS~.-id-study-time-status-etype, data=colon)
out2 =coxph(TS~. -id-study-time-status-etype, data=colon)
final =step(out2, direction = "backward")
HRplot(out, type =2, show.CI = TRUE)
'깜신의 통계 이야기' 카테고리의 다른 글
로지스틱회귀분석 그래프 그리기 - 깜신의 통계 왕초보 탈출 38탄 (0) | 2018.08.29 |
---|---|
한림대학교 동탄성심병원 통계워크샵-회귀분석 (0) | 2018.07.05 |
로지스틱 회귀분석 R에서 따라하기 2부 - 깜신의 통계 왕초보 탈출 37탄 (2) | 2018.05.31 |
로지스틱 회귀분석 R에서 따라하기 1부 - 깜신의 통계 왕초보 탈출 36탄 (2) | 2018.05.29 |
로지스틱 회귀분석 개념 따라잡기 - 깜신의 통계 왕초보 탈출 35탄 (0) | 2018.05.28 |