한림대학교 동탄성심병원 통계워크샵 자료집

한림대학교 동탄성심병원 통계워크샵 자료집

2018. 6. 21. 12:23ㆍ깜신의 통계 이야기

2018.6.21일자 한림대학교 동탄성심병원 통계워크샵 자료

lecture_dongtan.R

data("mtcars")

View(mtcars)

table(mtcars$cyl, mtcars$am)

##### 0,1로 필업된 데이터를 원래 의미로 바꾸는 방법 하나. #####

mtcars$tm = ifelse(mtcars$am==0,"automatic","manual")

result = table(mtcars$cyl, mtcars$tm)

##### 0,1로 필업된 데이터를 원래 의미로 바꾸는 방법 둘. #####

mtcars$sm = factor(mtcars$am, labels=c("automatic", "manual"))

table(mtcars$cyl, mtcars$sm)

###### 행과 열의 끝에 총합을 추가하는 방법 #######

addmargins(result)

##### 카이제곱 검정과 피셔의 정확한 검정 ########

chisq.test(result)

fisher.test(result)

##### Cochran Armitage Trend Test ###############

require(moonBook)

data(acs)

View(acs)

table(acs$HBP, acs$smoking)

acs$smoking = factor(acs$smoking, levels=c("Never", "Ex-smoker","Smoker"))

result = table(acs$HBP, acs$smoking)

prop.trend.test(x,n) #x는 number of events, n은 number of trials

result[2,]

colSums(result)

prop.trend.test(result[2,],colSums(result))

######### 모자이크 플롯을 그리는 방법 ###############

mosaicplot(result)

mosaicplot(result, color=c("tan1","firebrick2"))

t(result)

mosaicplot(t(result), color=c("tan1","firebrick2"), ylab="Hypertension", xlab="Smoking")

######### R스튜디오에 있는 색상 열람표 ##########

colors()

demo("colors")

result = table(mtcars$cyl, mtcars$sm)

barplot(result)

barplot(result, ylim=c(0,20), legend=rownames(result))

mylegend = paste(rownames(result),"cyl")

mylegend

barplot(result, ylim=c(0,20), legend=mylegend)

barplot(result, ylim=c(0,20), legend=mylegend, beside=TRUE)

barplot(result, ylim=c(0,20), legend=mylegend, beside=TRUE, horiz=TRUE)

mycol = c("tan1", "coral2", "firebrick2")

barplot(result, ylim=c(0,20), legend=mylegend, beside=TRUE, horiz=TRUE,col=mycol)

#######################################################

##################### 상관분석 ##########################

################# Corelation test #########################

#######################################################

install.packages("UsingR")

library(UsingR)

data("galton")

str(galton)

View(galton)

plot(child~parent, data =galton)

cor.test(galton$child, galton$parent)

out = lm(child~parent, data= galton)

summary(out)

# y = 23.94153 + 0.64629 x X

abline(out, col ="red")

plot(jitter(child,5)~jitter(parent,5), galton)

sunflowerplot(galton)

install.packages("SwissAir")

library(SwissAir)

data("AirQual")

View(AirQual)

str(AirQual)

Ox <- AirQual [ ,c("ad.O3","lu.O3","sz.O3")] +

AirQual[, c("ad.NOx","lu.NOx","sz.NOx")]-

AirQual[, c("ad.NO", "lu.NO","sz.NO")]

names(Ox) <- c("ad","lu","sz")

plot(lu~sz, data= Ox)

install.packages("hexbin")

library(hexbin)

bin = hexbin(Ox$lu, Ox$sz, xbins = 50)

plot(bin)

smoothScatter(Ox$lu, Ox$sz)

install.packages("IDPmisc")

library(IDPmisc)

iplot(Ox$lu, Ox$sz)

par(mfrow=c(1,1))

#################################################################

###################### 단순선형회귀 ################################

women

plot(weight~height, data=women)

fit <- lm(weight~height, data=women)

abline(fit, col="blue")

summary(fit) #-87.51667은 y절편이라고함.

cor.test(women$height, women$weight) # 상관계수 구하는 함수

cor.test(women$weight, women$height)

0.9954948^2

plot(fit)

par(mfrow=c(2,2))

plot(fit)

par(mfrow=c(1,1))

### 다항회귀(polynomial regression)

fit2 <- lm(weight~ height + I(height^2) , data=women)

fit2 <- lm(weight~height+ I(height^2), data = women)

summary(fit2)

lines(women$height, fitted(fit2), col="red")

plot(fit2)

fit3 <- lm(weight~height + I(height^2) + I(height^3), data = women)

plot(fit3)

fit4 <- lm(weight~ height + I(height^2), data=women[-c(13,15),])

plot(fit4)

install.packages("gvlma")

library(gvlma)

gvmodel <- gvlma(fit4)

summary(gvmodel)

######## 다중회귀분석 ##########

state.x77

View(state.x77)

states <- as.data.frame(state.x77[ , c("Murder","Population","Illiteracy","Income","Frost")])

states

View(states)

fit = lm(Murder~Population +Illiteracy + Income + Frost , data=states)

shapiro.test(states$Population)

fit = lm(Murder~Population + Illiteracy, data=states)

plot(fit)

summary(fit)

#install.packages("car", dependencies = TRUE)

library(car)

vif(fit)

sqrt(vif(fit))

###### 이상관측치 #######

influencePlot(fit, id.method = "identify")

states["Nevada", ]

fitted(fit)["Nevada"]

residuals(fit)["Nevada"]

##### 회귀모형의 교정 ######

states

summary(powerTransform(states$Murder))

boxTidwell(Murder~ Population + Illiteracy , data= states)

ncvTest(fit)

spreadLevelPlot(fit)

##### 예측 변수 선택 #####

fit1 <- lm(Murder~ ., data=states)

summary(fit1)

fit2 <- lm(Murder ~ Population + Illiteracy , data=states)

summary(fit2)

#AIC (Akaike's An information Criterion)

AIC(fit1, fit2)

###### stepwise regression (Backward stepwise regression, Forward stepwise regression)

#Backward stepwise regression

full.model = lm(Murder~. , data = states)

reduced.model = step(full.model, direction = "backward")

summary(reduced.model)

#Forward stepwise regression

min.model = lm(Murder~1, data = states)

fwd.model <- step(min.model, direction="forward", scope = (Murder~Population+ Illiteracy + Income

+ Frost), trace=0)

summary(fwd.model)

## all subset regression

library(leaps)

leaps <- regsubsets(Murder~ Population + Illiteracy + Income + Frost, data= states, nbest=4)

plot(leaps, scale="adjr2")

########## Logistic Regression ########

require(survival)

str(colon)

colon1 <- na.omit(colon)

View(colon)

View(colon1)

result <- glm(status ~ rx+sex+age+obstruct+perfor + adhere + nodes + differ + extent + surg, family = binomial, data=colon1)

summary(result)

reduced.model = step(result)

summary(reduced.model)

require(moonBook)

extractOR(reduced.model)

fit = glm(formula = status ~ rx + obstruct + adhere + nodes + extent + surg, family = binomial, data = colon1)

fit.od = glm(formula = status ~ rx + obstruct + adhere + nodes + extent + surg, family = quasibinomial, data = colon1)

pchisq(summary(fit.od)$dispersion*fit$df.residual, fit$df.residual, lower = F)

#0.2803691이 값이 0.05보다 크다면 과산포는 없다고 확신할 수 있습니다.

?ORplot()

plot()

ORplot(fit, main = "Plot for Odds Ratios")

ORplot(fit, type=2, show.OR=FALSE, show.CI=TRUE, pch=15, lwd=2, col=c("darkblue", "red"), main="Plot of OR" )

ORplot(fit, type=3, show.OR=FALSE, show.CI=TRUE, pch=15, lwd=2, col=c("darkblue", "red"), main="Plot of OR")

##### Poisson Regression #######

install.packages("robust")

library(robust)

data(breslow.dat, package = "robust")

summary(breslow.dat)

install.packages("qcc")

library(qcc)

qcc.overdispersion.test(breslow.dat$sumY, type="poisson")

fit = glm(sumY ~ Base + Age + Trt, family = quasipoisson, data= breslow.dat)

summary(fit)

install.packages("moonBook")

library(moonBook)

extractOR(fit)

extractOR(fit, digits = 3)

ORplot(fit, type = 2, show.CI=TRUE, main="Plot for Quasipoisson")

#### Survival Analysis #####

require(survival)

data("colon")

View(colon)

colon <- na.omit(colon)

rm(colon1)

str(colon)

colon$TS <- Surv(colon$time,colon$status==1)

fit = survfit(TS ~ rx, data = colon)

plot(fit)

plot(fit,col=1:3, lty=1:3)

legend("topright", legend=levels(colon$rx), col=1:3, lty = 1:3)

###### Cumulative hazard Start #####

plot(fit, col=1:3, lty=1:3, fun ="cumhaz", mark.time=FALSE, ylab="Cumulative hazard")

legend("topleft", legend = levels(colon$rx), col=1:3, lty =1:3)

###### Cumulative hazard End #######

#### Log-rank test ####

survdiff(Surv(time, status ==1)~rx, data=colon)

#### Cox Regression #####

out = coxph(Surv(time, status ==1)~rx, data=colon)

summary(out)

###### Hazard ratios of all individual variables #####

colon$TS <- Surv(colon$time,colon$status==1)

out = coxph(colon$TS~rx, data=colon )

install.packages("moonBook")

require(moonBook)

attach(colon)

out = mycph(TS~.-id-study-time-status-etype, data=colon)

out2 =coxph(TS~. -id-study-time-status-etype, data=colon)

final =step(out2, direction = "backward")

HRplot(out, type =2, show.CI = TRUE)

'깜신의 통계 이야기' 카테고리의 다른 글

로지스틱회귀분석 그래프 그리기 - 깜신의 통계 왕초보 탈출 38탄 (0)	2018.08.29
한림대학교 동탄성심병원 통계워크샵-회귀분석 (0)	2018.07.05
로지스틱 회귀분석 R에서 따라하기 2부 - 깜신의 통계 왕초보 탈출 37탄 (2)	2018.05.31
로지스틱 회귀분석 R에서 따라하기 1부 - 깜신의 통계 왕초보 탈출 36탄 (2)	2018.05.29
로지스틱 회귀분석 개념 따라잡기 - 깜신의 통계 왕초보 탈출 35탄 (0)	2018.05.28

깜신의 작은 진료소

깜신의 작은 진료소

태그

최근글

댓글

공지사항

아카이브

'깜신의 통계 이야기' 카테고리의 다른 글

관련글

티스토리툴바