2017.9.11일 통계워크샵 사전 배포 자료

2017.9.11일 통계워크샵 사전 배포 자료

2017. 9. 11. 13:19ㆍ깜신의 통계 이야기

이비인후과학교실에 김종엽입니다.

2017.9.11일 오후 5시30분에 진행 예정인 6차 통계워크샵 준비 자료입니다.

L_cor.R

해당 파일에는 워크샵 당일에 실습할 R 코드가 담겨 있습니다.

10_rmanova.csv

해당 파일에는 Two way repeated measures ANOVA 실습을 위한 예제 데이터가 들어 있습니다.

############# Frieman test #############

DrugResponse <-

matrix(c(5.40, 5.50, 5.55,

5.85, 5.70, 5.75,

5.20, 5.60, 5.50,

5.55, 5.50, 5.40,

5.90, 5.85, 5.70,

5.45, 5.55, 5.60,

5.40, 5.40, 5.35,

5.45, 5.50, 5.35,

5.25, 5.15, 5.00,

5.85, 5.80, 5.70,

5.25, 5.20, 5.10,

5.65, 5.55, 5.45,

5.60, 5.35, 5.45,

5.05, 5.00, 4.95,

5.50, 5.50, 5.40,

5.45, 5.55, 5.50,

5.55, 5.55, 5.35,

5.45, 5.50, 5.55,

5.50, 5.45, 5.25,

5.65, 5.60, 5.40,

5.70, 5.65, 5.55,

6.30, 6.30, 6.25),

nrow = 22,

byrow = TRUE,

dimnames = list(1 : 22,

c("DrugA", "DrugB", "DrugC")))

DrugResponse

install.packages("reshape")

library(reshape)

DR2 <- melt(DrugResponse)

out <- aov(value~X2, data = DR2)

shapiro.test(resid(out))

boxplot(value~X2, data= DR2)

friedman.test(DrugResponse)

DRA <- DR2[DR2$X2 != "DrugA", ]

t.test(value~X2, data= DRA, paired = TRUE)

DRB <- DR2[DR2$X2 != "DrugB", ]

t.test(value~X2, data= DRB, paired = TRUE)

DRC <- DR2[DR2$X2 != "DrugC", ]

t.test(value~X2, data= DRC, paired = TRUE)

0.05/3

friedman.test.with.post.hoc <- function(formu, data, to.print.friedman = T, to.post.hoc.if.signif = T, to.plot.parallel = T, to.plot.boxplot = T, signif.P = .05, color.blocks.in.cor.plot = T, jitter.Y.in.cor.plot =F)

{

# formu is a formula of the shape: Y ~ X | block

# data is a long data.frame with three columns: [[ Y (numeric), X (factor), block (factor) ]]

# Note: This function doesn't handle NA's! In case of NA in Y in one of the blocks, then that entire block should be removed.

# Loading needed packages

if(!require(coin))

{

print("You are missing the package 'coin', we will now try to install it...")

install.packages("coin")

library(coin)

}

if(!require(multcomp))

{

print("You are missing the package 'multcomp', we will now try to install it...")

install.packages("multcomp")

library(multcomp)

}

if(!require(colorspace))

{

print("You are missing the package 'colorspace', we will now try to install it...")

install.packages("colorspace")

library(colorspace)

}

# get the names out of the formula

formu.names <- all.vars(formu)

Y.name <- formu.names[1]

X.name <- formu.names[2]

block.name <- formu.names[3]

if(dim(data)[2] >3) data <- data[,c(Y.name,X.name,block.name)] # In case we have a "data" data frame with more then the three columns we need. This code will clean it from them...

# Note: the function doesn't handle NA's. In case of NA in one of the block T outcomes, that entire block should be removed.

# stopping in case there is NA in the Y vector

if(sum(is.na(data[,Y.name])) > 0) stop("Function stopped: This function doesn't handle NA's. In case of NA in Y in one of the blocks, then that entire block should be removed.")

# make sure that the number of factors goes with the actual values present in the data:

data[,X.name ] <- factor(data[,X.name ])

data[,block.name ] <- factor(data[,block.name ])

number.of.X.levels <- length(levels(data[,X.name ]))

if(number.of.X.levels == 2) { warning(paste("'",X.name,"'", "has only two levels. Consider using paired wilcox.test instead of friedman test"))}

# making the object that will hold the friedman test and the other.

the.sym.test <- symmetry_test(formu, data = data, ### all pairwise comparisons

teststat = "max",

xtrafo = function(Y.data) { trafo( Y.data, factor_trafo = function(x) { model.matrix(~ x - 1) %*% t(contrMat(table(x), "Tukey")) } ) },

ytrafo = function(Y.data){ trafo(Y.data, numeric_trafo = rank, block = data[,block.name] ) }

)

# if(to.print.friedman) { print(the.sym.test) }

if(to.post.hoc.if.signif)

{

if(pvalue(the.sym.test) < signif.P)

{

# the post hoc test

The.post.hoc.P.values <- pvalue(the.sym.test, method = "single-step") # this is the post hoc of the friedman test

# plotting

if(to.plot.parallel & to.plot.boxplot) par(mfrow = c(1,2)) # if we are plotting two plots, let's make sure we'll be able to see both

if(to.plot.parallel)

{

X.names <- levels(data[, X.name])

X.for.plot <- seq_along(X.names)

plot.xlim <- c(.7 , length(X.for.plot)+.3) # adding some spacing from both sides of the plot

if(color.blocks.in.cor.plot)

{

blocks.col <- rainbow_hcl(length(levels(data[,block.name])))

} else {

blocks.col <- 1 # black

}

data2 <- data

if(jitter.Y.in.cor.plot) {

data2[,Y.name] <- jitter(data2[,Y.name])

par.cor.plot.text <- "Parallel coordinates plot (with Jitter)"

} else {

par.cor.plot.text <- "Parallel coordinates plot"

}

# adding a Parallel coordinates plot

matplot(as.matrix(reshape(data2, idvar=X.name, timevar=block.name,

direction="wide")[,-1]) ,

type = "l", lty = 1, axes = FALSE, ylab = Y.name,

xlim = plot.xlim,

col = blocks.col,

main = par.cor.plot.text)

axis(1, at = X.for.plot , labels = X.names) # plot X axis

axis(2) # plot Y axis

points(tapply(data[,Y.name], data[,X.name], median) ~ X.for.plot, col = "red",pch = 4, cex = 2, lwd = 5)

}

if(to.plot.boxplot)

{

# first we create a function to create a new Y, by substracting different combinations of X levels from each other.

subtract.a.from.b <- function(a.b , the.data)

{

the.data[,a.b[2]] - the.data[,a.b[1]]

}

temp.wide <- reshape(data, idvar=X.name, timevar=block.name,

direction="wide") #[,-1]

wide.data <- as.matrix(t(temp.wide[,-1]))

colnames(wide.data) <- temp.wide[,1]

Y.b.minus.a.combos <- apply(with(data,combn(levels(data[,X.name]), 2)), 2, subtract.a.from.b, the.data =wide.data)

names.b.minus.a.combos <- apply(with(data,combn(levels(data[,X.name]), 2)), 2, function(a.b) {paste(a.b[2],a.b[1],sep=" - ")})

the.ylim <- range(Y.b.minus.a.combos)

the.ylim[2] <- the.ylim[2] + max(sd(Y.b.minus.a.combos)) # adding some space for the labels

is.signif.color <- ifelse(The.post.hoc.P.values < .05 , "green", "grey")

boxplot(Y.b.minus.a.combos,

names = names.b.minus.a.combos ,

col = is.signif.color,

main = "Boxplots (of the differences)",

ylim = the.ylim

)

legend("topright", legend = paste(names.b.minus.a.combos, rep(" ; PostHoc P.value:", number.of.X.levels),round(The.post.hoc.P.values,5)) , fill = is.signif.color )

abline(h = 0, col = "blue")

}

list.to.return <- list(Friedman.Test = the.sym.test, PostHoc.Test = The.post.hoc.P.values)

if(to.print.friedman) {print(list.to.return)}

return(list.to.return)

} else {

print("The results where not significant, There is no need for a post hoc test")

return(the.sym.test)

}

# Original credit (for linking online, to the package that performs the post hoc test) goes to "David Winsemius", see:

# http://tolstoy.newcastle.edu.au/R/e8/help/09/10/1416.html

}

friedman.test.with.post.hoc(value~X2|X1, DR2)

################ Two way repeated measures ANOVA #########

acne <- read.csv("10_rmanova.csv",header = TRUE)

acL <- reshape(acne, direction = "long", varying=3:6, sep ="")

acL$group <- factor(acL$group)

acL$id <- factor(acL$id)

acL$time <- factor(acL$time)

interaction.plot(acL$time, acL$group, acL$month)

acD <- aov(month~group*time +Error(id), data=acL)

summary(acD)

acL0 <- acL[acL$time=="0",]

acL1 <- acL[acL$time=="1",]

acL3 <- acL[acL$time=="3",]

acL6 <- acL[acL$time=="6",]

t.test(month~group, data=acL0)

t.test(month~group, data=acL1)

t.test(month~group, data=acL3)

t.test(month~group, data=acL6)

########## Corelation test ##############

install.packages("UsingR")

library(UsingR)

data("galton")

str(galton)

View(galton)

plot(child~parent, data =galton)

cor.test(galton$child, galton$parent)

out = lm(child~parent, data= galton)

summary(out)

# y = 23.94153 + 0.64629 x X

abline(out, col ="red")

plot(jitter(child,5)~jitter(parent,5), galton)

sunflowerplot(galton)

install.packages("SwissAir")

library(SwissAir)

data("AirQual")

View(AirQual)

str(AirQual)

Ox <- AirQual [ ,c("ad.O3","lu.O3","sz.O3")] +

AirQual[, c("ad.NOx","lu.NOx","sz.NOx")]-

AirQual[, c("ad.NO", "lu.NO","sz.NO")]

names(Ox) <- c("ad","lu","sz")

plot(lu~sz, data= Ox)

install.packages("hexbin")

library(hexbin)

bin = hexbin(Ox$lu, Ox$sz, xbins = 50)

plot(bin)

smoothScatter(Ox$lu, Ox$sz)

install.packages("IDPmisc")

library(IDPmisc)

iplot(Ox$lu, Ox$sz)

par(mfrow=c(1,1))

'깜신의 통계 이야기' 카테고리의 다른 글

Two way repeated measures ANOVA(이원반복분산분석) 따라하기- 깜신의 통계 왕초보 탈출 25탄 (2)	2017.10.09
프리드만 검정(Friedman test) 따라하기-깜신의 통계 왕초보 탈출24탄 (0)	2017.10.08
One way repeated measures ANOVA, R스튜디오에서 따라하기 - 깜신의 통계 왕초보탈출 23탄 (2)	2017.09.11
Wilcoxon Signed Rank Test, R스튜디오에서 따라하기 - 깜신의 통계 왕초보탈출 22탄 (1)	2017.09.09
Paired t test, R스튜디오에서 따라하기 - 깜신의 통계 왕초보탈출 21탄 (8)	2017.09.08

깜신의 작은 진료소

깜신의 작은 진료소

태그

최근글

댓글

공지사항

아카이브

'깜신의 통계 이야기' 카테고리의 다른 글

관련글

티스토리툴바