# R commands Chapter 3

 

# Section 3.1

 

# Read the HERS data into a data frame (needs only to be done once if the workspace is saved)

hers=read.table("http://www.uio.no/studier/emner/matnat/math/STK4900/v11/hers.txt",sep="\t",header=T,na.strings=".")

 

# Attach the data frame:

attach(hers)

 

# t-test of difference in average glucose by exercise (cf Table 3.1, page 30):

t.test(glucose~exercise,subset=(diabetes==0),var.equal=T)   # two-sided test (diff != 0), which is the default

t.test(glucose~exercise,subset=(diabetes==0),var.equal=T,alternative="less")   # one-sided test (diff < 0)

t.test(glucose~exercise,subset=(diabetes==0),var.equal=T,alternative="greater")   # one-sided test (diff > 0)

 

# Summary and one-way ANOVA of systolic blood pressure (SBP) by ethnicity (raceth) (cf Table 3.2, page 32):

summary(SBP[raceth==1])    

summary(SBP[raceth==2])    

summary(SBP[raceth==3])

summary(aov(SBP~factor(raceth)))

 

# t-test of difference in average glucose by exercise allowing for unequal variances (cf Table 3.3, page 34):

t.test(glucose~exercise,subset=(diabetes==0))   # two-sided test  (note that unequal variances is the default)

 

 

# Detach the hers data frame:

detach(hers)

 

 

# Section 3.3

 

# In Section 3.3 a 10% sample of the HERS data is used. We read this sample into a data frame:

hers.sample=read.table("http://www.uio.no/studier/emner/matnat/math/STK4900/v11/hers.sample.txt",header=T)

 

# Plot of systolic blood pressure  versus age with fitted regression line (cf. Figure 3.1, page 37):

plot(hers.sample$age,hers.sample$sbp,xlab="Age in Years",ylab="Systolic Blood Pressure",pch=20)    

abline(lm(sbp~age,data=hers.sample))

 

# Linear regression of systolic blood pressure versus age (cf. Table 3.4, page 40):

hers.fit=lm(sbp~age,data=hers.sample)

summary(hers.fit)

anova(hers.fit)

 

 

# Section 3.4

 

# Read the WCGS data into a data frame (needs only to be done once if the workspace is saved)

wcgs=read.table("http://www.uio.no/studier/emner/matnat/math/STK4900/v11/wcgs.txt",sep="\t",header=T,na.strings=".")

 

# Attach the data frame:

attach(wcgs)

 

# 2x2 contingency table for chd and arcus (cf. first part of Table 3.5, page 45):

table(arcus,chd69)

 

# Risk difference for chd according to arcus  (cf. first line in second part of Table 3.5, page 45)

# (need to read in the 2x2 table to get it in the right format)

tab.arcus=matrix(c(102,153,839,2058),ncol=2) 

# first (second) row is number of chd cases and number of non-cases for group 1 (group 2)

prop.test(tab.arcus,correct=F)  

 

# Fisher exact test for femal partner's HIV status (cf. Table 3.6, page 49):

tab.hiv=matrix(c(3,4,2,22),nrow=2)  # read data table in similar format as above

fisher.test(tab.hiv)  # odds ratio estimate differs from the one in the book

 

# CHD by age group (cf. Table 3.7, page 50):

table(chd69,agec)

chisq.test(table(chd69,agec),correct=F)

 

# Twenty-year vital status by smoking behaviour (cf. Table 3.9, page 52):

tab.smoke=matrix(c(139,230,443,502),nrow=2) 

prop.test(tab.smoke, correct=F) 

 

# Twenty-year vital status by smoking behaviour stratified by age (cf. Table 3.10, page 53):

tab.smoke.age=array(c(19,13,269,327,78,52,167,147,42,165,7,28),dim=c(2,2,3)) 

mantelhaen.test(tab.smoke.age, correct=F) 

 

# Detach the wcgs data frame:

detach(wcgs)

 

 

# Section 3.5

 

# Read the leukemia data into a data frame (needs only to be done once if the workspace is saved)

leuk=read.table("http://www.uio.no/studier/emner/matnat/math/STK4900/v11/leuk.txt",sep="\t",header=T)

 

 

# Survival curves  by treatment for leukemia patients (cf. Figure 3.2, page 58):

library(survival)  # we need to use the survival library

leuk.fit=survfit(Surv(time,cens)~group,data=leuk) 

plot(leuk.fit, lty=1:2,mark.time=F,xlab="Weeks since randomization",ylab="Proportion in remission")

 

# Logrank test leukemia example (cf. Table 3.14, page 61):

survdiff(Surv(time,cens)~group,data=leuk)