# R commands for Chapter 4

 

# Section 4.1

 

# Read the HERS data for into a data frame (needs only to be done once if the workspace is saved)

hers=read.table("http://www.uio.no/studier/emner/matnat/math/STK4900/v11/hers.txt",sep="\t",header=T,na.strings=".")

 

 

# Unadjusted regression of glucose on exercise (cf Table 4.1, page 71):

unadj.fit=lm(glucose~exercise, data=hers, subset=(diabetes==0))

summary(unadj.fit)

anova(unadj.fit)

 

 

# Adjusted regression of glucose on exercise (cf Table 4.2, page 72):

adj.fit=lm(glucose~exercise+age+drinkany+BMI, data=hers, subset=(diabetes==0))

summary(adj.fit)

anova(adj.fit)

 

 

# Section 4.3

 

# Regression of glucose on physical activity (cf first part of Table 4.4, page 80):

hers$physact=factor(hers$physact)

phys.fit=lm(glucose~physact, data=hers, subset=(diabetes==0))

summary(phys.fit)

anova(phys.fit)

 

# Estimated mean for individuals with level 2 of physical activity and comparison of levels 5 and 3 of physical activity (cf last part of Table 4.4, page 80)

# These commands use the package "contrasts"

library(contrast)        # load the contrast-package

contrast(phys.fit,list(physact="2"))

contrast(phys.fit,list(physact="5"),list(physact="3"))

 

 

# Section 4.4

 

# Regression of LDL on BMI (cf first part of Table 4.8, page 94):

bmi.fit=lm(LDL~BMI,data=hers)

summary(bmi.fit)

anova(bmi.fit)

 

# Regression of LDL on BMI adjusted for other predictors (cf first last of Table 4.8, page 94):

adjbmi.fit=lm(LDL~BMI+age+nonwhite+smoking+drinkany,data=hers)

summary(adjbmi.fit)

anova(adjbmi.fit)

 

 

# Section 4.6

 

# Regression of LDL1 on hormone therapy (HT) and statin use with interaction (cf first part of Table 4.12, page 102):

ht.fit=lm(LDL1~HT+statins+HT:statins,data=hers)

summary(ht.fit)

anova(ht.fit)

 

# Estimated effect of HT among baseline statin users (cf last part of Table 4.12, page 102)

contrast(ht.fit,list(HT=1,statins=1),list(HT=0,statins=1))  # the command uses the "contrast" package

 

 

# Interaction model for BMI and statin use (cf first part of Table 4.13, page 104):

hers$cBMI=hers$BMI- mean(hers$BMI[!is.na(hers$BMI)])    # define centered predictor

stat.fit=lm(LDL~statins+cBMI+statins:cBMI+age+nonwhite+smoking+drinkany,data=hers)

summary(stat.fit)

anova(stat.fit)

 

 

# Estimated effect of HT among baseline statin users (cf last part of Table 4.13, page 104)

par1=list(statins=1,cBMI=1,age=0,nonwhite=0,smoking=0,drinkany=0)

par2=list(statins=1,cBMI=0, age=0,nonwhite=0,smoking=0,drinkany=0)

contrast(stat.fit,par1,par2)  # the command uses the "contrast" package

 

 

 

# Interaction model for HT effects on absolute change in LDL (cf. Table 4.14, page 107):

hers$LDLch=hers$LDL1 - hers$LDL    # define absolute change in LDL

hers$cLDL0=hers$LDL- mean(hers$LDL[!is.na(hers$LDL)])    # define centered LDL at baseline

LDL.fit=lm(LDLch~HT+cLDL0+HT:cLDL0,data=hers)

summary(LDL.fit)

anova(LDL.fit)

 

# Interaction model for HT effects on percent change in LDL (cf. Table 4.15, page 107):

hers$LDLpctch=100*(hers$LDL1 - hers$LDL)/hers$LDL    # define percent change in LDL

pLDL.fit=lm(LDLpctch~HT+cLDL0+HT:cLDL0,data=hers)

summary(pLDL.fit)

anova(pLDL.fit)

 

 

# Section 4.7

 

# CPR (or partial residuals plot) for multiple regression of LDL and HDL on BMI  (cf. Fig 4.4, page 111)

# These commands use the package "car"

library(car)        # load the car-package

fit.ldl=lm(LDL~BMI+age+nonwhite+smoking+drinkany,data=hers)

fit.hdl=lm(HDL~BMI+age+nonwhite+smoking+drinkany,data=hers)

par(mfrow=c(1,2))

crPlots(fit.ldl, terms=~BMI)

crPlots(fit.hdl, terms=~BMI)

par(mfrow=c(1,1))

 

# CPR (or partial residuals plot) for multiple regression of HDL with quadratic term in BMI  (cf. Fig 4.4, page 111)

fit.hdl2=lm(HDL~BMI+I(BMI^2)+age+nonwhite+smoking+drinkany,data=hers)

crPlots(fit.hdl2, terms=~BMI)

 

 

# Some plot of the residuals in multiple regression of LDL on BMI adjusted for other predictors (cf. Fig 4.8, page 115)

fit.ldl=lm(LDL~BMI+age+nonwhite+smoking+drinkany,data=hers)

hist(fit.ldl$res)               # histogram

boxplot(fit.ldl$res)       # boxplot

qqnorm(fit.ldl$res);  qqline(fit.ldl$res)       # normal Q-Q plot

 

# Some plot of the residuals in multiple regression of log(LDL) on BMI adjusted for other predictors (cf. Fig 4.8, page 115)

fit.logldl=lm(log(LDL)~BMI+age+nonwhite+smoking+drinkany,data=hers)

hist(fit.logldl$res)               # histogram

boxplot(fit.logldl$res)       # boxplot

qqnorm(fit.logldl$res);  qqline(fit.logldl$res)       # normal Q-Q plot

 

 

# Plots of residuals versus predictor and fitted values for multiple regression of HDL with quadratic term in BMI  and adjusted for other predictors (cf. Fig 4.10, page 118)

fit.hdl2=lm(HDL~BMI+I(BMI^2)+age+nonwhite+smoking+drinkany,data=hers)

par(mfrow=c(1,2))

ind=(!is.na(hers$HDL))&(!is.na(hers$BMI))&(!is.na(hers$drinkany))   # needed to omit individuals with missing values that are not used when fitting the model

plot(hers$BMI[ind],fit.hdl2$res, xlab="BMI",ylab=""); abline(0,0)

plot(fit.hdl2$fit,fit.hdl2$res, xlab="Fitted value", ylab=""); abline(0,0)

par(mfrow=c(1,1))

 

# Plots of residuals versus predictor and fitted values for multiple regression of log(HDL) with quadratic term in BMI  and adjusted for other predictors (cf. Fig 4.11, page 118)

fit.loghdl2=lm(log(HDL)~BMI+I(BMI^2)+age+nonwhite+smoking+drinkany,data=hers)

par(mfrow=c(1,2))

plot(hers$BMI[ind],fit.loghdl2$res,xlab="BMI",ylab=""); abline(0,0)

plot(fit.loghdl2$fit,fit.loghdl2$res,xlab="Fitted value", ylab=""); abline(0,0)

par(mfrow=c(1,1))

 

 

# Boxplot of standardized "dfbetas" for regression of LDL on BMI adjusted for other predictors (cf. Fig 4.14, page 124)

fit.ldl=lm(LDL~BMI+age10+nonwhite+smoking+drinkany,data=hers)  # use age10=age/10 as predictor

boxplot(dfbetas(fit.ldl))