# R commands for Chapter 6

 

# Section 6.1

 

# Read the WCGS data for into a data frame (needs only to be done once if the workspace is saved)

wcgs=read.table("http://www.uio.no/studier/emner/matnat/math/STK4900/v11/wcgs.txt",sep="\t",header=T,na.strings=".")

 

 

# Logistic model for the relationship between chd and age (cf Table 6.2, page 163):

wcgs.age=glm(chd69~age, data=wcgs, family=binomial)

summary(wcgs.age)

anova(wcgs.age,test="Chisq")

 

 

# Logistic model for chd and arcus senilis (cf Table 6.4, page 165):

wcgs.arcus=glm(chd69~arcus, data=wcgs, family=binomial)

summary(wcgs.arcus)? #gives estimated regression coefficient, not odds ratio

anova(wcgs.arcus,test="Chisq")

 

# The summary-command for a logistic regression fit, does not give odds ratio (i.e. the exponential of the estimated coefficients)

# To obtain odds ratios with corresponding 95 % confidence intervals we may use the function:

expcoef=function(glmobj){

  regtab=summary(glmobj)$coef

  expcoef=exp(regtab[,1])

  lower=expcoef*exp(-1.96*regtab[,2])

  upper=expcoef*exp(1.96*regtab[,2])

  cbind(expcoef,lower,upper)}

 

# For chd and arcus senilis we then get the odds ratio with 95% confidence interval by (cf Table 6.4, page 165):

expcoef(wcgs.arcus)

 

 

# Logistic model for chd and age as factor (cf Table 6.5, page 166):

wcgs.agec=glm(chd69~factor(agec), data=wcgs, family=binomial)

summary(wcgs.agec)? #gives estimated regression coefficient, not odds ratio

expcoef(wcgs.agec)? #gives odds ratios with confidence intervals (using the function defined above)

anova(wcgs.agec,test="Chisq")

 

 

# Section 6.2

 

# Multiple logistic model for chd risk (cf Table 6.6, page 168):

# As in the book we omit an individual with an unusually high cholesterol value

wcgs.mult=glm(chd69~age+chol+sbp+bmi+smoke, data=wcgs, family=binomial, subset=(chol<600))

summary(wcgs.mult)

 

 

# Multiple logistic model with rescaled predictors (cf Table 6.7, page 170):

wcgs.resc=glm(chd69~age_10+chol_50+bmi_10+sbp_50+smoke, data=wcgs, family=binomial, subset=(chol<600))

summary(wcgs.resc)?? #gives estimated regression coefficient, not odds ratio

expcoef(wcgs.resc)? #gives odds ratios with confidence intervals (using the function defined above)

 

 

# Logistic model for wcgs behavioral pattern (cf Table 6.8, page 171):

wcgs.beh=glm(chd69~age_10+chol_50+sbp_50+bmi_10+smoke+factor(behpat), data=wcgs, family=binomial, subset=(chol<600))

summary(wcgs.beh)?? #gives estimated regression coefficient, not odds ratio

expcoef(wcgs.beh)????? #gives odds ratios with confidence intervals (using the function defined above)

 

 

# Likelihood ratio test for the four-level wcgs behavioral pattern (cf Table 6.9, page 172):

anova(wcgs.resc,wcgs.beh, test="Chisq")

 

 

# Likelihood ratio test for the two-level wcgs behavioral pattern (cf Table 6.10, page 173):

wcgs.beh2=glm(chd69~age_10+chol_50+sbp_50+bmi_10+smoke+dibpat, data=wcgs, family=binomial, subset=(chol<600))

summary(wcgs.beh2)?? #gives estimated regression coefficient, not odds ratio

expcoef(wcgs.beh2)????? #gives odds ratios with confidence intervals (using the function defined above)

anova(wcgs.beh2,wcgs.beh, test="Chisq")

 

 

# Logistic model for type A behavior pattern and selected predictors? (cf Table 6.11, page 174):

wcgs.dibpat=glm(dibpat~age_10+chol_50+sbp_50+bmi_10+smoke, data=wcgs, family=binomial, subset=(chol<600))

summary(wcgs.dibpat)?? #gives estimated regression coefficient, not odds ratio

expcoef(wcgs.dibpat)????? #gives odds ratios with confidence intervals (using the function defined above)

 

 

# Logistic model for interaction between arcus and age as a categorical predictor? (cf Table 6.12, page 176):

wcgs.int=glm(chd69~bage_50+arcus+bage_50:arcus, data=wcgs, family=binomial)

summary(wcgs.int)

 

# Contrast for arcus-age interaction model (cf. Table 6.14, page 177)

library(contrast)??????? # load the contrast-package

par1=list(bage_50=1,arcus=1)

par2=list(bage_50=0,arcus=1)

contr.arcus.age=contrast(wcgs.int,par1,par2)??? # #gives estimated contrast, not odds ratio (should be compared with standard normal, not t-distribution)

contr.arcus.age

 

# The contrast-command for a logistic regression fit, does not give odds ratio (i.e. the exponential of the estimated coefficients)

# To obtain odds ratios for a contrast with corresponding 95 % confidence intervals we may use the function:

expcontrast=function(contrastobj){

  expcontrast=exp(contrastobj$Contrast)

  lower=exp(contrastobj$Contrast-1.96* contrastobj$SE)

  upper=exp(contrastobj$Contrast+1.96* contrastobj$SE)

  cbind(expcontrast,lower,upper)}

 

# For arcus-age interaction model we then get the odds ratio with 95% confidence interval by (cf Table 6.14, page 177):

expcontrast(contr.arcus.age)

 

 

# Logistic model for interaction between arcus and age as a continuous predictor? (cf Table 6.15, page 178):

wcgs.intcont=glm(chd69~arcus+age+arcus:age, data=wcgs, family=binomial)

summary(wcgs.intcont)

 

# Logistic model for interaction between arcus and age as a continuous predictor, continued? (cf Table 6.16, page 179):

par1=list(age=55,arcus=1)

par2=list(age=55,arcus=0)

contr.arcus.age55=contrast(wcgs.intcont,par1,par2)??? # #gives estimated contrast, not odds ratio (should be compared with standard normal, not t-distribution)

contr.arcus.age55

expcontrast(contr.arcus.age55)? # odds ratio using function defined above

 

 

# Expanded logistic model for chd events? (cf Table 6.17, page 181):

# First make a "clean" data set without missing cholesterol values and without the individual with the unusually high cholesterol value

wcgs.clean=wcgs[!is.na(wcgs$chol)&(wcgs$chol<600), ]

wcgs.all=glm(chd69~age_10+chol_50+sbp_50+bmi_10+smoke+dibpat+bmi_10:chol_50+bmi_10:sbp_50, data=wcgs.clean, family=binomial)

summary(wcgs.all)

 

 

# Predictions for five individuals (id=2001 to id=2005) from the logistic model? (cf Table 6.18, page 181)

pred=predict(wcgs.all,type="response")

idpred=wcgs.clean$id<=2005

usepred=c("id","chd69","age","chol","sbp","bmi","smoke","dibpat")

cbind(wcgs.clean[idpred,usepred],pred[idpred])

 

 

# Section 6.4

 

# Pearson residuals from the logistic model plotted according to increasing id-number (cf Figure 6.4, page 189)

idorder=order(wcgs$id)

plot(1:dim(wcgs)[1],residuals(wcgs.all,type="pearson")[idorder],xlab="Observation number",ylab="Pearson residuals")