# PRACTICAL EXERCISE 8 # ==================== # Read data and disregard the individuals who smoke pipe or cigar: norw.death=read.table("http://folk.uio.no/borgan/abg-2008/data/causes_death.txt", header=T) norw.death=norw.death[norw.death$smkgr!=6,] # We will use the survival package, so this has to be loaded. # Question a: # We fit a Cox model with sex and smoking habits as categorical covariates # and systolic blodd pressure and body mass index as numeric covariates: fit.b=coxph(Surv(agestart,agestop,dead)~factor(sex)+factor(smkgr)+sbp+bmi, data=norw.death) summary(fit.b) # The model assumes a log-linear effect of the numeric covariates and # proportionality of all covariates # Question b: fit.b=coxph(Surv(agestart,agestop,dead)~factor(sex)+factor(smkgr)+pspline(sbp)+bmi, data=norw.death) print(fit.b) par(mfrow=c(2,2)) termplot(fit.b,se=T) # The effect of systolic blood pressure is fairly log-linear. # Question c: fit.c=coxph(Surv(agestart,agestop,dead)~factor(sex)+factor(smkgr)+sbp+pspline(bmi), data=norw.death) print(fit.c) par(mfrow=c(2,2)) termplot(fit.c,se=T) # The effect of body mass index in not log-linear. It seems difficult to find # a useful trasformation of body mass index, so we will work with grouped bmi. # We make a categorical variable for groupe bmi (cf. problem text) # and fit a model with this covariate norw.death$gbmi=cut(norw.death$bmi,c(0,20,25,30,60),right=F, labels=1:4) fit.c2=coxph(Surv(agestart,agestop,dead)~factor(sex)+factor(smkgr)+sbp+factor(gbmi), data=norw.death) summary(fit.c2) # Question d # Graphical check of proportionality for sex: fit.d1=coxph(Surv(agestart,agestop,dead)~strata(sex)+factor(smkgr)+sbp+factor(gbmi), data=norw.death) par(mfrow=c(1,1)) plot(survfit(fit.d1,newdata=data.frame(smkgr=1,sbp=130,gbmi=1)), fun="cumhaz",lty=1:2,log=T,mark.time=F,xlim=c(40,70)) legend("bottomright",as.character(1:2),lty=1:2) # Proportionality for sex seems ok. # Graphical check of proportionality for smoking group: fit.d2=coxph(Surv(agestart,agestop,dead)~strata(smkgr)+factor(sex)+sbp+factor(gbmi), data=norw.death) plot(survfit(fit.d2,newdata=data.frame(sex=1,sbp=130,gbmi=1)), fun="cumhaz",lty=1:5,log=T,mark.time=F,xlim=c(40,70)) legend("bottomright",as.character(1:5),lty=1:5) # Proportionality seems ok, except for smoking group 5 (20+ cigarettes per day) # Graphical check of proportionality for systolic blood pressure # (need to make blood pressure groups): break.sbp=quantile(norw.death$sbp) norw.death$gsbp=cut(norw.death$sbp,break.sbp,include.lowest=T,label=1:4) fit.d3=coxph(Surv(agestart,agestop,dead)~strata(gsbp)+factor(sex)+factor(smkgr)+factor(gbmi), data=norw.death) plot(survfit(fit.d3,newdata=data.frame(sex=1,smkgr=1,gbmi=1)), fun="cumhaz",lty=1:4,log=T,mark.time=F,xlim=c(40,70)) legend("bottomright",as.character(1:4),lty=1:4) # Proportionality seems ok for systolic blood pressure # Graphical check of proportionality for body mass index: fit.d4=coxph(Surv(agestart,agestop,dead)~strata(gbmi)+factor(sex)+factor(smkgr)+sbp, data=norw.death) plot(survfit(fit.d4,newdata=data.frame(sex=1,smkgr=1,sbp=130)), fun="cumhaz",lty=1:4,log=T,mark.time=F,xlim=c(40,70)) legend("bottomright",as.character(1:4),lty=1:4) # Proportionality seems ok for body mass index # Question e # We perform a formal test for proportionality for the model with grouped body mass index cox.zph(fit.c2,transform='log') # The formal test confirms whst we saw from the plots. # There may be some problem with proportionality for smoking group 5, but otherwise it seems fine # (The problem with smoking group 5 is not serious enough to "force us" to modify the model.) par(mfrow=c(3,3)) plot(cox.zph(fit.c2,transform='log')) # The plots should be fairly horizontal if proportionality is ok, # and that is the case here # Question f break.pi=quantile(fit.c2$linear.predictor) norw.death.reduced=norw.death[!is.na(norw.death$bmi),] norw.death.reduced$pred.group=cut(fit.c2$linear.predictor,break.pi, include.lowest=T,label=1:4) plot(survfit(Surv(agestart,agestop,dead)~pred.group,data=norw.death.reduced),mark.time=F,lty=1:4,xlim=c(40,70)) surv.norw=survfit(fit.c2) for (i in 1:4) { mean.pi=mean(fit.c2$linear.predictor[norw.death.reduced$pred.group==i]) lines(surv.norw$time,surv.norw$surv^exp(mean.pi),type="s",lty=i,col="red") } # The Kaplan-Meier estimates and the estimates base don the Cox model are quite close # for all the four groups, sot overall fit seems ok.