# R-commands for stratified Cox regression and model checks # ======================================================== # For illustration we will use the melanoma data # (treating deaths from other causes as censorings). # We will use the survival library, so this has to be loaded # We first read the data: melanoma=read.table("http://www.uio.no/studier/emner/matnat/math/STK4080/h12/melanoma.txt",header=T) # STRATIFIED COX REGRESSION # ------------------------------ # We first fit a model with sex, tumor thickness, and ulceration as covariates: fit.stu=coxph(Surv(lifetime,status==1)~factor(sex)+thickn+factor(ulcer), data=melanoma) summary(fit.stu) # If we do not trust the proportional hazard assumption for ulceration, we may # fit a stratified model with separate baseline for each ulceration stratum # (cf. below for a method for checking the assumption) fit.strat=coxph(Surv(lifetime,status==1)~factor(sex)+thickn+strata(ulcer),data=melanoma) # We may plot the cumuative baseline hazards for the two ulceration strata: baseline.covar=data.frame(sex=1,thickn=0) surv.strat=survfit(fit.strat,newdata=baseline.covar) plot(surv.strat,fun="cumhaz", mark.time=F,xlim=c(0,10), xlab="Years since operation",ylab="Cumulative hazard",lty=1:2) # CHECK OF LOG-LINEARITY # ----------------------- # We will check log-linearity of thickness for a model with sex, tumor thickness, and ulceration as covariates: fit.stu=coxph(Surv(lifetime,status==1)~factor(sex)+thickn+factor(ulcer), data=melanoma) # A simple method for cheking log-linearity is to make a categorical variable # for thickness, estimate a separate effect for each thickness group and see # if we get a fairly linear trend for the estimates # (We group according to the thickness of the patients who die from melanoma): break.thick= c(0,quantile(melanoma$thickn[melanoma$status==1])[2:4], max(melanoma$thickn)) melanoma$thick.group=cut(melanoma$thickn,break.thick,labels=1:4) cox.stgu=coxph(Surv(lifetime,status==1)~thick.group+factor(sex)+factor(ulcer),data=melanoma) summary(cox.stgu) th=rep(0,4) for (i in 1:4) th[i]=mean(melanoma$thickn[melanoma$thick.group ==i]) plot(th,c(0,cox.stgu$coef[1:3]),pch=19,col="red",xlim=c(0,10),ylim=c(0,1.5),xlab="Thickness (mm)",ylab="Estimated betas") # A more advanced method is to fit a penalized smoothing spline for the effect of thickness: fit.spstu=coxph(Surv(lifetime,status==1)~factor(sex)+factor(ulcer)+pspline(thickn), data=melanoma) print(fit.spstu) par(mfrow=c(1,3)) termplot(fit.spstu,se=T) # Both the formal test (from the print command) and the plot (from the termplot command) # indicate that the effect of thickness is NOT log-linear. # The model checks above indicate that it is better to use log2-thickness, # so we define log2-thickness as a new numeric covariate and check for a log-linear # effect of the covariate log2-thickness: melanoma$log2thick=log(melanoma$thickn,2) fit.spslogtu=coxph(Surv(lifetime,status==1)~factor(sex)+factor(ulcer)+pspline(log2thick), data=melanoma) print(fit.spslogtu) par(mfrow=c(1,3)) termplot(fit.spslogtu,se=T) # Now both the formal test (from the print command) and the plot (from the termplot command) # indicate that the effect of log2-thickness is reasonably log-linear. # Thus we fit the model: fit.slogtu=coxph(Surv(lifetime,status==1)~factor(sex)+factor(ulcer)+log2thick, data=melanoma) summary(fit.slogtu) # CHECK OF PROPORTIONAL HAZARDS # ---------------------------- # A simple method is to fit a stratified Cox model, where we stratify according # to the levels of a categorical covariate, and plot the stratum-specific # cumulative baseline hazard estimates on a log-scale. # If we have proportional hazards, the curves should be fairly parallel # (i.e. have a constant vertical distance). # We first check for sex: fit.stslogtu=coxph(Surv(lifetime,status==1)~strata(sex)+factor(ulcer)+log2thick, data=melanoma) plot(survfit(fit.stslogtu,newdata=data.frame(ulcer=1,log2thick=0)),fun="cumhaz",lty=1:2,log=T,mark.time=F,xlim=c(0,10)) # Proportionallity seems ok. # We then check for ulceration: fit.slogtstu=coxph(Surv(lifetime,status==1)~factor(sex)+strata(ulcer)+log2thick, data=melanoma) plot(survfit(fit.slogtstu,newdata=data.frame(sex=1,log2thick=0)),fun="cumhaz",lty=1:2,log=T,mark.time=F,xlim=c(0,10)) # The curves are fairly parallel, except for the first two years # Finally we check for thickness: fit.ssttstu=coxph(Surv(lifetime,status==1)~factor(sex)+factor(ulcer)+strata(thick.group), data=melanoma) plot(survfit(fit.ssttstu,newdata=data.frame(sex=1,ulcer=1)),fun="cumhaz",lty=1:4,log=T,mark.time=F,xlim=c(0,10)) # There is a tendency that the curves are closer together for large values # of t than for smaller ones, indicating a non-proportional effect of thickness # We then make a formal test for proportionality of the covariates. # This is done by, for each covariate x, adding time-dependent covariate x*log(t), # and testing whether the time-dependent covariates are significant using a score test: cox.zph(fit.slogtu,transform='log') # The test indicates that the effect of tumor-thickness is not proportional. # The estimate we get for log-thicness in then a weighted average of the time-varying effect #We also make plots that give nonparametric estimates of the (possible) time dependent # effect of the covariates: par(mfrow=c(1,3)) plot(cox.zph(fit.slogtu)) # GRAPHICAL CHECK OF GLOBAL FIT # ----------------------------- # We will check the global fit of a model with sex, log tumor thickness, and ulceration as covariates: fit.slogtu=coxph(Surv(lifetime,status==1)~factor(sex)+log2thick+factor(ulcer), data=melanoma) surv.slogtu=survfit(fit.stu) # Make 4 groups according to the values of the prognostic index # (We need a special handeling of the lowest value to avoid this to become NA) break.pi=quantile(fit.stu$linear.predictor) melanoma$pred.group=cut(fit.stu$linear.predictor,break.pi, include.lowest=T,label=1:4) # Make Kaplan-Meier plots for the four groups: plot(survfit(Surv(lifetime,status==1)~pred.group,data=melanoma),mark.time=F,lty=1:4,xlim=c(0,10),lwd=1.5) # Add lines for the survival function estimates based on the fitted Cox model # for the mean value of the prognositc index within each group for (i in 1:4) { mean.pi=mean(fit.stu$linear.predictor[melanoma$pred.group==i]) lines(surv.slogtu$time,surv.slogtu$surv^exp(mean.pi),type="s",lty=i,col="red",lwd=1.5) } # The agreement is fine for the two groups with thickest tumors, # but not so good for the two groups with thinnest tumors