#Exercise 18 claims = read.table("http://www.uio.no/studier/emner/matnat/math/STK3100/h14/data/claims.txt", header=T) par(mfrow=c(2,2)) plot(x=claims$alder, y=claims$antskader/claims$antforsikret, ylab="occurrence rate" ) plot(x=claims$motorvolum, y=claims$antskader/claims$antforsikret, ylab="occurrence rate", col="red") plot(x=claims$distrikt, y=claims$antskader/claims$antforsikret, ylab="occurrence rate", col="blue") claims$alder = as.factor(claims$alder) claims$motorvolum = as.factor(claims$motorvolum) claims$distrikt = as.factor(claims$distrikt) claims$antforsikret = as.numeric(claims$antforsikret) claims$antskader = as.numeric(claims$antskader) #b, Perform an analysis on the dataset. clarify the significance of age, engine volume, district and any potential interactions between these. #Model with age, engine volume and district as main effects with all pairwise interactions. fit.b = glm(antskader~offset(log(antforsikret))+alder+motorvolum+distrikt+motorvolum:alder+alder:distrikt+motorvolum:distrikt,family=poisson,data=claims) summary(fit.b) anova(fit.b,test="Chisq") #Performs the Likelihood Ratio Test (LRT) on each covaraite, Terms added sequentially (first to last) # Punkt c) #Define age and engine volume as numeric covariates. claims$numalder=as.numeric(claims$alder) claims$numvolum=as.numeric(claims$motorvolum) #For district we combine 1,2,3 and continue to use it as factor. claims$nydistrikt=1*(as.numeric(claims$distrikt)<4)+4*(as.numeric(claims$distrikt)==4) claims$nydistrikt=factor(claims$nydistrikt) fit.c = glm(antskader~offset(log(antforsikret))+numalder+numvolum+nydistrikt,family=poisson,data=claims) summary(fit.c) #Performs a Wald test on each parameter. # d) #Plot residuals; par(mfrow=c(2,2)) plot(residuals(fit.c)) plot(claims$numalder,residuals(fit.c)) plot(claims$numvolum,residuals(fit.c)) plot(claims$nydistrikt,residuals(fit.c)) # e) Function to estimate rate ratio with confidence interval (slides and 5.6 in J&H) RRCItab<-function(glmfit){ sumglm = summary(glmfit)$coef RR = exp(sumglm[,1]) RRL = exp(sumglm[,1]-1.96*sumglm[,2]) RRU = exp(sumglm[,1]+1.96*sumglm[,2]) return( cbind(RR,RRL,RRU) ) } #Interpreter the estimates from the model in c) as rate-ratios. RRCItab(fit.c) # f), #Estimate the rate of a new person with: # age 25-29 years old (x_age = 2) # engine volume 1,5-2 liter (x_volum = 3) # living in London (x_district = 4) x_new = matrix(c(1,2,3,4),nrow=4,ncol=1) b_hat = matrix(fit.c$coef,nrow=4,ncol=1) covMat = summary(fit.c)$cov.unscaled rate = exp( t(x_new)%*%b_hat) se = sqrt(t(x_new)%*%covMat%*%x_new) lower = exp( t(x_new)%*%b_hat -1.96*se) upper = exp( t(x_new)%*%b_hat +1.96*se) c(rate, lower, upper)