# Exercise 20
# The dataset `sau` contains weights of lambs at slaughter from a Norwegian
# county, collected in the years 1989-1998. We have 7 variables:
#   hvkt      Body weight of lamb at slaughter (response variable)
#   aar       Year
#   ageewe    Age of the mother
#   alderlam  Age of lamb at slaughter (in days)
#   kjoenn    Gender
#   burdH     Number of lambs in the litter
#   NAO       A climate index for the current year
sau <- read.table(
  file = "http://www.uio.no/studier/emner/matnat/math/STK3100/data/sau.txt",
  header = TRUE
)
# NOTE(review): attach() is discouraged (masking/namespace risks); kept here
# because the rest of the script refers to the columns by bare name.
attach(sau)
head(sau)

#### a) Exploratory analysis to become familiar with the data set
par(mfrow = c(1, 2))
boxplot(split(hvkt, kjoenn))
boxplot(split(hvkt, burdH))
par(mfrow = c(2, 2))
plot(aar, hvkt)
plot(ageewe, hvkt)
plot(alderlam, hvkt)
plot(NAO, hvkt)
# Not obvious which covariates explain the data best.

#### Try different GLM models using the log-link with all covariates:

#### b) y ~ Gamma
fit.gam <- glm(hvkt ~ ., family = Gamma(link = log), data = sau)
summary(fit.gam)
# The (residual) deviance for the model = 54.198 on 1998 - 7 = 1991 df.
# The chi-square approximation can be problematic since we estimate phi!
pchisq(54.198, 1991, lower.tail = FALSE)

#### c) y ~ inverse Gaussian
fit.ig <- glm(hvkt ~ ., family = inverse.gaussian(link = log), data = sau)
summary(fit.ig)
# AIC = 2 * (-logLik + k); lowest AIC is the best model; k = # of parameters.
#   fit.gam = 13562.88
#   fit.ig  = 13609.02
AIC(fit.gam, fit.ig)

#### d) y ~ log-normal: then log(y) ~ normal, so we can do a standard
#    linear regression on log(y).
fit.log <- lm(log(hvkt) ~ ., data = sau)
summary(fit.log)
cbind(fit.log$coef, fit.gam$coef, fit.ig$coef)  # very similar

#### e) Compute the AIC for the log-linear model and compare with the
#    previous models (see notes for calculations).
# AIC = -1468.854 on the log scale; the response is on another scale here,
# so the (log-)likelihood values are not directly comparable.
# Part e) continued: convert the log-normal log-likelihood back to the
# original scale via the Jacobian of the log transform (subtract sum(log(y))),
# then compute AIC with k = 8 parameters (7 betas + sigma^2).
logLik.originalScale <- as.numeric(logLik(fit.log)) - sum(log(hvkt))
AIC.log <- 2 * (-logLik.originalScale + 8)  # = 13606.22

####################################################################
##################### Further on we will use the Gamma regression:
# y ~ Gamma(mu, nu), E(y) = mu, Var(y) = mu^2 / nu

#### f) Likelihood-ratio test for dropping NAO
fit.gam_full <- glm(hvkt ~ aar + ageewe + alderlam + kjoenn + burdH + NAO,
                    family = Gamma(link = log), data = sau)
fit.gam_noNAO <- glm(hvkt ~ aar + ageewe + alderlam + kjoenn + burdH,
                     family = Gamma(link = log), data = sau)
LRT <- 2 * (logLik(fit.gam_full) - logLik(fit.gam_noNAO))
LRT <- as.numeric(LRT)
# p = 0.3868399 -> accept H0: beta_NAO is not significantly different from 0.
pchisq(LRT, lower.tail = FALSE, df = 1)
summary(fit.gam_noNAO)
AIC(fit.gam_noNAO)
mu_hat <- fit.gam_noNAO$fitted
nu_hat <- 1 / summary(fit.gam_noNAO)$disp  # phi = 1/nu

#### g) Examine the final model!
# Plot the deviance residuals (hand-computed; equivalently
# residuals(fit.gam_noNAO, type = "deviance")):
devres <- sign(hvkt - mu_hat) *
  sqrt(-2 * (log(hvkt / mu_hat) + 1 - hvkt / mu_hat))
plot(mu_hat, devres, xlab = "fitted", ylab = "deviance residuals")
abline(h = 0, col = "red")

# Plot mu_hat against the empirical variance: split y and mu_hat into 100
# groups based on the quantiles of mu_hat.
quant_hat <- quantile(mu_hat, c(0:100) / 100)
Mu <- NULL
V <- NULL
for (i in 1:100) {
  # Observations whose fitted value falls in the i-th quantile bin.
  # NOTE(review): the original source was truncated/garbled here ("<" signs
  # were stripped); reconstructed as the evident quantile-bin selection —
  # verify against the original exercise solution.
  in_bin <- mu_hat > quant_hat[i] & mu_hat <= quant_hat[i + 1]
  yi <- hvkt[in_bin]       # responses in group i
  mui <- mu_hat[in_bin]    # fitted values in group i
  # NOTE(review): the loop body past this point is missing from the source;
  # the comments indicate the empirical variance of each group is compared
  # with the Gamma mean-variance relation Var(y) = mu^2 / nu.
  Mu <- c(Mu, mean(mui))
  V <- c(V, var(yi))
}