#########################################################
# Problem 5: Fit Generalized Pareto to peaks over threshold. 
#########################################################

# If continuing from Problems 1-4, then working directory should already be
# set and data should
# already be loaded & dates fixed with lubridate. However, we redo this here
# so that this exercise is also stand-alone. 
# In RStudio navigation bar (at top left of screen): "Session" -> "Set Working Directory" -> "To source file location"
# Read in the data (again)
obs <- read.table("bldrsubset.txt", header = T)
library(lubridate) # library that makes dates and times easier. #install.packages("lubridate") 
obs$date <- ymd(obs$date) # Use lubridate to convert to date format
obs$Year = year(obs$date) # Add a Year column - we wil need this for block maxima, as blocks will be years
summary(obs)

# 2014 is not a full time block! But for GP (unlike GEV), this doesn't matter, we want
#                                all peaks-over threshold. 
max(obs$date, na.rm=T)
#obs=subset(obs, Year<2014) # Only needed this for GEV
summary(obs)

# Note: Can't fit GP with NAs, need to remove. 
# Remove any rows where the precip, which is column 1, is an na. 
Pmmobs = obs[complete.cases(obs[ , 1]),]

library(extRemes) # Need to load library for extRemes

#######################
# Generalized pareto
####################
# For now, let's focus on Precip. Get rid of Temp columns
Pmmobs$TxC=NULL
Pmmobs$TnC=NULL
plot(Pmm~Year, data = Pmmobs, t="p", col="blue", ylab="Daily PCP (mm)", pch=19, cex=.5)
quantile(Pmmobs$Pmm, c(.95, .97, .99))
abline(h=quantile(Pmmobs$Pmm, c(.99)), col="red", lty=2, lwd=3)
quantile(Pmmobs$Pmm, c(.99))
new_data = subset(Pmmobs, Pmm>=23.9)
points(Pmm~Year, data=new_data, t="p", col="red", pch = 1)

threshrange.plot(Pmmobs$Pmm, r=c(5, 20), nint=24, type="GP")

# Your Turn 1: Look at the threshold plot, playing around with the range and intervals, 
#              what value range seems reasonable?
#      P.S. Recall tradeoff between bias and variance, but the ultimate selection is subjective!
#    Select a threshold and test in the GP
# thresh_val = ?? enter a reasonable threshold. 

fitD <-fevd(Pmm, Pmmobs, threshold=thresh_val, type="GP", time.units = "365/year")
plot(fitD)
ci(fitD, type="parameter") # Notice that there is only scale and shape. 
# Your Turn 2: What does kind of tail does the shape estimate suggest? 

# We can use pextremes to find out the conditional probability of exceeding higher values.
pextRemes(fitD, c(12, 24)) # If thresh_val is exceeded, what is the probability of exceeding 12 mm? Of 24mm?

# Your turn: 3) Try adding a covariate to fitD in the scale and see if it's significant.
#            4) Try varying the threshold, how does it change the results?

# Finally, meteorological data is typically clustered, which violates the iid assumption of the GP!
# Need to test this. 
y= Pmmobs$Pmm
extremalindex(y, thresh_val)
# Suggests that the data is clustered.
look <-decluster(y, threshold = thresh_val) # default run lenght = 1. 
par(mfrow=c(1,1))
plot(look)
# Grey dots show data removed from the fit. 
extremalindex(look, thresh_val) # Higher extremal index

# Fit new GP to declustered data
fitGP.dc <- fevd(y, data=data.frame(y=c(look)), 
                 threshold =thresh_val, type= "GP",
                 time.units = "365/year") 

# Use lr.test to see if look is statistically better than original
lr.test(fitD, fitGP.dc)
# Does declustering improve the fit? (Hint p_value needs to be < 0.05)

# Can we add a covariate to the declustered data? 
fitGP.dc.covary <- fevd(y, data=data.frame(y=c(look), covary=Pmmobs$Year), # Need to create data frame and add the covariate as a new column
                        scale.fun = ~ covary, # scale.fun for GP, location.fun for PP!
                        threshold =thresh_val, type= "GP",
                        time.units = "365/year") # 

# Does adding a covariate to the declustered data improve the fit? 
lr.test(fitGP.dc, fitGP.dc.covary)
