for(var in time_vars_1d)
output[[var]] <- c(output[[var]], chunk_list[[cc]][[var]])
for(var in time_vars_2d)
output[[var]] <- cbind(output[[var]], chunk_list[[cc]][[var]])
}
}
return(output)
}
## Wavelet-transform event timing analysis of observed vs modeled streamflow.
##
## Args:
##   POSIXct: POSIXct time vector shared by all series.
##   obs: numeric vector of observed streamflow, same length as POSIXct.
##   mod: optional named list of modeled streamflow vectors (same length as
##        POSIXct); when NULL only the observed analysis is returned.
##   max.scale: maximum wavelet scale passed to WtTimeChunks (unitless).
##   min_ts_length: minimum chunk duration kept for analysis. NOTE: the
##        default, max.scale * time_step_h, is evaluated lazily at its first
##        use below, by which point a NULL time_step_h has been inferred from
##        the data — so the NULL default of time_step_h does not break it.
##   time_step_h: the nominal timestep (a difftime); when NULL it is inferred
##        as the median diff of the gap-free POSIXct values.
##
## Returns: a list with 'input_data' (a melted data.table of all series),
##   'obs' ($wt with $event_timing masks, event matrices, bulk samples, and
##   time-averaged power), and one element per model name carrying the model
##   $wt mask/stats and the obs-model $xwt with timing-error samples.
WtEventTiming <- function(POSIXct, obs,
mod=NULL,
max.scale=256,
min_ts_length=max.scale * time_step_h,
time_step_h=NULL) {
## TODO JLM: max.scale is unitless but min_ts_length is not? max.scale*time_step_h gives
## a unit for it.
## use ... or other kw for passing to wt, xwt.
## TODO JLM: are max.scale wt and xwt necessarily the same?
## JLM TODO: Gap handling.
## JLM TODO: If there were discontinuities, would have a loop over the chunks.
## JLM TODO: If we break up the timeseries, should probably return the chunks of raw data too.
## TODO JLM: option for reducing this object size if only stats are required.
##           most of the bloat is for plotting.
n_time <- length(POSIXct)
output <- list()
## ---------------------------------------------------------------------------
## Input data
## Save of a data frame, this facilitates plotting. Could make this optional.
input_data <-
as.data.table(
data.frame(
POSIXct=POSIXct,
Time=as.numeric(POSIXct),
input_index=1:length(POSIXct),
obs=obs
)
)
## Append each modeled series as its own column, keyed by its list name.
for(key in names(mod)) input_data[[key]] <- mod[[key]]
## -------------------------------------------------------
## Deal with missing data
## Missing data across all of (obs, model1, ..., modelN) should be removed.
## Generally, model data wont be missing but if we just did this on the obs and
## data were missing for one model instance and not another, the results would
## not be consistent.
## Break up the data in to contiguous time chunks.
input_data <- input_data[complete.cases(input_data),]
## Label time chunks
## How do we know the timestep?
if(is.null(time_step_h))
## time_step_h <- as.numeric(median(diff(input_data$POSIXct)), units="hours")
time_step_h <- median(diff(input_data$POSIXct)) # This is a timediff.
## Need to ensure that a passed value is a timediff.
## Mark the first row after every gap (> one timestep), then cumsum the
## markers to get 1-based contiguous-chunk ids.
wh_gt_time_step <- which(diff(input_data$POSIXct) > time_step_h) + 1
input_data$chunk <- 0
input_data$chunk[wh_gt_time_step] <- 1
input_data$chunk <- cumsum(input_data$chunk) + 1
## Filter out chunks less than some size
## (The min_ts_length default promise is forced here, after time_step_h
## has been resolved above.)
chunk_len = input_data[, .(len_h = max(POSIXct) - min(POSIXct)), by='chunk']
rm_chunks <- subset(chunk_len, len_h < min_ts_length)$chunk
if(length(rm_chunks))
input_data <- input_data[ !(chunk %in% rm_chunks) ]
## Melt just for the output list
## Long format: one row per (time, series), series labeled 'Streamflow'.
output[['input_data']] <- melt(
input_data,
id.vars=c('Time', 'POSIXct', 'chunk', 'input_index'),
variable.name='Streamflow',
value.name='Streamflow (cms)'
)
## Why this extra copy?
## NOTE(review): duplicate of `Streamflow (cms)` under a syntactic name —
## presumably for downstream code that cannot handle the backticked
## column; confirm before removing.
output[['input_data']]$streamflow_values <- output[['input_data']]$`Streamflow (cms)`
## ---------------------------------------------------------------------------
## Observed wavelet transforms.
## Observed timeseries is just one.
wt_obs <- WtTimeChunks(input_data, obs_name='obs', max.scale=max.scale)
class(wt_obs) <- c("wavelet_timing", class(wt_obs))
n_period <- length(wt_obs$period)
output[['obs']] = list(wt = wt_obs)
## The masks
output[['obs']]$wt$event_timing$mask <- WtEventMask(output[['obs']]$wt)
## The event matrices
output[['obs']]$wt$event_timing$event_mtx<- WtEventMtx(output[['obs']]$wt)
## Gather the "bulk" (all the) information needed for sampling phase/timing errors:
## No phase needed for the obs wt.
## wh_event_mask: (period-row, time-col) index pairs of significant obs
## events; reused below to subset power, period, time, and clusters.
wh_event_mask <- which(output[['obs']]$wt$event_timing$mask$event == 1, arr.ind=TRUE)
output[['obs']]$wt$event_timing$all <-
data.table::data.table(power_corr = output[['obs']]$wt$power.corr[wh_event_mask])
output[['obs']]$wt$event_timing$all$period <- output[['obs']]$wt$period[wh_event_mask[,1]]
output[['obs']]$wt$event_timing$all$time <-
output[['input_data']][Streamflow == 'obs']$input_index[wh_event_mask[,2]]
output[['obs']]$wt$event_timing$all$period_clusters <-
output[['obs']]$wt$event_timing$event_mtx$period_clusters[wh_event_mask]
## sort all by period and time
setkey(output[['obs']]$wt$event_timing$all, period, time)
## Calculate the time-averaged corrected wavelet power spectrum on the obs:
output[['obs']]$wt$event_timing$time_avg <-
output[['obs']]$wt$event_timing$all[,.(power_corr=mean(power_corr)),.(period)]
## Sort time_avg by period
setkey(output[['obs']]$wt$event_timing$time_avg, period)
## Calculate the local maxima of the time-avg corrected WPS so we can sample timing
## on just the most important periods.
output[['obs']]$wt$event_timing$time_avg$local_max <-
pastecs::turnpoints(output[['obs']]$wt$event_timing$time_avg$power_corr)$peaks
## Obs-only mode: no models supplied, nothing further to compute.
if(is.null(mod)) return(output)
## ---------------------------------------------------------------------------
## Modeled.
## For the modeled timeseries, we loop over the named list of modeled timeseries.
wt_mod = list()
mod_names <-  names(mod)
for (name in mod_names) {
wt_mod[[name]] <- WtTimeChunks(input_data, name, max.scale=max.scale)
class(wt_mod[[name]]) <- c("wavelet_timing", class(wt_mod[[name]]))
}
for (name in mod_names) {
output[[name]] <- list(wt = wt_mod[[name]])
output[[name]]$wt$event_timing$mask <- WtEventMask(output[[name]]$wt)
}
## Do the modeled TS need event_mtx? I dont think so.
## The intersection stats.
## Overlap of obs and model event masks, expressed as a fraction of each.
for (name in mod_names) {
area_intersect <-
sum(output[['obs']]$wt$event_timing$mask$event &
output[[name]]$wt$event_timing$mask$event)
output[[name]]$wt$event_timing$stats$obs_event_frac <-
area_intersect / sum(output[['obs']]$wt$event_timing$mask$event)
output[[name]]$wt$event_timing$stats$mod_event_frac <-
area_intersect / sum(output[[name]]$wt$event_timing$mask$event)
}
## TODO JLM: rename these?
## obs_event_frac is like a hit rate, 1 - mod_event_frac is like a FAR.
## The timing stats.
## Gather the "bulk" phase/timing errors:
##    No sampling, take all observed significant/event timing errors from the obs-mod xwt.
for (name in mod_names) {
output[[name]]$xwt <-
WtTimeChunks(input_data, obs_name='obs', mod_name=name, max.scale=max.scale)
class(output[[name]]$xwt) <- c("wavelet_timing", class(output[[name]]$xwt))
## Calculate the timing error matrix
## timing_err = period * phase / (2*pi): phase (radians) converted to
## time units of the corresponding period, row by row.
output[[name]]$xwt$event_timing$mtx$timing_err <- output[[name]]$xwt$phase * NA
for(rr in 1:nrow(output[[name]]$xwt$phase)) {
output[[name]]$xwt$event_timing$mtx$timing_err[rr,] <-
output[[name]]$xwt$period[rr] *
output[[name]]$xwt$phase[rr,] / (2*pi)
}
## The masks
output[[name]]$xwt$event_timing$mask <- WtEventMask(output[[name]]$xwt)
## The event matrices
output[[name]]$xwt$event_timing$event_mtx <- WtEventMtx(output[[name]]$xwt)
## It's key that wh_event_mask is from the *obs* wt object and the modelex xwt object.
## (i.e. we sample the xwt at locations where the OBS has significant events.)
wh_event_mask <-
which(output[['obs']]$wt$event_timing$mask$event == 1, arr.ind=TRUE)
output[[name]]$xwt$event_timing$all <-
data.table::data.table(phase = output[[name]]$xwt$phase[wh_event_mask])
output[[name]]$xwt$event_timing$all$xwt_power_corr <-
output[[name]]$xwt$power.corr[wh_event_mask]
## This is excessive but having trouble joining it from the obs later.
output[[name]]$xwt$event_timing$all$obs_power_corr <-
output[['obs']]$wt$power.corr[wh_event_mask]
output[[name]]$xwt$event_timing$all$period <-
output[[name]]$xwt$period[wh_event_mask[,1]]
output[[name]]$xwt$event_timing$all$time <-
output[['input_data']][Streamflow == 'obs']$input_index[wh_event_mask[,2]]
output[[name]]$xwt$event_timing$all$timing_err <-
output[[name]]$xwt$event_timing$all$period *
output[[name]]$xwt$event_timing$all$phase / (2*pi)
## The period clusters are FOR THE OBSERVATIONS, not the modeled
output[[name]]$xwt$event_timing$all$period_clusters <-
output[['obs']]$wt$event_timing$event_mtx$period_clusters[wh_event_mask]
# Is this observed event significant in the XWT?
output[[name]]$xwt$event_timing$all$xwt_signif <-
output[['obs']]$wt$event_timing$mask$event[wh_event_mask] == 1 &
output[[name]]$xwt$event_timing$mask$event[wh_event_mask] == 1
setkey(output[[name]]$xwt$event_timing$all, period, time)
}
## TODO JLM: strip off data.tables?
return(output)
}
## Summarize timing errors from a WtEventTiming() result.
##
## Args:
##   wt_event: the list returned by WtEventTiming() run with at least one
##     modeled series. Each non-obs element must carry
##     $xwt$event_timing$all; the obs element must carry
##     $wt$event_timing$time_avg (with $local_max and $period).
##
## Returns: a list with one element per model name; under
##   [[name]]$xwt$event_timing:
##     bulk_stats                  - mean/sd over all timing errors,
##     time_avg                    - per-period means at obs WPS peaks,
##     cluster_mean(_time_avg)     - cluster means (+ per-period average),
##     cluster_max(_time_avg)      - values at the max-obs-power sample of
##                                   each cluster (+ per-period average).
we_hydro_stats <- function(wt_event) {
  output <- list()
  mod_names <- setdiff(names(wt_event), c("input_data", "obs"))
  ## -------------------------------------------------------
  ## "Bulk" stats: first two moments of all (unsampled) timing errors.
  for (name in mod_names) {
    timing_err_all <- wt_event[[name]]$xwt$event_timing$all$timing_err
    output[[name]]$xwt$event_timing$bulk_stats <- list(
      mean_timing_err = mean(timing_err_all),
      sd_timing_err = sd(timing_err_all)
    )
  }
  ## -------------------------------------------------------
  ## Extract the periods of interest from the obs wt analysis: the local
  ## maxima of the time-averaged corrected wavelet power spectrum.
  wh_peak <- wt_event[['obs']]$wt$event_timing$time_avg$local_max
  peak_periods <- wt_event[['obs']]$wt$event_timing$time_avg$period[wh_peak]
  ## -------------------------------------------------------
  ## Mean timing errors by period (peak periods only).
  ## (Removed dead code that built an unused, filtered copy of $all here.)
  for (name in mod_names) {
    output[[name]]$xwt$event_timing$time_avg <-
      wt_event[[name]]$xwt$event_timing$all[
        period %in% peak_periods,
        .(time_err = mean(timing_err),
          obs_power_corr = mean(obs_power_corr),
          xwt_power_corr = mean(xwt_power_corr),
          xwt_signif = mean(xwt_signif),
          n_clusters = length(unique(period_clusters)),
          time = mean(time)
        ),
        .(period)
      ]
    setkey(output[[name]]$xwt$event_timing$time_avg, period)
  }
  ## -------------------------------------------------------
  ## Cluster-mean timing errors on maxima of time-averaged obs wt power.
  for (name in mod_names) {
    output[[name]]$xwt$event_timing$cluster_mean <-
      wt_event[[name]]$xwt$event_timing$all[
        period %in% peak_periods,
        .(time_err = mean(timing_err),
          obs_power_corr = mean(obs_power_corr),
          xwt_power_corr = mean(xwt_power_corr),
          xwt_signif = mean(xwt_signif),
          time = mean(time)
        ),
        by = c("period_clusters", "period")
      ]
    ## Average the cluster means within each period.
    output[[name]]$xwt$event_timing$cluster_mean_time_avg <-
      output[[name]]$xwt$event_timing$cluster_mean[
        ,
        .(time_err = mean(time_err),
          obs_power = mean(obs_power_corr),
          xwt_power = mean(xwt_power_corr),
          xwt_signif_frac = mean(xwt_signif),
          n_clusters = .N
        ),
        by = 'period'
      ]
    setkey(output[[name]]$xwt$event_timing$cluster_mean, period)
  }
  ## -------------------------------------------------------
  ## Cluster-MAX timing errors on maxima of time-averaged obs wt power:
  ## for each (cluster, period) group take the sample with the largest
  ## observed corrected power.
  for (name in mod_names) {
    output[[name]]$xwt$event_timing$cluster_max <-
      wt_event[[name]]$xwt$event_timing$all[
        period %in% peak_periods,
        {
          ## Hoist the argmax so it is computed once per group, not once
          ## per output column.
          wh_max <- which.max(obs_power_corr)
          .(time_err = timing_err[wh_max],
            obs_power_corr = obs_power_corr[wh_max],
            xwt_power_corr = xwt_power_corr[wh_max],
            xwt_signif = xwt_signif[wh_max],
            time = time[wh_max]
          )
        },
        by = c("period_clusters", "period")
      ]
    ## Average the cluster maxima within each period.
    output[[name]]$xwt$event_timing$cluster_max_time_avg <-
      output[[name]]$xwt$event_timing$cluster_max[
        ,
        .(time_err = mean(time_err),
          obs_power = mean(obs_power_corr),
          xwt_power = mean(xwt_power_corr),
          xwt_signif_frac = mean(xwt_signif),
          n_clusters = .N
        ),
        by = 'period'
      ]
    setkey(output[[name]]$xwt$event_timing$cluster_max, period)
  }
  return(output)
}
## Post-analysis: summarize timing stats and draw the step-1 figure.
## NOTE(review): in this history `wt_event` is used before it is created
## further below — these lines only succeed if wt_event already exists in
## the session.
we_stats <- we_hydro_stats(wt_event)
library(grid)
figure1 <- step1_figure(wt_event)
head(we_stats)
#In R
## Setup: load dev tools and install the 'wavelet' branch of rwrfhydro.
## (The trailing comment preserves the alternate force=TRUE invocation.)
library(devtools)
library(data.table)
devtools::install_github('jmccreight/rwrfhydro', ref='wavelet', dependencies=FALSE)#devtools::install_github("jmccreight/rwrfhydro", ref='wavelet', force=TRUE)
library(rwrfhydro)
## Analysis configuration: site, time window, and plotting options.
location <- 'onion_creek'
time_period <- 'three_mos'
options(warn=0)
options(device = png)
options(repr.plot.width = 8.5, repr.plot.height = 6)
figure_prefix <- paste0(location,'_',time_period)
## Pull the event data and run the wavelet event-timing analysis against
## a single model (NWM v1.2); min_ts_length=0 keeps all chunks.
data <- WtGetEventData(location, time_period)
wt_event = WtEventTiming(
POSIXct=data$POSIXct,
obs=data$q_cms_obs,
mod=list('NWM v1.2'=data$`NWM v1.2`),
min_ts_length=0,
max.scale=256
)
## Summarize timing stats and render the step-1 figure via grid.
we_stats <- we_hydro_stats(wt_event)
library(grid)
figure1 <- step1_figure(wt_event)
grid.draw(figure1)
## Repeat of the configuration + analysis run above (console-history
## duplicate): same site, window, model, and figure.
location <- 'onion_creek'
time_period <- 'three_mos'
options(warn=0)
options(device = png)
options(repr.plot.width = 8.5, repr.plot.height = 6)
figure_prefix <- paste0(location,'_',time_period)
data <- WtGetEventData(location, time_period)
wt_event = WtEventTiming(
POSIXct=data$POSIXct,
obs=data$q_cms_obs,
mod=list('NWM v1.2'=data$`NWM v1.2`),
min_ts_length=0,
max.scale=256
)
we_stats <- we_hydro_stats(wt_event)
library(grid)
figure1 <- step1_figure(wt_event)
grid.draw(figure1)
#In R
## Console history: repeated rwrfhydro wavelet-branch installs / library
## loads, exploratory calls, and USGS data pulls. Preserved as-is.
library(devtools)
devtools::install_github('jmccreight/rwrfhydro', ref='wavelet', dependencies=FALSE)#devtools::install_github("jmccreight/rwrfhydro", ref='wavelet', force=TRUE)
devtools::install_github('jmccreight/rwrfhydro', ref='wavelet', dependencies=FALSE, force=TRUE)#devtools::install_github("jmccreight/rwrfhydro", ref='wavelet', force=TRUE)
library(data.table)
library(rwrfhydro)
#In R
library(devtools)
devtools::install_github('jmccreight/rwrfhydro', ref='wavelet', dependencies=FALSE)#devtools::install_github("jmccreight/rwrfhydro", ref='wavelet', force=TRUE)
library(data.table)
library(rwrfhydro)
library(ggplot2)
## Re-run of the configuration + analysis (see the identical blocks above).
location <- 'onion_creek'
time_period <- 'three_mos'
options(warn=0)
options(device = png)
options(repr.plot.width = 8.5, repr.plot.height = 6)
figure_prefix <- paste0(location,'_',time_period)
data <- WtGetEventData(location, time_period)
wt_event = WtEventTiming(
POSIXct=data$POSIXct,
obs=data$q_cms_obs,
mod=list('NWM v1.2'=data$`NWM v1.2`),
min_ts_length=0,
max.scale=256
)
we_stats <- we_hydro_stats(wt_event)
library(grid)
figure1 <- step1_figure(wt_event)
grid.draw(figure1)
figure1
plot(figure1)
help(grid.draw)
figure2 <- step2_figure(wt_event)
## ggnewscale install attempts (GitHub then CRAN).
library(ggnewscale)
devtools::install_github("eliocamp/ggnewscale")
install.packages("ggnewscale")
install.packages("ggnewscale")
library(ggnewscale)
## Exploratory no-argument calls (these error without arguments; kept as
## recorded history).
WtEventTiming()
library(rwrfhydro)
WtEventTiming()
zz= WtGetEventData()
zz= WtGetEventData()
zz= WtGetEventData()
zz= WtGetEventData()
CheckForUpdates()
library(rwrfhydro)
CheckForUpdates()
devtools::install_github('jmccreight/rwrfhydro', ref='wavelet', dependencies=FALSE)#devtools::install_github("jmccreight/rwrfhydro", ref='wavelet', force=TRUE)
library(data.table)
library(rwrfhydro)
help(ggsave)
library(ggplot2)
help(ggsave)
devtools::install_github('jmccreight/rwrfhydro', ref='wavelet', dependencies=FALSE)#devtools::install_github("jmccreight/rwrfhydro", ref='wavelet', force=TRUE)
library(data.table)
library(rwrfhydro)
devtools::install_github('jmccreight/rwrfhydro', ref='wavelet', dependencies=FALSE)#devtools::install_github("jmccreight/rwrfhydro", ref='wavelet', force=TRUE)
library(data.table)
library(rwrfhydro)
library(ggplot2)
#In R
library(devtools)
devtools::install_github('jmccreight/rwrfhydro', ref='wavelet', dependencies=FALSE)#devtools::install_github("jmccreight/rwrfhydro", ref='wavelet', force=TRUE)
library(data.table)
library(rwrfhydro)
library(ggplot2)
#In R
library(devtools)
devtools::install_github('jmccreight/rwrfhydro', ref='wavelet', dependencies=FALSE)#devtools::install_github("jmccreight/rwrfhydro", ref='wavelet', force=TRUE)
library(data.table)
library(rwrfhydro)
library(ggplot2)
## Grab one result table for inspection. NOTE(review): the model key used
## elsewhere is 'NWM v1.2'; $NWM relies on partial matching — verify.
DT =we_stats$NWM$xwt$event_timing$cluster_max
#In R
library(devtools)
devtools::install_github('jmccreight/rwrfhydro', ref='wavelet', dependencies=FALSE)#devtools::install_github("jmccreight/rwrfhydro", ref='wavelet', force=TRUE)
library(data.table)
library(rwrfhydro)
library(ggplot2)
#In R
library(devtools)
devtools::install_github('jmccreight/rwrfhydro', ref='wavelet', dependencies=FALSE)#devtools::install_github("jmccreight/rwrfhydro", ref='wavelet', force=TRUE)
library(data.table)
library(rwrfhydro)
library(ggplot2)
#In R
library(devtools)
devtools::install_github('jmccreight/rwrfhydro', ref='wavelet', dependencies=FALSE)#devtools::install_github("jmccreight/rwrfhydro", ref='wavelet', force=TRUE)
library(data.table)
library(rwrfhydro)
library(ggplot2)
#In R
library(devtools)
devtools::install_github('jmccreight/rwrfhydro', ref='wavelet', dependencies=FALSE)#devtools::install_github("jmccreight/rwrfhydro", ref='wavelet', force=TRUE)
library(data.table)
library(rwrfhydro)
library(ggplot2)
devtools::install_github('jmccreight/rwrfhydro', ref='wavelet', dependencies=FALSE)#devtools::install_github("jmccreight/rwrfhydro", ref='wavelet', force=TRUE)
devtools::install_github('jmccreight/rwrfhydro', ref='wavelet', dependencies=FALSE)#devtools::install_github("jmccreight/rwrfhydro", ref='wavelet', force=TRUE)
## USGS instantaneous-value pulls for site 09107000, discharge ('00060'),
## over successively earlier windows. NOTE(review): the first call precedes
## library(dataRetrieval) and would fail in a fresh session.
uvData <- readNWISuv(siteNumbers = c("09107000"),
parameterCd = '00060',
startDate = '2010-10-01T00:00Z',
endDate = '2016-11-30T23:59Z')# Note: my computer, Hazel, bring it in at UTC, but for MST of the startDate/endDate, which
library(dataRetrieval)
uvData <- readNWISuv(siteNumbers = c("09107000"),
parameterCd = '00060',
startDate = '2010-10-01T00:00Z',
endDate = '2016-11-30T23:59Z')# Note: my computer, Hazel, bring it in at UTC, but for MST of the startDate/endDate, which
head(uvData)
uvData <- readNWISuv(siteNumbers = c("09107000"),
parameterCd = '00060',
startDate = '2007-10-01T00:00Z',
endDate = '2010-11-30T23:59Z')# Note: my computer, Hazel, bring it in at UTC, but for MST of the startDate/endDate, which
head(uvData)
uvData <- readNWISuv(siteNumbers = c("09107000"),
parameterCd = '00060',
startDate = '2000-10-01T00:00Z',
endDate = '2007-11-30T23:59Z')# Note: my computer, Hazel, bring it in at UTC, but for MST of the startDate/endDate, which
head(uvData)
help(readRDS)
install.packages("dtw")
# If continuing from Problems 1-4, then working directory should already be
# set and data should
# already be loaded & dates fixed with lubridate. However, we redo this here
# so that this exercise is also stand-alone.
# In RStudio navigation bar (at top left of screen): "Session" -> "Set Working Directory" -> "To source file location"
# Read in the data (again)
# NOTE(review): this read.table runs BEFORE the setwd below, so it only
# succeeds if the file is already on the current working directory; the
# correctly-ordered setwd + read pair follows immediately after.
# Style note: prefer header = TRUE over header = T (T is reassignable).
obs <- read.table("bldrsubset.txt", header = T)
setwd("~/hazel_files/Proj_Sci/Regional_Climate_Research_Section/RC_Tutorials/8Tutorial_7_2019/2019_Towler_Labs_ClassroomMachines")
# If continuing from Problems 1-4, then working directory should already be
# set and data should
# already be loaded & dates fixed with lubridate. However, we redo this here
# so that this exercise is also stand-alone.
# In RStudio navigation bar (at top left of screen): "Session" -> "Set Working Directory" -> "To source file location"
# Read in the data (again)
obs <- read.table("bldrsubset.txt", header = T)
library(lubridate) # library that makes dates and times easier. #install.packages("lubridate")
obs$date <- ymd(obs$date) # Use lubridate to convert to date format
obs$Year = year(obs$date) # Add a Year column - we wil need this for block maxima, as blocks will be years
summary(obs)
# 2014 is not a full time block! But for GP (unlike GEV), this doesn't matter, we want
#                                all peaks-over threshold.
max(obs$date, na.rm=T)
#obs=subset(obs, Year<2014) # Only needed this for GEV
summary(obs)
# Note: Can't fit GP with NAs, need to remove.
# Remove any rows where the precip, which is column 1, is an na.
Pmmobs = obs[complete.cases(obs[ , 1]),]
library(extRemes) # Need to load library for extRemes
#######################
# Generalized pareto
####################
# For now, let's focus on Precip. Get rid of Temp columns
Pmmobs$TxC=NULL
Pmmobs$TnC=NULL
# Plot daily precip by year and mark the empirical 99th percentile.
plot(Pmm~Year, data = Pmmobs, t="p", col="blue", ylab="Daily PCP (mm)", pch=19, cex=.5)
quantile(Pmmobs$Pmm, c(.95, .97, .99))
abline(h=quantile(Pmmobs$Pmm, c(.99)), col="red", lty=2, lwd=3)
# NOTE(review): 23.9 hard-codes the 99th-percentile value printed above —
# confirm it matches the data actually loaded.
new_data = subset(Pmmobs, Pmm>=23.9)
points(Pmm~Year, data=new_data, t="p", col="red", pch = 1)
quantile(Pmmobs$Pmm, c(.99)
)
quantile(Pmmobs$Pmm, c(.99))
new_data = subset(Pmmobs, Pmm>=23.9)
points(Pmm~Year, data=new_data, t="p", col="red", pch = 1)
# Threshold-selection diagnostic, then fit a Generalized Pareto at 10 mm.
threshrange.plot(Pmmobs$Pmm, r=c(5, 20), nint=24, type="GP")
thresh_val = 10
fitD <-fevd(Pmm, Pmmobs, threshold=thresh_val, type="GP", time.units = "365/year")
plot(fitD)
ci(fitD, type="parameter") # Notice that there is only scale and shape.
fitD <-fevd(Pmm, Pmmobs, threshold=thresh_val, type="GP", time.units = "365/year")
plot(fitD)
ci(fitD, type="parameter") # Notice that there is only scale and shape.
# We can use pextremes to find out the conditional probability of exceeding higher values.
pextRemes(fitD, c(12, 24)) # If thresh_val is exceeded, what is the probability of exceeding 12 mm? Of 24mm?
# Finally, meteorological data is typically clustered, which violates the iid assumption of the GP!
# Need to test this.
y= Pmmobs$Pmm
extremalindex(y, thresh_val)
# Suggests that the data is clustered.
look <-decluster(y, threshold = thresh_val) # default run lenght = 1.
par(mfrow=c(1,1))
plot(look)
# Grey dots show data removed from the fit.
extremalindex(look, thresh_val) # Higher extremal index
# Fit new GP to declustered data
fitGP.dc <- fevd(y, data=data.frame(y=c(look)),
threshold =thresh_val, type= "GP",
time.units = "365/year")
# Use lr.test to see if look is statistically better than original
lr.test(fitD, fitGP.dc)
# Can we add a covariate to the declustered data?
fitGP.dc.covary <- fevd(y, data=data.frame(y=c(look), covary=Pmmobs$Year), # Need to create data frame and add the covariate as a new column
scale.fun = ~ covary, # scale.fun for GP, location.fun for PP!
threshold =thresh_val, type= "GP",
time.units = "365/year") #
# Does adding a covariate to the declustered data improve the fit?
lr.test(fitGP.dc, fitGP.dc.covary)
