### Problem 1. Explore station weather data: 
#              Load data, plot time series, fit linear trend, plot histogram. 


# Step 1: Confirm the working directory in RStudio.
# Show the directory R is currently working in.
getwd()
# List the files found in that directory.
list.files(path = ".")
# If you don't see the file "bldrsubset.txt", you need to set your working directory.

# In RStudio navigation bar (at top left of screen): "Session" -> "Set Working Directory" -> "To source file location"
# or use setwd(" ") with the path

# Step 2: Load the historical Boulder weather data (observations).
# The first line of the file supplies the column names. TRUE is spelled out
# because the shorthand T is an ordinary variable that can be reassigned.
obs <- read.table("bldrsubset.txt", header = TRUE)

# Step 2a: Look at the data. (Try: head(), str(), summary(), dim().)
# Use help() to find out what each command does.
help(dim)
head(obs)     # first few rows
str(obs)      # column types and a preview of the values
summary(obs)  # per-column summary statistics

# Step 3: Convert the "date" column to a proper date type using the package "lubridate".
# Important! On your own machine you would install this package once with
#            install.packages("lubridate") and then load it in every new session.
#            Here it is already installed.
library(lubridate) # Needed in every session that uses the package.
# Parse the existing "date" column as year-month-day and store the result back
# in the same column of the data frame "obs".
obs[["date"]] <- ymd(obs[["date"]])

# Re-inspect the data frame to see the converted column.
head(obs)
str(obs)

# Step 4: Plot the max temperatures (TxC) against the date.
par(mfrow = c(1, 2)) # One row, two columns: space for 2 adjacent plots
plot(
  TxC ~ date,
  data = obs,
  type = "l", col = "red", lwd = 2,
  xlab = "Years", ylab = "degrees C",
  ylim = c(-30, 40),
  main = "Boulder Daily Max Temp"
)

# Your turn 1: Create the graph for TnC, and make the color "blue"
# Hint: copy/paste the plot code above and modify. See web instructions if you get stuck!

# Check if there's a linear trend in temperatures.
# The variables are named in the formula and looked up in `obs` via `data =`,
# so the coefficient is labelled "date" (not "obs$date") and the fitted
# models can be used with predict(newdata = ...).
# NOTE: if `date` is of class Date, lm() treats it as a day count, so the
# slope is in temperature units per day.
fit.TxC <- lm(TxC ~ date, data = obs)
fit.TnC <- lm(TnC ~ date, data = obs)

# ANOVA table and coefficient summary for the max-temperature fit.
anova(fit.TxC)
summary(fit.TxC)

# Your Turn 2: Repeat the anova() and summary() for fit.TnC. 
#              Is there a statistically significant trend in TnC?

# Step 5: Look at the daily precipitation distribution.
par(mfrow = c(1, 1)) # Back to a single plot per page
hist(obs$Pmm)
# Ignore missing values when computing the range and the quantiles.
# TRUE is spelled out because the shorthand T is an ordinary variable that
# can be reassigned.
range(obs$Pmm, na.rm = TRUE)
quantile(obs$Pmm, na.rm = TRUE)

# Only look at precip data of at least 1 mm.
# The Pmm column is selected by name rather than by position (column 1), so
# the subset stays correct whatever the column order in the file is. Missing
# values are dropped in the same step.
prec <- obs$Pmm[!is.na(obs$Pmm) & obs$Pmm >= 1]
range(prec)
quantile(prec)
# Precip data is typically fit with a gamma distribution. Fit one to `prec`.
#install.packages("MASS") # Only need to install once, it's already on the classroom machines
library(MASS)
gampars <- fitdistr(prec, "gamma")$estimate

# 2 mm wide bins from 0 up to the first even value at or above max(prec).
# Deriving the upper limit from the data guarantees the breaks span every
# observation; hist() stops with an error otherwise.
breaks <- seq(0, 2 * ceiling(max(prec) / 2), by = 2)

# Evaluate the fitted gamma density on a grid covering the observed range.
# The parameters are picked by name so the call does not depend on their order.
xfit <- seq(min(prec), max(prec), length.out = 100)
yfit <- dgamma(xfit, shape = gampars[["shape"]], rate = gampars[["rate"]])

# freq = FALSE puts the histogram on the density scale so the fitted curve
# can be overlaid; xlim restricts the view to 0-50 mm.
hist(prec, breaks = breaks, col = "blue",
     main = "Histogram with Gamma curve", xlab = "Precipitation mm",
     xlim = c(0, 50), freq = FALSE)
lines(xfit, yfit, col = "red", lwd = 2)

# Your Turn 3: Try range(), quantile(), summary(), etc on prec.  
#              What information is not being captured by the gamma distribution? 
# Look back at the web document to check your answers and move on to the next
# exercise.
