Last active
March 9, 2018 21:43
-
-
Save walkerjeffd/6179768 to your computer and use it in GitHub Desktop.
R functions for assigning and summarizing storm events based on interevent period and minimum event total precipitation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
assign.events <- function(df, datetime.name="DATETIME", value.name="VALUE", interevent=8, threshold=0.1) { | |
# assigns events to data frame such as storm events or discharge events | |
require(plyr) | |
if (!(datetime.name %in% names(df))) { | |
stop(paste0('Could not find datetime column called ', datetime.name)) | |
} | |
if (!(value.name %in% names(df))) { | |
stop(paste0('Could not find value column called ', value.name)) | |
} | |
if (!is.regular(df[, datetime.name])) { | |
stop('Dataframe is not regular') | |
} | |
# make sure df is sorted by datetime | |
df <- df[order(df[, datetime.name]),] | |
# extract datetime and value | |
x <- data.frame(DATETIME=df[, datetime.name], | |
VALUE=df[, value.name]) | |
# compute runs | |
x.rle <- rle(x$VALUE > 0) | |
# EVENT: logical column indicating if timestep is part of event | |
# does not yet account for events that do not meet minimum threshold | |
x$EVENT <- rep(x.rle$lengths*(x.rle$values | x.rle$lengths < interevent), x.rle$lengths) | |
x$EVENT <- x$EVENT > 0 | |
# EVENT_ID: numeric column assigning unique IDs to each event | |
x$EVENT_ID <- c(as.numeric(x$EVENT[1]), ifelse(diff(x$EVENT)>0, diff(x$EVENT), 0)) | |
x$EVENT_ID <- cumsum(x$EVENT_ID)*x$EVENT | |
# compute sum of each event | |
x.event <- ddply(x, .(EVENT_ID), summarise, SUM=sum(VALUE)) | |
# take subset of events that meet minimum threshold | |
x.event <- subset(x.event, SUM >= threshold) | |
# assign events an ID of 0 if they did not meet the threshold | |
x$EVENT_ID[which(!(x$EVENT_ID %in% x.event$EVENT_ID))] <- 0 | |
# reassign EVENT based on current EVENT_ID | |
x$EVENT <- x$EVENT_ID > 0 | |
# reassign EVENT_ID based on new EVENT | |
x$EVENT_ID <- c(as.numeric(x$EVENT[1]), ifelse(diff(x$EVENT)>0, diff(x$EVENT), 0)) | |
x$EVENT_ID <- cumsum(x$EVENT_ID)*x$EVENT | |
# assign DRY_ID for dry periods | |
x$DRY_ID <- c(as.numeric(!x$EVENT[1]), ifelse(diff(!x$EVENT)>0, diff(!x$EVENT), 0)) | |
x$DRY_ID <- cumsum(x$DRY_ID)*(!x$EVENT) | |
# get rle of EVENT_ID | |
x.rle.event <- rle(x$EVENT_ID) | |
# add counter for each dry/wet event | |
x$COUNTER <- unlist(sapply(x.rle.event$lengths, seq)) | |
# set EVENT_ID during dry events and DRY_ID during wet events to NA | |
x$EVENT_ID[which(x$EVENT_ID==0)] <- NA | |
x$DRY_ID[which(x$DRY_ID==0)] <- NA | |
# assign counter for dry events (number of hours since end of last wet event) | |
x$DRY_HOURS <- ifelse(!is.na(x$DRY_ID), x$COUNTER, 0) | |
# assign counter for wet events (number of hours since end of last dry event) | |
x$EVENT_HOURS <- ifelse(!is.na(x$EVENT_ID), x$COUNTER, 0) | |
# add EVENT_ID, DRY_HOURS, WET_HOURS to df and return | |
df$EVENT_ID <- x$EVENT_ID | |
df$EVENT_HOURS <- x$EVENT_HOURS | |
df$DRY_ID <- x$DRY_ID | |
df$DRY_HOURS <- x$DRY_HOURS | |
df | |
} | |
events.summary <- function(df, datetime.name="DATETIME", value.name="VALUE") { | |
# summarizes each event event in dataframe df | |
# assumes df contains columns EVENT_ID (from assign.events), DATETIME (POSIXct), and VALUE (numeric) | |
if (!("EVENT_ID" %in% names(df))) { | |
stop("Unable to find column EVENT_ID in dataframe") | |
} | |
if (!(datetime.name %in% names(df))) { | |
stop("Unable to find datetime column in dataframe") | |
} | |
if (!(value.name %in% names(df))) { | |
stop("Unable to find value column in dataframe") | |
} | |
df$DATETIME <- df[, datetime.name] | |
df$VALUE <- df[, value.name] | |
event <- ddply(subset(df, !is.na(EVENT_ID)), c("EVENT_ID"), summarise, | |
DURATION=length(DATETIME), | |
START=min(DATETIME), | |
END=max(DATETIME), | |
TOTAL=sum(VALUE), | |
PEAK=max(VALUE)) | |
event | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is incredibly helpful;
I'm relatively new to R and have been working on trying to write my own version of rain event summary code.
One issue/request:
It looks as though the assumption is that each record is equivalent to a 1 hr timestep. Can you modify the function to incorporate a variable that allows users to define their record length? (For instance, much of my meteorological data is in 30 min steps).