Results

An atmospheric river dropped torrential rain on the Pacific Northwest, leading to widespread flooding and evacuation orders for tens of thousands of people in Washington state.

There are hundreds of active USGS stream sensors that measure gage height in Washington. NOAA, which uses this data for flood prediction, tracks the historic crests for many of them. We systematically checked 131 that had complete data to look for new record crests.

We found 13 sites, on 10 different streams, where a record was potentially broken between Dec. 1 and Dec. 15, 2025, based on preliminary data. Some of the broken records date back as far as 1990. The new crests exceeded the historic records by as much as 19.9 inches.

In a second analysis, we looked at a subset of the active stream sites that also have NOAA flood thresholds: the gage height at which each site is under a minor, moderate or major flood. We gathered daily gage height data starting in 2000 for 39 sites. These recorded 681 flooding instances across 402 different days. Note that because this is daily average data, it underestimates the count compared to subdaily data (which is generally only available for more recent timeframes).

While flooding in the region is not uncommon, we found that December 2025 has by far the most sensors recording floods of any month this century. And that is based on just the first half of December.

Read below to see how we got these numbers, an interactive table summarizing results and some reference graphics.

Methods

We started by using the dataRetrieval package from the USGS to get a list of all active stream sensors in Washington that measure gage height. Filtering for the ones that have subdaily data (often hourly, or even at 15- and 30-minute intervals) leaves us with 289 sites.

# packages used throughout (dataRetrieval for the USGS queries; httr and
# jsonlite for the NOAA API; janitor for adorn_totals(); tidyverse for the rest)
library(dataRetrieval)
library(tidyverse)
library(httr)
library(jsonlite)
library(janitor)

# get all active stream sites in WA that report gage height (00065)
wa_sites <- whatNWISsites(stateCd = "WA",
                          parameterCd = "00065",
                          siteType = "ST",
                          siteStatus = "active")

# cross check with stream sites that have recent subdaily data 
wa_uv_gauges <- whatNWISdata(stateCd = "WA", parameterCd = "00065", service = "uv") %>%
  filter(site_tp_cd == "ST", # streams
         end_date >= "2025-12-15") # end date fixed manually for now

# this is the initial site list
wa_sites_filter <- wa_sites %>%
  filter(site_no %in% wa_uv_gauges$site_no)

rm(wa_sites, wa_uv_gauges)

Since high-frequency measurements are relatively new for most sites (mid-2000s), we can’t rely on them to find the highest gage height ever recorded at a particular location. Daily records go back further, but give you the average for a given day, not the maximum.
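To see this gap concretely, here’s a quick sketch (assuming the packages loaded above) that compares when the subdaily (“uv”) and daily (“dv”) gage height records begin at one example site from our list:

# sketch: compare the start of subdaily ("uv") vs daily ("dv") records
# at a single example site (any site number from our list works here)
example_site <- wa_sites_filter$site_no[1]

whatNWISdata(siteNumbers = example_site, parameterCd = "00065") %>%
  filter(data_type_cd %in% c("uv", "dv")) %>%
  select(site_no, data_type_cd, begin_date, end_date)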

On the other hand, NOAA displays historic crests (like for this site) for some of these stream sensors. Those numbers have been referenced in some reporting, but we can leverage the agency’s API to check them at a much larger scale.

First, we can build a function that checks whether each USGS site on the initial list also exists on NOAA’s API (like this). Filtering for the ones that are available leaves us with 170 sites.

# now we need to see which sites have NOAA data too
check_noaa_exists_fc <- function(site_no, pause = 0.2) {
  
  url  <- paste0("https://api.water.noaa.gov/nwps/v1/gauges/", site_no)
  
  resp <- GET(url,
              add_headers(`User-Agent` = "your.name/1.0 (contact: your@email.com)"))
  
  # throttle our requests
  Sys.sleep(pause)
  
  status <- status_code(resp)
  
  tibble(site_no = site_no,
         has_noaa = status == 200, # does it exist?
         http_status = status)
  
}

# run the checker over USGS list
noaa_exists <- map_dfr(wa_sites_filter$site_no, check_noaa_exists_fc)

# filter the NOAA list for sites that exist there
noaa_exists_true <- noaa_exists %>%
  filter(has_noaa == T)

# then filter USGS list to those that have NOAA entries
wa_sites_filter2 <- wa_sites_filter %>%
  filter(site_no %in% noaa_exists_true$site_no)

rm(noaa_exists, noaa_exists_true)

Now we can loop over the 170 USGS sites that also exist in NOAA’s portal and fetch their historic crests. For more context, we can also grab the heights at which NOAA classifies the flood stages for each of these.

Note that even though all these are on the API, not all have registered historic crest data. When we filter for the ones that do, we are left with 131 sites.

# make a function to fetch historic crests and flood stages from NOAA
# (the `%||%` null-default operator used below comes from rlang/purrr)
get_noaa_meta_fc <- function(site_no, pause = 0.2) {
  
  url  <- paste0("https://api.water.noaa.gov/nwps/v1/gauges/", site_no)
  
  resp <- GET(url,
              add_headers(`User-Agent` = "your.name/1.0 (contact: your@email.com)"))
  
  # throttle requests
  Sys.sleep(pause)
  
  # pull json text 
  txt <- content(resp, as = "text", encoding = "UTF-8")
  j   <- fromJSON(txt, simplifyVector = TRUE)
  
  # and then we can convert to a tabular format:
  
  # 1) historic crests list
  hc <- j$flood$crests$historic
  crests_df <- tibble(
    usgs_id = j$usgsId %||% site_no,
    crest_time = hc$occurredTime %||% NA,
    crest_stage_ft = hc$stage %||% NA_real_,
    preliminary = hc$preliminary %||% NA,
    olddatum = hc$olddatum %||% NA)
  
  # keep the highest historical crest only (and no old locations)
  record_crests_df <- crests_df %>%
    mutate(crest_date = date(crest_time)) %>%
    select(usgs_id, crest_date, crest_stage_ft, preliminary, olddatum) %>%
    arrange(desc(crest_stage_ft)) %>%
    filter(olddatum == FALSE) %>%
    head(1)

  # if empty, return a single NA row (so downstream joins/exports are simple)
  if (nrow(record_crests_df) == 0) {
    record_crests_df <- tibble(
      usgs_id = j$usgsId %||% site_no,
      crest_date = as.Date(NA),
      crest_stage_ft = NA_real_,
      preliminary = NA, # plain NA to match the logical type above
      olddatum = NA)
  }
  
  # 2) flood categories (stage thresholds, feet)
  cats <- j$flood$categories
  categories_df <- tibble(
    usgs_id = j$usgsId %||% site_no,
    action_ft = cats$action$stage %||% NA_real_,
    minor_ft = cats$minor$stage %||% NA_real_,
    moderate_ft = cats$moderate$stage %||% NA_real_,
    major_ft = cats$major$stage %||% NA_real_)
  
  list(crests = record_crests_df, categories = categories_df)
  
}

# iterate for each USGS site that we confirmed has NOAA data
noaa_meta_list <- map(wa_sites_filter2$site_no, get_noaa_meta_fc)

# then bind the two tables we got: 
# 1) record crests
wa_noaa_historic_crests <- noaa_meta_list %>%
  map("crests") %>%
  bind_rows()

# 2) flood_categories
wa_noaa_flood_categories <- noaa_meta_list %>%
  map("categories") %>%
  bind_rows()

rm(noaa_meta_list)

# now we can remove all the sites that don't have crests data
wa_noaa_historic_crests_true <- wa_noaa_historic_crests %>%
  drop_na(crest_stage_ft) %>%
  select(site_no = usgs_id,
         historic_crest_date = crest_date,
         historic_crest_stage_ft = crest_stage_ft)

# we are left with 131 final sites for the analysis
wa_sites_filter3 <- wa_sites_filter2 %>%
  filter(site_no %in% wa_noaa_historic_crests_true$site_no)

# let's also save the places that do have flood categories
wa_noaa_flood_categories_true <- wa_noaa_flood_categories %>%
  # when thresholds are not established, the value is -9999.0; replace that with NA
  mutate(across(c(action_ft, minor_ft, moderate_ft, major_ft),
                ~ na_if(., -9999.0))) %>%
  # keep places that have at least one flood category
  mutate(has_flood_cat = if_any(c(minor_ft, moderate_ft, major_ft), ~ !is.na(.))) %>%
  filter(has_flood_cat == T) %>%
  select(site_no = usgs_id, minor_ft, moderate_ft, major_ft) 

# export this to save time in future runs
write_csv(wa_noaa_flood_categories_true, "inputs/wa_noaa_flood_categories_true.csv")
write_csv(wa_noaa_historic_crests_true, "inputs/wa_noaa_historic_crests_true.csv")
write_csv(wa_sites_filter3, "inputs/wa_sites_filter3.csv")

There are 131 stream sensors where we can get both recent and historical data for this analysis. Let’s now use the USGS package to query subdaily data for each of them for the first half of December 2025.

Then we can join that to the historic crests and check whether any records were broken.

# if this is a quick run, read the input files
# if it's a full run, the files were created in the last chunk, so carry on
if (needs_build == F) {
  wa_noaa_flood_categories_true <- read_csv(wa_noaa_flood_categories_true_path,
                                           col_types = cols(site_no = col_character(),
                                                            .default = col_guess()),
                                           show_col_types = F)
  wa_noaa_historic_crests_true <- read_csv(wa_noaa_historic_crests_true_path,
                                           col_types = cols(site_no = col_character(),
                                                            .default = col_guess()),
                                           show_col_types = F)
  wa_sites_filter3 <- read_csv(wa_sites_filter3_path,
                               col_types = cols(site_no = col_character(),
                                                .default = col_guess()),
                               show_col_types = F)
} else {
  message("Using input files from previous computation")
}

# query recent subdaily gage data
# to check if we have record breakers this month
wa_december_data <- readNWISuv(siteNumbers = wa_sites_filter3$site_no,
                               parameterCd = "00065",
                               startDate = "2025-12-01",
                               endDate = "2025-12-15")

# clean up columns and split into a list
# also fix timezone from UTC to PT
wa_december_data2 <- wa_december_data %>%
  mutate(dateTime = as.POSIXct(dateTime, tz = "America/Los_Angeles")) %>%
  select(site_no, date_time = dateTime, gage_ft = X_00065_00000) %>%
  arrange(site_no, date_time)

# we can get the max for each site this month and compare to the record here
# if there are december ties for max, get the earliest
wa_december_max <- wa_december_data2 %>%
  group_by(site_no) %>%
  filter(gage_ft == max(gage_ft, na.rm = T)) %>%
  slice_min(order_by = date_time, n = 1, with_ties = F) %>%
  ungroup() %>%
  transmute(site_no,
            max_december_gage_ft = gage_ft,
            max_december_date_time = date_time) %>%
  left_join(wa_noaa_historic_crests_true, by = "site_no") %>%
  mutate(new_record = max_december_gage_ft > historic_crest_stage_ft,
         diff_ft = max_december_gage_ft - historic_crest_stage_ft,
         diff_inches = diff_ft * 12) # also get the difference in inches

# filter possible new records here
wa_december_new_record <- wa_december_max %>%
  filter(new_record == T) %>%
  select(-new_record)

# from a visual inspection, 
# we will remove one site where the record crest doesn't seem accurate
# site: 12026150
# - shows the historic high as 12.89 ft
# - but in the past year has always been above 300 ft
# site: 12211190
# - it's actually an overflow site, not a real river

# we are left with 13 new december records
wa_december_new_record_filter <- wa_december_new_record %>%
  filter(site_no != "12026150") %>%
  filter(site_no != "12211190")

# join their location metadata
wa_sites_filter3_simple <- wa_sites_filter3 %>%
  select(site_no, station_nm, lat = dec_lat_va, long = dec_long_va)

wa_december_new_record_filter <- wa_december_new_record_filter %>%
  left_join(wa_sites_filter3_simple, by = "site_no") %>%
  select(site_no, station_nm, max_december_date_time, 
         max_december_gage_ft, historic_crest_date,
         historic_crest_stage_ft, diff_inches, lat, long)

# we will also clean up text from station names
# and round diff_inches to keep one decimal
wa_december_new_record_filter <- wa_december_new_record_filter %>%
  mutate(station_nm = str_to_title(station_nm),
         station_nm = gsub(", Wa", ", WA", station_nm),
         station_nm = gsub(" Near ", " near ", station_nm),
         station_nm = gsub(" At ", " at ", station_nm),
         diff_inches = round(diff_inches, 1))

rm(wa_december_data, wa_december_new_record)

# export this data
# write_csv(wa_december_new_record_filter, "outputs/wa_december_new_record_filter.csv")

After individually inspecting the sites flagged as potential record-breakers, we removed one where the historic crest doesn’t seem accurate. NOAA showed the historic crest for site 12026150 as 12.89 ft, but USGS shows the site above 300 ft every day in the past year.
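For reference, here’s a quick sketch of how to check that, pulling the past year of daily values for the site (assuming it reports daily data):

# sketch: sanity check site 12026150's recent daily gage heights
readNWISdv(siteNumbers = "12026150", parameterCd = "00065",
           startDate = "2024-12-15", endDate = "2025-12-15") %>%
  summarise(min_ft = min(X_00065_00003, na.rm = T),
            max_ft = max(X_00065_00003, na.rm = T))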

Additionally, sensor 12211190 was removed since it’s not an actual river. As the Washington USGS confirmed, this sensor only measures overbank flow from the Nooksack River.

Aside from these, we found 13 sites with potentially new records, based on the preliminary data. Below is a summary of these.

Note that there may be even more records. The Washington USGS flagged another six sites that are not included in this list. Most of them are not on the NOAA API, so our code can’t flag them.

Here’s a graphic showing the new record at each location. The shaded band indicates values at or above the historic crest stage:
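For reference, a minimal ggplot2 sketch of how a graphic like that can be built from the objects above (the column names follow our data; the styling is illustrative):

# sketch: December hydrographs for the record-breaking sites,
# shading values at or above each site's historic crest
wa_december_data2 %>%
  filter(site_no %in% wa_december_new_record_filter$site_no) %>%
  left_join(wa_december_new_record_filter, by = "site_no") %>%
  ggplot(aes(x = date_time, y = gage_ft)) +
  geom_ribbon(aes(ymin = historic_crest_stage_ft, ymax = Inf), fill = "grey85") +
  geom_line() +
  facet_wrap(~ station_nm, scales = "free_y") +
  labs(x = NULL, y = "Gage height (ft)")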

Now let’s do one more analysis beyond the record-breaking sites. A subset of our active sites with historic crests also has NOAA flood categories for minor, moderate and major flooding (54 of the 131).
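As a quick sanity check on that count, using the objects built above:

# how many of the 131 crest sites also have at least one flood category?
# (should return 54)
sum(wa_sites_filter3$site_no %in% wa_noaa_flood_categories_true$site_no)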

Here’s what the flooding levels mean, per NOAA’s definitions: minor flooding causes minimal or no property damage but possibly some public threat; moderate flooding brings some inundation of structures and roads near the stream, with some evacuations possible; and major flooding brings extensive inundation of structures and roads, with significant evacuations.

If we want to quantify the frequency of flooding events at these sites, we have two options: a) use subdaily data to find the true daily maximum, or b) use mean-daily data, which will miss days when the stage briefly entered a flood level but not for long, or high, enough to pull the daily average into the flood category.

While the daily data may underestimate the frequency, it also goes back much further, so we opted for it. Choosing sites that have flood categories and daily gage data since the year 2000 leaves us with 39 locations.
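To illustrate the difference between the two options, a quick sketch using the December subdaily pull from earlier (that covers the crest-site universe, but the point is the same): for each site-day, the daily maximum can sit in a flood category even when the daily mean does not.

# sketch: daily mean vs daily max from the subdaily December data;
# a day can cross a flood threshold on its max but not on its mean
wa_december_data2 %>%
  mutate(date = as.Date(date_time, tz = "America/Los_Angeles")) %>%
  group_by(site_no, date) %>%
  summarise(daily_mean_ft = mean(gage_ft, na.rm = T),
            daily_max_ft = max(gage_ft, na.rm = T),
            .groups = "drop")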

# check daily data availability at the sensors with flood cat data
flood_dv_gauges <- whatNWISdata(siteNumbers = wa_noaa_flood_categories_true$site_no,
                                parameterCd = "00065", 
                                service = "dv")

# we can get 39 sites with data since 2000
# that's over 25 years
flood_dv_gauges_2000 <- flood_dv_gauges %>%
  filter(begin_date < "2000-01-01")

After querying this data, we matched each site to its corresponding flood thresholds and labeled whether any day reached minor, moderate or major flooding.

We can see that across these 39 gages, there have been 681 flooding instances on 402 different days since the year 2000. Most of these are minor.

# now let's query their stage data until Dec 15, 2025
flood_dv_data_2000 <- readNWISdv(siteNumbers = flood_dv_gauges_2000$site_no,
                                 parameterCd = "00065",
                                 startDate = "2000-01-01",
                                 endDate = "2025-12-15")

# clean up columns
# and join the flood thresholds and label each day
flood_dv_data_2000_true <- flood_dv_data_2000 %>%
  select(site_no, date = Date, mean_gage_ft = X_00065_00003) %>%
  arrange(site_no, date) %>%
  left_join(wa_noaa_flood_categories_true, by = "site_no") %>%
  mutate(flood_cat = case_when(
    mean_gage_ft >= major_ft ~ "major",
    mean_gage_ft >= moderate_ft ~ "moderate",
    mean_gage_ft >= minor_ft ~ "minor",
    T ~ NA)) %>%
  filter(flood_cat %in% c("minor", "moderate", "major")) %>%
  mutate(flood_cat = factor(flood_cat, levels = c("minor", "moderate", "major")))

### get a few stats about these

# since 2000, these 39 gages experienced flooding events in 402 separate days
length(unique(flood_dv_data_2000_true$date))
## [1] 402
# here's a breakdown of the 681 flooding data points 
# (dates are repeated when more than one gage reached flooding levels)
flood_dv_data_2000_true %>%
  group_by(flood_cat) %>%
  summarise(sensors_flooding = n()) %>%
  adorn_totals()
##  flood_cat sensors_flooding
##      minor              511
##   moderate              119
##      major               51
##      Total              681

When aggregating by year and month, December 2025 turns out to be the month with the most flooding instances. It also has the most major flooding instances.

# now we can aggregate by month and count how many sensors reached flood levels
flood_dv_data_2000_true_agg <- flood_dv_data_2000_true %>%
  mutate(year = year(date),
         month = month(date)) %>%
  group_by(year, month) %>%
  summarise(sensors_flooding = n()) %>%
  arrange(-sensors_flooding)

# turns out December 2025 had the most flooded sensors of any month in the 21st century
head(flood_dv_data_2000_true_agg, 5) 
## # A tibble: 5 × 3
## # Groups:   year [5]
##    year month sensors_flooding
##   <int> <int>            <int>
## 1  2025    12               54
## 2  2009     1               32
## 3  2021    11               29
## 4  2020     2               28
## 5  2006    11               24
# December 2025 is also the highest for "major" floods
# same aggregation as above, restricted to major flooding days
flood_dv_data_2000_true %>%
  filter(flood_cat == "major") %>%
  mutate(year = year(date),
         month = month(date)) %>%
  group_by(year, month) %>%
  summarise(sensors_major_flooding = n()) %>%
  arrange(-sensors_major_flooding) %>%
  head(5)
## # A tibble: 5 × 3
## # Groups:   year [5]
##    year month sensors_major_flooding
##   <int> <int>                  <int>
## 1  2025    12                     15
## 2  2006    11                      9
## 3  2009     1                      5
## 4  2020     2                      4
## 5  2021    11                      4
# to plot this in a timeseries we need to create a full sequence
# with months that don't have flood events and then join our data

# 1) build a calendar of all months
calendar <- tibble(month_date = seq.Date(from = as.Date("2000-01-01"),
                                         to = as.Date("2025-12-01"),
                                         by = "month")) %>%
  mutate(year = year(month_date),
         month = month(month_date))

# 2) prepare data with a month_date key
flood_dv_data_2000_true_agg <- flood_dv_data_2000_true_agg %>%
  ungroup() %>%
  mutate(month_date = make_date(year, month, 1)) %>%
  select(month_date, sensors_flooding)

# 3) join calendar to aggregate and fill missing values with 0
flood_2000_full_agg <- calendar %>%
  left_join(flood_dv_data_2000_true_agg, by = "month_date") %>%
  mutate(sensors_flooding = replace_na(sensors_flooding, 0))

rm(calendar)

# export this too
flood_2000_full_agg_clean <- flood_2000_full_agg %>%
  select(month_date, sensors_flooding)

# write_csv(flood_2000_full_agg_clean, "outputs/flood_2000_full_agg_clean.csv")
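And a minimal sketch of the corresponding timeseries graphic (styling illustrative):

# sketch: monthly count of sensors reaching flood stage, 2000-2025
ggplot(flood_2000_full_agg_clean,
       aes(x = month_date, y = sensors_flooding)) +
  geom_col() +
  labs(x = NULL, y = "Sensors reaching flood stage")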