Results

Multiple states, including California, are proposing to redraw their district maps – which can shift the number of seats the Republican and Democratic parties can each get in the next elections.

This is our analysis to simulate how seats would change based on how people voted in the 2024 presidential race.

Data sources for California analysis:

Here’s what we found:

More details on how the margins would shift can be found in the interactive tables below.

Methods

A caveat with California is that the precinct-level data we were able to obtain, from the Statewide Database, redacts voting data for precincts with fewer than 10 voters. These very small precincts make up less than 0.05% of total votes.

We collected voting tabular data and shapefiles, but Statewide Database recommended using a different method. Instead of performing spatial joins to label precincts to congressional districts, we can use their precinct/census block conversion file.

Census blocks are smaller than precincts, so we have to “distribute” each precinct’s votes proportionally across its blocks. Then we can connect the blocks to the block assignment files for the current and proposed district maps and aggregate votes that way. This should produce more accurate results. (For comparison, we also performed the spatial method: it simulated the exact same seat changes, and although the margin shifts differed in a few districts, the difference was never more than a percentage point.)

With that in mind, we import the tabular voting data along with the block conversion file:

# import and clean the precinct-level vote file
ca_votes_2024 <- read_csv("inputs/CA/state_g24_sov_data_by_g24_srprec.csv") %>%
  clean_names() %>%
  # keep identifiers plus the vote columns, under friendlier names
  select(county, fips, srprec, srprec_key,
         total_votes = totvote,
         harris = prsdem01,
         trump = prsrep01) %>%
  mutate(
    # precincts with < 10 voters are redacted, flag those; the comparison
    # already yields TRUE/FALSE, so no ifelse() wrapper is needed
    redacted = total_votes < 10,
    # redacted cells hold *** instead of a number; turn those into NA first
    # so the conversion to numeric does not raise coercion warnings
    across(c(harris, trump), ~ as.numeric(na_if(.x, "***")))
  )

# sanity check: srprec_key should identify each row uniquely (expect TRUE)
anyDuplicated(ca_votes_2024$srprec_key) == 0
## [1] TRUE
# sanity check: total votes in redacted vs. non-redacted precincts,
# plus a grand-total row
# redacted precincts make up under 0.05% of total votes
adorn_totals(
  summarise(
    group_by(ca_votes_2024, redacted),
    total_votes = sum(total_votes)
  )
)
##  redacted total_votes
##     FALSE    16078204
##      TRUE        7385
##     Total    16085589
# drop redacted precincts, then the helper flag that marked them
ca_votes_2024 <- ca_votes_2024 %>%
  filter(!redacted) %>%
  select(-redacted)

# precinct-to-census-block conversion file: keep the two keys and the
# percentage column, rescaled by dividing by 100
srprec2block <- "inputs/CA/state_g24_sr_blk_map.csv" %>%
  read_csv() %>%
  clean_names() %>%
  mutate(pct_srprec = pctsrprec / 100) %>%
  select(block_key, srprec_key, pct_srprec)

# attach precinct votes to every census block, then give each block its
# share of the precinct's votes (votes * pct_srprec)
ca_blocks <- srprec2block %>%
  left_join(ca_votes_2024, by = "srprec_key") %>%
  mutate(across(c(harris, trump, total_votes),
                ~ .x * pct_srprec,
                .names = "{.col}_adj")) %>%
  select(block_key, srprec_key, harris_adj, trump_adj, total_votes_adj)

# quick check total votes after block conversion
# very slight difference, likely due to rounding/digits stored
sum(ca_votes_2024$total_votes)
## [1] 16078204
# na.rm is needed here: blocks whose precinct has no row in ca_votes_2024
# (e.g. the redacted precincts dropped earlier) come out of the left join as NA
sum(ca_blocks$total_votes_adj, na.rm = TRUE)
## [1] 16077413
# the conversion table is not used again after this point
rm(srprec2block)

With block-level voting data in hand (votes were allocated to blocks by multiplying by the percent of the precinct each block represents, which is provided in the data), we can map each block to its current and proposed district using block assignment files.

# block assignment key for the current map; every column is read as
# character, then the district code is relabelled as "District <n>"
key_current <- read_csv("inputs/CA/cd118/06_CA_CD118.txt",
                        col_names = TRUE,
                        col_types = cols(.default = col_character())) %>%
  mutate(current_district = paste0("District ", as.numeric(CDFP))) %>%
  select(block_key = GEOID, current_district)

# import key for proposed map
# the file is read without a header row, so the column names are supplied
# here; every column is read as character so block_key is parsed the same
# way as in key_current instead of depending on readr's type guessing
key_proposed <- read_csv("inputs/CA/ab604_redistricting/AB604.csv",
                         col_names = c("block_key", "proposed_district"),
                         col_types = cols(.default = col_character())) %>%
  # do some formatting for consistency: "District <n>", no zero padding
  mutate(proposed_district = paste0("District ",
                                    as.numeric(proposed_district)))

# tag each block-level vote row with its current district ...
ca_main <- left_join(ca_blocks, key_current, by = "block_key")
# ... and with its proposed district
ca_main <- left_join(ca_main, key_proposed, by = "block_key")

# the assignment keys are no longer needed
rm(key_current, key_proposed)

Once districts are assigned, we can aggregate them and get a vote and seat count.

# first do current district aggregation:
# sum the block-level votes per current district and name the leading party
ca_current_district_count <- ca_main %>%
  group_by(current_district) %>%
  summarise(current_harris = sum(harris_adj, na.rm = TRUE),
            current_trump = sum(trump_adj, na.rm = TRUE),
            current_total_votes = sum(total_votes_adj, na.rm = TRUE)) %>%
  mutate(current_party = case_when(
    current_harris > current_trump ~ "dem",
    current_trump > current_harris ~ "rep",
    current_harris == current_trump ~ "tie")) %>%
  rename(district = current_district) %>%
  # blocks left without a match in the assignment-file join form an NA
  # district group; drop only that group rather than any row with an NA
  drop_na(district)

# now proposed:
# sum the block-level votes per proposed district and name the leading party
ca_proposed_district_count <- ca_main %>%
  group_by(proposed_district) %>%
  summarise(proposed_harris = sum(harris_adj, na.rm = TRUE),
            proposed_trump = sum(trump_adj, na.rm = TRUE),
            proposed_total_votes = sum(total_votes_adj, na.rm = TRUE)) %>%
  mutate(proposed_party = case_when(
    proposed_harris > proposed_trump ~ "dem",
    proposed_trump > proposed_harris ~ "rep",
    proposed_harris == proposed_trump ~ "tie")) %>%
  rename(district = proposed_district) %>%
  # blocks left without a match in the assignment-file join form an NA
  # district group; drop only that group rather than any row with an NA
  drop_na(district)

# combine both aggregations into one row per district
# (proposed columns are matched onto the current ones)
ca_district_count <- ca_current_district_count %>%
  left_join(ca_proposed_district_count, by = "district")

# the two intermediate tables are no longer needed
rm(ca_current_district_count, ca_proposed_district_count)

# we see that 6 seats would be flipped under the proposed map
# seat_flip is 1 when the leading party switches between "dem" and "rep"
# across the two maps, and 0 otherwise (a "tie" on either side is not
# counted as a flip)
ca_district_count <- ca_district_count %>%
  mutate(seat_flip = case_when(
    current_party == "dem" & proposed_party == "rep" ~ 1,
    current_party == "rep" & proposed_party == "dem" ~ 1,
    TRUE ~ 0))

# number of districts led by each party under the current map
count(ca_district_count, current_party, name = "count")
## # A tibble: 2 × 2
##   current_party count
##   <chr>         <int>
## 1 dem              41
## 2 rep              11
# number of districts led by each party under the proposed map
count(ca_district_count, proposed_party, name = "count")
## # A tibble: 2 × 2
##   proposed_party count
##   <chr>          <int>
## 1 dem               47
## 2 rep                5

Below are the results of our precinct-based calculation: the leading party under the current and proposed maps, the corresponding margins (negative means a Harris advantage, positive a Trump advantage), and a binary flag marking the 6 seats that would flip based on 2024 presidential vote counts.

# margins in percentage points: Trump share minus Harris share of all votes
# in the district, rounded to one decimal; margin_shift is the change from
# the current to the proposed map, computed from the rounded margins
ca_district_count <- ca_district_count %>%
  mutate(
    current_margin = round(
      (current_trump / current_total_votes -
         current_harris / current_total_votes) * 100,
      1
    ),
    proposed_margin = round(
      (proposed_trump / proposed_total_votes -
         proposed_harris / proposed_total_votes) * 100,
      1
    ),
    margin_shift = round(proposed_margin - current_margin, 1)
  )

# export (disabled)
# write_csv(ca_district_count, "outputs/ca_district_count.csv")

# interactive table of the per-district results, flipped seats first,
# with copy/csv/excel/pdf buttons
ca_district_count %>%
  select(
    ca_district = district,
    current_party,
    proposed_party,
    seat_flip,
    current_margin,
    proposed_margin,
    margin_shift
  ) %>%
  arrange(desc(seat_flip)) %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("copy", "csv", "excel", "pdf")
    )
  )