Skip to content

Instantly share code, notes, and snippets.

@plnnr
Last active July 24, 2022 04:02
Show Gist options
  • Save plnnr/8a8fa7090c7a377d6ab44abc89c4afb1 to your computer and use it in GitHub Desktop.
Save plnnr/8a8fa7090c7a377d6ab44abc89c4afb1 to your computer and use it in GitHub Desktop.
Demonstration on how to use interpolate_pw() from tidycensus on multiple input tables with different input years and different weighting mechanisms.
## Load libraries, set options, source scripts ---------------------------------
library(tidyverse)
library(tidycensus)
library(sf)
library(tigris)
library(furrr)
options(
scipen = 999,
digits = 4,
tigris_class = "sf",
tigris_use_cache = T
)
## Generate spatial data
STATES_TO_DOWNLOAD <- c("41", "53")
COUNTIES_TO_DOWNLOAD <- c("41005", "41009", "41051", "41067", "41071", "53011", "53059")
TARGET_EPSG <- 2913
TRACTS.SF <- map_dfr(STATES_TO_DOWNLOAD, .f = function(STATE){
tigris::tracts(STATE, cb = TRUE, year = 2020) %>%
filter(substr(GEOID, 1, 5) %in% COUNTIES_TO_DOWNLOAD)}) %>%
st_transform(TARGET_EPSG)
TRACTS.2019.SF <- map_dfr(STATES_TO_DOWNLOAD, .f = function(STATE){
tigris::tracts(STATE, cb = TRUE, year = 2019) %>%
filter(substr(GEOID, 1, 5) %in% COUNTIES_TO_DOWNLOAD)}) %>%
st_transform(TARGET_EPSG)
TRACTS_2010t2020.SF <- rbind(
mutate(TRACTS.2019.SF, year = 2010),
mutate(TRACTS.2019.SF, year = 2015),
mutate(TRACTS.SF, year = 2020)
)
STATE_BLOCKS.2020 <- map_dfr(STATES_TO_DOWNLOAD, .f = function(STATE){
tigris::blocks(STATE, year = 2020) %>%
filter(substr(GEOID20, 1, 5) %in% COUNTIES_TO_DOWNLOAD)}) %>%
st_transform(TARGET_EPSG)
## Load CPI inflation adjustments (CPI-U-RS)
cpi <- rio::import("1_data/0_resources/cpi-u-rs_1950-current.xlsx") %>%
select(year, inflation_factor = inflation_factor_2020)
## Download data using multi-threading -----------------------------------------
## Enable multi-core processing
plan(multisession, workers = availableCores())
set.seed(123)
core_tables <- c("B25106", "B03002", "B19013", "B25010", "B15002")
## Grab data for selected years and selected tables
acs_table_query_longitudinal <- future_map_dfr(
.x = c(2010, 2015, 2020),
.f = function(acs_year){
map_dfr(
.x = core_tables,
.f = function(acs_table){
query <- get_acs(geography = "tract",
table = acs_table,
year = acs_year,
state = STATES_TO_DOWNLOAD) %>%
filter(substr(GEOID, 1, 5) %in% COUNTIES_TO_DOWNLOAD) %>%
mutate(agg = "count",
year = acs_year) %>%
select(-NAME)
})
})
## Determine weighting methods -------------------------------------------------
## Variables that are population-based should be weighted by population
## Variables that are unit-based should be weighted by households
## Variables that are aggregate may need to be re-weighted???
# c("B25106", "B03002", "B19013", "B25010", "B15002")
## Population-based tables:
# B03002
# B15002
## Unit-based tables:
# B25010
# B25106
# B19013
## Extensive
# B03002
# B15002
# B25106
## Intensive (extensive == FALSE)
# B25010
# B19013
## Interpolate results ---------------------------------------------------------
## In this section, we interpolate data from 2010 and 2015 to 2020 census tracts.
## Since the use of the weighting variables and the use of extensive/intensive
## weighting mechanisms vary (count data vs aggregate data like medians), we must
## specify the right combination of tables and weighting columns. We do this by
## carefully specifying the list arguments for the pmap() function.
## First we transform our long data to wide data and append spatial data
wide_query <- acs_table_query_longitudinal %>%
pivot_wider(id_cols = c(GEOID, year), names_from = variable, values_from = estimate) %>%
left_join(., TRACTS_2010t2020.SF, by = c("GEOID", "year")) %>%
st_as_sf()
## Set up list arguments for pmap() function
years <- c(rep(2015, 3),
rep(2010, 3))
tables <- list(c("B25106"), c("B25010", "B19013"), c("B03002", "B15002"),
c("B25106"), c("B25010", "B19013"), c("B03002", "B15002"))
weightcols <- c("HOUSING20", "HOUSING20", "POP20",
"HOUSING20", "HOUSING20", "POP20")
extensives <- c(TRUE, FALSE, TRUE,
TRUE, FALSE, TRUE)
## Generate list of filtered data based on the years and tables
dfs <- pmap(.l = list(years, tables),
.f = function(.year, .table){
# Expand .table in case of multiple tables
.table <- paste(.table)
wide_query %>%
filter(year == .year) %>%
select(GEOID, starts_with(c({{.table}})))
})
## Interpolate all of the data
interpolated <- future_pmap(
.l = list(dfs, years, weightcols, extensives),
.f = function(.dat, .year, .weightcol, .ext){
interpolate_pw(from = .dat,
to = wide_query %>%
filter(year == 2020) %>%
select(GEOID),
to_id = "GEOID",
weights = STATE_BLOCKS.2020,
weight_column = .weightcol,
crs = TARGET_EPSG,
extensive = .ext) %>%
mutate(year = .year) %>%
relocate(year, .after = GEOID) %>%
st_drop_geometry()
},
.options = furrr_options(seed = TRUE))
## Bind the list together, cast to long format, and standardize the format
## TODO there has to be a more elegant way to do this step...
interpolated.df <- rbind(
bind_cols(interpolated[1:3], .name_repair = "unique") %>%
select(-c(GEOID...49, GEOID...55, year...50, year...56)) %>%
rename(GEOID = GEOID...1, year = year...2),
bind_cols(interpolated[4:6], .name_repair = "unique") %>%
select(-c(GEOID...49, GEOID...55, year...50, year...56)) %>%
rename(GEOID = GEOID...1, year = year...2)
) %>%
pivot_longer(cols = -c(GEOID, year),
names_to = "variable",
values_to = "estimate") %>%
mutate(moe = 0,
agg = "count") %>%
select(names(acs_table_query_longitudinal))
## Optionally add spatial data and generate a quick map
interpolated.sf <- interpolated.df %>%
left_join(., TRACTS.SF, by = "GEOID") %>%
st_as_sf()
interpolated.sf %>%
filter(year == 2015,
variable == "B25106_001") %>%
mapview(zcol = "estimate")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment