Last active
July 24, 2022 04:02
-
-
Save plnnr/8a8fa7090c7a377d6ab44abc89c4afb1 to your computer and use it in GitHub Desktop.
Demonstration on how to use interpolate_pw() from tidycensus on multiple input tables with different input years and different weighting mechanisms.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Load libraries, set options, source scripts --------------------------------- | |
library(tidyverse) | |
library(tidycensus) | |
library(sf) | |
library(tigris) | |
library(furrr) | |
options( | |
scipen = 999, | |
digits = 4, | |
tigris_class = "sf", | |
tigris_use_cache = T | |
) | |
## Generate spatial data | |
STATES_TO_DOWNLOAD <- c("41", "53") | |
COUNTIES_TO_DOWNLOAD <- c("41005", "41009", "41051", "41067", "41071", "53011", "53059") | |
TARGET_EPSG <- 2913 | |
TRACTS.SF <- map_dfr(STATES_TO_DOWNLOAD, .f = function(STATE){ | |
tigris::tracts(STATE, cb = TRUE, year = 2020) %>% | |
filter(substr(GEOID, 1, 5) %in% COUNTIES_TO_DOWNLOAD)}) %>% | |
st_transform(TARGET_EPSG) | |
TRACTS.2019.SF <- map_dfr(STATES_TO_DOWNLOAD, .f = function(STATE){ | |
tigris::tracts(STATE, cb = TRUE, year = 2019) %>% | |
filter(substr(GEOID, 1, 5) %in% COUNTIES_TO_DOWNLOAD)}) %>% | |
st_transform(TARGET_EPSG) | |
TRACTS_2010t2020.SF <- rbind( | |
mutate(TRACTS.2019.SF, year = 2010), | |
mutate(TRACTS.2019.SF, year = 2015), | |
mutate(TRACTS.SF, year = 2020) | |
) | |
STATE_BLOCKS.2020 <- map_dfr(STATES_TO_DOWNLOAD, .f = function(STATE){ | |
tigris::blocks(STATE, year = 2020) %>% | |
filter(substr(GEOID20, 1, 5) %in% COUNTIES_TO_DOWNLOAD)}) %>% | |
st_transform(TARGET_EPSG) | |
## Load CPI inflation adjustments (CPI-U-RS) | |
cpi <- rio::import("1_data/0_resources/cpi-u-rs_1950-current.xlsx") %>% | |
select(year, inflation_factor = inflation_factor_2020) | |
## Download data using multi-threading ----------------------------------------- | |
## Enable multi-core processing | |
plan(multisession, workers = availableCores()) | |
set.seed(123) | |
core_tables <- c("B25106", "B03002", "B19013", "B25010", "B15002") | |
## Grab data for selected years and selected tables | |
acs_table_query_longitudinal <- future_map_dfr( | |
.x = c(2010, 2015, 2020), | |
.f = function(acs_year){ | |
map_dfr( | |
.x = core_tables, | |
.f = function(acs_table){ | |
query <- get_acs(geography = "tract", | |
table = acs_table, | |
year = acs_year, | |
state = STATES_TO_DOWNLOAD) %>% | |
filter(substr(GEOID, 1, 5) %in% COUNTIES_TO_DOWNLOAD) %>% | |
mutate(agg = "count", | |
year = acs_year) %>% | |
select(-NAME) | |
}) | |
}) | |
## Determine weighting methods ------------------------------------------------- | |
## Variables that are population-based should be weighted by population | |
## Variables that are unit-based should be weighted by households | |
## Variables that are aggregate may need to be re-weighted??? | |
# c("B25106", "B03002", "B19013", "B25010", "B15002") | |
## Population-based tables: | |
# B03002 | |
# B15002 | |
## Unit-based tables: | |
# B25010 | |
# B25106 | |
# B19013 | |
## Extensive | |
# B03002 | |
# B15002 | |
# B25106 | |
## Intensive (extensive == FALSE) | |
# B25010 | |
# B19013 | |
## Interpolate results --------------------------------------------------------- | |
## In this section, we interpolate data from 2010 and 2015 to 2020 census tracts. | |
## Since the use of the weighting variables and the use of extensive/intensive | |
## weighting mechanisms vary (count data vs aggregate data like medians), we must | |
## specify the right combination of tables and weighting columns. We do this by | |
## carefully specifying the list arguments for the pmap() function. | |
## First we transform our long data to wide data and append spatial data | |
wide_query <- acs_table_query_longitudinal %>% | |
pivot_wider(id_cols = c(GEOID, year), names_from = variable, values_from = estimate) %>% | |
left_join(., TRACTS_2010t2020.SF, by = c("GEOID", "year")) %>% | |
st_as_sf() | |
## Set up list arguments for pmap() function | |
years <- c(rep(2015, 3), | |
rep(2010, 3)) | |
tables <- list(c("B25106"), c("B25010", "B19013"), c("B03002", "B15002"), | |
c("B25106"), c("B25010", "B19013"), c("B03002", "B15002")) | |
weightcols <- c("HOUSING20", "HOUSING20", "POP20", | |
"HOUSING20", "HOUSING20", "POP20") | |
extensives <- c(TRUE, FALSE, TRUE, | |
TRUE, FALSE, TRUE) | |
## Generate list of filtered data based on the years and tables | |
dfs <- pmap(.l = list(years, tables), | |
.f = function(.year, .table){ | |
# Expand .table in case of multiple tables | |
.table <- paste(.table) | |
wide_query %>% | |
filter(year == .year) %>% | |
select(GEOID, starts_with(c({{.table}}))) | |
}) | |
## Interpolate all of the data | |
interpolated <- future_pmap( | |
.l = list(dfs, years, weightcols, extensives), | |
.f = function(.dat, .year, .weightcol, .ext){ | |
interpolate_pw(from = .dat, | |
to = wide_query %>% | |
filter(year == 2020) %>% | |
select(GEOID), | |
to_id = "GEOID", | |
weights = STATE_BLOCKS.2020, | |
weight_column = .weightcol, | |
crs = TARGET_EPSG, | |
extensive = .ext) %>% | |
mutate(year = .year) %>% | |
relocate(year, .after = GEOID) %>% | |
st_drop_geometry() | |
}, | |
.options = furrr_options(seed = TRUE)) | |
## Bind the list together, cast to long format, and standardize the format | |
## TODO there has to be a more elegant way to do this step... | |
interpolated.df <- rbind( | |
bind_cols(interpolated[1:3], .name_repair = "unique") %>% | |
select(-c(GEOID...49, GEOID...55, year...50, year...56)) %>% | |
rename(GEOID = GEOID...1, year = year...2), | |
bind_cols(interpolated[4:6], .name_repair = "unique") %>% | |
select(-c(GEOID...49, GEOID...55, year...50, year...56)) %>% | |
rename(GEOID = GEOID...1, year = year...2) | |
) %>% | |
pivot_longer(cols = -c(GEOID, year), | |
names_to = "variable", | |
values_to = "estimate") %>% | |
mutate(moe = 0, | |
agg = "count") %>% | |
select(names(acs_table_query_longitudinal)) | |
## Optionally add spatial data and generate a quick map | |
interpolated.sf <- interpolated.df %>% | |
left_join(., TRACTS.SF, by = "GEOID") %>% | |
st_as_sf() | |
interpolated.sf %>% | |
filter(year == 2015, | |
variable == "B25106_001") %>% | |
mapview(zcol = "estimate") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment