# State lookup table (names, abbreviations, file-name slugs) ==========
# The table is cached on disk and simply read back here. The recipe that
# produced the cached file is kept below for reference only (not run).
# NOTE(review): the arguments of the data.frame() call are missing from the
# archived snippet.
# # TODO: Don't do this every time!! Read in a dataframe
# state_choices <- data.frame(, # Initialize with standard state names and abb
# names(state_choices) <- c('State', 'Abbreviation') # Better column headers
# # Format state names
# state_choices$Adjusted_Name <- tolower(state_choices$State) # Lower case version of state name
# state_choices$Adjusted_Name <- sub(' ', '_', state_choices$Adjusted_Name) # ...without spaces
# saveRDS(state_choices,"state_choices.Rds")
state_choices <- readRDS("data/general/state_choices.Rds")

# Low / moderate income (LMI) inputs ==========
# Ask tigris to hand back sf objects
options(tigris_class = "sf")
# National LMI table (filtered per state inside generate_lowmod_data)
national_lmi <- readRDS("data/general/national_lmi.Rds")
# Tract-to-district crosswalk: the remapped file is used, and its tract id
# column is exposed under the name GEOID so it can be merged on that key.
# Previous input: readRDS('data/general/Tract2District.Rds')
Tract2District <- rename(
  readRDS('data/general/Tract2District.remap.Rds'),
  GEOID = GEOID_TRACT_10
)
#' Build and save the tract-level low/moderate income (LMI) layer for a state
#'
#' Filters the national LMI table to one state, collapses its rows to census
#' tracts (median `Lowmod_pct` per tract), joins the result to the 2015 tigris
#' tract geometries and to the tract-to-district crosswalk, and writes the
#' merged object to `data/lowmod/<name>.Rds`.
#'
#' Reads the globals `national_lmi`, `Tract2District` and `state_choices`.
#'
#' @param state_code State abbreviation (e.g. 'AK'); compared against
#'   `national_lmi$Stusab`, `Tract2District$STATE` and
#'   `state_choices$Abbreviation`.
#' @return Called for its side effect (the saved file); returns the value of
#'   `saveRDS()`.
generate_lowmod_data <- function(state_code) {
  # Fail early on an unknown code instead of writing 'data/lowmod/.Rds'
  state_row <- which(state_choices$Abbreviation == state_code)
  if (length(state_row) != 1) {
    stop("Unknown or ambiguous state_code: ", state_code, call. = FALSE)
  }

  # State census block groups
  state_lmi <- national_lmi %>% filter(Stusab == state_code)

  # Reduce GEOID to census tract by dropping its last character
  # NOTE(review): if GEOID is stored as a number, as.character() will not
  # restore a leading zero -- confirm the column type in national_lmi.
  state_lmi$GEOID <- as.character(state_lmi$GEOID)
  state_lmi$GEOID <- str_sub(state_lmi$GEOID, 1, nchar(state_lmi$GEOID) - 1)

  # One row per tract: the tract value is the MEDIAN of its block groups.
  # (An older comment called this the "average"; the code has always used
  # median(), which is kept here.)
  state_lmi_averaged <- state_lmi %>%
    dplyr::select(GEOID, Lowmod_pct) %>%
    group_by(GEOID) %>%
    dplyr::summarise(Lowmod_pct = median(Lowmod_pct), .groups = "drop")

  # Tract geometries from 2015, to stay coherent with the year of the LMI data
  state_tract <- tracts(state = state_code, year = 2015)

  # Merge and convert Lowmod_pct to range [0, 100]
  state_lowmod <- merge(state_tract, state_lmi_averaged, by = 'GEOID') %>%
    dplyr::select(GEOID, Lowmod_pct) %>%
    mutate(Lowmod_pct = Lowmod_pct * 100)

  # Attach the congressional district of each tract
  # TODO: Create new version of Tract2District
  state_districts <- Tract2District %>%
    filter(STATE == state_code) %>%
    dplyr::select(GEOID, DISTRICT)

  # TODO (carried over): this merge was flagged as breaking the output;
  # tracts absent from the crosswalk are dropped by the inner merge.
  merged <- merge(state_lowmod, state_districts, by = "GEOID")
  merged$DISTRICT <- as.numeric(merged$DISTRICT) # Should this be numeric??

  # States with a single congressional district get the label 'At-large'
  # NOTE(review): MT has had two districts since the 2022 reapportionment;
  # confirm this list matches the crosswalk in use.
  single_cd_states <- c('AK', 'DE', 'MT', 'ND', 'SD', 'VT', 'WY')
  if (state_code %in% single_cd_states) {
    merged$DISTRICT <- 'At-large'
  }

  # Name and save the dataset (column 3 holds the file-name form of the state)
  state_abbr <- state_choices[state_row, 3]
  file_path <- paste0('data/lowmod/', state_abbr, '.Rds')
  saveRDS(merged, file_path)
}
# Generate lowmod data for individual states ==========
# Walk the lookup table row by row; column 2 is read as the state code that
# generate_lowmod_data() expects. seq_len() keeps the loop from running when
# the table is empty (1:0 would iterate twice).
for (i in seq_len(nrow(state_choices))) {
  state_code <- state_choices[i, 2]
  generate_lowmod_data(state_code)
}
# Congressional district outline data ==========
# Format congressional district elections data
# TODO: New outlines!
# --- Archived exploration (kept for reference, not run) ---
# Number of distinct state / district_id pairs in results_house:
# results_house %>%
# group_by(state, district_id) %>%
# summarize(N = n()) %>%
# nrow()
# Candidates joined to their results, ordered by general-election votes:
# results_house %>%
# left_join(candidates, by = "cand_id") %>%
# dplyr::select(state, district_id, cand_name, party, general_votes) %>%
# arrange(desc(general_votes))
# --- Archived recipe that wrote district_elections.Rds (not run) ---
# Collapses results_house to one row per state / district with the number of
# rows, total votes, D and R votes, remaining votes, the R share of the total
# and a winner label (ties are labelled "Democrat" by the ifelse below).
# The district number is stored as a string left-padded with "0" to width 2.
# district_elections <- results_house %>%
# dplyr::mutate(district = parse_number(district_id)) %>%
# dplyr::mutate(district = as.character(district)) %>%
# dplyr::group_by(state, district) %>%
# dplyr::summarize(
# N = n(),
# total_votes = sum(general_votes, na.rm = TRUE),
# d_votes = sum(ifelse(party == "D", general_votes, 0), na.rm = TRUE),
# r_votes = sum(ifelse(party == "R", general_votes, 0), na.rm = TRUE)
# ) %>%
# dplyr::mutate(
# other_votes = total_votes - d_votes - r_votes,
# r_prop = r_votes / total_votes,
# winner = ifelse(r_votes > d_votes, "Republican", "Democrat")
# )
# district_elections$district <- stringr::str_pad(district_elections$district,width=2, side="left", pad="0")
# saveRDS(district_elections,"data/general/district_elections.Rds")
# Load the cached per-district election summary (presumably the file written
# by the archived recipe above; same path).
district_elections <- readRDS("data/general/district_elections.Rds")
# Archived one-off fix (not run): left-pad state FIPS codes with "0" to
# width 2 and cache the lookup.
# FIPS2STATENAME$STATEFP <- str_pad(FIPS2STATENAME$STATEFP, width=2, side="left", pad="0")
# saveRDS(FIPS2STATENAME, "data/general/FIPS2STATENAME.Rds")
# Read in congressional districts data
# Earlier inputs, kept for reference (not run):
#districts <- readRDS('data/general/districts.Rds') # TODO: Use new shapes!
# districts_new <- congressional_districts(year=2020) # Replace with readRDS()
#districts <- readRDS('data/general/districts_new.Rds') # CD116 districts; don't use!
# Current input: the cd118 shapes file.
# NOTE(review): the original comment ended in "Via" with no source named;
# record where this file was obtained.
districts <- readRDS('data/cd118_shapes/districts_cd118.Rds') # Via (source missing)
# Archived recipe (not run): rebuilds the STATENAME and Abbreviation columns
# from the FIPS lookup tables and caches the result as districts_new.Rds.
# districts <- districts %>%
# dplyr::select(-c("STATENAME","Abbreviation")) %>%
# left_join(FIPS2STATENAME, by="STATEFP") %>%
# relocate(STATENAME, .after = STATEFP) %>%
# left_join(STATENAME2ABB, by="STATENAME") %>%
# relocate(Abbreviation, .after = STATENAME)
#saveRDS(districts, "data/general/districts_new.Rds")
#' Build and save the congressional-district layer for a state
#'
#' Selects the state's district shapes, reprojects them to CRS 4326, joins the
#' per-district election summary on the district identifier, and writes the
#' result to `data/cong_districts/<name>.Rds`.
#'
#' Reads the globals `district_elections`, `districts` and `state_choices`.
#'
#' @param state_code State abbreviation; compared against
#'   `district_elections$state` and `state_choices$Abbreviation`.
#' @return Called for its side effect (the saved file); returns the value of
#'   `saveRDS()`.
generate_cong_districts <- function(state_code) {
  # One lookup serves both the full name (column 1) and the file-name form
  # (column 3); an unknown code stops here rather than producing '.Rds'.
  state_row <- which(state_choices$Abbreviation == state_code)
  if (length(state_row) != 1) {
    stop("Unknown or ambiguous state_code: ", state_code, call. = FALSE)
  }
  state_name <- state_choices[state_row, 1]
  state_abbr <- state_choices[state_row, 3]

  # State congressional district elections data
  state_elections <- district_elections %>%
    dplyr::filter(state == state_code)

  # State shape data; GEOID is carried forward under the name ID
  state_shape <- districts %>%
    dplyr::filter(STATENAME == state_name) %>%
    dplyr::mutate(ID = GEOID) %>%
    dplyr::select(STATENAME, ID, DISTRICT)

  # Inner join: districts without a matching election row are dropped
  state_merged <- state_shape %>%
    st_transform(4326) %>% # Using CRS 4326
    inner_join(state_elections, by = c('DISTRICT' = 'district'))

  # Name and save the dataset
  file_path <- paste0('data/cong_districts/', state_abbr, '.Rds')
  saveRDS(state_merged, file_path)
}
# Generate congressional districts data for individual states ==========
# Column 2 of the lookup table is read as the state code; seq_len() keeps the
# loop from running on an empty table.
for (i in seq_len(nrow(state_choices))) {
  state_code <- state_choices[i, 2]
  generate_cong_districts(state_code)
}

# Reservoir data ==========
# Read in reservoirs data
reservoirs <- readRDS("data/general/reservoirs.Rds")
#' Build and save the reservoir layer for a state, tagged with LMI buckets
#'
#' Filters the reservoir table to one state, turns the rows into points from
#' their `long` / `lat` columns, spatially joins them to the state's saved
#' low/moderate income layer, and stores the original rows together with
#' `Lowmod_pct` and a text bucket in `data/reservoirs/<name>.Rds`.
#'
#' Reads the globals `reservoirs` and `state_choices`, and the file
#' `data/lowmod/<name>.Rds`, which must already exist for the state.
#'
#' @param state_code State abbreviation; compared against `reservoirs$state`
#'   and `state_choices$Abbreviation`.
#' @return Called for its side effect (the saved file); returns the value of
#'   `saveRDS()`.
generate_reservoirs <- function(state_code) {
  # An unknown code stops here rather than reading 'data/lowmod/.Rds'
  state_row <- which(state_choices$Abbreviation == state_code)
  if (length(state_row) != 1) {
    stop("Unknown or ambiguous state_code: ", state_code, call. = FALSE)
  }
  state_abbr <- state_choices[state_row, 3]

  # Filter to state reservoirs
  state_reservoirs <- reservoirs %>%
    filter(state == state_code)

  # Read in state lowmod data for spatial merge
  lowmod_path <- paste0('data/lowmod/', state_abbr, '.Rds')
  state_lowmod_data <- readRDS(lowmod_path)

  # Convert reservoir rows to points, then move them into the CRS of the
  # lowmod layer so the spatial join compares like with like
  state_reservoirs_spatial <- state_reservoirs %>%
    st_as_sf(coords = c('long', 'lat'), crs = '+proj=longlat +datum=WGS84') %>%
    st_transform(crs = st_crs(state_lowmod_data))

  # Left spatial join: points outside every lowmod polygon are kept with an
  # NA Lowmod_pct (the source of the NAs noted in the earlier TODO)
  points_in_state <- st_join(state_reservoirs_spatial, state_lowmod_data,
                             left = TRUE)
  points_in_state <- points_in_state[, c('dam_name', 'Lowmod_pct')]

  # Merge back with the original reservoir data
  # NOTE(review): the key is dam_name only; if names repeat within a state
  # this merge multiplies rows -- confirm names are unique.
  state_reservoirs_merged <- base::merge(state_reservoirs, points_in_state,
                                         by = 'dam_name')

  # Add low mod percentage buckets; anything not matched above (including NA)
  # falls through to the catch-all label
  state_reservoirs_merged <- state_reservoirs_merged %>%
    mutate(
      lowmod_bucket = case_when(
        Lowmod_pct < 20 ~ '0 - 20',
        Lowmod_pct >= 20 & Lowmod_pct < 40 ~ '20 - 40',
        Lowmod_pct >= 40 & Lowmod_pct < 60 ~ '40 - 60',
        Lowmod_pct >= 60 & Lowmod_pct < 80 ~ '60 - 80',
        Lowmod_pct >= 80 ~ '80 - 100',
        TRUE ~ 'No LMI Data'
      )
    )

  # Name and save the dataset
  file_path <- paste0('data/reservoirs/', state_abbr, '.Rds')
  saveRDS(state_reservoirs_merged, file_path)
}
# Generate reservoir data for individual states ==========
# Needs the per-state lowmod files to exist already, because
# generate_reservoirs() reads them. seq_len() keeps the loop from running on
# an empty table.
for (i in seq_len(nrow(state_choices))) {
  state_code <- state_choices[i, 2]
  generate_reservoirs(state_code)
}

# Electric substation data ==========
# Read in substation data
substations <- read.csv('data/general/electric_substations.csv')
#' Build and save the electric substation table for a state
#'
#' Filters the substation table to one state, drops columns that are not
#' needed downstream, and writes the result to `data/substations/<name>.Rds`.
#'
#' Reads the globals `substations` and `state_choices`.
#'
#' @param state_code State abbreviation; compared against `substations$STATE`
#'   and `state_choices$Abbreviation`.
#' @return Called for its side effect (the saved file); returns the value of
#'   `saveRDS()`.
generate_substations <- function(state_code) {
  # An unknown code stops here rather than writing 'data/substations/.Rds'
  state_row <- which(state_choices$Abbreviation == state_code)
  if (length(state_row) != 1) {
    stop("Unknown or ambiguous state_code: ", state_code, call. = FALSE)
  }

  # Filter to state substations and get rid of useless columns.
  # The pipeline now ends at select(); it previously ended in a dangling
  # `%>%` that swallowed the next assignment.
  state_substations <- substations %>%
    filter(STATE == state_code) %>%
    dplyr::select(-c('NAICS_CODE', 'NAICS_DESC', 'COUNTRY', 'SOURCE',
                     'MAX_INFER', 'MIN_INFER', 'X', 'Y'))

  # TODO: take out unavailable substations -- the filter step that belonged
  # here is missing from the file; restore it once the intended column and
  # value are confirmed.

  # Name and save the dataset
  state_abbr <- state_choices[state_row, 3]
  file_path <- paste0('data/substations/', state_abbr, '.Rds')
  saveRDS(state_substations, file_path)
}
# Generate substation data for individual states ==========
# Column 2 of the lookup table is read as the state code; seq_len() keeps the
# loop from running on an empty table.
for (i in seq_len(nrow(state_choices))) {
  state_code <- state_choices[i, 2]
  generate_substations(state_code)
}
# Generate ALL DATA ==========
# Or run previous for-loops individually for each data type
# NOTE(review): only the loop header and the state_code lookup are visible
# here; the rest of the body and the closing brace are not shown. Presumably
# the body calls each generate_*() function for the state -- confirm, and
# keep the lowmod step ahead of the reservoir step, which reads its output.
for (i in 1: nrow(state_choices)) {
state_code <- state_choices[i, 2]