Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
CohoesPVApp/dataset_generator.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
278 lines (232 sloc)
9.99 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(RColorBrewer) | |
library(sf) | |
library(raster) | |
library(readr) | |
library(maps) | |
library(maptools) | |
library(sp) | |
library(leaflet) | |
library(tigris) | |
library(stringr) | |
library(fec12) | |
library(shiny) | |
library(htmltools) | |
library(shinybusy) | |
library(foreign) | |
# Load the names and abbreviations of all states ========== | |
# # TODO: Don't do this every time!! Read in a dataframe | |
# state_choices <- data.frame(state.name, state.abb) # Initialize with standard state names and abb | |
# names(state_choices) <- c('State', 'Abbreviation') # Better column headers | |
# # Format state names | |
# state_choices$Adjusted_Name <- tolower(state_choices$State) # Lower case version of state name | |
# state_choices$Adjusted_Name <- sub(' ', '_', state_choices$Adjusted_Name) # ...without spaces | |
# | |
# saveRDS(state_choices,"state_choices.Rds") | |
state_choices <- readRDS("data/general/state_choices.Rds") | |
# Low / moderate income data ========== | |
# Read in national LMI data and Tracts2District data | |
national_lmi <- readRDS("data/general/national_lmi.Rds") | |
options(tigris_class = "sf") # shape file | |
#Tract2District <- readRDS('data/general/Tract2District.Rds') | |
Tract2District <- readRDS('data/general/Tract2District.remap.Rds') %>% | |
rename(GEOID = GEOID_TRACT_10) | |
# Function | |
generate_lowmod_data <- function(state_code) { | |
# State census block groups | |
state_lmi <- national_lmi %>% filter(Stusab == state_code) | |
# Reduce GEOID to census tract | |
state_lmi$GEOID <- as.character(state_lmi$GEOID) | |
state_lmi$GEOID <- str_sub(state_lmi$GEOID, 1, nchar(state_lmi$GEOID) - 1) | |
# Make the low mod percentage of each census tract the average of all block groups within | |
# that census tract | |
state_lmi_averaged <- state_lmi %>% | |
dplyr::select(GEOID, Lowmod_pct) %>% | |
group_by(GEOID) %>% | |
mutate(Lowmod_pct = median(Lowmod_pct)) %>% | |
dplyr::distinct(GEOID, Lowmod_pct, .keep_all = TRUE) | |
# Obtain census tract data from year 2015 to make it coherent with the year of LMI data | |
# state_tract <- tracts(state = state_code, year = 2015) # Might have to keep this at 2015... | |
state_tract <- tracts(state = state_code, year = 2015) | |
# Merge and convert Lowmod_pct to range[0, 100] | |
# This still looks good! | |
state_lowmod <- merge(state_tract, state_lmi_averaged, by = 'GEOID') %>% | |
dplyr::select(GEOID, Lowmod_pct) %>% | |
mutate(Lowmod_pct = Lowmod_pct * 100) | |
# Merge with Tract2District data | |
# TODO: Create new version of Tract2District | |
state_districts <- Tract2District %>% | |
filter(STATE == state_code) %>% | |
dplyr::select(GEOID, DISTRICT) | |
# TODO: This merge definitely breaks it!!! | |
merged <- merge(state_lowmod, state_districts, by="GEOID") | |
merged$DISTRICT <- as.numeric(merged$DISTRICT) # Should this be numeric?? | |
# Take into account of states with single congressional district | |
# Name the 'DISTRICT' column 'At-large' | |
single_cd_states <- c('AK', 'DE', 'MT', 'ND', 'SD', 'VT', 'WY') | |
if (state_code %in% single_cd_states) { | |
merged$DISTRICT <- 'At-large' | |
} | |
# Name and save the dataset | |
state_abbr <- state_choices[which(state_choices$Abbreviation == state_code), 3] | |
file_path <- paste(state_abbr, '.Rds', sep = '') | |
file_path <- paste('data/lowmod/', file_path, sep = '') | |
saveRDS(merged, file_path) | |
} | |
# Generate lowmod data for individual states ========== | |
for (i in 1: nrow(state_choices)) { | |
state_code <- state_choices[i, 2] | |
generate_lowmod_data(state_code) | |
} | |
# Congressional district outline data ========== | |
# Format congressional district elections data | |
# TODO: New outlines! | |
# results_house %>% | |
# group_by(state, district_id) %>% | |
# summarize(N = n()) %>% | |
# nrow() | |
# | |
# results_house %>% | |
# left_join(candidates, by = "cand_id") %>% | |
# dplyr::select(state, district_id, cand_name, party, general_votes) %>% | |
# arrange(desc(general_votes)) | |
# | |
# district_elections <- results_house %>% | |
# dplyr::mutate(district = parse_number(district_id)) %>% | |
# dplyr::mutate(district = as.character(district)) %>% | |
# dplyr::group_by(state, district) %>% | |
# dplyr::summarize( | |
# N = n(), | |
# total_votes = sum(general_votes, na.rm = TRUE), | |
# d_votes = sum(ifelse(party == "D", general_votes, 0), na.rm = TRUE), | |
# r_votes = sum(ifelse(party == "R", general_votes, 0), na.rm = TRUE) | |
# ) %>% | |
# dplyr::mutate( | |
# other_votes = total_votes - d_votes - r_votes, | |
# r_prop = r_votes / total_votes, | |
# winner = ifelse(r_votes > d_votes, "Republican", "Democrat") | |
# ) | |
# | |
# district_elections$district <- stringr::str_pad(district_elections$district,width=2, side="left", pad="0") | |
# | |
# saveRDS(district_elections,"data/general/district_elections.Rds") | |
district_elections <- readRDS("data/general/district_elections.Rds") | |
# FIPS2STATENAME$STATEFP <- str_pad(FIPS2STATENAME$STATEFP, width=2, side="left", pad="0") | |
# saveRDS(FIPS2STATENAME, "data/general/FIPS2STATENAME.Rds") | |
# Read in congressional districts data | |
#districts <- readRDS('data/general/districts.Rds') # TODO: Use new shapes! | |
# districts_new <- congressional_districts(year=2020) # Replace with readRDS() | |
#districts <- readRDS('data/general/districts_new.Rds') # CD116 districts; don't use! | |
districts <- readRDS('data/cd118_shapes/districts_cd118.Rds') # Via https://simplemaps.com/data/congress | |
# districts <- districts %>% | |
# dplyr::select(-c("STATENAME","Abbreviation")) %>% | |
# left_join(FIPS2STATENAME, by="STATEFP") %>% | |
# relocate(STATENAME, .after = STATEFP) %>% | |
# left_join(STATENAME2ABB, by="STATENAME") %>% | |
# relocate(Abbreviation, .after = STATENAME) | |
#saveRDS(districts, "data/general/districts_new.Rds") | |
# Function | |
generate_cong_districts <- function(state_code) { | |
# State congressional district elections data | |
state_elections <- district_elections %>% | |
dplyr::filter(state == state_code) | |
# State shape data | |
state_name <- state_choices[which(state_choices$Abbreviation == state_code), 1] | |
state_shape <- districts %>% | |
dplyr::filter(STATENAME == state_name) %>% | |
dplyr::mutate(ID = GEOID) %>% | |
dplyr::select(STATENAME, ID, DISTRICT) | |
# Merge | |
state_merged <- state_shape %>% | |
st_transform(4326) %>% # Using CRS 4326 | |
inner_join(state_elections, by = c('DISTRICT' = 'district')) | |
# Name and save the dataset | |
state_abbr <- state_choices[which(state_choices$Abbreviation == state_code), 3] | |
file_path <- paste(state_abbr, '.Rds', sep = '') | |
file_path <- paste('data/cong_districts/', file_path, sep = '') | |
saveRDS(state_merged, file_path) | |
} | |
# Generate congressional districts data for individual states ========== | |
for (i in 1: nrow(state_choices)) { | |
state_code <- state_choices[i, 2] | |
generate_cong_districts(state_code) | |
} | |
# Reservoir data ========== | |
# Read in reservoirs data | |
reservoirs <- readRDS("data/general/reservoirs.Rds") | |
# Function | |
generate_reservoirs <- function(state_code) { | |
# Filter to state reservoirs | |
state_reservoirs <- reservoirs %>% | |
filter(state == state_code) | |
# Read in state lowmod data for spatial merge | |
state_abbr <- state_choices[which(state_choices$Abbreviation == state_code), 3] | |
lowmod_path <- paste('data/lowmod/', state_abbr, sep = '') | |
lowmod_path <- paste(lowmod_path, '.Rds', sep = '') | |
state_lowmod_data <- readRDS(lowmod_path) | |
# Convert reservoir data to shape file; transform coordinates | |
state_reservoirs_spatial <- state_reservoirs %>% | |
st_as_sf(coords=c('long', 'lat'), crs = '+proj=longlat +datum=WGS84') %>% | |
st_transform(crs = st_crs(state_lowmod_data)) | |
# saveRDS(state_reservoirs_spatial, "state_reservoirs_spatial.Rds") # For debug | |
# Spatial join reservoir data and lowmod data; filter columns | |
# TODO: This is producing some NAs; some reservoir points not overlapping with census blocks | |
points_in_state <- st_join(state_reservoirs_spatial, state_lowmod_data, left = T) | |
points_in_state <- points_in_state[, c('dam_name', 'Lowmod_pct')] | |
# saveRDS(points_in_state, "points_in_state.Rds") # for debug | |
# Merge back with the original reservoir data | |
state_reservoirs_merged <- base::merge(state_reservoirs, points_in_state, by = 'dam_name') | |
# Add low mod percentage buckets | |
state_reservoirs_merged <- state_reservoirs_merged %>% mutate( | |
lowmod_bucket = case_when( | |
Lowmod_pct < 20 ~ '0 - 20', | |
Lowmod_pct >= 20 & Lowmod_pct < 40 ~ '20 - 40', | |
Lowmod_pct >= 40 & Lowmod_pct < 60 ~ '40 - 60', | |
Lowmod_pct >= 60 & Lowmod_pct < 80 ~ '60 - 80', | |
Lowmod_pct >= 80 ~ '80 - 100', | |
is.na(Lowmod_pct) ~ 'No LMI Data' | |
) | |
) | |
# Name and save the dataset | |
file_path <- paste(state_abbr, '.Rds', sep = '') | |
file_path <- paste('data/reservoirs/', file_path, sep = '') | |
saveRDS(state_reservoirs_merged, file_path) | |
} | |
# Generate reservoir data for individual states ========== | |
for (i in 1: nrow(state_choices)) { | |
state_code <- state_choices[i, 2] | |
generate_reservoirs(state_code) | |
} | |
# Electric substation data ========== | |
# Read in substation data | |
substations <- read.csv('data/general/electric_substations.csv') | |
# Function | |
generate_substations <- function(state_code) { | |
# Filter to state substations | |
state_substations <- substations %>% | |
filter(STATE == state_code) %>% | |
# get rid of useless columns | |
dplyr::select(-c('NAICS_CODE', 'NAICS_DESC', 'COUNTRY', 'SOURCE', | |
'SOURCEDATE', 'VAL_METHOD', 'VAL_DATE', 'LINES', | |
'MAX_INFER', 'MIN_INFER', 'X', 'Y')) %>% | |
# take out unavailable substations | |
filter(STATUS != 'NOT AVAILABLE') | |
# Name and save the dataset | |
state_abbr <- state_choices[which(state_choices$Abbreviation == state_code), 3] | |
file_path <- paste(state_abbr, '.Rds', sep = '') | |
file_path <- paste('data/substations/', file_path, sep = '') | |
saveRDS(state_substations, file_path) | |
} | |
# Generate substation data for individual states ========== | |
for (i in 1: nrow(state_choices)) { | |
state_code <- state_choices[i, 2] | |
generate_substations(state_code) | |
} | |
# Generate ALL DATA ========== | |
# Or run previous for-loops individually for each data type | |
for (i in 1: nrow(state_choices)) { | |
state_code <- state_choices[i, 2] | |
generate_lowmod_data(state_code) | |
generate_cong_districts(state_code) | |
generate_reservoirs(state_code) | |
} | |