# American Community Survey # Michael Minn # Rev 25 January 2025 library(sf) library(rjson) # https://api.census.gov/data/2023/acs/acs5/profile/variables.html variables = c( "DP02_0001E" = "Total_Households", "DP02_0016E" = "Average_Household_Size", "DP02_0038PE" = "Percent_Single_Mothers", "DP02_0058PE" = "Percent_In_College", "DP02_0065PE" = "Percent_Bachelors_Degree", "DP02_0066PE" = "Percent_Graduate_Degree", "DP02_0070PE" = "Percent_Veterans", "DP02_0072PE" = "Percent_Disabled", "DP02_0094PE" = "Percent_Foreign_Born", "DP03_0002PE" = "Workforce_Participation", "DP03_0009PE" = "Percent_Unemployed", "DP03_0019PE" = "Percent_Commute_Alone", "DP03_0021PE" = "Percent_Commute_Transit", "DP03_0022PE" = "Percent_Commute_Walk", "DP03_0024PE" = "Percent_Work_at_Home", "DP03_0025E" = "Mean_Commute_Minutes", "DP03_0027PE" = "Percent_Jobs_Management", "DP03_0028PE" = "Percent_Jobs_Service", "DP03_0029PE" = "Percent_Jobs_Sales", "DP03_0030PE" = "Percent_Jobs_Nat_Resources", "DP03_0031PE" = "Percent_Jobs_Production", "DP03_0033PE" = "Percent_Industry_Ag_Mining", "DP03_0034PE" = "Percent_Industry_Construction", "DP03_0035PE" = "Percent_Industry_Manufacturing", "DP03_0036PE" = "Percent_Industry_Wholesale", "DP03_0037PE" = "Percent_Industry_Retail", "DP03_0038PE" = "Percent_Industry_Transport", "DP03_0039PE" = "Percent_Industry_Information", "DP03_0040PE" = "Percent_Industry_Finance", "DP03_0041PE" = "Percent_Industry_Pro_Services", "DP03_0042PE" = "Percent_Industry_Education", "DP03_0043PE" = "Percent_Industry_Recreation", "DP03_0047PE" = "Percent_Private_Employed", "DP03_0048PE" = "Percent_Govt_Employed", "DP03_0049PE" = "Percent_Self_Employed", "DP03_0062E" = "Median_Household_Income", "DP03_0096PE" = "Percent_Health_Insurance", "DP04_0001E" = "Total_Housing_Units", "DP04_0003PE" = "Percent_Vacant_Units", "DP04_0026PE" = "Percent_Pre_War_Units", "DP04_0046PE" = "Homeownership_Percent", "DP04_0047PE" = "Percent_Renters", "DP04_0058PE" = "Percent_No_Vehicle", "DP04_0089E" = "Median_Home_Value", "DP04_0101E" = "Median_Monthly_Mortgage", "DP04_0134E" = "Median_Monthly_Rent", "DP04_0142PE" = "Percent_Unaffordable_Rent", "DP05_0001E" = "Total_Population", "DP05_0018E" = "Median_Age", "DP05_0005PE" = "Percent_Under_5", "DP05_0019PE" = "Percent_Under_18", "DP05_0024PE" = "Percent_Over_64") state_fips = c("01" = "AL", "02" = "AK", "04" = "AZ", "05" = "AR", "06" = "CA", "08" = "CO", "09" = "CT", "10" = "DE", "12" = "FL", "13" = "GA", "15" = "HI", "16" = "ID", "17" = "IL", "18" = "IN", "19" = "IA", "20" = "KS", "21" = "KY", "22" = "LA", "23" = "ME", "24" = "MD", "25" = "MA", "26" = "MI", "27" = "MN", "28" = "MS", "29" = "MO", "30" = "MT", "31" = "NE", "32" = "NV", "33" = "NH", "34" = "NJ", "35" = "NM", "36" = "NY", "37" = "NC", "38" = "ND", "39" = "OH", "40" = "OK", "41" = "OR", "42" = "PA", "44" = "RI", "45" = "SC", "46" = "SD", "47" = "TN", "48" = "TX", "49" = "UT", "50" = "VT", "51" = "VA", "53" = "WA", "54" = "WV", "55" = "WI", "56" = "WY") # =========== Tracts =========== tracts = st_read("2019-2023-acs-tract-polygons.shz") tracts = tracts[,c("GEOIDFQ", "STUSPS", "NAME", "ALAND", "geometry")] tracts$Latitude = st_coordinates(st_centroid(tracts))[,2] tracts$Longitude = st_coordinates(st_centroid(tracts))[,1] tracts$Square_Miles = round((tracts$ALAND + tracts$AWATER) / 2589988, 2) tracts = tracts[,c("GEOIDFQ", "STUSPS", "NAME", "Latitude", "Longitude", "Square_Miles")] names(tracts) = c("GEOIDFQ", "ST", "Name", "Latitude", "Longitude", "Square_Miles", "geometry") tracts = st_crop(tracts, xmin=-180, xmax=0, ymin = 0, ymax = 89) tracts = st_transform(tracts, "EPSG:4326") for (variable in names(variables)) { combined = data.frame() for (fips in names(state_fips)) { url = paste0("https://api.census.gov/data/2023/acs/acs5/profile?get=GEO_ID,", variable, "&for=tract:*&in=state:", fips) print(url) acs = fromJSON(file = url) acs[[1]] = NULL # Variable names acs[[length(acs)]] = NULL # Trailer? acs = as.data.frame(do.call(rbind, acs))[,1:2] names(acs) = c("GEOIDFQ", variables[variable]) acs[, 2] = as.numeric(sapply(acs[, 2], as.character)) acs[, 2] = sapply(acs[,2], function(z) ifelse(z < 0, NA, z)) combined = rbind(combined, acs) } tracts = merge(tracts, combined) # stop("Test stop") } # plot(tracts[,"Median_Household_Income"], border=NA, breaks="quantile") tracts$Pop_per_Square_Mile = ifelse(tracts$Square_Miles > 0, tracts$Total_Population / tracts$Square_Miles, NA) st_write(tracts, "2019-2023-acs-tracts.geojson", delete_dsn=T) write.csv(st_drop_geometry(tracts), "2019-2023-acs-tracts.csv", row.names=F) stop("Tracts complete") # =========== States =========== states = st_read("2019-2023-acs-state-states.shz") states = states[,c("GEOIDFQ", "STUSPS", "NAME", "ALAND", "geometry")] states$Latitude = st_coordinates(st_centroid(states))[,2] states$Longitude = st_coordinates(st_centroid(states))[,1] states$Square_Miles = round(states$ALAND / 38610200) states = states[,c("GEOIDFQ", "STUSPS", "NAME", "Latitude", "Longitude", "Square_Miles")] names(states) = c("GEOIDFQ", "ST", "Name", "Latitude", "Longitude", "Square_Miles", "geometry") states = st_crop(states, xmin=-180, xmax=0, ymin = 0, ymax = 89) states = st_transform(states, "EPSG:4326") for (variable in names(variables)) { url = paste0("https://api.census.gov/data/2023/acs/acs5/profile?get=GEO_ID,", variable, "&for=state:*") print(url) acs = fromJSON(file = url) acs[[1]] = NULL # Variable names acs[[length(acs)]] = NULL # Trailer? acs = as.data.frame(do.call(rbind, acs))[,1:2] names(acs) = c("GEOIDFQ", variables[variable]) acs[,2] = as.numeric(acs[,2]) states = merge(states, acs) } states$Pop_per_Square_Mile = ifelse(states$Square_Miles > 0, states$Total_Population / states$Square_Miles, NA) st_write(states, "2019-2023-acs-states.geojson", delete_dsn=T) write.csv(st_drop_geometry(states), "2019-2023-acs-states.csv", row.names=F) stop("States complete") # =========== Counties =========== counties = st_read("2019-2023-acs-county-polygons.shz") counties = counties[,c("GEOIDFQ", "STUSPS", "NAME", "ALAND", "geometry")] counties$Latitude = st_coordinates(st_centroid(counties))[,2] counties$Longitude = st_coordinates(st_centroid(counties))[,1] counties$Square_Miles = round(counties$ALAND / 38610200) counties = counties[,c("GEOIDFQ", "STUSPS", "NAME", "Latitude", "Longitude", "Square_Miles")] names(counties) = c("GEOIDFQ", "ST", "Name", "Latitude", "Longitude", "Square_Miles", "geometry") counties = st_crop(counties, xmin=-180, xmax=0, ymin = 0, ymax = 89) counties = st_transform(counties, "EPSG:4326") for (variable in names(variables)) { url = paste0("https://api.census.gov/data/2023/acs/acs5/profile?get=GEO_ID,", variable, "&for=county:*&in=state:*") print(url) acs = fromJSON(file = url) acs[[1]] = NULL # Variable names acs[[length(acs)]] = NULL # Trailer? acs = as.data.frame(do.call(rbind, acs))[,1:2] names(acs) = c("GEOIDFQ", variables[variable]) acs[, 2] = as.numeric(sapply(acs[, 2], as.character)) acs[, 2] = sapply(acs[,2], function(z) ifelse(z < 0, NA, z)) counties = merge(counties, acs) } # plot(counties[,"Median_Home_Value"], border=NA, breaks="quantile") counties$Pop_per_Square_Mile = ifelse(counties$Square_Miles > 0, counties$Total_Population / counties$Square_Miles, NA) st_write(counties, "2019-2023-acs-counties.geojson", delete_dsn=T) write.csv(st_drop_geometry(counties), "2019-2023-acs-counties.csv", row.names=F) stop("Counties complete") # =========== ZCTA =========== zcta = st_read("2019-2023-acs-zcta-polygons.shz") zcta = zcta[,c("AFFGEOID20", "NAME20", "ALAND20", "geometry")] zcta$Latitude = st_coordinates(st_centroid(zcta))[,2] zcta$Longitude = st_coordinates(st_centroid(zcta))[,1] zcta$Square_Miles = round(zcta$ALAND20 / 38610200, 2) zcta = zcta[,c("AFFGEOID20", "NAME20", "Latitude", "Longitude", "Square_Miles")] names(zcta) = c("GEOIDFQ", "Name", "Latitude", "Longitude", "Square_Miles", "geometry") zcta = st_crop(zcta, xmin=-180, xmax=0, ymin = 0, ymax = 89) zcta = st_transform(zcta, "EPSG:4326") # ZCTA have no states states = st_read("2019-2023-acs-state-polygons.shz") states = st_transform(states, "EPSG:4326") states = states[,c("STUSPS", "geometry")] names(states) = c("ST", "geometry") zcta_centroids = st_centroid(zcta) zcta_centroids = st_join(zcta_centroids, states) zcta = merge(zcta, st_drop_geometry(zcta_centroids[, c("GEOIDFQ", "ST")])) for (variable in names(variables)) { url = paste0("https://api.census.gov/data/2023/acs/acs5/profile?get=GEO_ID,", variable, "&for=zip%20code%20tabulation%20area:*") print(url) acs = fromJSON(file = url) acs[[1]] = NULL # Variable names acs[[length(acs)]] = NULL # Trailer? acs = as.data.frame(do.call(rbind, acs))[,1:2] names(acs) = c("GEOIDFQ", variables[variable]) acs[, 2] = as.numeric(sapply(acs[, 2], as.character)) acs[, 2] = sapply(acs[,2], function(z) ifelse(z < 0, NA, z)) zcta = merge(zcta, acs) } # plot(zcta[zcta$ST == "IL", "Total_Households"], border=NA, breaks="quantile") zcta$Pop_per_Square_Mile = ifelse(zcta$Square_Miles > 0, zcta$Total_Population / zcta$Square_Miles, NA) st_write(zcta, "2019-2023-acs-zcta.geojson", delete_dsn=T) write.csv(st_drop_geometry(zcta), "2019-2023-acs-zcta.csv", row.names=F) stop("ZCTA complete") # =========== Correct invalid square mile calculation and add population density =========== library(sf) x = st_read("2019-2023-acs-state-polygons.shz") x$newarea = round((x$ALAND + x$AWATER) / 2589988) x = st_drop_geometry(x)[,c("GEOIDFQ", "newarea")] states = st_read("2019-2023-acs-states.geojson") states = merge(states, x, by="GEOIDFQ") states$Square_Miles = states$newarea states$newarea = NULL states$Pop_per_Square_Mile = ifelse(states$Square_Miles > 0, round(states$Total_Population / states$Square_Miles, 2), NA) st_write(states, "2019-2023-acs-states.geojson", delete_dsn=T) write.csv(st_drop_geometry(states), "2019-2023-acs-states.csv", row.names=F) x = st_read("2019-2023-acs-zcta-polygons.shz") x$newarea = round((x$ALAND20 + x$AWATER20) / 2589988, 2) x = st_drop_geometry(x)[,c("AFFGEOID20", "newarea")] names(x) = c("GEOIDFQ", "newarea") zcta = st_read("2019-2023-acs-zcta.geojson") zcta = merge(zcta, x, by="GEOIDFQ", all.x = T) zcta$Square_Miles = zcta$newarea zcta$newarea = NULL zcta$Pop_per_Square_Mile = ifelse(zcta$Square_Miles > 0, round(zcta$Total_Population / zcta$Square_Miles, 2), NA) st_write(zcta, "2019-2023-acs-zcta.geojson", delete_dsn=T) write.csv(st_drop_geometry(zcta), "2019-2023-acs-zcta.csv", row.names=F) x = st_read("2019-2023-acs-county-polygons.shz") x$newarea = round((x$ALAND + x$AWATER) / 2589988, 2) x = st_drop_geometry(x)[,c("GEOIDFQ", "newarea")] counties = st_read("2019-2023-acs-counties.geojson") counties = merge(counties, x, by="GEOIDFQ", all.x=T) counties$Square_Miles = counties$newarea counties$newarea = NULL counties$Pop_per_Square_Mile = ifelse(counties$Square_Miles > 0, round(counties$Total_Population / counties$Square_Miles, 2), NA) st_write(counties, "2019-2023-acs-counties.geojson", delete_dsn=T) write.csv(st_drop_geometry(counties), "2019-2023-acs-counties.csv", row.names=F) x = st_read("2019-2023-acs-tract-polygons.shz") x$newarea = round((x$ALAND + x$AWATER) / 2589988, 2) x = st_drop_geometry(x)[,c("GEOIDFQ", "newarea")] tracts = st_read("2019-2023-acs-tracts.geojson") tracts = merge(tracts, x, by="GEOIDFQ", all.x=T) tracts$Square_Miles = tracts$newarea tracts$newarea = NULL tracts$Pop_per_Square_Mile = ifelse(tracts$Square_Miles > 0, round(tracts$Total_Population / tracts$Square_Miles, 2), NA) st_write(tracts, "2019-2023-acs-tracts.geojson", delete_dsn=T) write.csv(st_drop_geometry(tracts), "2019-2023-acs-tracts.csv", row.names=F)