# American Community Survey 2010-2014 five year estimates
# Michael Minn
# Rev 4 March 2026
#
# Builds GeoJSON + CSV files of selected ACS data-profile variables for
# tracts, ZCTAs, CBSAs, states and counties by joining Census API responses
# to boundary polygons on GEOIDFQ.
#
# How this file runs: every section ends with a stop() call, so source()-ing
# the file halts after the first section (tracts). Later sections are
# presumably run by hand; when doing so, run the "Shared definitions" part
# first because every section depends on it.
#
# NOTE(review): the tract polygon download at the very bottom creates the
# "2010-2014-acs-tract-polygons.geojson" file that the Tracts section reads,
# so it has to have been run once before the Tracts section can work.

library(sf)
library(rjson)

# =========== Shared definitions ===========

# API variable code -> output column name.
# https://api.census.gov/data/2014/acs/acs5/profile/variables.html
variables = c(
  "DP02_0001E" = "Total_Households",
  "DP02_0015E" = "Average_Household_Size",
  "DP02_0037PE" = "Percent_Single_Mothers",
  "DP02_0057PE" = "Percent_In_College",
  "DP02_0064PE" = "Percent_Bachelors_Degree",
  "DP02_0065PE" = "Percent_Graduate_Degree",
  "DP02_0069PE" = "Percent_Veterans",
  "DP02_0071PE" = "Percent_Disabled",
  "DP02_0093PE" = "Percent_Foreign_Born", # giving count instead of percent
  "DP03_0002PE" = "Workforce_Participation",
  "DP03_0009PE" = "Percent_Unemployed",
  "DP03_0019PE" = "Percent_Commute_Alone",
  "DP03_0021PE" = "Percent_Commute_Transit",
  "DP03_0022PE" = "Percent_Commute_Walk",
  "DP03_0024PE" = "Percent_Work_at_Home",
  "DP03_0025E" = "Mean_Commute_Minutes",
  "DP03_0027PE" = "Percent_Jobs_Management",
  "DP03_0028PE" = "Percent_Jobs_Service",
  "DP03_0029PE" = "Percent_Jobs_Sales",
  "DP03_0030PE" = "Percent_Jobs_Nat_Resources",
  "DP03_0031PE" = "Percent_Jobs_Production",
  "DP03_0033PE" = "Percent_Industry_Ag_Mining",
  "DP03_0034PE" = "Percent_Industry_Construction",
  "DP03_0035PE" = "Percent_Industry_Manufacturing",
  "DP03_0036PE" = "Percent_Industry_Wholesale",
  "DP03_0037PE" = "Percent_Industry_Retail",
  "DP03_0038PE" = "Percent_Industry_Transport",
  "DP03_0039PE" = "Percent_Industry_Information",
  "DP03_0040PE" = "Percent_Industry_Finance",
  "DP03_0041PE" = "Percent_Industry_Pro_Services",
  "DP03_0042PE" = "Percent_Industry_Education",
  "DP03_0043PE" = "Percent_Industry_Recreation",
  "DP03_0047PE" = "Percent_Private_Employed",
  "DP03_0048PE" = "Percent_Govt_Employed",
  "DP03_0049PE" = "Percent_Self_Employed",
  "DP03_0062E" = "Median_Household_Income",
  "DP03_0096PE" = "Percent_Health_Insurance",
  "DP04_0001E" = "Total_Housing_Units",
  "DP04_0003PE" = "Percent_Vacant_Units",
  "DP04_0025PE" = "Percent_Pre_War_Units",
  "DP04_0045PE" = "Homeownership_Percent",
  "DP04_0046PE" = "Percent_Renters",
  "DP04_0057PE" = "Percent_No_Vehicle",
  "DP04_0088E" = "Median_Home_Value",
  "DP04_0100E" = "Median_Monthly_Mortgage",
  "DP04_0132E" = "Median_Monthly_Rent",
  "DP04_0140PE" = "Percent_Unaffordable_Rent",
  "DP05_0001E" = "Total_Population",
  "DP05_0017E" = "Median_Age",
  "DP05_0005PE" = "Percent_Under_5",
  "DP05_0018PE" = "Percent_Over_18",
  "DP05_0025PE" = "Percent_Over_64") # giving count instead of percent

# State FIPS code -> postal abbreviation.
# NOTE(review): "11" (DC) is not listed here although the polygon download at
# the bottom of this file fetches cb_2014_11. As written, no tract data is
# requested for DC (so DC tracts fall out of the inner merge) and DC counties
# get ST = NA. Confirm whether that is intentional before adding it.
state_fips = c(
  "01" = "AL", "02" = "AK", "04" = "AZ", "05" = "AR", "06" = "CA",
  "08" = "CO", "09" = "CT", "10" = "DE", "12" = "FL", "13" = "GA",
  "15" = "HI", "16" = "ID", "17" = "IL", "18" = "IN", "19" = "IA",
  "20" = "KS", "21" = "KY", "22" = "LA", "23" = "ME", "24" = "MD",
  "25" = "MA", "26" = "MI", "27" = "MN", "28" = "MS", "29" = "MO",
  "30" = "MT", "31" = "NE", "32" = "NV", "33" = "NH", "34" = "NJ",
  "35" = "NM", "36" = "NY", "37" = "NC", "38" = "ND", "39" = "OH",
  "40" = "OK", "41" = "OR", "42" = "PA", "44" = "RI", "45" = "SC",
  "46" = "SD", "47" = "TN", "48" = "TX", "49" = "UT", "50" = "VT",
  "51" = "VA", "53" = "WA", "54" = "WV", "55" = "WI", "56" = "WY")

# Everything in the request URL up to (not including) the variable code.
acs_api_root = "https://api.census.gov/data/2014/acs/acs5/profile?get=GEO_ID,"

# Divisor that turns ALAND + AWATER into square miles (1 sq mi = 2,589,988 m^2;
# presumably the shapefile areas are in square meters -- confirm against the
# cartographic boundary file documentation).
sq_meters_per_sq_mile = 2589988

# Download one variable for one geography clause and return a two-column data
# frame: GEOIDFQ (character) and the estimate under its friendly column name.
#   variable  - API variable code; must be a name in `variables`
#   geography - text that follows "&for=" in the request, e.g. "state:*"
# Negative estimates are replaced with NA (presumably Census missing-data
# sentinels).
fetch_acs_variable = function(variable, geography) {
  url = paste0(acs_api_root, variable, "&for=", geography)
  print(url)
  response = fromJSON(file = url)

  # The response is an array of rows whose first row holds the column names.
  # Only that header row is dropped. Earlier revisions also removed the last
  # element as a suspected "trailer", but the API sends no trailer, so that
  # silently discarded one real geography per request.
  records = response[-1]

  # Pull the two leading fields row by row. A row arrives as a character
  # vector, or as a list when it contains a JSON null; [[ ]] handles both.
  geoid = vapply(records, function(rec) as.character(rec[[1]]), character(1))
  raw_value = vapply(records, function(rec) {
    field = rec[[2]]
    if (is.null(field)) NA_character_ else as.character(field)
  }, character(1))

  estimate = as.numeric(raw_value)
  estimate[!is.na(estimate) & estimate < 0] = NA

  acs = data.frame(GEOIDFQ = geoid, estimate = estimate,
    stringsAsFactors = FALSE)
  names(acs)[2] = variables[[variable]]
  acs
}

# Inner-join every variable in `variables` onto `layer` by GEOIDFQ.
#   layer       - sf object with a GEOIDFQ column
#   geographies - one or more "&for=" clauses; their results are stacked
#                 before joining (tracts need one request per state)
# Returns the layer with one added column per variable. Geographies missing
# from any response are dropped, as with the original merge() calls.
merge_acs_variables = function(layer, geographies) {
  for (variable in names(variables)) {
    pieces = lapply(geographies,
      function(geography) fetch_acs_variable(variable, geography))
    layer = merge(layer, do.call(rbind, pieces), by = "GEOIDFQ")
  }
  layer
}

# Add Latitude / Longitude (polygon centroid) and Square_Miles to a layer.
#   land, water - names of the land-area and water-area columns
#   digits      - rounding applied to Square_Miles
add_location_columns = function(layer, land, water, digits = 2) {
  # Centroids are computed once and reused for both coordinates.
  centroid_xy = st_coordinates(st_centroid(st_geometry(layer)))
  layer$Latitude = centroid_xy[, 2]
  layer$Longitude = centroid_xy[, 1]
  layer$Square_Miles = round(
    (layer[[land]] + layer[[water]]) / sq_meters_per_sq_mile, digits)
  layer
}

# Keep only what lies between longitude -180..0 and latitude 0..89, then
# reproject to EPSG:4326. The crop runs in the layer's incoming CRS.
crop_and_project = function(layer) {
  layer = st_crop(layer, xmin = -180, xmax = 0, ymin = 0, ymax = 89)
  st_transform(layer, "EPSG:4326")
}

# 100 * count / total rounded to 2 places; NA where total is not positive so
# empty geographies do not produce NaN.
percent_of = function(count, total) {
  ifelse(total > 0, round(100 * count / total, 2), NA)
}

# Add derived columns and write <stem>.geojson and <stem>.csv.
# Returns the finished layer.
finish_and_write = function(layer, stem) {
  layer$Pop_per_Square_Mile = ifelse(layer$Square_Miles > 0,
    round(layer$Total_Population / layer$Square_Miles, 2), NA)

  # 2014 API returns counts instead of percents 3/4/2026
  layer$Percent_Foreign_Born =
    percent_of(layer$Percent_Foreign_Born, layer$Total_Population)
  layer$Percent_Over_64 =
    percent_of(layer$Percent_Over_64, layer$Total_Population)

  st_write(layer, paste0(stem, ".geojson"), delete_dsn = TRUE)
  write.csv(st_drop_geometry(layer), paste0(stem, ".csv"), row.names = FALSE)
  layer
}

# =========== Tracts ===========

tracts = st_read("2010-2014-acs-tract-polygons.geojson")
tracts = tracts[, c("AFFGEOID", "STATEFP", "NAME", "ALAND", "AWATER",
  "geometry")]
tracts = add_location_columns(tracts, "ALAND", "AWATER", digits = 2)
tracts$STUSPS = unname(state_fips[as.character(tracts$STATEFP)])
tracts = tracts[, c("AFFGEOID", "STUSPS", "NAME", "Latitude", "Longitude",
  "Square_Miles")]
names(tracts) = c("GEOIDFQ", "ST", "Name", "Latitude", "Longitude",
  "Square_Miles", "geometry")
tracts = crop_and_project(tracts)

# Tract requests are made one state at a time.
tracts = merge_acs_variables(tracts,
  paste0("tract:*&in=state:", names(state_fips)))

# plot(tracts[,"Median_Household_Income"], border=NA, breaks="quantile")

tracts = finish_and_write(tracts, "2010-2014-acs-tracts")

stop("Tracts complete")

# =========== ZCTA ===========

zcta = st_read("2010-2014-acs-zcta-polygons.shz")
zcta = zcta[, c("AFFGEOID10", "ZCTA5CE10", "ALAND10", "AWATER10", "geometry")]
zcta = add_location_columns(zcta, "ALAND10", "AWATER10", digits = 2)
zcta = zcta[, c("AFFGEOID10", "ZCTA5CE10", "Latitude", "Longitude",
  "Square_Miles")]
names(zcta) = c("GEOIDFQ", "Name", "Latitude", "Longitude", "Square_Miles",
  "geometry")
zcta = crop_and_project(zcta)

# ZCTA have no states: assign the state whose polygon contains the centroid.
state_outlines = st_read("2010-2014-acs-state-polygons.shz")
state_outlines = st_transform(state_outlines, "EPSG:4326")
state_outlines = state_outlines[, c("STUSPS", "geometry")]
names(state_outlines) = c("ST", "geometry")

zcta_centroids = st_join(st_centroid(zcta), state_outlines)
zcta = merge(zcta, st_drop_geometry(zcta_centroids[, c("GEOIDFQ", "ST")]),
  by = "GEOIDFQ")

zcta = merge_acs_variables(zcta, "zip%20code%20tabulation%20area:*")

# plot(zcta[zcta$ST == "IL", "Total_Households"], border=NA, breaks="quantile")

zcta = finish_and_write(zcta, "2010-2014-acs-zcta")

stop("ZCTA complete")

# =========== CBSA ===========

cbsa = st_read("2010-2014-acs-cbsa-polygons.shz")
cbsa = add_location_columns(cbsa, "ALAND", "AWATER", digits = 2)
cbsa = cbsa[, c("AFFGEOID", "NAME", "Latitude", "Longitude", "Square_Miles",
  "geometry")]
names(cbsa) = c("GEOIDFQ", "Name", "Latitude", "Longitude", "Square_Miles",
  "geometry")
cbsa = crop_and_project(cbsa)

cbsa = merge_acs_variables(cbsa,
  "metropolitan%20statistical%20area/micropolitan%20statistical%20area:*")

cbsa = finish_and_write(cbsa, "2010-2014-acs-cbsa")

stop("CBSA complete")

# =========== States ===========

states = st_read("2010-2014-acs-state-polygons.shz")
states = states[, c("AFFGEOID", "STUSPS", "NAME", "ALAND", "AWATER",
  "geometry")]
# State areas are rounded to whole square miles.
states = add_location_columns(states, "ALAND", "AWATER", digits = 0)
states = states[, c("AFFGEOID", "STUSPS", "NAME", "Latitude", "Longitude",
  "Square_Miles")]
names(states) = c("GEOIDFQ", "ST", "Name", "Latitude", "Longitude",
  "Square_Miles", "geometry")
states = crop_and_project(states)

states = merge_acs_variables(states, "state:*")

states = finish_and_write(states, "2010-2014-acs-states")

stop("States complete")

# =========== Counties ===========

counties = st_read("2010-2014-acs-county-polygons.shz")
counties = counties[, c("AFFGEOID", "STATEFP", "NAME", "ALAND", "AWATER",
  "geometry")]
# County areas are rounded to whole square miles.
counties = add_location_columns(counties, "ALAND", "AWATER", digits = 0)
counties$ST = unname(state_fips[as.character(counties$STATEFP)])
counties = counties[, c("AFFGEOID", "ST", "NAME", "Latitude", "Longitude",
  "Square_Miles")]
names(counties) = c("GEOIDFQ", "ST", "Name", "Latitude", "Longitude",
  "Square_Miles", "geometry")
counties = crop_and_project(counties)

counties = merge_acs_variables(counties, "county:*&in=state:*")

# plot(counties[,"Median_Home_Value"], border=NA, breaks="quantile")

counties = finish_and_write(counties, "2010-2014-acs-counties")

stop("Counties complete")

# =========== Tract polygon download ===========
# Downloads the per-state 2014 cartographic boundary tract shapefiles and
# stacks them into the single GeoJSON file read by the Tracts section.

library(sf) # repeated so this section can be run without the top of the file

# State / territory FIPS codes that have a tract file to download.
tract_file_fips = c(
  "01", "02", "04", "05", "06", "08", "09", "10", "11", "12", "13", "15",
  "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27",
  "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39",
  "40", "41", "42", "44", "45", "46", "47", "48", "49", "50", "51", "53",
  "54", "55", "56", "66", "69", "72", "78")

zipfiles = sprintf("cb_2014_%s_tract_500k.zip", tract_file_fips)

root = "https://www2.census.gov/geo/tiger/GENZ2014/shp/"

# The download is saved with a .shz extension so st_read() opens the zip
# archive directly as a shapefile.
download_path = tempfile(fileext = ".shz")

# Collect the pieces in a preallocated list and bind once at the end instead
# of growing the sf object on every iteration.
tract_pieces = vector("list", length(zipfiles))
rows_so_far = 0

for (i in seq_along(zipfiles)) {
  url = paste0(root, zipfiles[i])
  print(url)

  download.file(url, download_path, mode = "wb")
  tract_pieces[[i]] = st_read(download_path)

  # Progress: running row count and column count, as dim() of the stacked
  # result would report.
  rows_so_far = rows_so_far + nrow(tract_pieces[[i]])
  print(c(rows_so_far, ncol(tract_pieces[[i]])))
}

unlink(download_path)

tracts = do.call(rbind, tract_pieces)

st_write(tracts, "2010-2014-acs-tract-polygons.geojson", delete_dsn = TRUE)

stop()