Process NYC 311 service requests and aggregate quality-of-life (QoL) complaints by 2020 census tract.
Extracts QoL-related 311 reports for summer 2025 and builds a tract-by-day panel.
# Modules.
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point
import requests, time
from pathlib import Path
# Paths.
# Root directory for all 311 artifacts produced by this notebook.
nyc_311_dir = Path("data/nyc_311")
# Sub-directory holding the raw API download; created if missing.
nyc_311 = nyc_311_dir / "311_raw"
nyc_311.mkdir(parents = True, exist_ok = True)
# NYC 2020 census tracts shapefile.
tracts_path = Path("data/nyc_tracts_2020/nyc_tracts_2020.shp")
# Output.
raw_path = nyc_311 / "nyc_311_summer_2025.csv"  # raw API pull (CSV)
panel_path = nyc_311_dir / "nyc_311_tract_day_2025.csv"  # tract-by-day panel
point_path_geojson = nyc_311_dir / "nyc_311_points_2025.geojson"  # per-call points with tract IDs
# Complaint-type whitelists, grouped into quality-of-life (QoL) superclasses.
# Noise and Social Activity (heat-sensitive)
QOL_NOISE = [
"LOUD MUSIC/PARTY", "BANGING/POUNDING", "LOUD TALKING",
"CAR/TRUCK MUSIC", "CAR/TRUCK HORN", "DOG NOISE",
"NOISE: BOAT(ENGINE,MUSIC,ETC) (NR10)",
"NOISE: ALARMS (NR3)",
"NOISE: AIR CONDITION/VENTILATION EQUIPMENT (NV1)",
"NOISE: CONSTRUCTION BEFORE/AFTER HOURS (NM1)",
"NOISE: JACK HAMMERING (NC2)",
"NOISE, BARKING DOG (NR5)",
"NOISE: MANUFACTURING NOISE (NK1)",
"NOISE: OTHER NOISE SOURCES (USE COMMENTS) (NZZ)"
]
# Outdoor Activity / Public Space Use
QOL_OUTDOOR = [
"BLOCKED HYDRANT", "BLOCKED SIDEWALK", "BLOCKED BIKE LANE",
"ILLEGAL PARKING", "DOUBLE PARKED BLOCKING TRAFFIC",
"BLOCKED CROSSWALK", "DERELICT VEHICLES", "CONGESTION/GRIDLOCK",
"GRAFFITI", "CHRONIC DUMPING",
"COMMERCIAL OVERNIGHT PARKING"
]
# Sanitation, Trash, Pests
QOL_SANITATION = [
"GARBAGE OR LITTER", "TRASH", "OVERFLOWING",
"RAT SIGHTING", "MOUSE SIGHTING", "CONDITION ATTRACTING RODENTS",
"PESTS", "UNSANITARY CONDITION", "DEAD ANIMAL",
"WASTE DISPOSAL", "DOG WASTE"
]
# Water Infrastructure & Hydrants
QOL_WATER = [
"WATER LEAK", "WATER SUPPLY", "HYDRANT LEAKING (WC1)",
"HYDRANT RUNNING FULL (WA4)", "HYDRANT RUNNING (WC3)",
"HYDRANT DEFECTIVE (WC2)", "SEWER", "SEWER ODOR (SA2)",
"SEWER BACKUP (SA)", "LEAK (USE COMMENTS) (WA2)"
]
# Infrastructure Heat Stress
QOL_INFRA_HEAT = [
"POWER OUTAGE", "ELECTRICAL/GAS RANGE", "VENTILATION SYSTEM",
"TRAFFIC SIGNAL LIGHT", "STREET LIGHT OUT",
"STREET LIGHT LAMP MISSING", "STREET LIGHT CYCLING"
]
# Build the complaint-type -> QoL-superclass lookup used for encoding.
def build_qol_lookup():
    """Return a dict mapping each whitelisted complaint type to its QoL category label."""
    groups = {
        "QOL_NOISE": QOL_NOISE,
        "QOL_OUTDOOR": QOL_OUTDOOR,
        "QOL_SANITATION": QOL_SANITATION,
        "QOL_WATER_INFRA": QOL_WATER,
        "QOL_INFRA_HEAT": QOL_INFRA_HEAT,
    }
    return {complaint: label for label, members in groups.items() for complaint in members}
QOL_LOOKUP = build_qol_lookup()
# Download helper.
def download_311_jfk_2025(token = None):
    """Download NYC 311 service requests for summer 2025 (2025-06-01 to 2025-08-22).

    Pages through the Socrata endpoint in 50k-row chunks (ordered by
    created_date so pagination neither skips nor duplicates rows), keeps only
    rows with coordinates, saves the combined frame to `raw_path`, and returns it.

    Note: despite the legacy "jfk" in the name, this pulls citywide records.

    Parameters
    ----------
    token : str, optional
        Socrata app token; raises the anonymous rate limit when provided.

    Returns
    -------
    pandas.DataFrame
        Raw records for the selected columns (Socrata returns all values as strings).

    Raises
    ------
    requests.HTTPError
        If the API returns a non-2xx status on any page.
    RuntimeError
        If the query returns zero rows.
    """
    base = "https://data.cityofnewyork.us/resource/erm2-nwe9.json"
    headers = {"X-App-Token": token} if token else {}
    limit = 50000
    start = "2025-06-01T00:00:00"
    end = "2025-08-22T23:59:59"
    where_clause = (
        f"created_date between '{start}' and '{end}' "
        "AND latitude IS NOT NULL AND longitude IS NOT NULL"
    )
    cols = [
        "unique_key", "created_date", "complaint_type",
        "descriptor", "latitude", "longitude", "borough"
    ]
    offset = 0
    frames = []
    while True:
        params = {
            "$select": ",".join(cols),
            "$where": where_clause,
            "$limit": limit,
            "$offset": offset,
            "$order": "created_date"
        }
        r = requests.get(base, params = params, headers = headers, timeout = 60)
        # Fail loudly on HTTP errors instead of silently parsing an error payload.
        r.raise_for_status()
        data = r.json()
        if len(data) == 0:
            break
        frames.append(pd.DataFrame(data))
        print("Fetched:", len(data), "offset:", offset)
        offset += limit
        # A short page means we reached the end; skip the extra empty request.
        if len(data) < limit:
            break
        time.sleep(0.3)  # be polite to the API between pages
    if not frames:
        # Clearer than the ValueError pd.concat raises on an empty list.
        raise RuntimeError("No 311 records returned for the requested window")
    df = pd.concat(frames, ignore_index = True)
    df.to_csv(raw_path, index = False)
    print("Saved:", raw_path)
    return df
# Download.
# Anonymous request (rate-limited); pass a Socrata app token to raise limits.
calls_311 = download_311_jfk_2025(token = None)
# Socrata returns every field as a string; coerce to proper dtypes.
calls_311["created_date"] = pd.to_datetime(calls_311["created_date"], errors = "coerce")
# NOTE(review): unlike created_date, these conversions use no errors="coerce",
# so a malformed coordinate string would raise here — presumably acceptable
# because the $where clause already filters out null coordinates; confirm.
calls_311["latitude"] = pd.to_numeric(calls_311["latitude"])
calls_311["longitude"] = pd.to_numeric(calls_311["longitude"])
Fetched: 50000 offset: 0 Fetched: 50000 offset: 50000 Fetched: 50000 offset: 100000 Fetched: 50000 offset: 150000 Fetched: 50000 offset: 200000 Fetched: 50000 offset: 250000 Fetched: 50000 offset: 300000 Fetched: 50000 offset: 350000 Fetched: 50000 offset: 400000 Fetched: 50000 offset: 450000 Fetched: 50000 offset: 500000 Fetched: 50000 offset: 550000 Fetched: 50000 offset: 600000 Fetched: 50000 offset: 650000 Fetched: 26191 offset: 700000 Saved: data\nyc_311\311_raw\nyc_311_summer_2025.csv
# Spatial join: assign each 311 call to a 2020 census tract.
tracts = gpd.read_file(tracts_path)
# Restrict to the five NYC county FIPS prefixes
# (Bronx, Kings, New York, Queens, Richmond).
nyc_prefixes = ("36005", "36047", "36061", "36081", "36085")
tracts = tracts.loc[tracts["geoid"].str.startswith(nyc_prefixes)].copy()
# Build point geometries from lon/lat (WGS84), then reproject into the tract CRS.
points = gpd.GeoDataFrame(
    calls_311,
    geometry = gpd.points_from_xy(calls_311["longitude"], calls_311["latitude"]),
    crs = "EPSG:4326",
)
points = points.to_crs(tracts.crs)
joined_gdf = gpd.sjoin(
    points,
    tracts[["geoid", "geometry"]],
    how = "left",
    predicate = "within",
)
# Drop calls that fall outside every tract, then standardize the key name.
joined_gdf = joined_gdf.dropna(subset = ["geoid"]).copy()
joined_gdf = joined_gdf.rename(columns = {"geoid": "GEOID"})
# Encode complaint types to QoL superclasses.
# Normalize case/whitespace so raw API values match the uppercase whitelist keys.
joined_gdf["ct_norm"] = joined_gdf["complaint_type"].str.upper().str.strip()
joined_gdf["qol_category"] = joined_gdf["ct_norm"].map(QOL_LOOKUP)
# Heat-relevant QoL calls: any complaint that mapped to a QoL category.
joined_gdf["is_heat_qol"] = joined_gdf["qol_category"].notna()
# Build tract by day.
joined_gdf["date"] = joined_gdf["created_date"].dt.date
panel = (
    joined_gdf.groupby(["GEOID", "date"], as_index = False)
    .agg(
        total_calls = ("unique_key", "count"),
        qol_calls = ("is_heat_qol", "sum"),
    )
)
# NOTE(review): despite the name, this is the raw QoL call count as a float,
# not a per-1,000 rate — no population denominator exists in this pipeline.
# The column name is kept to avoid breaking downstream consumers; confirm intent.
panel["heat_qol_rate_1k"] = panel["qol_calls"].astype(float)
# Share of the tract-day's calls that are QoL-related. total_calls >= 1 for
# every row the groupby emits, so the zero guard is purely defensive.
panel["qol_pct"] = np.where(
    panel["total_calls"] > 0,
    panel["qol_calls"]/panel["total_calls"],
    np.nan
)
panel.columns = panel.columns.str.upper()
panel.to_csv(panel_path, index = False)
print("Saved panel:", panel_path)
panel
Saved panel: data\nyc_311\nyc_311_tract_day_2025.csv
| GEOID | DATE | TOTAL_CALLS | QOL_CALLS | HEAT_QOL_RATE_1K | QOL_PCT | |
|---|---|---|---|---|---|---|
| 0 | 36005000100 | 2025-06-30 | 1 | 0 | 0.0 | 0.000 |
| 1 | 36005000100 | 2025-07-23 | 1 | 0 | 0.0 | 0.000 |
| 2 | 36005000100 | 2025-08-04 | 1 | 0 | 0.0 | 0.000 |
| 3 | 36005000200 | 2025-06-01 | 8 | 5 | 5.0 | 0.625 |
| 4 | 36005000200 | 2025-06-02 | 3 | 0 | 0.0 | 0.000 |
| ... | ... | ... | ... | ... | ... | ... |
| 162339 | 36085032300 | 2025-08-15 | 1 | 0 | 0.0 | 0.000 |
| 162340 | 36085032300 | 2025-08-18 | 2 | 1 | 1.0 | 0.500 |
| 162341 | 36085032300 | 2025-08-19 | 4 | 4 | 4.0 | 1.000 |
| 162342 | 36085032300 | 2025-08-20 | 2 | 0 | 0.0 | 0.000 |
| 162343 | 36085032300 | 2025-08-21 | 1 | 0 | 0.0 | 0.000 |
162344 rows × 6 columns
# Save point-level data (joined_gdf with GEOID assignment) for mapping.
# BUG FIX: the original list included "qol_flag", a column that is never
# created (the encoding step produces "qol_category" and "is_heat_qol"),
# so selecting point_cols raised KeyError before anything was written.
point_cols = [
    "unique_key", "created_date", "complaint_type", "descriptor",
    "latitude", "longitude", "borough", "GEOID",
    "ct_norm", "qol_category", "is_heat_qol", "date", "geometry"
]
# Ensure only the necessary columns are kept and the index is dropped for clean output.
point_data_gdf = joined_gdf[point_cols].copy()
# GeoJSON properties must be JSON-serializable; the "date" column holds
# datetime.date objects, which some OGR drivers reject, so serialize both
# temporal columns to ISO strings explicitly.
point_data_gdf["created_date"] = point_data_gdf["created_date"].astype(str)
point_data_gdf["date"] = point_data_gdf["date"].astype(str)
# Save GeoDataFrame to GeoJSON file.
point_data_gdf.to_file(point_path_geojson, driver = "GeoJSON")
print("Saved point data as GeoJSON:", point_path_geojson)
Saved point data as GeoJSON: data\nyc_311\nyc_311_points_2025.geojson
Source: 02_311_tract_daily.ipynb
Code Cells: 10
Figures: 0