migrate from csv files to sqlite databases for downstream use in queries #120

Open · wants to merge 11 commits into base: main
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
@@ -48,8 +48,8 @@ jobs:
python -m pip install --upgrade pip
pip install -e .[dev]

- name: Generate databases
run: python cities/utils/csv_to_db_pipeline.py
# - name: Generate databases
# run: python cities/utils/csv_to_db_pipeline.py


- name: Test
8 changes: 8 additions & 0 deletions .gitignore
@@ -30,6 +30,12 @@ tests/.coverage
.vscode/launch.json
data/sql/counties_database.db
data/sql/msa_database.db
docs/experimental_notebooks/zoning/interactions_preds.dill
docs/experimental_notebooks/zoning/population_preds.dill
docs/experimental_notebooks/zoning/waic_dict_7.pkl
docs/experimental_notebooks/zoning/waic_dict_13.pkl
docs/experimental_notebooks/zoning/waic_dict_14.pkl


.Rproj.user
**/*.RData
@@ -42,6 +48,8 @@ data/minneapolis/sourced/demographic/**
data/minneapolis/preds/**
data/minneapolis/sourced/parcel_to_census_tract_mappings/**
data/minneapolis/sourced/parcel_to_parking_info_mappings/**

data/minneapolis/.pgpass
cities/deployment/tracts_minneapolis/tracts_model_guide.pkl
cities/deployment/tracts_minneapolis/tracts_model_params.pth
build/cities/deployment/tracts_minneapolis/tracts_model_guide.pkl
10 changes: 10 additions & 0 deletions Makefile
@@ -1,10 +1,20 @@
format: FORCE
./scripts/clean.sh


path ?= .

format_path: FORCE
./scripts/clean_path.sh $(path)

lint: FORCE
./scripts/lint.sh

test: FORCE
./scripts/test.sh

test_all: FORCE
./scripts/clean.sh
./scripts/lint.sh
./scripts/test.sh
./scripts/test_notebooks.sh
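Note: the new `format_path` target scopes formatting to a subtree via the `path` variable (default `.`), e.g. `make format_path path=cities/utils`.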
10 changes: 10 additions & 0 deletions build/.env
@@ -0,0 +1,10 @@
GOOGLE_CLOUD_PROJECT=cities-429602
GOOGLE_CLOUD_BUCKET=minneapolis-basis

ENV=dev
INSTANCE_CONNECTION_NAME=cities-429602:us-central1:cities-devel
DB_SEARCH_PATH=dev,public
HOST=34.123.100.76
SCHEMA=minneapolis
DATABASE=cities
DB_USERNAME=postgres
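Note that this file does not define PASSWORD, which build/api/main.py also reads from the environment; the database password is presumably supplied separately rather than committed here.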
10 changes: 10 additions & 0 deletions build/Dockerfile
@@ -0,0 +1,10 @@
FROM python:3

WORKDIR /usr/src/app

COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

CMD [ "python", "main.py" ]
10 changes: 10 additions & 0 deletions build/api/Dockerfile
@@ -0,0 +1,10 @@
FROM python:3

WORKDIR /usr/src/app

COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

CMD [ "python", "main.py" ]
235 changes: 235 additions & 0 deletions build/api/main.py
@@ -0,0 +1,235 @@
import os

from typing import Annotated

from dotenv import load_dotenv
from fastapi import FastAPI, Depends, Query
from fastapi.middleware.gzip import GZipMiddleware
import uvicorn

import psycopg2
from psycopg2.pool import ThreadedConnectionPool

load_dotenv()

ENV = os.getenv("ENV")
USERNAME = os.getenv("DB_USERNAME")
PASSWORD = os.getenv("PASSWORD")
HOST = os.getenv("HOST")
DATABASE = os.getenv("DATABASE")
DB_SEARCH_PATH = os.getenv("DB_SEARCH_PATH")
INSTANCE_CONNECTION_NAME = os.getenv("INSTANCE_CONNECTION_NAME")

app = FastAPI()

if ENV == "dev":
from fastapi.middleware.cors import CORSMiddleware

origins = [
"http://localhost",
"http://localhost:5000",
]
app.add_middleware(CORSMiddleware, allow_origins=origins, allow_credentials=True)

app.add_middleware(GZipMiddleware, minimum_size=1000, compresslevel=5)


if ENV == "dev":
host = HOST
else:
host = f"/cloudsql/{INSTANCE_CONNECTION_NAME}"

pool = ThreadedConnectionPool(
1,
10,
user=USERNAME,
password=PASSWORD,
host=HOST,
database=DATABASE,
options=f"-csearch_path={DB_SEARCH_PATH}",
)


def get_db() -> psycopg2.extensions.connection:
    db = pool.getconn()
    try:
        yield db
    finally:
        pool.putconn(db)


predictor = None


def get_predictor(db: psycopg2.extensions.connection = Depends(get_db)):
    from cities.deployment.tracts_minneapolis.predict import TractsModelPredictor

    global predictor
    # Construct a single shared predictor lazily, on first request.
    if predictor is None:
        predictor = TractsModelPredictor(db)
    return predictor


# Validated query-parameter types shared across endpoints.
Limit = Annotated[float, Query(ge=0, le=1)]
Radius = Annotated[float, Query(ge=0)]
Year = Annotated[int, Query(ge=2000, le=2030)]


@app.middleware("http")
async def add_cache_control_header(request, call_next):
response = await call_next(request)
response.headers["Cache-Control"] = "public, max-age=300"
return response


if ENV == "dev":

@app.middleware("http")
async def add_acess_control_header(request, call_next):
response = await call_next(request)
response.headers["Access-Control-Allow-Origin"] = "*"
return response


@app.get("/demographics")
async def read_demographics(
category: Annotated[str, Query(max_length=100)], db=Depends(get_db)
):
with db.cursor() as cur:
cur.execute(
"""
select tract_id, "2011", "2012", "2013", "2014", "2015", "2016", "2017", "2018", "2019", "2020", "2021", "2022"
from api__demographics where description = %s
""",
(category,),
)
return [[desc[0] for desc in cur.description]] + cur.fetchall()


@app.get("/census-tracts")
async def read_census_tracts(year: Year, db=Depends(get_db)):
with db.cursor() as cur:
cur.execute("select * from api__census_tracts where year_ = %s", (year,))
row = cur.fetchone()

return row[1] if row is not None else None


@app.get("/high-frequency-transit-lines")
async def read_high_frequency_transit_lines(year: Year, db=Depends(get_db)):
with db.cursor() as cur:
cur.execute(
"""
select line_geom_json
from api__high_frequency_transit_lines
where '%s-01-01'::date <@ valid
""",
(year,),
)
row = cur.fetchone()

return row[0] if row is not None else None


@app.get("/high-frequency-transit-stops")
async def read_high_frequency_transit_stops(year: Year, db=Depends(get_db)):
with db.cursor() as cur:
cur.execute(
"""
select stop_geom_json
from api__high_frequency_transit_lines
where '%s-01-01'::date <@ valid
""",
(year,),
)
row = cur.fetchone()

return row[0] if row is not None else None


@app.get("/yellow-zone")
async def read_yellow_zone(
year: Year, line_radius: Radius, stop_radius: Radius, db=Depends(get_db)
):
with db.cursor() as cur:
cur.execute(
"""
select
st_asgeojson(st_transform(st_union(st_buffer(line_geom, %s, 'quad_segs=4'), st_buffer(stop_geom, %s, 'quad_segs=4')), 4269))::json
from api__high_frequency_transit_lines
where '%s-01-01'::date <@ valid
""",
(line_radius, stop_radius, year),
)
row = cur.fetchone()

if row is None:
return None

return {
"type": "FeatureCollection",
"features": [
{"type": "Feature", "properties": {"id": "0"}, "geometry": row[0]}
],
}


@app.get("/blue-zone")
async def read_blue_zone(year: Year, radius: Radius, db=Depends(get_db)):
with db.cursor() as cur:
cur.execute(
"""
select st_asgeojson(st_transform(st_buffer(line_geom, %s, 'quad_segs=4'), 4269))::json
from api__high_frequency_transit_lines
where '%s-01-01'::date <@ valid
""",
(radius, year),
)
row = cur.fetchone()

if row is None:
return None

return {
"type": "FeatureCollection",
"features": [
{"type": "Feature", "properties": {"id": "0"}, "geometry": row[0]}
],
}


@app.get("/predict")
async def read_predict(
blue_zone_radius: Radius,
yellow_zone_line_radius: Radius,
yellow_zone_stop_radius: Radius,
blue_zone_limit: Limit,
yellow_zone_limit: Limit,
year: Year,
db=Depends(get_db),
predictor=Depends(get_predictor),
):
result = predictor.predict_cumulative(
db,
intervention=(
{
"radius_blue": blue_zone_radius,
"limit_blue": blue_zone_limit,
"radius_yellow_line": yellow_zone_line_radius,
"radius_yellow_stop": yellow_zone_stop_radius,
"limit_yellow": yellow_zone_limit,
"reform_year": year,
}
),
)
return {
"census_tracts": [str(t) for t in result["census_tracts"]],
"housing_units_factual": [t.item() for t in result["housing_units_factual"]],
"housing_units_counterfactual": [
t.tolist() for t in result["housing_units_counterfactual"]
],
}


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 8000)))
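For reference, a minimal client sketch for the /predict endpoint, assuming a locally running server on the default port 8000 and the third-party `requests` package; the parameter values are illustrative only:

import requests

# Query parameters mirror the endpoint's signature: radii are non-negative,
# limits lie in [0, 1], and the year must fall in 2000-2030.
params = {
    "blue_zone_radius": 100.0,
    "yellow_zone_line_radius": 200.0,
    "yellow_zone_stop_radius": 100.0,
    "blue_zone_limit": 0.5,
    "yellow_zone_limit": 0.7,
    "year": 2020,
}
resp = requests.get("http://localhost:8000/predict", params=params)
resp.raise_for_status()
result = resp.json()
print(result["census_tracts"][:5], result["housing_units_factual"][:5])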