Skip to content

Commit

Permalink
Merge pull request #60 from Urban-Analytics-Technology-Platform/iterative_categorical_matching
Browse files Browse the repository at this point in the history

Iterative categorical matching
  • Loading branch information
sgreenbury authored Nov 1, 2024
2 parents ada37b2 + f5b159c commit 87fa6be
Show file tree
Hide file tree
Showing 15 changed files with 4,074 additions and 764 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,9 @@ logs/

# pyright config
pyrightconfig.json

# scratch
notebooks/scratch*

# AcBM config
config/
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ repos:
- id: trailing-whitespace

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: "v0.2.0"
rev: "v0.7.0"
hooks:
# first, lint + autofix
- id: ruff
Expand Down
14 changes: 12 additions & 2 deletions config/base.toml
Original file line number Diff line number Diff line change
@@ -1,11 +1,21 @@
[parameters]
seed = 0
region = "leeds"
number_of_households = 10000
number_of_households = 5000
zone_id = "OA21CD"
travel_times = true # Only set to true if you have travel time matrix at the level specified in boundary_geography
travel_times = true # Only set to true if you have travel time matrix at the level specified in boundary_geography
boundary_geography = "OA"

[matching]
required_columns = ["number_adults", "number_children"]
optional_columns = [
"number_cars",
"num_pension_age",
"rural_urban_2_categories",
"employment_status",
"tenure_status",
]
n_matches = 10

[work_assignment]
use_percentages = true
Expand Down
14 changes: 0 additions & 14 deletions config/base_500.toml

This file was deleted.

13 changes: 0 additions & 13 deletions config/base_5000.toml

This file was deleted.

13 changes: 0 additions & 13 deletions config/base_all.toml

This file was deleted.

36 changes: 18 additions & 18 deletions notebooks/2.1_sandbox-match_households.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -54,7 +54,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -75,7 +75,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -89,7 +89,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -111,7 +111,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -128,7 +128,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -180,7 +180,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -228,7 +228,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -276,7 +276,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -299,7 +299,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -319,7 +319,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -471,7 +471,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -530,7 +530,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -610,7 +610,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -789,7 +789,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -879,7 +879,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -945,7 +945,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -986,7 +986,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
Expand Down
3,926 changes: 3,526 additions & 400 deletions notebooks/2_match_households_and_individuals.ipynb

Large diffs are not rendered by default.

29 changes: 0 additions & 29 deletions scripts/1_prep_synthpop.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,35 +27,6 @@ def main(config_file):
acbm.root_path / f"data/external/spc_output/{region}_people_hh.parquet"
)

# People and time-use data
# Subset of (non-time-use) features to include and unnest
# The features can be found here: https://github.com/alan-turing-institute/uatk-spc/blob/main/synthpop.proto
features = {
"health": [
"bmi",
"has_cardiovascular_disease",
"has_diabetes",
"has_high_blood_pressure",
"self_assessed_health",
"life_satisfaction",
],
"demographics": ["age_years", "ethnicity", "sex", "nssec8"],
"employment": ["sic1d2007", "sic2d2007", "pwkstat", "salary_yearly"],
}

# build the table
spc_people_tu = (
Builder(path, region, backend="polars", input_type="parquet")
.add_households()
.add_time_use_diaries(features, diary_type="weekday_diaries")
.build()
)

# save the output
spc_people_tu.write_parquet(
acbm.root_path / f"data/external/spc_output/{region}_people_tu.parquet"
)


if __name__ == "__main__":
main()
Loading

0 comments on commit 87fa6be

Please sign in to comment.