Skip to content

Commit

Permalink
Merge pull request #48 from Urban-Analytics-Technology-Platform/time_…
Browse files Browse the repository at this point in the history
…estimates_workflow

Time estimates workflow
  • Loading branch information
Hussein-Mahfouz authored Oct 2, 2024
2 parents 4628798 + ba2ddab commit 41a32b6
Show file tree
Hide file tree
Showing 6 changed files with 516 additions and 196 deletions.
309 changes: 307 additions & 2 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ click = "^8.1.7"
tomlkit = "^0.13.0"
cml-pam = "0.3.2"
gdal = "<=3.8.4"
pandera = "^0.20.4"

[tool.poetry.dev-dependencies]
pytest = ">= 6"
Expand Down
80 changes: 29 additions & 51 deletions scripts/3.1_assign_primary_feasible_zones.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,69 +78,45 @@ def main(config_file):
# are compared to the travel times of the individual's actual trips from the nts
# (`tst`/`TripStart` and `tet`/`TripEnd`)

logger.info("Loading travel time matrix")

travel_times = pd.read_parquet(
acbm.root_path / "data/external/travel_times/oa/travel_time_matrix_acbm.parquet"
)

logger.info("Travel time matrix loaded")

logger.info("Merging travel time matrix with boundaries")

# convert from_id and to_id to int to match the boundaries data type
travel_times = travel_times.astype({"from_id": int, "to_id": int})

# merge travel_times with boundaries
travel_times = travel_times.merge(
boundaries[["OBJECTID", config.zone_id]],
left_on="from_id",
right_on="OBJECTID",
how="left",
)
travel_times = travel_times.drop(columns="OBJECTID")

travel_times = travel_times.merge(
boundaries[["OBJECTID", config.zone_id]],
left_on="to_id",
right_on="OBJECTID",
how="left",
suffixes=("_from", "_to"),
)
travel_times = travel_times.drop(columns="OBJECTID")

# #### Travel distance matrix
#
# Some areas aren't reachable by specific modes. We create a travel distance matrix
# to fall back on when there are no travel time calculations

logger.info("Creating travel time estimates")

travel_time_estimates = zones_to_time_matrix(
zones=boundaries, id_col=config.zone_id, to_dict=True
# TODO: move to config
travel_time_matrix_path = (
acbm.root_path / "data/external/travel_times/oa/travel_time_matrix.parquet"
)

with open(
acbm.root_path / "data/interim/assigning/travel_time_estimates.pkl", "wb"
) as f:
pkl.dump(travel_time_estimates, f)

logger.info("Travel time estimates created")
if config.parameters.travel_times:
logger.info("Loading travel time matrix")
try:
travel_times = pd.read_parquet(travel_time_matrix_path)
print("Travel time matrix loaded successfully.")
except Exception as e:
logger.info(
f"Failed to load travel time matrix: {e}. Check that you have a "
"travel_times matrix at {travel_time_matrix_path}. Otherwise set "
"travel_times to false in config"
)
raise e
else:
# If travel_times is not true or loading failed, create a new travel time matrix
logger.info("No travel time matrix found. Creating a new travel time matrix.")
# Create a new travel time matrix based on distances between zones
travel_times = zones_to_time_matrix(zones=boundaries, id_col="OA21CD")
logger.info("Travel time estimates created")

# --- Intrazonal trip times
#
# Intrazonal trips all have time = 0. Our `get_possible_zones` function finds zones
# that are within a specified % threshold from the reported time in the NTS.
# A threshold percentage from a non zero number never equals 0, so intrazonal trips
# are not found. The problem is also explained in this issue #30
#

# Below, we assign intrazonal trips a non-zero time based on the zone area

# get intrazone travel time estimates per mode

logger.info("Creating intrazonal travel time estimates")

intrazone_times = intrazone_time(boundaries.set_index("OBJECTID"))
# TODO: use config zone_id instead of OA21CD
intrazone_times = intrazone_time(zones=boundaries, key_column="OA21CD")

logger.info("Intrazonal travel time estimates created")

Expand All @@ -150,7 +126,7 @@ def main(config_file):
travel_times = replace_intrazonal_travel_time(
travel_times=travel_times,
intrazonal_estimates=intrazone_times,
column_to_replace="travel_time_p50",
column_to_replace="time",
)

logger.info("Intrazonal travel times replaced")
Expand Down Expand Up @@ -223,10 +199,11 @@ def main(config_file):
activity_chains=activity_chains_edu,
travel_times=travel_times,
activities_per_zone=activities_per_zone,
boundaries=boundaries,
key_col="id",
zone_id=config.zone_id,
filter_by_activity=True,
activity_col="education_type",
zone_id=config.zone_id,
time_tolerance=0.3,
)

Expand All @@ -249,10 +226,11 @@ def main(config_file):
activity_chains=activity_chains_work,
travel_times=travel_times,
activities_per_zone=activities_per_zone,
boundaries=boundaries,
key_col="id",
zone_id=config.zone_id,
filter_by_activity=True,
activity_col="dact",
zone_id=config.zone_id,
time_tolerance=0.3,
)

Expand Down
73 changes: 27 additions & 46 deletions scripts/3.2.3_assign_secondary_zone.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ def merge_columns_from_other(df: pd.DataFrame, other: pd.DataFrame) -> pd.DataFr
logger.info("Analysis (matrices): Step 1 - Loading travel time data")

travel_times = pd.read_parquet(
acbm.root_path / "data/external/travel_times/oa/travel_time_matrix_acbm.parquet"
acbm.root_path / "data/external/travel_times/oa/travel_time_matrix.parquet"
)

# Edit modes
Expand All @@ -323,44 +323,25 @@ def merge_columns_from_other(df: pd.DataFrame, other: pd.DataFrame) -> pd.DataFr

# I will do 2 for now

# keep only the rows that match specific "combination" values
modes_to_use = ["car", "walk", "cycle", "pt_wkday_morning"]

# Filter the DataFrame
travel_times = travel_times[travel_times["combination"].isin(modes_to_use)]

# Rename specific values in "combination" column
travel_times["combination"] = travel_times["combination"].replace(
{"cycle": "bike", "pt_wkday_morning": "pt"}
)

# Add OA21CD
# TODO: move this upstream and delete from here
logger.info("Analysis (matrices): Step 3 - Adding OA21CD to travel time data")

# convert from_id and to_id to int to match the boundaries data type
travel_times = travel_times.astype({"from_id": int, "to_id": int})

# merge travel_times with boundaries
travel_times = travel_times.merge(
boundaries[["OBJECTID", config.zone_id]],
left_on="from_id",
right_on="OBJECTID",
how="left",
)
travel_times = travel_times.drop(columns="OBJECTID")
# Check if 'time_of_day' column exists (this implies we have travel times for PT by time of day - ie travel times have not
# been generated by zones_to_time_matrix() function)
# TODO: just replace with time estimates from zones_to_time_matrix() function
if "time_of_day" in travel_times.columns:
# Apply filtering logic
travel_times = travel_times[
(travel_times["mode"] != "pt")
| (
(travel_times["mode"] == "pt")
& (travel_times["time_of_day"] == "morning")
& (travel_times["weekday"] == 1)
)
]

travel_times = travel_times.merge(
boundaries[["OBJECTID", config.zone_id]],
left_on="to_id",
right_on="OBJECTID",
how="left",
suffixes=("_from", "_to"),
)
travel_times = travel_times.drop(columns="OBJECTID")
# Rename specific values in "mode" column
travel_times["mode"] = travel_times["mode"].replace({"cycle": "bike"})

# --- Calculate OD probabilities (probabilities of choosing a destination zone for an activity, given the origin zone)
logger.info("Analysis (matrices): Step 4 - Calculating OD probabilities")
logger.info("Analysis (matrices): Step 3 - Calculating OD probabilities")

activities_per_zone = pd.read_parquet(
acbm.root_path / "data/interim/assigning/activities_per_zone.parquet"
Expand All @@ -385,14 +366,14 @@ def merge_columns_from_other(df: pd.DataFrame, other: pd.DataFrame) -> pd.DataFr

# Calculate the visit_probability: it is a function of floor_area and travel time
merged_df["visit_prob"] = np.where(
merged_df["travel_time_p50"] != 0, # avoid division by zero
round(merged_df["floor_area"] / np.sqrt(merged_df["travel_time_p50"])),
merged_df["time"] != 0, # avoid division by zero
round(merged_df["floor_area"] / np.sqrt(merged_df["time"])),
round(merged_df["floor_area"]),
)

# --- Create matrices for travel times and OD probabilities
logger.info(
"Analysis (matrices): Step 5 - Creating matrices for travel times and OD probabilities"
"Analysis (matrices): Step 4 - Creating matrices for travel times and OD probabilities"
)

# Get unique zone labels for matrix
Expand All @@ -409,8 +390,8 @@ def merge_columns_from_other(df: pd.DataFrame, other: pd.DataFrame) -> pd.DataFr

matrix_travel_times = create_od_matrices(
df=merged_df,
mode_column="combination",
value_column="travel_time_p50",
mode_column="mode",
value_column="time",
zone_labels=zone_labels,
fill_value=300, # replace missing travel times with 6 hours (they are unreachable)
zone_from=config.origin_zone_id(zone_id),
Expand All @@ -419,7 +400,7 @@ def merge_columns_from_other(df: pd.DataFrame, other: pd.DataFrame) -> pd.DataFr

matrix_od_probs = create_od_matrices(
df=merged_df,
mode_column="combination",
mode_column="mode",
value_column="visit_prob",
zone_labels=zone_labels,
# replace missing probabilities with 1. There are no activities so shouldn't be visited
Expand All @@ -431,9 +412,9 @@ def merge_columns_from_other(df: pd.DataFrame, other: pd.DataFrame) -> pd.DataFr
)

# Create ODMatrix objects
logger.info("Analysis (matrices): Step 6 - Creating ODMatrix objects")
logger.info("Analysis (matrices): Step 5 - Creating ODMatrix objects")

mode_types = travel_times["combination"].unique()
mode_types = travel_times["mode"].unique()

matrices_pam_travel_time = [
ODMatrix("time", mode, zone_labels, zone_labels, matrix_travel_times[mode])
Expand All @@ -449,7 +430,7 @@ def merge_columns_from_other(df: pd.DataFrame, other: pd.DataFrame) -> pd.DataFr
matrices_pam_all = matrices_pam_travel_time + matrices_pam_od_probs

# create ODFactory
logger.info("Analysis (matrices): Step 7 - Creating ODFactory object")
logger.info("Analysis (matrices): Step 6 - Creating ODFactory object")

od = ODFactory.from_matrices(matrices=matrices_pam_all)

Expand All @@ -459,7 +440,7 @@ def merge_columns_from_other(df: pd.DataFrame, other: pd.DataFrame) -> pd.DataFr
update_population_plans(population, od)

# --- Save
logger.info("Saving: Step 9 - Saving population")
logger.info("Saving: Step 7 - Saving population")

write.to_csv(population, dir=(acbm.root_path / "data/processed/activities_pam"))

Expand Down
Loading

0 comments on commit 41a32b6

Please sign in to comment.