Skip to content

Commit

Permalink
Propagate building properties to generated examples.
Browse files Browse the repository at this point in the history
This will enable the inference pipeline to propagate these same properties to
the output file.

This CL also simplifies the handling of building coordinates in the example
generation pipeline as a bonus.

PiperOrigin-RevId: 573114475
  • Loading branch information
jzxu authored and copybara-github committed Oct 13, 2023
1 parent 1cf57df commit f9fde5c
Show file tree
Hide file tree
Showing 10 changed files with 541 additions and 269 deletions.
40 changes: 20 additions & 20 deletions src/generate_examples_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
--cloud_region=us-west1
"""

import os
import time
from typing import List

Expand Down Expand Up @@ -164,6 +165,8 @@

Polygon = shapely.geometry.polygon.Polygon

BUILDINGS_FILE_NAME = 'processed_buildings.parquet'


def _read_image_config(path: str) -> List[str]:
with tf.io.gfile.GFile(path, 'r') as f:
Expand Down Expand Up @@ -207,35 +210,32 @@ def main(args):
raise ValueError('At least labels_file (for labeled examples extraction) '
'or buildings_method != none (for unlabeled data) should '
'be specified.')
if config.buildings_method != 'none':

buildings_path = os.path.join(config.output_dir, BUILDINGS_FILE_NAME)
if config.labels_file:
generate_examples.read_labels_file(
config.labels_file,
config.label_property,
config.labels_to_classes,
config.num_keep_labeled_examples,
buildings_path
)
buildings_labeled = True
else:
if config.aoi_path:
aois = buildings.read_aois(config.aoi_path)
else:
aois = [read_raster.get_raster_bounds(path, gdal_env)
for path in after_image_patterns]
try:
building_centroids = generate_examples.get_building_centroids(
config, aois
generate_examples.download_building_footprints(
config, aois, buildings_path
)
except generate_examples.NotInitializedEarthEngineError:
logging.fatal('Could not initialize Earth Engine.', exc_info=True)
except generate_examples.NoBuildingFoundError:
logging.fatal('No building is found.', exc_info=True)
logging.info('Found %d buildings in area of interest.',
len(building_centroids))
else:
# Only if one wants to extract labeled examples and labels_file is provided.
building_centroids = []

if config.labels_file:
labeled_coordinates = generate_examples.read_labels_file(
config.labels_file,
config.label_property,
config.labels_to_classes,
config.num_keep_labeled_examples,
)
else:
labeled_coordinates = []
buildings_labeled = False

generate_examples.generate_examples_pipeline(
before_image_patterns,
Expand All @@ -245,8 +245,8 @@ def main(args):
config.resolution,
config.output_dir,
config.output_shards,
building_centroids,
labeled_coordinates,
buildings_path,
buildings_labeled,
config.use_dataflow,
gdal_env,
timestamped_dataset,
Expand Down
124 changes: 92 additions & 32 deletions src/skai/buildings.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

"""Functions for reading building centroids from files."""

from typing import List, Tuple
import os
import geopandas as gpd
import pandas as pd
import shapely.geometry
Expand All @@ -24,8 +24,8 @@
Polygon = shapely.geometry.polygon.Polygon


def _read_buildings_csv(path: str) -> List[Tuple[float, float]]:
"""Reads (longitude, latitude) coordinates from a CSV file.
def _read_buildings_csv(path: str) -> gpd.GeoDataFrame:
"""Reads CSV file containing building footprints to GeoDataFrame.
The file should contain "longitude" and "latitude" columns.
Expand All @@ -40,48 +40,48 @@ def _read_buildings_csv(path: str) -> List[Tuple[float, float]]:
"""
with tf.io.gfile.GFile(path, 'r') as csv_file:
df = pd.read_csv(csv_file)
if 'longitude' not in df.columns or 'latitude' not in df.columns:
raise ValueError(
f'Malformed CSV file "{path}". File does not contain "longitude" and '
'"latitude" columns')
return [(row.longitude, row.latitude) for _, row in df.iterrows()]
if 'geometry' in df.columns:
geometries = gpd.GeoSeries.from_wkt(df['geometry'])
df.drop(columns=['geometry'], inplace=True)
elif 'wkt' in df.columns:
geometries = gpd.GeoSeries.from_wkt(df['wkt'])
df.drop(columns=['wkt'], inplace=True)
elif 'longitude' in df.columns and 'latitude' in df.columns:
geometries = gpd.points_from_xy(df['longitude'], df['latitude'])
df.drop(columns=['longitude', 'latitude'], inplace=True)
else:
raise ValueError(f'No geometry information found in file "{path}"')

return gpd.GeoDataFrame(df, geometry=geometries, crs=4326)


def read_buildings_file(path: str,
regions: List[Polygon]) -> List[Tuple[float, float]]:
"""Extracts building coordinates from a file.
def convert_buildings_file(
path: str, regions: list[Polygon], output_path: str
) -> None:
"""Converts an input file encoding building footprints to the standard format.
Supported file formats are csv, shapefile, and geojson.
Also filters out any buildings that don't fall in one of the specified region
polygons.
Supported file formats are csv and anything that GeoPandas handles.
Args:
path: Path to buildings file.
regions: Regions to where building coordinates should come from.
Returns:
List of (longitude, latitude) building coordinates.
output_path: Path to write buildings GeoDataFrame to.
"""
if path.lower().endswith('.csv'):
coords = _read_buildings_csv(path)
buildings_gdf = _read_buildings_csv(path)
else:
coords = []
df = gpd.read_file(path).to_crs(epsg=4326)
geometries = list(df.geometry.values)
for g in geometries:
centroid = g.centroid
coords.append((centroid.x, centroid.y))

filtered_coords = []
for lon, lat in coords:
point = Point(lon, lat)
for region in regions:
if region.intersects(point):
filtered_coords.append((lon, lat))
break
with tf.io.gfile.GFile(path, 'rb') as f:
buildings_gdf = gpd.read_file(f).to_crs(epsg=4326)

return filtered_coords
combined_regions = gpd.GeoSeries(regions).unary_union
in_regions = buildings_gdf.intersects(combined_regions)
write_buildings_file(buildings_gdf[in_regions], output_path)


def read_aois(path: str) -> List[Polygon]:
def read_aois(path: str) -> list[Polygon]:
"""Reads area of interest polygons from a file.
Common file formats such as shapefile and GeoJSON are supported. However, the
Expand All @@ -106,3 +106,63 @@ def read_aois(path: str) -> List[Polygon]:
raise ValueError(
f'Unexpected geometry for area of interest: "{g.geometryType()}"')
return geometries


def write_buildings_file(gdf: gpd.GeoDataFrame, output_path: str) -> None:
  """Writes a GeoDataFrame of building geometries to file.

  Serializes GeoDataFrame using Parquet file format to allow fast reading of
  individual columns, such as longitude and latitude, in large datasets.

  The geometries are reprojected to EPSG:4326 before writing. If either the
  "longitude" or the "latitude" column is missing, both columns are computed
  from the centroids of the reprojected geometries, so the written file always
  contains both columns and they are expressed in the same CRS as the geometry.

  Args:
    gdf: GeoDataFrame of building geometries. It is passed through to_crs, so
      it needs a CRS that can be converted to EPSG:4326.
    output_path: Output path.
  """
  # to_crs returns a new GeoDataFrame, so the caller's frame is not modified
  # when the coordinate columns are added below.
  output_gdf = gdf.to_crs(4326)

  has_coordinates = {'longitude', 'latitude'}.issubset(output_gdf.columns)
  if not has_coordinates:
    # Take centroids after reprojection so the stored values are degrees.
    centroids = output_gdf.geometry.centroid
    output_gdf['longitude'] = [c.x for c in centroids]
    output_gdf['latitude'] = [c.y for c in centroids]

  # dirname is '' when output_path has no directory component; there is
  # nothing to create in that case.
  output_dir = os.path.dirname(output_path)
  if output_dir and not tf.io.gfile.exists(output_dir):
    tf.io.gfile.makedirs(output_dir)
  with tf.io.gfile.GFile(output_path, 'wb') as f:
    f.closed = False  # Work-around for GFile issue.
    output_gdf.to_parquet(f, index=False)


def read_buildings_file(path: str) -> gpd.GeoDataFrame:
  """Loads building geometries from a Parquet file into a GeoDataFrame.

  The file is expected to have been produced by write_buildings_file.

  Args:
    path: Location of the serialized GeoDataFrame.

  Returns:
    Buildings GeoDataFrame, converted to EPSG:4326.
  """
  with tf.io.gfile.GFile(path, 'rb') as parquet_file:
    parquet_file.closed = False  # Work-around for GFile issue.
    buildings_gdf = gpd.read_parquet(parquet_file)
    return buildings_gdf.to_crs(4326)


def read_building_coordinates(path: str) -> pd.DataFrame:
  """Loads just the coordinate columns of a buildings file.

  The file is expected to have been produced by write_buildings_file. Only
  the two requested columns are read from the Parquet file.

  Args:
    path: Path to buildings file. Should be a GeoDataFrame in parquet format.

  Returns:
    DataFrame (not GeoDataFrame) containing only the "longitude" and
    "latitude" columns.
  """
  coordinate_columns = ['longitude', 'latitude']
  with tf.io.gfile.GFile(path, 'rb') as parquet_file:
    parquet_file.closed = False  # Work-around for GFile issue.
    return pd.read_parquet(parquet_file, columns=coordinate_columns)
Loading

0 comments on commit f9fde5c

Please sign in to comment.