Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Story/bias adjust wind/3055 #3058

Merged
merged 18 commits into from
Aug 16, 2023
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions api/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,9 @@ fetch-database-partial:
fetch-database-complete:
$(POETRY_RUN) python scripts/copy_db_from_pod_to_local.py --mode=complete

fetch-database-machine-learning:
$(POETRY_RUN) python scripts/copy_db_from_pod_to_local.py --mode=ml

restore-partial-database-local:
# Restore a partial database to local server.
# Set environment variable PGPASSWORD=mywpspassword if you want to skip typing password for authentication.
Expand Down
36 changes: 36 additions & 0 deletions api/alembic/versions/b8aa2d38e9e1_add_bias_adj_cols_for_wind.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""Add bias adj cols for wind

Revision ID: b8aa2d38e9e1
Revises: 4916cd5313de
Create Date: 2023-08-10 14:25:49.259998

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = 'b8aa2d38e9e1'
down_revision = '4916cd5313de'
branch_labels = None
depends_on = None


def upgrade():
# ### commands auto generated by Alembic ###
op.create_index(op.f('ix_morecast_forecast_precip'), 'morecast_forecast', ['precip'], unique=False)
dgboss marked this conversation as resolved.
Show resolved Hide resolved
op.create_index(op.f('ix_morecast_forecast_temp'), 'morecast_forecast', ['temp'], unique=False)
op.create_index(op.f('ix_morecast_forecast_wind_speed'), 'morecast_forecast', ['wind_speed'], unique=False)
op.add_column('weather_station_model_predictions', sa.Column('bias_adjusted_wdir', sa.Float(), nullable=True))
op.add_column('weather_station_model_predictions', sa.Column('bias_adjusted_wind_speed', sa.Float(), nullable=True))
# ### end Alembic commands ###


def downgrade():
# ### commands auto generated by Alembic ###
op.drop_column('weather_station_model_predictions', 'bias_adjusted_wind_speed')
op.drop_column('weather_station_model_predictions', 'bias_adjusted_wdir')
op.drop_index(op.f('ix_morecast_forecast_wind_speed'), table_name='morecast_forecast')
op.drop_index(op.f('ix_morecast_forecast_temp'), table_name='morecast_forecast')
op.drop_index(op.f('ix_morecast_forecast_precip'), table_name='morecast_forecast')
# ### end Alembic commands ###
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""Add bias adj wind cols to mat view

Revision ID: f6400f2140b9
Revises: b8aa2d38e9e1
Create Date: 2023-08-11 15:13:36.038941

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = 'f6400f2140b9'
down_revision = 'b8aa2d38e9e1'
branch_labels = None
depends_on = None


def upgrade():
# ### commands auto generated by Alembic ###
op.execute("DROP MATERIALIZED VIEW morecast_2_materialized_view;")
op.execute("""
CREATE MATERIALIZED VIEW morecast_2_materialized_view AS
SELECT weather_station_model_predictions.prediction_timestamp, prediction_models.abbreviation, weather_station_model_predictions.station_code,
weather_station_model_predictions.rh_tgl_2, weather_station_model_predictions.tmp_tgl_2, weather_station_model_predictions.bias_adjusted_temperature,
weather_station_model_predictions.bias_adjusted_rh, weather_station_model_predictions.apcp_sfc_0, weather_station_model_predictions.wdir_tgl_10,
weather_station_model_predictions.wind_tgl_10, weather_station_model_predictions.bias_adjusted_wind_speed, weather_station_model_predictions.bias_adjusted_wdir,
weather_station_model_predictions.update_date
FROM weather_station_model_predictions
JOIN prediction_model_run_timestamps
ON weather_station_model_predictions.prediction_model_run_timestamp_id = prediction_model_run_timestamps.id JOIN prediction_models
ON prediction_model_run_timestamps.prediction_model_id = prediction_models.id
JOIN (
SELECT max(weather_station_model_predictions.prediction_timestamp) AS latest_prediction, weather_station_model_predictions.station_code AS station_code,
date(weather_station_model_predictions.prediction_timestamp) AS unique_day
FROM weather_station_model_predictions
WHERE date_part('hour', weather_station_model_predictions.prediction_timestamp) = 20
GROUP BY weather_station_model_predictions.station_code, date(weather_station_model_predictions.prediction_timestamp)
) AS latest
ON weather_station_model_predictions.prediction_timestamp = latest.latest_prediction AND weather_station_model_predictions.station_code = latest.station_code
ORDER BY weather_station_model_predictions.update_date DESC;""")
# ### end Alembic commands ###


def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.execute("DROP MATERIALIZED VIEW morecast_2_materialized_view;")
op.execute("""CREATE MATERIALIZED VIEW morecast_2_materialized_view AS
SELECT weather_station_model_predictions.prediction_timestamp, prediction_models.abbreviation, weather_station_model_predictions.station_code, weather_station_model_predictions.rh_tgl_2, weather_station_model_predictions.tmp_tgl_2, weather_station_model_predictions.bias_adjusted_temperature, weather_station_model_predictions.bias_adjusted_rh, weather_station_model_predictions.apcp_sfc_0, weather_station_model_predictions.wdir_tgl_10, weather_station_model_predictions.wind_tgl_10, weather_station_model_predictions.update_date
FROM weather_station_model_predictions
JOIN prediction_model_run_timestamps
ON weather_station_model_predictions.prediction_model_run_timestamp_id = prediction_model_run_timestamps.id JOIN prediction_models
ON prediction_model_run_timestamps.prediction_model_id = prediction_models.id
JOIN (
SELECT max(weather_station_model_predictions.prediction_timestamp) AS latest_prediction, weather_station_model_predictions.station_code AS station_code,
date(weather_station_model_predictions.prediction_timestamp) AS unique_day
FROM weather_station_model_predictions
WHERE date_part('hour', weather_station_model_predictions.prediction_timestamp) = 20
GROUP BY weather_station_model_predictions.station_code, date(weather_station_model_predictions.prediction_timestamp)
) AS latest
ON weather_station_model_predictions.prediction_timestamp = latest.latest_prediction AND weather_station_model_predictions.station_code = latest.station_code
ORDER BY weather_station_model_predictions.update_date DESC;""")
# ### end Alembic commands ###
2 changes: 2 additions & 0 deletions api/app/db/crud/weather_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,8 @@ def get_latest_station_prediction_mat_view(session: Session,
MoreCast2MaterializedView.tmp_tgl_2,
MoreCast2MaterializedView.bias_adjusted_temperature,
MoreCast2MaterializedView.bias_adjusted_rh,
MoreCast2MaterializedView.bias_adjusted_wind_speed,
MoreCast2MaterializedView.bias_adjusted_wdir,
MoreCast2MaterializedView.apcp_sfc_0,
MoreCast2MaterializedView.wdir_tgl_10,
MoreCast2MaterializedView.wind_tgl_10,
Expand Down
11 changes: 9 additions & 2 deletions api/app/db/models/weather_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,12 @@
delta_precip = Column(Float, nullable=True)
# Wind direction 10m above ground.
wdir_tgl_10 = Column(Float, nullable=True)
# Wind direction adjusted for bias
bias_adjusted_wdir = Column(Float, nullable=True)
# Wind speed 10m above ground.
wind_tgl_10 = Column(Float, nullable=True)
# Wind speed adjusted for bias
bias_adjusted_wind_speed = Column(Float, nullable=True)
# Date this record was created.
create_date = Column(TZTimeStamp, nullable=False,
default=time_utils.get_utc_now())
Expand All @@ -216,8 +220,9 @@
default=time_utils.get_utc_now(), index=True)

def __str__(self):
return ('{self.station_code} {self.prediction_timestamp} {self.tmp_tgl_2} {self.apcp_sfc_0} '
'{self.delta_precip}').format(self=self)
return ('{self.station_code} {self.prediction_timestamp} {self.tmp_tgl_2} {self.bias_adjusted_temperature} '

Check warning on line 223 in api/app/db/models/weather_models.py

View check run for this annotation

Codecov / codecov/patch

api/app/db/models/weather_models.py#L223

Added line #L223 was not covered by tests
'{self.rh_tgl_2} {self.bias_adjusted_rh} {self.wdir_tgl_10} {self.bias_adjusted_wdir} {self.wind_tgl_10} '
'{self.bias_adjusted_wind_speed} {self.apcp_sfc_0} {self.delta_precip}').format(self=self)


class MoreCast2MaterializedView(Base):
Expand All @@ -230,6 +235,8 @@
apcp_sfc_0 = Column(Float, nullable=False)
bias_adjusted_rh = Column(Float, nullable=False)
bias_adjusted_temperature = Column(Float, nullable=False)
bias_adjusted_wind_speed = Column(Float, nullable=False)
bias_adjusted_wdir = Column(Float, nullable=False)
prediction_timestamp = Column(TZTimeStamp, nullable=False, index=True)
station_code = Column(Integer, nullable=True, index=True)
rh_tgl_2 = Column(Float, nullable=False)
Expand Down
11 changes: 11 additions & 0 deletions api/app/jobs/common_model_fetchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,17 @@ def transform(long, lat):
# Predict the rh
station_prediction.bias_adjusted_rh = machine.predict_rh(
station_prediction.rh_tgl_2, station_prediction.prediction_timestamp)
# Predict the wind speed
station_prediction.bias_adjusted_wind_speed = machine.predict_wind_speed(
station_prediction.wind_tgl_10,
station_prediction.prediction_timestamp
)
# Predict the wind direction
station_prediction.bias_adjusted_wdir = machine.predict_wind_direction(
station_prediction.wdir_tgl_10,
station_prediction.prediction_timestamp
)

# Update the update time (this might be an update)
station_prediction.update_date = time_utils.get_utc_now()
# Add this prediction to the session (we'll commit it later.)
Expand Down
2 changes: 1 addition & 1 deletion api/app/jobs/noaa.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@
except Exception as exception:
# We catch and log any exceptions we may have missed.
logger.error('unexpected exception processing', exc_info=exception)
rc_message = ':poop: Encountered error retrieving GFS model data from NOAA'
rc_message = ':poop: Encountered error retrieving {sys.argv[1]} model data from NOAA'

Check warning on line 408 in api/app/jobs/noaa.py

View check run for this annotation

Codecov / codecov/patch

api/app/jobs/noaa.py#L408

Added line #L408 was not covered by tests
send_rocketchat_notification(rc_message, exception)
# Exit with a failure code.
sys.exit(os.EX_SOFTWARE)
Expand Down
2 changes: 1 addition & 1 deletion api/app/weather_models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

# Key values on ModelRunGridSubsetPrediction.
# Wind direction (wdir_tgl_10_b) is handled slightly differently, so not included here.
Copy link
Collaborator

@brettedw brettedw Aug 16, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very minor, but I think this comment is no longer needed, unless wdir_tgl_10_b is something different

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually maybe this is why I'm not getting bias adjusted values for wind dir...

SCALAR_MODEL_VALUE_KEYS = ('tmp_tgl_2', 'rh_tgl_2', 'apcp_sfc_0', 'wind_tgl_10')
SCALAR_MODEL_VALUE_KEYS = ('tmp_tgl_2', 'rh_tgl_2', 'wind_tgl_10', 'apcp_sfc_0', 'wdir_tgl_10')


class ModelEnum(str, Enum):
Expand Down
6 changes: 4 additions & 2 deletions api/app/weather_models/fetch/predictions.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ async def fetch_latest_model_run_predictions_by_station_code_and_date_range(sess

daily_result = get_latest_station_prediction_mat_view(
session, active_station_codes, day_start, day_end)
for timestamp, model_abbrev, station_code, rh, temp, bias_adjusted_temp, bias_adjusted_rh, precip_24hours, wind_dir, wind_speed, update_date in daily_result:
for timestamp, model_abbrev, station_code, rh, temp, bias_adjusted_temp, bias_adjusted_rh, bias_adjusted_wind_speed, bias_adjusted_wdir, precip_24hours, wind_dir, wind_speed, update_date in daily_result:

# Create two WeatherIndeterminates, one for model predictions and one for bias corrected predictions
results.append(
Expand All @@ -162,7 +162,9 @@ async def fetch_latest_model_run_predictions_by_station_code_and_date_range(sess
determinate=f'{model_abbrev}_BIAS',
utc_timestamp=timestamp,
temperature=bias_adjusted_temp,
relative_humidity=bias_adjusted_rh
relative_humidity=bias_adjusted_rh,
wind_speed=bias_adjusted_wind_speed,
wind_dir=bias_adjusted_wdir
))
return post_process_fetched_predictions(results)

Expand Down
50 changes: 43 additions & 7 deletions api/app/weather_models/machine_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
logger = getLogger(__name__)

# Corresponding key values on HourlyActual and SampleCollection
SAMPLE_VALUE_KEYS = ('temperature', 'relative_humidity')
SAMPLE_VALUE_KEYS = ('temperature', 'relative_humidity', 'wind_speed', 'wind_direction')
# Number of days of historical actual data to learn from when training model
MAX_DAYS_TO_LEARN = 19


class LinearRegressionWrapper:
Expand All @@ -37,11 +39,14 @@
For each different reading, we have a seperate LinearRegression model.
"""

keys = ('temperature_wrapper', 'relative_humidity_wrapper')
keys = ('temperature_wrapper', 'relative_humidity_wrapper',
'wind_speed_wrapper', 'wind_direction_wrapper')

def __init__(self):
self.temperature_wrapper = LinearRegressionWrapper()
self.relative_humidity_wrapper = LinearRegressionWrapper()
self.wind_speed_wrapper = LinearRegressionWrapper()
self.wind_direction_wrapper = LinearRegressionWrapper()


class Samples:
Expand Down Expand Up @@ -104,6 +109,8 @@
def __init__(self):
self.temperature = Samples()
self.relative_humidity = Samples()
self.wind_speed = Samples()
self.wind_direction = Samples()


class StationMachineLearning:
Expand Down Expand Up @@ -137,19 +144,18 @@
# Maximum number of days to try to learn from. Experimentation has shown that
# about two weeks worth of data starts giving fairly good results compared to human forecasters.
# NOTE: This could be an environment variable.
self.max_days_to_learn = 19
self.max_days_to_learn = MAX_DAYS_TO_LEARN

def _add_sample_to_collection(self,
prediction: ModelRunGridSubsetPrediction,
actual: HourlyActual,
sample_collection: SampleCollection):
""" Take the provided prediction and observed value, adding them to the collection of samples """
# TODO: add precip and wind speed/direction to SAMPLE_VALUE_KEYS
for model_key, sample_key in zip(SCALAR_MODEL_VALUE_KEYS, SAMPLE_VALUE_KEYS):
model_value = getattr(prediction, model_key)
if model_value is not None:
actual_value = getattr(actual, sample_key)
if np.isnan(actual_value):
if actual_value is None or np.isnan(actual_value):
# If for whatever reason we don't have an actual value, we skip this one.
logger.warning('no actual value for %s', sample_key)
continue
Expand Down Expand Up @@ -215,7 +221,7 @@
# how much sample data we actually had etc., and then not mark the model as being "good".
regression_model.good_model = True

def predict_temperature(self, model_temperature, timestamp):
def predict_temperature(self, model_temperature: float, timestamp: datetime):
""" Predict the bias adjusted temperature for a given point in time, given a corresponding model
temperature.
: param model_temperature: Temperature as provided by the model
Expand All @@ -238,5 +244,35 @@
"""
hour = timestamp.hour
if self.regression_models[hour].relative_humidity_wrapper.good_model and model_rh is not None:
return self.regression_models[hour].relative_humidity_wrapper.model.predict([[model_rh]])[0]
predicted_rh = self.regression_models[hour].relative_humidity_wrapper.model.predict([[model_rh]])[0]
# in the real world the RH value can't be negative. Sometimes linear regression returns negative value, so assume 0
return max(0, predicted_rh)
return None

def predict_wind_speed(self, model_wind_speed: float, timestamp: datetime):
""" Predict the bias-adjusted wind speed for a given point in time, given a corresponding model wind speed.
: param model_wind_speed: Wind speed as provided by the model
: param timestamp: Datetime value for the predicted value
: return: The bias adjusted wind speed as predicted by the linear regression model.
"""
hour = timestamp.hour
if self.regression_models[hour].wind_speed_wrapper.good_model and model_wind_speed is not None:
predicted_wind_speed = self.regression_models[hour].wind_speed_wrapper.model.predict([[model_wind_speed]])[

Check warning on line 260 in api/app/weather_models/machine_learning.py

View check run for this annotation

Codecov / codecov/patch

api/app/weather_models/machine_learning.py#L260

Added line #L260 was not covered by tests
0]
# in the real world the wind speed can't be negative. Sometimes linear regression returns negative value, so assume 0
return max(0, predicted_wind_speed)

Check warning on line 263 in api/app/weather_models/machine_learning.py

View check run for this annotation

Codecov / codecov/patch

api/app/weather_models/machine_learning.py#L263

Added line #L263 was not covered by tests
return None

def predict_wind_direction(self, model_wind_dir: int, timestamp: datetime):
""" Predict the bias-adjusted wind direction for a given point in time, given a corresponding model wind direction.
: param model_wind_dir: Wind direction as provided by the model
: param timestamp: Datetime value for the predicted value
: return: The bias-adjusted wind direction as predicted by the linear regression model.
"""
hour = timestamp.hour
if self.regression_models[hour].wind_direction_wrapper.good_model and model_wind_dir is not None:
predicted_wind_dir = self.regression_models[hour].wind_direction_wrapper.model.predict([[model_wind_dir]])[

Check warning on line 274 in api/app/weather_models/machine_learning.py

View check run for this annotation

Codecov / codecov/patch

api/app/weather_models/machine_learning.py#L274

Added line #L274 was not covered by tests
0]
# a valid wind direction value is between 0 and 360. If the returned value is outside these bounds, correct it
return predicted_wind_dir % 360

Check warning on line 277 in api/app/weather_models/machine_learning.py

View check run for this annotation

Codecov / codecov/patch

api/app/weather_models/machine_learning.py#L277

Added line #L277 was not covered by tests
return None
Loading
Loading