bcgov · conbrad · Aug 16, 2023 · May 8, 2023 · May 8, 2023 · May 11, 2023
diff --git a/api/Makefile b/api/Makefile
@@ -123,6 +123,9 @@ fetch-database-partial:
 fetch-database-complete:
 	$(POETRY_RUN) python scripts/copy_db_from_pod_to_local.py --mode=complete
 
+# Run scripts/copy_db_from_pod_to_local.py with --mode=ml (presumably the data needed for machine learning - confirm in the script).
+fetch-database-machine-learning:
+	$(POETRY_RUN) python scripts/copy_db_from_pod_to_local.py --mode=ml
+
 restore-partial-database-local:
 	# Restore a partial database to local server.
 	# Set environment variable PGPASSWORD=mywpspassword if you want to skip typing password for authentication.

diff --git a/api/alembic/versions/b8aa2d38e9e1_add_bias_adj_cols_for_wind.py b/api/alembic/versions/b8aa2d38e9e1_add_bias_adj_cols_for_wind.py
@@ -0,0 +1,36 @@
+"""Add bias adj cols for wind
+
+Revision ID: b8aa2d38e9e1
+Revises: 4916cd5313de
+Create Date: 2023-08-10 14:25:49.259998
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = 'b8aa2d38e9e1'
+down_revision = '4916cd5313de'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    """ Add indexes on morecast_forecast and bias adjusted wind columns on weather_station_model_predictions. """
+    # ### commands auto generated by Alembic ###
+    # Non-unique indexes on the precip, temp and wind_speed columns of morecast_forecast.
+    op.create_index(op.f('ix_morecast_forecast_precip'), 'morecast_forecast', ['precip'], unique=False)
+    op.create_index(op.f('ix_morecast_forecast_temp'), 'morecast_forecast', ['temp'], unique=False)
+    op.create_index(op.f('ix_morecast_forecast_wind_speed'), 'morecast_forecast', ['wind_speed'], unique=False)
+    # Nullable float columns for the bias adjusted wind direction and wind speed.
+    op.add_column('weather_station_model_predictions', sa.Column('bias_adjusted_wdir', sa.Float(), nullable=True))
+    op.add_column('weather_station_model_predictions', sa.Column('bias_adjusted_wind_speed', sa.Float(), nullable=True))
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    """ Undo upgrade(): drop the bias adjusted wind columns, then the morecast_forecast indexes. """
+    # ### commands auto generated by Alembic ###
+    # Columns and indexes are removed in the reverse of the order in which upgrade() created them.
+    op.drop_column('weather_station_model_predictions', 'bias_adjusted_wind_speed')
+    op.drop_column('weather_station_model_predictions', 'bias_adjusted_wdir')
+    op.drop_index(op.f('ix_morecast_forecast_wind_speed'), table_name='morecast_forecast')
+    op.drop_index(op.f('ix_morecast_forecast_temp'), table_name='morecast_forecast')
+    op.drop_index(op.f('ix_morecast_forecast_precip'), table_name='morecast_forecast')
+    # ### end Alembic commands ###
diff --git a/api/alembic/versions/f6400f2140b9_add_bias_adj_wind_cols_to_mat_view.py b/api/alembic/versions/f6400f2140b9_add_bias_adj_wind_cols_to_mat_view.py
@@ -0,0 +1,63 @@
+"""Add bias adj wind cols to mat view
+
+Revision ID: f6400f2140b9
+Revises: b8aa2d38e9e1
+Create Date: 2023-08-11 15:13:36.038941
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = 'f6400f2140b9'
+down_revision = 'b8aa2d38e9e1'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    """ Drop and re-create morecast_2_materialized_view so that it also selects
+    bias_adjusted_wind_speed and bias_adjusted_wdir from weather_station_model_predictions. """
+    # ### commands auto generated by Alembic ###
+    # The existing view is dropped first; the statement fails if the view does not exist (no IF EXISTS).
+    op.execute("DROP MATERIALIZED VIEW morecast_2_materialized_view;")
+    # Re-create the view. The inner query picks, per station and per day, the latest prediction timestamp
+    # among predictions whose hour is 20; the outer query joins predictions to their model abbreviation.
+    op.execute("""
+               CREATE MATERIALIZED VIEW morecast_2_materialized_view AS
+            SELECT weather_station_model_predictions.prediction_timestamp, prediction_models.abbreviation, weather_station_model_predictions.station_code, 
+                weather_station_model_predictions.rh_tgl_2, weather_station_model_predictions.tmp_tgl_2, weather_station_model_predictions.bias_adjusted_temperature,
+                weather_station_model_predictions.bias_adjusted_rh, weather_station_model_predictions.apcp_sfc_0, weather_station_model_predictions.wdir_tgl_10, 
+                weather_station_model_predictions.wind_tgl_10, weather_station_model_predictions.bias_adjusted_wind_speed, weather_station_model_predictions.bias_adjusted_wdir,
+                weather_station_model_predictions.update_date
+            FROM weather_station_model_predictions
+            JOIN prediction_model_run_timestamps
+            ON weather_station_model_predictions.prediction_model_run_timestamp_id = prediction_model_run_timestamps.id JOIN prediction_models
+            ON prediction_model_run_timestamps.prediction_model_id = prediction_models.id
+            JOIN (
+            SELECT max(weather_station_model_predictions.prediction_timestamp) AS latest_prediction, weather_station_model_predictions.station_code AS station_code,
+            date(weather_station_model_predictions.prediction_timestamp) AS unique_day
+            FROM weather_station_model_predictions
+            WHERE date_part('hour', weather_station_model_predictions.prediction_timestamp) = 20
+            GROUP BY weather_station_model_predictions.station_code, date(weather_station_model_predictions.prediction_timestamp)
+            ) AS latest
+            ON weather_station_model_predictions.prediction_timestamp = latest.latest_prediction AND weather_station_model_predictions.station_code = latest.station_code
+            ORDER BY weather_station_model_predictions.update_date DESC;""")
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    """ Drop and re-create morecast_2_materialized_view without the bias_adjusted_wind_speed and
+    bias_adjusted_wdir columns that upgrade() added to its select list. """
+    # ### commands auto generated by Alembic - please adjust! ###
+    # The existing view is dropped first; the statement fails if the view does not exist (no IF EXISTS).
+    op.execute("DROP MATERIALIZED VIEW morecast_2_materialized_view;")
+    # Same query as in upgrade(), minus the two bias adjusted wind columns.
+    op.execute("""CREATE MATERIALIZED VIEW morecast_2_materialized_view AS 
+               SELECT weather_station_model_predictions.prediction_timestamp, prediction_models.abbreviation, weather_station_model_predictions.station_code, weather_station_model_predictions.rh_tgl_2, weather_station_model_predictions.tmp_tgl_2, weather_station_model_predictions.bias_adjusted_temperature, weather_station_model_predictions.bias_adjusted_rh, weather_station_model_predictions.apcp_sfc_0, weather_station_model_predictions.wdir_tgl_10, weather_station_model_predictions.wind_tgl_10, weather_station_model_predictions.update_date 
+               FROM weather_station_model_predictions 
+               JOIN prediction_model_run_timestamps 
+               ON weather_station_model_predictions.prediction_model_run_timestamp_id = prediction_model_run_timestamps.id JOIN prediction_models 
+               ON prediction_model_run_timestamps.prediction_model_id = prediction_models.id 
+               JOIN ( 
+               SELECT max(weather_station_model_predictions.prediction_timestamp) AS latest_prediction, weather_station_model_predictions.station_code AS station_code, 
+               date(weather_station_model_predictions.prediction_timestamp) AS unique_day 
+               FROM weather_station_model_predictions 
+               WHERE date_part('hour', weather_station_model_predictions.prediction_timestamp) = 20 
+               GROUP BY weather_station_model_predictions.station_code, date(weather_station_model_predictions.prediction_timestamp) 
+               ) AS latest 
+               ON weather_station_model_predictions.prediction_timestamp = latest.latest_prediction AND weather_station_model_predictions.station_code = latest.station_code 
+               ORDER BY weather_station_model_predictions.update_date DESC;""")
+    # ### end Alembic commands ###
diff --git a/api/app/db/crud/weather_models.py b/api/app/db/crud/weather_models.py
@@ -323,6 +323,8 @@ def get_latest_station_prediction_mat_view(session: Session,
                            MoreCast2MaterializedView.tmp_tgl_2,
                            MoreCast2MaterializedView.bias_adjusted_temperature,
                            MoreCast2MaterializedView.bias_adjusted_rh,
+                           MoreCast2MaterializedView.bias_adjusted_wind_speed,
+                           MoreCast2MaterializedView.bias_adjusted_wdir,
                            MoreCast2MaterializedView.apcp_sfc_0,
                            MoreCast2MaterializedView.wdir_tgl_10,
                            MoreCast2MaterializedView.wind_tgl_10,

diff --git a/api/app/db/models/weather_models.py b/api/app/db/models/weather_models.py
@@ -206,8 +206,12 @@
     delta_precip = Column(Float, nullable=True)
     # Wind direction 10m above ground.
     wdir_tgl_10 = Column(Float, nullable=True)
+    # Wind direction adjusted for bias
+    bias_adjusted_wdir = Column(Float, nullable=True)
     # Wind speed 10m above ground.
     wind_tgl_10 = Column(Float, nullable=True)
+    # Wind speed adjusted for bias
+    bias_adjusted_wind_speed = Column(Float, nullable=True)
     # Date this record was created.
     create_date = Column(TZTimeStamp, nullable=False,
                          default=time_utils.get_utc_now())
@@ -216,8 +220,9 @@
                          default=time_utils.get_utc_now(), index=True)
 
     def __str__(self):
-        return ('{self.station_code} {self.prediction_timestamp} {self.tmp_tgl_2} {self.apcp_sfc_0} '
-                '{self.delta_precip}').format(self=self)
+        # Space separated summary: station, timestamp, then each model value next to its bias adjusted value.
+        return (f'{self.station_code} {self.prediction_timestamp} {self.tmp_tgl_2} {self.bias_adjusted_temperature} '
+                f'{self.rh_tgl_2} {self.bias_adjusted_rh} {self.wdir_tgl_10} {self.bias_adjusted_wdir} {self.wind_tgl_10} '
+                f'{self.bias_adjusted_wind_speed} {self.apcp_sfc_0} {self.delta_precip}')
 
 
 class MoreCast2MaterializedView(Base):
@@ -230,6 +235,8 @@
     apcp_sfc_0 = Column(Float, nullable=False)
     bias_adjusted_rh = Column(Float, nullable=False)
     bias_adjusted_temperature = Column(Float, nullable=False)
+    bias_adjusted_wind_speed = Column(Float, nullable=False)
+    bias_adjusted_wdir = Column(Float, nullable=False)
     prediction_timestamp = Column(TZTimeStamp, nullable=False, index=True)
     station_code = Column(Integer, nullable=True, index=True)
     rh_tgl_2 = Column(Float, nullable=False)

diff --git a/api/app/jobs/common_model_fetchers.py b/api/app/jobs/common_model_fetchers.py
@@ -288,6 +288,17 @@ def transform(long, lat):
         # Predict the rh
         station_prediction.bias_adjusted_rh = machine.predict_rh(
             station_prediction.rh_tgl_2, station_prediction.prediction_timestamp)
+        # Predict the wind speed
+        station_prediction.bias_adjusted_wind_speed = machine.predict_wind_speed(
+            station_prediction.wind_tgl_10,
+            station_prediction.prediction_timestamp
+        )
+        # Predict the wind direction
+        station_prediction.bias_adjusted_wdir = machine.predict_wind_direction(
+            station_prediction.wdir_tgl_10,
+            station_prediction.prediction_timestamp
+        )
+
         # Update the update time (this might be an update)
         station_prediction.update_date = time_utils.get_utc_now()
         # Add this prediction to the session (we'll commit it later.)

diff --git a/api/app/jobs/noaa.py b/api/app/jobs/noaa.py
@@ -405,7 +405,7 @@
     except Exception as exception:
         # We catch and log any exceptions we may have missed.
         logger.error('unexpected exception processing', exc_info=exception)
-        rc_message = ':poop: Encountered error retrieving GFS model data from NOAA'
+        rc_message = f':poop: Encountered error retrieving {sys.argv[1]} model data from NOAA'
         send_rocketchat_notification(rc_message, exception)
         # Exit with a failure code.
         sys.exit(os.EX_SOFTWARE)

diff --git a/api/app/weather_models/__init__.py b/api/app/weather_models/__init__.py
@@ -12,7 +12,7 @@
 
 # Key values on ModelRunGridSubsetPrediction.
 # Wind direction (wdir_tgl_10_b) is handled slightly differently, so not included here.
-SCALAR_MODEL_VALUE_KEYS = ('tmp_tgl_2', 'rh_tgl_2', 'apcp_sfc_0', 'wind_tgl_10')
+SCALAR_MODEL_VALUE_KEYS = ('tmp_tgl_2', 'rh_tgl_2', 'wind_tgl_10', 'wdir_tgl_10', 'apcp_sfc_0')  # order must line up with SAMPLE_VALUE_KEYS (zipped pairwise)
 
 
 class ModelEnum(str, Enum):

diff --git a/api/app/weather_models/fetch/predictions.py b/api/app/weather_models/fetch/predictions.py
@@ -140,7 +140,7 @@ async def fetch_latest_model_run_predictions_by_station_code_and_date_range(sess
 
         daily_result = get_latest_station_prediction_mat_view(
             session, active_station_codes, day_start, day_end)
-        for timestamp, model_abbrev, station_code, rh, temp, bias_adjusted_temp, bias_adjusted_rh, precip_24hours, wind_dir, wind_speed, update_date in daily_result:
+        for timestamp, model_abbrev, station_code, rh, temp, bias_adjusted_temp, bias_adjusted_rh, bias_adjusted_wind_speed, bias_adjusted_wdir, precip_24hours, wind_dir, wind_speed, update_date in daily_result:
 
             # Create two WeatherIndeterminates, one for model predictions and one for bias corrected predictions
             results.append(
@@ -162,7 +162,9 @@ async def fetch_latest_model_run_predictions_by_station_code_and_date_range(sess
                     determinate=f'{model_abbrev}_BIAS',
                     utc_timestamp=timestamp,
                     temperature=bias_adjusted_temp,
-                    relative_humidity=bias_adjusted_rh
+                    relative_humidity=bias_adjusted_rh,
+                    wind_speed=bias_adjusted_wind_speed,
+                    wind_dir=bias_adjusted_wdir
                 ))
     return post_process_fetched_predictions(results)
 

diff --git a/api/app/weather_models/machine_learning.py b/api/app/weather_models/machine_learning.py
@@ -19,7 +19,9 @@
 logger = getLogger(__name__)
 
 # Corresponding key values on HourlyActual and SampleCollection
-SAMPLE_VALUE_KEYS = ('temperature', 'relative_humidity')
+SAMPLE_VALUE_KEYS = ('temperature', 'relative_humidity', 'wind_speed', 'wind_direction')
+# Number of days of historical actual data to learn from when training model
+MAX_DAYS_TO_LEARN = 19
 
 
 class LinearRegressionWrapper:
@@ -37,11 +39,14 @@
     For each different reading, we have a seperate LinearRegression model.
     """
 
-    keys = ('temperature_wrapper', 'relative_humidity_wrapper')
+    keys = ('temperature_wrapper', 'relative_humidity_wrapper',
+            'wind_speed_wrapper', 'wind_direction_wrapper')
 
     def __init__(self):
         self.temperature_wrapper = LinearRegressionWrapper()
         self.relative_humidity_wrapper = LinearRegressionWrapper()
+        self.wind_speed_wrapper = LinearRegressionWrapper()
+        self.wind_direction_wrapper = LinearRegressionWrapper()
 
 
 class Samples:
@@ -104,6 +109,8 @@
     def __init__(self):
         self.temperature = Samples()
         self.relative_humidity = Samples()
+        self.wind_speed = Samples()
+        self.wind_direction = Samples()
 
 
 class StationMachineLearning:
@@ -137,19 +144,18 @@
         # Maximum number of days to try to learn from. Experimentation has shown that
         # about two weeks worth of data starts giving fairly good results compared to human forecasters.
         # NOTE: This could be an environment variable.
-        self.max_days_to_learn = 19
+        self.max_days_to_learn = MAX_DAYS_TO_LEARN
 
     def _add_sample_to_collection(self,
                                   prediction: ModelRunGridSubsetPrediction,
                                   actual: HourlyActual,
                                   sample_collection: SampleCollection):
         """ Take the provided prediction and observed value, adding them to the collection of samples """
-        # TODO: add precip and wind speed/direction to SAMPLE_VALUE_KEYS
         for model_key, sample_key in zip(SCALAR_MODEL_VALUE_KEYS, SAMPLE_VALUE_KEYS):
             model_value = getattr(prediction, model_key)
             if model_value is not None:
                 actual_value = getattr(actual, sample_key)
-                if np.isnan(actual_value):
+                if actual_value is None or np.isnan(actual_value):
                     # If for whatever reason we don't have an actual value, we skip this one.
                     logger.warning('no actual value for %s', sample_key)
                     continue
@@ -215,7 +221,7 @@
                 # how much sample data we actually had etc., and then not mark the model as being "good".
                 regression_model.good_model = True
 
-    def predict_temperature(self, model_temperature, timestamp):
+    def predict_temperature(self, model_temperature: float, timestamp: datetime):
         """ Predict the bias adjusted temperature for a given point in time, given a corresponding model
         temperature.
         : param model_temperature: Temperature as provided by the model
@@ -238,5 +244,35 @@
         """
         hour = timestamp.hour
         if self.regression_models[hour].relative_humidity_wrapper.good_model and model_rh is not None:
-            return self.regression_models[hour].relative_humidity_wrapper.model.predict([[model_rh]])[0]
+            predicted_rh = self.regression_models[hour].relative_humidity_wrapper.model.predict([[model_rh]])[0]
+            # in the real world the RH value can't be negative. Sometimes linear regression returns negative value, so assume 0
+            return max(0, predicted_rh)
+        return None
+
+    def predict_wind_speed(self, model_wind_speed: float, timestamp: datetime):
+        """ Bias adjust a model wind speed using the regression model for the hour of the timestamp.
+        : param model_wind_speed: Wind speed as provided by the model
+        : param timestamp: Datetime value for the predicted value
+        : return: The bias adjusted wind speed (never below 0), or None if no prediction can be made.
+        """
+        wrapper = self.regression_models[timestamp.hour].wind_speed_wrapper
+        if not wrapper.good_model or model_wind_speed is None:
+            # Either the model for this hour isn't marked as good, or there's no model value to adjust.
+            return None
+        adjusted_wind_speed = wrapper.model.predict([[model_wind_speed]])[0]
+        # Linear regression can return a negative value; wind speed can't be negative, so assume 0.
+        return max(0, adjusted_wind_speed)
+
+    def predict_wind_direction(self, model_wind_dir: int, timestamp: datetime):
+        """ Bias adjust a model wind direction using the regression model for the hour of the timestamp.
+        : param model_wind_dir: Wind direction as provided by the model
+        : param timestamp: Datetime value for the predicted value
+        : return: The bias-adjusted wind direction taken modulo 360, or None if no prediction can be made.
+        """
+        wrapper = self.regression_models[timestamp.hour].wind_direction_wrapper
+        can_predict = wrapper.good_model and model_wind_dir is not None
+        if can_predict:
+            adjusted_wind_dir = wrapper.model.predict([[model_wind_dir]])[0]
+            # The regression output may fall outside 0 to 360, so wrap it back with a modulo.
+            return adjusted_wind_dir % 360
         return None