M2U: Introduce external variables (exog) to assist in prediction #45

Status: Open · wants to merge 5 commits into base: master

Showing changes from all commits
2 changes: 1 addition & 1 deletion README.md
@@ -120,7 +120,7 @@ We provide tutorial about how to develop your own method, you can [click here](.
We provide tutorial about how to evaluate on your own time series, you can [click here](./docs/tutorials/steps_to_evaluate_your_own_time_series.md).

## Time series code bug the drop-last illustration
- Implementations of existing methods often employ a Drop Last trick in the testing phase. To accelerate the testing, it is common to split the data into batches. However, if we discard the last incomplete batch with fewer instances than the batch size, this causes unfair comparisons. For example, in Figure 4, the ETTh2 has a testing series of length 2,880, and we need to predict 336 future time steps using a look-back window of size 512. If we select the batch sizes to be 32, 64, and 128, the numbers of samples in the last batch are 17, 49, and 113, respectively. **Unless all methods use the same batch size, discarding the last batch of test samples is unfair, as the actual usage length of the test set is inconsistent.** Table 2 shows the testing results of PatchTST, DLinear, and FEDformer on the ETTh2 with different batch sizes and the “Drop Last” trick turned on. **We observe that the performance of the methods changes when varying the batch size.**
+ Implementations of existing methods often employ a `Drop Last trick in the testing phase`. To accelerate the testing, it is common to split the data into batches. However, if we discard the last incomplete batch with fewer instances than the batch size, this causes unfair comparisons. For example, in Figure 4, the ETTh2 has a testing series of length 2,880, and we need to predict 336 future time steps using a look-back window of size 512. If we select the batch sizes to be 32, 64, and 128, the numbers of samples in the last batch are 17, 49, and 113, respectively. **Unless all methods use the same batch size, discarding the last batch of test samples is unfair, as the actual usage length of the test set is inconsistent.** Table 2 shows the testing results of PatchTST, DLinear, and FEDformer on the ETTh2 with different batch sizes and the “Drop Last” trick turned on. **We observe that the performance of the methods changes when varying the batch size.**

**Therefore, TFB calls for the testing process to avoid using the drop-last operation to ensure fairness, and TFB did not use the drop-last operation during testing either.**
<div align="center">
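The last-batch counts quoted above follow from simple sliding-window arithmetic; a quick sanity check in Python (the figures are taken from the paragraph itself, not from TFB code):

```python
# ETTh2 test split: 2,880 steps, look-back 512, horizon 336.
test_len, lookback, horizon = 2880, 512, 336

# Number of sliding-window test samples.
n_samples = test_len - lookback - horizon + 1  # 2033

for batch_size in (32, 64, 128):
    # Samples left over in the final, incomplete batch.
    print(batch_size, n_samples % batch_size)  # -> 17, 49, 113
```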
3 changes: 2 additions & 1 deletion config/fixed_forecast_config_daily.json
100755 → 100644
@@ -29,7 +29,8 @@
"train_ratio_in_tv": 1,
"seed": 2021,
"deterministic": "efficient",
"save_true_pred": false
"save_true_pred": false,
"target_channel": null
}
},

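The same `target_channel` key is added to all eight forecasting configs in this PR. Its consumer is not shown in the diff, so the following is only a plausible reading (an assumption, not confirmed by the PR): `null` keeps every column as a forecast target, while a list of column indices would mark the remaining columns as exogenous inputs.

```python
import json

# Hypothetical interpretation of the new key; the code that reads it
# is not part of this diff.
cfg = json.loads('{"save_true_pred": false, "target_channel": null}')

if cfg["target_channel"] is None:
    print("all channels treated as forecast targets")  # assumed default
else:
    print("target columns:", cfg["target_channel"])
```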
3 changes: 2 additions & 1 deletion config/fixed_forecast_config_hourly.json
100755 → 100644
@@ -28,7 +28,8 @@
"train_ratio_in_tv": 1,
"seed": 2021,
"deterministic": "efficient",
"save_true_pred": false
"save_true_pred": false,
"target_channel": null
}
},

3 changes: 2 additions & 1 deletion config/fixed_forecast_config_monthly.json
100755 → 100644
@@ -29,7 +29,8 @@
"train_ratio_in_tv": 1,
"seed": 2021,
"deterministic": "efficient",
"save_true_pred": false
"save_true_pred": false,
"target_channel": null
}
},

3 changes: 2 additions & 1 deletion config/fixed_forecast_config_other.json
100755 → 100644
@@ -29,7 +29,8 @@
"train_ratio_in_tv": 1,
"seed": 2021,
"deterministic": "efficient",
"save_true_pred": false
"save_true_pred": false,
"target_channel": null
}
},

3 changes: 2 additions & 1 deletion config/fixed_forecast_config_quarterly.json
100755 → 100644
@@ -29,7 +29,8 @@
"train_ratio_in_tv": 1,
"seed": 2021,
"deterministic": "efficient",
"save_true_pred": false
"save_true_pred": false,
"target_channel": null
}
},

3 changes: 2 additions & 1 deletion config/fixed_forecast_config_weekly.json
100755 → 100644
@@ -28,7 +28,8 @@
"train_ratio_in_tv": 1,
"seed": 2021,
"deterministic": "efficient",
"save_true_pred": false
"save_true_pred": false,
"target_channel": null
}
},

3 changes: 2 additions & 1 deletion config/fixed_forecast_config_yearly.json
100755 → 100644
@@ -29,7 +29,8 @@
"train_ratio_in_tv": 1,
"seed": 2021,
"deterministic": "efficient",
"save_true_pred": false
"save_true_pred": false,
"target_channel": null
}
},

3 changes: 2 additions & 1 deletion config/rolling_forecast_config.json
100755 → 100644
@@ -31,7 +31,8 @@
"num_rollings": 48000,
"seed": 2021,
"deterministic": "efficient",
"save_true_pred": false
"save_true_pred": false,
"target_channel": null
}
},

4 changes: 2 additions & 2 deletions ts_benchmark/baselines/duet/duet.py
@@ -4,7 +4,7 @@
from sklearn.preprocessing import StandardScaler

from ts_benchmark.baselines.duet.utils.tools import EarlyStopping, adjust_learning_rate
- from ts_benchmark.utils.data_processing import split_before
+ from ts_benchmark.utils.data_processing import split_time
from typing import Type, Dict, Optional, Tuple
from torch import optim
import numpy as np
@@ -352,7 +352,7 @@ def forecast(self, horizon: int, train: pd.DataFrame) -> np.ndarray:
raise ValueError("Model not trained. Call the fit() function first.")

config = self.config
- train, test = split_before(train, len(train) - config.seq_len)
+ train, test = split_time(train, len(train) - config.seq_len)

# Additional timestamp marks required to generate transformer class methods
test = self.padding_data_for_forecast(test)
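The `split_before` to `split_time` rename recurs in every baseline below and in `ts_benchmark/baselines/utils.py`. Since all call sites pass the same arguments as before, the semantics appear unchanged; a minimal sketch under that assumption:

```python
from typing import Tuple

import pandas as pd

def split_time(data: pd.DataFrame, index: int) -> Tuple[pd.DataFrame, pd.DataFrame]:
    # Assumed behaviour, mirroring the former split_before:
    # the first `index` rows, then the remainder.
    return data.iloc[:index], data.iloc[index:]
```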
4 changes: 2 additions & 2 deletions ts_benchmark/baselines/fits/fits.py
@@ -14,7 +14,7 @@
train_val_split,
get_time_mark,
)
- from ts_benchmark.utils.data_processing import split_before
+ from ts_benchmark.utils.data_processing import split_time
from ..time_series_library.utils.tools import EarlyStopping, adjust_learning_rate
from ...models.model_base import ModelBase, BatchMaker

@@ -336,7 +336,7 @@ def forecast(self, horizon: int, train: pd.DataFrame) -> np.ndarray:
raise ValueError("Model not trained. Call the fit() function first.")

config = self.config
- train, test = split_before(train, len(train) - config.seq_len)
+ train, test = split_time(train, len(train) - config.seq_len)

# Additional timestamp marks required to generate transformer class methods
test = self.padding_data_for_forecast(test)
4 changes: 2 additions & 2 deletions ts_benchmark/baselines/pathformer/pathformer.py
@@ -15,7 +15,7 @@
train_val_split,
get_time_mark,
)
- from ts_benchmark.utils.data_processing import split_before
+ from ts_benchmark.utils.data_processing import split_time
from .utils.tools import EarlyStopping, adjust_learning_rate
from ...models.model_base import ModelBase, BatchMaker

@@ -388,7 +388,7 @@ def forecast(self, horizon: int, train: pd.DataFrame) -> np.ndarray:
raise ValueError("Model not trained. Call the fit() function first.")

config = self.config
- train, test = split_before(train, len(train) - config.seq_len)
+ train, test = split_time(train, len(train) - config.seq_len)

# Additional timestamp marks required to generate transformer class methods
test = self.padding_data_for_forecast(test)
4 changes: 2 additions & 2 deletions ts_benchmark/baselines/pdf/PDF.py
@@ -15,7 +15,7 @@
train_val_split,
get_time_mark,
)
- from ts_benchmark.utils.data_processing import split_before
+ from ts_benchmark.utils.data_processing import split_time
from ts_benchmark.baselines.pdf.utils.tools import EarlyStopping, adjust_learning_rate
from ts_benchmark.models.model_base import ModelBase, BatchMaker

@@ -398,7 +398,7 @@ def forecast(self, horizon: int, train: pd.DataFrame) -> np.ndarray:
raise ValueError("Model not trained. Call the fit() function first.")

config = self.config
- train, test = split_before(train, len(train) - config.seq_len)
+ train, test = split_time(train, len(train) - config.seq_len)

# Additional timestamp marks required to generate transformer class methods
test = self.padding_data_for_forecast(test)
(next file: name not captured in this export)
@@ -5,6 +5,7 @@
import pandas as pd
import torch
import torch.nn as nn
+ from torch.utils.data import DataLoader
from sklearn.preprocessing import StandardScaler
from torch import optim

@@ -19,7 +20,7 @@
get_time_mark,
)
from ts_benchmark.models.model_base import ModelBase, BatchMaker
- from ts_benchmark.utils.data_processing import split_before
+ from ts_benchmark.utils.data_processing import split_time

DEFAULT_TRANSFORMER_BASED_HYPER_PARAMS = {
"top_k": 5,
@@ -63,7 +64,7 @@
"down_sampling_method": "avg",
"decomp_method": "moving_avg",
"use_norm": True,
"parallel_strategy": "DP"
"parallel_strategy": "DP",
}


@@ -75,8 +76,10 @@ def __init__(self, **kwargs):
for key, value in kwargs.items():
setattr(self, key, value)

- if self.parallel_strategy not in [None, 'DP']:
- raise ValueError("Invalid value for parallel_strategy. Supported values are 'DP' and None.")
+ if self.parallel_strategy not in [None, "DP"]:
+ raise ValueError(
+ "Invalid value for parallel_strategy. Supported values are 'DP' and None."
+ )

@property
def pred_len(self):
@@ -204,7 +207,18 @@ def _padding_time_stamp_mark(
padding_mark = get_time_mark(whole_time_stamp, 1, self.config.freq)
return padding_mark

- def validate(self, valid_data_loader, criterion):
+ def validate(
+ self, valid_data_loader: DataLoader, exog_dim: int, criterion: torch.nn.Module
+ ) -> float:
+ """
+ Validates the model performance on the provided validation dataset.
+
+ :param valid_data_loader: A PyTorch DataLoader for the validation dataset.
+ :param exog_dim: The number of trailing exogenous dimensions to exclude from the series data.
+ :param criterion: The loss function used to compare model predictions with the ground truth.
+ :return: The mean loss computed over the validation dataset.
+ """
config = self.config
total_loss = []
self.model.eval()
@@ -227,8 +241,17 @@

output = self.model(input, input_mark, dec_input, target_mark)

- target = target[:, -config.horizon :, :]
- output = output[:, -config.horizon :, :]
+ target = target[
+ :,
+ -config.horizon :,
+ : -exog_dim if exog_dim > 0 else None,
+ ]
+ output = output[
+ :,
+ -config.horizon :,
+ : -exog_dim if exog_dim > 0 else None,
+ ]

loss = criterion(output, target).detach().cpu().numpy()
total_loss.append(loss)
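
The trailing slice `: -exog_dim if exog_dim > 0 else None` strips the exogenous channels (which were concatenated to the right of the targets) before the loss is computed; when no exog data is present (`exog_dim == -1`), it degenerates to `[:None]` and keeps every channel. A standalone illustration:

```python
import numpy as np

# (batch, horizon, 3 target channels + 2 exogenous channels)
output = np.zeros((8, 336, 5))

for exog_dim in (2, -1):
    trimmed = output[:, :, : -exog_dim if exog_dim > 0 else None]
    print(exog_dim, trimmed.shape)  # 2 -> (8, 336, 3); -1 -> (8, 336, 5)
```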

@@ -237,15 +260,24 @@
return total_loss

def forecast_fit(
- self, train_valid_data: pd.DataFrame, train_ratio_in_tv: float
+ self,
+ train_valid_data: pd.DataFrame,
+ covariates: Optional[Dict],
+ train_ratio_in_tv: float,
) -> "ModelBase":
"""
Train the model.

:param train_data: Time series data used for training.
+ :param covariates: Additional external variables.
:param train_ratio_in_tv: Represents the splitting ratio of the training set validation set. If it is equal to 1, it means that the validation set is not partitioned.
:return: The fitted model object.
"""
+ exog_dim = -1
+ if covariates["exog"] is not None:
+ exog_dim = covariates["exog"].shape[-1]
+ train_valid_data = pd.concat([train_valid_data, covariates["exog"]], axis=1)
Reviewer (Collaborator) commented:
Before checking "exog" in covariates, covariates["exog"] may raise errors in line 260; moreover, this variable is exog_dim rather than series_dim. Maybe we should do it like this:

exog_dim = -1  # maybe None is better
if "exog" in covariates:
    exog_dim = covariates["exog"].shape[1]
    ...

Author (Collaborator) replied: [screenshot]


if train_valid_data.shape[1] == 1:
train_drop_last = False
self.single_forecasting_hyper_param_tune(train_valid_data)
@@ -255,7 +287,7 @@ def forecast_fit(

setattr(self.config, "task_name", "short_term_forecast")
self.model = self.model_class(self.config)

device_ids = np.arange(torch.cuda.device_count()).tolist()
if len(device_ids) > 1 and self.config.parallel_strategy == "DP":
self.model = nn.DataParallel(self.model, device_ids=device_ids)
@@ -340,29 +372,45 @@ def forecast_fit(

output = self.model(input, input_mark, dec_input, target_mark)

- target = target[:, -config.horizon :, :]
- output = output[:, -config.horizon :, :]
+ target = target[
+ :,
+ -config.horizon :,
+ : -exog_dim if exog_dim > 0 else None,
+ ]
+ output = output[
+ :,
+ -config.horizon :,
+ : -exog_dim if exog_dim > 0 else None,
+ ]
loss = criterion(output, target)

loss.backward()
optimizer.step()

if train_ratio_in_tv != 1:
- valid_loss = self.validate(valid_data_loader, criterion)
+ valid_loss = self.validate(valid_data_loader, exog_dim, criterion)
self.early_stopping(valid_loss, self.model)
if self.early_stopping.early_stop:
break

adjust_learning_rate(optimizer, epoch + 1, config)

- def forecast(self, horizon: int, train: pd.DataFrame) -> np.ndarray:
+ def forecast(
+ self, horizon: int, covariates: Optional[Dict], train: pd.DataFrame
+ ) -> np.ndarray:
"""
Make predictions.

:param horizon: The predicted length.
+ :param covariates: Additional external variables.
:param testdata: Time series data used for prediction.
:return: An array of predicted results.
"""
+ exog_dim = -1
+ if covariates["exog"] is not None:
+ exog_dim = covariates["exog"].shape[-1]
+ train = pd.concat([train, covariates["exog"]], axis=1)

if self.early_stopping.check_point is not None:
self.model.load_state_dict(self.early_stopping.check_point)

@@ -377,7 +425,7 @@ def forecast(self, horizon: int, train: pd.DataFrame) -> np.ndarray:
raise ValueError("Model not trained. Call the fit() function first.")

config = self.config
- train, test = split_before(train, len(train) - config.seq_len)
+ train, test = split_time(train, len(train) - config.seq_len)

# Additional timestamp marks required to generate transformer class methods
test = self.padding_data_for_forecast(test)
@@ -423,9 +471,9 @@ def forecast(self, horizon: int, train: pd.DataFrame) -> np.ndarray:
answer[-horizon:] = self.scaler.inverse_transform(
answer[-horizon:]
)
- return answer[-horizon:]
+ return answer[-horizon:][..., : -exog_dim if exog_dim > 0 else None]

- output = output.cpu().numpy()[:, -config.horizon :, :]
+ output = output.cpu().numpy()[:, -config.horizon :]
for i in range(config.horizon):
test.iloc[i + config.seq_len] = output[0, i, :]

@@ -463,6 +511,13 @@ def batch_forecast(
input_data = batch_maker.make_batch(self.config.batch_size, self.config.seq_len)
input_np = input_data["input"]

+ exog_dim = -1
+ if input_data["covariates"] is not None:
+ exog_dim = input_data["covariates"]["exog"].shape[-1]
+ input_np = np.concatenate(
+ (input_np, input_data["covariates"]["exog"]), axis=2
+ )

if self.config.norm:
origin_shape = input_np.shape
flattened_data = input_np.reshape((-1, input_np.shape[-1]))
@@ -482,7 +537,7 @@
answers.shape
)

- return answers
+ return answers[..., : -exog_dim if exog_dim > 0 else None]

def _perform_rolling_predictions(
self,
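Taken together, the new signatures suggest the following calling convention. This is a hedged sketch: the DataFrame shapes and the `{"exog": ...}` dict layout are inferred from the diff, and, per the review thread above, the `"exog"` lookup may still need guarding.

```python
import numpy as np
import pandas as pd

# Hypothetical data: 3 target channels plus 2 exogenous series on a shared index.
idx = pd.date_range("2021-01-01", periods=500, freq="h")
train = pd.DataFrame(np.random.randn(500, 3), index=idx)
exog = pd.DataFrame(np.random.randn(500, 2), index=idx)

# model.forecast_fit(train, covariates={"exog": exog}, train_ratio_in_tv=0.9)
# pred = model.forecast(24, covariates={"exog": exog}, train=train)
# pred.shape[-1] == 3  # exogenous channels are sliced off before returning
```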
8 changes: 4 additions & 4 deletions ts_benchmark/baselines/utils.py
@@ -9,7 +9,7 @@
from ts_benchmark.baselines.time_series_library.utils.timefeatures import (
time_features,
)
- from ts_benchmark.utils.data_processing import split_before
+ from ts_benchmark.utils.data_processing import split_time


class SlidingWindowDataLoader:
@@ -112,13 +112,13 @@ def train_val_split(train_data, ratio, seq_len):
elif seq_len is not None:
border = int((train_data.shape[0]) * ratio)

- train_data_value, valid_data_rest = split_before(train_data, border)
- train_data_rest, valid_data = split_before(train_data, border - seq_len)
+ train_data_value, valid_data_rest = split_time(train_data, border)
+ train_data_rest, valid_data = split_time(train_data, border - seq_len)
return train_data_value, valid_data
else:
border = int((train_data.shape[0]) * ratio)

- train_data_value, valid_data_rest = split_before(train_data, border)
+ train_data_value, valid_data_rest = split_time(train_data, border)
return train_data_value, valid_data_rest


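Note that in the `seq_len` branch above, the validation slice starts `seq_len` rows before the border, so the first validation window still has a full look-back context; a quick numeric check:

```python
train_len, ratio, seq_len = 1000, 0.8, 96

border = int(train_len * ratio)              # 800
train_rows = border                          # rows [0, 800)
valid_rows = train_len - (border - seq_len)  # rows [704, 1000) -> 296
print(train_rows, valid_rows)
```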