Merge pull request #274 from abstractqqq/fix_bug_in_online_lr
Fix bug in online lr
abstractqqq authored Oct 20, 2024
2 parents 0574154 + 2dd2441 commit 192c61d
Showing 5 changed files with 116 additions and 70 deletions.
47 changes: 30 additions & 17 deletions python/polars_ds/linear_models.py
@@ -187,6 +187,9 @@ def coeffs(self) -> np.ndarray:
"""
return self._lr.coeffs

def bias(self) -> float:
return self._lr.bias

def fit(self, X: np.ndarray, y: np.ndarray, null_policy: NullPolicy = "ignore") -> Self:
"""
Fit the linear regression model on NumPy data.
@@ -271,7 +274,7 @@ def predict(self, X: np.ndarray) -> np.ndarray:
X
Data to predict on, as a matrix
"""
return self._lr.predict(X.reshape((1, -1)))
return self._lr.predict(X).reshape((-1, 1))

def predict_df(self, df: PolarsFrame, name: str = "prediction") -> PolarsFrame:
"""
@@ -404,6 +407,9 @@ def coeffs(self) -> np.ndarray:
"""
return self._en.coeffs

def bias(self) -> float:
return self._en.bias

def fit(self, X: np.ndarray, y: np.ndarray, null_policy: NullPolicy = "ignore") -> Self:
"""
Fit the Elastic Net model on NumPy data.
@@ -469,7 +475,7 @@ def predict(self, X: np.ndarray) -> np.ndarray:
X
Data to predict on, as a matrix
"""
return self._en.predict(X.reshape((1, -1)))
return self._en.predict(X).reshape((-1, 1))

def predict_df(self, df: PolarsFrame, name: str = "prediction") -> PolarsFrame:
"""
@@ -503,44 +509,45 @@ class OnlineLR:
"""
Normal or Ridge Online Regression. This doesn't support dataframe inputs.
Because of implementation details, it is not recommended to set fit_bias = True here
if runtime speed is crucial.
Null Behaviors:
1. During the initial fit, no nulls/NaNs should be present
2. During online updates, if the record has null/NaN, then it will be ignored. Nothing will be updated.
"""

def __init__(
self,
fit_bias: bool = False,
lambda_: float = 0.0,
fit_bias: bool = False,
):
"""
Parameters
----------
fit_bias
Whether to add a bias term. Also known as intercept in other packages.
lambda_
The regularization parameters for ridge. If this is positive, then this class will solve Ridge.
The L2 regularization factor
fit_bias
Whether this should fit the bias term
"""
self._lr = PyOnlineLR(fit_bias=fit_bias, lambda_=lambda_)
self._lr = PyOnlineLR(lambda_, fit_bias)

@classmethod
def from_coeffs_and_inv(cls, coeffs: List[float], inv: np.ndarray, bias: float = 0.0) -> Self:
def from_coeffs_bias_inverse(cls, coeffs: List[float], bias: float, inv: np.ndarray) -> Self:
"""
Constructs an online linear regression instance from coefficients, inverse and bias. This copies
Constructs an online linear regression instance from coefficients, bias and inverse. This copies
data.
Parameters
----------
coeffs
Iterable of numbers representing the coefficients
bias
The bias term
inv
2D NumPy matrix representing the inverse of XtX in a regression problem.
bias
Value for the bias term
"""
coefficients = np.ascontiguousarray(coeffs, dtype=np.float64).flatten()
lr = cls(fit_bias=(bias != 0.0), lambda_=0.0)
lr._lr.set_coeffs_bias_inverse(coefficients, inv, bias)
lr = cls(fit_bias=(bias > 0.0), lambda_=0.0)
lr._lr.set_coeffs_bias_inverse(coefficients, bias, inv)
return lr

def is_fit(self) -> bool:
@@ -565,6 +572,9 @@ def coeffs(self) -> np.ndarray:
"""
return self._lr.coeffs

def bias(self) -> float:
return self._lr.bias

def inv(self) -> np.ndarray:
"""
Returns a copy of the current inverse matrix (inverse of XtX in a linear regression).
@@ -584,7 +594,7 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> Self:
"""
if np.any(np.isnan(X)) | np.any(np.isnan(y)):
raise ValueError(
"Online regression currently must fit with data without null for the initial fit."
"Online regression currently must fit without null for the initial fit."
)

self._lr.fit(X, y)
@@ -606,6 +616,9 @@ def update(self, X: np.ndarray, y: np.ndarray | float, c: float = 1.0) -> Self:
the impact of the new data, and a value of -1.0 means we remove the impact of the
data. Any other value will `scale` the impact of the data.
"""
if not self.is_fit():
raise ValueError("You cannot update before the initial fit of the matrix.")

x_2d = X.reshape((1, -1))
if isinstance(y, float):
y_2d = np.array([[y]])
@@ -624,4 +637,4 @@ def predict(self, X: np.ndarray) -> np.ndarray:
X
Data to predict on, as a matrix
"""
return self._lr.predict(X.reshape((1, -1)))
return self._lr.predict(X).reshape((-1, 1))
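
To make the Python-side changes concrete, here is a minimal usage sketch of OnlineLR as it looks after this commit. This is not part of the diff: the data is made up, and the expected shape of y is an assumption based on the signatures shown above.

```python
# Hypothetical usage sketch of OnlineLR after this commit (not part of the diff).
import numpy as np
from polars_ds.linear_models import OnlineLR

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 3))
y = (X @ np.array([0.5, -1.0, 2.0]) + 0.3).reshape((-1, 1))  # assumed column shape

olr = OnlineLR(lambda_=0.1, fit_bias=True)  # keyword args sidestep the reordered positions
olr.fit(X, y)                               # the initial fit must be free of nulls/NaNs
coeffs, bias = olr.coeffs(), olr.bias()

# Online update with one record: c=1.0 adds its impact, c=-1.0 would remove it.
olr.update(rng.normal(size=3), y=0.7, c=1.0)

# After the fix, predict accepts a full matrix and returns an (n, 1) column.
preds = olr.predict(X[:5])
assert preds.shape == (5, 1)
```

Passing keyword arguments avoids relying on the constructor's parameter order, which this commit changes so that lambda_ now comes before fit_bias.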
102 changes: 66 additions & 36 deletions src/linalg/lstsq.rs
@@ -267,17 +267,16 @@ impl OnlineLR {
pub fn set_coeffs_bias_inverse(
&mut self,
coeffs: &[f64],
inv: MatRef<f64>,
bias: f64,
inv: MatRef<f64>,
) -> Result<(), LinalgErrors> {
if coeffs.len() != inv.ncols() {
Err(LinalgErrors::DimensionMismatch)
} else {
self.bias = bias;
self.coefficients =
(faer::mat::from_row_major_slice(coeffs, coeffs.len(), 1)).to_owned();
self.inv = inv.to_owned();
self.bias = bias;
self.fit_bias = bias.abs() > f64::EPSILON;
Ok(())
}
}
@@ -291,17 +290,31 @@ impl OnlineLR {
}

pub fn update_unchecked(&mut self, new_x: MatRef<f64>, new_y: MatRef<f64>, c: f64) {
woodbury_step(
self.inv.as_mut(),
self.coefficients.as_mut(),
new_x,
new_y,
c,
)
}

pub fn update(&mut self, new_x: MatRef<f64>, new_y: MatRef<f64>, c: f64) {
if !(new_x.has_nan() || new_y.has_nan()) {
if self.fit_bias() {
let cur_coeffs = self.coefficients();
let ones = Mat::full(new_x.nrows(), 1, 1.0);
let new_new_x = faer::concat![[new_x, ones]];
// Needed because of a dimension mismatch: the coefficients don't contain the bias term,
// but during fit it was included, which adds one extra dimension.
// We account for that here.
let nfeats = cur_coeffs.nrows();
let mut temp_weights = Mat::<f64>::from_fn(nfeats + 1, 1, |i, j| {
if i < nfeats {
*cur_coeffs.get(i, j)
} else {
self.bias
}
});
woodbury_step(
self.inv.as_mut(),
temp_weights.as_mut(),
new_new_x.as_ref(),
new_y,
c,
);
self.coefficients = temp_weights.get(..nfeats, ..).to_owned();
self.bias = *temp_weights.get(nfeats, 0);
} else {
woodbury_step(
self.inv.as_mut(),
self.coefficients.as_mut(),
@@ -311,6 +324,12 @@ impl OnlineLR {
)
}
}

pub fn update(&mut self, new_x: MatRef<f64>, new_y: MatRef<f64>, c: f64) {
if !(new_x.has_nan() || new_y.has_nan()) {
self.update_unchecked(new_x, new_y, c)
}
}
}

impl LinearRegression for OnlineLR {
@@ -327,12 +346,25 @@ impl LinearRegression for OnlineLR {
}

fn fit_unchecked(&mut self, X: MatRef<f64>, y: MatRef<f64>) {
(self.inv, self.coefficients) = match self.method {
ClosedFormLRMethods::Normal => faer_qr_lstsq_with_inv(X, y),
ClosedFormLRMethods::L2 => {
faer_cholesky_ridge_with_inv(X, y, self.lambda, self.fit_bias)
}
};
if self.fit_bias {
let actual_features = X.ncols();
let ones = Mat::full(X.nrows(), 1, 1.0);
let new_x = faer::concat![[X, ones]];
let (inv, all_coefficients) = match self.method {
ClosedFormLRMethods::Normal => faer_qr_lstsq_with_inv(new_x.as_ref(), y),
ClosedFormLRMethods::L2 => {
faer_cholesky_ridge_with_inv(new_x.as_ref(), y, self.lambda, true)
}
};
self.inv = inv;
self.coefficients = all_coefficients.get(..actual_features, ..).to_owned();
self.bias = *all_coefficients.get(actual_features, 0);
} else {
(self.inv, self.coefficients) = match self.method {
ClosedFormLRMethods::Normal => faer_qr_lstsq_with_inv(X, y),
ClosedFormLRMethods::L2 => faer_cholesky_ridge_with_inv(X, y, self.lambda, false),
};
}
}
}

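The bias handling added to fit_unchecked above follows a standard trick: append a column of ones to X, solve the (possibly ridge-regularized) system as usual, then split the last weight off as the bias. A small NumPy sketch of that idea follows; it is an illustration only, not the library's faer-based solver, and it regularizes every weight including the bias, which the real Cholesky path may treat differently.

```python
# NumPy sketch of fit-with-bias via an appended ones column, mirroring fit_unchecked.
import numpy as np

def fit_with_bias(X: np.ndarray, y: np.ndarray, lambda_: float = 0.0):
    n_feats = X.shape[1]
    Xb = np.hstack([X, np.ones((X.shape[0], 1))])                  # [X | 1]
    inv = np.linalg.inv(Xb.T @ Xb + lambda_ * np.eye(n_feats + 1))  # (X'X + lambda I)^-1
    w = inv @ Xb.T @ y                                              # all weights, bias last
    coeffs, bias = w[:n_feats], w[n_feats, 0]
    return inv, coeffs, bias                                        # inv is kept for online updates
```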
@@ -576,7 +608,7 @@ pub fn faer_qr_lstsq_with_inv(x: MatRef<f64>, y: MatRef<f64>) -> (Mat<f64>, Mat<
(inv, weights)
}

/// Returns the coefficients for lstsq with l2 (Ridge) regularization as a nrows x 1 matrix
/// Returns the coefficients for lstsq with l2 (Ridge) regularization together with the inverse of XtX
/// By default this uses Cholesky to solve the system, and in case the matrix is not positive
/// definite, it falls back to SVD. (I suspect the matrix in this case is always positive definite!)
#[inline(always)]
@@ -730,7 +762,6 @@ pub fn faer_recursive_lstsq(
y: MatRef<f64>,
n: usize,
lambda: f64,
has_bias: bool,
) -> Vec<Mat<f64>> {
let xn = x.nrows();
// x: size xn x m
@@ -741,7 +772,9 @@ pub fn faer_recursive_lstsq(
let x0 = x.get(..n, ..);
let y0 = y.get(..n, ..);

let mut online_lr = OnlineLR::new(lambda, has_bias);
// If add_bias is set, the column of 1s has already been appended to
// all the data, so there is no need for OnlineLR to add it again.
let mut online_lr = OnlineLR::new(lambda, false);
online_lr.fit_unchecked(x0, y0); // safe because things are checked in plugin / python functions.
coefficients.push(online_lr.get_coefficients());
for j in n..xn {
@@ -756,13 +789,7 @@
/// Given all data, we start running lstsq at position n and compute new coefficients recursively.
/// This will return all coefficients for rows >= n. This will only be used in Polars Expressions.
/// This supports Normal or Ridge regression
pub fn faer_rolling_lstsq(
x: MatRef<f64>,
y: MatRef<f64>,
n: usize,
lambda: f64,
has_bias: bool,
) -> Vec<Mat<f64>> {
pub fn faer_rolling_lstsq(x: MatRef<f64>, y: MatRef<f64>, n: usize, lambda: f64) -> Vec<Mat<f64>> {
let xn = x.nrows();
// x: size xn x m
// y: size xn x 1
@@ -772,7 +799,9 @@ pub fn faer_rolling_lstsq(x: MatRef<f64>, y: MatRef<f64>, n: usize, lambda: f64) -> Vec<Mat<f64>> {
let x0 = x.get(..n, ..);
let y0 = y.get(..n, ..);

let mut online_lr = OnlineLR::new(lambda, has_bias);
// If add_bias is set, the column of 1s has already been appended to
// all the data, so there is no need for OnlineLR to add it again.
let mut online_lr = OnlineLR::new(lambda, false);
online_lr.fit_unchecked(x0, y0);
coefficients.push(online_lr.get_coefficients());

@@ -799,7 +828,6 @@ pub fn faer_rolling_skipping_lstsq(
n: usize,
m: usize,
lambda: f64,
has_bias: bool,
) -> Vec<Mat<f64>> {
let xn = x.nrows();
let ncols = x.ncols();
@@ -816,7 +844,9 @@ pub fn faer_rolling_skipping_lstsq(
let mut x_slice: Vec<f64> = Vec::with_capacity(n * ncols);
let mut y_slice: Vec<f64> = Vec::with_capacity(n);

let mut online_lr = OnlineLR::new(lambda, has_bias);
// If add_bias is set, the column of 1s has already been appended to
// all the data, so there is no need for OnlineLR to add it again.
let mut online_lr = OnlineLR::new(lambda, false);
while right <= xn {
// Somewhat redundant here.
non_null_cnt_in_window = 0;
@@ -881,15 +911,15 @@ fn woodbury_step(
weights: MatMut<f64>,
new_x: MatRef<f64>,
new_y: MatRef<f64>,
c: f64, // Should be +1 or -1, for a "update" and a "removal"
c: f64, // +1 or -1, for an "update" and a "removal"
) {
// It is truly amazing that the c in the Woodbury identity essentially controls the update
// and removal of a new record (rolling)... Linear regression seems to be designed by God to work so well

let left = &inverse * new_x.transpose(); // corresponding to u in the reference
// right = left.transpose() by the fact that if A is symmetric, invertible, A-1 is also symmetric
let z = (c + (new_x * &left).read(0, 0)).recip();
// Update the inverse
// Update the information matrix's inverse. Page 56 of the gatech reference
faer::linalg::matmul::matmul(
inverse,
&left,
@@ -901,7 +931,7 @@

// Difference from estimate using prior weights vs. actual next y
let y_diff = new_y - (new_x * &weights);
// Update weights
// Update weights. Page 56, after 'Then', in the gatech reference
faer::linalg::matmul::matmul(
weights,
left,
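
woodbury_step above is a rank-1 Sherman–Morrison/Woodbury update of the inverse of the information matrix (XtX) and of the weights; as the comments note, the sign of c decides whether a record's impact is added (+1) or removed (-1). Below is a NumPy sketch of the same algebra, checked against a full refit. It is illustrative only: names and data are made up, and faer's accumulating matmul calls are replaced by plain NumPy operations.

```python
# NumPy mirror of the rank-1 Woodbury update performed by woodbury_step (illustrative).
import numpy as np

def woodbury_step(inv, w, x_new, y_new, c=1.0):
    """Update inv ~ (X'X)^-1 and the weights w for one added (c=+1) or removed (c=-1) row."""
    x = x_new.reshape(-1, 1)              # column vector
    left = inv @ x                        # "u" in the referenced notes
    z = 1.0 / (c + (x.T @ left).item())   # scalar gain
    inv_new = inv - z * (left @ left.T)   # rank-1 correction of the inverse
    y_diff = y_new - (x.T @ w).item()     # residual under the old weights
    w_new = w + z * left * y_diff         # weight correction
    return inv_new, w_new

# Tiny sanity check against a full refit on made-up data.
rng = np.random.default_rng(1)
X = rng.normal(size=(50, 3))
y = X @ np.array([[1.0], [2.0], [-0.5]])
inv = np.linalg.inv(X.T @ X)
w = inv @ X.T @ y
x_new, y_new = rng.normal(size=3), 0.3
inv, w = woodbury_step(inv, w, x_new, y_new, c=1.0)
X2, y2 = np.vstack([X, x_new]), np.vstack([y, [[y_new]]])
assert np.allclose(w, np.linalg.solve(X2.T @ X2, X2.T @ y2))
```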
14 changes: 7 additions & 7 deletions src/num_ext/linear_regression.rs
@@ -734,20 +734,20 @@ fn pl_wls_report(inputs: &[Series], kwargs: LstsqKwargs) -> PolarsResult<Series>
#[polars_expr(output_type_func=coeff_pred_output)]
fn pl_recursive_lstsq(inputs: &[Series], kwargs: SWWLstsqKwargs) -> PolarsResult<Series> {
let n = kwargs.n; // Guaranteed n >= 1
let has_bias = kwargs.bias;
let add_bias = kwargs.bias;

// Guaranteed in Python that this won't be SKIP. SKIP doesn't work now.
let null_policy = NullPolicy::try_from(kwargs.null_policy)
.map_err(|e| PolarsError::ComputeError(e.into()))?;

// Target y is at index 0
match series_to_mat_for_lstsq(inputs, has_bias, null_policy) {
match series_to_mat_for_lstsq(inputs, add_bias, null_policy) {
Ok((mat, mask)) => {
// Solving Least Square
let x = mat.slice(s![.., 1..]).into_faer();
let y = mat.slice(s![.., 0..1]).into_faer();

let coeffs = faer_recursive_lstsq(x, y, n, kwargs.lambda, has_bias);
let coeffs = faer_recursive_lstsq(x, y, n, kwargs.lambda);
let mut builder: ListPrimitiveChunkedBuilder<Float64Type> =
ListPrimitiveChunkedBuilder::new(
"coeffs",
@@ -820,7 +820,7 @@ fn pl_recursive_lstsq(inputs: &[Series], kwargs: SWWLstsqKwargs) -> PolarsResult
#[polars_expr(output_type_func=coeff_pred_output)] // They share the same output type
fn pl_rolling_lstsq(inputs: &[Series], kwargs: SWWLstsqKwargs) -> PolarsResult<Series> {
let n = kwargs.n; // Guaranteed n >= 2
let has_bias = kwargs.bias;
let add_bias = kwargs.bias;

let mut null_policy = NullPolicy::try_from(kwargs.null_policy)
.map_err(|e| PolarsError::ComputeError(e.into()))?;
@@ -833,7 +833,7 @@ fn pl_rolling_lstsq(inputs: &[Series], kwargs: SWWLstsqKwargs) -> PolarsResult<S
};

// Target y is at index 0
match series_to_mat_for_lstsq(inputs, has_bias, null_policy) {
match series_to_mat_for_lstsq(inputs, add_bias, null_policy) {
Ok((mat, mask)) => {
let should_skip = match null_policy {
NullPolicy::SKIP_WINDOW | NullPolicy::FILL_WINDOW(_) => (!&mask).any(),
@@ -842,9 +842,9 @@ fn pl_rolling_lstsq(inputs: &[Series], kwargs: SWWLstsqKwargs) -> PolarsResult<S
let x = mat.slice(s![.., 1..]).into_faer();
let y = mat.slice(s![.., 0..1]).into_faer();
let coeffs = if should_skip {
faer_rolling_skipping_lstsq(x, y, n, kwargs.min_size, kwargs.lambda, has_bias)
faer_rolling_skipping_lstsq(x, y, n, kwargs.min_size, kwargs.lambda)
} else {
faer_rolling_lstsq(x, y, n, kwargs.lambda, has_bias)
faer_rolling_lstsq(x, y, n, kwargs.lambda)
};

let mut builder: ListPrimitiveChunkedBuilder<Float64Type> =
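
The expression-side changes drop has_bias from the faer helpers because, as the comments in lstsq.rs note, series_to_mat_for_lstsq has already appended the ones column when bias is requested, so the internal OnlineLR runs with fit_bias = false. The recursive (expanding-window) and rolling variants are then just repeated Woodbury updates. A NumPy sketch under those assumptions (y is taken as an (n, 1) column; ridge and the null-skipping window are omitted):

```python
# NumPy sketch of the expanding-window and rolling least squares built on Woodbury updates.
import numpy as np

def _woodbury(inv, w, x, y, c):
    # Same rank-1 update as in the sketch after lstsq.rs above.
    x = x.reshape(-1, 1)
    left = inv @ x
    z = 1.0 / (c + (x.T @ left).item())
    return inv - z * (left @ left.T), w + z * left * (y - (x.T @ w).item())

def recursive_lstsq(X, y, n):
    """Expanding window: fit on the first n rows, then add later rows one at a time (c=+1)."""
    inv = np.linalg.inv(X[:n].T @ X[:n])
    w = inv @ X[:n].T @ y[:n]
    out = [w.copy()]
    for j in range(n, X.shape[0]):
        inv, w = _woodbury(inv, w, X[j], y[j].item(), 1.0)
        out.append(w.copy())
    return out

def rolling_lstsq(X, y, n):
    """Fixed window of size n: add the incoming row (c=+1) and remove the outgoing one (c=-1)."""
    inv = np.linalg.inv(X[:n].T @ X[:n])
    w = inv @ X[:n].T @ y[:n]
    out = [w.copy()]
    for j in range(n, X.shape[0]):
        inv, w = _woodbury(inv, w, X[j], y[j].item(), 1.0)
        inv, w = _woodbury(inv, w, X[j - n], y[j - n].item(), -1.0)
        out.append(w.copy())
    return out
```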