Merge pull request #274 from abstractqqq/fix_bug_in_online_lr
Fix bug in online lr
abstractqqq authored Oct 20, 2024
2 parents 0574154 + 2dd2441 commit 192c61d
Showing 5 changed files with 116 additions and 70 deletions.
47 changes: 30 additions & 17 deletions python/polars_ds/linear_models.py
@@ -187,6 +187,9 @@ def coeffs(self) -> np.ndarray:
"""
return self._lr.coeffs

def bias(self) -> float:
return self._lr.bias

def fit(self, X: np.ndarray, y: np.ndarray, null_policy: NullPolicy = "ignore") -> Self:
"""
Fit the linear regression model on NumPy data.
@@ -271,7 +274,7 @@ def predict(self, X: np.ndarray) -> np.ndarray:
X
Data to predict on, as a matrix
"""
return self._lr.predict(X.reshape((1, -1)))
return self._lr.predict(X).reshape((-1, 1))

def predict_df(self, df: PolarsFrame, name: str = "prediction") -> PolarsFrame:
"""
@@ -404,6 +407,9 @@ def coeffs(self) -> np.ndarray:
"""
return self._en.coeffs

def bias(self) -> float:
return self._en.bias

def fit(self, X: np.ndarray, y: np.ndarray, null_policy: NullPolicy = "ignore") -> Self:
"""
Fit the Elastic Net model on NumPy data.
@@ -469,7 +475,7 @@ def predict(self, X: np.ndarray) -> np.ndarray:
X
Data to predict on, as a matrix
"""
return self._en.predict(X.reshape((1, -1)))
return self._en.predict(X).reshape((-1, 1))

def predict_df(self, df: PolarsFrame, name: str = "prediction") -> PolarsFrame:
"""
@@ -503,44 +509,45 @@ class OnlineLR:
"""
Normal or Ridge Online Regression. This doesn't support dataframe inputs.
Because of implementation details, it is not recommended to set fit_bias = True here
if runtime speed is crucial.
Null Behaviors:
1. During the initial fit, no nulls/NaNs should be present
2. During online updates, if the record has null/NaN, then it will be ignored. Nothing will be updated.
"""

def __init__(
self,
fit_bias: bool = False,
lambda_: float = 0.0,
fit_bias: bool = False,
):
"""
Parameters
----------
fit_bias
Whether to add a bias term. Also known as intercept in other packages.
lambda_
The regularization parameters for ridge. If this is positive, then this class will solve Ridge.
The L2 regularization factor
fit_bias
Whether this should fit the bias term
"""
self._lr = PyOnlineLR(fit_bias=fit_bias, lambda_=lambda_)
self._lr = PyOnlineLR(lambda_, fit_bias)

@classmethod
def from_coeffs_and_inv(cls, coeffs: List[float], inv: np.ndarray, bias: float = 0.0) -> Self:
def from_coeffs_bias_inverse(cls, coeffs: List[float], bias: float, inv: np.ndarray) -> Self:
"""
Constructs an online linear regression instance from coefficients, inverse and bias. This copies
Constructs an online linear regression instance from coefficients, bias and inverse. This copies
data.
Parameters
----------
coeffs
Iterable of numbers representing the coefficients
bias
The bias term
inv
2D NumPy matrix representing the inverse of XtX in a regression problem.
bias
Value for the bias term
"""
coefficients = np.ascontiguousarray(coeffs, dtype=np.float64).flatten()
lr = cls(fit_bias=(bias != 0.0), lambda_=0.0)
lr._lr.set_coeffs_bias_inverse(coefficients, inv, bias)
lr = cls(fit_bias=(bias > 0.0), lambda_=0.0)
lr._lr.set_coeffs_bias_inverse(coefficients, bias, inv)
return lr

def is_fit(self) -> bool:
@@ -565,6 +572,9 @@ def coeffs(self) -> np.ndarray:
"""
return self._lr.coeffs

def bias(self) -> float:
return self._lr.bias

def inv(self) -> np.ndarray:
"""
Returns a copy of the current inverse matrix (inverse of XtX in a linear regression).
@@ -584,7 +594,7 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> Self:
"""
if np.any(np.isnan(X)) | np.any(np.isnan(y)):
raise ValueError(
"Online regression currently must fit with data without null for the initial fit."
"Online regression currently must fit without null for the initial fit."
)

self._lr.fit(X, y)
@@ -606,6 +616,9 @@ def update(self, X: np.ndarray, y: np.ndarray | float, c: float = 1.0) -> Self:
the impact of the new data, and a value of -1.0 means we remove the impact of the
data. Any other value will `scale` the impact of the data.
"""
if not self.is_fit():
raise ValueError("You cannot update before the initial fit of the matrix.")

x_2d = X.reshape((1, -1))
if isinstance(y, float):
y_2d = np.array([[y]])
@@ -624,4 +637,4 @@ def predict(self, X: np.ndarray) -> np.ndarray:
X
Data to predict on, as a matrix
"""
return self._lr.predict(X.reshape((1, -1)))
return self._lr.predict(X).reshape((-1, 1))
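
To make the Python-side changes concrete, here is a minimal usage sketch of OnlineLR as it looks after this commit. This is not part of the diff: the data is made up, and the expected shape of y is an assumption based on the signatures shown above.

```python
# Hypothetical usage sketch of OnlineLR after this commit (not part of the diff).
import numpy as np
from polars_ds.linear_models import OnlineLR

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 3))
y = (X @ np.array([0.5, -1.0, 2.0]) + 0.3).reshape((-1, 1))  # assumed column shape

olr = OnlineLR(lambda_=0.1, fit_bias=True)  # keyword args sidestep the reordered positions
olr.fit(X, y)                               # the initial fit must be free of nulls/NaNs
coeffs, bias = olr.coeffs(), olr.bias()

# Online update with one record: c=1.0 adds its impact, c=-1.0 would remove it.
olr.update(rng.normal(size=3), y=0.7, c=1.0)

# After the fix, predict accepts a full matrix and returns an (n, 1) column.
preds = olr.predict(X[:5])
assert preds.shape == (5, 1)
```

Passing keyword arguments avoids relying on the constructor's parameter order, which this commit changes so that lambda_ now comes before fit_bias.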
102 changes: 66 additions & 36 deletions src/linalg/lstsq.rs
@@ -267,17 +267,16 @@ impl OnlineLR {
pub fn set_coeffs_bias_inverse(
&mut self,
coeffs: &[f64],
inv: MatRef<f64>,
bias: f64,
inv: MatRef<f64>,
) -> Result<(), LinalgErrors> {
if coeffs.len() != inv.ncols() {
Err(LinalgErrors::DimensionMismatch)
} else {
self.bias = bias;
self.coefficients =
(faer::mat::from_row_major_slice(coeffs, coeffs.len(), 1)).to_owned();
self.inv = inv.to_owned();
self.bias = bias;
self.fit_bias = bias.abs() > f64::EPSILON;
Ok(())
}
}
@@ -291,17 +290,31 @@ impl OnlineLR {
}

pub fn update_unchecked(&mut self, new_x: MatRef<f64>, new_y: MatRef<f64>, c: f64) {
woodbury_step(
self.inv.as_mut(),
self.coefficients.as_mut(),
new_x,
new_y,
c,
)
}

pub fn update(&mut self, new_x: MatRef<f64>, new_y: MatRef<f64>, c: f64) {
if !(new_x.has_nan() || new_y.has_nan()) {
if self.fit_bias() {
let cur_coeffs = self.coefficients();
let ones = Mat::full(new_x.nrows(), 1, 1.0);
let new_new_x = faer::concat![[new_x, ones]];
// Needed because of a dimension mismatch: the coefficients don't contain the bias term,
// but during fit it was included, which adds one extra dimension.
// We account for that here.
let nfeats = cur_coeffs.nrows();
let mut temp_weights = Mat::<f64>::from_fn(nfeats + 1, 1, |i, j| {
if i < nfeats {
*cur_coeffs.get(i, j)
} else {
self.bias
}
});
woodbury_step(
self.inv.as_mut(),
temp_weights.as_mut(),
new_new_x.as_ref(),
new_y,
c,
);
self.coefficients = temp_weights.get(..nfeats, ..).to_owned();
self.bias = *temp_weights.get(nfeats, 0);
} else {
woodbury_step(
self.inv.as_mut(),
self.coefficients.as_mut(),
@@ -311,6 +324,12 @@ impl OnlineLR {
)
}
}

pub fn update(&mut self, new_x: MatRef<f64>, new_y: MatRef<f64>, c: f64) {
if !(new_x.has_nan() || new_y.has_nan()) {
self.update_unchecked(new_x, new_y, c)
}
}
}

impl LinearRegression for OnlineLR {
@@ -327,12 +346,25 @@ impl LinearRegression for OnlineLR {
}

fn fit_unchecked(&mut self, X: MatRef<f64>, y: MatRef<f64>) {
(self.inv, self.coefficients) = match self.method {
ClosedFormLRMethods::Normal => faer_qr_lstsq_with_inv(X, y),
ClosedFormLRMethods::L2 => {
faer_cholesky_ridge_with_inv(X, y, self.lambda, self.fit_bias)
}
};
if self.fit_bias {
let actual_features = X.ncols();
let ones = Mat::full(X.nrows(), 1, 1.0);
let new_x = faer::concat![[X, ones]];
let (inv, all_coefficients) = match self.method {
ClosedFormLRMethods::Normal => faer_qr_lstsq_with_inv(new_x.as_ref(), y),
ClosedFormLRMethods::L2 => {
faer_cholesky_ridge_with_inv(new_x.as_ref(), y, self.lambda, true)
}
};
self.inv = inv;
self.coefficients = all_coefficients.get(..actual_features, ..).to_owned();
self.bias = *all_coefficients.get(actual_features, 0);
} else {
(self.inv, self.coefficients) = match self.method {
ClosedFormLRMethods::Normal => faer_qr_lstsq_with_inv(X, y),
ClosedFormLRMethods::L2 => faer_cholesky_ridge_with_inv(X, y, self.lambda, false),
};
}
}
}

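The bias handling added to fit_unchecked above follows a standard trick: append a column of ones to X, solve the (possibly ridge-regularized) system as usual, then split the last weight off as the bias. A small NumPy sketch of that idea follows; it is an illustration only, not the library's faer-based solver, and it regularizes every weight including the bias, which the real Cholesky path may treat differently.

```python
# NumPy sketch of fit-with-bias via an appended ones column, mirroring fit_unchecked.
import numpy as np

def fit_with_bias(X: np.ndarray, y: np.ndarray, lambda_: float = 0.0):
    n_feats = X.shape[1]
    Xb = np.hstack([X, np.ones((X.shape[0], 1))])                  # [X | 1]
    inv = np.linalg.inv(Xb.T @ Xb + lambda_ * np.eye(n_feats + 1))  # (X'X + lambda I)^-1
    w = inv @ Xb.T @ y                                              # all weights, bias last
    coeffs, bias = w[:n_feats], w[n_feats, 0]
    return inv, coeffs, bias                                        # inv is kept for online updates
```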
@@ -576,7 +608,7 @@ pub fn faer_qr_lstsq_with_inv(x: MatRef<f64>, y: MatRef<f64>) -> (Mat<f64>, Mat<
(inv, weights)
}

/// Returns the coefficients for lstsq with l2 (Ridge) regularization as a nrows x 1 matrix
/// Returns the coefficients for lstsq with l2 (Ridge) regularization together with the inverse of XtX
/// By default this uses Cholesky to solve the system, and in case the matrix is not positive
/// definite, it falls back to SVD. (I suspect the matrix in this case is always positive definite!)
#[inline(always)]
@@ -730,7 +762,6 @@ pub fn faer_recursive_lstsq(
y: MatRef<f64>,
n: usize,
lambda: f64,
has_bias: bool,
) -> Vec<Mat<f64>> {
let xn = x.nrows();
// x: size xn x m
@@ -741,7 +772,9 @@ pub fn faer_recursive_lstsq(
let x0 = x.get(..n, ..);
let y0 = y.get(..n, ..);

let mut online_lr = OnlineLR::new(lambda, has_bias);
// If add_bias is set, the column of 1s has already been appended to
// all the data, so there is no need for OnlineLR to add it again.
let mut online_lr = OnlineLR::new(lambda, false);
online_lr.fit_unchecked(x0, y0); // safe because things are checked in plugin / python functions.
coefficients.push(online_lr.get_coefficients());
for j in n..xn {
@@ -756,13 +789,7 @@
/// Given all data, we start running lstsq at position n and compute new coefficients recursively.
/// This will return all coefficients for rows >= n. This will only be used in Polars Expressions.
/// This supports Normal or Ridge regression
pub fn faer_rolling_lstsq(
x: MatRef<f64>,
y: MatRef<f64>,
n: usize,
lambda: f64,
has_bias: bool,
) -> Vec<Mat<f64>> {
pub fn faer_rolling_lstsq(x: MatRef<f64>, y: MatRef<f64>, n: usize, lambda: f64) -> Vec<Mat<f64>> {
let xn = x.nrows();
// x: size xn x m
// y: size xn x 1
@@ -772,7 +799,9 @@ pub fn faer_rolling_lstsq(x: MatRef<f64>, y: MatRef<f64>, n: usize, lambda: f64) -> Vec<Mat<f64>> {
let x0 = x.get(..n, ..);
let y0 = y.get(..n, ..);

let mut online_lr = OnlineLR::new(lambda, has_bias);
// If add_bias is set, the column of 1s has already been appended to
// all the data, so there is no need for OnlineLR to add it again.
let mut online_lr = OnlineLR::new(lambda, false);
online_lr.fit_unchecked(x0, y0);
coefficients.push(online_lr.get_coefficients());

@@ -799,7 +828,6 @@ pub fn faer_rolling_skipping_lstsq(
n: usize,
m: usize,
lambda: f64,
has_bias: bool,
) -> Vec<Mat<f64>> {
let xn = x.nrows();
let ncols = x.ncols();
@@ -816,7 +844,9 @@ pub fn faer_rolling_skipping_lstsq(
let mut x_slice: Vec<f64> = Vec::with_capacity(n * ncols);
let mut y_slice: Vec<f64> = Vec::with_capacity(n);

let mut online_lr = OnlineLR::new(lambda, has_bias);
// If add_bias is set, the column of 1s has already been appended to
// all the data, so there is no need for OnlineLR to add it again.
let mut online_lr = OnlineLR::new(lambda, false);
while right <= xn {
// Somewhat redundant here.
non_null_cnt_in_window = 0;
@@ -881,15 +911,15 @@ fn woodbury_step(
weights: MatMut<f64>,
new_x: MatRef<f64>,
new_y: MatRef<f64>,
c: f64, // Should be +1 or -1, for a "update" and a "removal"
c: f64, // +1 or -1, for an "update" and a "removal"
) {
// It is truly amazing that the c in the Woodbury identity essentially controls the update
// and removal of a new record (rolling)... Linear regression seems to be designed by God to work so well

let left = &inverse * new_x.transpose(); // corresponding to u in the reference
// right = left.transpose() by the fact that if A is symmetric, invertible, A-1 is also symmetric
let z = (c + (new_x * &left).read(0, 0)).recip();
// Update the inverse
// Update the information matrix's inverse. Page 56 of the gatech reference
faer::linalg::matmul::matmul(
inverse,
&left,
@@ -901,7 +931,7 @@

// Difference from estimate using prior weights vs. actual next y
let y_diff = new_y - (new_x * &weights);
// Update weights
// Update weights. Page 56, after 'Then', in the gatech reference
faer::linalg::matmul::matmul(
weights,
left,
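
woodbury_step above is a rank-1 Sherman–Morrison/Woodbury update of the inverse of the information matrix (XtX) and of the weights; as the comments note, the sign of c decides whether a record's impact is added (+1) or removed (-1). Below is a NumPy sketch of the same algebra, checked against a full refit. It is illustrative only: names and data are made up, and faer's accumulating matmul calls are replaced by plain NumPy operations.

```python
# NumPy mirror of the rank-1 Woodbury update performed by woodbury_step (illustrative).
import numpy as np

def woodbury_step(inv, w, x_new, y_new, c=1.0):
    """Update inv ~ (X'X)^-1 and the weights w for one added (c=+1) or removed (c=-1) row."""
    x = x_new.reshape(-1, 1)              # column vector
    left = inv @ x                        # "u" in the referenced notes
    z = 1.0 / (c + (x.T @ left).item())   # scalar gain
    inv_new = inv - z * (left @ left.T)   # rank-1 correction of the inverse
    y_diff = y_new - (x.T @ w).item()     # residual under the old weights
    w_new = w + z * left * y_diff         # weight correction
    return inv_new, w_new

# Tiny sanity check against a full refit on made-up data.
rng = np.random.default_rng(1)
X = rng.normal(size=(50, 3))
y = X @ np.array([[1.0], [2.0], [-0.5]])
inv = np.linalg.inv(X.T @ X)
w = inv @ X.T @ y
x_new, y_new = rng.normal(size=3), 0.3
inv, w = woodbury_step(inv, w, x_new, y_new, c=1.0)
X2, y2 = np.vstack([X, x_new]), np.vstack([y, [[y_new]]])
assert np.allclose(w, np.linalg.solve(X2.T @ X2, X2.T @ y2))
```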
14 changes: 7 additions & 7 deletions src/num_ext/linear_regression.rs
@@ -734,20 +734,20 @@ fn pl_wls_report(inputs: &[Series], kwargs: LstsqKwargs) -> PolarsResult<Series>
#[polars_expr(output_type_func=coeff_pred_output)]
fn pl_recursive_lstsq(inputs: &[Series], kwargs: SWWLstsqKwargs) -> PolarsResult<Series> {
let n = kwargs.n; // Guaranteed n >= 1
let has_bias = kwargs.bias;
let add_bias = kwargs.bias;

// Guaranteed in Python that this won't be SKIP. SKIP doesn't work now.
let null_policy = NullPolicy::try_from(kwargs.null_policy)
.map_err(|e| PolarsError::ComputeError(e.into()))?;

// Target y is at index 0
match series_to_mat_for_lstsq(inputs, has_bias, null_policy) {
match series_to_mat_for_lstsq(inputs, add_bias, null_policy) {
Ok((mat, mask)) => {
// Solving Least Square
let x = mat.slice(s![.., 1..]).into_faer();
let y = mat.slice(s![.., 0..1]).into_faer();

let coeffs = faer_recursive_lstsq(x, y, n, kwargs.lambda, has_bias);
let coeffs = faer_recursive_lstsq(x, y, n, kwargs.lambda);
let mut builder: ListPrimitiveChunkedBuilder<Float64Type> =
ListPrimitiveChunkedBuilder::new(
"coeffs",
@@ -820,7 +820,7 @@ fn pl_recursive_lstsq(inputs: &[Series], kwargs: SWWLstsqKwargs) -> PolarsResult
#[polars_expr(output_type_func=coeff_pred_output)] // They share the same output type
fn pl_rolling_lstsq(inputs: &[Series], kwargs: SWWLstsqKwargs) -> PolarsResult<Series> {
let n = kwargs.n; // Guaranteed n >= 2
let has_bias = kwargs.bias;
let add_bias = kwargs.bias;

let mut null_policy = NullPolicy::try_from(kwargs.null_policy)
.map_err(|e| PolarsError::ComputeError(e.into()))?;
@@ -833,7 +833,7 @@ fn pl_rolling_lstsq(inputs: &[Series], kwargs: SWWLstsqKwargs) -> PolarsResult<S
};

// Target y is at index 0
match series_to_mat_for_lstsq(inputs, has_bias, null_policy) {
match series_to_mat_for_lstsq(inputs, add_bias, null_policy) {
Ok((mat, mask)) => {
let should_skip = match null_policy {
NullPolicy::SKIP_WINDOW | NullPolicy::FILL_WINDOW(_) => (!&mask).any(),
@@ -842,9 +842,9 @@ fn pl_rolling_lstsq(inputs: &[Series], kwargs: SWWLstsqKwargs) -> PolarsResult<S
let x = mat.slice(s![.., 1..]).into_faer();
let y = mat.slice(s![.., 0..1]).into_faer();
let coeffs = if should_skip {
faer_rolling_skipping_lstsq(x, y, n, kwargs.min_size, kwargs.lambda, has_bias)
faer_rolling_skipping_lstsq(x, y, n, kwargs.min_size, kwargs.lambda)
} else {
faer_rolling_lstsq(x, y, n, kwargs.lambda, has_bias)
faer_rolling_lstsq(x, y, n, kwargs.lambda)
};

let mut builder: ListPrimitiveChunkedBuilder<Float64Type> =
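
The expression-side changes drop has_bias from the faer helpers because, as the comments in lstsq.rs note, series_to_mat_for_lstsq has already appended the ones column when bias is requested, so the internal OnlineLR runs with fit_bias = false. The recursive (expanding-window) and rolling variants are then just repeated Woodbury updates. A NumPy sketch under those assumptions (y is taken as an (n, 1) column; ridge and the null-skipping window are omitted):

```python
# NumPy sketch of the expanding-window and rolling least squares built on Woodbury updates.
import numpy as np

def _woodbury(inv, w, x, y, c):
    # Same rank-1 update as in the sketch after lstsq.rs above.
    x = x.reshape(-1, 1)
    left = inv @ x
    z = 1.0 / (c + (x.T @ left).item())
    return inv - z * (left @ left.T), w + z * left * (y - (x.T @ w).item())

def recursive_lstsq(X, y, n):
    """Expanding window: fit on the first n rows, then add later rows one at a time (c=+1)."""
    inv = np.linalg.inv(X[:n].T @ X[:n])
    w = inv @ X[:n].T @ y[:n]
    out = [w.copy()]
    for j in range(n, X.shape[0]):
        inv, w = _woodbury(inv, w, X[j], y[j].item(), 1.0)
        out.append(w.copy())
    return out

def rolling_lstsq(X, y, n):
    """Fixed window of size n: add the incoming row (c=+1) and remove the outgoing one (c=-1)."""
    inv = np.linalg.inv(X[:n].T @ X[:n])
    w = inv @ X[:n].T @ y[:n]
    out = [w.copy()]
    for j in range(n, X.shape[0]):
        inv, w = _woodbury(inv, w, X[j], y[j].item(), 1.0)
        inv, w = _woodbury(inv, w, X[j - n], y[j - n].item(), -1.0)
        out.append(w.copy())
    return out
```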