Rename sample_weight to w (code quality) #1457

Merged · 7 commits · Nov 23, 2023
27 changes: 27 additions & 0 deletions docs/releases/unreleased.md
@@ -0,0 +1,27 @@
# Unreleased

## cluster

- Renamed the `sample_weight` parameter to `w` in `learn_one` and `predict_one`.

## ensemble

- Renamed the `sample_weight` parameter to `w` in `learn_one`.

## facto

- Renamed the `sample_weight` parameter to `w` in `learn_one`.

## forest

- Renamed the `sample_weight` parameter to `w` in `learn_one`.

## tree

- Renamed the `sample_weight` parameter to `w` in `learn_one`.

## metrics

- Renamed the `sample_weight` parameter to `w` in `update` and `revert`.
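
The rename is one mechanical API change applied across modules: the per-sample weight keyword becomes `w` everywhere. A minimal before/after sketch, assuming a metric such as `metrics.Accuracy`, which inherits the `ClassificationMetric.update` signature changed in this diff:

```python
from river import metrics

acc = metrics.Accuracy()

# Before this PR: acc.update(y_true=True, y_pred=True, sample_weight=2.0)
# After this PR, the same call uses `w`:
acc.update(y_true=True, y_pred=True, w=2.0)
acc.update(y_true=True, y_pred=False, w=1.0)

# The correct prediction carries twice the weight, so this prints 2 / 3
print(acc.get())
```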


4 changes: 2 additions & 2 deletions river/cluster/dbstream.py
@@ -389,7 +389,7 @@ def _recluster(self):

self.clustering_is_up_to_date = True

- def learn_one(self, x, sample_weight=None):
+ def learn_one(self, x, w=None):
self._update(x)

if self._time_stamp % self.cleanup_interval == 0:
@@ -399,7 +399,7 @@ def learn_one(self, x, sample_weight=None):

return self

- def predict_one(self, x, sample_weight=None):
+ def predict_one(self, x, w=None):
self._recluster()

min_distance = math.inf
4 changes: 2 additions & 2 deletions river/cluster/denstream.py
@@ -313,7 +313,7 @@ def _initial_dbscan(self):
else:
item.covered = False

- def learn_one(self, x, sample_weight=None):
+ def learn_one(self, x, w=None):
self._n_samples_seen += 1
# control the stream speed
if self._n_samples_seen % self.stream_speed == 0:
@@ -352,7 +352,7 @@ def learn_one(self, x, sample_weight=None):
self.o_micro_clusters.pop(j)
return self

- def predict_one(self, x, sample_weight=None):
+ def predict_one(self, x, w=None):
# This function handles the case when a clustering request arrives.
# implementation of the DBSCAN algorithm proposed by Ester et al.
if not self.initialized:
4 changes: 2 additions & 2 deletions river/cluster/streamkmeans.py
@@ -84,7 +84,7 @@ def __init__(self, chunk_size=10, n_clusters=2, **kwargs):
self._temp_chunk = {}
self.centers = {}

- def learn_one(self, x, sample_weight=None):
+ def learn_one(self, x, w=None):
self.time_stamp += 1

index = self.time_stamp % self.chunk_size
@@ -107,7 +107,7 @@ def learn_one(self, x, sample_weight=None):

return self

- def predict_one(self, x, sample_weight=None):
+ def predict_one(self, x, w=None):
def get_distance(c):
return utils.math.minkowski_distance(self.centers[c], x, 2)

4 changes: 2 additions & 2 deletions river/cluster/textclust.py
@@ -153,7 +153,7 @@ def __init__(
self.micro_distance = self.distances(self.micro_distance)
self.macro_distance = self.distances(self.macro_distance)

- def learn_one(self, x, t=None, sample_weight=None):
+ def learn_one(self, x, t=None, w=None):
localdict = {}
for key in x.keys():
new_key = key
@@ -213,7 +213,7 @@ def learn_one(self, x, t=None, sample_weight=None):

## predicts the cluster number. The type specifies whether this should happen on micro-cluster
## or macro-cluster level
- def predict_one(self, x, sample_weight=None, type="micro"):
+ def predict_one(self, x, w=None, type="micro"):
localdict = {}
for key in x.keys():
new_key = key
18 changes: 9 additions & 9 deletions river/ensemble/streaming_random_patches.py
@@ -109,7 +109,7 @@ def learn_one(self, x: dict, y: base.typing.Target, **kwargs):
k = poisson(rate=self.lam, rng=self._rng)
if k == 0:
continue
- model.learn_one(x=x, y=y, sample_weight=k, n_samples_seen=self._n_samples_seen)
+ model.learn_one(x=x, y=y, w=k, n_samples_seen=self._n_samples_seen)

return self

@@ -532,7 +532,7 @@ def learn_one(
x: dict,
y: base.typing.ClfTarget,
*,
- sample_weight: int,
+ w: int,
n_samples_seen: int,
**kwargs,
):
@@ -543,16 +543,16 @@ def learn_one(
# Use all features
x_subset = x

- # TODO Find a way to verify if the model natively supports sample_weight
- for _ in range(int(sample_weight)):
+ # TODO Find a way to verify if the model natively supports sample_weight (w)
+ for _ in range(int(w)):
self.model.learn_one(x=x_subset, y=y, **kwargs)

if self._background_learner:
# Train the background learner
# Note: Pass the original instance x so features are correctly
# selected based on the corresponding subspace
self._background_learner.learn_one(
- x=x, y=y, sample_weight=sample_weight, n_samples_seen=n_samples_seen # type: ignore
+ x=x, y=y, w=w, n_samples_seen=n_samples_seen # type: ignore
)

if not self.disable_drift_detector and not self.is_background_learner:
@@ -830,7 +830,7 @@ def learn_one(
x: dict,
y: base.typing.RegTarget,
*,
- sample_weight: int,
+ w: int,
n_samples_seen: int,
**kwargs,
):
@@ -842,8 +842,8 @@ def learn_one(
# Use all features
x_subset = x

- # TODO Find a way to verify if the model natively supports sample_weight
- for _ in range(int(sample_weight)):
+ # TODO Find a way to verify if the model natively supports sample_weight (w)
+ for _ in range(int(w)):
self.model.learn_one(x=x_subset, y=y, **kwargs)

# Drift detection input
Expand All @@ -860,7 +860,7 @@ def learn_one(
# Note: Pass the original instance x so features are correctly
# selected based on the corresponding subspace
self._background_learner.learn_one(
- x=x, y=y, sample_weight=sample_weight, n_samples_seen=n_samples_seen # type: ignore
+ x=x, y=y, w=w, n_samples_seen=n_samples_seen # type: ignore
)

if not self.disable_drift_detector and not self.is_background_learner:
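The `for _ in range(int(w))` loops above, and the TODOs next to them, show how these ensemble members emulate an integer sample weight when the base model may not support one natively: train on the same sample `w` times. A standalone sketch of the pattern; `CountingModel` and `learn_one_weighted` are hypothetical names, for illustration only:

```python
class CountingModel:
    """Hypothetical base model that just counts how often it sees each label."""

    def __init__(self):
        self.counts = {}

    def learn_one(self, x, y):
        self.counts[y] = self.counts.get(y, 0) + 1


def learn_one_weighted(model, x, y, w):
    # Emulate an integer sample weight by repeating the update,
    # mirroring the `for _ in range(int(w))` loop in the diff above
    for _ in range(int(w)):
        model.learn_one(x, y)


model = CountingModel()
learn_one_weighted(model, {"x1": 1.0}, y=True, w=3)
print(model.counts)  # {True: 3}
```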
8 changes: 4 additions & 4 deletions river/facto/base.py
@@ -65,28 +65,28 @@ def __init__(
def _init_latents(self) -> collections.defaultdict:
"""Initializes latent weights dict."""

- def learn_one(self, x, y, sample_weight=1.0):
+ def learn_one(self, x, y, w=1.0):
x = self._ohe_cat_features(x)

if self.sample_normalization:
x_l2_norm = sum(xj**2 for xj in x.values()) ** 0.5
x = {j: xj / x_l2_norm for j, xj in x.items()}

- return self._learn_one(x, y, sample_weight=sample_weight)
+ return self._learn_one(x, y, w=w)

def _ohe_cat_features(self, x):
"""One hot encodes string features considering them as categorical."""
return dict((f"{j}_{xj}", 1) if isinstance(xj, str) else (j, xj) for j, xj in x.items())

- def _learn_one(self, x, y, sample_weight=1.0):
+ def _learn_one(self, x, y, w=1.0):
# Calculate the gradient of the loss with respect to the raw output
g_loss = self.loss.gradient(y_true=y, y_pred=self._raw_dot(x))

# Clamp the gradient to avoid numerical instability
g_loss = utils.math.clamp(g_loss, minimum=-self.clip_gradient, maximum=self.clip_gradient)

# Apply the sample weight
- g_loss *= sample_weight
+ g_loss *= w

# Update the intercept
intercept_lr = self.intercept_lr.get(self.weight_optimizer.n_iterations)
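In `_learn_one` above, `w` enters the update in exactly one place: it scales the clipped loss gradient before the intercept and weights are adjusted. A self-contained numeric sketch of that step; the squared loss and the clip value are illustrative assumptions, not taken from this diff:

```python
def clamp(x, minimum, maximum):
    return max(minimum, min(x, maximum))


def weighted_gradient(y_true, y_pred, w=1.0, clip_gradient=1e12):
    g_loss = 2 * (y_pred - y_true)  # gradient of squared loss w.r.t. y_pred
    # Clamp for numerical stability, then apply the sample weight,
    # mirroring `g_loss *= w` in the diff above
    g_loss = clamp(g_loss, minimum=-clip_gradient, maximum=clip_gradient)
    return g_loss * w


print(weighted_gradient(y_true=1.0, y_pred=0.5, w=2.0))  # -2.0
```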
4 changes: 2 additions & 2 deletions river/forest/adaptive_random_forest.py
@@ -169,9 +169,9 @@ def learn_one(self, x: dict, y: base.typing.Target, **kwargs):
k = poisson(rate=self.lambda_value, rng=self._rng)
if k > 0:
if not self._warning_detection_disabled and self._background[i] is not None:
- self._background[i].learn_one(x=x, y=y, sample_weight=k) # type: ignore
+ self._background[i].learn_one(x=x, y=y, w=k) # type: ignore

- model.learn_one(x=x, y=y, sample_weight=k)
+ model.learn_one(x=x, y=y, w=k)

drift_input = None
if not self._warning_detection_disabled:
4 changes: 2 additions & 2 deletions river/forest/online_extra_trees.py
@@ -314,10 +314,10 @@ def learn_one(self, x, y):
if w == 0: # Skip model update if w is zero
continue

- model.learn_one(x, y, sample_weight=w)
+ model.learn_one(x, y, w=w)

if i in self._background_trees:
- self._background_trees[i].learn_one(x, y, sample_weight=w)
+ self._background_trees[i].learn_one(x, y, w=w)

trained.append(i)

38 changes: 19 additions & 19 deletions river/metrics/base.py
@@ -84,19 +84,19 @@ def __init__(self, cm: confusion.ConfusionMatrix | None = None):
cm = confusion.ConfusionMatrix()
self.cm = cm

- def update(self, y_true, y_pred, sample_weight=1.0):
+ def update(self, y_true, y_pred, w=1.0):
self.cm.update(
y_true,
y_pred,
- sample_weight=sample_weight,
+ w=w,
)
return self

- def revert(self, y_true, y_pred, sample_weight=1.0):
+ def revert(self, y_true, y_pred, w=1.0):
self.cm.revert(
y_true,
y_pred,
- sample_weight=sample_weight,
+ w=w,
)
return self

@@ -148,21 +148,21 @@ def update(
self,
y_true: bool,
y_pred: bool | float | dict[bool, float],
- sample_weight=1.0,
+ w=1.0,
) -> BinaryMetric:
if self.requires_labels:
y_pred = y_pred == self.pos_val
- return super().update(y_true == self.pos_val, y_pred, sample_weight)
+ return super().update(y_true == self.pos_val, y_pred, w)

def revert(
self,
y_true: bool,
y_pred: bool | float | dict[bool, float],
- sample_weight=1.0,
+ w=1.0,
) -> BinaryMetric:
if self.requires_labels:
y_pred = y_pred == self.pos_val
- return super().revert(y_true == self.pos_val, y_pred, sample_weight)
+ return super().revert(y_true == self.pos_val, y_pred, w)


class MultiClassMetric(ClassificationMetric):
@@ -224,7 +224,7 @@ def __init__(self, metrics, str_sep=", "):
super().__init__(metrics)
self.str_sep = str_sep

- def update(self, y_true, y_pred, sample_weight=1.0):
+ def update(self, y_true, y_pred, w=1.0):
# If the metrics are classification metrics, then we have to handle the case where some
# of the metrics require labels, whilst others need to be fed probabilities
if hasattr(self, "requires_labels") and not self.requires_labels:
@@ -239,19 +239,19 @@ def update(self, y_true, y_pred, sample_weight=1.0):
m.update(y_true, y_pred)
return self

- def revert(self, y_true, y_pred, sample_weight=1.0):
+ def revert(self, y_true, y_pred, w=1.0):
# If the metrics are classification metrics, then we have to handle the case where some
# of the metrics require labels, whilst others need to be fed probabilities
if hasattr(self, "requires_labels") and not self.requires_labels:
for m in self:
if m.requires_labels:
- m.revert(y_true, max(y_pred, key=y_pred.get), sample_weight)
+ m.revert(y_true, max(y_pred, key=y_pred.get), w)
else:
- m.revert(y_true, y_pred, sample_weight)
+ m.revert(y_true, y_pred, w)
return self

for m in self:
- m.revert(y_true, y_pred, sample_weight)
+ m.revert(y_true, y_pred, w)
return self

def get(self):
Expand Down Expand Up @@ -333,12 +333,12 @@ def __init__(self):
def _eval(self, y_true, y_pred):
pass

- def update(self, y_true, y_pred, sample_weight=1.0):
- self._mean.update(x=self._eval(y_true, y_pred), w=sample_weight)
+ def update(self, y_true, y_pred, w=1.0):
+ self._mean.update(x=self._eval(y_true, y_pred), w=w)
return self

- def revert(self, y_true, y_pred, sample_weight=1.0):
- self._mean.revert(x=self._eval(y_true, y_pred), w=sample_weight)
+ def revert(self, y_true, y_pred, w=1.0):
+ self._mean.revert(x=self._eval(y_true, y_pred), w=w)
return self

def get(self):
Expand All @@ -354,11 +354,11 @@ class ClusteringMetric(base.Base, abc.ABC):
_fmt = ",.6f" # Use commas to separate big numbers and show 6 decimals

@abc.abstractmethod
- def update(self, x, y_pred, centers, sample_weight=1.0) -> ClusteringMetric:
+ def update(self, x, y_pred, centers, w=1.0) -> ClusteringMetric:
"""Update the metric."""

@abc.abstractmethod
- def revert(self, x, y_pred, centers, sample_weight=1.0) -> ClusteringMetric:
+ def revert(self, x, y_pred, centers, w=1.0) -> ClusteringMetric:
"""Revert the metric."""

@abc.abstractmethod
20 changes: 10 additions & 10 deletions river/metrics/confusion.py
@@ -62,22 +62,22 @@ def __getitem__(self, key):
"""Syntactic sugar for accessing the counts directly."""
return self.data[key]

- def update(self, y_true, y_pred, sample_weight=1.0):
+ def update(self, y_true, y_pred, w=1.0):
self.n_samples += 1
- self._update(y_true, y_pred, sample_weight)
+ self._update(y_true, y_pred, w)
return self

- def revert(self, y_true, y_pred, sample_weight=1.0):
+ def revert(self, y_true, y_pred, w=1.0):
self.n_samples -= 1
- # Revert is equal to subtracting so we pass the negative sample_weight
- self._update(y_true, y_pred, -sample_weight)
+ # Revert is equal to subtracting so we pass the negative sample_weight (w)
+ self._update(y_true, y_pred, -w)
return self

- def _update(self, y_true, y_pred, sample_weight):
- self.data[y_true][y_pred] += sample_weight
- self.total_weight += sample_weight
- self.sum_row[y_true] += sample_weight
- self.sum_col[y_pred] += sample_weight
+ def _update(self, y_true, y_pred, w):
+ self.data[y_true][y_pred] += w
+ self.total_weight += w
+ self.sum_row[y_true] += w
+ self.sum_col[y_pred] += w

@property
def classes(self):
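`revert` works above because `_update` is linear in `w`: reverting an observation is just updating with the negated weight. A minimal sketch of that invariant with a plain counter, a simplification of the `ConfusionMatrix` in this file:

```python
import collections

counts = collections.defaultdict(lambda: collections.defaultdict(float))


def _update(y_true, y_pred, w):
    counts[y_true][y_pred] += w


_update("cat", "dog", w=2.0)   # update with weight 2
_update("cat", "dog", w=-2.0)  # revert by passing the negative weight
print(counts["cat"]["dog"])    # 0.0, back to the initial state
```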
4 changes: 2 additions & 2 deletions river/metrics/mse.py
@@ -81,5 +81,5 @@ class RMSLE(RMSE):

"""

- def update(self, y_true, y_pred, sample_weight=1.0):
- return super().update(math.log(y_true + 1), math.log(y_pred + 1), sample_weight)
+ def update(self, y_true, y_pred, w=1.0):
+ return super().update(math.log(y_true + 1), math.log(y_pred + 1), w)
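Since `RMSLE` only re-expresses its inputs as `log(y + 1)` and defers to `RMSE`, the weight passes straight through to the parent metric. A short usage sketch, assuming `metrics.RMSLE` exposes this three-argument `update`:

```python
from river import metrics

rmsle = metrics.RMSLE()
rmsle.update(y_true=3.0, y_pred=2.0, w=1.0)

# By the override above this is RMSE on log(y + 1) values, so with a
# single observation the result is |log(4) - log(3)| ≈ 0.2877
print(rmsle.get())
```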