Skip to content

Commit

Permalink
Merge branch 'fix/clustersplit_seed' into 'dev'
Browse files Browse the repository at this point in the history
Fix inconsistent results benchmarking with clustersplit

See merge request cdd/QSPRpred!177
  • Loading branch information
HellevdM committed Mar 13, 2024
2 parents 21c42ec + a4869b0 commit d703692
Show file tree
Hide file tree
Showing 43 changed files with 11,658 additions and 11,760 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ From v3.0.2 to v3.0.3

- Fixed a bug where an attached standardizer would be refit when calling
`QSPRModel.predictMols` with `use_applicability_domain=True`.
- Fixed the random seed not being set in `FoldsFromDataSplit.iterFolds` for `ClusterSplit`.

## Changes

Expand Down
4 changes: 2 additions & 2 deletions qsprpred/benchmarks/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def getSeedList(self, seed: int | None = None) -> list[int]:
"""
Get a list of seeds for the replicas from one 'master' randomSeed.
The list of seeds is generated by sampling from the range of
possible seeds (0 to 2^32 - 1) with the given randomSeed as the random
possible seeds (0 to 2**31-1) with the given randomSeed as the random
seed for the random module. This means that the list of seeds
will be the same for each run of the benchmarking experiment
with the same 'master' randomSeed. This is useful for reproducibility,
Expand All @@ -181,7 +181,7 @@ def getSeedList(self, seed: int | None = None) -> list[int]:
"""
seed = seed or self.settings.random_seed
random.seed(seed)
return random.sample(range(2**32 - 1), self.nRuns)
return random.sample(range(2**31), self.nRuns)

def iterReplicas(self) -> Generator[Replica, None, None]:
"""Generator that yields `Replica` objects for each benchmarking run.
Expand Down
11 changes: 11 additions & 0 deletions qsprpred/data/sampling/splits.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,3 +552,14 @@ def setSeed(self, seed: int | None):
self.seed = seed
if hasattr(self.clustering, "seed"):
self.clustering.seed = seed

def getSeed(self):
"""Get the seed for this instance.
Returns:
int: the seed for this instance or None if no seed is set.
"""
if hasattr(self, "seed"):
return self.seed
else:
return None
2 changes: 1 addition & 1 deletion qsprpred/data/tables/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def __init__(
self.storeFormat = store_format
self.randomState = None
self.setRandomState(
random_state or int(np.random.randint(0, 2**32 - 1, dtype=np.int64))
random_state or int(np.random.randint(0, 2**31-1, dtype=np.int64))
)
self.name = name
self.indexCols = index_cols
Expand Down
2 changes: 1 addition & 1 deletion qsprpred/models/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ def initRandomState(self, random_state):
Random state to use for shuffling and other random operations.
"""
if random_state is None:
self.randomState = int(np.random.randint(0, 2**32 - 1, dtype=np.int64))
self.randomState = int(np.random.randint(0, 2**31-1, dtype=np.int64))
logger.info(
"No random state supplied."
f"Setting random state to: {self.randomState}."
Expand Down
Loading

0 comments on commit d703692

Please sign in to comment.