From c49cee42b9824e551bbf186cf0b5b2e89c7ed5cd Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Sun, 18 Dec 2022 18:44:50 -0700
Subject: [PATCH 001/157] bump

---
 CHANGELOG.md       | 1 +
 nequip/_version.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 20146a16..85f99112 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 Most recent change on the bottom.

+## [Unreleased] - 0.6.0
 ## [Unreleased] - 0.5.6
 ### Added

diff --git a/nequip/_version.py b/nequip/_version.py
index b02164d2..8e22989a 100644
--- a/nequip/_version.py
+++ b/nequip/_version.py
@@ -2,4 +2,4 @@
 # See Python packaging guide
 # https://packaging.python.org/guides/single-sourcing-package-version/

-__version__ = "0.5.6"
+__version__ = "0.6.0"
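The patches that follow split the single global dtype into two levels: `default_dtype` stays the global PyTorch default (now float64) and is used for dataset statistics, reference targets, and rescaling, while a new `model_dtype` (typically float32) is the precision of the network itself. A minimal sketch of that two-level scheme, with illustrative values (not code from the patches):

    import torch

    # default_dtype governs everything outside the network: statistics,
    # reference targets, scale/shift parameters.
    torch.set_default_dtype(torch.float64)
    model_dtype = torch.float32  # precision of the network weights

    pos = torch.randn(10, 3)      # created in float64 under the default
    pos_in = pos.to(model_dtype)  # cast once at the model boundary
    assert pos_in.dtype == torch.float32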
From 585c5bdc27ff3bfc4b559b0a2a0b0eb123a4f169 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Sun, 18 Dec 2022 18:45:55 -0700
Subject: [PATCH 002/157] `model_dtype` initial

---
 nequip/data/dataset.py    | 16 ++++++++--------
 nequip/model/_build.py    | 14 +++++++++++++-
 nequip/nn/_atomwise.py    |  4 ++--
 nequip/scripts/train.py   |  5 +++--
 nequip/utils/misc.py      |  5 ++++-
 nequip/utils/regressor.py |  2 +-
 6 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/nequip/data/dataset.py b/nequip/data/dataset.py
index c38b8eae..588fa38d 100644
--- a/nequip/data/dataset.py
+++ b/nequip/data/dataset.py
@@ -41,12 +41,14 @@ class AtomicDataset(Dataset):

     fixed_fields: Dict[str, Any]
     root: str
+    dtype: torch.dtype

     def __init__(
         self,
         root: str,
         type_mapper: Optional[TypeMapper] = None,
     ):
+        self.dtype = torch.get_default_dtype()
         super().__init__(root=root, transform=type_mapper)

     def statistics(
@@ -80,7 +82,7 @@ def _get_parameters(self) -> Dict[str, Any]:
             if k not in IGNORE_KEYS and hasattr(self, k)
         }
         # Add other relevant metadata:
-        params["dtype"] = str(torch.get_default_dtype())
+        params["dtype"] = str(self.dtype)
         params["nequip_version"] = nequip.__version__
         return params

@@ -441,9 +443,7 @@ def statistics(
             if callable(field):
                 # make a joined thing? so it includes fixed fields
                 arr, arr_is_per = field(data_transformed)
-                arr = arr.to(
-                    torch.get_default_dtype()
-                )  # all statistics must be on floating
+                arr = arr.to(self.dtype)  # all statistics must be on floating
                 assert arr_is_per in ("node", "graph", "edge")
             else:
                 if field not in all_keys:
@@ -475,7 +475,7 @@ def statistics(
                 )
                 if not isinstance(arr, torch.Tensor):
                     if np.issubdtype(arr.dtype, np.floating):
-                        arr = torch.as_tensor(arr, dtype=torch.get_default_dtype())
+                        arr = torch.as_tensor(arr, dtype=self.dtype)
                     else:
                         arr = torch.as_tensor(arr)
                 if arr_is_per == "node":
@@ -583,8 +583,8 @@ def _per_atom_statistics(
                 f"{ana_mode} for per-atom analysis is not implemented"
             )

-    @staticmethod
     def _per_species_statistics(
+        self,
         ana_mode: str,
         arr: torch.Tensor,
         arr_is_per: str,
@@ -610,12 +610,12 @@ def _per_species_statistics(
                     f"{ana_mode} for per species analysis is not implemented for shape {arr.shape}"
                 )

-            N = N.type(torch.get_default_dtype())
+            N = N.type(self.dtype)

            return solver(N, arr, **algorithm_kwargs)

        elif arr_is_per == "node":
-            arr = arr.type(torch.get_default_dtype())
+            arr = arr.type(self.dtype)

            if ana_mode == "mean_std":
                mean = scatter_mean(arr, atom_types, dim=0)

diff --git a/nequip/model/_build.py b/nequip/model/_build.py
index 7e1a63fd..9c1fc186 100644
--- a/nequip/model/_build.py
+++ b/nequip/model/_build.py
@@ -1,10 +1,12 @@
 import inspect
 from typing import Optional

+import torch
+
 from nequip.data import AtomicDataset
 from nequip.data.transforms import TypeMapper
 from nequip.nn import GraphModuleMixin
-from nequip.utils import load_callable, instantiate
+from nequip.utils import load_callable, instantiate, dtype_from_name


 def model_from_config(
@@ -22,6 +24,8 @@ def model_from_config(
     - ``dataset``: if ``initialize`` is True, the dataset
     - ``deploy``: whether the model object is for deployment / inference

+    Note that this function temporarily changes the global default dtype via ``torch.set_default_dtype()`` and as such is not thread safe.
+
     Args:
         config
         initialize (bool): whether ``model_builders`` should be instructed to initialize the model
@@ -53,6 +57,11 @@ def model_from_config(
         config["num_types"] = type_mapper.num_types
         config["type_names"] = type_mapper.type_names

+    default_dtype = torch.get_default_dtype()
+    config["model_dtype"] = dtype_from_name(config.get("model_dtype", default_dtype))
+    # set temporarily the default dtype
+    torch.set_default_dtype(config["model_dtype"])
+
     # Build
     builders = [
         load_callable(b, prefix="nequip.model")
@@ -99,4 +108,7 @@ def model_from_config(
                 f"Builder {builder.__name__} didn't return a GraphModuleMixin, got {type(model)} instead"
             )

+    # reset default dtype
+    torch.set_default_dtype(default_dtype)
+
     return model

diff --git a/nequip/nn/_atomwise.py b/nequip/nn/_atomwise.py
index 6b7a2ecd..a2ed315d 100644
--- a/nequip/nn/_atomwise.py
+++ b/nequip/nn/_atomwise.py
@@ -146,7 +146,7 @@ def __init__(

         self.has_shifts = shifts is not None
         if shifts is not None:
-            shifts = torch.as_tensor(shifts, dtype=torch.get_default_dtype())
+            shifts = torch.as_tensor(shifts)
             if len(shifts.reshape([-1])) == 1:
                 shifts = torch.ones(num_types) * shifts
             assert shifts.shape == (num_types,), f"Invalid shape of shifts {shifts}"
@@ -158,7 +158,7 @@ def __init__(

         self.has_scales = scales is not None
         if scales is not None:
-            scales = torch.as_tensor(scales, dtype=torch.get_default_dtype())
+            scales = torch.as_tensor(scales)
             if len(scales.reshape([-1])) == 1:
                 scales = torch.ones(num_types) * scales
             assert scales.shape == (num_types,), f"Invalid shape of scales {scales}"

diff --git a/nequip/scripts/train.py b/nequip/scripts/train.py
index 88b55f7e..1f4f25be 100644
--- a/nequip/scripts/train.py
+++ b/nequip/scripts/train.py
@@ -34,8 +34,9 @@
         "RescaleEnergyEtc",
     ],
     dataset_statistics_stride=1,
-    default_dtype="float32",
-    allow_tf32=False,  # TODO: until we understand equivar issues
+    default_dtype="float64",
+    model_dtype="float32",
+    allow_tf32=False,
     verbose="INFO",
     model_debug_mode=False,
     equivariance_test=False,

diff --git a/nequip/utils/misc.py b/nequip/utils/misc.py
index 4beba97b..1adc602f 100644
--- a/nequip/utils/misc.py
+++ b/nequip/utils/misc.py
@@ -1,5 +1,8 @@
+from typing import Union
 import torch


-def dtype_from_name(name: str) -> torch.dtype:
+def dtype_from_name(name: Union[str, torch.dtype]) -> torch.dtype:
+    if isinstance(name, torch.dtype):
+        return name
     return {"float32": torch.float32, "float64": torch.float64}[name]

diff --git a/nequip/utils/regressor.py b/nequip/utils/regressor.py
index 30c8f9ab..a5866d2c 100644
--- a/nequip/utils/regressor.py
+++ b/nequip/utils/regressor.py
@@ -9,7 +9,7 @@


 def solver(X, y, alpha: Optional[float] = 0.001, stride: Optional[int] = 1, **kwargs):
-    dtype = torch.get_default_dtype()
+    dtype = y.dtype  # the floating point targets should have the right dtype
     X = X[::stride].to(dtype)
     y = y[::stride].to(dtype)
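Patch 002 above makes `model_from_config` swap the global default dtype while the builders run. A sketch of that pattern in isolation (hypothetical helper, not from the patch; note the patch itself restores the default at the end of the function rather than in a `finally` block):

    import torch

    def build_with_dtype(builder, model_dtype: torch.dtype):
        # temporarily make model_dtype the default so that all tensors the
        # builder creates (weights, buffers) come out in model_dtype
        prev = torch.get_default_dtype()
        torch.set_default_dtype(model_dtype)
        try:
            return builder()
        finally:
            # always restore, even if the builder raises
            torch.set_default_dtype(prev)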
From 3cb98540d458b6a2ed66c8ba62e1eac7581f0ac9 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Sun, 18 Dec 2022 19:49:16 -0700
Subject: [PATCH 003/157] GraphModel

---
 CHANGELOG.md                       |   2 +
 nequip/model/_build.py             |  14 +++-
 nequip/nn/__init__.py              |   1 +
 nequip/nn/_graph_model.py          | 108 +++++++++++++++++++++++++++++
 nequip/train/trainer.py            |  80 +++++++--------------
 tests/integration/test_evaluate.py |   5 +-
 tests/integration/test_train.py    |  10 ++-
 7 files changed, 160 insertions(+), 60 deletions(-)
 create mode 100644 nequip/nn/_graph_model.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 85f99112..d101b3ba 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 Most recent change on the bottom.

 ## [Unreleased] - 0.6.0
+### Added
+- Refactor overall model logic into `GraphModel` top-level module

 ## [Unreleased] - 0.5.6
 ### Added

diff --git a/nequip/model/_build.py b/nequip/model/_build.py
index 9c1fc186..7052c7b9 100644
--- a/nequip/model/_build.py
+++ b/nequip/model/_build.py
@@ -5,7 +5,7 @@

 from nequip.data import AtomicDataset
 from nequip.data.transforms import TypeMapper
-from nequip.nn import GraphModuleMixin
+from nequip.nn import GraphModuleMixin, GraphModel
 from nequip.utils import load_callable, instantiate, dtype_from_name


@@ -58,9 +58,10 @@ def model_from_config(
         config["type_names"] = type_mapper.type_names

     default_dtype = torch.get_default_dtype()
-    config["model_dtype"] = dtype_from_name(config.get("model_dtype", default_dtype))
+    model_dtype: torch.dtype = dtype_from_name(config.get("model_dtype", default_dtype))
+    config["model_dtype"] = str(model_dtype)
     # set temporarily the default dtype
-    torch.set_default_dtype(config["model_dtype"])
+    torch.set_default_dtype(model_dtype)

     # Build
     builders = [
@@ -111,4 +112,11 @@ def model_from_config(
     # reset default dtype
     torch.set_default_dtype(default_dtype)

+    # Wrap the model up
+    model = GraphModel(
+        model,
+        model_dtype=model_dtype,
+        model_input_fields=config.get("model_input_fields", {}),
+    )
+
     return model

diff --git a/nequip/nn/__init__.py b/nequip/nn/__init__.py
index 10cebee6..fac27f93 100644
--- a/nequip/nn/__init__.py
+++ b/nequip/nn/__init__.py
@@ -1,4 +1,5 @@
 from ._graph_mixin import GraphModuleMixin, SequentialGraphNetwork  # noqa: F401
+from ._graph_model import GraphModel  # noqa: F401
 from ._atomwise import (  # noqa: F401
     AtomwiseOperation,
     AtomwiseReduce,

diff --git a/nequip/nn/_graph_model.py b/nequip/nn/_graph_model.py
new file mode 100644
index 00000000..6dd60fe4
--- /dev/null
+++ b/nequip/nn/_graph_model.py
@@ -0,0 +1,108 @@
+from typing import List, Dict, Any
+
+import torch
+
+from nequip.data import AtomicDataDict
+
+from ._graph_mixin import GraphModuleMixin
+from ._rescale import RescaleOutput
+
+
+class GraphModel(GraphModuleMixin, torch.nn.Module):
+    """Top-level module for any complete `nequip` model.
+
+    Manages top-level rescaling, dtypes, and more.
+
+    Args:
+        model: the model to wrap
+        model_dtype: the dtype the wrapped model runs in
+        model_input_fields: input fields and their irreps
+    """
+
+    model_dtype: torch.dtype
+    model_input_fields: List[str]
+
+    _num_rescale_layers: int
+
+    def __init__(
+        self,
+        model: GraphModuleMixin,
+        model_dtype: torch.dtype,
+        model_input_fields: Dict[str, Any] = {},
+    ) -> None:
+        super().__init__()
+        irreps_in = {
+            # Things that always make sense as inputs:
+            AtomicDataDict.POSITIONS_KEY: "1o",
+            AtomicDataDict.EDGE_INDEX_KEY: None,
+            AtomicDataDict.EDGE_CELL_SHIFT_KEY: None,
+            AtomicDataDict.CELL_KEY: "3x1o",
+            AtomicDataDict.BATCH_KEY: None,
+            AtomicDataDict.ATOM_TYPE_KEY: None,
+        }
+        model_input_fields = AtomicDataDict._fix_irreps_dict(model_input_fields)
+        assert len(set(irreps_in.keys()).intersection(model_input_fields.keys())) == 0
+        irreps_in.update(model_input_fields)
+        self._init_irreps(irreps_in=irreps_in, irreps_out=model.irreps_out)
+        for k, irreps in model.irreps_in.items():
+            if self.irreps_in.get(k, None) != irreps:
+                raise RuntimeError(
+                    f"Model has `{k}` in its irreps_in with irreps `{irreps}`, but `{k}` is missing from/has inconsistent irreps in model_input_fields of `{self.irreps_in.get(k, 'missing')}`"
+                )
+        self.model = model
+        self.model_dtype = model_dtype
+        self.model_input_fields = list(self.irreps_in.keys())
+
+        self._num_rescale_layers = 0
+        outer_layer = self.model
+        while isinstance(outer_layer, RescaleOutput):
+            self._num_rescale_layers += 1
+            outer_layer = outer_layer.model
+
+    # == Rescaling ==
+    @torch.jit.unused
+    def all_RescaleOutputs(self) -> List[RescaleOutput]:
+        """All ``RescaleOutput``s wrapping the model, in evaluation order."""
+        if self._num_rescale_layers == 0:
+            return []
+        # we know there's at least one
+        out = [self.model]
+        for _ in range(self._num_rescale_layers - 1):
+            out.append(out[-1].model)
+        # we iterated outermost to innermost, which is opposite of evaluation order
+        assert len(out) == self._num_rescale_layers
+        return out[::-1]
+
+    @torch.jit.unused
+    def unscale(
+        self, data: AtomicDataDict.Type, force_process: bool = False
+    ) -> AtomicDataDict.Type:
+        data_unscaled = data.copy()
+        # we need to unscale from the outside-in:
+        for layer in self.all_RescaleOutputs()[::-1]:
+            data_unscaled = layer.unscale(data_unscaled, force_process=force_process)
+        return data_unscaled
+
+    @torch.jit.unused
+    def scale(
+        self, data: AtomicDataDict.Type, force_process: bool = False
+    ) -> AtomicDataDict.Type:
+        data_scaled = data.copy()
+        # we need to scale from the inside out:
+        for layer in self.all_RescaleOutputs():
+            data_scaled = layer.scale(data_scaled, force_process=force_process)
+        return data_scaled
+
+    # == Inference ==
+
+    def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type:
+        # restrict the input data to allowed keys, and cast to model_dtype
+        # this also prevents the model from directly using the dict from the outside,
+        # preventing weird pass-by-reference bugs
+        new_data: AtomicDataDict.Type = {}
+        for k, v in data.items():
+            if k in self.model_input_fields:
+                if v.is_floating_point():
+                    v = v.type(self.model_dtype)
+                new_data[k] = v
+        # run the model
+        data = self.model(new_data)
+        return data

diff --git a/nequip/train/trainer.py b/nequip/train/trainer.py
index 55efec32..459e8ad9 100644
--- a/nequip/train/trainer.py
+++ b/nequip/train/trainer.py
@@ -27,6 +27,7 @@
 from torch_ema import ExponentialMovingAverage

 from nequip.data import DataLoader, AtomicData, AtomicDataDict, AtomicDataset
+from nequip.nn import GraphModel
 from nequip.utils import (
     Output,
     Config,
@@ -211,6 +212,8 @@ class Trainer:
     lr_scheduler_module = torch.optim.lr_scheduler
     optim_module = torch.optim

+    model: GraphModel
+
     def __init__(
         self,
         model,
@@ -330,23 +333,6 @@ def __init__(
         self.train_on_keys = self.loss.keys
         if train_on_keys is not None:
             assert set(train_on_keys) == set(self.train_on_keys)
-        self._remove_from_model_input = set(self.train_on_keys)
-        if (
-            len(
-                self._remove_from_model_input.intersection(
-                    AtomicDataDict.ALL_ENERGY_KEYS
-                )
-            )
-            > 0
-        ):
-            # if we are training on _any_ of the energy quantities (energy, force, partials, stress, etc.)
-            # then none of them should be fed into the model
-            self._remove_from_model_input = self._remove_from_model_input.union(
-                AtomicDataDict.ALL_ENERGY_KEYS
-            )
-        if kwargs.get("_override_allow_truth_label_inputs", False):
-            # needed for unit testing models
-            self._remove_from_model_input = set()

         # load all callbacks
         self._init_callbacks = [load_callable(callback) for callback in init_callbacks]
@@ -701,6 +687,7 @@ def init(self):
         """initialize optimizer"""
         if self.model is None:
             return
+        assert isinstance(self.model, GraphModel)

         self.model.to(self.torch_device)

@@ -710,12 +697,6 @@ def init(self):
             f"Number of trainable weights: {sum(p.numel() for p in self.model.parameters() if p.requires_grad)}"
         )

-        self.rescale_layers = []
-        outer_layer = self.model
-        while hasattr(outer_layer, "unscale"):
-            self.rescale_layers.append(outer_layer)
-            outer_layer = getattr(outer_layer, "model", None)
-
         self.init_objects()

         self._initialized = True
@@ -799,32 +780,22 @@ def batch_step(self, data, validation=False):
         data = data.to(self.torch_device)
         data = AtomicData.to_AtomicDataDict(data)

-        data_unscaled = data
-        for layer in self.rescale_layers:
-            # This means that self.model is RescaleOutputs
-            # this will normalize the targets
-            # in validation (eval mode), it does nothing
-            # in train mode, it normalizes the targets
-            data_unscaled = layer.unscale(data_unscaled)
+        # this will normalize the targets
+        # in both validation and train we want targets normalized _for the loss_
+        data_for_loss = self.model.unscale(data, force_process=True)

         # Run model
         # We make a shallow copy of the input dict in case the model modifies it
-        input_data = {
-            k: v
-            for k, v in data_unscaled.items()
-            if k not in self._remove_from_model_input
-        }
-        out = self.model(input_data)
-        del input_data
+        out = self.model(data_for_loss)

-        # If we're in evaluation mode (i.e. validation), then
-        # data_unscaled's target prop is unnormalized, and out's has been rescaled to be in the same units
-        # If we're in training, data_unscaled's target prop has been normalized, and out's hasn't been touched, so they're both in normalized units
-        # Note that either way all normalization was handled internally by RescaleOutput
+        # If we're in evaluation mode (i.e. validation), then
+        # data_for_loss's target prop is unnormalized, and out's has been rescaled to be in the same units
+        # If we're in training, data_for_loss's target prop has been normalized, and out's hasn't been touched, so they're both in normalized units
+        # Note that either way all normalization was handled internally by GraphModel via RescaleOutput

         if not validation:
             # Actually do an optimization step, since we're training:
-            loss, loss_contrib = self.loss(pred=out, ref=data_unscaled)
+            loss, loss_contrib = self.loss(pred=out, ref=data_for_loss)
             # see https://pytorch.org/tutorials/recipes/recipes/tuning_guide.html#use-parameter-grad-none-instead-of-model-zero-grad-or-optimizer-zero-grad
             self.optim.zero_grad(set_to_none=True)
             loss.backward()
@@ -846,25 +817,26 @@ def batch_step(self, data, validation=False):

         with torch.no_grad():
             if validation:
-                scaled_out = out
-                _data_unscaled = data
-                for layer in self.rescale_layers:
-                    # loss function always needs to be in normalized unit
-                    scaled_out = layer.unscale(scaled_out, force_process=True)
-                    _data_unscaled = layer.unscale(_data_unscaled, force_process=True)
-                loss, loss_contrib = self.loss(pred=scaled_out, ref=_data_unscaled)
+                # loss function always needs to be in normalized unit
+                normalized_units_out = self.model.unscale(out, force_process=True)
+                # data_for_loss is always forced into normalized units
+                loss, loss_contrib = self.loss(
+                    pred=normalized_units_out, ref=data_for_loss
+                )
+                del normalized_units_out
+                # everything else is already in real units for metrics, so do nothing
             else:
                 # If we are in training mode, we need to bring the prediction
-                # into real units
-                for layer in self.rescale_layers[::-1]:
-                    out = layer.scale(out, force_process=True)
+                # into real units for metrics
+                out = self.model.scale(out, force_process=True)

             # save metrics stats
             self.batch_losses = self.loss_stat(loss, loss_contrib)
-            # in validation mode, data is in real units and the network scales
+            # in validation mode, reference data is in real units and the network scales
             # out to be in real units interally.
-            # in training mode, data is still in real units, and we rescaled
-            # out to be in real units above.
+            # in training mode, reference data is still in real units, and we rescaled
+            # network predicted out to be in real units right above
+            # thus, we get metrics in real units always:
             self.batch_metrics = self.metrics(pred=out, ref=data)

     @property

diff --git a/tests/integration/test_evaluate.py b/tests/integration/test_evaluate.py
index 2bec1215..99f06136 100644
--- a/tests/integration/test_evaluate.py
+++ b/tests/integration/test_evaluate.py
@@ -53,7 +53,10 @@ def training_session(request, BENCHMARK_ROOT, conffile):
         true_config["max_epochs"] = 2
         true_config["model_builders"] = [builder]
         # We need truth labels as inputs for these fake testing models
-        true_config["_override_allow_truth_label_inputs"] = True
+        true_config["model_input_fields"] = {
+            AtomicDataDict.FORCE_KEY: "1o",
+            AtomicDataDict.TOTAL_ENERGY_KEY: "0e",
+        }

         # to be a true identity, we can't have rescaling
         true_config["global_rescale_shift"] = None

diff --git a/tests/integration/test_train.py b/tests/integration/test_train.py
index 36597a98..325b6d5d 100644
--- a/tests/integration/test_train.py
+++ b/tests/integration/test_train.py
@@ -120,7 +120,10 @@ def test_metrics(nequip_dataset, BENCHMARK_ROOT, conffile, builder):
         # We just don't add rescaling:
         true_config["model_builders"] = [builder]
         # We need truth labels as inputs for these fake testing models
-        true_config["_override_allow_truth_label_inputs"] = True
+        true_config["model_input_fields"] = {
+            AtomicDataDict.FORCE_KEY: "1o",
+            AtomicDataDict.TOTAL_ENERGY_KEY: "0e",
+        }

         config_path = tmpdir + "/conf.yaml"
         with open(config_path, "w+") as fp:
@@ -238,7 +241,10 @@ def test_requeue(nequip_dataset, BENCHMARK_ROOT, conffile):
         # We just don't add rescaling:
         true_config["model_builders"] = [builder]
         # We need truth labels as inputs for these fake testing models
-        true_config["_override_allow_truth_label_inputs"] = True
+        true_config["model_input_fields"] = {
+            AtomicDataDict.FORCE_KEY: "1o",
+            AtomicDataDict.TOTAL_ENERGY_KEY: "0e",
+        }

         for irun in range(3):
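The `GraphModel` wrapper introduced above owns the model boundary: it filters the input dict down to the declared input fields and casts floating-point tensors to `model_dtype`. A standalone sketch of that forward-pass logic, simplified from `_graph_model.py`:

    import torch

    def filter_and_cast(data: dict, model_input_fields, model_dtype: torch.dtype) -> dict:
        # copying into a new dict also protects the caller's dict from
        # in-place modification by the wrapped model
        new_data = {}
        for k, v in data.items():
            if k in model_input_fields:
                if v.is_floating_point():
                    v = v.to(model_dtype)
                new_data[k] = v
        return new_data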
From 09bb5db1e7e86717c80b5d86266d9892aceb7a9b Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Sun, 18 Dec 2022 19:49:24 -0700
Subject: [PATCH 004/157] promote dtype in loss

---
 nequip/train/_loss.py | 41 +++++++++++++++++++++++------------------
 1 file changed, 23 insertions(+), 18 deletions(-)

diff --git a/nequip/train/_loss.py b/nequip/train/_loss.py
index 6df59fe3..e265236c 100644
--- a/nequip/train/_loss.py
+++ b/nequip/train/_loss.py
@@ -44,17 +44,20 @@ def __call__(
         key: str,
         mean: bool = True,
     ):
+        ref = ref[key]
+        # make sure prediction is promoted to dtype of reference
+        pred = pred[key].to(ref.dtype)
         # zero the nan entries
-        has_nan = self.ignore_nan and torch.isnan(ref[key].mean())
+        has_nan = self.ignore_nan and torch.isnan(ref.mean())
         if has_nan:
-            not_nan = (ref[key] == ref[key]).int()
-            loss = self.func(pred[key], torch.nan_to_num(ref[key], nan=0.0)) * not_nan
+            not_nan = (ref == ref).int()
+            loss = self.func(pred, torch.nan_to_num(ref, nan=0.0)) * not_nan
             if mean:
                 return loss.sum() / not_nan.sum()
             else:
                 return loss
         else:
-            loss = self.func(pred[key], ref[key])
+            loss = self.func(pred, ref)
             if mean:
                 return loss.mean()
             else:
@@ -69,28 +72,29 @@ def __call__(
         key: str,
         mean: bool = True,
     ):
+        ref = ref[key]
+        # make sure prediction is promoted to dtype of reference
+        pred = pred[key].to(ref.dtype)
         # zero the nan entries
-        has_nan = self.ignore_nan and torch.isnan(ref[key].sum())
+        has_nan = self.ignore_nan and torch.isnan(ref.sum())
         N = torch.bincount(ref[AtomicDataDict.BATCH_KEY])
         N = N.reshape((-1, 1))
         if has_nan:
-            not_nan = (ref[key] == ref[key]).int()
-            loss = (
-                self.func(pred[key], torch.nan_to_num(ref[key], nan=0.0)) * not_nan / N
-            )
+            not_nan = (ref == ref).int()
+            loss = self.func(pred, torch.nan_to_num(ref, nan=0.0)) * not_nan / N
             if self.func_name == "MSELoss":
                 loss = loss / N
-            assert loss.shape == pred[key].shape  # [atom, dim]
+            assert loss.shape == pred.shape  # [atom, dim]
             if mean:
                 return loss.sum() / not_nan.sum()
             else:
                 return loss
         else:
-            loss = self.func(pred[key], ref[key])
+            loss = self.func(pred, ref)
             loss = loss / N
             if self.func_name == "MSELoss":
                 loss = loss / N
-            assert loss.shape == pred[key].shape  # [atom, dim]
+            assert loss.shape == pred.shape  # [atom, dim]
             if mean:
                 return loss.mean()
             else:
@@ -113,16 +117,17 @@ def __call__(
     ):
         if not mean:
             raise NotImplementedError("Cannot handle this yet")
+        ref = ref[key]
+        # make sure prediction is promoted to dtype of reference
+        pred = pred[key].to(ref.dtype)

-        has_nan = self.ignore_nan and torch.isnan(ref[key].mean())
+        has_nan = self.ignore_nan and torch.isnan(ref.mean())

         if has_nan:
-            not_nan = (ref[key] == ref[key]).int()
-            per_atom_loss = (
-                self.func(pred[key], torch.nan_to_num(ref[key], nan=0.0)) * not_nan
-            )
+            not_nan = (ref == ref).int()
+            per_atom_loss = self.func(pred, torch.nan_to_num(ref, nan=0.0)) * not_nan
         else:
-            per_atom_loss = self.func(pred[key], ref[key])
+            per_atom_loss = self.func(pred, ref)

         reduce_dims = tuple(i + 1 for i in range(len(per_atom_loss.shape) - 1))
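Why the losses promote the prediction to the reference dtype: with a float32 model and float64 references, computing the residual in float64 avoids losing precision in the subtraction. A small demonstration (illustrative only, not from the patch):

    import torch

    pred = torch.randn(5, dtype=torch.float32)  # model output
    ref = torch.randn(5, dtype=torch.float64)   # reference labels
    loss = torch.nn.functional.mse_loss(pred.to(ref.dtype), ref)
    assert loss.dtype == torch.float64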
From 3705fbfa5fb33aa703e20dd6041b56fc7966251c Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Sun, 18 Dec 2022 22:06:48 -0700
Subject: [PATCH 005/157] Working `model_dtype`

---
 nequip/model/_build.py   |  6 ++++++
 nequip/model/_scaling.py |  1 +
 nequip/nn/_atomwise.py   | 37 +++++++++++++++++++++++++++++------
 nequip/nn/_rescale.py    | 17 ++++++++++++++---
 4 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/nequip/model/_build.py b/nequip/model/_build.py
index 7052c7b9..49ed8a7d 100644
--- a/nequip/model/_build.py
+++ b/nequip/model/_build.py
@@ -62,6 +62,12 @@ def model_from_config(
     config["model_dtype"] = str(model_dtype)
     # set temporarily the default dtype
     torch.set_default_dtype(model_dtype)
+    # confirm sanity
+    assert default_dtype in (torch.float32, torch.float64)
+    if default_dtype == torch.float32 and model_dtype == torch.float64:
+        raise ValueError(
+            "Overall default_dtype=float32, but model_dtype=float64 is a higher precision- change default_dtype to float64"
+        )

     # Build
     builders = [

diff --git a/nequip/model/_scaling.py b/nequip/model/_scaling.py
index 8a7ffa46..5de1408c 100644
--- a/nequip/model/_scaling.py
+++ b/nequip/model/_scaling.py
@@ -127,6 +127,7 @@ def GlobalRescale(
         related_shift_keys=default_related_shift_keys,
         shift_trainable=config.get(f"{module_prefix}_shift_trainable", False),
         scale_trainable=config.get(f"{module_prefix}_scale_trainable", False),
+        default_dtype=config.get("default_dtype", None),
     )

diff --git a/nequip/nn/_atomwise.py b/nequip/nn/_atomwise.py
index a2ed315d..ad11e5b4 100644
--- a/nequip/nn/_atomwise.py
+++ b/nequip/nn/_atomwise.py
@@ -9,6 +9,7 @@

 from nequip.data import AtomicDataDict
 from nequip.data.transforms import TypeMapper
+from nequip.utils import dtype_from_name
 from ._graph_mixin import GraphModuleMixin


@@ -98,6 +99,8 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type:
 class PerSpeciesScaleShift(GraphModuleMixin, torch.nn.Module):
     """Scale and/or shift a predicted per-atom property based on (learnable) per-species/type parameters.

+    Note that scaling/shifting is always done (casting into) ``default_dtype``, even if ``model_dtype`` is lower precision.
+
     Args:
         field: the per-atom field to scale/shift.
         num_types: the number of types in the model.
@@ -119,6 +122,7 @@ class PerSpeciesScaleShift(GraphModuleMixin, torch.nn.Module):
     shifts_trainable: bool
     has_scales: bool
     has_shifts: bool
+    default_dtype: torch.dtype

     def __init__(
         self,
@@ -131,6 +135,7 @@ def __init__(
         out_field: Optional[str] = None,
         scales_trainable: bool = False,
         shifts_trainable: bool = False,
+        default_dtype: Optional[str] = None,
         irreps_in={},
     ):
         super().__init__()
@@ -144,11 +149,18 @@ def __init__(
             irreps_out={self.out_field: irreps_in[self.field]},
         )

+        self.default_dtype = dtype_from_name(
+            torch.get_default_dtype() if default_dtype is None else default_dtype
+        )
+
         self.has_shifts = shifts is not None
         if shifts is not None:
-            shifts = torch.as_tensor(shifts)
+            shifts = torch.as_tensor(shifts, dtype=self.default_dtype)
             if len(shifts.reshape([-1])) == 1:
-                shifts = torch.ones(num_types) * shifts
+                shifts = (
+                    torch.ones(num_types, dtype=shifts.dtype, device=shifts.device)
+                    * shifts
+                )
             assert shifts.shape == (num_types,), f"Invalid shape of shifts {shifts}"
             self.shifts_trainable = shifts_trainable
             if shifts_trainable:
@@ -158,9 +170,12 @@ def __init__(

         self.has_scales = scales is not None
         if scales is not None:
-            scales = torch.as_tensor(scales)
+            scales = torch.as_tensor(scales, dtype=self.default_dtype)
             if len(scales.reshape([-1])) == 1:
-                scales = torch.ones(num_types) * scales
+                scales = (
+                    torch.ones(num_types, dtype=scales.dtype, device=scales.device)
+                    * scales
+                )
             assert scales.shape == (num_types,), f"Invalid shape of scales {scales}"
             self.scales_trainable = scales_trainable
             if scales_trainable:
@@ -180,9 +195,19 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type:
         assert len(in_field) == len(
             species_idx
         ), "in_field doesnt seem to have correct per-atom shape"
+        # multiplication / addition promotes dtypes already, so no cast is needed:
+        if self.has_scales and self.has_shifts:
+            # we can use an FMA for performance
+            # addcmul computes
+            # input + tensor1 * tensor2 elementwise
+            in_field = torch.addcmul(
+                self.shifts[species_idx].view(-1, 1),
+                self.scales[species_idx].view(-1, 1),
+                in_field,
+            )
+        elif self.has_scales:
             in_field = self.scales[species_idx].view(-1, 1) * in_field
-        if self.has_shifts:
+        elif self.has_shifts:
             in_field = self.shifts[species_idx].view(-1, 1) + in_field
         data[self.out_field] = in_field
         return data

diff --git a/nequip/nn/_rescale.py b/nequip/nn/_rescale.py
index 8bea7096..4baed254 100644
--- a/nequip/nn/_rescale.py
+++ b/nequip/nn/_rescale.py
@@ -1,4 +1,4 @@
-from typing import Sequence, List, Union
+from typing import Sequence, List, Union, Optional

 import torch

@@ -6,12 +6,15 @@

 from nequip.data import AtomicDataDict
 from nequip.nn import GraphModuleMixin
+from nequip.utils import dtype_from_name


 @compile_mode("script")
 class RescaleOutput(GraphModuleMixin, torch.nn.Module):
     """Wrap a model and rescale its outputs when in ``eval()`` mode.

+    Note that scaling/shifting is always done (casting into) ``default_dtype``, even if ``model_dtype`` is lower precision.
+
     Args:
         model : GraphModuleMixin
             The model whose outputs are to be rescaled.
@@ -39,6 +42,8 @@ class RescaleOutput(GraphModuleMixin, torch.nn.Module):
     has_scale: bool
     has_shift: bool

+    default_dtype: torch.dtype
+
     def __init__(
         self,
         model: GraphModuleMixin,
@@ -50,6 +55,7 @@ def __init__(
         shift_by=None,
         shift_trainable: bool = False,
         scale_trainable: bool = False,
+        default_dtype: Optional[str] = None,
         irreps_in: dict = {},
     ):
         super().__init__()
@@ -84,10 +90,14 @@ def __init__(
         self.related_scale_keys = list(set(related_scale_keys).union(scale_keys))
         self.related_shift_keys = list(set(related_shift_keys).union(shift_keys))

+        self.default_dtype = dtype_from_name(
+            torch.get_default_dtype() if default_dtype is None else default_dtype
+        )
+
         self.has_scale = scale_by is not None
         self.scale_trainble = scale_trainable
         if self.has_scale:
-            scale_by = torch.as_tensor(scale_by)
+            scale_by = torch.as_tensor(scale_by, dtype=self.default_dtype)
             if self.scale_trainble:
                 self.scale_by = torch.nn.Parameter(scale_by)
             else:
@@ -103,7 +113,7 @@ def __init__(
         self.has_shift = shift_by is not None
         self.rescale_trainable = shift_trainable
         if self.has_shift:
-            shift_by = torch.as_tensor(shift_by)
+            shift_by = torch.as_tensor(shift_by, dtype=self.default_dtype)
             if self.rescale_trainable:
                 self.shift_by = torch.nn.Parameter(shift_by)
             else:
@@ -142,6 +152,7 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type:
             return data
         else:
             # Scale then shift
+            # * and + promote dtypes by default
             if self.has_scale:
                 for field in self.scale_keys:
                     data[field] = data[field] * self.scale_by
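The fused scale-and-shift in `PerSpeciesScaleShift` above relies on `torch.addcmul(input, tensor1, tensor2)`, which computes `input + tensor1 * tensor2` elementwise in one op. A toy check of the equivalence (shapes illustrative):

    import torch

    num_types, num_atoms = 2, 4
    x = torch.randn(num_atoms, 1)  # per-atom quantity
    species = torch.tensor([0, 1, 1, 0])
    scales = torch.tensor([2.0, 3.0])
    shifts = torch.tensor([0.5, -0.5])
    fused = torch.addcmul(
        shifts[species].view(-1, 1), scales[species].view(-1, 1), x
    )
    plain = shifts[species].view(-1, 1) + scales[species].view(-1, 1) * x
    assert torch.allclose(fused, plain)

Because `addcmul`, `*`, and `+` all promote dtypes, a float32 `in_field` combined with float64 scales/shifts comes out in float64, which is exactly the promotion these patches depend on.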
From c1d68a40166bfea42e7e805f833c61cce02f85fc Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Sun, 18 Dec 2022 22:07:03 -0700
Subject: [PATCH 006/157] Fix equivariance tests

---
 nequip/nn/_graph_model.py |  2 +-
 nequip/utils/test.py      | 30 +++++++++++++++++++++---------
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/nequip/nn/_graph_model.py b/nequip/nn/_graph_model.py
index 6dd60fe4..42d2ebc2 100644
--- a/nequip/nn/_graph_model.py
+++ b/nequip/nn/_graph_model.py
@@ -34,7 +34,7 @@ def __init__(
             AtomicDataDict.POSITIONS_KEY: "1o",
             AtomicDataDict.EDGE_INDEX_KEY: None,
             AtomicDataDict.EDGE_CELL_SHIFT_KEY: None,
-            AtomicDataDict.CELL_KEY: "3x1o",
+            AtomicDataDict.CELL_KEY: "1o",  # 3 of them, but still
             AtomicDataDict.BATCH_KEY: None,
             AtomicDataDict.ATOM_TYPE_KEY: None,
         }

diff --git a/nequip/utils/test.py b/nequip/utils/test.py
index 60e68730..c25f50ec 100644
--- a/nequip/utils/test.py
+++ b/nequip/utils/test.py
@@ -4,7 +4,7 @@
 from e3nn import o3
 from e3nn.util.test import equivariance_error, FLOAT_TOLERANCE

-from nequip.nn import GraphModuleMixin
+from nequip.nn import GraphModuleMixin, GraphModel
 from nequip.data import (
     AtomicData,
     AtomicDataDict,
@@ -43,7 +43,11 @@ def assert_permutation_equivariant(
     __tracebackhide__ = True

     if tolerance is None:
-        atol = PERMUTATION_FLOAT_TOLERANCE[torch.get_default_dtype()]
+        atol = PERMUTATION_FLOAT_TOLERANCE[
+            func.model_dtype
+            if isinstance(func, GraphModel)
+            else torch.get_default_dtype()
+        ]
     else:
         atol = tolerance

@@ -193,9 +197,9 @@ def assert_AtomicData_equivariant(
     if AtomicDataDict.CELL_KEY in irps:
         prev_cell_irps = irps[AtomicDataDict.CELL_KEY]
         assert prev_cell_irps is None or o3.Irreps(prev_cell_irps) == o3.Irreps(
-            "3x1o"
+            "1o"
         )
-        # must be this to actually rotate it
+        # must be this to actually rotate it when flattened
         irps[AtomicDataDict.CELL_KEY] = "3x1o"

     stress_keys = (AtomicDataDict.STRESS_KEY, AtomicDataDict.VIRIAL_KEY)
@@ -258,7 +262,11 @@ def wrapper(*args):

    errs = {k: torch.max(torch.vstack([e[k] for e in errs]), dim=0)[0] for k in errs[0]}

     if o3_tolerance is None:
-        o3_tolerance = FLOAT_TOLERANCE[torch.get_default_dtype()]
+        o3_tolerance = FLOAT_TOLERANCE[
+            func.model_dtype
+            if isinstance(func, GraphModel)
+            else torch.get_default_dtype()
+        ]
     all_errs = []
     for case, err in errs.items():
         for key, this_err in zip(irreps_out.keys(), err):
@@ -270,9 +278,10 @@ def wrapper(*args):
             int(k[0]), str(bool(k[1])), str(k[2]), float(k[3])
         )
         for k, prob in zip(all_errs, is_problem)
+        if irreps_out[str(k[2])] is not None
     )

-    if sum(is_problem) > 0 or "FAIL" in permutation_message:
+    if any(is_problem) or "FAIL" in permutation_message:
         raise AssertionError(f"Equivariance test failed for cases:\n{message}")

     return message
@@ -322,10 +331,13 @@ def pre_hook(mod: GraphModuleMixin, inp):
                 f"Module {mname} should have received a dict or a torch_geometric Data, instead got a {type(inp).__name__}"
             )
         for k, ir in mod.irreps_in.items():
+            # TODO: don't consider lack of an input to be an error in general?
             if k not in inp:
-                raise KeyError(
-                    f"Field {k} with irreps {ir} expected to be input to {mname}; not present"
-                )
+                # GraphModel keeps all _possible_ inputs in `irreps_in`...
+                if not isinstance(mod, GraphModel):
+                    raise KeyError(
+                        f"Field {k} with irreps {ir} expected to be input to {mname}; not present"
+                    )
             elif isinstance(inp[k], torch.Tensor) and isinstance(ir, o3.Irreps):
                 if inp[k].ndim == 1:
                     raise ValueError(
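Patch 006 keys the test tolerances off the wrapped model's dtype rather than the global default. The selection pattern, extracted on its own (the tolerance values here are placeholders, not the ones e3nn ships):

    import torch

    FLOAT_TOLERANCE = {torch.float32: 1e-3, torch.float64: 1e-10}  # illustrative

    def tolerance_for(func):
        # a GraphModel runs internally in model_dtype, so its equivariance
        # error floor is set by that dtype, not by torch.get_default_dtype()
        dtype = getattr(func, "model_dtype", torch.get_default_dtype())
        return FLOAT_TOLERANCE[dtype]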
From 5fa3e494ec174445a8dc1a19f5e37548aaa5e9e6 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Mon, 19 Dec 2022 15:55:22 -0500
Subject: [PATCH 007/157] passing tests

---
 nequip/model/_build.py                |  2 +-
 nequip/nn/_grad_output.py             |  6 ++---
 nequip/nn/_graph_model.py             | 10 +++++---
 nequip/train/_loss.py                 |  6 +++--
 nequip/utils/test.py                  | 11 ++-------
 tests/unit/model/test_nequip_model.py |  8 +++---
 tests/unit/trainer/test_trainer.py    | 35 +++++++++++++++------------
 7 files changed, 40 insertions(+), 38 deletions(-)

diff --git a/nequip/model/_build.py b/nequip/model/_build.py
index 49ed8a7d..6710b1b8 100644
--- a/nequip/model/_build.py
+++ b/nequip/model/_build.py
@@ -59,7 +59,7 @@ def model_from_config(

     default_dtype = torch.get_default_dtype()
     model_dtype: torch.dtype = dtype_from_name(config.get("model_dtype", default_dtype))
-    config["model_dtype"] = str(model_dtype)
+    config["model_dtype"] = str(model_dtype).lstrip("torch.")
     # set temporarily the default dtype
     torch.set_default_dtype(model_dtype)
     # confirm sanity

diff --git a/nequip/nn/_grad_output.py b/nequip/nn/_grad_output.py
index 673f8ff0..2bf52606 100644
--- a/nequip/nn/_grad_output.py
+++ b/nequip/nn/_grad_output.py
@@ -209,8 +209,8 @@ def __init__(
             irreps_out=self.func.irreps_out.copy(),
         )
         self.irreps_out[AtomicDataDict.FORCE_KEY] = "1o"
-        self.irreps_out[AtomicDataDict.STRESS_KEY] = "3x1o"
-        self.irreps_out[AtomicDataDict.VIRIAL_KEY] = "3x1o"
+        self.irreps_out[AtomicDataDict.STRESS_KEY] = "1o"
+        self.irreps_out[AtomicDataDict.VIRIAL_KEY] = "1o"

         # for torchscript compat
         self.register_buffer("_empty", torch.Tensor())
@@ -316,9 +316,9 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type:
             ).unsqueeze(-1)
             stress = virial / volume.view(-1, 1, 1)
             data[AtomicDataDict.CELL_KEY] = orig_cell
+            data[AtomicDataDict.STRESS_KEY] = stress
         else:
             stress = self._empty  # torchscript
-        data[AtomicDataDict.STRESS_KEY] = stress

         # see discussion in https://github.com/libAtoms/QUIP/issues/227 about sign convention
         # they say the standard convention is virial = -stress x volume

diff --git a/nequip/nn/_graph_model.py b/nequip/nn/_graph_model.py
index 42d2ebc2..eb86a2e5 100644
--- a/nequip/nn/_graph_model.py
+++ b/nequip/nn/_graph_model.py
@@ -1,4 +1,4 @@
-from typing import List, Dict, Any
+from typing import List, Dict, Any, Optional

 import torch

@@ -25,7 +25,7 @@ class GraphModel(GraphModuleMixin, torch.nn.Module):
     def __init__(
         self,
         model: GraphModuleMixin,
-        model_dtype: torch.dtype,
+        model_dtype: Optional[torch.dtype] = None,
         model_input_fields: Dict[str, Any] = {},
     ) -> None:
         super().__init__()
@@ -48,7 +48,9 @@ def __init__(
                 f"Model has `{k}` in its irreps_in with irreps `{irreps}`, but `{k}` is missing from/has inconsistent irreps in model_input_fields of `{self.irreps_in.get(k, 'missing')}`"
             )
         self.model = model
-        self.model_dtype = model_dtype
+        self.model_dtype = (
+            model_dtype if model_dtype is not None else torch.get_default_dtype()
+        )
         self.model_input_fields = list(self.irreps_in.keys())

         self._num_rescale_layers = 0
@@ -101,7 +103,7 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type:
         for k, v in data.items():
             if k in self.model_input_fields:
                 if v.is_floating_point():
-                    v = v.type(self.model_dtype)
+                    v = v.to(dtype=self.model_dtype)
                 new_data[k] = v
         # run the model
         data = self.model(new_data)

diff --git a/nequip/train/_loss.py b/nequip/train/_loss.py
index e265236c..1b00dfe2 100644
--- a/nequip/train/_loss.py
+++ b/nequip/train/_loss.py
@@ -72,12 +72,13 @@ def __call__(
         key: str,
         mean: bool = True,
     ):
+        ref_dict = ref
         ref = ref[key]
         # make sure prediction is promoted to dtype of reference
         pred = pred[key].to(ref.dtype)
         # zero the nan entries
         has_nan = self.ignore_nan and torch.isnan(ref.sum())
-        N = torch.bincount(ref[AtomicDataDict.BATCH_KEY])
+        N = torch.bincount(ref_dict[AtomicDataDict.BATCH_KEY])
         N = N.reshape((-1, 1))
         if has_nan:
             not_nan = (ref == ref).int()
@@ -119,6 +120,7 @@ def __call__(
             raise NotImplementedError("Cannot handle this yet")
         ref = ref[key]
         # make sure prediction is promoted to dtype of reference
+        pred_dict = pred
         pred = pred[key].to(ref.dtype)

         has_nan = self.ignore_nan and torch.isnan(ref.mean())
@@ -131,7 +133,7 @@ def __call__(

         reduce_dims = tuple(i + 1 for i in range(len(per_atom_loss.shape) - 1))

-        spe_idx = pred[AtomicDataDict.ATOM_TYPE_KEY].squeeze(-1)
+        spe_idx = pred_dict[AtomicDataDict.ATOM_TYPE_KEY].squeeze(-1)
         if has_nan:
             if len(reduce_dims) > 0:
                 per_atom_loss = per_atom_loss.sum(dim=reduce_dims)

diff --git a/nequip/utils/test.py b/nequip/utils/test.py
index c25f50ec..af352ba2 100644
--- a/nequip/utils/test.py
+++ b/nequip/utils/test.py
@@ -331,13 +331,8 @@ def pre_hook(mod: GraphModuleMixin, inp):
                 f"Module {mname} should have received a dict or a torch_geometric Data, instead got a {type(inp).__name__}"
             )
         for k, ir in mod.irreps_in.items():
-            # TODO: don't consider lack of an input to be an error in general?
             if k not in inp:
-                # GraphModel keeps all _possible_ inputs in `irreps_in`...
-                if not isinstance(mod, GraphModel):
-                    raise KeyError(
-                        f"Field {k} with irreps {ir} expected to be input to {mname}; not present"
-                    )
+                pass
             elif isinstance(inp[k], torch.Tensor) and isinstance(ir, o3.Irreps):
                 if inp[k].ndim == 1:
                     raise ValueError(
@@ -362,9 +357,7 @@ def post_hook(mod: GraphModuleMixin, _, out):
             )
         for k, ir in mod.irreps_out.items():
             if k not in out:
-                raise KeyError(
-                    f"Field {k} with irreps {ir} expected to be in output from {mname}; not present"
-                )
+                pass
             elif isinstance(out[k], torch.Tensor) and isinstance(ir, o3.Irreps):
                 if out[k].ndim == 1:
                     raise ValueError(

diff --git a/tests/unit/model/test_nequip_model.py b/tests/unit/model/test_nequip_model.py
index 2aa82e15..86de6ee8 100644
--- a/tests/unit/model/test_nequip_model.py
+++ b/tests/unit/model/test_nequip_model.py
@@ -109,14 +109,14 @@ def test_submods(self):
         config = minimal_config2.copy()
         config["model_builders"] = ["EnergyModel"]
         model = model_from_config(config=config, initialize=True)
-        assert isinstance(model.chemical_embedding, AtomwiseLinear)
+        chemical_embedding = model.model.chemical_embedding
+        assert isinstance(chemical_embedding, AtomwiseLinear)
         true_irreps = o3.Irreps(minimal_config2["chemical_embedding_irreps_out"])
         assert (
-            model.chemical_embedding.irreps_out[model.chemical_embedding.out_field]
-            == true_irreps
+            chemical_embedding.irreps_out[chemical_embedding.out_field] == true_irreps
         )
         # Make sure it propagates
         assert (
-            model.layer0_convnet.irreps_in[model.chemical_embedding.out_field]
+            model.model.layer0_convnet.irreps_in[chemical_embedding.out_field]
             == true_irreps
         )

diff --git a/tests/unit/trainer/test_trainer.py b/tests/unit/trainer/test_trainer.py
index 860be357..90fcf4a3 100644
--- a/tests/unit/trainer/test_trainer.py
+++ b/tests/unit/trainer/test_trainer.py
@@ -14,7 +14,7 @@
 from nequip.data import AtomicDataDict
 from nequip.train.trainer import Trainer
 from nequip.utils.savenload import load_file
-from nequip.nn import GraphModuleMixin
+from nequip.nn import GraphModuleMixin, GraphModel, RescaleOutput


 def dummy_builder():
@@ -45,16 +45,17 @@ def dummy_builder():
 )


-@pytest.fixture(scope="class")
-def trainer():
+@pytest.fixture(scope="function")
+def trainer(float_tolerance):
     """
     Generate a class instance with minimal configurations
     """
-    minimal_config["default_dtype"] = str(torch.get_default_dtype())[len("torch.") :]
-    model = model_from_config(minimal_config)
+    conf = minimal_config.copy()
+    conf["default_dtype"] = str(torch.get_default_dtype())[len("torch.") :]
+    model = model_from_config(conf)
     with tempfile.TemporaryDirectory(prefix="output") as path:
-        minimal_config["root"] = path
-        c = Trainer(model=model, **minimal_config)
+        conf["root"] = path
+        c = Trainer(model=model, **conf)
         yield c


@@ -73,14 +74,14 @@ def test_duplicate_id_2(self, temp_data):
         check whether the Output class can automatically
         insert timestr when a workdir has pre-existed
         """
+        conf = minimal_config.copy()
+        conf["root"] = temp_data

-        minimal_config["root"] = temp_data
-
-        model = DummyNet(3)
-        Trainer(model=model, **minimal_config)
+        model = GraphModel(DummyNet(3))
+        Trainer(model=model, **conf)

         with pytest.raises(RuntimeError):
-            Trainer(model=model, **minimal_config)
+            Trainer(model=model, **conf)


 class TestSaveLoad:
@@ -281,15 +282,19 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type:
         return data


-class DummyScale(torch.nn.Module):
+# subclass to make sure it gets picked up by GraphModel
+class DummyScale(RescaleOutput):
     """mimic the rescale model"""

     def __init__(self, key, scale, shift) -> None:
-        super().__init__()
+        torch.nn.Module.__init__(self)  # skip RescaleOutput's __init__
         self.key = key
         self.scale_by = torch.as_tensor(scale, dtype=torch.get_default_dtype())
         self.shift_by = torch.as_tensor(shift, dtype=torch.get_default_dtype())
         self.linear2 = Linear(3, 3)
+        self.irreps_in = {}
+        self.irreps_out = {key: "3x0e"}
+        self.model = None

     def forward(self, data):
         out = self.linear2(data["pos"])
@@ -317,7 +322,7 @@ def unscale(self, data, force_process=False):
 def scale_train(nequip_dataset):
     with tempfile.TemporaryDirectory(prefix="output") as path:
         trainer = Trainer(
-            model=DummyScale(AtomicDataDict.FORCE_KEY, scale=1.3, shift=1),
+            model=GraphModel(DummyScale(AtomicDataDict.FORCE_KEY, scale=1.3, shift=1)),
             n_train=4,
             n_val=4,
             max_epochs=0,

From d563759873987fdebdf8f3d4030b6b65252fb72b Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Tue, 20 Dec 2022 12:43:20 -0700
Subject: [PATCH 008/157] Use Cholesky for solver

---
 nequip/utils/regressor.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/nequip/utils/regressor.py b/nequip/utils/regressor.py
index 76d140bc..dd755d15 100644
--- a/nequip/utils/regressor.py
+++ b/nequip/utils/regressor.py
@@ -2,7 +2,6 @@
 import torch

 from torch import matmul
-from torch.linalg import solve, inv
 from typing import Optional, Sequence
 from opt_einsum import contract

@@ -26,16 +25,20 @@ def solver(X, y, alpha: Optional[float] = 0.001, stride: Optional[int] = 1, **kwargs):

     feature_rms = torch.sqrt(torch.mean(X**2, axis=0))

-    alpha_mat = torch.diag(feature_rms) * alpha * alpha
+    alpha_mat = torch.diag(feature_rms) * (alpha * alpha)

     A = matmul(X.T, X) + alpha_mat
     dy = y - (torch.sum(X, axis=1, keepdim=True) * y_mean).reshape(y.shape)
     Xy = matmul(X.T, dy)

-    mean = solve(A, Xy)
+    # A is symmetric positive semidefinite <=> A=(X + alpha*I)^T (X + alpha*I),
+    # so we can use cholesky:
+    A_cholesky = torch.linalg.cholesky(A)
+    mean = torch.cholesky_solve(Xy.unsqueeze(-1), A_cholesky).squeeze(-1)
+    Ainv = torch.cholesky_inverse(A_cholesky)
+    del A_cholesky

     sigma2 = torch.var(matmul(X, mean) - dy)
-    Ainv = inv(A)
     cov = torch.sqrt(sigma2 * contract("ij,kj,kl,li->i", Ainv, X, X, Ainv))

     mean = mean + y_mean.reshape([-1])
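On the Cholesky change in patch 008: the ridge-regression normal matrix A = XᵀX + α²·diag(rms) is symmetric positive definite (a Gram matrix plus a positive diagonal), so both the solve and the inverse needed for the covariance can reuse one Cholesky factorization instead of a general LU solve plus a separate inverse. A toy verification (shapes and regularization illustrative):

    import torch

    X = torch.randn(100, 4, dtype=torch.float64)
    y = X @ torch.tensor([1.0, -2.0, 0.5, 3.0], dtype=torch.float64)
    A = X.T @ X + 1e-6 * torch.eye(4, dtype=torch.float64)
    b = X.T @ y

    L = torch.linalg.cholesky(A)                 # A = L L^T
    mean = torch.cholesky_solve(b.unsqueeze(-1), L).squeeze(-1)
    Ainv = torch.cholesky_inverse(L)             # reuses the same factor

    assert torch.allclose(mean, torch.linalg.solve(A, b))
    assert torch.allclose(Ainv @ A, torch.eye(4, dtype=torch.float64), atol=1e-6)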
""" From 0657fefaf0a125812bd7a238cec0c9e3fc6b61c4 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 20 Dec 2022 14:17:50 -0700 Subject: [PATCH 010/157] bump --- CHANGELOG.md | 2 ++ nequip/_version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cf50972d..85dff913 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Most recent change on the bottom. +## Unreleased - 0.6.0 + ## [0.5.6] - 2022-12-19 ### Added diff --git a/nequip/_version.py b/nequip/_version.py index b02164d2..8e22989a 100644 --- a/nequip/_version.py +++ b/nequip/_version.py @@ -2,4 +2,4 @@ # See Python packaging guide # https://packaging.python.org/guides/single-sourcing-package-version/ -__version__ = "0.5.6" +__version__ = "0.6.0" From 2ce69d4eebb473f49b68ebda99cd034d71b75124 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 20 Dec 2022 14:22:18 -0700 Subject: [PATCH 011/157] seed changes --- CHANGELOG.md | 3 +++ nequip/train/trainer.py | 11 ++++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 85dff913..0ba49fdd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Most recent change on the bottom. ## Unreleased - 0.6.0 +### Changed +- Always require explicit `seed` +- [Breaking] Set `dataset_seed` to `seed` if it is not explicitly provided ## [0.5.6] - 2022-12-19 diff --git a/nequip/train/trainer.py b/nequip/train/trainer.py index 55efec32..3b8356a2 100644 --- a/nequip/train/trainer.py +++ b/nequip/train/trainer.py @@ -295,13 +295,14 @@ def __init__( self.trainer_save_path = output.generate_file("trainer.pth") self.config_path = self.output.generate_file("config.yaml") - if seed is not None: - torch.manual_seed(seed) - np.random.seed(seed) + if seed is None: + raise ValueError("seed is required") + + torch.manual_seed(seed) + np.random.seed(seed) self.dataset_rng = torch.Generator() - if dataset_seed is not None: - self.dataset_rng.manual_seed(dataset_seed) + self.dataset_rng.manual_seed(dataset_seed if dataset_seed is not None else seed) self.logger.info(f"Torch device: {self.device}") self.torch_device = torch.device(self.device) From 20c25297567b53163d452e5106b1b836b670471e Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 20 Dec 2022 14:46:31 -0700 Subject: [PATCH 012/157] better logging default --- CHANGELOG.md | 1 + configs/example.yaml | 2 +- configs/full.yaml | 2 +- nequip/train/trainer.py | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ba49fdd..3137f21c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ Most recent change on the bottom. ### Changed - Always require explicit `seed` - [Breaking] Set `dataset_seed` to `seed` if it is not explicitly provided +- Don't log as often by default ## [0.5.6] - 2022-12-19 diff --git a/configs/example.yaml b/configs/example.yaml index 1d92882d..127e4c6e 100644 --- a/configs/example.yaml +++ b/configs/example.yaml @@ -68,7 +68,7 @@ wandb: true wandb_project: toluene-example # project name used in wandb verbose: info # the same as python logging, e.g. 
From 20c25297567b53163d452e5106b1b836b670471e Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Tue, 20 Dec 2022 14:46:31 -0700
Subject: [PATCH 012/157] better logging default

---
 CHANGELOG.md            | 1 +
 configs/example.yaml    | 2 +-
 configs/full.yaml       | 2 +-
 nequip/train/trainer.py | 2 +-
 4 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0ba49fdd..3137f21c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ Most recent change on the bottom.
 ### Changed
 - Always require explicit `seed`
 - [Breaking] Set `dataset_seed` to `seed` if it is not explicitly provided
+- Don't log as often by default

 ## [0.5.6] - 2022-12-19

diff --git a/configs/example.yaml b/configs/example.yaml
index 1d92882d..127e4c6e 100644
--- a/configs/example.yaml
+++ b/configs/example.yaml
@@ -68,7 +68,7 @@ wandb: true
 wandb_project: toluene-example # project name used in wandb

 verbose: info # the same as python logging, e.g. warning, info, debug, error; case insensitive
-log_batch_freq: 10 # batch frequency, how often to print training errors within the same epoch
+log_batch_freq: 100 # batch frequency, how often to print training errors within the same epoch
 log_epoch_freq: 1 # epoch frequency, how often to print
 save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving of intermediate checkpoints when the value is not positive.
 save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving of intermediate checkpoints when the value is not positive.

diff --git a/configs/full.yaml b/configs/full.yaml
index 2f98164e..8d54d76b 100644
--- a/configs/full.yaml
+++ b/configs/full.yaml
@@ -168,7 +168,7 @@ wandb_watch: false
 #   log_graph: true

 verbose: info # the same as python logging, e.g. warning, info, debug, error. case insensitive
-log_batch_freq: 1 # batch frequency, how often to print training errors within the same epoch
+log_batch_freq: 100 # batch frequency, how often to print training errors within the same epoch
 log_epoch_freq: 1 # epoch frequency, how often to print
 save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving of intermediate checkpoints when the value is not positive.
 save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving of intermediate checkpoints when the value is not positive.

diff --git a/nequip/train/trainer.py b/nequip/train/trainer.py
index 3b8356a2..c83b71c0 100644
--- a/nequip/train/trainer.py
+++ b/nequip/train/trainer.py
@@ -250,7 +250,7 @@ def __init__(
         end_of_batch_callbacks: list = [],
         end_of_train_callbacks: list = [],
         final_callbacks: list = [],
-        log_batch_freq: int = 1,
+        log_batch_freq: int = 100,
         log_epoch_freq: int = 1,
         save_checkpoint_freq: int = -1,
         save_ema_checkpoint_freq: int = -1,
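The next patch swaps the default gate nonlinearities from shifted softplus (`ssp`) to `silu` for even scalars, and from `abs` to `tanh` for odd ones. `tanh` is the natural choice for odd (`o`) scalars because it is itself an odd function, so applying it preserves parity. A quick illustration:

    import torch

    x = torch.linspace(-3, 3, 7)
    even_act = torch.nn.functional.silu(x)  # x * sigmoid(x), for 'e' scalars
    odd_act = torch.tanh(x)                 # for 'o' scalars
    # parity check: an odd activation must satisfy f(-x) = -f(x)
    assert torch.allclose(odd_act, -torch.tanh(-x))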
From 1c6352504213fd243d43fb46d7bba69d78280e7a Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Tue, 20 Dec 2022 15:53:33 -0700
Subject: [PATCH 013/157] ssp -> silu default

---
 CHANGELOG.md                    | 1 +
 docs/options/model.rst          | 4 ++--
 nequip/nn/_convnetlayer.py      | 4 ++--
 nequip/nn/_interaction_block.py | 2 +-
 4 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3137f21c..ff874671 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ Most recent change on the bottom.
 - Always require explicit `seed`
 - [Breaking] Set `dataset_seed` to `seed` if it is not explicitly provided
 - Don't log as often by default
+- [Breaking] Default nonlinearities are `silu` (`e`) and `tanh` (`o`)

 ## [0.5.6] - 2022-12-19

diff --git a/docs/options/model.rst b/docs/options/model.rst
index a9ecb694..b5659224 100644
--- a/docs/options/model.rst
+++ b/docs/options/model.rst
@@ -109,12 +109,12 @@ nonlinearity_type
 nonlinearity_scalars
 ^^^^^^^^^^^^^^^^^^^^
     | Type: dict
-    | Default: ``{'e': 'ssp', 'o': 'tanh'}``
+    | Default: ``{'e': 'silu', 'o': 'tanh'}``

 nonlinearity_gates
 ^^^^^^^^^^^^^^^^^^
     | Type: dict
-    | Default: ``{'e': 'ssp', 'o': 'abs'}``
+    | Default: ``{'e': 'silu', 'o': 'tanh'}``

 use_sc
 ^^^^^^

diff --git a/nequip/nn/_convnetlayer.py b/nequip/nn/_convnetlayer.py
index 8d3d0dad..9e5437a8 100644
--- a/nequip/nn/_convnetlayer.py
+++ b/nequip/nn/_convnetlayer.py
@@ -39,8 +39,8 @@ def __init__(
         num_layers: int = 3,
         resnet: bool = False,
         nonlinearity_type: str = "gate",
-        nonlinearity_scalars: Dict[int, Callable] = {"e": "ssp", "o": "tanh"},
-        nonlinearity_gates: Dict[int, Callable] = {"e": "ssp", "o": "abs"},
+        nonlinearity_scalars: Dict[int, Callable] = {"e": "silu", "o": "tanh"},
+        nonlinearity_gates: Dict[int, Callable] = {"e": "silu", "o": "tanh"},
     ):
         super().__init__()
         # initialization

diff --git a/nequip/nn/_interaction_block.py b/nequip/nn/_interaction_block.py
index 99b3acc6..f14a2187 100644
--- a/nequip/nn/_interaction_block.py
+++ b/nequip/nn/_interaction_block.py
@@ -26,7 +26,7 @@ def __init__(
         invariant_neurons=8,
         avg_num_neighbors=None,
         use_sc=True,
-        nonlinearity_scalars: Dict[int, Callable] = {"e": "ssp"},
+        nonlinearity_scalars: Dict[int, Callable] = {"e": "silu"},
     ) -> None:
         """
         InteractionBlock.

From 631904c0b92cd665817d5cbb695418c08abd1630 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Tue, 20 Dec 2022 16:32:06 -0700
Subject: [PATCH 014/157] missing test seed

---
 tests/unit/trainer/test_trainer.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/unit/trainer/test_trainer.py b/tests/unit/trainer/test_trainer.py
index 860be357..55ce90b8 100644
--- a/tests/unit/trainer/test_trainer.py
+++ b/tests/unit/trainer/test_trainer.py
@@ -318,6 +318,7 @@ def scale_train(nequip_dataset):
     with tempfile.TemporaryDirectory(prefix="output") as path:
         trainer = Trainer(
             model=DummyScale(AtomicDataDict.FORCE_KEY, scale=1.3, shift=1),
+            seed=9,
             n_train=4,
             n_val=4,
             max_epochs=0,
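Patch 015 below rebuilds the integration-test fixtures around trivial stand-in models whose exact outputs are known, so the train/evaluate plumbing (dtype handling, rescaling, metrics) can be checked against exact expectations. The essence of the trick, reduced to a sketch:

    import torch

    # A stand-in "model" with one learnable scalar: training it on targets it
    # can reproduce exactly drives the loss to a predictable value, which
    # makes the surrounding training loop testable end to end.
    class TinyLearningFactor(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.factor = torch.nn.Parameter(torch.tensor(1.111))

        def forward(self, forces: torch.Tensor) -> torch.Tensor:
            return self.factor * forces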
max_epochs: 100 diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py new file mode 100644 index 00000000..3a082602 --- /dev/null +++ b/tests/integration/conftest.py @@ -0,0 +1,170 @@ +import pytest +import tempfile +import pathlib +import yaml +import subprocess +import os + +import torch + +from nequip.data import AtomicDataDict +from nequip.nn import GraphModuleMixin + + +class IdentityModel(GraphModuleMixin, torch.nn.Module): + def __init__(self, **kwargs): + super().__init__() + self._init_irreps( + irreps_in={ + AtomicDataDict.TOTAL_ENERGY_KEY: "0e", + AtomicDataDict.FORCE_KEY: "1o", + }, + ) + self.zero = torch.nn.Parameter(torch.as_tensor(0.0)) + + def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: + err = self.zero + data[AtomicDataDict.FORCE_KEY] = data[AtomicDataDict.FORCE_KEY] + err + data[AtomicDataDict.NODE_FEATURES_KEY] = ( + 0.77 * data[AtomicDataDict.FORCE_KEY].tanh() + ) # some BS + data[AtomicDataDict.TOTAL_ENERGY_KEY] = ( + data[AtomicDataDict.TOTAL_ENERGY_KEY] + err + ) + return data + + +class ConstFactorModel(GraphModuleMixin, torch.nn.Module): + def __init__(self, **kwargs): + super().__init__() + self._init_irreps( + irreps_in={ + AtomicDataDict.TOTAL_ENERGY_KEY: "0e", + AtomicDataDict.FORCE_KEY: "1o", + }, + ) + # to keep the optimizer happy: + self.dummy = torch.nn.Parameter(torch.zeros(1)) + self.register_buffer("factor", 3.7777 * torch.randn(1).squeeze()) + + def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: + data[AtomicDataDict.FORCE_KEY] = ( + self.factor * data[AtomicDataDict.FORCE_KEY] + 0.0 * self.dummy + ) + data[AtomicDataDict.NODE_FEATURES_KEY] = ( + 0.77 * data[AtomicDataDict.FORCE_KEY].tanh() + ) # some BS + data[AtomicDataDict.TOTAL_ENERGY_KEY] = ( + self.factor * data[AtomicDataDict.TOTAL_ENERGY_KEY] + 0.0 * self.dummy + ) + return data + + +class LearningFactorModel(GraphModuleMixin, torch.nn.Module): + def __init__(self, **kwargs): + super().__init__() + self._init_irreps( + irreps_in={ + AtomicDataDict.TOTAL_ENERGY_KEY: "0e", + AtomicDataDict.FORCE_KEY: "1o", + }, + ) + # By using a big factor, we keep it in a nice descending part + # of the optimization without too much oscilation in loss at + # the beginning + self.factor = torch.nn.Parameter(torch.as_tensor(1.111)) + + def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: + data[AtomicDataDict.FORCE_KEY] = self.factor * data[AtomicDataDict.FORCE_KEY] + data[AtomicDataDict.NODE_FEATURES_KEY] = ( + 0.77 * data[AtomicDataDict.FORCE_KEY].tanh() + ) # some BS + data[AtomicDataDict.TOTAL_ENERGY_KEY] = ( + self.factor * data[AtomicDataDict.TOTAL_ENERGY_KEY] + ) + return data + + +def _training_session(conffile, model_dtype, builder, BENCHMARK_ROOT): + default_dtype = str(torch.get_default_dtype())[len("torch.") :] + if default_dtype == "float32" and model_dtype == "float64": + pytest.skip("default_dtype=float32 and model_dtype=float64 doesn't make sense") + + path_to_this_file = pathlib.Path(__file__) + config_path = path_to_this_file.parents[2] / f"configs/{conffile}" + true_config = yaml.load(config_path.read_text(), Loader=yaml.Loader) + + with tempfile.TemporaryDirectory() as tmpdir: + # Save time + run_name = "test_train_" + default_dtype + true_config["run_name"] = run_name + true_config["root"] = "./" + true_config["dataset_file_name"] = str( + BENCHMARK_ROOT / "aspirin_ccsd-train.npz" + ) + true_config["default_dtype"] = default_dtype + true_config["model_dtype"] = model_dtype + true_config["max_epochs"] = 2 + 
true_config["model_builders"] = [builder]
+        # just do forces, which is what the mock models have:
+        true_config["loss_coeffs"] = "forces"
+        # We need truth labels as inputs for these fake testing models
+        true_config["model_input_fields"] = {
+            AtomicDataDict.FORCE_KEY: "1o",
+            AtomicDataDict.TOTAL_ENERGY_KEY: "0e",
+        }
+
+        config_path = tmpdir + "/conf.yaml"
+        with open(config_path, "w+") as fp:
+            yaml.dump(true_config, fp)
+        # == Train model ==
+        env = dict(os.environ)
+        # make this script available so model builders can be loaded
+        env["PYTHONPATH"] = ":".join(
+            [str(path_to_this_file.parent)] + env.get("PYTHONPATH", "").split(":")
+        )
+
+        retcode = subprocess.run(
+            ["nequip-train", "conf.yaml"],
+            cwd=tmpdir,
+            env=env,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+        retcode.check_returncode()
+
+        yield true_config, tmpdir, env
+
+
+@pytest.fixture(
+    scope="module",
+    params=[
+        ("minimal.yaml", AtomicDataDict.FORCE_KEY),
+        ("minimal_toy_emt.yaml", AtomicDataDict.STRESS_KEY),
+    ],
+)
+def conffile(request):
+    return request.param
+
+
+@pytest.fixture(
+    scope="module",
+    params=["float32", "float64"],
+)
+def model_dtype(request, float_tolerance):
+    if torch.get_default_dtype() == torch.float32 and request.param == "float64":
+        pytest.skip("default_dtype=float32 and model_dtype=float64 doesn't make sense")
+    return request.param
+
+
+@pytest.fixture(
+    scope="module", params=[ConstFactorModel, LearningFactorModel, IdentityModel]
+)
+def fake_model_training_session(request, BENCHMARK_ROOT, conffile, model_dtype):
+    conffile, _ = conffile
+    builder = request.param
+
+    session = _training_session(conffile, model_dtype, builder, BENCHMARK_ROOT)
+    true_config, tmpdir, env = next(session)
+    yield builder, true_config, tmpdir, env
+    del session
diff --git a/tests/integration/test_evaluate.py b/tests/integration/test_evaluate.py
index 99f06136..96d8d43b 100644
--- a/tests/integration/test_evaluate.py
+++ b/tests/integration/test_evaluate.py
@@ -1,9 +1,5 @@
 import pytest
-import tempfile
-import pathlib
-import yaml
 import subprocess
-import os
 import textwrap
 import shutil
@@ -14,75 +10,22 @@
 
 from nequip.data import AtomicDataDict
 
-from test_train import ConstFactorModel, IdentityModel  # noqa
-
-
-@pytest.fixture(
-    scope="module",
-    params=[
-        ("minimal.yaml", AtomicDataDict.FORCE_KEY),
-    ],
-)
-def conffile(request):
-    return request.param
-
-
-@pytest.fixture(scope="module", params=[ConstFactorModel, IdentityModel])
-def training_session(request, BENCHMARK_ROOT, conffile):
-    conffile, _ = conffile
-    builder = request.param
-    dtype = str(torch.get_default_dtype())[len("torch.") :]
-
-    # if torch.cuda.is_available():
-    #     # TODO: is this true?
- # pytest.skip("CUDA and subprocesses have issues") - - path_to_this_file = pathlib.Path(__file__) - config_path = path_to_this_file.parents[2] / f"configs/{conffile}" - true_config = yaml.load(config_path.read_text(), Loader=yaml.Loader) - with tempfile.TemporaryDirectory() as tmpdir: - # == Run training == - # Save time - run_name = "test_train_" + dtype - true_config["run_name"] = run_name - true_config["root"] = "./" - true_config["dataset_file_name"] = str( - BENCHMARK_ROOT / "aspirin_ccsd-train.npz" - ) - true_config["default_dtype"] = dtype - true_config["max_epochs"] = 2 - true_config["model_builders"] = [builder] - # We need truth labels as inputs for these fake testing models - true_config["model_input_fields"] = { - AtomicDataDict.FORCE_KEY: "1o", - AtomicDataDict.TOTAL_ENERGY_KEY: "0e", - } - - # to be a true identity, we can't have rescaling - true_config["global_rescale_shift"] = None - true_config["global_rescale_scale"] = None - - config_path = tmpdir + "/conf.yaml" - with open(config_path, "w+") as fp: - yaml.dump(true_config, fp) - # == Train model == - env = dict(os.environ) - # make this script available so model builders can be loaded - env["PYTHONPATH"] = ":".join( - [str(path_to_this_file.parent)] + env.get("PYTHONPATH", "").split(":") - ) - retcode = subprocess.run(["nequip-train", "conf.yaml"], cwd=tmpdir, env=env) - retcode.check_returncode() - - yield builder, true_config, tmpdir, env +from conftest import IdentityModel, ConstFactorModel @pytest.mark.parametrize("do_test_idcs", [True, False]) @pytest.mark.parametrize("do_metrics", [True, False]) @pytest.mark.parametrize("do_output_fields", [True, False]) -def test_metrics(training_session, do_test_idcs, do_metrics, do_output_fields): - - builder, true_config, tmpdir, env = training_session +def test_metrics( + fake_model_training_session, conffile, do_test_idcs, do_metrics, do_output_fields +): + energy_only: bool = conffile[0] == "minimal_eng.yaml" + if energy_only: + # By default, don't run the energy only tests... 
they are redundant and add a _lot_ of expense + pytest.skip() + builder, true_config, tmpdir, env = fake_model_training_session + if builder not in (IdentityModel, ConstFactorModel): + pytest.skip() # == Run test error == outdir = f"{true_config['root']}/{true_config['run_name']}/" @@ -122,9 +65,16 @@ def runit(params: dict): # Test idcs if do_test_idcs: - # The Aspirin dataset is 1000 frames long - # Pick some arbitrary number of frames - test_idcs_arr = torch.randperm(1000)[:257] + if conffile[0] == "minimal.yaml": + # The Aspirin dataset is 1000 frames long + # Pick some arbitrary number of frames + test_idcs_arr = torch.randperm(1000)[:257] + elif conffile[0] == "minimal_toy_emt.yaml": + # The toy EMT dataset is 50 frames long + # Pick some arbitrary number of frames + test_idcs_arr = torch.randperm(50)[:7] + else: + raise KeyError test_idcs = "some-test-idcs.pth" torch.save(test_idcs_arr, f"{tmpdir}/{test_idcs}") else: @@ -137,39 +87,64 @@ def runit(params: dict): metrics_yaml = "my-metrics.yaml" with open(f"{tmpdir}/{metrics_yaml}", "w") as f: # Write out a fancier metrics file - f.write( - textwrap.dedent( - """ - metrics_components: - - - forces - - rmse - - report_per_component: True - - - forces - - mae - - PerSpecies: True - - - total_energy - - mae - - - total_energy - - mae - - PerAtom: True - """ + if energy_only: + f.write( + textwrap.dedent( + """ + metrics_components: + - - total_energy + - mae + - - total_energy + - mae + - PerAtom: True + """ + ) + ) + expect_metrics = { + "e_mae", + "e/N_mae", + } + else: + # Write out a fancier metrics file + f.write( + textwrap.dedent( + """ + metrics_components: + - - forces + - rmse + - report_per_component: True + - - forces + - mae + - PerSpecies: True + - - total_energy + - mae + - - total_energy + - mae + - PerAtom: True + """ + ) + ) + expect_metrics = { + "f_rmse_0", + "f_rmse_1", + "f_rmse_2", + "psavg_f_mae", + "e_mae", + "e/N_mae", + }.union( + { + # For the PerSpecies + sym + "_f_mae" + for sym in true_config["chemical_symbols"] + } ) - ) - expect_metrics = { - "f_rmse_0", - "f_rmse_1", - "f_rmse_2", - "H_f_mae", - "C_f_mae", - "O_f_mae", - "psavg_f_mae", - "e_mae", - "e/N_mae", - } else: metrics_yaml = None # Regardless of builder, with minimal.yaml, we should have RMSE and MAE - expect_metrics = {"f_mae", "f_rmse"} + if energy_only: + expect_metrics = {"e_mae", "e_rmse"} + else: + expect_metrics = {"f_mae", "f_rmse"} default_params["metrics-config"] = metrics_yaml if do_output_fields: @@ -190,8 +165,16 @@ def runit(params: dict): # check metrics if builder == IdentityModel: + true_identity: bool = true_config["default_dtype"] == true_config["model_dtype"] for metric, err in metrics.items(): - assert np.allclose(err, 0.0), f"Metric `{metric}` wasn't zero!" + # see test_train.py for discussion + assert np.allclose( + err, + 0.0, + atol=1e-8 + if true_identity + else (1e-2 if metric.startswith("e") else 1e-4), + ), f"Metric `{metric}` wasn't zero!" 
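+        # (the looser 1e-2 bound for energy metrics above reflects the larger
+        # absolute float32 truncation error of total energies vs. forces in
+        # real units; see the discussion in test_train.py)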
elif builder == ConstFactorModel: # TODO: check comperable to naive numpy compute pass diff --git a/tests/integration/test_train.py b/tests/integration/test_train.py index 325b6d5d..53e400b3 100644 --- a/tests/integration/test_train.py +++ b/tests/integration/test_train.py @@ -9,203 +9,102 @@ import torch from nequip.data import AtomicDataDict -from nequip.nn import GraphModuleMixin +from conftest import IdentityModel, ConstFactorModel, LearningFactorModel -class IdentityModel(GraphModuleMixin, torch.nn.Module): - def __init__(self, **kwargs): - super().__init__() - self._init_irreps( - irreps_in={ - AtomicDataDict.TOTAL_ENERGY_KEY: "0e", - AtomicDataDict.FORCE_KEY: "1o", - }, - ) - self.one = torch.nn.Parameter(torch.as_tensor(1.0)) - - def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: - data[AtomicDataDict.FORCE_KEY] = self.one * data[AtomicDataDict.FORCE_KEY] - data[AtomicDataDict.NODE_FEATURES_KEY] = ( - 0.77 * data[AtomicDataDict.FORCE_KEY].tanh() - ) # some BS - data[AtomicDataDict.TOTAL_ENERGY_KEY] = ( - self.one * data[AtomicDataDict.TOTAL_ENERGY_KEY] - ) - return data - - -class ConstFactorModel(GraphModuleMixin, torch.nn.Module): - def __init__(self, **kwargs): - super().__init__() - self._init_irreps( - irreps_in={ - AtomicDataDict.TOTAL_ENERGY_KEY: "0e", - AtomicDataDict.FORCE_KEY: "1o", - }, - ) - # to keep the optimizer happy: - self.dummy = torch.nn.Parameter(torch.zeros(1)) - self.register_buffer("factor", 3.7777 * torch.randn(1).squeeze()) - - def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: - data[AtomicDataDict.FORCE_KEY] = ( - self.factor * data[AtomicDataDict.FORCE_KEY] + 0.0 * self.dummy - ) - data[AtomicDataDict.NODE_FEATURES_KEY] = ( - 0.77 * data[AtomicDataDict.FORCE_KEY].tanh() - ) # some BS - data[AtomicDataDict.TOTAL_ENERGY_KEY] = ( - self.factor * data[AtomicDataDict.TOTAL_ENERGY_KEY] + 0.0 * self.dummy - ) - return data - - -class LearningFactorModel(GraphModuleMixin, torch.nn.Module): - def __init__(self, **kwargs): - super().__init__() - self._init_irreps( - irreps_in={ - AtomicDataDict.TOTAL_ENERGY_KEY: "0e", - AtomicDataDict.FORCE_KEY: "1o", - }, - ) - # By using a big factor, we keep it in a nice descending part - # of the optimization without too much oscilation in loss at - # the beginning - self.factor = torch.nn.Parameter(torch.as_tensor(1.111)) - - def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: - data[AtomicDataDict.FORCE_KEY] = self.factor * data[AtomicDataDict.FORCE_KEY] - data[AtomicDataDict.NODE_FEATURES_KEY] = ( - 0.77 * data[AtomicDataDict.FORCE_KEY].tanh() - ) # some BS - data[AtomicDataDict.TOTAL_ENERGY_KEY] = ( - self.factor * data[AtomicDataDict.TOTAL_ENERGY_KEY] - ) - return data - - -@pytest.mark.parametrize( - "conffile", - [ - "minimal.yaml", - "minimal_eng.yaml", - ], -) -@pytest.mark.parametrize( - "builder", [IdentityModel, ConstFactorModel, LearningFactorModel] -) -def test_metrics(nequip_dataset, BENCHMARK_ROOT, conffile, builder): - dtype = str(torch.get_default_dtype())[len("torch.") :] +def test_metrics(fake_model_training_session, model_dtype): + default_dtype = str(torch.get_default_dtype()).lstrip("torch.") + builder, true_config, tmpdir, env = fake_model_training_session - # if torch.cuda.is_available(): - # # TODO: is this true? 
- # pytest.skip("CUDA and subprocesses have issues") + # == Load metrics == + outdir = f"{tmpdir}/{true_config['root']}/{true_config['run_name']}/" - path_to_this_file = pathlib.Path(__file__) - config_path = path_to_this_file.parents[2] / f"configs/{conffile}" - true_config = yaml.load(config_path.read_text(), Loader=yaml.Loader) + if builder == IdentityModel or builder == LearningFactorModel: + for which in ("train", "val"): - with tempfile.TemporaryDirectory() as tmpdir: - # Save time - run_name = "test_train_" + dtype - true_config["run_name"] = run_name - true_config["root"] = "./" - true_config["dataset_file_name"] = str( - BENCHMARK_ROOT / "aspirin_ccsd-train.npz" - ) - true_config["default_dtype"] = dtype - true_config["max_epochs"] = 2 - # We just don't add rescaling: - true_config["model_builders"] = [builder] - # We need truth labels as inputs for these fake testing models - true_config["model_input_fields"] = { - AtomicDataDict.FORCE_KEY: "1o", - AtomicDataDict.TOTAL_ENERGY_KEY: "0e", - } - - config_path = tmpdir + "/conf.yaml" - with open(config_path, "w+") as fp: - yaml.dump(true_config, fp) - # == Train model == - env = dict(os.environ) - # make this script available so model builders can be loaded - env["PYTHONPATH"] = ":".join( - [str(path_to_this_file.parent)] + env.get("PYTHONPATH", "").split(":") - ) - - retcode = subprocess.run( - ["nequip-train", "conf.yaml"], - cwd=tmpdir, - env=env, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - retcode.check_returncode() - - # == Load metrics == - outdir = f"{tmpdir}/{true_config['root']}/{run_name}/" - - if builder == IdentityModel or builder == LearningFactorModel: - for which in ("train", "val"): - - dat = np.genfromtxt( - f"{outdir}/metrics_batch_{which}.csv", - delimiter=",", - names=True, - dtype=None, - ) - for field in dat.dtype.names: - if field == "epoch" or field == "batch": - continue - # Everything else should be a loss or a metric - if builder == IdentityModel: + dat = np.genfromtxt( + f"{outdir}/metrics_batch_{which}.csv", + delimiter=",", + names=True, + dtype=None, + ) + for field in dat.dtype.names: + if field == "epoch" or field == "batch": + continue + # Everything else should be a loss or a metric + if builder == IdentityModel: + if model_dtype == default_dtype: + # We have a true identity model assert np.allclose( - dat[field], 0.0 + dat[field], + 0.0, + atol=1e-6 if default_dtype == "float32" else 1e-9, ), f"Loss/metric `{field}` wasn't all zeros for {which}" - elif builder == LearningFactorModel: - assert ( - dat[field][-1] < dat[field][0] - ), f"Loss/metric `{field}` didn't go down for {which}" - - # epoch metrics - dat = np.genfromtxt( - f"{outdir}/metrics_epoch.csv", - delimiter=",", - names=True, - dtype=None, - ) - for field in dat.dtype.names: - if field == "epoch" or field == "wall" or field == "LR": - continue - - # Everything else should be a loss or a metric - if builder == IdentityModel: + else: + # we have an approximate identity model that applies a floating point truncation + # in the actual aspirin test data used here, the truncation error is maximally 0.0155 + # there is also no rescaling so everything is in real units here + assert np.all( + dat[field] < 0.02 + ), f"Loss/metric `{field}` wasn't approximately zeros for {which}" + elif builder == LearningFactorModel: + assert ( + dat[field][-1] < dat[field][0] + ), f"Loss/metric `{field}` didn't go down for {which}" + + # epoch metrics + dat = np.genfromtxt( + f"{outdir}/metrics_epoch.csv", + delimiter=",", + names=True, + 
dtype=None,
+        )
+        for field in dat.dtype.names:
+            if field == "epoch" or field == "wall" or field == "LR":
+                continue
+
+            # Everything else should be a loss or a metric
+            if builder == IdentityModel:
+                if model_dtype == default_dtype:
+                    # we have a true identity model
                     assert np.allclose(
-                        dat[field][1:], 0.0
+                        dat[field][1:],
+                        0.0,
+                        atol=1e-6 if default_dtype == "float32" else 1e-9,
                     ), f"Loss/metric `{field}` wasn't all equal to zero for epoch"
-                elif builder == ConstFactorModel:
-                    # otherwise just check its constant.
-                    # epoch-wise numbers should be the same, since there's no randomness at this level
-                    assert np.allclose(
-                        dat[field], dat[field][0]
-                    ), f"Loss/metric `{field}` wasn't all equal to {dat[field][0]} for epoch"
-                elif builder == LearningFactorModel:
-                    assert (
-                        dat[field][-1] < dat[field][0]
-                    ), f"Loss/metric `{field}` didn't go down across epochs"
-
-        # == Check model ==
-        model = torch.load(outdir + "/last_model.pth")
-
-        if builder == IdentityModel:
-            one = model["one"]
-            # Since the loss is always zero, even though the constant
-            # 1 was trainable, it shouldn't have changed
-            assert torch.allclose(
-                one, torch.ones(1, device=one.device, dtype=one.dtype)
-            )
+                else:
+                    # we have an approximate identity model that applies a floating point truncation
+                    # see above
+                    assert np.all(
+                        dat[field][1:] < 0.02
+                    ), f"Loss/metric `{field}` wasn't approximately zero for epoch"
+            elif builder == ConstFactorModel:
+                # otherwise just check its constant.
+                # epoch-wise numbers should be the same, since there's no randomness at this level
+                assert np.allclose(
+                    dat[field], dat[field][0]
+                ), f"Loss/metric `{field}` wasn't all equal to {dat[field][0]} for epoch"
+            elif builder == LearningFactorModel:
+                assert (
+                    dat[field][-1] < dat[field][0]
+                ), f"Loss/metric `{field}` didn't go down across epochs"
+
+    # == Check model ==
+    model = torch.load(outdir + "/last_model.pth")
+
+    if builder == IdentityModel:
+        # GraphModel.IdentityModel
+        zero = model["model.zero"]
+        # Since the loss is always zero, even though the constant
+        # 0 was trainable, it shouldn't have changed
+        # the tolerances when loss is nonzero are large-ish because the default learning rate 0.01 is high
+        # these tolerances are _also_ in real units
+        assert torch.allclose(
+            zero,
+            torch.zeros(1, device=zero.device, dtype=zero.dtype),
+            atol=1e-7 if model_dtype == default_dtype else 1e-2,
+        )
 
 
 @pytest.mark.parametrize(
     "conffile",
     [
         "minimal.yaml",
         "minimal_eng.yaml",
     ],
 )
 def test_requeue(nequip_dataset, BENCHMARK_ROOT, conffile):
-
-    builder = IdentityModel
+    # TODO test metrics against one that goes all the way through
+    builder = IdentityModel  # TODO: train a real model?
     dtype = str(torch.get_default_dtype())[len("torch.") :]
 
-    # if torch.cuda.is_available():
-    #     # TODO: is this true?
- # pytest.skip("CUDA and subprocesses have issues") - path_to_this_file = pathlib.Path(__file__) config_path = path_to_this_file.parents[2] / f"configs/{conffile}" true_config = yaml.load(config_path.read_text(), Loader=yaml.Loader) From 28df610d6a9b22f4c69d2636eaf51a97211c32a8 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 20 Dec 2022 23:49:11 -0500 Subject: [PATCH 016/157] FMA with version check --- nequip/nn/_atomwise.py | 20 ++++++++++++++------ nequip/utils/versions.py | 7 ++++++- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/nequip/nn/_atomwise.py b/nequip/nn/_atomwise.py index ad11e5b4..5d7823c8 100644 --- a/nequip/nn/_atomwise.py +++ b/nequip/nn/_atomwise.py @@ -10,6 +10,7 @@ from nequip.data import AtomicDataDict from nequip.data.transforms import TypeMapper from nequip.utils import dtype_from_name +from nequip.utils.versions import _TORCH_IS_GE_1_13 from ._graph_mixin import GraphModuleMixin @@ -123,6 +124,7 @@ class PerSpeciesScaleShift(GraphModuleMixin, torch.nn.Module): has_scales: bool has_shifts: bool default_dtype: torch.dtype + _use_fma: bool def __init__( self, @@ -185,6 +187,9 @@ def __init__( self.arguments_in_dataset_units = arguments_in_dataset_units + # we can use FMA for performance but its type promotion is broken until 1.13 + self._use_fma = _TORCH_IS_GE_1_13 + def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: if not (self.has_scales or self.has_shifts): @@ -195,8 +200,8 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: assert len(in_field) == len( species_idx ), "in_field doesnt seem to have correct per-atom shape" - # multiplication / addition promotes dtypes already, so no cast is needed: - if self.has_scales and self.has_shifts: + + if self._use_fma and self.has_scales and self.has_shifts: # we can used an FMA for performance # addcmul computes # input + tensor1 * tensor2 elementwise @@ -205,10 +210,13 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: self.scales[species_idx].view(-1, 1), in_field, ) - elif self.has_scales: - in_field = self.scales[species_idx].view(-1, 1) * in_field - elif self.has_shifts: - in_field = self.shifts[species_idx].view(-1, 1) + in_field + else: + # fallback path for torch<1.13 OR mix of enabled shifts and scales + # multiplication / addition promotes dtypes already, so no cast is needed: + if self.has_scales: + in_field = self.scales[species_idx].view(-1, 1) * in_field + if self.has_shifts: + in_field = self.shifts[species_idx].view(-1, 1) + in_field data[self.out_field] = in_field return data diff --git a/nequip/utils/versions.py b/nequip/utils/versions.py index db35a451..3c733c65 100644 --- a/nequip/utils/versions.py +++ b/nequip/utils/versions.py @@ -1,4 +1,5 @@ -from typing import Tuple +from typing import Tuple, Final +import packaging.version import logging @@ -8,6 +9,10 @@ from .git import get_commit +_TORCH_IS_GE_1_13: Final[bool] = packaging.version.parse( + torch.__version__ +) >= packaging.version.parse("1.13.0") + _DEFAULT_VERSION_CODES = [torch, e3nn, nequip] _DEFAULT_COMMIT_CODES = ["e3nn", "nequip"] From 15edf66623e79b59c03cf7f3aad37b974955ee87 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 21 Dec 2022 01:42:39 -0500 Subject: [PATCH 017/157] remove fixed_fields machinery --- CHANGELOG.md | 3 + docs/howto/dataset.rst | 14 +- docs/options/dataset.rst | 10 -- nequip/data/__init__.py | 2 + nequip/data/_build.py | 6 +- 
 nequip/data/_test_data.py              |   7 +-
 nequip/data/dataloader.py              |  28 +---
 nequip/data/dataset.py                 | 221 +++++++++++--------------
 nequip/utils/unittests/conftest.py     |   2 +-
 tests/unit/data/test_dataloader.py     |   2 +-
 tests/unit/data/test_dataset.py        |  63 +++----
 tests/unit/model/test_builder_utils.py |   2 +-
 12 files changed, 142 insertions(+), 218 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ff874671..0b2759f6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,9 @@ Most recent change on the bottom.
 - Don't log as often by default
 - [Breaking] Default nonlinearities are `silu` (`e`) and `tanh` (`o`)
 
+### Removed
+- [Breaking] `fixed_fields` machinery (`npz_fixed_field_keys` is still supported, but through a more straightforward implementation)
+
 ## [0.5.6] - 2022-12-19
 
 ### Added
diff --git a/docs/howto/dataset.rst b/docs/howto/dataset.rst
index 2b5267e7..7b18073e 100644
--- a/docs/howto/dataset.rst
+++ b/docs/howto/dataset.rst
@@ -25,14 +25,6 @@ NequIP will not automatically update the cached data.
 Key concepts
 ------------
 
-fixed_fields
-~~~~~~~~~~~~
-Fixed fields are the quantities that are shared among all the configurations in the dataset.
-For example, if the dataset is a trajectory of an NVT MD simulation, the super cell size and the atomic species
-are indeed a constant matrix/vector through out the whole dataset.
-In this case, in stead of repeating the same values for many times,
-we specify the cell and species as fixed fields and only provide them once.
-
 yaml interface
 ~~~~~~~~~~~~~~
 ``nequip-train`` and ``nequip-evaluate`` automatically construct the AtomicDataset based on the yaml arguments.
@@ -108,6 +100,12 @@ In the npz file, all the values should have the same row as the number of the co
 For example, the force array of 36 atomic configurations of an N-atom system should have the shape of (36, N, 3);
 their total_energy array should have the shape of (36).
 
+NPZ also supports "fixed fields": quantities that are shared among all the configurations in the dataset.
+For example, if the dataset is a trajectory of an NVT MD simulation, the supercell size and the atomic species
+are a constant matrix/vector throughout the whole dataset.
+In this case, instead of repeating the same values many times,
+we specify the cell and species as fixed fields and only provide them once.
+
 Below is an example of the yaml specification.
 
 ..
code:: yaml diff --git a/docs/options/dataset.rst b/docs/options/dataset.rst index f3ca194c..356f549a 100644 --- a/docs/options/dataset.rst +++ b/docs/options/dataset.rst @@ -53,16 +53,6 @@ url | Type: NoneType | Default: ``None`` -force_fixed_keys -^^^^^^^^^^^^^^^^ - | Type: list - | Default: ``[]`` - -extra_fixed_fields -^^^^^^^^^^^^^^^^^^ - | Type: dict - | Default: ``{}`` - include_frames ^^^^^^^^^^^^^^ | Type: NoneType diff --git a/nequip/data/__init__.py b/nequip/data/__init__.py index 212cc5f6..2d47d845 100644 --- a/nequip/data/__init__.py +++ b/nequip/data/__init__.py @@ -6,6 +6,7 @@ _NODE_FIELDS, _EDGE_FIELDS, _GRAPH_FIELDS, + _LONG_FIELDS, ) from .dataset import AtomicDataset, AtomicInMemoryDataset, NpzDataset, ASEDataset from .dataloader import DataLoader, Collater @@ -27,5 +28,6 @@ _NODE_FIELDS, _EDGE_FIELDS, _GRAPH_FIELDS, + _LONG_FIELDS, EMTTestDataset, ] diff --git a/nequip/data/_build.py b/nequip/data/_build.py index 8757198f..35b59dba 100644 --- a/nequip/data/_build.py +++ b/nequip/data/_build.py @@ -57,10 +57,10 @@ def dataset_from_config(config, prefix: str = "dataset") -> AtomicDataset: raise NameError(f"dataset type {dataset_name} does not exists") # if dataset r_max is not found, use the universal r_max - eff_key = "extra_fixed_fields" - prefixed_eff_key = f"{prefix}_{eff_key}" + atomicdata_options_key = "AtomicData_options" + prefixed_eff_key = f"{prefix}_{atomicdata_options_key}" config[prefixed_eff_key] = get_w_prefix( - eff_key, {}, prefix=prefix, arg_dicts=config + atomicdata_options_key, {}, prefix=prefix, arg_dicts=config ) config[prefixed_eff_key]["r_max"] = get_w_prefix( "r_max", diff --git a/nequip/data/_test_data.py b/nequip/data/_test_data.py index e8f4109e..65b85052 100644 --- a/nequip/data/_test_data.py +++ b/nequip/data/_test_data.py @@ -30,7 +30,7 @@ def __init__( dataset_seed: int = 123456, file_name: Optional[str] = None, url: Optional[str] = None, - extra_fixed_fields: Dict[str, Any] = {}, + AtomicData_options: Dict[str, Any] = {}, include_frames: Optional[List[int]] = None, type_mapper: TypeMapper = None, ): @@ -46,8 +46,7 @@ def __init__( file_name=file_name, url=url, root=root, - force_fixed_keys=[AtomicDataDict.CELL_KEY, AtomicDataDict.PBC_KEY], - extra_fixed_fields=extra_fixed_fields, + AtomicData_options=AtomicData_options, include_frames=include_frames, type_mapper=type_mapper, ) @@ -78,7 +77,7 @@ def get_data(self): forces=base_atoms.get_forces(), total_energy=base_atoms.get_potential_energy(), stress=base_atoms.get_stress(voigt=False), - **self.extra_fixed_fields + **self.AtomicData_options ) ) return (datas,) diff --git a/nequip/data/dataloader.py b/nequip/data/dataloader.py index a6c16670..6b1bdf76 100644 --- a/nequip/data/dataloader.py +++ b/nequip/data/dataloader.py @@ -9,16 +9,13 @@ class Collater(object): """Collate a list of ``AtomicData``. Args: - fixed_fields: which fields are fixed fields exclude_keys: keys to ignore in the input, not copying to the output """ def __init__( self, - fixed_fields: List[str] = [], exclude_keys: List[str] = [], ): - self.fixed_fields = fixed_fields self._exclude_keys = set(exclude_keys) @classmethod @@ -27,35 +24,14 @@ def for_dataset( dataset, exclude_keys: List[str] = [], ): - """Construct a collater appropriate to ``dataset``. - - All kwargs besides ``fixed_fields`` are passed through to the constructor. 
- """ + """Construct a collater appropriate to ``dataset``.""" return cls( - fixed_fields=list(getattr(dataset, "fixed_fields", {}).keys()), exclude_keys=exclude_keys, ) def collate(self, batch: List[Data]) -> Batch: """Collate a list of data""" - # For fixed fields, we need to batch those that are per-node or - # per-edge, since they need to be repeated in order to have the same - # number of nodes/edges as the full batch graph. - # For fixed fields that are per-example, however — those with __cat_dim__ - # of None — we can just put one copy over the whole batch graph. - # Figure out which ones those are: - new_dim_fixed = set() - for f in self.fixed_fields: - if batch[0].__cat_dim__(f, None) is None: - new_dim_fixed.add(f) - # TODO: cache ^ and the batched versions of fixed fields for various batch sizes if necessary for performance - out = Batch.from_data_list( - batch, exclude_keys=self._exclude_keys.union(new_dim_fixed) - ) - for f in new_dim_fixed: - if f in self._exclude_keys: - continue - out[f] = batch[0][f] + out = Batch.from_data_list(batch, exclude_keys=self._exclude_keys) return out def __call__(self, batch: List[Data]) -> Batch: diff --git a/nequip/data/dataset.py b/nequip/data/dataset.py index c38b8eae..71791b17 100644 --- a/nequip/data/dataset.py +++ b/nequip/data/dataset.py @@ -27,19 +27,18 @@ _NODE_FIELDS, _EDGE_FIELDS, _GRAPH_FIELDS, + _LONG_FIELDS, ) from nequip.utils.batch_ops import bincount from nequip.utils.regressor import solver from nequip.utils.savenload import atomic_write from nequip.utils.multiprocessing import num_tasks from .transforms import TypeMapper -from .AtomicData import _process_dict class AtomicDataset(Dataset): """The base class for all NequIP datasets.""" - fixed_fields: Dict[str, Any] root: str def __init__( @@ -117,8 +116,7 @@ class AtomicInMemoryDataset(AtomicDataset): root (str, optional): Root directory where the dataset should be saved. Defaults to current working directory. file_name (str, optional): file name of data source. only used in children class url (str, optional): url to download data source - force_fixed_keys (list, optional): keys to move from AtomicData to fixed_fields dictionary - extra_fixed_fields (dict, optional): extra key that are not stored in data but needed for AtomicData initialization + AtomicData_options (dict, optional): extra key that are not stored in data but needed for AtomicData initialization include_frames (list, optional): the frames to process with the constructor. type_mapper (TypeMapper): the transformation to map atomic information to species index. Optional """ @@ -128,8 +126,7 @@ def __init__( root: str, file_name: Optional[str] = None, url: Optional[str] = None, - force_fixed_keys: List[str] = [], - extra_fixed_fields: Dict[str, Any] = {}, + AtomicData_options: Dict[str, Any] = {}, include_frames: Optional[List[int]] = None, type_mapper: Optional[TypeMapper] = None, ): @@ -138,17 +135,12 @@ def __init__( self.file_name = ( getattr(type(self), "FILE_NAME", None) if file_name is None else file_name ) - force_fixed_keys = set(force_fixed_keys).union( - getattr(type(self), "FORCE_FIXED_KEYS", []) - ) self.url = getattr(type(self), "URL", url) - self.force_fixed_keys = force_fixed_keys - self.extra_fixed_fields = extra_fixed_fields + self.AtomicData_options = AtomicData_options self.include_frames = include_frames self.data = None - self.fixed_fields = None # !!! don't delete this block. 
# otherwise the inherent children class @@ -165,9 +157,7 @@ def __init__( # Then pre-process the data if disk files are not found super().__init__(root=root, type_mapper=type_mapper) if self.data is None: - self.data, self.fixed_fields, include_frames = torch.load( - self.processed_paths[0] - ) + self.data, include_frames = torch.load(self.processed_paths[0]) if not np.all(include_frames == self.include_frames): raise ValueError( f"the include_frames is changed. " @@ -195,11 +185,9 @@ def get_data( Note that parameters for graph construction such as ``pbc`` and ``r_max`` should be included here as (likely, but not necessarily, fixed) fields. Returns: - A two-tuple of: + A dict: fields: dict mapping a field name ('pos', 'cell') to a list-like sequence of tensor-like objects giving that field's value for each example. - fixed_fields: dict - mapping field names to their constant values for every example in the dataset. Or: data_list: List[AtomicData] """ @@ -216,46 +204,28 @@ def download(self): def process(self): data = self.get_data() - if len(data) == 1: + if isinstance(data, list): # It's a data list - data_list = data[0] - if not (self.include_frames is None or data[0] is None): + data_list = data + if not (self.include_frames is None or data_list is None): data_list = [data_list[i] for i in self.include_frames] assert all(isinstance(e, AtomicData) for e in data_list) assert all(AtomicDataDict.BATCH_KEY not in e for e in data_list) - fields, fixed_fields = {}, {} - - # take the force_fixed_keys away from the fields - for key in self.force_fixed_keys: - if key in data_list[0]: - fixed_fields[key] = data_list[0][key] - - fixed_fields.update(self.extra_fixed_fields) - - elif len(data) == 2: + fields = {} - # It's fields and fixed_fields + elif isinstance(data, dict): + # It's fields # Get our data - fields, fixed_fields = data - - fixed_fields.update(self.extra_fixed_fields) + fields = data # check keys - all_keys = set(fields.keys()).union(fixed_fields.keys()) - assert len(all_keys) == len(fields) + len( - fixed_fields - ), "No overlap in keys between data and fixed fields allowed!" + all_keys = set(fields.keys()) assert AtomicDataDict.BATCH_KEY not in all_keys # Check bad key combinations, but don't require that this be a graph yet. 
AtomicDataDict.validate_keys(all_keys, graph_required=False) - # take the force_fixed_keys away from the fields - for key in self.force_fixed_keys: - if key in fields: - fixed_fields[key] = fields.pop(key)[0] - # check dimesionality num_examples = set([len(a) for a in fields.values()]) if not len(num_examples) == 1: @@ -275,11 +245,16 @@ def process(self): else: # do neighborlist from points constructor = AtomicData.from_points - assert "r_max" in all_keys + assert "r_max" in self.AtomicData_options assert AtomicDataDict.POSITIONS_KEY in all_keys data_list = [ - constructor(**{**{f: v[i] for f, v in fields.items()}, **fixed_fields}) + constructor( + **{ + **{f: v[i] for f, v in fields.items()}, + **self.AtomicData_options, + } + ) for i in include_frames ] @@ -288,13 +263,10 @@ def process(self): # Batch it for efficient saving # This limits an AtomicInMemoryDataset to a maximum of LONG_MAX atoms _overall_, but that is a very big number and any dataset that large is probably not "InMemory" anyway - data = Batch.from_data_list(data_list, exclude_keys=fixed_fields.keys()) + data = Batch.from_data_list(data_list) del data_list del fields - # type conversion - _process_dict(fixed_fields, ignore_fields=["r_max"]) - total_MBs = sum(item.numel() * item.element_size() for _, item in data) / ( 1024 * 1024 ) @@ -310,21 +282,45 @@ def process(self): # datasets. It only matters that they don't simultaneously try # to write the _same_ file, corrupting it. with atomic_write(self.processed_paths[0], binary=True) as f: - torch.save((data, fixed_fields, self.include_frames), f) + torch.save((data, self.include_frames), f) with atomic_write(self.processed_paths[1], binary=False) as f: yaml.dump(self._get_parameters(), f) logging.info("Cached processed data to disk") self.data = data - self.fixed_fields = fixed_fields def get(self, idx): - out = self.data.get_example(idx) - # Add back fixed fields - for f, v in self.fixed_fields.items(): - out[f] = v - return out + return self.data.get_example(idx) + + def _selectors( + self, + stride: int = 1, + ): + if self._indices is not None: + graph_selector = torch.as_tensor(self._indices)[::stride] + # note that self._indices is _not_ necessarily in order, + # while self.data --- which we take our arrays from --- + # is always in the original order. + # In particular, the values of `self.data.batch` + # are indexes in the ORIGINAL order + # thus we need graph level properties to also be in the original order + # so that batch values index into them correctly + # since self.data.batch is always sorted & contiguous + # (because of Batch.from_data_list) + # we sort it: + graph_selector, _ = torch.sort(graph_selector) + else: + graph_selector = torch.arange(0, self.len(), stride) + + node_selector = torch.as_tensor( + np.in1d(self.data.batch.numpy(), graph_selector.numpy()) + ) + + edge_index = self.data[AtomicDataDict.EDGE_INDEX_KEY] + edge_selector = node_selector[edge_index[0]] & node_selector[edge_index[1]] + + return (graph_selector, node_selector, edge_selector) def statistics( self, @@ -374,45 +370,22 @@ def statistics( if len(fields) == 0: return [] - if self._indices is not None: - graph_selector = torch.as_tensor(self._indices)[::stride] - # note that self._indices is _not_ necessarily in order, - # while self.data --- which we take our arrays from --- - # is always in the original order. 
- # In particular, the values of `self.data.batch` - # are indexes in the ORIGINAL order - # thus we need graph level properties to also be in the original order - # so that batch values index into them correctly - # since self.data.batch is always sorted & contiguous - # (because of Batch.from_data_list) - # we sort it: - graph_selector, _ = torch.sort(graph_selector) - else: - graph_selector = torch.arange(0, self.len(), stride) - num_graphs = len(graph_selector) + graph_selector, node_selector, edge_selector = self._selectors(stride=stride) - node_selector = torch.as_tensor( - np.in1d(self.data.batch.numpy(), graph_selector.numpy()) - ) + num_graphs = len(graph_selector) num_nodes = node_selector.sum() - - edge_index = self.data[AtomicDataDict.EDGE_INDEX_KEY] - edge_selector = node_selector[edge_index[0]] & node_selector[edge_index[1]] num_edges = edge_selector.sum() - del edge_index if self.transform is not None: - # pre-transform the fixed fields and data so that statistics process transformed data - ff_transformed = self.transform(self.fixed_fields, types_required=False) + # pre-transform the data so that statistics process transformed data data_transformed = self.transform(self.data.to_dict(), types_required=False) else: - ff_transformed = self.fixed_fields data_transformed = self.data.to_dict() # pre-select arrays # this ensures that all following computations use the right data all_keys = set() selectors = {} - for k in list(ff_transformed.keys()) + list(data_transformed.keys()): + for k in data_transformed.keys(): all_keys.add(k) if k in _NODE_FIELDS: selectors[k] = node_selector @@ -425,9 +398,6 @@ def statistics( # TODO: do the batch indexes, edge_indexes, etc. after selection need to be # "compacted" to subtract out their offsets? For now, we just punt this # onto the writer of the callable field. - # do not actually select on fixed fields, since they are constant - # but still only select fields that are correctly registered - ff_transformed = {k: v for k, v in ff_transformed.items() if k in selectors} # apply selector to actual data data_transformed = { k: data_transformed[k][selectors[k]] @@ -455,10 +425,7 @@ def statistics( raise RuntimeError( f"Only per-node and per-graph fields can have statistics computed; `{field}` has not been registered as either. If it is per-node or per-graph, please register it as such using `nequip.data.register_fields`" ) - if field in ff_transformed: - arr = ff_transformed[field] - else: - arr = data_transformed[field] + arr = data_transformed[field] if field in _NODE_FIELDS: arr_is_per = "node" elif field in _GRAPH_FIELDS: @@ -510,15 +477,7 @@ def statistics( ana_mode = ana_mode[len("per_species_") :] if atom_types is None: - if AtomicDataDict.ATOM_TYPE_KEY in data_transformed: - atom_types = data_transformed[AtomicDataDict.ATOM_TYPE_KEY] - elif AtomicDataDict.ATOM_TYPE_KEY in ff_transformed: - atom_types = ff_transformed[AtomicDataDict.ATOM_TYPE_KEY] - atom_types = ( - atom_types.unsqueeze(0) - .expand((num_graphs,) + atom_types.shape) - .reshape(-1) - ) + atom_types = data_transformed[AtomicDataDict.ATOM_TYPE_KEY] results = self._per_species_statistics( ana_mode, @@ -641,7 +600,7 @@ class NpzDataset(AtomicInMemoryDataset): """Load data from an npz file. To avoid loading unneeded data, keys are ignored by default unless they are in ``key_mapping``, ``include_keys``, - ``npz_fixed_fields_keys`` or ``extra_fixed_fields``. + or ``npz_fixed_fields_keys``. Args: key_mapping (Dict[str, str]): mapping of npz keys to ``AtomicData`` keys. 
Optional @@ -676,6 +635,10 @@ class NpzDataset(AtomicInMemoryDataset): force: forces energy: total_energy Z: atomic_numbers + graph_fields: + - user_label1 + node_fields: + - user_label2 ``` """ @@ -695,8 +658,7 @@ def __init__( npz_fixed_field_keys: List[str] = [], file_name: Optional[str] = None, url: Optional[str] = None, - force_fixed_keys: List[str] = [], - extra_fixed_fields: Dict[str, Any] = {}, + AtomicData_options: Dict[str, Any] = {}, include_frames: Optional[List[int]] = None, type_mapper: TypeMapper = None, ): @@ -708,8 +670,7 @@ def __init__( file_name=file_name, url=url, root=root, - force_fixed_keys=force_fixed_keys, - extra_fixed_fields=extra_fixed_fields, + AtomicData_options=AtomicData_options, include_frames=include_frames, type_mapper=type_mapper, ) @@ -730,26 +691,42 @@ def get_data(self): keys = set(list(self.key_mapping.keys())) keys.update(self.npz_fixed_field_keys) keys.update(self.include_keys) - keys.update(list(self.extra_fixed_fields.keys())) keys = keys.intersection(set(list(data.keys()))) mapped = {self.key_mapping.get(k, k): data[k] for k in keys} - # TODO: generalize this? - for intkey in ( - AtomicDataDict.ATOMIC_NUMBERS_KEY, - AtomicDataDict.ATOM_TYPE_KEY, - AtomicDataDict.EDGE_INDEX_KEY, - ): + for intkey in _LONG_FIELDS: if intkey in mapped: mapped[intkey] = mapped[intkey].astype(np.int64) fields = {k: v for k, v in mapped.items() if k not in self.npz_fixed_field_keys} - # note that we don't deal with extra_fixed_fields here; AtomicInMemoryDataset does that. - fixed_fields = { - k: v for k, v in mapped.items() if k in self.npz_fixed_field_keys - } - return fields, fixed_fields + num_examples, num_atoms, n_dim = fields[AtomicDataDict.POSITIONS_KEY].shape + assert n_dim == 3 + + # now we replicate and add the fixed fields: + for fixed_field in self.npz_fixed_field_keys: + orig = mapped[fixed_field] + if fixed_field in _NODE_FIELDS: + assert orig.ndim >= 1 # [n_atom, feature_dims] + assert orig.shape[0] == num_atoms + replicated = np.expand_dims(orig, 0) + replicated = np.tile( + replicated, + (num_examples,) + (1,) * len(replicated.shape[1:]), + ) # [n_example, n_atom, feature_dims] + elif fixed_field in _GRAPH_FIELDS: + # orig is [feature_dims] + replicated = np.expand_dims(orig, 0) + replicated = np.tile( + replicated, + (num_examples,) + (1,) * len(replicated.shape[1:]), + ) # [n_example, feature_dims] + else: + raise KeyError( + f"npz_fixed_field_keys contains `{fixed_field}`, but it isn't registered as a node or graph field" + ) + fields[fixed_field] = replicated + return fields def _ase_dataset_reader( @@ -861,8 +838,7 @@ def __init__( ase_args: dict = {}, file_name: Optional[str] = None, url: Optional[str] = None, - force_fixed_keys: List[str] = [], - extra_fixed_fields: Dict[str, Any] = {}, + AtomicData_options: Dict[str, Any] = {}, include_frames: Optional[List[int]] = None, type_mapper: TypeMapper = None, key_mapping: Optional[dict] = None, @@ -881,8 +857,7 @@ def __init__( file_name=file_name, url=url, root=root, - force_fixed_keys=force_fixed_keys, - extra_fixed_fields=extra_fixed_fields, + AtomicData_options=AtomicData_options, include_frames=include_frames, type_mapper=type_mapper, ) @@ -940,7 +915,7 @@ def get_data(self): key_mapping=self.key_mapping, ) kwargs = {k: v for k, v in kwargs.items() if v is not None} - kwargs.update(self.extra_fixed_fields) + kwargs.update(self.AtomicData_options) n_proc = num_tasks() with tempfile.TemporaryDirectory() as tmpdir: from nequip.utils._global_options import _get_latest_global_options @@ -972,4 
+947,4 @@ def get_data(self): # datas here is already in order, stride 1 start 0 # no need to un-interleave # return list of AtomicData: - return ([e[1] for e in datas],) + return [e[1] for e in datas] diff --git a/nequip/utils/unittests/conftest.py b/nequip/utils/unittests/conftest.py index 4cfa98ff..a4aab07a 100644 --- a/nequip/utils/unittests/conftest.py +++ b/nequip/utils/unittests/conftest.py @@ -121,7 +121,7 @@ def nequip_dataset(molecules, temp_data, float_tolerance): a = ASEDataset( file_name=fp.name, root=temp_data, - extra_fixed_fields={"r_max": 3.0}, + AtomicData_options={"r_max": 3.0}, ase_args=dict(format="extxyz"), type_mapper=TypeMapper(chemical_symbol_to_type={"H": 0, "C": 1, "O": 2}), ) diff --git a/tests/unit/data/test_dataloader.py b/tests/unit/data/test_dataloader.py index 5fbeeb93..fc511143 100644 --- a/tests/unit/data/test_dataloader.py +++ b/tests/unit/data/test_dataloader.py @@ -69,7 +69,7 @@ def npz_dataset(): a = NpzDataset( file_name=folder + "/npzdata.npz", root=folder, - extra_fixed_fields={"r_max": 3}, + AtomicData_options={"r_max": 3}, ) yield a diff --git a/tests/unit/data/test_dataset.py b/tests/unit/data/test_dataset.py index 95cfe48d..365dc320 100644 --- a/tests/unit/data/test_dataset.py +++ b/tests/unit/data/test_dataset.py @@ -59,7 +59,7 @@ def npz_dataset(npz_data, temp_data): a = NpzDataset( file_name=npz_data, root=temp_data + "/test_dataset", - extra_fixed_fields={"r_max": 3}, + AtomicData_options={"r_max": 3}, ) yield a @@ -86,7 +86,7 @@ def test_init(self): assert str(excinfo.value) == "" def test_npz(self, npz_data, root): - g = NpzDataset(file_name=npz_data, root=root, extra_fixed_fields={"r_max": 3.0}) + g = NpzDataset(file_name=npz_data, root=root, AtomicData_options={"r_max": 3.0}) assert isdir(g.root) assert isdir(g.processed_dir) assert isfile(g.processed_dir + "/data.pth") @@ -95,7 +95,7 @@ def test_ase(self, ase_file, root): a = ASEDataset( file_name=ase_file, root=root, - extra_fixed_fields={"r_max": 3.0}, + AtomicData_options={"r_max": 3.0}, ase_args=dict(format="extxyz"), ) assert isdir(a.root) @@ -206,7 +206,7 @@ class TestPerAtomStatistics: @pytest.mark.parametrize("mode", ["mean_std", "rms"]) def test_per_node_field(self, npz_dataset, mode): # set up the transformer - npz_dataset = set_up_transformer(npz_dataset, True, False, False) + npz_dataset = set_up_transformer(npz_dataset, True, False) with pytest.raises(ValueError) as excinfo: npz_dataset.statistics( @@ -218,16 +218,15 @@ def test_per_node_field(self, npz_dataset, mode): == f"It doesn't make sense to ask for `{mode}` since `{AtomicDataDict.BATCH_KEY}` is not per-graph" ) - @pytest.mark.parametrize("fixed_field", [True, False]) @pytest.mark.parametrize("subset", [True, False]) @pytest.mark.parametrize( "key,dim", [(AtomicDataDict.TOTAL_ENERGY_KEY, (1,)), ("somekey", (3,))] ) - def test_per_graph_field(self, npz_dataset, fixed_field, subset, key, dim): + def test_per_graph_field(self, npz_dataset, subset, key, dim): if key == "somekey": register_fields(graph_fields=[key]) - npz_dataset = set_up_transformer(npz_dataset, True, fixed_field, subset) + npz_dataset = set_up_transformer(npz_dataset, True, subset) if npz_dataset is None: return @@ -262,14 +261,11 @@ def test_per_graph_field(self, npz_dataset, fixed_field, subset, key, dim): class TestPerSpeciesStatistics: - @pytest.mark.parametrize("fixed_field", [True, False]) @pytest.mark.parametrize("mode", ["mean_std", "rms"]) @pytest.mark.parametrize("subset", [True, False]) - def test_per_node_field(self, npz_dataset, 
fixed_field, mode, subset): + def test_per_node_field(self, npz_dataset, mode, subset): # set up the transformer - npz_dataset = set_up_transformer( - npz_dataset, not fixed_field, fixed_field, subset - ) + npz_dataset = set_up_transformer(npz_dataset, True, subset) (result,) = npz_dataset.statistics( [AtomicDataDict.BATCH_KEY], @@ -278,15 +274,14 @@ def test_per_node_field(self, npz_dataset, fixed_field, mode, subset): print(result) @pytest.mark.parametrize("alpha", [0, 1e-3, 0.01]) - @pytest.mark.parametrize("fixed_field", [True, False]) @pytest.mark.parametrize("full_rank", [True, False]) @pytest.mark.parametrize("subset", [True, False]) - def test_per_graph_field(self, npz_dataset, alpha, fixed_field, full_rank, subset): + def test_per_graph_field(self, npz_dataset, alpha, full_rank, subset): if alpha <= 1e-4 and not full_rank: return - npz_dataset = set_up_transformer(npz_dataset, full_rank, fixed_field, subset) + npz_dataset = set_up_transformer(npz_dataset, full_rank, subset) if npz_dataset is None: return @@ -351,14 +346,14 @@ class TestReload: @pytest.mark.parametrize("give_url", [True, False]) @pytest.mark.parametrize("change_key_map", [True, False]) def test_reload(self, npz_dataset, npz_data, change_rmax, give_url, change_key_map): - r_max = npz_dataset.extra_fixed_fields["r_max"] + change_rmax + r_max = npz_dataset.AtomicData_options["r_max"] + change_rmax keymap = npz_dataset.key_mapping.copy() # the default one if change_key_map: keymap["x1"] = "x2" a = NpzDataset( file_name=npz_data, root=npz_dataset.root, - extra_fixed_fields={"r_max": r_max}, + AtomicData_options={"r_max": r_max}, key_mapping=keymap, **({"url": "example.com/data.dat"} if give_url else {}), ) @@ -373,10 +368,10 @@ class TestFromConfig: @pytest.mark.parametrize( "args", [ - dict(extra_fixed_fields={"r_max": 3.0}), - dict(dataset_extra_fixed_fields={"r_max": 3.0}), + dict(AtomicData_options={"r_max": 3.0}), + dict(dataset_AtomicData_options={"r_max": 3.0}), dict(r_max=3.0), - dict(r_max=3.0, extra_fixed_fields={}), + dict(r_max=3.0, AtomicData_options={}), ], ) def test_npz(self, npz_data, root, args): @@ -392,7 +387,7 @@ def test_npz(self, npz_data, root, args): ) ) g = dataset_from_config(config) - assert g.fixed_fields["r_max"] == 3 + assert g.AtomicData_options["r_max"] == 3 assert isdir(g.root) assert isdir(g.processed_dir) assert isfile(g.processed_dir + "/data.pth") @@ -403,7 +398,7 @@ def test_ase(self, ase_file, root, prefix): dict( file_name=ase_file, root=root, - extra_fixed_fields={"r_max": 3.0}, + AtomicData_options={"r_max": 3.0}, ase_args=dict(format="extxyz"), chemical_symbol_to_type={"H": 0, "C": 1, "O": 2}, ) @@ -427,7 +422,7 @@ def test_ase(self, ase_file, root, prefix): class TestFromList: def test_from_atoms(self, molecules): dataset = ASEDataset.from_atoms_list( - molecules, extra_fixed_fields={"r_max": 4.5} + molecules, AtomicData_options={"r_max": 4.5} ) assert len(dataset) == len(molecules) for i, mol in enumerate(molecules): @@ -448,13 +443,9 @@ def generate_E(N, mean_min, mean_max, std): return ref_mean, ref_std, (N * E).sum(axis=-1) -def set_up_transformer(npz_dataset, full_rank, fixed_field, subset): +def set_up_transformer(npz_dataset, full_rank, subset): if full_rank: - - if fixed_field: - return - unique = torch.unique(npz_dataset.data[AtomicDataDict.ATOMIC_NUMBERS_KEY]) npz_dataset.transform = TypeMapper( chemical_symbol_to_type={ @@ -466,19 +457,9 @@ def set_up_transformer(npz_dataset, full_rank, fixed_field, subset): # let all atoms to be the same type distribution 
num_nodes = npz_dataset.data[AtomicDataDict.BATCH_KEY].shape[0] - if fixed_field: - del npz_dataset.data[AtomicDataDict.ATOMIC_NUMBERS_KEY] - del npz_dataset.data.__slices__[ - AtomicDataDict.ATOMIC_NUMBERS_KEY - ] # remove batch metadata for the key - new_n = torch.ones(NATOMS, dtype=torch.int64) - new_n[0] += ntype - npz_dataset.fixed_fields[AtomicDataDict.ATOMIC_NUMBERS_KEY] = new_n - else: - npz_dataset.fixed_fields.pop(AtomicDataDict.ATOMIC_NUMBERS_KEY, None) - new_n = torch.ones(num_nodes, dtype=torch.int64) - new_n[::NATOMS] += ntype - npz_dataset.data[AtomicDataDict.ATOMIC_NUMBERS_KEY] = new_n + new_n = torch.ones(num_nodes, dtype=torch.int64) + new_n[::NATOMS] += ntype + npz_dataset.data[AtomicDataDict.ATOMIC_NUMBERS_KEY] = new_n # set up the transformer npz_dataset.transform = TypeMapper( diff --git a/tests/unit/model/test_builder_utils.py b/tests/unit/model/test_builder_utils.py index caebde55..c90327d8 100644 --- a/tests/unit/model/test_builder_utils.py +++ b/tests/unit/model/test_builder_utils.py @@ -27,7 +27,7 @@ def test_avg_num(molecules, temp_data, r_max, subset, to_test): nequip_dataset = ASEDataset( file_name=fp.name, root=temp_data, - extra_fixed_fields={"r_max": r_max}, + AtomicData_options={"r_max": r_max}, ase_args=dict(format="extxyz"), type_mapper=TypeMapper(chemical_symbol_to_type={"H": 0, "C": 1, "O": 2}), ) From 50539bc8e35d90b7c476caa4c7a0dc37fee00484 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 21 Dec 2022 01:57:14 -0500 Subject: [PATCH 018/157] 1.10 compat --- nequip/utils/regressor.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nequip/utils/regressor.py b/nequip/utils/regressor.py index dd755d15..578c45f6 100644 --- a/nequip/utils/regressor.py +++ b/nequip/utils/regressor.py @@ -73,7 +73,10 @@ def down_sampling_by_composition( for i in range(n_types): ids = sort_by[id_start[i] : id_end[i]] for j, p in enumerate(percentage): - new_y[i * n_points + j] = torch.quantile(y[ids], p, interpolation="linear") + # it defaults to linear anyway, and `interpolation` was a 1.11 addition + # so we leave out `, interpolation="linear")` + # https://pytorch.org/docs/1.11/generated/torch.quantile.html?highlight=quantile#torch.quantile + new_y[i * n_points + j] = torch.quantile(y[ids], p) new_X[i * n_points + j] = unique_comps[i] return new_X, new_y From 86857c318d4c850054ef01ae372366715f626710 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 21 Dec 2022 12:42:38 -0500 Subject: [PATCH 019/157] lint --- nequip/data/_test_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nequip/data/_test_data.py b/nequip/data/_test_data.py index 65b85052..c7e0f558 100644 --- a/nequip/data/_test_data.py +++ b/nequip/data/_test_data.py @@ -7,7 +7,7 @@ import ase.build from ase.calculators.emt import EMT -from nequip.data import AtomicInMemoryDataset, AtomicData, AtomicDataDict +from nequip.data import AtomicInMemoryDataset, AtomicData from .transforms import TypeMapper From 77b8971272f948609c6794159a72b9950d825571 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 21 Dec 2022 14:06:34 -0500 Subject: [PATCH 020/157] ensure dtype reset if error --- nequip/model/_build.py | 107 ++++++++++++++++++++------------------- nequip/utils/__init__.py | 3 +- nequip/utils/misc.py | 16 ++++++ 3 files changed, 74 insertions(+), 52 deletions(-) diff --git a/nequip/model/_build.py 
b/nequip/model/_build.py index 6710b1b8..526be891 100644 --- a/nequip/model/_build.py +++ b/nequip/model/_build.py @@ -6,7 +6,12 @@ from nequip.data import AtomicDataset from nequip.data.transforms import TypeMapper from nequip.nn import GraphModuleMixin, GraphModel -from nequip.utils import load_callable, instantiate, dtype_from_name +from nequip.utils import ( + load_callable, + instantiate, + dtype_from_name, + torch_default_dtype, +) def model_from_config( @@ -60,63 +65,63 @@ def model_from_config( default_dtype = torch.get_default_dtype() model_dtype: torch.dtype = dtype_from_name(config.get("model_dtype", default_dtype)) config["model_dtype"] = str(model_dtype).lstrip("torch.") - # set temporarily the default dtype - torch.set_default_dtype(model_dtype) # confirm sanity assert default_dtype in (torch.float32, torch.float64) if default_dtype == torch.float32 and model_dtype == torch.float64: raise ValueError( "Overall default_dtype=float32, but model_dtype=float64 is a higher precision- change default_dtype to float64" ) - - # Build - builders = [ - load_callable(b, prefix="nequip.model") - for b in config.get("model_builders", []) - ] - - model = None - - for builder in builders: - pnames = inspect.signature(builder).parameters - params = {} - if "initialize" in pnames: - params["initialize"] = initialize - if "deploy" in pnames: - params["deploy"] = deploy - if "config" in pnames: - params["config"] = config - if "dataset" in pnames: - if "initialize" not in pnames: - raise ValueError("Cannot request dataset without requesting initialize") - if ( - initialize - and pnames["dataset"].default == inspect.Parameter.empty - and dataset is None - ): - raise RuntimeError( - f"Builder {builder.__name__} requires the dataset, initialize is true, but no dataset was provided to `model_from_config`." + # temporarily set the default dtype + with torch_default_dtype(model_dtype): + + # Build + builders = [ + load_callable(b, prefix="nequip.model") + for b in config.get("model_builders", []) + ] + + model = None + + for builder in builders: + pnames = inspect.signature(builder).parameters + params = {} + if "initialize" in pnames: + params["initialize"] = initialize + if "deploy" in pnames: + params["deploy"] = deploy + if "config" in pnames: + params["config"] = config + if "dataset" in pnames: + if "initialize" not in pnames: + raise ValueError( + "Cannot request dataset without requesting initialize" + ) + if ( + initialize + and pnames["dataset"].default == inspect.Parameter.empty + and dataset is None + ): + raise RuntimeError( + f"Builder {builder.__name__} requires the dataset, initialize is true, but no dataset was provided to `model_from_config`." 
+ ) + params["dataset"] = dataset + if "model" in pnames: + if model is None: + raise RuntimeError( + f"Builder {builder.__name__} asked for the model as an input, but no previous builder has returned a model" + ) + params["model"] = model + else: + if model is not None: + raise RuntimeError( + f"All model_builders after the first one that returns a model must take the model as an argument; {builder.__name__} doesn't" + ) + model = builder(**params) + if model is not None and not isinstance(model, GraphModuleMixin): + raise TypeError( + f"Builder {builder.__name__} didn't return a GraphModuleMixin, got {type(model)} instead" ) - params["dataset"] = dataset - if "model" in pnames: - if model is None: - raise RuntimeError( - f"Builder {builder.__name__} asked for the model as an input, but no previous builder has returned a model" - ) - params["model"] = model - else: - if model is not None: - raise RuntimeError( - f"All model_builders after the first one that returns a model must take the model as an argument; {builder.__name__} doesn't" - ) - model = builder(**params) - if model is not None and not isinstance(model, GraphModuleMixin): - raise TypeError( - f"Builder {builder.__name__} didn't return a GraphModuleMixin, got {type(model)} instead" - ) - - # reset default dtype - torch.set_default_dtype(default_dtype) + # reset to default dtype by context manager # Wrap the model up model = GraphModel( diff --git a/nequip/utils/__init__.py b/nequip/utils/__init__.py index e7dd0912..46ab22ec 100644 --- a/nequip/utils/__init__.py +++ b/nequip/utils/__init__.py @@ -14,7 +14,7 @@ from .config import Config from .output import Output from .modules import find_first_of_type -from .misc import dtype_from_name +from .misc import dtype_from_name, torch_default_dtype __all__ = [ instantiate_from_cls_name, @@ -30,4 +30,5 @@ Output, find_first_of_type, dtype_from_name, + torch_default_dtype, ] diff --git a/nequip/utils/misc.py b/nequip/utils/misc.py index 1adc602f..47b57c0d 100644 --- a/nequip/utils/misc.py +++ b/nequip/utils/misc.py @@ -1,4 +1,6 @@ from typing import Union +import contextlib + import torch @@ -6,3 +8,17 @@ def dtype_from_name(name: Union[str, torch.dtype]) -> torch.dtype: if isinstance(name, torch.dtype): return name return {"float32": torch.float32, "float64": torch.float64}[name] + + +@contextlib.contextmanager +def torch_default_dtype(dtype): + """Set `torch.get_default_dtype()` for the duration of a with block, cleaning up with a `finally`. + + Note that this is NOT thread safe, since `torch.set_default_dtype()` is not thread safe. 
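+
+    A minimal sketch of the intended use (variable names are illustrative):
+
+        with torch_default_dtype(torch.float64):
+            weights = torch.zeros(3)  # created as float64
+        # the previous default dtype is restored here, even if the block raised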
+ """ + orig_default_dtype = torch.get_default_dtype() + try: + torch.set_default_dtype(dtype) + yield + finally: + torch.set_default_dtype(orig_default_dtype) From 9ddd616b408d8de3905ba21f06c8dfae02b01592 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 21 Dec 2022 14:07:06 -0500 Subject: [PATCH 021/157] fix type promotion in scaling --- nequip/nn/_atomwise.py | 5 ++++- nequip/nn/_rescale.py | 27 ++++++++++++++++++++++----- nequip/train/trainer.py | 28 +++++++++++++++++----------- tests/integration/test_deploy.py | 15 ++++++++------- 4 files changed, 51 insertions(+), 24 deletions(-) diff --git a/nequip/nn/_atomwise.py b/nequip/nn/_atomwise.py index 5d7823c8..344c3d4e 100644 --- a/nequip/nn/_atomwise.py +++ b/nequip/nn/_atomwise.py @@ -205,6 +205,7 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: # we can used an FMA for performance # addcmul computes # input + tensor1 * tensor2 elementwise + # it will promote to widest dtype, which comes from shifts/scales in_field = torch.addcmul( self.shifts[species_idx].view(-1, 1), self.scales[species_idx].view(-1, 1), @@ -212,7 +213,9 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: ) else: # fallback path for torch<1.13 OR mix of enabled shifts and scales - # multiplication / addition promotes dtypes already, so no cast is needed: + # multiplication / addition promotes dtypes already, so no cast is needed + # this is specifically because self.*[species_idx].view(-1, 1) + # is never a scalar (ndim == 0), since it is always [n_atom, 1] if self.has_scales: in_field = self.scales[species_idx].view(-1, 1) * in_field if self.has_shifts: diff --git a/nequip/nn/_rescale.py b/nequip/nn/_rescale.py index 4baed254..3c1e1483 100644 --- a/nequip/nn/_rescale.py +++ b/nequip/nn/_rescale.py @@ -38,6 +38,7 @@ class RescaleOutput(GraphModuleMixin, torch.nn.Module): related_shift_keys: List[str] scale_trainble: bool rescale_trainable: bool + _all_keys: List[str] has_scale: bool has_shift: bool @@ -87,6 +88,7 @@ def __init__( self.scale_keys = list(scale_keys) self.shift_keys = list(shift_keys) + self._all_keys = list(all_keys) self.related_scale_keys = list(set(related_scale_keys).union(scale_keys)) self.related_shift_keys = list(set(related_shift_keys).union(shift_keys)) @@ -149,17 +151,32 @@ def get_inner_model(self): def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: data = self.model(data) if self.training: - return data + # no scaling, but still need to promote for consistent dtype behavior + # this is hopefully a no-op in most circumstances due to a + # preceeding PerSpecies rescaling: + for field in self._all_keys: + data[field] = data[field].to(dtype=self.default_dtype) else: # Scale then shift - # * and + promote dtypes by default + # * and + promote dtypes by default, but not when the other + # operand is a scalar, which `scale/shift_by` are. + # The .to(dtype=self.default_dtype) should be a free no-op + # under most circumstances, since if this RescaleOutput + # is preceeded by a PerSpecies, that will cast up to + # default_dtype through promotion in the per-atom + # * and +, which are always between tensors and always + # promote. 
Still, we include it just to be sure if self.has_scale: for field in self.scale_keys: - data[field] = data[field] * self.scale_by + data[field] = ( + data[field].to(dtype=self.default_dtype) * self.scale_by + ) if self.has_shift: for field in self.shift_keys: - data[field] = data[field] + self.shift_by - return data + data[field] = ( + data[field].to(dtype=self.default_dtype) + self.shift_by + ) + return data @torch.jit.export def scale( diff --git a/nequip/train/trainer.py b/nequip/train/trainer.py index 33a81fbe..2e0fa7d1 100644 --- a/nequip/train/trainer.py +++ b/nequip/train/trainer.py @@ -658,6 +658,19 @@ def load_model_from_training_session( device="cpu", config_dictionary: Optional[dict] = None, ) -> Tuple[torch.nn.Module, Config]: + """Load a model from a training session. + + Note that this uses ``model_from_config`` internally and is thus not thread safe. + + Args: + traindir: the training session + model_name: which checkpoint to load; defaults to ``best_model.pth`` + device: target device to load to, defaults to ``cpu`` + config_dictionary: optionally use this config instead of ``traindir/config.yaml`` + + Returns: + (model, config) + """ traindir = str(traindir) model_name = str(model_name) @@ -666,21 +679,14 @@ def load_model_from_training_session( else: config = Config.from_file(traindir + "/config.yaml") + # model_from_config takes care of dtypes already model = model_from_config( config=config, initialize=False, ) - if model is not None: # TODO: why would it be? - # TODO: this is not exactly equivalent to building with - # this set as default dtype... does it matter? - model.to( - device=torch.device(device), - dtype=dtype_from_name(config.default_dtype), - ) - model_state_dict = torch.load( - traindir + "/" + model_name, map_location=device - ) - model.load_state_dict(model_state_dict) + model.to(device=torch.device(device)) + model_state_dict = torch.load(traindir + "/" + model_name, map_location=device) + model.load_state_dict(model_state_dict) return model, config diff --git a/tests/integration/test_deploy.py b/tests/integration/test_deploy.py index cc710f11..468f9b3a 100644 --- a/tests/integration/test_deploy.py +++ b/tests/integration/test_deploy.py @@ -19,13 +19,13 @@ @pytest.mark.parametrize( "device", ["cpu"] + (["cuda"] if torch.cuda.is_available() else []) ) -def test_deploy(BENCHMARK_ROOT, device): +@pytest.mark.parametrize("model_dtype", ["float32", "float64"]) +def test_deploy(BENCHMARK_ROOT, device, model_dtype): dtype = str(torch.get_default_dtype())[len("torch.") :] - atol = {"float32": 1e-5, "float64": 1e-7}[dtype] - - # if torch.cuda.is_available(): - # # TODO: is this true? 
- # pytest.skip("CUDA and subprocesses have issues") + if dtype == "float32" and model_dtype == "float64": + pytest.skip("default_dtype=float32 and model_dtype=float64 doesn't make sense") + # atol on MODEL dtype, since a mostly float32 model still has float32 variation + atol = {"float32": 1e-5, "float64": 1e-7}[model_dtype] keys = [ AtomicDataDict.TOTAL_ENERGY_KEY, @@ -45,6 +45,7 @@ def test_deploy(BENCHMARK_ROOT, device): BENCHMARK_ROOT / "aspirin_ccsd-train.npz" ) true_config["default_dtype"] = dtype + true_config["model_dtype"] = model_dtype true_config["max_epochs"] = 1 true_config["n_train"] = 1 true_config["n_val"] = 1 @@ -72,7 +73,7 @@ def test_deploy(BENCHMARK_ROOT, device): assert deployed_path.is_file(), "Deploy didn't create file" # now test predictions the same - best_mod, _ = Trainer.load_model_from_training_session( + best_mod, train_config = Trainer.load_model_from_training_session( traindir=f"{root}/{run_name}/", model_name="best_model.pth", device=device, From 9b954049697b990f4f7f5c255ab1121d4da78c1b Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 21 Dec 2022 14:22:23 -0500 Subject: [PATCH 022/157] fix to new return format --- nequip/data/_test_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nequip/data/_test_data.py b/nequip/data/_test_data.py index 65b85052..8193a728 100644 --- a/nequip/data/_test_data.py +++ b/nequip/data/_test_data.py @@ -80,4 +80,4 @@ def get_data(self): **self.AtomicData_options ) ) - return (datas,) + return datas From d1f4da3a352c18a2527d55e7e960599af5b85991 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 21 Dec 2022 14:22:32 -0500 Subject: [PATCH 023/157] make tests more efficient --- tests/integration/conftest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 3a082602..f8060a02 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -137,7 +137,7 @@ def _training_session(conffile, model_dtype, builder, BENCHMARK_ROOT): @pytest.fixture( - scope="module", + scope="session", params=[ ("minimal.yaml", AtomicDataDict.FORCE_KEY), ("minimal_toy_emt.yaml", AtomicDataDict.STRESS_KEY), @@ -148,7 +148,7 @@ def conffile(request): @pytest.fixture( - scope="module", + scope="session", params=["float32", "float64"], ) def model_dtype(request, float_tolerance): @@ -158,7 +158,7 @@ def model_dtype(request, float_tolerance): @pytest.fixture( - scope="module", params=[ConstFactorModel, LearningFactorModel, IdentityModel] + scope="session", params=[ConstFactorModel, LearningFactorModel, IdentityModel] ) def fake_model_training_session(request, BENCHMARK_ROOT, conffile, model_dtype): conffile, _ = conffile From 1a4a3ca9ae5542f22d1a1a8afd5a187295590feb Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 21 Dec 2022 15:39:58 -0500 Subject: [PATCH 024/157] lint --- nequip/data/_test_data.py | 2 +- nequip/train/trainer.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/nequip/data/_test_data.py b/nequip/data/_test_data.py index 8193a728..73244d42 100644 --- a/nequip/data/_test_data.py +++ b/nequip/data/_test_data.py @@ -7,7 +7,7 @@ import ase.build from ase.calculators.emt import EMT -from nequip.data import AtomicInMemoryDataset, AtomicData, AtomicDataDict +from nequip.data import AtomicInMemoryDataset, AtomicData from .transforms import 
TypeMapper diff --git a/nequip/train/trainer.py b/nequip/train/trainer.py index 2e0fa7d1..ae232b91 100644 --- a/nequip/train/trainer.py +++ b/nequip/train/trainer.py @@ -39,7 +39,6 @@ atomic_write, finish_all_writes, atomic_write_group, - dtype_from_name, ) from nequip.utils.versions import check_code_version from nequip.model import model_from_config From 75d9286e3bfa579a1983eaf14096adf3a1177177 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 21 Dec 2022 15:46:34 -0500 Subject: [PATCH 025/157] cheaper? dtype promotion --- nequip/nn/_rescale.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/nequip/nn/_rescale.py b/nequip/nn/_rescale.py index 3c1e1483..937ccd75 100644 --- a/nequip/nn/_rescale.py +++ b/nequip/nn/_rescale.py @@ -153,29 +153,26 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: if self.training: # no scaling, but still need to promote for consistent dtype behavior # this is hopefully a no-op in most circumstances due to a - # preceeding PerSpecies rescaling: + # preceeding PerSpecies rescale promoting to default_dtype anyway: for field in self._all_keys: data[field] = data[field].to(dtype=self.default_dtype) else: # Scale then shift # * and + promote dtypes by default, but not when the other # operand is a scalar, which `scale/shift_by` are. - # The .to(dtype=self.default_dtype) should be a free no-op - # under most circumstances, since if this RescaleOutput - # is preceeded by a PerSpecies, that will cast up to - # default_dtype through promotion in the per-atom - # * and +, which are always between tensors and always - # promote. Still, we include it just to be sure + # We solve this by expanding `scale/shift_by` to tensors + # This is free and doesn't allocate new memory on CUDA: + # https://pytorch.org/docs/stable/generated/torch.Tensor.expand.html#torch.Tensor.expand + # confirmed in PyTorch slack + # https://pytorch.slack.com/archives/C3PDTEV8E/p1671652283801129 if self.has_scale: for field in self.scale_keys: - data[field] = ( - data[field].to(dtype=self.default_dtype) * self.scale_by - ) + v = data[field] + data[field] = v * self.scale_by.expand(v.shape) if self.has_shift: for field in self.shift_keys: - data[field] = ( - data[field].to(dtype=self.default_dtype) + self.shift_by - ) + v = data[field] + data[field] = v + self.shift_by.expand(v.shape) return data @torch.jit.export From aaa061c849011f7b3bb53fbc6a7061205a289d60 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 21 Dec 2022 15:46:42 -0500 Subject: [PATCH 026/157] Run tests on multiple GPUs when available --- nequip/utils/unittests/conftest.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/nequip/utils/unittests/conftest.py b/nequip/utils/unittests/conftest.py index a4aab07a..bebd90a1 100644 --- a/nequip/utils/unittests/conftest.py +++ b/nequip/utils/unittests/conftest.py @@ -19,6 +19,15 @@ from nequip.utils._global_options import _set_global_options from nequip.utils.misc import dtype_from_name +# Sometimes we run parallel using pytest-xdist, and want to be able to use +# as many GPUs as are available +# https://pytest-xdist.readthedocs.io/en/latest/how-to.html#identifying-the-worker-process-during-a-test +_is_pytest_xdist: bool = os.environ.get("PYTEST_XDIST_WORKER", "master") != "master" +if _is_pytest_xdist and torch.cuda.is_available(): + _xdist_worker_rank: int = int(os.environ["PYTEST_XDIST_WORKER"].lstrip("gw")) 
+ torch.cuda.set_device(_xdist_worker_rank % torch.cuda.device_count()) + + if "NEQUIP_NUM_TASKS" not in os.environ: # Test parallelization, but don't waste time spawning tons of workers if lots of cores available os.environ["NEQUIP_NUM_TASKS"] = "2" From 9c8a998d72884046aa80decc412b39bd9c4f8ec1 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 21 Dec 2022 16:52:35 -0500 Subject: [PATCH 027/157] multi gpu pytest --- nequip/utils/unittests/conftest.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/nequip/utils/unittests/conftest.py b/nequip/utils/unittests/conftest.py index bebd90a1..1e56f958 100644 --- a/nequip/utils/unittests/conftest.py +++ b/nequip/utils/unittests/conftest.py @@ -25,7 +25,17 @@ _is_pytest_xdist: bool = os.environ.get("PYTEST_XDIST_WORKER", "master") != "master" if _is_pytest_xdist and torch.cuda.is_available(): _xdist_worker_rank: int = int(os.environ["PYTEST_XDIST_WORKER"].lstrip("gw")) - torch.cuda.set_device(_xdist_worker_rank % torch.cuda.device_count()) + _cuda_vis_devs = os.environ.get( + "CUDA_VISIBLE_DEVICES", + ",".join(str(e) for e in range(torch.cuda.device_count())), + ).split(",") + _cuda_vis_devs = [int(e) for e in _cuda_vis_devs] + # set this for tests that run in this process + _local_gpu_rank = _xdist_worker_rank % torch.cuda.device_count() + torch.cuda.set_device(_local_gpu_rank) + # set this for launched child processes + os.environ["CUDA_VISIBLE_DEVICES"] = str(_cuda_vis_devs[_local_gpu_rank]) + del _xdist_worker_rank, _cuda_vis_devs, _local_gpu_rank if "NEQUIP_NUM_TASKS" not in os.environ: From 436e5cf70a5577b26ff1268155af7033fbfecb24 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 21 Dec 2022 20:51:51 -0500 Subject: [PATCH 028/157] Fix tests --- nequip/utils/test.py | 29 ++++++++++++++------------- nequip/utils/unittests/model_tests.py | 10 ++++++--- tests/integration/conftest.py | 13 +++++++++++- tests/integration/test_deploy.py | 8 +++++--- tests/integration/test_evaluate.py | 4 ++-- tests/integration/test_train.py | 9 +++++++-- 6 files changed, 48 insertions(+), 25 deletions(-) diff --git a/nequip/utils/test.py b/nequip/utils/test.py index af352ba2..41db252b 100644 --- a/nequip/utils/test.py +++ b/nequip/utils/test.py @@ -146,7 +146,7 @@ def assert_AtomicData_equivariant( AtomicData, AtomicDataDict.Type, List[Union[AtomicData, AtomicDataDict.Type]] ], permutation_tolerance: Optional[float] = None, - o3_tolerance: Optional[float] = None, + e3_tolerance: Optional[float] = None, **kwargs, ) -> str: r"""Test the rotation, translation, parity, and permutation equivariance of ``func``. 
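Stepping back to the two pytest-xdist patches above (026 and 027): the worker-to-GPU assignment reduces to a small, self-contained mapping from the xdist worker name to a local device index. A minimal sketch, assuming only pytest-xdist's `gw<N>` worker-naming convention — the helper name and the `master` fallback to device 0 are illustrative, not NequIP code:

```
import os

def xdist_local_device(num_devices: int) -> int:
    # pytest-xdist names workers "gw0", "gw1", ...; outside xdist the
    # environment variable is unset and we fall back to device 0
    worker = os.environ.get("PYTEST_XDIST_WORKER", "master")
    if worker == "master":
        return 0
    rank = int(worker.lstrip("gw"))  # "gw3" -> 3
    return rank % num_devices  # round-robin over the visible GPUs

# with 2 devices, workers gw0..gw3 land on devices 0, 1, 0, 1
for i, expected in enumerate([0, 1, 0, 1]):
    os.environ["PYTEST_XDIST_WORKER"] = f"gw{i}"
    assert xdist_local_device(2) == expected
```

Exporting `CUDA_VISIBLE_DEVICES` for launched child processes, as patch 027 does, is the part that cannot be a pure function: it must mutate the environment before any subprocess is spawned.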
@@ -235,7 +235,9 @@ def wrapper(*args): # we need it to be decomposed into irreps for equivar testing for k in stress_keys: if k in output: - output[k] = stress_cart_tensor.from_cartesian(output[k], rtp=stress_rtp) + output[k] = stress_cart_tensor.from_cartesian( + output[k], rtp=stress_rtp.to(output[k].dtype) + ) return [output[k] for k in irreps_out] # prepare input data @@ -261,28 +263,27 @@ def wrapper(*args): # take max across errors errs = {k: torch.max(torch.vstack([e[k] for e in errs]), dim=0)[0] for k in errs[0]} - if o3_tolerance is None: - o3_tolerance = FLOAT_TOLERANCE[ - func.model_dtype - if isinstance(func, GraphModel) - else torch.get_default_dtype() - ] + current_dtype = ( + func.model_dtype if isinstance(func, GraphModel) else torch.get_default_dtype() + ) + if e3_tolerance is None: + e3_tolerance = FLOAT_TOLERANCE[current_dtype] all_errs = [] for case, err in errs.items(): for key, this_err in zip(irreps_out.keys(), err): all_errs.append(case + (key, this_err)) - is_problem = [e[-1] > o3_tolerance for e in all_errs] + is_problem = [e[-1] > e3_tolerance for e in all_errs] message = (permutation_message + "\n") + "\n".join( - " (parity_k={:1d}, did_translate={:5}, field={:20}) -> max error={:.3e}".format( - int(k[0]), str(bool(k[1])), str(k[2]), float(k[3]) - ) + f" (parity_k={int(k[0]):1d}, did_translate={str(bool(k[1])):5}, field={str(k[2]):20}) -> max error={float(k[3]):.3e}{' FAIL' if prob else ''}" for k, prob in zip(all_errs, is_problem) if irreps_out[str(k[2])] is not None ) - if any(is_problem) or "FAIL" in permutation_message: - raise AssertionError(f"Equivariance test failed for cases:\n{message}") + if any(is_problem) or " FAIL" in permutation_message: + raise AssertionError( + f"Equivariance test of {type(func).__name__} failed:\n default/model dtype: {current_dtype} E(3) tolerance: {e3_tolerance}\n{message}" + ) return message diff --git a/nequip/utils/unittests/model_tests.py b/nequip/utils/unittests/model_tests.py index 2b6a8b63..08060506 100644 --- a/nequip/utils/unittests/model_tests.py +++ b/nequip/utils/unittests/model_tests.py @@ -60,7 +60,7 @@ def make_model(config, device, initialize: bool = True, deploy: bool = False): return model @pytest.fixture(scope="class") - def model(self, config, device): + def model(self, config, device, float_tolerance): config, out_fields = config model = self.make_model(config, device=device) return model, out_fields @@ -199,7 +199,9 @@ def test_embedding_cutoff(self, model, config, device): # For example, an Allegro edge feature is many body so will be affected assert torch.allclose(edge_embed[:2], edge_embed2[:2]) assert edge_embed[2:].abs().sum() > 1e-6 # some nonzero terms - assert torch.allclose(edge_embed2[2:], torch.zeros(1, device=device)) + assert torch.allclose( + edge_embed2[2:], torch.zeros(1, device=device, dtype=edge_embed2.dtype) + ) # test gradients in_dict = AtomicData.to_AtomicDataDict(data) @@ -214,7 +216,9 @@ def test_embedding_cutoff(self, model, config, device): inputs=in_dict[AtomicDataDict.POSITIONS_KEY], retain_graph=True, )[0] - assert torch.allclose(grads, torch.zeros(1, device=device)) + assert torch.allclose( + grads, torch.zeros(1, device=device, dtype=grads.dtype) + ) if AtomicDataDict.PER_ATOM_ENERGY_KEY in out: # are the first two atom's energies unaffected by atom at the cutoff? 
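The recurring pattern in the test fixes above is worth stating on its own: once `model_dtype` is decoupled from `torch.get_default_dtype()`, comparison tensors and tolerances must follow the dtype of the tensor under test, not the global default. A minimal sketch, assuming only `torch`; the tolerance values are illustrative and are not the `FLOAT_TOLERANCE` table used by the real tests:

```
import torch

# illustrative per-dtype tolerances (assumed values)
TOL = {torch.float32: 1e-5, torch.float64: 1e-10}

def assert_near_zero(x: torch.Tensor) -> None:
    # build the reference in x's own dtype and device, so a float32
    # model output is never compared against a float64 default tensor
    zero = torch.zeros(1, dtype=x.dtype, device=x.device)
    assert torch.allclose(x, zero, atol=TOL[x.dtype])

assert_near_zero(torch.zeros(5, dtype=torch.float32))
assert_near_zero(torch.full((3,), 1e-12, dtype=torch.float64))
```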
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index f8060a02..b98ee2bc 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -4,6 +4,7 @@ import yaml import subprocess import os +import sys import torch @@ -11,6 +12,16 @@ from nequip.nn import GraphModuleMixin +def _check_and_print(retcode): + __tracebackhide__ = True + if retcode.returncode: + if len(retcode.stdout) > 0: + print(retcode.stdout.decode("ascii")) + if len(retcode.stderr) > 0: + print(retcode.stderr.decode("ascii"), file=sys.stderr) + retcode.check_returncode() + + class IdentityModel(GraphModuleMixin, torch.nn.Module): def __init__(self, **kwargs): super().__init__() @@ -131,7 +142,7 @@ def _training_session(conffile, model_dtype, builder, BENCHMARK_ROOT): stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) - retcode.check_returncode() + _check_and_print(retcode) yield true_config, tmpdir, env diff --git a/tests/integration/test_deploy.py b/tests/integration/test_deploy.py index 468f9b3a..93f2d69e 100644 --- a/tests/integration/test_deploy.py +++ b/tests/integration/test_deploy.py @@ -15,6 +15,8 @@ from nequip.train import Trainer from nequip.ase import NequIPCalculator +from conftest import _check_and_print + @pytest.mark.parametrize( "device", ["cpu"] + (["cuda"] if torch.cuda.is_available() else []) @@ -55,7 +57,7 @@ def test_deploy(BENCHMARK_ROOT, device, model_dtype): yaml.dump(true_config, fp) # Train model retcode = subprocess.run(["nequip-train", str(config_path)], cwd=tmpdir) - retcode.check_returncode() + _check_and_print(retcode) # Deploy deployed_path = pathlib.Path(f"deployed_{dtype}.pth") retcode = subprocess.run( @@ -68,7 +70,7 @@ def test_deploy(BENCHMARK_ROOT, device, model_dtype): ], cwd=tmpdir, ) - retcode.check_returncode() + _check_and_print(retcode) deployed_path = tmpdir / deployed_path assert deployed_path.is_file(), "Deploy didn't create file" @@ -120,7 +122,7 @@ def test_deploy(BENCHMARK_ROOT, device, model_dtype): stdout=subprocess.PIPE, **text, ) - retcode.check_returncode() + _check_and_print(retcode) # Try to load extract config config = yaml.load(retcode.stdout, Loader=yaml.Loader) del config diff --git a/tests/integration/test_evaluate.py b/tests/integration/test_evaluate.py index 96d8d43b..4dd9bce0 100644 --- a/tests/integration/test_evaluate.py +++ b/tests/integration/test_evaluate.py @@ -10,7 +10,7 @@ from nequip.data import AtomicDataDict -from conftest import IdentityModel, ConstFactorModel +from conftest import IdentityModel, ConstFactorModel, _check_and_print @pytest.mark.parametrize("do_test_idcs", [True, False]) @@ -51,7 +51,7 @@ def runit(params: dict): stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) - retcode.check_returncode() + _check_and_print(retcode) # Check the output metrics = dict( diff --git a/tests/integration/test_train.py b/tests/integration/test_train.py index 53e400b3..83abacae 100644 --- a/tests/integration/test_train.py +++ b/tests/integration/test_train.py @@ -10,7 +10,12 @@ from nequip.data import AtomicDataDict -from conftest import IdentityModel, ConstFactorModel, LearningFactorModel +from conftest import ( + IdentityModel, + ConstFactorModel, + LearningFactorModel, + _check_and_print, +) def test_metrics(fake_model_training_session, model_dtype): @@ -162,7 +167,7 @@ def test_requeue(nequip_dataset, BENCHMARK_ROOT, conffile): stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) - retcode.check_returncode() + _check_and_print(retcode) # == Load metrics == dat = np.genfromtxt( From 
ed9a3285183e7cc64256de71780ec6c0b7bebb76 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 21 Dec 2022 22:50:50 -0500 Subject: [PATCH 029/157] more info in equivar test failure --- nequip/utils/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nequip/utils/test.py b/nequip/utils/test.py index 41db252b..f349f5b6 100644 --- a/nequip/utils/test.py +++ b/nequip/utils/test.py @@ -282,7 +282,7 @@ def wrapper(*args): if any(is_problem) or " FAIL" in permutation_message: raise AssertionError( - f"Equivariance test of {type(func).__name__} failed:\n default/model dtype: {current_dtype} E(3) tolerance: {e3_tolerance}\n{message}" + f"Equivariance test of {type(func).__name__} failed:\n default dtype: {torch.get_default_dtype()} (assumed) model dtype: {current_dtype} E(3) tolerance: {e3_tolerance}\n{message}" ) return message From 5973497316886791fa563de408fe99507145e846 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 21 Dec 2022 22:58:55 -0500 Subject: [PATCH 030/157] warn on default_dtype=float32 --- nequip/scripts/train.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nequip/scripts/train.py b/nequip/scripts/train.py index 1f4f25be..1af0fc39 100644 --- a/nequip/scripts/train.py +++ b/nequip/scripts/train.py @@ -124,6 +124,10 @@ def fresh_start(config): # we use add_to_config cause it's a fresh start and need to record it check_code_version(config, add_to_config=True) _set_global_options(config) + if config["default_dtype"] != "float64": + warnings.warn( + f"default_dtype={config['default_dtype']} but we strongly recommend float64" + ) # = Make the trainer = if config.wandb: From f244deb033d6a97aafe851cf8636d093fe5c19a6 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Thu, 22 Dec 2022 18:18:42 -0500 Subject: [PATCH 031/157] get_device() helper --- nequip/nn/_graph_model.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/nequip/nn/_graph_model.py b/nequip/nn/_graph_model.py index eb86a2e5..ef0c8dbd 100644 --- a/nequip/nn/_graph_model.py +++ b/nequip/nn/_graph_model.py @@ -2,6 +2,8 @@ import torch +from e3nn.util._argtools import _get_device + from nequip.data import AtomicDataDict from ._graph_mixin import GraphModuleMixin @@ -108,3 +110,8 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: # run the model data = self.model(new_data) return data + + # == Helpers == + + def get_device(self) -> torch.device: + return _get_device(self) From 66e970787672713322a20ecd7edb6b85b91a7e78 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Thu, 22 Dec 2022 18:18:56 -0500 Subject: [PATCH 032/157] graph_model model builders --- nequip/model/_build.py | 45 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/nequip/model/_build.py b/nequip/model/_build.py index 526be891..be5b2ac0 100644 --- a/nequip/model/_build.py +++ b/nequip/model/_build.py @@ -72,6 +72,7 @@ def model_from_config( "Overall default_dtype=float32, but model_dtype=float64 is a higher precision- change default_dtype to float64" ) # temporarily set the default dtype + start_graph_model_builders = None with torch_default_dtype(model_dtype): # Build @@ -82,9 +83,13 @@ def model_from_config( model = None - for builder in builders: + for builder_i, builder in enumerate(builders): pnames = inspect.signature(builder).parameters 
params = {} + if "graph_model" in pnames: + # start graph_model builders, which happen later + start_graph_model_builders = builder_i + break if "initialize" in pnames: params["initialize"] = initialize if "deploy" in pnames: @@ -130,4 +135,42 @@ def model_from_config( model_input_fields=config.get("model_input_fields", {}), ) + # Run GraphModel builders + if start_graph_model_builders is not None: + for builder in builders[start_graph_model_builders:]: + pnames = inspect.signature(builder).parameters + params = {} + assert "graph_model" in pnames + params["graph_model"] = model + if "model" in pnames: + raise ValueError( + f"Once any builder requests `graph_model` (first requested by {builders[start_graph_model_builders].__name__}), no builder can request `model`, but {builder.__name__} did" + ) + if "initialize" in pnames: + params["initialize"] = initialize + if "deploy" in pnames: + params["deploy"] = deploy + if "config" in pnames: + params["config"] = config + if "dataset" in pnames: + if "initialize" not in pnames: + raise ValueError( + "Cannot request dataset without requesting initialize" + ) + if ( + initialize + and pnames["dataset"].default == inspect.Parameter.empty + and dataset is None + ): + raise RuntimeError( + f"Builder {builder.__name__} requires the dataset, initialize is true, but no dataset was provided to `model_from_config`." + ) + params["dataset"] = dataset + + model = builder(**params) + if not isinstance(model, GraphModel): + raise TypeError( + f"Builder {builder.__name__} didn't return a GraphModel, got {type(model)} instead" + ) + return model From dd6c7ba7e61a8409ee68d9adfc601487ee2a208b Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 9 Jan 2023 12:05:46 -0700 Subject: [PATCH 033/157] More robust embedding cutoff test --- nequip/utils/unittests/model_tests.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/nequip/utils/unittests/model_tests.py b/nequip/utils/unittests/model_tests.py index 2b6a8b63..8ec8c2f0 100644 --- a/nequip/utils/unittests/model_tests.py +++ b/nequip/utils/unittests/model_tests.py @@ -175,6 +175,18 @@ def test_equivariance(self, model, atomic_batch, device): def test_embedding_cutoff(self, model, config, device): instance, out_fields = model + + # make all weights nonzero in order to have the most robust test + # default init weights can sometimes be zero (e.g. 
biases) but we want + # to ensure smoothness for nonzero values + # assumes any trainable parameter will be trained and thus that + # nonzero values are valid + with torch.no_grad(): + all_params = list(instance.parameters()) + old_state = [p.detach().clone() for p in all_params] + for p in all_params: + p.uniform_(-1.0, 1.0) + config, out_fields = config r_max = config["r_max"] @@ -227,6 +239,11 @@ def test_embedding_cutoff(self, model, config, device): assert grads.shape == (3, 3) assert torch.allclose(grads[2], torch.zeros(1, device=device)) + # restore previous model state + with torch.no_grad(): + for p, v in zip(all_params, old_state): + p.copy_(v) + class BaseEnergyModelTests(BaseModelTests): def test_large_separation(self, model, config, molecules, device): From d0f9fc5a4cd1333fbb318c7663454f48a1b5347e Mon Sep 17 00:00:00 2001 From: Lixin Sun Date: Tue, 10 Jan 2023 20:38:10 +0000 Subject: [PATCH 034/157] adding Tensorboard as logger (#289) * add tensorboard logger * update example and readme Co-authored-by: Lixin Sun --- CHANGELOG.md | 2 ++ README.md | 9 +++++++-- configs/full.yaml | 3 +++ nequip/data/_test_data.py | 2 +- nequip/scripts/train.py | 9 ++++++--- nequip/train/trainer_tensorboard.py | 31 +++++++++++++++++++++++++++++ 6 files changed, 50 insertions(+), 6 deletions(-) create mode 100644 nequip/train/trainer_tensorboard.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b2759f6..0a8ec3fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,8 @@ Most recent change on the bottom. ### Removed - [Breaking] `fixed_fields` machinery (`npz_fixed_field_keys` is still supported, but through a more straightforward implementation) +### Added +- add Tensorboard as logger option ## [0.5.6] - 2022-12-19 ### Added diff --git a/README.md b/README.md index da741c09..14f50055 100644 --- a/README.md +++ b/README.md @@ -17,12 +17,17 @@ NequIP requires: To install: -* We use [Weights&Biases](https://wandb.ai) to keep track of experiments. This is not a strict requirement — you can use our package without it — but it may make your life easier. If you want to use it, create an account [here](https://wandb.ai) and install the Python package: +* We use [Weights&Biases](https://wandb.ai) (or TensorBoard) to keep track of experiments. This is not a strict requirement — you can use our package without it — but it may make your life easier. 
If you want to use it, create an account [here](https://wandb.ai) and install the Python package:
 
   ```
-  pip install wandb
+  pip install wandb # tensorboard
   ```
 
+  * for TensorBoard users:
+    * On your local computer, build an ssh tunnel to your compute node by `ssh -L 6006:127.0.0.1:6006 username@ip`
+    * On the compute node, go to the `{root}` folder specified in the config file, and run `tensorboard --logdir tb_summary`
+    * Use your local computer's browser to open `http://localhost:6006`
+
 * Install NequIP
 
 NequIP can be installed from PyPI:

diff --git a/configs/full.yaml b/configs/full.yaml
index 8d54d76b..5997b14f 100644
--- a/configs/full.yaml
+++ b/configs/full.yaml
@@ -161,6 +161,9 @@ wandb: true
 wandb_project: toluene-example # project name used in wandb
 wandb_watch: false
 
+# # using tensorboard for logging
+# tensorboard: true
+
 # see https://docs.wandb.ai/ref/python/watch
 # wandb_watch_kwargs:
 #   log: all

diff --git a/nequip/data/_test_data.py b/nequip/data/_test_data.py
index 65b85052..c7e0f558 100644
--- a/nequip/data/_test_data.py
+++ b/nequip/data/_test_data.py
@@ -7,7 +7,7 @@ import ase.build
 from ase.calculators.emt import EMT
 
-from nequip.data import AtomicInMemoryDataset, AtomicData, AtomicDataDict
+from nequip.data import AtomicInMemoryDataset, AtomicData
 from .transforms import TypeMapper
 
diff --git a/nequip/scripts/train.py b/nequip/scripts/train.py
index 88b55f7e..45f40ec0 100644
--- a/nequip/scripts/train.py
+++ b/nequip/scripts/train.py
@@ -24,6 +24,7 @@ default_config = dict(
     root="./",
     run_name="NequIP",
+    tensorboard=False,
     wandb=False,
     wandb_project="NequIP",
     model_builders=[
@@ -126,19 +127,21 @@ def fresh_start(config):
 
     # = Make the trainer =
     if config.wandb:
+        import wandb  # noqa: F401
 
-        from nequip.train.trainer_wandb import TrainerWandB
+        from nequip.train.trainer_wandb import TrainerWandB as Trainer
 
         # download parameters from wandb in case of sweeping
         from nequip.utils.wandb import init_n_update
 
        config = init_n_update(config)
 
-        trainer = TrainerWandB(model=None, **dict(config))
+    elif config.tensorboard:
+        from nequip.train.trainer_tensorboard import TrainerTensorBoard as Trainer
     else:
         from nequip.train.trainer import Trainer
 
-        trainer = Trainer(model=None, **dict(config))
+    trainer = Trainer(model=None, **dict(config))
 
     # what is this
     # to update wandb data?
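The `TrainerTensorBoard` added in the next file writes each metric as a `<metric>/<train|validation>` scalar. For readers who want to sanity-check what actually lands on disk, the event files can be read back programmatically. A minimal sketch, assuming the `tensorboard` package is installed; the tags here merely mimic the trainer's layout:

```
import tempfile

from torch.utils.tensorboard import SummaryWriter
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

logdir = tempfile.mkdtemp()
writer = SummaryWriter(log_dir=logdir)
# mimic the "<metric>/<train|validation>" tag layout used by the trainer
writer.add_scalar("loss/train", 0.5, global_step=0)
writer.add_scalar("loss/validation", 0.7, global_step=0)
writer.close()

acc = EventAccumulator(logdir)
acc.Reload()  # parse the event files written above
for tag in acc.Tags()["scalars"]:
    for event in acc.Scalars(tag):
        print(tag, event.step, event.value)
```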
diff --git a/nequip/train/trainer_tensorboard.py b/nequip/train/trainer_tensorboard.py
new file mode 100644
index 00000000..de76cbe9
--- /dev/null
+++ b/nequip/train/trainer_tensorboard.py
@@ -0,0 +1,31 @@
+from torch.utils.tensorboard import SummaryWriter
+
+from .trainer import Trainer, TRAIN, VALIDATION
+
+
+class TrainerTensorBoard(Trainer):
+    """Trainer class that adds TensorBoard features"""
+
+    def end_of_epoch_log(self):
+        Trainer.end_of_epoch_log(self)
+        kwargs = dict(
+            global_step=self.iepoch, walltime=self.mae_dict["cumulative_wall"]
+        )
+        for k, v in self.mae_dict.items():
+            terms = k.split("_")
+            if terms[0] in [TRAIN, VALIDATION]:
+                header = "/".join(terms[1:])
+                self.tb_writer.add_scalar(f"{header}/{terms[0]}", v, **kwargs)
+            elif k not in ["cumulative_wall", "epoch"]:
+                self.tb_writer.add_scalar(k, v, **kwargs)
+        self.tb_writer.flush()
+
+    def init(self):
+        super().init()
+
+        if not self._initialized:
+            return
+
+        self.tb_writer = SummaryWriter(
+            log_dir=f"{self.output.root}/tb_summary/{self.output.run_name}",
+        )

From 33f67518cf192989e82f0e3f3fc5b62576f8e0ae Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Tue, 10 Jan 2023 23:17:26 -0700
Subject: [PATCH 035/157] partial epochs

---
 CHANGELOG.md              |  1 +
 nequip/data/__init__.py   |  3 +-
 nequip/data/dataloader.py | 87 ++++++++++++++++++++++++++++++++++++++-
 nequip/train/trainer.py   | 25 ++++++++++-
 4 files changed, 111 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9258f035..944a89c2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,7 @@ Most recent change on the bottom.
 - [Breaking] Set `dataset_seed` to `seed` if it is not explicitly provided
 - Don't log as often by default
 - [Breaking] Default nonlinearities are `silu` (`e`) and `tanh` (`o`)
+- Will not reproduce previous versions' data shuffling order (for all practical purposes this does not matter, the `shuffle` option is unchanged)
 
 ### Removed
 - [Breaking] `fixed_fields` machinery (`npz_fixed_field_keys` is still supported, but through a more straightforward implementation)

diff --git a/nequip/data/__init__.py b/nequip/data/__init__.py
index 2d47d845..377f84d7 100644
--- a/nequip/data/__init__.py
+++ b/nequip/data/__init__.py
@@ -9,7 +9,7 @@
     _LONG_FIELDS,
 )
 from .dataset import AtomicDataset, AtomicInMemoryDataset, NpzDataset, ASEDataset
-from .dataloader import DataLoader, Collater
+from .dataloader import DataLoader, Collater, PartialSampler
 from ._build import dataset_from_config
 from ._test_data import EMTTestDataset
 
@@ -24,6 +24,7 @@
     ASEDataset,
     DataLoader,
     Collater,
+    PartialSampler,
     dataset_from_config,
     _NODE_FIELDS,
     _EDGE_FIELDS,

diff --git a/nequip/data/dataloader.py b/nequip/data/dataloader.py
index 6b1bdf76..f883e07b 100644
--- a/nequip/data/dataloader.py
+++ b/nequip/data/dataloader.py
@@ -1,8 +1,10 @@
-from typing import List
+from typing import List, Optional, Iterator
+import math
 
 import torch
+from torch.utils.data import Sampler
 
-from nequip.utils.torch_geometric import Batch, Data
+from nequip.utils.torch_geometric import Batch, Data, Dataset
 
 
 class Collater(object):
@@ -62,3 +64,84 @@ def __init__(
             collate_fn=Collater.for_dataset(dataset, exclude_keys=exclude_keys),
             **kwargs,
         )
+
+
+class PartialSampler(Sampler[int]):
+    r"""Samples elements without replacement, but divided across a number of calls to `__iter__`.
+
+    To ensure deterministic reproducibility and restartability, dataset permutations are generated
+    from a combination of the overall seed and the epoch number.
As a result, the caller must + tell this sampler the epoch number before each time `__iter__` is called by calling + `my_partial_sampler.step_epoch(epoch_number_about_to_run)` each time. + + Args: + data_source (Dataset): dataset to sample from + shuffle (bool): whether to shuffle the dataset each time the _entire_ dataset is consumed + num_samples_per_segment (int): number of samples to draw in each call to `__iter__`. + If `None`, defaults to `len(data_source)`. The entire dataset will be consumed in + `ceil(len(data_source) / num_samples_per_segment)`. + generator (Generator): Generator used in sampling. + """ + data_source: Dataset + num_samples_per_segment: int + num_segments: int + shuffle: bool + _epoch: int + _prev_epoch: int + + def __init__( + self, + data_source: Dataset, + shuffle: bool = True, + num_samples_per_segment: Optional[int] = None, + generator=None, + ) -> None: + self.data_source = data_source + self.shuffle = shuffle + if num_samples_per_segment is None: + num_samples_per_segment = len(data_source) + self.num_samples_per_segment = num_samples_per_segment + self.num_segments = int( + math.ceil(self.num_samples_total / self.num_samples_per_segment) + ) + self.generator = generator + self._epoch = None + self._prev_epoch = None + + @property + def num_samples_total(self) -> int: + # dataset size might change at runtime + return len(self.data_source) + + def step_epoch(self, epoch: int) -> None: + self._epoch = epoch + + def __iter__(self) -> Iterator[int]: + assert self._epoch is not None + assert (self._prev_epoch is None) or (self._epoch == self._prev_epoch + 1) + + full_epoch_i, segment_i = divmod(self._epoch, self.num_segments) + if self.shuffle: + temp_rng = torch.Generator() + # Get new randomness for each _full_ time through the dataset + # This is deterministic w.r.t. the combination of dataset seed and epoch number + # Both of which persist across restarts + # (initial_seed() is restored by set_state()) + temp_rng.manual_seed(self.generator.initial_seed() + full_epoch_i) + full_order = torch.randperm(self.num_samples_total, generator=temp_rng) + else: + full_order = torch.arange(self.num_samples_total) + + this_segment_indexes = full_order[ + self.num_samples_per_segment + * segment_i : self.num_samples_per_segment + * (segment_i + 1) + ] + assert len(this_segment_indexes) > 0 + assert len(this_segment_indexes) <= self.num_samples_per_segment + yield from this_segment_indexes + + self._prev_epoch = self._epoch + + def __len__(self) -> int: + return self.num_samples_per_segment diff --git a/nequip/train/trainer.py b/nequip/train/trainer.py index ae232b91..ad7f2dab 100644 --- a/nequip/train/trainer.py +++ b/nequip/train/trainer.py @@ -26,7 +26,13 @@ import torch from torch_ema import ExponentialMovingAverage -from nequip.data import DataLoader, AtomicData, AtomicDataDict, AtomicDataset +from nequip.data import ( + DataLoader, + PartialSampler, + AtomicData, + AtomicDataDict, + AtomicDataset, +) from nequip.nn import GraphModel from nequip.utils import ( Output, @@ -150,6 +156,7 @@ class Trainer: validation_batch_size (int): batch size for evaluating the model for validation shuffle (bool): parameters for dataloader n_train (int): # of frames for training + n_train_per_epoch (optional int): how many frames from `n_train` to use each epoch; see `PartialSampler`. When `None`, all `n_train` frames will be used each epoch. n_val (int): # of frames for validation exclude_keys (list): fields from dataset to ignore. 
dataloader_num_workers (int): `num_workers` for the `DataLoader`s @@ -242,6 +249,7 @@ def __init__( validation_batch_size: int = 5, shuffle: bool = True, n_train: Optional[int] = None, + n_train_per_epoch: Optional[int] = None, n_val: Optional[int] = None, dataloader_num_workers: int = 0, train_idcs: Optional[list] = None, @@ -878,6 +886,10 @@ def epoch_step(self): dataloaders = [ dataloaders[c] for c in categories ] # get the right dataloaders for the catagories we actually run + if TRAIN in categories: + # We have to step the sampler so it knows what epoch it is + self.dl_train_sampler.step_epoch(self.iepoch) + self.metrics_dict = {} self.loss_dict = {} @@ -1199,10 +1211,19 @@ def set_dataset( # use the right randomness generator=self.dataset_rng, ) + self.dl_train_sampler = PartialSampler( + data_source=self.dataset_train, + # training should shuffle (if enabled) + shuffle=self.shuffle, + # if n_train_per_epoch is None (default), it's set to len(self.dataset_train) == n_train + # i.e. use all `n_train` frames each epoch + num_samples_per_segment=self.n_train_per_epoch, + generator=self.dataset_rng, + ) self.dl_train = DataLoader( dataset=self.dataset_train, - shuffle=self.shuffle, # training should shuffle batch_size=self.batch_size, + sampler=self.dl_train_sampler, **dl_kwargs, ) # validation, on the other hand, shouldn't shuffle From 34dc79f152c6368b98389d8942f1295aa5df6a6f Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 11 Jan 2023 17:31:54 -0500 Subject: [PATCH 036/157] match tolerance to dtype --- nequip/utils/unittests/model_tests.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nequip/utils/unittests/model_tests.py b/nequip/utils/unittests/model_tests.py index c74ef248..6083a1c8 100644 --- a/nequip/utils/unittests/model_tests.py +++ b/nequip/utils/unittests/model_tests.py @@ -380,7 +380,10 @@ def test_partial_forces(self, config, atomic_batch, device, strict_locality): assert torch.allclose( output[k], output_partial[k], - atol=1e-8 if k == AtomicDataDict.TOTAL_ENERGY_KEY else 1e-6, + atol=1e-8 + if k == AtomicDataDict.TOTAL_ENERGY_KEY + and torch.get_default_dtype() == torch.float64 + else 1e-5, ) else: assert torch.equal(output[k], output_partial[k]) From 96d6feee9c03cf33839cf8dee2933ebfd05c04ea Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 11 Jan 2023 23:42:05 -0500 Subject: [PATCH 037/157] Work with `wandb>=0.13.8` --- CHANGELOG.md | 3 +++ nequip/utils/wandb.py | 7 +++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a8ec3fe..b651ea58 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,9 @@ Most recent change on the bottom. - Don't log as often by default - [Breaking] Default nonlinearities are `silu` (`e`) and `tanh` (`o`) +### Fixed +- Work with `wandb>=0.13.8` + ### Removed - [Breaking] `fixed_fields` machinery (`npz_fixed_field_keys` is still supported, but through a more straightforward implementation) diff --git a/nequip/utils/wandb.py b/nequip/utils/wandb.py index 2391a9f4..310b0f31 100644 --- a/nequip/utils/wandb.py +++ b/nequip/utils/wandb.py @@ -1,5 +1,6 @@ import wandb import logging +import secrets from wandb.util import json_friendly_val @@ -11,9 +12,11 @@ def init_n_update(config): "Due to wandb limitations, only string keys are supported in configurations." 
) - # download from wandb set up - config.run_id = wandb.util.generate_id() + # create a run id + # see https://github.com/wandb/wandb/pull/4676 + config.run_id = secrets.token_urlsafe() + # download from wandb set up wandb.init( project=config.wandb_project, config=conf_dict, From 9713ede9477f9e57fd36a7df65ff5e44dbf45bb1 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Thu, 12 Jan 2023 01:49:26 -0500 Subject: [PATCH 038/157] docs --- nequip/scripts/evaluate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nequip/scripts/evaluate.py b/nequip/scripts/evaluate.py index f7dfa12b..4ebcb92c 100644 --- a/nequip/scripts/evaluate.py +++ b/nequip/scripts/evaluate.py @@ -68,7 +68,7 @@ def main(args=None, running_as_script: bool = True): ) parser.add_argument( "--test-indexes", - help="Path to a file containing the indexes in the dataset that make up the test set. If omitted, all data frames *not* used as training or validation data in the training session `train_dir` will be used.", + help="Path to a file containing the indexes in the dataset that make up the test set. If omitted, all data frames *not* used as training or validation data in the training session `train_dir` will be used. PyTorch, YAML, and JSON formats containing a list of integers are supported.", type=Path, default=None, ) From d43a7254a0d97f57cb704b08dca4c1cd515dfeab Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 17 Jan 2023 19:59:30 -0500 Subject: [PATCH 039/157] handle data type --- nequip/data/_test_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nequip/data/_test_data.py b/nequip/data/_test_data.py index 73244d42..498ba13e 100644 --- a/nequip/data/_test_data.py +++ b/nequip/data/_test_data.py @@ -38,7 +38,7 @@ def __init__( assert element in ("Cu", "Pd", "Au", "Pt", "Al", "Ni", "Ag") self.element = element self.sigma = sigma - self.supercell = supercell + self.supercell = tuple(supercell) self.num_frames = num_frames self.dataset_seed = dataset_seed From 63099ab2cc23c04495282b9d105628fe8ed6fe5a Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 17 Jan 2023 20:00:44 -0500 Subject: [PATCH 040/157] benchmark with explicit CUDA sync too --- nequip/scripts/benchmark.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/nequip/scripts/benchmark.py b/nequip/scripts/benchmark.py index 1deb0de2..12a5fc6f 100644 --- a/nequip/scripts/benchmark.py +++ b/nequip/scripts/benchmark.py @@ -57,7 +57,7 @@ def main(args=None): "-n", help="Number of trials.", type=int, - default=30, + default=None, ) parser.add_argument( "--n-data", @@ -157,6 +157,9 @@ def main(args=None): if args.n == 0: print("Got -n 0, so quitting without running benchmark.") return + elif args.n is None: + args.n = 5 if args.profile else 30 + print(args.n) # Load model: if args.model is None: @@ -239,8 +242,11 @@ def trace_handler(p): on_trace_ready=trace_handler, ) as p: for _ in range(1 + warmup + args.n): - model(next(datas).copy()) + out = model(next(datas).copy()) + out[AtomicDataDict.TOTAL_ENERGY_KEY].item() p.step() + + print(p.key_averages().table(sort_by="cuda_time_total", row_limit=100)) elif args.pdb: print("Running model under debugger...") try: @@ -280,7 +286,8 @@ def trace_handler(p): print("Benchmarking...") # just time t = Timer( - stmt="model(next(datas).copy())", globals={"model": model, "datas": datas} + 
stmt="model(next(datas).copy())['total_energy'].item()", + globals={"model": model, "datas": datas}, ) perloop: Measurement = t.timeit(args.n) From ee0b15d7a23d504c20c8f2b6a35039aa8a527199 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 18 Jan 2023 21:45:10 -0500 Subject: [PATCH 041/157] remove torch in setup.py --- README.md | 4 +++- setup.py | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 14f50055..75929a2e 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,9 @@ NequIP is an open-source code for building E(3)-equivariant interatomic potentia NequIP requires: * Python >= 3.7 -* PyTorch >= 1.8, !=1.9, <=1.11.*. PyTorch can be installed following the [instructions from their documentation](https://pytorch.org/get-started/locally/). Note that neither `torchvision` nor `torchaudio`, included in the default install command, are needed for NequIP. +* PyTorch == 1.10.* or 1.13.*. PyTorch can be installed following the [instructions from their documentation](https://pytorch.org/get-started/locally/). Note that neither `torchvision` nor `torchaudio`, included in the default install command, are needed for NequIP. + +**You must install PyTorch before installing NequIP, however it is not marked as a dependency of `nequip` to prevent `pip` from trying to overwrite your PyTorch installation.** To install: diff --git a/setup.py b/setup.py index d7a5b465..6ca9e3cf 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,6 @@ "numpy", "ase", "tqdm", - "torch>=1.10.0,<1.13,!=1.9.0", "e3nn>=0.4.4,<0.6.0", "pyyaml", "contextlib2;python_version<'3.7'", # backport of nullcontext From 9da6bb073c3a01175f0f7f81e1a1d8121bbd30ec Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 18 Jan 2023 21:46:00 -0500 Subject: [PATCH 042/157] version bump --- .github/workflows/tests.yml | 2 +- .github/workflows/tests_develop.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 1f835e90..79df1094 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -16,7 +16,7 @@ jobs: strategy: matrix: python-version: [3.9] - torch-version: [1.11.0, 1.12.1] + torch-version: [1.11.0, 1.13.1] steps: - uses: actions/checkout@v2 diff --git a/.github/workflows/tests_develop.yml b/.github/workflows/tests_develop.yml index 2c23350c..5aa1ab08 100644 --- a/.github/workflows/tests_develop.yml +++ b/.github/workflows/tests_develop.yml @@ -16,7 +16,7 @@ jobs: strategy: matrix: python-version: [3.9] - torch-version: [1.12.1] + torch-version: [1.13.1] steps: - uses: actions/checkout@v2 From 922b622aeeb96dd837c9397dea969d5c1a33c7dc Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 23 Jan 2023 16:01:02 -0500 Subject: [PATCH 043/157] add batch ptr key to avoid .max() calls --- nequip/data/AtomicData.py | 1 + nequip/data/AtomicDataDict.py | 8 ++++++++ nequip/data/_keys.py | 1 + nequip/nn/_atomwise.py | 22 ++++++++++++++++------ nequip/nn/_graph_model.py | 1 + 5 files changed, 27 insertions(+), 6 deletions(-) diff --git a/nequip/data/AtomicData.py b/nequip/data/AtomicData.py index 728c260b..4b4bb50c 100644 --- a/nequip/data/AtomicData.py +++ b/nequip/data/AtomicData.py @@ -56,6 +56,7 @@ AtomicDataDict.VIRIAL_KEY, AtomicDataDict.PBC_KEY, AtomicDataDict.CELL_KEY, + AtomicDataDict.BATCH_PTR_KEY, } _NODE_FIELDS: Set[str] = set(_DEFAULT_NODE_FIELDS) 
_EDGE_FIELDS: Set[str] = set(_DEFAULT_EDGE_FIELDS) diff --git a/nequip/data/AtomicDataDict.py b/nequip/data/AtomicDataDict.py index 069f8cff..f7713e6f 100644 --- a/nequip/data/AtomicDataDict.py +++ b/nequip/data/AtomicDataDict.py @@ -111,4 +111,12 @@ def with_batch(data: Type) -> Type: pos = data[_keys.POSITIONS_KEY] batch = torch.zeros(len(pos), dtype=torch.long, device=pos.device) data[_keys.BATCH_KEY] = batch + # ugly way to make a tensor of [0, len(pos)], but it avoids transfers or casts + data[_keys.BATCH_PTR_KEY] = torch.arange( + start=0, + end=len(pos) + 1, + step=len(pos), + dtype=torch.long, + device=pos.device, + ) return data diff --git a/nequip/data/_keys.py b/nequip/data/_keys.py index 54b66ce3..9ebb6e19 100644 --- a/nequip/data/_keys.py +++ b/nequip/data/_keys.py @@ -66,6 +66,7 @@ ] BATCH_KEY: Final[str] = "batch" +BATCH_PTR_KEY: Final[str] = "ptr" # Make a list of allowed keys ALLOWED_KEYS: List[str] = [ diff --git a/nequip/nn/_atomwise.py b/nequip/nn/_atomwise.py index 344c3d4e..2b698b3d 100644 --- a/nequip/nn/_atomwise.py +++ b/nequip/nn/_atomwise.py @@ -90,7 +90,11 @@ def __init__( def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: data = AtomicDataDict.with_batch(data) data[self.out_field] = scatter( - data[self.field], data[AtomicDataDict.BATCH_KEY], dim=0, reduce=self.reduce + data[self.field], + data[AtomicDataDict.BATCH_KEY], + dim=0, + dim_size=len(data[AtomicDataDict.BATCH_PTR_KEY]) - 1, + reduce=self.reduce, ) if self.constant != 1.0: data[self.out_field] = data[self.out_field] * self.constant @@ -195,7 +199,7 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: if not (self.has_scales or self.has_shifts): return data - species_idx = data[AtomicDataDict.ATOM_TYPE_KEY] + species_idx = data[AtomicDataDict.ATOM_TYPE_KEY].squeeze(-1) in_field = data[self.field] assert len(in_field) == len( species_idx @@ -207,8 +211,8 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: # input + tensor1 * tensor2 elementwise # it will promote to widest dtype, which comes from shifts/scales in_field = torch.addcmul( - self.shifts[species_idx].view(-1, 1), - self.scales[species_idx].view(-1, 1), + torch.index_select(self.shifts, 0, species_idx).view(-1, 1), + torch.index_select(self.scales, 0, species_idx).view(-1, 1), in_field, ) else: @@ -217,9 +221,15 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: # this is specifically because self.*[species_idx].view(-1, 1) # is never a scalar (ndim == 0), since it is always [n_atom, 1] if self.has_scales: - in_field = self.scales[species_idx].view(-1, 1) * in_field + in_field = ( + torch.index_select(self.scales, 0, species_idx).view(-1, 1) + * in_field + ) if self.has_shifts: - in_field = self.shifts[species_idx].view(-1, 1) + in_field + in_field = ( + torch.index_select(self.shifts, 0, species_idx).view(-1, 1) + + in_field + ) data[self.out_field] = in_field return data diff --git a/nequip/nn/_graph_model.py b/nequip/nn/_graph_model.py index ef0c8dbd..7d543816 100644 --- a/nequip/nn/_graph_model.py +++ b/nequip/nn/_graph_model.py @@ -38,6 +38,7 @@ def __init__( AtomicDataDict.EDGE_CELL_SHIFT_KEY: None, AtomicDataDict.CELL_KEY: "1o", # 3 of them, but still AtomicDataDict.BATCH_KEY: None, + AtomicDataDict.BATCH_PTR_KEY: None, AtomicDataDict.ATOM_TYPE_KEY: None, } model_input_fields = AtomicDataDict._fix_irreps_dict(model_input_fields) From 3db6752c2c37171175a8fe60e0db92e012f2ea42 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp 
<1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 23 Jan 2023 19:43:27 -0500 Subject: [PATCH 044/157] looping for partialsampler --- nequip/data/dataloader.py | 60 +++++++++++++++++++++++++-------------- nequip/train/trainer.py | 4 ++- 2 files changed, 42 insertions(+), 22 deletions(-) diff --git a/nequip/data/dataloader.py b/nequip/data/dataloader.py index f883e07b..d38eb22e 100644 --- a/nequip/data/dataloader.py +++ b/nequip/data/dataloader.py @@ -74,17 +74,19 @@ class PartialSampler(Sampler[int]): tell this sampler the epoch number before each time `__iter__` is called by calling `my_partial_sampler.step_epoch(epoch_number_about_to_run)` each time. + This sampler decouples epochs from the dataset size and cycles through the dataset over as + many (partial) epochs as it may take. As a result, the _dataset_ epoch can change partway + through a training epoch. + Args: data_source (Dataset): dataset to sample from shuffle (bool): whether to shuffle the dataset each time the _entire_ dataset is consumed - num_samples_per_segment (int): number of samples to draw in each call to `__iter__`. - If `None`, defaults to `len(data_source)`. The entire dataset will be consumed in - `ceil(len(data_source) / num_samples_per_segment)`. + num_samples_per_epoch (int): number of samples to draw in each call to `__iter__`. + If `None`, defaults to `len(data_source)`. generator (Generator): Generator used in sampling. """ data_source: Dataset - num_samples_per_segment: int - num_segments: int + num_samples_per_epoch: int shuffle: bool _epoch: int _prev_epoch: int @@ -93,17 +95,16 @@ def __init__( self, data_source: Dataset, shuffle: bool = True, - num_samples_per_segment: Optional[int] = None, + num_samples_per_epoch: Optional[int] = None, generator=None, ) -> None: self.data_source = data_source self.shuffle = shuffle - if num_samples_per_segment is None: - num_samples_per_segment = len(data_source) - self.num_samples_per_segment = num_samples_per_segment - self.num_segments = int( - math.ceil(self.num_samples_total / self.num_samples_per_segment) - ) + if num_samples_per_epoch is None: + num_samples_per_epoch = self.num_samples_total + self.num_samples_per_epoch = num_samples_per_epoch + assert self.num_samples_per_epoch <= self.num_samples_total + assert self.num_samples_per_epoch >= 1 self.generator = generator self._epoch = None self._prev_epoch = None @@ -119,8 +120,15 @@ def step_epoch(self, epoch: int) -> None: def __iter__(self) -> Iterator[int]: assert self._epoch is not None assert (self._prev_epoch is None) or (self._epoch == self._prev_epoch + 1) + assert self._epoch >= 0 + + full_epoch_i, start_sample_i = divmod( + # how much data we've already consumed: + self._epoch * self.num_samples_per_epoch, + # how much data there is the dataset: + self.num_samples_total, + ) - full_epoch_i, segment_i = divmod(self._epoch, self.num_segments) if self.shuffle: temp_rng = torch.Generator() # Get new randomness for each _full_ time through the dataset @@ -128,20 +136,30 @@ def __iter__(self) -> Iterator[int]: # Both of which persist across restarts # (initial_seed() is restored by set_state()) temp_rng.manual_seed(self.generator.initial_seed() + full_epoch_i) - full_order = torch.randperm(self.num_samples_total, generator=temp_rng) + full_order_this = torch.randperm(self.num_samples_total, generator=temp_rng) + # reseed the generator for the _next_ epoch to get the shuffled order of the + # _next_ dataset epoch to pad out this one for completing any partial batches + # at the end: + 
temp_rng.manual_seed(self.generator.initial_seed() + full_epoch_i + 1) + full_order_next = torch.randperm(self.num_samples_total, generator=temp_rng) + del temp_rng else: - full_order = torch.arange(self.num_samples_total) + full_order_this = torch.arange(self.num_samples_total) + # without shuffling, the next epoch has the same sampling order as this one: + full_order_next = full_order_this + + full_order = torch.cat((full_order_this, full_order_next), dim=0) + del full_order_next, full_order_this this_segment_indexes = full_order[ - self.num_samples_per_segment - * segment_i : self.num_samples_per_segment - * (segment_i + 1) + start_sample_i : start_sample_i + self.num_samples_per_epoch ] - assert len(this_segment_indexes) > 0 - assert len(this_segment_indexes) <= self.num_samples_per_segment + # because we cycle into indexes from the next dataset epoch, + # we should _always_ be able to get num_samples_per_epoch + assert len(this_segment_indexes) == self.num_samples_per_epoch yield from this_segment_indexes self._prev_epoch = self._epoch def __len__(self) -> int: - return self.num_samples_per_segment + return self.num_samples_per_epoch diff --git a/nequip/train/trainer.py b/nequip/train/trainer.py index ad7f2dab..1cb6a1ab 100644 --- a/nequip/train/trainer.py +++ b/nequip/train/trainer.py @@ -1211,13 +1211,15 @@ def set_dataset( # use the right randomness generator=self.dataset_rng, ) + if self.n_train_per_epoch is not None: + assert self.n_train_per_epoch % self.batch_size == 0 self.dl_train_sampler = PartialSampler( data_source=self.dataset_train, # training should shuffle (if enabled) shuffle=self.shuffle, # if n_train_per_epoch is None (default), it's set to len(self.dataset_train) == n_train # i.e. use all `n_train` frames each epoch - num_samples_per_segment=self.n_train_per_epoch, + num_samples_per_epoch=self.n_train_per_epoch, generator=self.dataset_rng, ) self.dl_train = DataLoader( From 703142ad76a8109522c17082daac95a3425ce314 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 24 Jan 2023 11:46:45 -0500 Subject: [PATCH 045/157] update with plugin section --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 75929a2e..c2473d32 100644 --- a/README.md +++ b/README.md @@ -137,6 +137,12 @@ pair_coeff * * deployed.pth Date: Tue, 24 Jan 2023 17:49:28 -0500 Subject: [PATCH 046/157] fix with graphmodel --- nequip/model/_weight_init.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/nequip/model/_weight_init.py b/nequip/model/_weight_init.py index 7d6184c4..d0f7003d 100644 --- a/nequip/model/_weight_init.py +++ b/nequip/model/_weight_init.py @@ -5,12 +5,14 @@ import e3nn.o3 import e3nn.nn -from nequip.nn import GraphModuleMixin +from nequip.nn import GraphModuleMixin, GraphModel from nequip.utils import Config # == Load old state == -def initialize_from_state(config: Config, model: GraphModuleMixin, initialize: bool): +def initialize_from_state( + config: Config, graph_model: GraphModel, initialize: bool +) -> GraphModel: """Initialize the model from the state dict file given by the config options `initial_model_state`. Only loads the state dict if `initialize` is `True`; this is meant for, say, starting a training from a previous state. 
@@ -22,18 +24,21 @@ def initialize_from_state(config: Config, model: GraphModuleMixin, initialize: b See https://pytorch.org/docs/stable/generated/torch.nn.Module.html?highlight=load_state_dict#torch.nn.Module.load_state_dict. """ if not initialize: - return model # do nothing + return graph_model # do nothing return load_model_state( - config=config, model=model, initialize=initialize, _prefix="initial_model_state" + config=config, + graph_model=graph_model, + initialize=initialize, + _prefix="initial_model_state", + ) def load_model_state( config: Config, - model: GraphModuleMixin, + graph_model: GraphModel, initialize: bool, _prefix: str = "load_model_state", -): +) -> GraphModel: """Load the model from the state dict file given by the config options `load_model_state`. Loads the state dict always; this is meant, for example, for building a new model to deploy with a given state dict. @@ -49,8 +54,8 @@ def load_model_state( f"initialize_from_state requires the `{_prefix}` option specifying the state to initialize from" ) state = torch.load(config[_prefix]) - model.load_state_dict(state, strict=config.get(_prefix + "_strict", True)) - return model + graph_model.load_state_dict(state, strict=config.get(_prefix + "_strict", True)) + return graph_model # == Init functions == From 032031cb3d7644eca9e29fc89fa848cec16e2195 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 1 Feb 2023 22:54:37 -0500 Subject: [PATCH 047/157] thresholds --- nequip/utils/unittests/model_tests.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nequip/utils/unittests/model_tests.py b/nequip/utils/unittests/model_tests.py index 6083a1c8..c3e77cd3 100644 --- a/nequip/utils/unittests/model_tests.py +++ b/nequip/utils/unittests/model_tests.py @@ -96,8 +96,9 @@ def test_jit(self, model, atomic_batch, device): atol = { # tight, but not that tight, since GPU nondet has to pass + # plus model insides are still float32 with global dtype float64 in the tests torch.float32: 1e-6, - torch.float64: 1e-10, + torch.float64: 5e-7, }[torch.get_default_dtype()] for out_field in out_fields: From fd74bc6aa05e4fef138ab15d06fbfb6516771368 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 1 Feb 2023 22:56:07 -0500 Subject: [PATCH 048/157] improve NequIP numerics --- nequip/nn/_interaction_block.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/nequip/nn/_interaction_block.py b/nequip/nn/_interaction_block.py index f14a2187..f3164709 100644 --- a/nequip/nn/_interaction_block.py +++ b/nequip/nn/_interaction_block.py @@ -168,12 +168,13 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: edge_features = self.tp( x[edge_src], data[AtomicDataDict.EDGE_ATTRS_KEY], weight ) - x = scatter(edge_features, edge_dst, dim=0, dim_size=len(x)) - + # divide first for numerics, scatter is linear # Necessary to get TorchScript to be able to type infer when its not None avg_num_neigh: Optional[float] = self.avg_num_neighbors if avg_num_neigh is not None: - x = x.div(avg_num_neigh**0.5) + edge_features = edge_features.div(avg_num_neigh**0.5) + # now scatter down + x = scatter(edge_features, edge_dst, dim=0, dim_size=len(x)) x = self.linear_2(x) From 75976a0480f9d40e0b484590641c0261562082c6 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 1 Feb 2023 23:12:47 -0500 Subject: [PATCH 049/157] update changelog --- CHANGELOG.md | 3 +++ 1
file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 833cc4f9..d3958749 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ Most recent change on the bottom. ## Unreleased - 0.6.0 ### Added - [Breaking] Refactor overall model logic into `GraphModel` top-level module +- [Breaking] Added `model_dtype` +- `BATCH_PTR_KEY` in `AtomicDataDict` ### Changed - Always require explicit `seed` @@ -16,6 +18,7 @@ Most recent change on the bottom. - Don't log as often by default - [Breaking] Default nonlinearities are `silu` (`e`) and `tanh` (`o`) - Will not reproduce previous versions' data shuffling order (for all practical purposes this does not matter, the `shuffle` option is unchanged) +- [Breaking] `default_dtype` defaults to `float64` (`model_dtype` default `float32`) ### Fixed - Work with `wandb>=0.13.8` From 9e983cd9f3d38d24114f40a6b96c8dcaed0d838d Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 1 Feb 2023 23:14:26 -0500 Subject: [PATCH 050/157] lint --- nequip/data/dataloader.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nequip/data/dataloader.py b/nequip/data/dataloader.py index d38eb22e..ea9c7fc9 100644 --- a/nequip/data/dataloader.py +++ b/nequip/data/dataloader.py @@ -1,5 +1,4 @@ from typing import List, Optional, Iterator -import math import torch from torch.utils.data import Sampler From 8f13ecb488f05f6e8ac94a59ebc3535e1651f117 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Thu, 2 Feb 2023 17:24:58 -0500 Subject: [PATCH 051/157] fix trainer test --- tests/unit/trainer/test_trainer.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/unit/trainer/test_trainer.py b/tests/unit/trainer/test_trainer.py index 98550194..197f3897 100644 --- a/tests/unit/trainer/test_trainer.py +++ b/tests/unit/trainer/test_trainer.py @@ -146,11 +146,17 @@ def test_from_file(self, trainer, append): class TestData: @pytest.mark.parametrize("mode", ["random", "sequential"]) def test_split(self, trainer, nequip_dataset, mode): - trainer.train_val_split = mode trainer.set_dataset(nequip_dataset) - for i, batch in enumerate(trainer.dl_train): - print(i, batch) + for epoch_i in range(3): + trainer.dl_train_sampler.step_epoch(epoch_i) + n_samples: int = 0 + for i, batch in enumerate(trainer.dl_train): + n_samples += batch[AtomicDataDict.BATCH_PTR_KEY].shape[0] - 1 + if trainer.n_train_per_epoch is not None: + assert n_samples == trainer.n_train_per_epoch + else: + assert n_samples == trainer.n_train class TestTrain: From c651445657866af0eb68d825700ac5b46a0fa28c Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 3 Feb 2023 17:35:59 -0500 Subject: [PATCH 052/157] remove related_scale/shift_keys --- nequip/model/_scaling.py | 8 -------- nequip/nn/_atomwise.py | 17 +++++++++++++---- nequip/nn/_rescale.py | 8 -------- 3 files changed, 13 insertions(+), 20 deletions(-) diff --git a/nequip/model/_scaling.py b/nequip/model/_scaling.py index 2a1e767a..f9b5d208 100644 --- a/nequip/model/_scaling.py +++ b/nequip/model/_scaling.py @@ -29,8 +29,6 @@ def RescaleEnergyEtc( default_shift=None, default_scale_keys=AtomicDataDict.ALL_ENERGY_KEYS, default_shift_keys=[AtomicDataDict.TOTAL_ENERGY_KEY], - default_related_scale_keys=[AtomicDataDict.PER_ATOM_ENERGY_KEY], - default_related_shift_keys=[], ) @@ -43,8 +41,6 @@ def GlobalRescale( default_shift: Union[str, float, list], default_scale_keys: list, 
default_shift_keys: list, - default_related_scale_keys: list, - default_related_shift_keys: list, dataset: Optional[AtomicDataset] = None, ): """Add global rescaling for energy(-based quantities). @@ -113,8 +109,6 @@ def GlobalRescale( error_string = "keys need to be a list" assert isinstance(default_scale_keys, list), error_string assert isinstance(default_shift_keys, list), error_string - assert isinstance(default_related_scale_keys, list), error_string - assert isinstance(default_related_shift_keys, list), error_string # == Build the model == return RescaleOutput( @@ -123,8 +117,6 @@ def GlobalRescale( scale_by=global_scale, shift_keys=[k for k in default_shift_keys if k in model.irreps_out], shift_by=global_shift, - related_scale_keys=default_related_scale_keys, - related_shift_keys=default_related_shift_keys, shift_trainable=config.get(f"{module_prefix}_shift_trainable", False), scale_trainable=config.get(f"{module_prefix}_scale_trainable", False), default_dtype=config.get("default_dtype", None), diff --git a/nequip/nn/_atomwise.py b/nequip/nn/_atomwise.py index 2b698b3d..43f243c4 100644 --- a/nequip/nn/_atomwise.py +++ b/nequip/nn/_atomwise.py @@ -12,6 +12,7 @@ from nequip.utils import dtype_from_name from nequip.utils.versions import _TORCH_IS_GE_1_13 from ._graph_mixin import GraphModuleMixin +from ._rescale import RescaleOutput class AtomwiseOperation(GraphModuleMixin, torch.nn.Module): @@ -233,10 +234,18 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: data[self.out_field] = in_field return data - def update_for_rescale(self, rescale_module): - if hasattr(rescale_module, "related_scale_keys"): - if self.out_field not in rescale_module.related_scale_keys: - return + def update_for_rescale(self, rescale_module: RescaleOutput): + if not self.arguments_in_dataset_units: + # nothing to rescale, arguments are in normalized units already / unitless + return + # are we scaling something related to the global rescaling? + if self.field not in rescale_module.scale_keys: + return + # now check that we have the right rescaling in the specific energy case + if self.field == AtomicDataDict.PER_ATOM_ENERGY_KEY and not ( + set(rescale_module.scale_keys) <= set(AtomicDataDict.ALL_ENERGY_KEYS) + ): + raise AssertionError("Some unsupported energy scaling arrangement...") if self.arguments_in_dataset_units and rescale_module.has_scale: logging.debug( f"PerSpeciesScaleShift's arguments were in dataset units; rescaling:\n " diff --git a/nequip/nn/_rescale.py b/nequip/nn/_rescale.py index 937ccd75..1828ab56 100644 --- a/nequip/nn/_rescale.py +++ b/nequip/nn/_rescale.py @@ -22,8 +22,6 @@ class RescaleOutput(GraphModuleMixin, torch.nn.Module): Which fields to rescale. shift_keys : list of keys, default [] Which fields to shift after rescaling. - related_scale_keys: list of keys that could be contingent to this rescale - related_shift_keys: list of keys that could be contingent to this rescale scale_by : floating or Tensor, default 1. The scaling factor by which to multiply fields in ``scale``. shift_by : floating or Tensor, default 0.
@@ -34,8 +32,6 @@ class RescaleOutput(GraphModuleMixin, torch.nn.Module): scale_keys: List[str] shift_keys: List[str] - related_scale_keys: List[str] - related_shift_keys: List[str] scale_trainble: bool rescale_trainable: bool _all_keys: List[str] @@ -50,8 +46,6 @@ def __init__( model: GraphModuleMixin, scale_keys: Union[Sequence[str], str] = [], shift_keys: Union[Sequence[str], str] = [], - related_shift_keys: Union[Sequence[str], str] = [], - related_scale_keys: Union[Sequence[str], str] = [], scale_by=None, shift_by=None, shift_trainable: bool = False, @@ -89,8 +83,6 @@ def __init__( self.scale_keys = list(scale_keys) self.shift_keys = list(shift_keys) self._all_keys = list(all_keys) - self.related_scale_keys = list(set(related_scale_keys).union(scale_keys)) - self.related_shift_keys = list(set(related_shift_keys).union(shift_keys)) self.default_dtype = dtype_from_name( torch.get_default_dtype() if default_dtype is None else default_dtype From e471c8d299ea708d1b9efbe9d4781dd72d5e3ff9 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 3 Feb 2023 18:37:20 -0500 Subject: [PATCH 053/157] stress units note --- configs/minimal_stress.yaml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/configs/minimal_stress.yaml b/configs/minimal_stress.yaml index 8ffa7e31..63aaf832 100644 --- a/configs/minimal_stress.yaml +++ b/configs/minimal_stress.yaml @@ -19,11 +19,14 @@ parity: true num_features: 16 # data set -dataset: ase # type of data set, can be npz or ase +dataset: ase # type of data set, can be npz or ase dataset_url: https://qmml.org/Datasets/w-14.zip # url to download the npz. optional -dataset_file_name: ./benchmark_data/w-14.xyz # path to data set file +dataset_file_name: ./benchmark_data/w-14.xyz # path to data set file dataset_key_mapping: force: forces +# !! WARNING !!: NequIP expects virial / stress labels to be in "consistent units", i.e. +# using the same base units as all other data. For stress, in particular, +# this means the correct units for nequip are energy / length^3. dataset_include_keys: - virial # A mapping of chemical species to type indexes is necessary if the dataset is provided with atomic numbers instead of type indexes. 
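[Editor's note — illustrative material, not part of the patch series. The units warning added to `configs/minimal_stress.yaml` in PATCH 053 above is easy to trip over in practice: many electronic-structure codes report stress in kbar or GPa rather than in energy / length^3. A minimal pre-processing sketch, assuming an extxyz dataset whose per-frame `stress` entries are stored in kbar — the file names and the kbar assumption are hypothetical:

import ase.io
import ase.units

# hypothetical input: frames whose info["stress"] was written in kbar
frames = ase.io.read("w-14_raw.xyz", index=":")
for atoms in frames:
    # kbar -> eV/Angstrom^3: 1 kbar = 0.1 GPa, and ase.units.GPa is the value
    # of one GPa expressed in ASE's native eV / Angstrom^3 unit system
    atoms.info["stress"] = atoms.info["stress"] * 0.1 * ase.units.GPa
ase.io.write("w-14_consistent_units.xyz", frames)

With energies in eV and positions in Angstrom, the converted labels are then in the energy / length^3 units the added comment requires.]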
From 580a76a8a267a1a10416bbe0dbb70e54c252ce27 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 3 Feb 2023 19:10:44 -0500 Subject: [PATCH 054/157] test PartialSampler --- tests/unit/data/test_dataloader.py | 5 +- tests/unit/data/test_sampler.py | 84 ++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 tests/unit/data/test_sampler.py diff --git a/tests/unit/data/test_dataloader.py b/tests/unit/data/test_dataloader.py index fc511143..8c7cc510 100644 --- a/tests/unit/data/test_dataloader.py +++ b/tests/unit/data/test_dataloader.py @@ -54,10 +54,13 @@ def test_subset_sampler(self, npz_dataset): print(batch) +NPZ_DATASET_FIXTURE_N_FRAMES: int = 8 + + @pytest.fixture(scope="module") def npz_dataset(): natoms = 3 - nframes = 8 + nframes = NPZ_DATASET_FIXTURE_N_FRAMES npz = dict( positions=np.random.random((nframes, natoms, 3)), force=np.random.random((nframes, natoms, 3)), diff --git a/tests/unit/data/test_sampler.py b/tests/unit/data/test_sampler.py new file mode 100644 index 00000000..1b249d65 --- /dev/null +++ b/tests/unit/data/test_sampler.py @@ -0,0 +1,84 @@ +import pytest +import itertools + +import torch + +from nequip.data import PartialSampler + +from test_dataloader import npz_dataset, NPZ_DATASET_FIXTURE_N_FRAMES # noqa + + +@pytest.fixture(params=[True, False], scope="module") +def shuffle(request) -> bool: + return request.param + + +@pytest.fixture( + params=[None, 1, 2, 5, 7, NPZ_DATASET_FIXTURE_N_FRAMES], scope="function" +) +def sampler(request, npz_dataset, shuffle) -> PartialSampler: # noqa: F811 + return PartialSampler( + data_source=npz_dataset, + shuffle=shuffle, + num_samples_per_epoch=request.param, + generator=torch.Generator().manual_seed(0), + ) + + +def test_partials_add_up(sampler: PartialSampler): + """Confirm that full data epochs are (random permutations of) the list of all dataset indexes""" + seq = [] + for epoch_i in range(2 * sampler.num_samples_total + 1): + sampler.step_epoch(epoch_i) + seq.extend(iter(sampler)) + + seq = [int(e) for e in seq] + + if sampler.shuffle: + # make sure we've at least hit every frame once + assert set(seq) == set(range(sampler.num_samples_total)) + # then go through it by dataset epochs + i = 0 + while True: + data_epoch_indexes = seq[i : i + sampler.num_samples_total] + if len(data_epoch_indexes) == 0: + break + if len(data_epoch_indexes) == sampler.num_samples_total: + # it should be a random permutation + assert set(data_epoch_indexes) == set(range(sampler.num_samples_total)) + elif len(data_epoch_indexes) < sampler.num_samples_total: + # we have a partial dataset epoch at the end + assert set(data_epoch_indexes) <= set(range(sampler.num_samples_total)) + assert len(set(data_epoch_indexes)) == len(data_epoch_indexes) + else: + assert False + i += sampler.num_samples_total + else: + # make sure it's a repeating sequence of aranges + assert ( + seq + == list( + itertools.chain( + *[ + range(sampler.num_samples_total) + for _ in range(sampler._epoch + 2) + ] + ) + )[: len(seq)] + ) + + +def test_epoch_count(sampler: PartialSampler): + with pytest.raises(AssertionError): + list(iter(sampler)) + sampler.step_epoch(0) + assert sampler._epoch == 0 + assert sampler._prev_epoch is None + list(iter(sampler)) + assert sampler._prev_epoch == 0 + with pytest.raises(AssertionError): + list(iter(sampler)) + sampler.step_epoch(1) + list(iter(sampler)) + assert sampler._epoch == 1 + assert sampler._prev_epoch == 1 # since that's the prev epoch we've just
completed From f0529036b3140a34d52a6a4a635fdcf9f1d58b27 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 3 Feb 2023 19:18:32 -0500 Subject: [PATCH 055/157] add force test --- nequip/utils/unittests/model_tests.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/nequip/utils/unittests/model_tests.py b/nequip/utils/unittests/model_tests.py index c3e77cd3..c8c358bb 100644 --- a/nequip/utils/unittests/model_tests.py +++ b/nequip/utils/unittests/model_tests.py @@ -283,6 +283,16 @@ def test_large_separation(self, model, config, molecules, device): out_both[AtomicDataDict.TOTAL_ENERGY_KEY], atol=atol, ) + if AtomicDataDict.FORCE_KEY in out1: + # check forces if it's a force model + assert torch.allclose( + torch.cat( + (out1[AtomicDataDict.FORCE_KEY], out2[AtomicDataDict.FORCE_KEY]), + dim=0, + ), + out_both[AtomicDataDict.FORCE_KEY], + atol=atol, + ) atoms_both2 = atoms1.copy() atoms3 = atoms2.copy() From 0c4baad1a584f00f5b5ff03761ff89112284d115 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 3 Feb 2023 19:23:40 -0500 Subject: [PATCH 056/157] improved batch indexing in stress --- nequip/nn/_grad_output.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nequip/nn/_grad_output.py b/nequip/nn/_grad_output.py index 2bf52606..819a82d5 100644 --- a/nequip/nn/_grad_output.py +++ b/nequip/nn/_grad_output.py @@ -219,7 +219,7 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: data = AtomicDataDict.with_batch(data) batch = data[AtomicDataDict.BATCH_KEY] - num_batch: int = int(batch.max().cpu().item()) + 1 + num_batch: int = len(data[AtomicDataDict.BATCH_PTR_KEY]) - 1 pos = data[AtomicDataDict.POSITIONS_KEY] has_cell: bool = AtomicDataDict.CELL_KEY in data @@ -265,7 +265,7 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: pos.requires_grad_(True) # bmm is natom in batch data[AtomicDataDict.POSITIONS_KEY] = pos + torch.bmm( - pos.unsqueeze(-2), symmetric_displacement[batch] + pos.unsqueeze(-2), torch.index_select(symmetric_displacement, 0, batch) ).squeeze(-2) # we only displace the cell if we have one: if has_cell: From cdc36b5d377764140a4de8c5305bf857136231fa Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 3 Feb 2023 19:58:02 -0500 Subject: [PATCH 057/157] test wrapped vs unwrapped consistent --- nequip/utils/unittests/conftest.py | 12 ++++++++- nequip/utils/unittests/model_tests.py | 39 +++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/nequip/utils/unittests/conftest.py b/nequip/utils/unittests/conftest.py index 1e56f958..a2dc103d 100644 --- a/nequip/utils/unittests/conftest.py +++ b/nequip/utils/unittests/conftest.py @@ -6,7 +6,7 @@ import os from ase.atoms import Atoms -from ase.build import molecule +from ase.build import molecule, bulk from ase.calculators.singlepoint import SinglePointCalculator from ase.io import write @@ -115,6 +115,16 @@ def CH3CHO_no_typemap(float_tolerance) -> Tuple[Atoms, AtomicData]: return atoms, data +@pytest.fixture(scope="session") +def Cu_bulk(float_tolerance) -> Tuple[Atoms, AtomicData]: + atoms = bulk("Cu") * (2, 2, 1) + atoms.rattle() + data = AtomicData.from_ase(atoms, r_max=3.5) + tm = TypeMapper(chemical_symbol_to_type={"Cu": 0}) + data = tm(data) + return atoms, data + + @pytest.fixture(scope="session") def molecules() -> List[Atoms]: atoms_list = [] diff --git 
a/nequip/utils/unittests/model_tests.py b/nequip/utils/unittests/model_tests.py index c8c358bb..6d40a557 100644 --- a/nequip/utils/unittests/model_tests.py +++ b/nequip/utils/unittests/model_tests.py @@ -116,6 +116,45 @@ def test_forward(self, model, atomic_batch, device): for out_field in out_fields: assert out_field in output + def test_wrapped_unwrapped(self, model, device, Cu_bulk, float_tolerance): + atoms, data_orig = Cu_bulk + instance, out_fields = model + data = AtomicData.from_ase(atoms, r_max=3.5) + data[AtomicDataDict.ATOM_TYPE_KEY] = data_orig[AtomicDataDict.ATOM_TYPE_KEY] + data.to(device) + out_ref = instance(AtomicData.to_AtomicDataDict(data)) + # now put things in other periodic images + cell_shifts = torch.randint( + 0, + 3, + (len(atoms), 3), + device=device, + dtype=data[AtomicDataDict.POSITIONS_KEY].dtype, + ) + shifts = torch.einsum("zi,ix->zx", cell_shifts, data[AtomicDataDict.CELL_KEY]) + atoms.positions += shifts.detach().cpu().numpy() + # must recompute the neighborlist for this, since the edge_cell_shifts changed + data2 = AtomicData.from_ase(atoms, r_max=3.5) + data2[AtomicDataDict.ATOM_TYPE_KEY] = data[AtomicDataDict.ATOM_TYPE_KEY] + data2.to(device) + assert torch.equal( + data[AtomicDataDict.EDGE_INDEX_KEY], data2[AtomicDataDict.EDGE_INDEX_KEY] + ) + tmp = ( + data[AtomicDataDict.EDGE_CELL_SHIFT_KEY] + + cell_shifts[data[AtomicDataDict.EDGE_INDEX_KEY][0]] + - cell_shifts[data[AtomicDataDict.EDGE_INDEX_KEY][1]] + ) + assert torch.equal( + tmp, + data2[AtomicDataDict.EDGE_CELL_SHIFT_KEY], + ) + out_unwrapped = instance(AtomicData.to_AtomicDataDict(data2)) + for out_field in out_fields: + assert torch.allclose( + out_ref[out_field], out_unwrapped[out_field], atol=float_tolerance + ) + def test_batch(self, model, atomic_batch, device, float_tolerance): """Confirm that the results for individual examples are the same regardless of whether they are batched.""" allclose = functools.partial(torch.allclose, atol=float_tolerance) From efa9c2005955f2331dd93ea0023420419a713621 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 3 Feb 2023 20:23:52 -0500 Subject: [PATCH 058/157] RDF --- CHANGELOG.md | 1 + examples/rdf.py | 47 +++++++++++++++++++++++++++++++++++++ nequip/data/dataset.py | 53 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 101 insertions(+) create mode 100644 examples/rdf.py diff --git a/CHANGELOG.md b/CHANGELOG.md index d3958749..f6e50743 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ Most recent change on the bottom. 
- [Breaking] Refactor overall model logic into `GraphModel` top-level module - [Breaking] Added `model_dtype` - `BATCH_PTR_KEY` in `AtomicDataDict` +- `AtomicInMemoryDataset.rdf()` ### Changed - Always require explicit `seed` diff --git a/examples/rdf.py b/examples/rdf.py new file mode 100644 index 00000000..13377417 --- /dev/null +++ b/examples/rdf.py @@ -0,0 +1,47 @@ +"""Example of loading a NequIP dataset and computing its RDFs""" + +import argparse +import itertools + +from scipy.special import comb +import matplotlib.pyplot as plt + +from nequip.utils import Config +from nequip.data import dataset_from_config +from nequip.scripts.train import default_config +from nequip.utils._global_options import _set_global_options + +# Parse arguments: +parser = argparse.ArgumentParser( + description="Plot RDFs of dataset specified in a `nequip` YAML file" +) +parser.add_argument("config", help="YAML file configuring dataset") +args = parser.parse_args() +config = Config.from_file(args.config, defaults=default_config) +_set_global_options(config) + +print("Loading dataset...") +r_max = config["r_max"] +dataset = dataset_from_config(config=config) +print( + f" loaded dataset of {len(dataset)} frames with {dataset.type_mapper.num_types} types" +) + +print("Computing RDFs...") +rdfs = dataset.rdf(bin_width=0.01) + +print("Plotting...") +num_types: int = dataset.type_mapper.num_types +fig, axs = plt.subplots(nrows=int(comb(N=num_types, k=2)), sharex=True) + +for i, (type1, type2) in enumerate(itertools.combinations(range(num_types), 2)): + ax = axs[i] + ax.set_ylabel( + f"{dataset.type_mapper.type_names[type1]}-{dataset.type_mapper.type_names[type2]} RDF" + ) + hist, bin_edges = rdfs[(type1, type2)] + ax.plot(bin_edges[:-1], hist) + +ax.set_xlabel("Distance") + +plt.show() diff --git a/nequip/data/dataset.py b/nequip/data/dataset.py index 25e2927b..526afdf1 100644 --- a/nequip/data/dataset.py +++ b/nequip/data/dataset.py @@ -6,6 +6,7 @@ import itertools import yaml import hashlib +import math from os.path import dirname, basename, abspath from typing import Tuple, Dict, Any, List, Callable, Union, Optional, Sequence @@ -595,6 +596,58 @@ def _per_species_statistics( else: raise NotImplementedError + def rdf( + self, bin_width: float, stride: int = 1 + ) -> Dict[Tuple[int, int], Tuple[np.ndarray, np.ndarray]]: + """Compute the pairwise RDFs of the dataset. + + Args: + bin_width: width of the histogram bin in distance units + stride: stride of data to include + + Returns: + dictionary mapping `(type1, type2)` to tuples of `(hist, bin_edges)` in the style of `np.histogram`. 
+ """ + graph_selector, node_selector, edge_selector = self._selectors(stride=stride) + + data = AtomicData.to_AtomicDataDict(self.data) + data = AtomicDataDict.with_edge_vectors(data, with_lengths=True) + + results = {} + + types = self.type_mapper(data)[AtomicDataDict.ATOM_TYPE_KEY] + + edge_types = torch.index_select( + types, 0, data[AtomicDataDict.EDGE_INDEX_KEY].reshape(-1) + ).view(2, -1) + types_center = edge_types[0].numpy() + types_neigh = edge_types[1].numpy() + + r_max: float = self.AtomicData_options["r_max"] + # + 1 to always have a zero bin at the end + n_bins: int = int(math.ceil(r_max / bin_width)) + 1 + # +1 since these are bin_edges including rightmost + bins = bin_width * np.arange(n_bins + 1) + + for type1, type2 in itertools.combinations( + range(self.type_mapper.num_types), 2 + ): + # Try to do as much of this as possible in-place + mask = types_center == type1 + np.logical_and(mask, types_neigh == type2, out=mask) + np.logical_and(mask, edge_selector, out=mask) + mask = mask.astype(np.int32) + results[(type1, type2)] = np.histogram( + data[AtomicDataDict.EDGE_LENGTH_KEY], + weights=mask, + bins=bins, + density=True, + ) + # RDF is symmetric + results[(type2, type1)] = results[(type1, type2)] + + return results + class NpzDataset(AtomicInMemoryDataset): """Load data from an npz file. From d569419fb1e2ed63da15c9bc481f572f4aa058b6 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 6 Feb 2023 21:52:30 -0500 Subject: [PATCH 059/157] remove unnecessary sum --- nequip/nn/_grad_output.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/nequip/nn/_grad_output.py b/nequip/nn/_grad_output.py index 819a82d5..a69e61bd 100644 --- a/nequip/nn/_grad_output.py +++ b/nequip/nn/_grad_output.py @@ -242,10 +242,20 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: # Paper they worked from: # Knuth et. al. Comput. Phys. Commun 190, 33-50, 2015 # https://pure.mpg.de/rest/items/item_2085135_9/component/file_2156800/content - displacement = torch.zeros( - (num_batch, 3, 3), - dtype=pos.dtype, - device=pos.device, + # However, unlike the above approach, we make a slight change for efficiency + # by using the displacement as the identity rather than zero, and removing the + # sums. This works out identically in the math because: + # d[(I + eps)r]/d[eps] + # (chain rule) = d[(I + eps)r]/d[I + eps] * d[I + eps]/d[eps] + # (simplify) = d[(I + eps)r]/d[I + eps] * 1 + displacement = ( + torch.eye( + 3, + dtype=pos.dtype, + device=pos.device, + ) + .view(-1, 3, 3) + .expand(num_batch, 3, 3) ) displacement.requires_grad_(True) data["_displacement"] = displacement @@ -264,7 +274,7 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: did_pos_req_grad: bool = pos.requires_grad pos.requires_grad_(True) # bmm is natom in batch - data[AtomicDataDict.POSITIONS_KEY] = pos + torch.bmm( + data[AtomicDataDict.POSITIONS_KEY] = torch.bmm( pos.unsqueeze(-2), torch.index_select(symmetric_displacement, 0, batch) ).squeeze(-2) # we only displace the cell if we have one: @@ -276,9 +286,7 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: # there would then be an infinitesimal rotation of the positions # but not cell, and it thus wouldn't be global and have # no effect due to equivariance/invariance. 
- data[AtomicDataDict.CELL_KEY] = cell + torch.bmm( - cell, symmetric_displacement - ) + data[AtomicDataDict.CELL_KEY] = torch.bmm(cell, symmetric_displacement) # Call model and get gradients data = self.func(data) From 4b870ca28eab69dd2261598d28cac2ff6b725807 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 6 Feb 2023 22:16:36 -0500 Subject: [PATCH 060/157] remove weird print --- nequip/scripts/benchmark.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nequip/scripts/benchmark.py b/nequip/scripts/benchmark.py index 12a5fc6f..80ea1873 100644 --- a/nequip/scripts/benchmark.py +++ b/nequip/scripts/benchmark.py @@ -159,7 +159,6 @@ def main(args=None): return elif args.n is None: args.n = 5 if args.profile else 30 - print(args.n) # Load model: if args.model is None: From 005104d8811acfaf6247102488f2bc98a9491dce Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 6 Feb 2023 22:23:33 -0500 Subject: [PATCH 061/157] don't batch / scatter unnecessarily --- nequip/nn/_atomwise.py | 28 ++++++++++++++------- nequip/nn/_grad_output.py | 51 ++++++++++++++++++++++++++------------- 2 files changed, 53 insertions(+), 26 deletions(-) diff --git a/nequip/nn/_atomwise.py b/nequip/nn/_atomwise.py index 43f243c4..c4592b16 100644 --- a/nequip/nn/_atomwise.py +++ b/nequip/nn/_atomwise.py @@ -89,16 +89,26 @@ def __init__( ) def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: - data = AtomicDataDict.with_batch(data) - data[self.out_field] = scatter( - data[self.field], - data[AtomicDataDict.BATCH_KEY], - dim=0, - dim_size=len(data[AtomicDataDict.BATCH_PTR_KEY]) - 1, - reduce=self.reduce, - ) + field = data[self.field] + if AtomicDataDict.BATCH_KEY in data: + result = scatter( + field, + data[AtomicDataDict.BATCH_KEY], + dim=0, + dim_size=len(data[AtomicDataDict.BATCH_PTR_KEY]) - 1, + reduce=self.reduce, + ) + else: + # We can significantly simplify and avoid scatters + if self.reduce == "sum": + result = field.sum(dim=0) + elif self.reduce == "mean": + result = field.mean(dim=0) + else: + assert False if self.constant != 1.0: - data[self.out_field] = data[self.out_field] * self.constant + result = result * self.constant + data[self.out_field] = result return data diff --git a/nequip/nn/_grad_output.py b/nequip/nn/_grad_output.py index a69e61bd..93f4b1cc 100644 --- a/nequip/nn/_grad_output.py +++ b/nequip/nn/_grad_output.py @@ -216,10 +216,15 @@ def __init__( self.register_buffer("_empty", torch.Tensor()) def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: - data = AtomicDataDict.with_batch(data) - batch = data[AtomicDataDict.BATCH_KEY] - num_batch: int = len(data[AtomicDataDict.BATCH_PTR_KEY]) - 1 + if AtomicDataDict.BATCH_KEY in data: + batch = data[AtomicDataDict.BATCH_KEY] + num_batch: int = len(data[AtomicDataDict.BATCH_PTR_KEY]) - 1 + else: + # Special case for efficiency + batch = self._empty + num_batch: int = 1 + pos = data[AtomicDataDict.POSITIONS_KEY] has_cell: bool = AtomicDataDict.CELL_KEY in data @@ -248,15 +253,14 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: # d[(I + eps)r]/d[eps] # (chain rule) = d[(I + eps)r]/d[I + eps] * d[I + eps]/d[eps] # (simplify) = d[(I + eps)r]/d[I + eps] * 1 - displacement = ( - torch.eye( - 3, - dtype=pos.dtype, - device=pos.device, - ) - .view(-1, 3, 3) - .expand(num_batch, 3, 3) + displacement = torch.eye( + 3, + dtype=pos.dtype, + device=pos.device, ) + if num_batch > 1: + # add n_batch dimension 
+ displacement = displacement.view(-1, 3, 3).expand(num_batch, 3, 3) displacement.requires_grad_(True) data["_displacement"] = displacement # in the above paper, the infinitesimal distortion is *symmetric* @@ -273,10 +277,15 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: symmetric_displacement = 0.5 * (displacement + displacement.transpose(-1, -2)) did_pos_req_grad: bool = pos.requires_grad pos.requires_grad_(True) - # bmm is natom in batch - data[AtomicDataDict.POSITIONS_KEY] = torch.bmm( - pos.unsqueeze(-2), torch.index_select(symmetric_displacement, 0, batch) - ).squeeze(-2) + if num_batch > 1: + # bmm is natom in batch + # batched [natom, 1, 3] @ [natom, 3, 3] -> [natom, 1, 3] -> [natom, 3] + data[AtomicDataDict.POSITIONS_KEY] = torch.bmm( + pos.unsqueeze(-2), torch.index_select(symmetric_displacement, 0, batch) + ).squeeze(-2) + else: + # [natom, 3] @ [3, 3] -> [natom, 3] + data[AtomicDataDict.POSITIONS_KEY] = torch.mm(pos, symmetric_displacement) # we only displace the cell if we have one: if has_cell: # bmm is num_batch in batch @@ -286,7 +295,14 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: # there would then be an infinitesimal rotation of the positions # but not cell, and it thus wouldn't be global and have # no effect due to equivariance/invariance. - data[AtomicDataDict.CELL_KEY] = torch.bmm(cell, symmetric_displacement) + if num_batch > 1: + # [n_batch, 3, 3] @ [n_batch, 3, 3] + data[AtomicDataDict.CELL_KEY] = torch.bmm(cell, symmetric_displacement) + else: + # [3, 3] @ [3, 3] --- enforced to these shapes + data[AtomicDataDict.CELL_KEY] = torch.mm( + cell.squeeze(0), symmetric_displacement + ).unsqueeze(0) # Call model and get gradients data = self.func(data) @@ -310,6 +326,7 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: if virial is None: # condition needed to unwrap optional for torchscript assert False, "failed to compute virial autograd" + virial = virial.view(num_batch, 3, 3) # we only compute the stress (1/V * virial) if we have a cell whose volume we can compute if has_cell: @@ -322,7 +339,7 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: cell[:, 0, :], torch.cross(cell[:, 1, :], cell[:, 2, :], dim=1), ).unsqueeze(-1) - stress = virial / volume.view(-1, 1, 1) + stress = virial / volume.view(num_batch, 1, 1) data[AtomicDataDict.CELL_KEY] = orig_cell data[AtomicDataDict.STRESS_KEY] = stress else: From 6b4c0d58b42642db85e960e72882ba24ce5268fd Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 6 Feb 2023 23:09:19 -0500 Subject: [PATCH 062/157] addmm --- nequip/nn/_grad_output.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/nequip/nn/_grad_output.py b/nequip/nn/_grad_output.py index 93f4b1cc..24f36fb1 100644 --- a/nequip/nn/_grad_output.py +++ b/nequip/nn/_grad_output.py @@ -247,14 +247,8 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: # Paper they worked from: # Knuth et. al. Comput. Phys. Commun 190, 33-50, 2015 # https://pure.mpg.de/rest/items/item_2085135_9/component/file_2156800/content - # However, unlike the above approach, we make a slight change for efficiency - # by using the displacement as the identity rather than zero, and removing the - # sums. 
This works out identically in the math because: - # d[(I + eps)r]/d[eps] - # (chain rule) = d[(I + eps)r]/d[I + eps] * d[I + eps]/d[eps] - # (simplify) = d[(I + eps)r]/d[I + eps] * 1 - displacement = torch.eye( - 3, + displacement = torch.zeros( + (3, 3), dtype=pos.dtype, device=pos.device, ) @@ -280,12 +274,14 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: if num_batch > 1: # bmm is natom in batch # batched [natom, 1, 3] @ [natom, 3, 3] -> [natom, 1, 3] -> [natom, 3] - data[AtomicDataDict.POSITIONS_KEY] = torch.bmm( + data[AtomicDataDict.POSITIONS_KEY] = pos + torch.bmm( pos.unsqueeze(-2), torch.index_select(symmetric_displacement, 0, batch) ).squeeze(-2) else: # [natom, 3] @ [3, 3] -> [natom, 3] - data[AtomicDataDict.POSITIONS_KEY] = torch.mm(pos, symmetric_displacement) + data[AtomicDataDict.POSITIONS_KEY] = torch.addmm( + pos, pos, symmetric_displacement + ) # we only displace the cell if we have one: if has_cell: # bmm is num_batch in batch @@ -297,12 +293,16 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: # no effect due to equivariance/invariance. if num_batch > 1: # [n_batch, 3, 3] @ [n_batch, 3, 3] - data[AtomicDataDict.CELL_KEY] = torch.bmm(cell, symmetric_displacement) + data[AtomicDataDict.CELL_KEY] = cell + torch.bmm( + cell, symmetric_displacement + ) else: # [3, 3] @ [3, 3] --- enforced to these shapes - data[AtomicDataDict.CELL_KEY] = torch.mm( - cell.squeeze(0), symmetric_displacement + tmpcell = cell.squeeze(0) + data[AtomicDataDict.CELL_KEY] = torch.addmm( + tmpcell, tmpcell, symmetric_displacement ).unsqueeze(0) + del tmpcell # Call model and get gradients data = self.func(data) From 9cdf325652d41b61f302f0f82fb6f45ec79a5cb8 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 6 Feb 2023 23:32:15 -0500 Subject: [PATCH 063/157] JIT --- nequip/nn/_grad_output.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nequip/nn/_grad_output.py b/nequip/nn/_grad_output.py index 24f36fb1..05a019c3 100644 --- a/nequip/nn/_grad_output.py +++ b/nequip/nn/_grad_output.py @@ -282,6 +282,7 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: data[AtomicDataDict.POSITIONS_KEY] = torch.addmm( pos, pos, symmetric_displacement ) + # assert torch.equal(pos, data[AtomicDataDict.POSITIONS_KEY]) # we only displace the cell if we have one: if has_cell: # bmm is num_batch in batch @@ -302,7 +303,7 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: data[AtomicDataDict.CELL_KEY] = torch.addmm( tmpcell, tmpcell, symmetric_displacement ).unsqueeze(0) - del tmpcell + # assert torch.equal(cell, data[AtomicDataDict.CELL_KEY]) # Call model and get gradients data = self.func(data) From b59d8cf561e28ab3d6f21bb255a4086df344fdd7 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 6 Feb 2023 23:37:16 -0500 Subject: [PATCH 064/157] double check for valid autograd graph --- nequip/nn/_grad_output.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nequip/nn/_grad_output.py b/nequip/nn/_grad_output.py index 05a019c3..3649c7b0 100644 --- a/nequip/nn/_grad_output.py +++ b/nequip/nn/_grad_output.py @@ -216,6 +216,7 @@ def __init__( self.register_buffer("_empty", torch.Tensor()) def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: + assert AtomicDataDict.EDGE_VECTORS_KEY not in data if AtomicDataDict.BATCH_KEY in data: batch = data[AtomicDataDict.BATCH_KEY] From 
289dd13a44c33dc33f3c6a46b9215f3e17e2ffe4 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 7 Feb 2023 00:26:57 -0500 Subject: [PATCH 065/157] fix dimensions in special case --- nequip/nn/_atomwise.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nequip/nn/_atomwise.py b/nequip/nn/_atomwise.py index c4592b16..a03ccbf1 100644 --- a/nequip/nn/_atomwise.py +++ b/nequip/nn/_atomwise.py @@ -101,9 +101,9 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: else: # We can significantly simplify and avoid scatters if self.reduce == "sum": - result = field.sum(dim=0) + result = field.sum(dim=0, keepdim=True) elif self.reduce == "mean": - result = field.mean(dim=0) + result = field.mean(dim=0, keepdim=True) else: assert False if self.constant != 1.0: From f5c19c9e8476384a2963f00a5cb2ccc546ad3c22 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 7 Feb 2023 00:27:35 -0500 Subject: [PATCH 066/157] test wrapped more --- nequip/utils/unittests/model_tests.py | 66 +++++++++++++++------------ 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/nequip/utils/unittests/model_tests.py b/nequip/utils/unittests/model_tests.py index 6d40a557..6434ea0e 100644 --- a/nequip/utils/unittests/model_tests.py +++ b/nequip/utils/unittests/model_tests.py @@ -124,36 +124,44 @@ def test_wrapped_unwrapped(self, model, device, Cu_bulk, float_tolerance): data.to(device) out_ref = instance(AtomicData.to_AtomicDataDict(data)) # now put things in other periodic images - cell_shifts = torch.randint( - 0, - 3, - (len(atoms), 3), - device=device, - dtype=data[AtomicDataDict.POSITIONS_KEY].dtype, - ) - shifts = torch.einsum("zi,ix->zx", cell_shifts, data[AtomicDataDict.CELL_KEY]) - atoms.positions += shifts.detach().cpu().numpy() - # must recompute the neighborlist for this, since the edge_cell_shifts changed - data2 = AtomicData.from_ase(atoms, r_max=3.5) - data2[AtomicDataDict.ATOM_TYPE_KEY] = data[AtomicDataDict.ATOM_TYPE_KEY] - data2.to(device) - assert torch.equal( - data[AtomicDataDict.EDGE_INDEX_KEY], data2[AtomicDataDict.EDGE_INDEX_KEY] - ) - tmp = ( - data[AtomicDataDict.EDGE_CELL_SHIFT_KEY] - + cell_shifts[data[AtomicDataDict.EDGE_INDEX_KEY][0]] - - cell_shifts[data[AtomicDataDict.EDGE_INDEX_KEY][1]] - ) - assert torch.equal( - tmp, - data2[AtomicDataDict.EDGE_CELL_SHIFT_KEY], - ) - out_unwrapped = instance(AtomicData.to_AtomicDataDict(data2)) - for out_field in out_fields: - assert torch.allclose( - out_ref[out_field], out_unwrapped[out_field], atol=float_tolerance + rng = torch.Generator(device=device).manual_seed(12345) + # try a few different shifts + for _ in range(3): + cell_shifts = torch.randint( + -5, + 5, + (len(atoms), 3), + device=device, + dtype=data[AtomicDataDict.POSITIONS_KEY].dtype, + generator=rng, + ) + shifts = torch.einsum( + "zi,ix->zx", cell_shifts, data[AtomicDataDict.CELL_KEY] + ) + atoms2 = atoms.copy() + atoms2.positions += shifts.detach().cpu().numpy() + # must recompute the neighborlist for this, since the edge_cell_shifts changed + data2 = AtomicData.from_ase(atoms2, r_max=3.5) + data2[AtomicDataDict.ATOM_TYPE_KEY] = data[AtomicDataDict.ATOM_TYPE_KEY] + data2.to(device) + assert torch.equal( + data[AtomicDataDict.EDGE_INDEX_KEY], + data2[AtomicDataDict.EDGE_INDEX_KEY], + ) + tmp = ( + data[AtomicDataDict.EDGE_CELL_SHIFT_KEY] + + cell_shifts[data[AtomicDataDict.EDGE_INDEX_KEY][0]] + -
cell_shifts[data[AtomicDataDict.EDGE_INDEX_KEY][1]] + ) + assert torch.equal( + tmp, + data2[AtomicDataDict.EDGE_CELL_SHIFT_KEY], + ) + out_unwrapped = instance(AtomicData.to_AtomicDataDict(data2)) + for out_field in out_fields: + assert torch.allclose( + out_ref[out_field], out_unwrapped[out_field], atol=float_tolerance + ) def test_batch(self, model, atomic_batch, device, float_tolerance): """Confirm that the results for individual examples are the same regardless of whether they are batched.""" From 265fa0153eef07ffbef30512fad13fe36889ac18 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 8 Feb 2023 11:20:15 -0500 Subject: [PATCH 067/157] initial pair potentials --- CHANGELOG.md | 2 + configs/minimal_pair.yaml | 77 +++++++++ nequip/data/transforms.py | 15 ++ nequip/model/__init__.py | 3 + nequip/model/_build.py | 1 + nequip/model/_pair_potential.py | 35 ++++ nequip/nn/pair_potential.py | 297 ++++++++++++++++++++++++++++++++ tests/unit/model/test_pair.py | 191 ++++++++++++++++++++ 8 files changed, 621 insertions(+) create mode 100644 configs/minimal_pair.yaml create mode 100644 nequip/model/_pair_potential.py create mode 100644 nequip/nn/pair_potential.py create mode 100644 tests/unit/model/test_pair.py diff --git a/CHANGELOG.md b/CHANGELOG.md index f6e50743..4bfdd772 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ Most recent change on the bottom. - [Breaking] Added `model_dtype` - `BATCH_PTR_KEY` in `AtomicDataDict` - `AtomicInMemoryDataset.rdf()` +- `type_to_chemical_symbol` +- Pair potential terms ### Changed - Always require explicit `seed` diff --git a/configs/minimal_pair.yaml b/configs/minimal_pair.yaml new file mode 100644 index 00000000..7c50e766 --- /dev/null +++ b/configs/minimal_pair.yaml @@ -0,0 +1,77 @@ +# Minimal example of training a pair potential. + +# general +root: results/aspirin +run_name: minimal-pair +seed: 123 +dataset_seed: 456 + +# network +# For only a pair potential: +# model_builders: +# - PairPotential +# - StressForceOutput +# - RescaleEnergyEtc +# For a pair potential term with a neural network model on top: +model_builders: + - SimpleIrrepsConfig + - EnergyModel + - PerSpeciesRescale + - PairPotentialTerm # MUST come after PerSpeciesRescale + - StressForceOutput + - RescaleEnergyEtc + +# neural network +num_basis: 8 +r_max: 4.0 +l_max: 2 +parity: true +num_features: 16 + +# pair potential +# pair_style: LJ +# lj_sigma: 0.5 +# lj_delta: 0.0 +# lj_epsilon: 0.05 +# lj_sigma_trainable: true +# lj_delta_trainable: false +# lj_epsilon_trainable: true +pair_style: ZBL + +# data set +# the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys +# key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py) +# all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields +# note that if your data set uses pbc, you need to also pass an array that maps to the nequip "pbc" key +dataset: npz # type of data set, can be npz or ase +dataset_url: http://quantum-machine.org/gdml/data/npz/aspirin_ccsd.zip # url to download the npz. 
optional +dataset_file_name: ./benchmark_data/aspirin_ccsd-train.npz # path to data set file +key_mapping: + z: atomic_numbers # atomic species, integers + E: total_energy # total potential energies to train to + F: forces # atomic forces to train to + R: pos # raw atomic positions +npz_fixed_field_keys: # fields that are repeated across different examples + - atomic_numbers + +chemical_symbols: + - H + - O + - C + +# logging +wandb: false +# verbose: debug + +# training +n_train: 5 +n_val: 5 +batch_size: 1 +validation_batch_size: 5 +max_epochs: 10 + +# loss function +loss_coeffs: forces + +# optimizer +optimizer_name: Adam diff --git a/nequip/data/transforms.py b/nequip/data/transforms.py index 4f6331b7..e39aeb98 100644 --- a/nequip/data/transforms.py +++ b/nequip/data/transforms.py @@ -13,6 +13,7 @@ class TypeMapper: num_types: int chemical_symbol_to_type: Optional[Dict[str, int]] + type_to_chemical_symbol: Optional[Dict[int, str]] type_names: List[str] _min_Z: int @@ -20,6 +21,7 @@ def __init__( self, type_names: Optional[List[str]] = None, chemical_symbol_to_type: Optional[Dict[str, int]] = None, + type_to_chemical_symbol: Optional[Dict[int, str]] = None, chemical_symbols: Optional[List[str]] = None, ): if chemical_symbols is not None: @@ -75,6 +77,14 @@ def __init__( for sym, type_idx in self.chemical_symbol_to_type.items(): self._index_to_Z[type_idx] = ase.data.atomic_numbers[sym] self._valid_set = set(valid_atomic_numbers) + true_type_to_chemical_symbol = { + type_id: sym for sym, type_id in self.chemical_symbol_to_type.items() + } + if type_to_chemical_symbol is not None: + assert type_to_chemical_symbol == true_type_to_chemical_symbol + else: + type_to_chemical_symbol = true_type_to_chemical_symbol + # check if type_names is None: raise ValueError( @@ -88,6 +98,11 @@ def __init__( self.num_types = len(type_names) # Check type_names self.type_names = type_names + if type_to_chemical_symbol is not None: + assert set(type_to_chemical_symbol.keys()) == set(range(self.num_types)) + self.type_to_chemical_symbol = type_to_chemical_symbol + else: + self.type_to_chemical_symbol = None def __call__( self, data: Union[AtomicDataDict.Type, AtomicData], types_required: bool = True diff --git a/nequip/model/__init__.py b/nequip/model/__init__.py index b79a820c..26fafc36 100644 --- a/nequip/model/__init__.py +++ b/nequip/model/__init__.py @@ -6,6 +6,7 @@ initialize_from_state, load_model_state, ) +from ._pair_potential import PairPotential, PairPotentialTerm from ._build import model_from_config @@ -23,5 +24,7 @@ initialize_from_state, load_model_state, model_from_config, + PairPotential, + PairPotentialTerm, builder_utils, ] diff --git a/nequip/model/_build.py b/nequip/model/_build.py index be5b2ac0..35faf536 100644 --- a/nequip/model/_build.py +++ b/nequip/model/_build.py @@ -61,6 +61,7 @@ def model_from_config( ), "inconsistant config & dataset" config["num_types"] = type_mapper.num_types config["type_names"] = type_mapper.type_names + config["type_to_chemical_symbol"] = type_mapper.type_to_chemical_symbol default_dtype = torch.get_default_dtype() model_dtype: torch.dtype = dtype_from_name(config.get("model_dtype", default_dtype)) diff --git a/nequip/model/_pair_potential.py b/nequip/model/_pair_potential.py new file mode 100644 index 00000000..0aa66482 --- /dev/null +++ b/nequip/model/_pair_potential.py @@ -0,0 +1,35 @@ +from nequip.nn import SequentialGraphNetwork, AtomwiseReduce +from nequip.data import AtomicDataDict +from nequip.nn.pair_potential import LennardJones, ZBL + + +def
PairPotentialTerm( + model: SequentialGraphNetwork, + config, +) -> SequentialGraphNetwork: + assert isinstance(model, SequentialGraphNetwork) + + model.insert_from_parameters( + shared_params=config, + name="pair_potential", + builder={"LJ": LennardJones, "ZBL": ZBL}[config.pair_style], + before="total_energy_sum", + ) + return model + + +def PairPotential(config) -> SequentialGraphNetwork: + return SequentialGraphNetwork.from_parameters( + shared_params=config, + layers={ + "pair_potential": {"LJ": LennardJones, "ZBL": ZBL}[config.pair_style], + "total_energy_sum": ( + AtomwiseReduce, + dict( + reduce="sum", + field=AtomicDataDict.PER_ATOM_ENERGY_KEY, + out_field=AtomicDataDict.TOTAL_ENERGY_KEY, + ), + ), + }, + ) diff --git a/nequip/nn/pair_potential.py b/nequip/nn/pair_potential.py new file mode 100644 index 00000000..c51c06a0 --- /dev/null +++ b/nequip/nn/pair_potential.py @@ -0,0 +1,297 @@ +from typing import Union, Optional, Dict, List + +import torch +from torch_runstats.scatter import scatter + +from e3nn.util.jit import compile_mode + +import ase.data + +from nequip.data import AtomicDataDict +from nequip.nn import GraphModuleMixin, RescaleOutput +from nequip.nn.cutoffs import PolynomialCutoff + + +@torch.jit.script +def _param(param, index1, index2): + if param.ndim == 2: + # make it symmetric + param = param.triu() + param.triu(1).transpose(-1, -2) + # get for each atom pair + param = torch.index_select(param.view(-1), 0, index1 * param.shape[0] + index2) + # make it positive + param = param.relu() # TODO: better way? + return param + + +@compile_mode("script") +class LennardJones(GraphModuleMixin, torch.nn.Module): + """Lennard-Jones and related pair potentials.""" + + lj_style: str + exponent: float + + def __init__( + self, + num_types: int, + lj_sigma: Union[torch.Tensor, float], + lj_delta: Union[torch.Tensor, float] = 0, + lj_epsilon: Optional[Union[torch.Tensor, float]] = None, + lj_sigma_trainable: bool = False, + lj_delta_trainable: bool = False, + lj_epsilon_trainable: bool = False, + lj_exponent: Optional[float] = None, + lj_per_type: bool = True, + lj_style: str = "lj", + cutoff=PolynomialCutoff, + cutoff_kwargs={}, + irreps_in=None, + ) -> None: + super().__init__() + self._init_irreps( + irreps_in=irreps_in, irreps_out={AtomicDataDict.PER_ATOM_ENERGY_KEY: "0e"} + ) + assert lj_style in ("lj", "lj_repulsive_only", "repulsive") + self.lj_style = lj_style + + for param, (value, trainable) in { + "epsilon": (lj_epsilon, lj_epsilon_trainable), + "sigma": (lj_sigma, lj_sigma_trainable), + "delta": (lj_delta, lj_delta_trainable), + }.items(): + if value is None: + self.register_buffer(param, torch.Tensor()) # torchscript + continue + value = torch.as_tensor(value, dtype=torch.get_default_dtype()) + if value.ndim == 0 and lj_per_type: + # one scalar for all pair types + value = ( + torch.ones( + num_types, num_types, device=value.device, dtype=value.dtype + ) + * value + ) + elif value.ndim == 2: + assert lj_per_type + # one per pair type, check symmetric + assert value.shape == (num_types, num_types) + # per-species square, make sure symmetric + assert torch.equal(value, value.T) + value = torch.triu(value) + else: + raise ValueError + setattr(self, param, torch.nn.Parameter(value, requires_grad=trainable)) + + if lj_exponent is None: + lj_exponent = 6.0 + self.exponent = lj_exponent + + self._has_cutoff = cutoff is not None + if self._has_cutoff: + self.cutoff = cutoff(**cutoff_kwargs) + else: + self.cutoff = torch.nn.Identity() + + def forward(self, data: 
AtomicDataDict.Type) -> AtomicDataDict.Type: + data = AtomicDataDict.with_edge_vectors(data, with_lengths=True) + edge_center = data[AtomicDataDict.EDGE_INDEX_KEY][0] + atom_types = data[AtomicDataDict.ATOM_TYPE_KEY] + edge_len = data[AtomicDataDict.EDGE_LENGTH_KEY].unsqueeze(-1) + edge_types = torch.index_select( + atom_types, 0, data[AtomicDataDict.EDGE_INDEX_KEY].reshape(-1) + ).view(2, -1) + index1 = edge_types[0] + index2 = edge_types[1] + + sigma = _param(self.sigma, index1, index2) + delta = _param(self.delta, index1, index2) + epsilon = _param(self.epsilon, index1, index2) + + if self.lj_style == "repulsive": + # 0.5 to assign half and half the energy to each side of the interaction + lj_eng = 0.5 * epsilon * ((sigma * (edge_len - delta)) ** -self.exponent) + else: + lj_eng = (sigma / (edge_len - delta)) ** self.exponent + lj_eng = torch.neg(lj_eng) + lj_eng = lj_eng + lj_eng.square() + # 2.0 because we do the slightly symmetric thing and let + # ij and ji each contribute half of the LJ energy of the pair + # this avoids indexing out certain edges in the general case where + # the edges are not ordered. + lj_eng = (2.0 * epsilon) * lj_eng + + if self.lj_style == "lj_repulsive_only": + # if taking only the repulsive part, shift up so the minima is at eng=0 + lj_eng = lj_eng + epsilon + # this is continuous at the minima, and we mask out everything greater + # TODO: this is probably broken with NaNs at delta + lj_eng = lj_eng * (edge_len < (2 ** (1.0 / self.exponent) + delta)) + + if self._has_cutoff: + # apply the cutoff for smoothness + lj_eng = lj_eng * self.cutoff(edge_len) + + # sum edge LJ energies onto atoms + atomic_eng = scatter( + lj_eng, + edge_center, + dim=0, + dim_size=len(data[AtomicDataDict.POSITIONS_KEY]), + ) + if AtomicDataDict.PER_ATOM_ENERGY_KEY in data: + atomic_eng = atomic_eng + data[AtomicDataDict.PER_ATOM_ENERGY_KEY] + data[AtomicDataDict.PER_ATOM_ENERGY_KEY] = atomic_eng + return data + + def __repr__(self) -> str: + def _f(e): + e = e.data + if e.ndim == 0: + return f"{e:.6f}" + elif e.ndim == 2: + return f"{e}" + + return f"PairPotential(lj_style={self.lj_style} | σ={_f(self.sigma)} δ={_f(self.delta)} ε={_f(self.epsilon)} exp={self.exponent:.1f})" + + def update_for_rescale(self, rescale_module: RescaleOutput): + if AtomicDataDict.PER_ATOM_ENERGY_KEY not in rescale_module.scale_keys: + return + with torch.no_grad(): + # Our energy will be scaled by scale_by later, so we have to divide here to cancel out: + self.epsilon.copy_(self.epsilon / rescale_module.scale_by.item()) + + +@torch.jit.script +def _zbl( + Z: torch.Tensor, + r: torch.Tensor, + atom_types: torch.Tensor, + edge_index: torch.Tensor, + r_max: float, + p: float, + qqr2exesquare: float, +) -> torch.Tensor: + # from LAMMPS pair_zbl_const.h + pzbl: float = 0.23 + a0: float = 0.46850 + c1: float = 0.02817 + c2: float = 0.28022 + c3: float = 0.50986 + c4: float = 0.18175 + d1: float = -0.20162 + d2: float = -0.40290 + d3: float = -0.94229 + d4: float = -3.19980 + # compute + edge_types = torch.index_select(atom_types, 0, edge_index.reshape(-1)).view(2, -1) + Zi = torch.index_select(Z, 0, edge_types[0]) + Zj = torch.index_select(Z, 0, edge_types[1]) + x = ((torch.pow(Zi, pzbl) + torch.pow(Zj, pzbl)) * r) / a0 + psi = ( + c1 * (d1 * x).exp() + + c2 * (d2 * x).exp() + + c3 * (d3 * x).exp() + + c4 * (d4 * x).exp() + ) + eng = qqr2exesquare * ((Zi * Zj) / r) * psi + + # compute cutoff envelope + r = r / r_max + cutoff = 1.0 - (((p + 1.0) * (p + 2.0) / 2.0) * torch.pow(r, p)) + cutoff = cutoff + (p * (p 
+ 2.0) * torch.pow(r, p + 1.0))
+    cutoff = cutoff - ((p * (p + 1.0) / 2) * torch.pow(r, p + 2.0))
+    cutoff = cutoff * (r < 1.0)
+
+    return cutoff * eng
+
+
+@compile_mode("script")
+class ZBL(GraphModuleMixin, torch.nn.Module):
+    """Add a ZBL pair potential to the edge energy.
+
+    PLEASE NOTE: This class is parameterized for ASE units of Å and eV (i.e. LAMMPS `metal` units)
+    and will give nonsense results if you are training in other units.
+    """
+
+    num_types: int
+    r_max: float
+    PolynomialCutoff_p: float
+    _qqr2exesquare: float
+
+    def __init__(
+        self,
+        num_types: int,
+        r_max: float,
+        type_to_chemical_symbol: Optional[Dict[int, str]] = None,
+        PolynomialCutoff_p: float = 6.0,
+        irreps_in=None,
+    ):
+        super().__init__()
+        self._init_irreps(
+            irreps_in=irreps_in, irreps_out={AtomicDataDict.PER_ATOM_ENERGY_KEY: "0e"}
+        )
+        if type_to_chemical_symbol is not None:
+            assert set(type_to_chemical_symbol.keys()) == set(range(num_types))
+            atomic_numbers: List[int] = [
+                ase.data.atomic_numbers[type_to_chemical_symbol[type_i]]
+                for type_i in range(num_types)
+            ]
+        else:
+            raise RuntimeError(
+                "Either chemical_symbol_to_type or type_to_chemical_symbol is required."
+            )
+        assert len(atomic_numbers) == num_types
+        # LAMMPS note on units:
+        # > The numerical values of the exponential decay constants in the
+        # > screening function depend on the unit of distance. In the above
+        # > equation they are given for units of Angstroms. LAMMPS will
+        # > automatically convert these values to the distance unit of the
+        # > specified LAMMPS units setting. The values of Z should always be
+        # > given as multiples of a proton’s charge, e.g. 29.0 for copper.
+        # So, we store the atomic numbers directly.
+        self.register_buffer(
+            "atomic_numbers",
+            torch.as_tensor(atomic_numbers, dtype=torch.get_default_dtype()),
+        )
+        # And we have to convert our value of the prefactor into the model's physical units
+        # Here, the prefactor is (electron charge)^2 / (4 * pi * electric permittivity of vacuum)
+        # we have a value for that in eV and Angstrom
+        # See https://github.com/lammps/lammps/blob/c415385ab4b0983fa1c72f9e92a09a8ed7eebe4a/src/update.cpp#L187 for values from LAMMPS
+        # LAMMPS uses `force->qqr2e * force->qelectron * force->qelectron`
+        # Allow other units later
+        self._qqr2exesquare = {"metal": 14.399645 * (1.0) ** 2}["metal"]
+        self.r_max = float(r_max)
+        self.PolynomialCutoff_p = float(PolynomialCutoff_p)
+
+    def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type:
+        data = AtomicDataDict.with_edge_vectors(data, with_lengths=True)
+        edge_center = data[AtomicDataDict.EDGE_INDEX_KEY][0]
+
+        zbl_edge_eng = _zbl(
+            Z=self.atomic_numbers,
+            r=data[AtomicDataDict.EDGE_LENGTH_KEY],
+            atom_types=data[AtomicDataDict.ATOM_TYPE_KEY],
+            edge_index=data[AtomicDataDict.EDGE_INDEX_KEY],
+            r_max=self.r_max,
+            p=self.PolynomialCutoff_p,
+            qqr2exesquare=self._qqr2exesquare,
+        ).unsqueeze(-1)
+        atomic_eng = scatter(
+            zbl_edge_eng,
+            edge_center,
+            dim=0,
+            dim_size=len(data[AtomicDataDict.POSITIONS_KEY]),
+        )
+        if AtomicDataDict.PER_ATOM_ENERGY_KEY in data:
+            atomic_eng = atomic_eng + data[AtomicDataDict.PER_ATOM_ENERGY_KEY]
+        data[AtomicDataDict.PER_ATOM_ENERGY_KEY] = atomic_eng
+        return data
+
+    def update_for_rescale(self, rescale_module: RescaleOutput):
+        if AtomicDataDict.PER_ATOM_ENERGY_KEY not in rescale_module.scale_keys:
+            return
+        # Our energy will be scaled by scale_by later, so we have to divide here to cancel out:
+        self._qqr2exesquare /= rescale_module.scale_by.item()
+
+
+__all__ = ["LennardJones", "ZBL"]
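For reference, the pair energy implemented by `_zbl` above is the standard ZBL screened Coulomb interaction E(r) = qqr2e * Zi * Zj / r * phi(r / a), with screening length a = 0.46850 / (Zi^0.23 + Zj^0.23) Å and phi the four-term exponential screening function whose coefficients are the c1..c4 / d1..d4 constants. A minimal NumPy sketch of the same quantity (illustrative only, not part of the patch; it assumes LAMMPS `metal` units of eV and Å and omits the polynomial cutoff envelope):

import numpy as np


def zbl_pair_energy(Zi: float, Zj: float, r: np.ndarray) -> np.ndarray:
    """Bare ZBL pair energy in eV for distances r in Angstrom (no cutoff smoothing)."""
    qqr2e = 14.399645  # e^2 / (4 pi eps0) in eV * Angstrom, the `metal`-units value used above
    a = 0.46850 / (Zi**0.23 + Zj**0.23)  # universal screening length
    x = r / a
    # universal screening function, same coefficients as c1..c4 / d1..d4 above
    phi = (
        0.02817 * np.exp(-0.20162 * x)
        + 0.28022 * np.exp(-0.40290 * x)
        + 0.50986 * np.exp(-0.94229 * x)
        + 0.18175 * np.exp(-3.19980 * x)
    )
    return qqr2e * Zi * Zj / r * phi


# e.g. Cu-Cu at 1.0 and 2.0 Angstrom:
print(zbl_pair_energy(29.0, 29.0, np.array([1.0, 2.0])))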
diff --git a/tests/unit/model/test_pair.py b/tests/unit/model/test_pair.py
new file mode 100644
index 00000000..7e6919ff
--- /dev/null
+++ b/tests/unit/model/test_pair.py
@@ -0,0 +1,191 @@
+import pytest
+
+import textwrap
+import tempfile
+import os
+import sys
+import subprocess
+import numpy as np
+from pathlib import Path
+
+import ase.io
+import ase.data
+
+import torch
+
+from nequip.data import (
+    dataset_from_config,
+    AtomicInMemoryDataset,
+    AtomicData,
+    AtomicDataDict,
+)
+from nequip.model import model_from_config
+from nequip.nn import GraphModel
+from nequip.utils import Config
+
+
+def _check_and_print(retcode):
+    __tracebackhide__ = True
+    if retcode.returncode:
+        if len(retcode.stdout) > 0:
+            print(retcode.stdout.decode("ascii"))
+        if len(retcode.stderr) > 0:
+            print(retcode.stderr.decode("ascii"), file=sys.stderr)
+        retcode.check_returncode()
+
+
+@pytest.mark.skipif(
+    "LAMMPS" not in os.environ,
+    reason="test_zbl requires a LAMMPS installation pointed to by the LAMMPS environment variable",
+)
+def test_zbl(float_tolerance, BENCHMARK_ROOT):
+    config = textwrap.dedent(
+        f"""
+        root: results/
+        run_name: minimal-pair
+        seed: 123
+
+        model_builders:
+          - PairPotential
+          - StressForceOutput
+          - RescaleEnergyEtc
+
+        pair_style: ZBL
+
+        dataset: npz # type of data set, can be npz or ase
+        dataset_url: http://quantum-machine.org/gdml/data/npz/aspirin_ccsd.zip # url to download the npz. optional
+        dataset_file_name: {BENCHMARK_ROOT}/aspirin_ccsd-train.npz # path to data set file
+        key_mapping:
+          z: atomic_numbers # atomic species, integers
+          E: total_energy # total potential energies to train to
+          F: forces # atomic forces to train to
+          R: pos # raw atomic positions
+        npz_fixed_field_keys: # fields that are repeated across different examples
+          - atomic_numbers
+        r_max: 4.0
+        dataset_statistics_stride: 1
+
+        chemical_symbols:
+          - H
+          - O
+          - C
+        """
+    )
+    with tempfile.TemporaryDirectory() as tmpdir:
+        with open(tmpdir + "/config.yaml", "w") as f:
+            f.write(config)
+        config = Config.from_file(tmpdir + "/config.yaml")
+        r_max: float = config["r_max"]
+
+        dataset: AtomicInMemoryDataset = dataset_from_config(config)
+        dataset = dataset.index_select(list(range(10)))  # just ten frames
+        model: GraphModel = model_from_config(
+            config=config, initialize=True, dataset=dataset, deploy=False
+        )
+
+        # note that ASE outputs lammps types in alphabetical order of chemical symbols
+        # since we use chem symbols in this test, just put the same
+        sym_to_lammps_types = dict(
+            zip(
+                sorted(set(config["chemical_symbols"])),
+                range(1, len(config["chemical_symbols"]) + 1),
+            )
+        )
+        pair_coeff = []
+        for sym in config["chemical_symbols"]:
+            pair_coeff.append(
+                f"pair_coeff {sym_to_lammps_types[sym]} {sym_to_lammps_types[sym]} {ase.data.atomic_numbers[sym]:.1f} {ase.data.atomic_numbers[sym]:.1f}"
+            )
+        pair_coeff = "\n".join(pair_coeff)
+
+        newline = "\n"
+        PRECISION_CONST: float = 1e6
+        lmp_in = textwrap.dedent(
+            f"""
+            units metal
+            atom_style atomic
+            thermo 1
+
+            boundary s s s
+
+            read_data structure.data
+
+            pair_style zbl {r_max} {r_max} # don't use switching function
+            {pair_coeff}
+{newline.join(' mass %i 1.0' % i for i in range(1, len(config["chemical_symbols"]) + 1))}
+
+            neighbor 1.0 bin
+            neigh_modify delay 0 every 1 check no
+            fix 1 all nve
+            timestep 0.001
+
+            compute atomicenergies all pe/atom
+            compute totalatomicenergy all reduce sum c_atomicenergies
+            compute stress all pressure NULL virial # NULL means without temperature contribution
+
+            thermo_style custom step time temp 
pe c_totalatomicenergy etotal press spcpu cpuremain c_stress[*] + run 0 + print "$({PRECISION_CONST} * c_stress[1]) $({PRECISION_CONST} * c_stress[2]) $({PRECISION_CONST} * c_stress[3]) $({PRECISION_CONST} * c_stress[4]) $({PRECISION_CONST} * c_stress[5]) $({PRECISION_CONST} * c_stress[6])" file stress.dat + print $({PRECISION_CONST} * pe) file pe.dat + print $({PRECISION_CONST} * c_totalatomicenergy) file totalatomicenergy.dat + write_dump all custom output.dump id type x y z fx fy fz c_atomicenergies modify format float %20.15g + """ + ) + + # save out the LAMMPS input: + infile_path = tmpdir + "/test_repro.in" + with open(infile_path, "w") as f: + f.write(lmp_in) + + for structure in [dataset[i] for i in range(10)]: + struc_ase = structure.to_ase(type_mapper=dataset.type_mapper) + struc_ase.cell = np.eye(3) * 100 + struc_ase.positions += 50 + ase.io.write( + tmpdir + "/structure.data", + struc_ase, + format="lammps-data", + ) + + retcode = subprocess.run( + [os.environ["LAMMPS"], "-in", infile_path], + cwd=tmpdir, + env=os.environ, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + _check_and_print(retcode) + + # load dumped data + lammps_result = ase.io.read( + tmpdir + "/output.dump", format="lammps-dump-text" + ) + + # --- now check the OUTPUTS --- + nequip_out = model(AtomicData.to_AtomicDataDict(structure)) + with torch.no_grad(): + assert np.allclose( + nequip_out[AtomicDataDict.FORCE_KEY], + lammps_result.get_forces(), + atol=1e-4, + ) + assert np.allclose( + nequip_out[AtomicDataDict.PER_ATOM_ENERGY_KEY], + lammps_result.arrays["c_atomicenergies"].reshape(-1), + atol=5e-5, + ) + + # check system quantities + lammps_pe = ( + float(Path(tmpdir + "/pe.dat").read_text()) / PRECISION_CONST + ) + lammps_totalatomicenergy = ( + float(Path(tmpdir + "/totalatomicenergy.dat").read_text()) + / PRECISION_CONST + ) + assert np.allclose(lammps_pe, lammps_totalatomicenergy) + assert np.allclose( + nequip_out[AtomicDataDict.TOTAL_ENERGY_KEY], + lammps_pe, + atol=1e-6, + ) From abdc6f2d0ab739c11313c54301fa31029d3fac65 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 8 Feb 2023 14:24:24 -0500 Subject: [PATCH 068/157] fix tests --- nequip/utils/test.py | 6 ++++++ nequip/utils/unittests/model_tests.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/nequip/utils/test.py b/nequip/utils/test.py index f349f5b6..a897d77a 100644 --- a/nequip/utils/test.py +++ b/nequip/utils/test.py @@ -186,6 +186,12 @@ def assert_AtomicData_equivariant( irreps_in.update(func.irreps_in) irreps_in = {k: v for k, v in irreps_in.items() if k in data_in[0]} irreps_out = func.irreps_out.copy() + # Remove batch-related keys from the irreps_out, if we aren't using batched inputs + irreps_out = { + k: v + for k, v in irreps_out.items() + if not (k in ("batch", "ptr") and "batch" not in data_in) + } # for certain things, we don't care what the given irreps are... 
# make sure that we test correctly for equivariance: for irps in (irreps_in, irreps_out): diff --git a/nequip/utils/unittests/model_tests.py b/nequip/utils/unittests/model_tests.py index 6434ea0e..a889701e 100644 --- a/nequip/utils/unittests/model_tests.py +++ b/nequip/utils/unittests/model_tests.py @@ -97,7 +97,7 @@ def test_jit(self, model, atomic_batch, device): atol = { # tight, but not that tight, since GPU nondet has to pass # plus model insides are still float32 with global dtype float64 in the tests - torch.float32: 1e-6, + torch.float32: 5e-6, torch.float64: 5e-7, }[torch.get_default_dtype()] From fcb921f098d70aa74314e6ba0054b4e98f6ba15e Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 10 Feb 2023 01:46:09 -0500 Subject: [PATCH 069/157] Use less data nequip-benchmark --- CHANGELOG.md | 1 + nequip/scripts/benchmark.py | 12 +++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f6e50743..127b845a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ Most recent change on the bottom. - [Breaking] Default nonlinearities are `silu` (`e`) and `tanh` (`o`) - Will not reproduce previous versions' data shuffling order (for all practical purposes this does not matter, the `shuffle` option is unchanged) - [Breaking] `default_dtype` defaults to `float64` (`model_dtype` default `float32`) +- `nequip-benchmark` now only uses `--n-data` frames to build the model ### Fixed - Work with `wandb>=0.13.8` diff --git a/nequip/scripts/benchmark.py b/nequip/scripts/benchmark.py index 80ea1873..90ed0218 100644 --- a/nequip/scripts/benchmark.py +++ b/nequip/scripts/benchmark.py @@ -116,20 +116,22 @@ def main(args=None): dataset = dataset_from_config(config) dataset_time = time.time() - dataset_time print(f" loading dataset took {dataset_time:.4f}s") + print( + f" loaded dataset of size {len(dataset)} and sampled --n-data={args.n_data} frames" + ) dataset_rng = torch.Generator() dataset_rng.manual_seed(config.get("dataset_seed", config.get("seed", 12345))) + dataset = dataset.index_select( + torch.randperm(len(dataset), generator=dataset_rng)[: args.n_data] + ) datas_list = [ - AtomicData.to_AtomicDataDict(dataset[i].to(device)) - for i in torch.randperm(len(dataset), generator=dataset_rng)[: args.n_data] + AtomicData.to_AtomicDataDict(dataset[i].to(device)) for i in range(args.n_data) ] n_atom: int = len(datas_list[0]["pos"]) if not all(len(d["pos"]) == n_atom for d in datas_list): raise NotImplementedError( "nequip-benchmark does not currently handle benchmarking on data frames with variable number of atoms" ) - print( - f" loaded dataset of size {len(dataset)} and sampled --n-data={args.n_data} frames" - ) # print some dataset information print(" benchmark frames statistics:") print(f" number of atoms: {n_atom}") From 9fbee9124890a89e208f5607141bb9e8ba8c4fbf Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 10 Feb 2023 16:24:51 -0500 Subject: [PATCH 070/157] warn on override of default dtype --- nequip/utils/_global_options.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/nequip/utils/_global_options.py b/nequip/utils/_global_options.py index 907a9ed9..bba409dd 100644 --- a/nequip/utils/_global_options.py +++ b/nequip/utils/_global_options.py @@ -74,7 +74,13 @@ def _set_global_options(config, warn_on_override: bool = False) -> None: if config.get("model_debug_mode", False): 
set_irreps_debug(enabled=True) if "default_dtype" in config: - torch.set_default_dtype(dtype_from_name(config["default_dtype"])) + old_dtype = torch.get_default_dtype() + new_dtype = dtype_from_name(config["default_dtype"]) + if warn_on_override and old_dtype != new_dtype: + warnings.warn( + f"Setting the GLOBAL value for torch.set_default_dtype to `{new_dtype}` which is different than the previous value of `{old_dtype}`" + ) + torch.set_default_dtype(new_dtype) if config.get("grad_anomaly_mode", False): torch.autograd.set_detect_anomaly(True) From 3167afea85a8e256f0289effcf82756232b23ca9 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 10 Feb 2023 16:29:00 -0500 Subject: [PATCH 071/157] record and restore model and default dtype in deployment --- nequip/scripts/deploy.py | 23 ++++++++++------------- nequip/utils/misc.py | 6 ++++++ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/nequip/scripts/deploy.py b/nequip/scripts/deploy.py index 394c0005..95f217e3 100644 --- a/nequip/scripts/deploy.py +++ b/nequip/scripts/deploy.py @@ -17,8 +17,6 @@ import torch -import ase.data - from e3nn.util.jit import script from nequip.model import model_from_config @@ -26,6 +24,7 @@ from nequip.utils import Config from nequip.utils.versions import check_code_version, get_config_code_versions from nequip.scripts.train import default_config +from nequip.utils.misc import dtype_to_name from nequip.utils._global_options import _set_global_options CONFIG_KEY: Final[str] = "config" @@ -39,6 +38,8 @@ JIT_BAILOUT_KEY: Final[str] = "_jit_bailout_depth" JIT_FUSION_STRATEGY: Final[str] = "_jit_fusion_strategy" TF32_KEY: Final[str] = "allow_tf32" +DEFAULT_DTYPE_KEY: Final[str] = "default_dtype" +MODEL_DTYPE_KEY: Final[str] = "model_dtype" _ALL_METADATA_KEYS = [ CONFIG_KEY, @@ -51,6 +52,8 @@ JIT_BAILOUT_KEY, JIT_FUSION_STRATEGY, TF32_KEY, + DEFAULT_DTYPE_KEY, + MODEL_DTYPE_KEY, ] @@ -105,6 +108,7 @@ def load_deployed_model( if set_global_options: global_config_dict = {} global_config_dict["allow_tf32"] = bool(int(metadata[TF32_KEY])) + global_config_dict["default_dtype"] = str(metadata[DEFAULT_DTYPE_KEY]) # JIT strategy strategy = metadata.get(JIT_FUSION_STRATEGY, "") if strategy != "": @@ -235,17 +239,8 @@ def main(args=None): ) metadata[R_MAX_KEY] = str(float(config["r_max"])) - if "allowed_species" in config: - # This is from before the atomic number updates - n_species = len(config["allowed_species"]) - type_names = { - type: ase.data.chemical_symbols[atomic_num] - for type, atomic_num in enumerate(config["allowed_species"]) - } - else: - # The new atomic number setup - n_species = str(config["num_types"]) - type_names = config["type_names"] + n_species = str(config["num_types"]) + type_names = config["type_names"] metadata[N_SPECIES_KEY] = str(n_species) metadata[TYPE_NAMES_KEY] = " ".join(type_names) @@ -255,6 +250,8 @@ def main(args=None): "%s,%i" % e for e in config[JIT_FUSION_STRATEGY] ) metadata[TF32_KEY] = str(int(config["allow_tf32"])) + metadata[DEFAULT_DTYPE_KEY] = dtype_to_name(config["default_dtype"]) + metadata[MODEL_DTYPE_KEY] = dtype_to_name(config["model_dtype"]) metadata[CONFIG_KEY] = yaml.dump(dict(config)) metadata = {k: v.encode("ascii") for k, v in metadata.items()} diff --git a/nequip/utils/misc.py b/nequip/utils/misc.py index 47b57c0d..2bbc9257 100644 --- a/nequip/utils/misc.py +++ b/nequip/utils/misc.py @@ -10,6 +10,12 @@ def dtype_from_name(name: Union[str, torch.dtype]) -> torch.dtype: return {"float32": torch.float32, 
"float64": torch.float64}[name] +def dtype_to_name(name: Union[str, torch.dtype]) -> torch.dtype: + if isinstance(name, str): + return name + return {torch.float32: "float32", torch.float64: "float64"}[name] + + @contextlib.contextmanager def torch_default_dtype(dtype): """Set `torch.get_default_dtype()` for the duration of a with block, cleaning up with a `finally`. From 98a510fc26c5a1d92faf399e457d74db7399af69 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 10 Feb 2023 17:50:17 -0500 Subject: [PATCH 072/157] refactor --- nequip/scripts/evaluate.py | 85 +++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 37 deletions(-) diff --git a/nequip/scripts/evaluate.py b/nequip/scripts/evaluate.py index 4ebcb92c..8837f4b0 100644 --- a/nequip/scripts/evaluate.py +++ b/nequip/scripts/evaluate.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Optional, Tuple, List import sys import argparse import logging @@ -12,7 +12,7 @@ import torch from nequip.data import AtomicData, Collater, dataset_from_config, register_fields -from nequip.scripts.deploy import load_deployed_model, R_MAX_KEY +from nequip.scripts.deploy import load_deployed_model, R_MAX_KEY, TYPE_NAMES_KEY from nequip.scripts._logger import set_up_script_logger from nequip.scripts.train import default_config, check_code_version from nequip.utils._global_options import _set_global_options @@ -24,6 +24,48 @@ register_fields(graph_fields=[ORIGINAL_DATASET_INDEX_KEY]) +def _load_deployed_or_traindir( + path: Path, device +) -> Tuple[torch.nn.Module, bool, float, List[str]]: + loaded_deployed_model: bool = False + model_r_max = None + type_names = None + try: + model, metadata = load_deployed_model( + path, + device=device, + set_global_options=True, # don't warn that setting + ) + # the global settings for a deployed model are set by + # set_global_options in the call to load_deployed_model + # above + model_r_max = float(metadata[R_MAX_KEY]) + type_names = metadata[TYPE_NAMES_KEY].split(" ") + loaded_deployed_model = True + except ValueError: # its not a deployed model + loaded_deployed_model = False + # we don't do this in the `except:` block to avoid "during handing of this exception another exception" + # chains if there is an issue loading the training session model. This makes the error messages more + # comprehensible: + if not loaded_deployed_model: + # Use the model config, regardless of dataset config + global_config = path.parent / "config.yaml" + global_config = Config.from_file(str(global_config), defaults=default_config) + _set_global_options(global_config) + check_code_version(global_config) + del global_config + + # load a training session model + model, model_config = Trainer.load_model_from_training_session( + traindir=path.parent, model_name=path.name + ) + model = model.to(device) + model_r_max = model_config["r_max"] + type_names = model_config["type_names"] + model.eval() + return model, load_deployed_model, model_r_max, type_names + + def main(args=None, running_as_script: bool = True): # in results dir, do: nequip-deploy build --train-dir . deployed.pth parser = argparse.ArgumentParser( @@ -196,41 +238,10 @@ def main(args=None, running_as_script: bool = True): # Load model: logger.info("Loading model... 
") - loaded_deployed_model: bool = False - model_r_max = None - try: - model, metadata = load_deployed_model( - args.model, - device=device, - set_global_options=True, # don't warn that setting - ) - logger.info("loaded deployed model.") - # the global settings for a deployed model are set by - # set_global_options in the call to load_deployed_model - # above - model_r_max = float(metadata[R_MAX_KEY]) - loaded_deployed_model = True - except ValueError: # its not a deployed model - loaded_deployed_model = False - # we don't do this in the `except:` block to avoid "during handing of this exception another exception" - # chains if there is an issue loading the training session model. This makes the error messages more - # comprehensible: - if not loaded_deployed_model: - # Use the model config, regardless of dataset config - global_config = args.model.parent / "config.yaml" - global_config = Config.from_file(str(global_config), defaults=default_config) - _set_global_options(global_config) - check_code_version(global_config) - del global_config - - # load a training session model - model, model_config = Trainer.load_model_from_training_session( - traindir=args.model.parent, model_name=args.model.name - ) - model = model.to(device) - logger.info("loaded model from training session") - model_r_max = model_config["r_max"] - model.eval() + model, loaded_deployed_model, model_r_max, _ = _load_deployed_or_traindir( + args.model, device=device + ) + logger.info(f" loaded{' deployed' if loaded_deployed_model else ''} model") # Load a config file logger.info( From 8ab8895389642a5a758012f6b47615b2c841c366 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 10 Feb 2023 17:50:26 -0500 Subject: [PATCH 073/157] fix when cell not present --- nequip/nn/_grad_output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nequip/nn/_grad_output.py b/nequip/nn/_grad_output.py index 3649c7b0..bfc1462f 100644 --- a/nequip/nn/_grad_output.py +++ b/nequip/nn/_grad_output.py @@ -343,9 +343,9 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: ).unsqueeze(-1) stress = virial / volume.view(num_batch, 1, 1) data[AtomicDataDict.CELL_KEY] = orig_cell - data[AtomicDataDict.STRESS_KEY] = stress else: stress = self._empty # torchscript + data[AtomicDataDict.STRESS_KEY] = stress # see discussion in https://github.com/libAtoms/QUIP/issues/227 about sign convention # they say the standard convention is virial = -stress x volume From 25c42d79108b076050037e906b0cb4e5caca71de Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 10 Feb 2023 17:50:41 -0500 Subject: [PATCH 074/157] fix RDF to give self-self RDFs --- examples/rdf.py | 16 ++++++++++++---- nequip/data/dataset.py | 2 +- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/examples/rdf.py b/examples/rdf.py index 13377417..c44c9c71 100644 --- a/examples/rdf.py +++ b/examples/rdf.py @@ -16,6 +16,7 @@ description="Plot RDFs of dataset specified in a `nequip` YAML file" ) parser.add_argument("config", help="YAML file configuring dataset") +parser.add_argument("--output", help="File to write plot to", default=None) args = parser.parse_args() config = Config.from_file(args.config, defaults=default_config) _set_global_options(config) @@ -32,16 +33,23 @@ print("Plotting...") num_types: int = dataset.type_mapper.num_types -fig, axs = plt.subplots(nrows=int(comb(N=num_types, k=2)), sharex=True) +fig, axs = 
plt.subplots(nrows=int(comb(N=num_types, k=2, repetition=True)), sharex=True) -for i, (type1, type2) in enumerate(itertools.combinations(range(num_types), 2)): +for i, (type1, type2) in enumerate( + itertools.combinations_with_replacement(range(num_types), 2) +): ax = axs[i] ax.set_ylabel( - f"{dataset.type_mapper.type_names[type1]}-{dataset.type_mapper.type_names[type2]} RDF" + f"{dataset.type_mapper.type_names[type1]}-{dataset.type_mapper.type_names[type2]}" ) hist, bin_edges = rdfs[(type1, type2)] ax.plot(bin_edges[:-1], hist) ax.set_xlabel("Distance") +plt.suptitle("RDF") -plt.show() +plt.tight_layout() +if args.output is None: + plt.show() +else: + plt.savefig(args.output) diff --git a/nequip/data/dataset.py b/nequip/data/dataset.py index 526afdf1..af53e734 100644 --- a/nequip/data/dataset.py +++ b/nequip/data/dataset.py @@ -629,7 +629,7 @@ def rdf( # +1 since these are bin_edges including rightmost bins = bin_width * np.arange(n_bins + 1) - for type1, type2 in itertools.combinations( + for type1, type2 in itertools.combinations_with_replacement( range(self.type_mapper.num_types), 2 ): # Try to do as much of this as possible in-place From 7e0ecf8202d682803cfe6b0299f992640083a924 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Sun, 12 Feb 2023 12:40:02 -0500 Subject: [PATCH 075/157] add plotting script --- examples/plot_dimers.py | 99 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 examples/plot_dimers.py diff --git a/examples/plot_dimers.py b/examples/plot_dimers.py new file mode 100644 index 00000000..bafac7ac --- /dev/null +++ b/examples/plot_dimers.py @@ -0,0 +1,99 @@ +"""Plot energies of two-atom dimers from a NequIP model.""" + +import argparse +import itertools +from pathlib import Path + +from scipy.special import comb +import matplotlib.pyplot as plt + +import torch + +from nequip.data import AtomicData, AtomicDataDict +from nequip.scripts.evaluate import _load_deployed_or_traindir + +# Parse arguments: +parser = argparse.ArgumentParser( + description="Plot energies of two-atom dimers from a NequIP model" +) +parser.add_argument("model", help="Training dir or deployed model", type=Path) +parser.add_argument( + "--device", help="Device", default="cuda" if torch.cuda.is_available() else "cpu" +) +parser.add_argument("--output", help="File to write plot to", default=None) +parser.add_argument("--r-min", default=1.0, type=float) +parser.add_argument("--r-max", default=None, type=float) +parser.add_argument("--n-samples", default=500, type=int) +args = parser.parse_args() + +print("Loading model... 
") +model, loaded_deployed_model, model_r_max, type_names = _load_deployed_or_traindir( + args.model, device=args.device +) +print(f" loaded{' deployed' if loaded_deployed_model else ''} model") +num_types = len(type_names) + +if args.r_max is not None: + model_r_max = args.r_max + +print("Computing dimers...") +potential = {} +N_sample = args.n_samples +N_combs = len(list(itertools.combinations_with_replacement(range(num_types), 2))) +r = torch.zeros(N_sample * N_combs, 2, 3, device=args.device) +rs_one = torch.linspace(args.r_min, model_r_max, 500, device=args.device) +rs = rs_one.repeat([N_combs]) +assert rs.shape == (N_combs * N_sample,) +r[:, 1, 0] += rs # offset second atom along x axis +types = torch.as_tensor( + [list(e) for e in itertools.combinations_with_replacement(range(num_types), 2)] +) +types = types.reshape(N_combs, 1, 2).expand(N_combs, N_sample, 2).reshape(-1) +r = r.reshape(-1, 3) +assert types.shape == r.shape[:1] +N_at_total = N_sample * N_combs * 2 +assert len(types) == N_at_total +edge_index = torch.vstack( + ( + torch.arange(N_at_total, device=args.device, dtype=torch.long), + torch.arange(1, N_at_total + 1, device=args.device, dtype=torch.long) + % N_at_total, + ) +) +data = AtomicData(pos=r, atom_types=types, edge_index=edge_index) +data.batch = torch.arange(N_sample * N_combs, device=args.device).repeat_interleave(2) +data.ptr = torch.arange(0, 2 * N_sample * N_combs + 1, 2, device=args.device) +result = model(AtomicData.to_AtomicDataDict(data.to(device=args.device))) + +print("Plotting...") +energies = ( + result[AtomicDataDict.TOTAL_ENERGY_KEY] + .reshape(N_combs, N_sample) + .cpu() + .detach() + .numpy() +) +del result +rs_one = rs_one.cpu().numpy() +nrows = int(comb(N=num_types, k=2, repetition=True)) +fig, axs = plt.subplots( + nrows=nrows, + sharex=True, + figsize=(6, 2 * nrows), + dpi=120, +) + +for i, (type1, type2) in enumerate( + itertools.combinations_with_replacement(range(num_types), 2) +): + ax = axs[i] + ax.set_ylabel(f"{type_names[type1]}-{type_names[type2]}") + ax.plot(rs_one, energies[i]) + +ax.set_xlabel("Distance") +plt.suptitle("$E_\\mathrm{total}$ for two-atom pairs") +plt.tight_layout() +if args.output is None: + plt.show() +else: + plt.savefig(args.output) From 9e0e5721e7b2d551219d2737ab619833e478db2d Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Sun, 12 Feb 2023 12:40:55 -0500 Subject: [PATCH 076/157] pair --- configs/minimal_pair.yaml | 19 +++++++++++++------ examples/lj/README.md | 5 +++++ nequip/nn/pair_potential.py | 19 ++++++++++++++----- tests/unit/model/test_pair.py | 27 +++++++-------------------- 4 files changed, 39 insertions(+), 31 deletions(-) diff --git a/configs/minimal_pair.yaml b/configs/minimal_pair.yaml index 7c50e766..ad178d1e 100644 --- a/configs/minimal_pair.yaml +++ b/configs/minimal_pair.yaml @@ -37,6 +37,7 @@ num_features: 16 # lj_delta_trainable: false # lj_epsilon_trainable: true pair_style: ZBL +units: real # Ang and kcal/mol # data set # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys @@ -64,14 +65,20 @@ wandb: false # verbose: debug # training -n_train: 5 -n_val: 5 -batch_size: 1 -validation_batch_size: 5 -max_epochs: 10 +n_train: 150 # number of training data +n_val: 50 # number of validation data +learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best - this is often one of the most important hyperparameters to tune +batch_size: 5 # batch size, we found it important to 
keep this small for most applications including forces (1-5); for energy-only training, higher batch sizes work better
+validation_batch_size: 10 # batch size for evaluating the model during validation. This does not affect the training results, but using the highest value possible (<=n_val) without running out of memory will speed up your training.
+max_epochs: 100000
+append: true
 
 # loss function
-loss_coeffs: forces
+loss_coeffs:
+  forces: 1 # if using PerAtomMSELoss, a default weight of 1:1 on each should work well
+  total_energy:
+    - 1
+    - PerAtomMSELoss
 
 # optimizer
 optimizer_name: Adam
diff --git a/examples/lj/README.md b/examples/lj/README.md
index 424cbfb9..1483f2bb 100644
--- a/examples/lj/README.md
+++ b/examples/lj/README.md
@@ -1,3 +1,8 @@
+Lennard-Jones Custom Module Example
+===================================
+
+Note: for production simulations, a more appropriate Lennard-Jones energy term is provided in `nequip.model.PairPotentialTerm` / `nequip.model.PairPotential`.
+
 Run commands with
 ```
 PYTHONPATH=`pwd`:$PYTHONPATH nequip-* ...
diff --git a/nequip/nn/pair_potential.py b/nequip/nn/pair_potential.py
index c51c06a0..c2b42ebf 100644
--- a/nequip/nn/pair_potential.py
+++ b/nequip/nn/pair_potential.py
@@ -209,19 +209,19 @@ def _zbl(
 class ZBL(GraphModuleMixin, torch.nn.Module):
     """Add a ZBL pair potential to the edge energy.
 
-    PLEASE NOTE: This class is parameterized for ASE units of Å and eV (i.e. LAMMPS `metal` units)
-    and will give nonsense results if you are training in other units.
+    Args:
+        units (str): what units the model/data are in, using LAMMPS names.
     """
 
     num_types: int
     r_max: float
     PolynomialCutoff_p: float
-    _qqr2exesquare: float
 
     def __init__(
         self,
         num_types: int,
         r_max: float,
+        units: str,
         type_to_chemical_symbol: Optional[Dict[int, str]] = None,
         PolynomialCutoff_p: float = 6.0,
         irreps_in=None,
@@ -258,8 +258,17 @@ def __init__(
         # we have a value for that in eV and Angstrom
         # See https://github.com/lammps/lammps/blob/c415385ab4b0983fa1c72f9e92a09a8ed7eebe4a/src/update.cpp#L187 for values from LAMMPS
         # LAMMPS uses `force->qqr2e * force->qelectron * force->qelectron`
-        # Allow other units later
-        self._qqr2exesquare = {"metal": 14.399645 * (1.0) ** 2}["metal"]
+        # Make it a buffer so rescalings are persistent; it still acts as a scalar Tensor
+        self.register_buffer(
+            "_qqr2exesquare",
+            torch.as_tensor(
+                {"metal": 14.399645 * (1.0) ** 2, "real": 332.06371 * (1.0) ** 2}[
+                    units
+                ],
+                dtype=torch.float64,
+            )
+            * 0.5,  # Put half the energy on each of ij, ji
+        )
         self.r_max = float(r_max)
         self.PolynomialCutoff_p = float(PolynomialCutoff_p)
 
diff --git a/tests/unit/model/test_pair.py b/tests/unit/model/test_pair.py
index 7e6919ff..d0ed0f6a 100644
--- a/tests/unit/model/test_pair.py
+++ b/tests/unit/model/test_pair.py
@@ -51,24 +51,11 @@ def test_zbl(float_tolerance, BENCHMARK_ROOT):
           - RescaleEnergyEtc
 
         pair_style: ZBL
+        units: metal
 
-        dataset: npz # type of data set, can be npz or ase
-        dataset_url: http://quantum-machine.org/gdml/data/npz/aspirin_ccsd.zip # url to download the npz. optional
-        dataset_file_name: {BENCHMARK_ROOT}/aspirin_ccsd-train.npz # path to data set file
-        key_mapping:
-          z: atomic_numbers # atomic species, integers
-          E: total_energy # total potential energies to train to
-          F: forces # atomic forces to train to
-          R: pos # raw atomic positions
-        npz_fixed_field_keys: # fields that are repeated across different examples
-          - atomic_numbers
+        # TODO: pairs of atoms!
r_max: 4.0 dataset_statistics_stride: 1 - - chemical_symbols: - - H - - O - - C """ ) with tempfile.TemporaryDirectory() as tmpdir: @@ -164,11 +151,11 @@ def test_zbl(float_tolerance, BENCHMARK_ROOT): # --- now check the OUTPUTS --- nequip_out = model(AtomicData.to_AtomicDataDict(structure)) with torch.no_grad(): - assert np.allclose( - nequip_out[AtomicDataDict.FORCE_KEY], - lammps_result.get_forces(), - atol=1e-4, - ) + # assert np.allclose( + # nequip_out[AtomicDataDict.FORCE_KEY], + # lammps_result.get_forces(), + # atol=1e-4, + # ) assert np.allclose( nequip_out[AtomicDataDict.PER_ATOM_ENERGY_KEY], lammps_result.arrays["c_atomicenergies"].reshape(-1), From a9866aafa1474b3d7a8d9e343a3b26b07767eafb Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Sun, 12 Feb 2023 20:33:37 -0500 Subject: [PATCH 077/157] rescale only when there is a scale --- nequip/nn/pair_potential.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nequip/nn/pair_potential.py b/nequip/nn/pair_potential.py index c2b42ebf..8a640292 100644 --- a/nequip/nn/pair_potential.py +++ b/nequip/nn/pair_potential.py @@ -156,6 +156,8 @@ def _f(e): def update_for_rescale(self, rescale_module: RescaleOutput): if AtomicDataDict.PER_ATOM_ENERGY_KEY not in rescale_module.scale_keys: return + if not rescale_module.has_scale: + return with torch.no_grad(): # Our energy will be scaled by scale_by later, so we have to divide here to cancel out: self.epsilon.copy_(self.epsilon / rescale_module.scale_by.item()) @@ -299,6 +301,8 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: def update_for_rescale(self, rescale_module: RescaleOutput): if AtomicDataDict.PER_ATOM_ENERGY_KEY not in rescale_module.scale_keys: return + if not rescale_module.has_scale: + return # Our energy will be scaled by scale_by later, so we have to divide here to cancel out: self._qqr2exesquare /= rescale_module.scale_by.item() From e941b4fcea1bf1651bc11e077e31009757e97b55 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Sun, 12 Feb 2023 20:34:09 -0500 Subject: [PATCH 078/157] Test ZBL against LAMMPS --- tests/unit/model/test_pair.py | 178 ----------------------- tests/unit/model/test_pair/.gitignore | 2 + tests/unit/model/test_pair/test_zbl.py | 64 ++++++++ tests/unit/model/test_pair/zbl.npy | Bin 0 -> 86528 bytes tests/unit/model/test_pair/zbl_data.lmps | 46 ++++++ 5 files changed, 112 insertions(+), 178 deletions(-) delete mode 100644 tests/unit/model/test_pair.py create mode 100644 tests/unit/model/test_pair/.gitignore create mode 100644 tests/unit/model/test_pair/test_zbl.py create mode 100644 tests/unit/model/test_pair/zbl.npy create mode 100644 tests/unit/model/test_pair/zbl_data.lmps diff --git a/tests/unit/model/test_pair.py b/tests/unit/model/test_pair.py deleted file mode 100644 index d0ed0f6a..00000000 --- a/tests/unit/model/test_pair.py +++ /dev/null @@ -1,178 +0,0 @@ -import pytest - -import textwrap -import tempfile -import os -import sys -import subprocess -import numpy as np -from pathlib import Path - -import ase.io -import ase.data - -import torch - -from nequip.data import ( - dataset_from_config, - AtomicInMemoryDataset, - AtomicData, - AtomicDataDict, -) -from nequip.model import model_from_config -from nequip.nn import GraphModel -from nequip.utils import Config - - -def _check_and_print(retcode): - __tracebackhide__ = True - if retcode.returncode: - if len(retcode.stdout) > 0: - print(retcode.stdout.decode("ascii")) - if 
len(retcode.stderr) > 0: - print(retcode.stderr.decode("ascii"), file=sys.stderr) - retcode.check_returncode() - - -@pytest.mark.skipif( - "LAMMPS" not in os.environ, - reason="test_zbl requires a LAMMPS installation pointed to by the LAMMPS environment variable", -) -def test_zbl(float_tolerance, BENCHMARK_ROOT): - config = textwrap.dedent( - f""" - root: results/ - run_name: minimal-pair - seed: 123 - - model_builders: - - PairPotential - - StressForceOutput - - RescaleEnergyEtc - - pair_style: ZBL - units: metal - - # TODO: pairs of atoms! - r_max: 4.0 - dataset_statistics_stride: 1 - """ - ) - with tempfile.TemporaryDirectory() as tmpdir: - with open(tmpdir + "/config.yaml", "w") as f: - f.write(config) - config = Config.from_file(tmpdir + "/config.yaml") - r_max: float = config["r_max"] - - dataset: AtomicInMemoryDataset = dataset_from_config(config) - dataset = dataset.index_select(list(range(10))) # just ten frames - model: GraphModel = model_from_config( - config=config, initialize=True, dataset=dataset, deploy=False - ) - - # note that ASE outputs lammps types in alphabetical order of chemical symbols - # since we use chem symbols in this test, just put the same - sym_to_lammps_types = dict( - zip( - sorted(set(config["chemical_symbols"])), - range(1, len(config["chemical_symbols"]) + 1), - ) - ) - pair_coeff = [] - for sym in config["chemical_symbols"]: - pair_coeff.append( - f"pair_coeff {sym_to_lammps_types[sym]} {sym_to_lammps_types[sym]} {ase.data.atomic_numbers[sym]:.1f} {ase.data.atomic_numbers[sym]:.1f}" - ) - pair_coeff = "\n".join(pair_coeff) - - newline = "\n" - PRECISION_CONST: float = 1e6 - lmp_in = textwrap.dedent( - f""" - units metal - atom_style atomic - thermo 1 - - boundary s s s - - read_data structure.data - - pair_style zbl {r_max} {r_max} # don't use switching function - {pair_coeff} -{newline.join(' mass %i 1.0' % i for i in range(1, len(config["chemical_symbols"]) + 1))} - - neighbor 1.0 bin - neigh_modify delay 0 every 1 check no - fix 1 all nve - timestep 0.001 - - compute atomicenergies all pe/atom - compute totalatomicenergy all reduce sum c_atomicenergies - compute stress all pressure NULL virial # NULL means without temperature contribution - - thermo_style custom step time temp pe c_totalatomicenergy etotal press spcpu cpuremain c_stress[*] - run 0 - print "$({PRECISION_CONST} * c_stress[1]) $({PRECISION_CONST} * c_stress[2]) $({PRECISION_CONST} * c_stress[3]) $({PRECISION_CONST} * c_stress[4]) $({PRECISION_CONST} * c_stress[5]) $({PRECISION_CONST} * c_stress[6])" file stress.dat - print $({PRECISION_CONST} * pe) file pe.dat - print $({PRECISION_CONST} * c_totalatomicenergy) file totalatomicenergy.dat - write_dump all custom output.dump id type x y z fx fy fz c_atomicenergies modify format float %20.15g - """ - ) - - # save out the LAMMPS input: - infile_path = tmpdir + "/test_repro.in" - with open(infile_path, "w") as f: - f.write(lmp_in) - - for structure in [dataset[i] for i in range(10)]: - struc_ase = structure.to_ase(type_mapper=dataset.type_mapper) - struc_ase.cell = np.eye(3) * 100 - struc_ase.positions += 50 - ase.io.write( - tmpdir + "/structure.data", - struc_ase, - format="lammps-data", - ) - - retcode = subprocess.run( - [os.environ["LAMMPS"], "-in", infile_path], - cwd=tmpdir, - env=os.environ, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - _check_and_print(retcode) - - # load dumped data - lammps_result = ase.io.read( - tmpdir + "/output.dump", format="lammps-dump-text" - ) - - # --- now check the OUTPUTS --- - nequip_out = 
model(AtomicData.to_AtomicDataDict(structure))
-            with torch.no_grad():
-                # assert np.allclose(
-                #     nequip_out[AtomicDataDict.FORCE_KEY],
-                #     lammps_result.get_forces(),
-                #     atol=1e-4,
-                # )
-                assert np.allclose(
-                    nequip_out[AtomicDataDict.PER_ATOM_ENERGY_KEY],
-                    lammps_result.arrays["c_atomicenergies"].reshape(-1),
-                    atol=5e-5,
-                )
-
-                # check system quantities
-                lammps_pe = (
-                    float(Path(tmpdir + "/pe.dat").read_text()) / PRECISION_CONST
-                )
-                lammps_totalatomicenergy = (
-                    float(Path(tmpdir + "/totalatomicenergy.dat").read_text())
-                    / PRECISION_CONST
-                )
-                assert np.allclose(lammps_pe, lammps_totalatomicenergy)
-                assert np.allclose(
-                    nequip_out[AtomicDataDict.TOTAL_ENERGY_KEY],
-                    lammps_pe,
-                    atol=1e-6,
-                )
diff --git a/tests/unit/model/test_pair/.gitignore b/tests/unit/model/test_pair/.gitignore
new file mode 100644
index 00000000..686a8db1
--- /dev/null
+++ b/tests/unit/model/test_pair/.gitignore
@@ -0,0 +1,2 @@
+log.lammps
+zbl.dat
\ No newline at end of file
diff --git a/tests/unit/model/test_pair/test_zbl.py b/tests/unit/model/test_pair/test_zbl.py
new file mode 100644
index 00000000..320bbec3
--- /dev/null
+++ b/tests/unit/model/test_pair/test_zbl.py
@@ -0,0 +1,64 @@
+import pytest
+
+import numpy as np
+from pathlib import Path
+
+import ase
+import ase.io
+import ase.data
+
+import torch
+
+from nequip.data.transforms import TypeMapper
+from nequip.model import model_from_config
+from nequip.ase import NequIPCalculator
+from nequip.nn import GraphModel
+from nequip.utils import Config
+
+
+@pytest.mark.parametrize("do_scale", [False, True])
+def test_zbl(do_scale: bool):
+    """Confirm our ZBL implementation matches LAMMPS."""
+    if torch.get_default_dtype() != torch.float64:
+        pytest.skip()
+    chemical_symbols_to_type = {"H": 0, "O": 1, "C": 2, "N": 3, "Cu": 4, "Au": 5}
+    r_max: float = 8.0  # see zbl_data.lmps
+    ZBL_model: GraphModel = model_from_config(
+        config=Config.from_dict(
+            {
+                "model_dtype": "float64",
+                "model_builders": [
+                    "PairPotential",
+                    "StressForceOutput",
+                    "RescaleEnergyEtc",
+                ],
+                "global_rescale_scale": 3.7777 if do_scale else None,
+                "pair_style": "ZBL",
+                "units": "metal",
+                "num_types": len(chemical_symbols_to_type),
+                "chemical_symbol_to_type": chemical_symbols_to_type,
+                "r_max": r_max + 1,  # To make cutoff envelope irrelevant
+                "PolynomialCutoff_p": 80,  # almost a step function
+            }
+        )
+    )
+    tm = TypeMapper(chemical_symbol_to_type=chemical_symbols_to_type)
+    # make test system of two atoms:
+    atoms = ase.Atoms(positions=np.zeros((2, 3)), symbols=["H", "H"])
+    atoms.calc = NequIPCalculator(ZBL_model, r_max=r_max, device="cpu", transform=tm)
+    # == load precomputed reference data ==
+    # To regenerate this data, run
+    # $ lmp -in zbl_data.lmps
+    # $ python -c "import numpy as np; d = np.loadtxt('zbl.dat', skiprows=1); np.save('zbl.npy', d)"
+    refdata = np.load(Path(__file__).parent / "zbl.npy")
+    for (r, Zi, Zj, pe, fxi, fxj) in refdata:
+        if r >= r_max:
+            continue
+        atoms.positions[1, 0] = r
+        atoms.set_atomic_numbers([int(Zi), int(Zj)])
+        # ZBL blows up for atoms that are close, so the numerics there differ from ours
+        # 1e-5 == 0.01 meV / Å
+        assert np.allclose(atoms.get_forces()[0, 0], fxi, atol=1e-5)
+        assert np.allclose(atoms.get_forces()[1, 0], fxj, atol=1e-5)
+        # 1e-4 == 0.1 meV / system, 0.05 meV / atom
+        assert np.allclose(atoms.get_potential_energy(), pe, atol=1e-4)
diff --git a/tests/unit/model/test_pair/zbl.npy b/tests/unit/model/test_pair/zbl.npy
new file mode 100644
index 0000000000000000000000000000000000000000..626bccde5b339a435c949921f005381984b8c187
GIT binary patch
literal 86528
[base85-encoded binary payload of tests/unit/model/test_pair/zbl.npy omitted; this copy of the patch series is truncated partway through the binary literal, before the zbl_data.lmps diff listed in the diffstat]
zw>@6;BtYT5V*NV{PVARtw?v25rGI>Wx;p_1_Z91h)F>3iAFP+3%*ni5WZQ)Rh5L&2 zhb~_KHPfs{uJJ1RQQez%1Ss5Btbbu=Lg0_~FXbH$UvfN{+nfM}`-=5j)jh^<-rkYB zc%5@?Kcp@TC}-b%mVF0W&40a~YPRz0UyQ=NCLb$n$NlI>$}b3&JjGvBh6UXXO3OZ z)}(2Eabtju`-pCw0H z;`s?(Q{0Fq18m$^JbzjABjcq3KT(oGr{6c)c42^x`-e5g`2hg69W${_?XcKdOCsjCzo>C`<1%0m9Ft7Jt4k zdH%Pgb*lNF%Fr*9%?%&T-wFe0v%JHts8)uT-rtqs}9FG8EW(NZl5i~ z>WZJ z{XeTV4M5#>Tbx$*9zlSO`-hdUHts8)fB4wMY)30i6rcXt z;#KuP0&Lt@Jm1)~eg7rZ>ZoF@e@pWST>@;}S3F-w!^5{SNd-0RF**0OnKl77?kk>e zAj#J z#)&yWr~?5u?kk?3Zr9iQ_xX47w$r*yZquz50XFU{p1-r>@+xE3CvrW_3)Z*JG$X*q zzrW)74?YdO6elZ`_e_cDS7!Mm3)r}?c>bSm`uD$u`$~dC98~%p&AovE6*Qu<1Q3%yr46r7olQ_p!+EQsF304aSX!M7qEnL(c>;~dpV zTYh^4FhELQNpNi<`n^1QEN&U;o-ggbfB{naN`lwjiLh1KVT$F`8oG9rOkseOzLH@1 zI*;hby9{usRqsM2t8omF(pM6kUGl2^#*aO5h~=j7LyF`KkkVHYT-EmQbF)+l-W+h~ zR;OQv43N@S6700qcYwCNGQQ@o;8~X1n*mb#ihW*5R%j%hsE9B7*MD#??ZyBb|Ne^i z{qyk1`zvD}*P-@7CmSxc?8E>Y|Ne^SZ)o}al6w0ZwEn5?rA;cW7$AK8VLr(7M}C{| z&M4wFa&@o0qOJ9d04aUNet#ARhxU5<1kEz{YCd57X9A@3l>{5B&kh^saR;Sd?$UGD z+A0F1^pyl(`Z{!X!L(xJvVGnAT>Ym6Na-sHc1|-1zgwS&EN?`w!u=}Z$uOxW4|IINTQx7Axf_tNwEtv#J=_~g6-$=tv3r`(H!Rq&t!@N%tAf>O^=k<=C z^XOF_gBIF!$n0K`On{WWlHizA%T~2B*oKZYgrD*yaRf-|EB5P?SQmCRGZcM!A+7!gH)l)jSS$LFGLrlc)L@2?&@u*+~00aE%(g0E|u z&-xkai&Q`Mds0^sM1YjOlHk0p5746gg~-;t=kz%P0tt}PSM2xCqA`~YXSkpdquTpw z@AV@_qjMy#l9Zy zJiK4W!4~QMI%b_Wn_-l%u_g(9OOuVl)jSSqQ3k3>`RoR%G0qU z&JMLBKuTXpaMbsS>utIkqa{)I(zhPAAV5lAN$~T<1-rLh8-z&9feG7}VgjV}l?1Qu zel{tLy?<5Q$)}mdM-u|1^pyllbSw__G15XkT;891>uyMZl)hr0cYF6C_~t`()GBaX z+d1%L9w+H-Wwgrs`t|z(Is{1RD+vzjX?%ObiIzxmILA!&LpK7X^pynf z@}Bs9$?kgj?SR;dL5W=mkkVJ|^Vgvx4-dNaNq&FJv+-L}+Y=zAuh_ru35EBHpFfia z6kWxzWF~fRw%x`TXGjejZ8jBKL5{?Q9XQ-?iZL zX{T1%KuTXp@dp2a+ND2IaG3S+N1f~|7$Bvuq`0%AS8br@Hhjs!&E0gyc?L-7D=B{b zwRw2<<(2r5-;v#llj0d5rLUxTU45S7lFJLRZT{KHX9FS_Af>OQSg+yS*fFoC;%NyI zGIM_*1Elno6!)pv_a-UM2HzXhaWA9m!2l_JCB@;Ze&91t5l)G??%J-;bOuQ2D=98r zSmwQM>0qp9qIqQVy73H<(pOU4*M4P5nN$}qZ!u?6M1L~|Na-sn&TX~gcCK}IY`08f z=s<;`43N@SQtY`&L*tTFCmhh+yRfZN9|lP2D=D5fcy6nlIj!(KuRf!;X7pfyl)jSU z2V*Zp>23Ul++3V%e;0RVfRw(HVyElv3smzzqwZ5x^IuPH!vHCLCB;XbvL5ulTZInI zRjxhc)7Xqa8l@!<2cPk57Qh{#eKFU#Q|DFIT zeI>>B$)1{9=Wd~nr$+tQ@#Gl+Qu<1YU$)+rJ*qSxy*oQA-roB@0aE%(iuLPHdp-}y zMu)al$s#)6CO}GGNwH~ZE2+kbOjPo|_~`s21q4XxD=7}R^QxcX+LNeY*vbv3E?y=; zN?%FwtNw*Yohp-2*85GRE4OQ_;SR6hzsZA(6o~sgM2g636RoPQrydayIaqR zyOD+J^q23qr4k^eucY|;%EUZ}9#JT@x@M5on|K1G^pzB^T5M;R^Cb-3X})f1MO`cb zQu<1YYg@&{UOX6tyjLbW2UtZDAf>OQ*j?SmKr3ejTC!hFmb5pV04aSX#a)6P3{k~? 
zD8$F>O-^Aj0aE%(ir2m>-19oa15J|ND>&q}k^m`vCB-)spB;Gk)de;DxMJ4ZcL@Pf z`igyhORu@p^6GRH_+XLOaj!)LNa-u~^~Xjn{9Bd-`ts;o!+PU+1W4&CDek9GoYDJ@ zE$W$Rd+0{&Oai3zl@u%dI6G|R7IPFn`M^50u1*9<=_~g2VP5#P8?6zVKh}J4aJ3x) zQu<1Y=k8D**WA_^g_?av54&0tAf>OQ_(<#7m7Tv2LMx=xKc{430;Ken6i+gr9Oqu5 zi+<)snr;|Ai~uQpCB7V>&}_M`b$Ljt7ql@!~T+kQHjr;ZkSdKwrn?@NG` zzLMg~6T!hdW0jFcurB$0V#dOe*Tnygnxgb-$hG zi@M*=^F`fn=lP=UxAS~a_uF~CsQc|aU)23}o-gWtJI@z&zn$lcy5G+8Mcr@b`J(Q( z^L$bF+j+jI`|Uhm)ctmzFY10f&lh#So#%_X-_G+z-EZglqVBizd{Ot?dA_Lo?L1%9 z{dS%&>V7-V7j?g#=Zm`E&htgxZ|C`eml<>b-$hG zi@M*=^F`fn=lP=UxAS~a_uF~CsQc|aU)23}o-gWtJI@z&zn$lcy5G+8Mcr@b`J(Q( z^L$bF+j+jI`|Uhm)ctmzPsyuh!hIzZx8MH1k1rFq-_G+z?YHxMQTy#YU(|j(&lk1d z&htg>xAS~a`|Uhm)P6h97q#Ec^F{5q^L$bJ?L1%9eml<>wcpP3MeVoqd{O)DJYUp) zJI@!j-_G+z?YHxMQTy#YU(|j(&lk1d&htg>xAS~a`|Uhm)P6h97q#Ec^F{5q^L$bJ z?L1%9eml<>wcpP3MeVoqd{O)DJYUp)JI@!j-_G+z?YHxMQTy#YU(|j(&lk1d&htg> zxAS~a`|Uhm)P6h97q#Ec^F{5q^L$bJ?L1%9eml<>wcpP3MeVoqd{O)DJYUp)JJ0`r z^%eWy%|0)S)AMO&_H5+cPBnESvVp?$LDrv|TVj5w@-99$y6fxyq3yDPW|pG25J)^N=_VS38IFkVi&j(rm{xY`}ibvPs=ToEW_DoM?fWq@Z)_2?BR-ohKgBKm1 zG3L2X6ay5V53>I9EBE(w7&;dROfEpi1?w1~@O+T<2gHV_IaIOl!}c!ofS;x>0~DSQ zvi|hndzbqpTj06U*vOSLT^OM7e311gMqKL4>=}W5G8fPDiE?Cs!t+7aH?iD!(X*ui zJ~~Tr$*+f23{ZGJ$oeW7`{lQf_QccY#aCZgA!C5T^Fh|nQS?4w9?ITD-v6?^^CLqB zC_Eoz{Z_Th+b{my5%2VwFf+NGE&~*v53>I3<&34n=@xj{nzu&^BH8;7e<(a3Wc^9! zKMim*|AG9T&Y0H6vl9b^|6lO@AnX12?-k3V+5@Av+f<{dlwqsR8MR`7c;bKm0<`&` z_tzi#`hCJh%@?Sod}-Xfwe@>PRPgcJmuz&!C=Yy<|v|3NmJCTY)oBe2=Ff*F~h3A8;->*_X zck(-1SmWoWPPuH9%iUp#-OY$^Vap9j}f5oe312T{%Stp#?1(HI;8)= z0`~oYfWq@Z*6$QJ&qT{31m%v}yiUU;mH>t4gRF1tG)tqsBoNK`py%T1yo~^b=YyF_!J^v?|hC_EozeLWpNHP83+(G9;ulk`=q2~c=G_@B?;wqnbfJG0Q4 zep-8y(Gmg_o)5DAJgcp}`wewO{bORnrSBFJpzwT<^-n+F*4)p`2H7o3Q=L;ahX94= zgRFlbW@-7xvQcQ*z`~wc4<{3#@O+Tf@jV@}Lcg8zzswn{q&j(q*X8n~~|CAocAvSJ{XU$*& z6rK;V{<~v?4ZfyzLEWF8IQPM+F98bA2U*|jQAx0lLOYbSXNLXpS}g(;o)5Bq+Wgf) z*Hsiy)RUd^?9=K5C_Eoz{grpmnmY9PD)*e&)-ubpBLND}2U$N@%L`kLc_*)3?G`g^ zo+1GX&j(q5PrQ20+I5w3m-48SKc@Z60?Jj7Syrx4)%e%zXH3G!#~D$ja-(jlVFRbv zWC7)WdgI@|aqz_*Hdk`xUejLhU2*tz7O?U2gY4(=_w~8kFvkNASDlp4d(-^go~=)^ RfWq^^zyIBT|KrC0`akdGMREWD literal 0 HcmV?d00001 diff --git a/tests/unit/model/test_pair/zbl_data.lmps b/tests/unit/model/test_pair/zbl_data.lmps new file mode 100644 index 00000000..6afa6e0f --- /dev/null +++ b/tests/unit/model/test_pair/zbl_data.lmps @@ -0,0 +1,46 @@ +units metal +atom_style atomic +atom_modify map yes +thermo 1 + +region 1 block 0 10 0 10 0 10 +boundary s s s +create_box 2 1 +create_atoms 1 single 0.0 0.0 0.0 +create_atoms 2 single 0.1 0.0 0.0 + +mass 1 1.0 +mass 2 1.0 + +group 2 type 2 + +neighbor 1.0 nsq # tiny box +neigh_modify delay 0 every 1 check no + +variable rmax string 8.0 +variable N string 50 + +pair_style zbl $(v_rmax) $(v_rmax) +print "r Zi Zj pe fxi fxj" file zbl.dat + +variable Zi index 1.0 6.0 7.0 8.0 29.0 79.0 +label Ziloop + + variable Zj index 1.0 6.0 7.0 8.0 29.0 79.0 + label Zjloop + pair_coeff 1 1 $(v_Zi) $(v_Zi) + pair_coeff 2 2 $(v_Zj) $(v_Zj) + + variable i loop $(v_N) + label rloop + set atom 2 x $(v_i * v_rmax / v_N) + run 0 + print "$(x[2]) $(v_Zi) $(v_Zj) $(pe) $(fx[1]) $(fx[2])" append zbl.dat + next i + jump SELF rloop + + next Zj + jump SELF Zjloop + +next Zi +jump SELF Ziloop \ No newline at end of file From e36afb598319ddcfb1a66ae9a7a6af101eab4a09 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Sun, 12 Feb 2023 21:13:51 -0500 Subject: [PATCH 079/157] ensure config --- nequip/model/_build.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git 
a/nequip/model/_build.py b/nequip/model/_build.py index 35faf536..8ddee2ac 100644 --- a/nequip/model/_build.py +++ b/nequip/model/_build.py @@ -11,11 +11,12 @@ instantiate, dtype_from_name, torch_default_dtype, + Config, ) def model_from_config( - config, + config: Config, initialize: bool = False, dataset: Optional[AtomicDataset] = None, deploy: bool = False, @@ -40,6 +41,8 @@ def model_from_config( Returns: The build model. """ + if isinstance(config, dict): + config = Config.from_dict(config) # Pre-process config type_mapper = None if dataset is not None: From e9c7a8aa84dd84bcf8f0c9481b50e6813709b65e Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Sun, 12 Feb 2023 21:14:01 -0500 Subject: [PATCH 080/157] less indexing --- nequip/nn/pair_potential.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/nequip/nn/pair_potential.py b/nequip/nn/pair_potential.py index 8a640292..59fc18a2 100644 --- a/nequip/nn/pair_potential.py +++ b/nequip/nn/pair_potential.py @@ -185,9 +185,12 @@ def _zbl( d3: float = -0.94229 d4: float = -3.19980 # compute - edge_types = torch.index_select(atom_types, 0, edge_index.reshape(-1)).view(2, -1) - Zi = torch.index_select(Z, 0, edge_types[0]) - Zj = torch.index_select(Z, 0, edge_types[1]) + edge_types = torch.index_select(atom_types, 0, edge_index.reshape(-1)) + Z = torch.index_select(Z, 0, edge_types.view(-1)).view( + 2, -1 + ) # [center/neigh, n_edge] + Zi, Zj = Z[0], Z[1] + del edge_types, Z x = ((torch.pow(Zi, pzbl) + torch.pow(Zj, pzbl)) * r) / a0 psi = ( c1 * (d1 * x).exp() From 5a7f3282bae8d595fd1f120928a9ab062691d061 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Sun, 12 Feb 2023 21:14:09 -0500 Subject: [PATCH 081/157] allow empty tensors past tests --- nequip/utils/test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nequip/utils/test.py b/nequip/utils/test.py index a897d77a..7597f226 100644 --- a/nequip/utils/test.py +++ b/nequip/utils/test.py @@ -341,11 +341,11 @@ def pre_hook(mod: GraphModuleMixin, inp): if k not in inp: pass elif isinstance(inp[k], torch.Tensor) and isinstance(ir, o3.Irreps): - if inp[k].ndim == 1: + if inp[k].ndim == 1 and inp[k].numel() > 0: raise ValueError( f"Field {k} in input to module {mname} has only one dimension (assumed to be batch-like); it must have a second irreps dimension even if irreps.dim == 1 (i.e. a single per atom scalar must have shape [N_at, 1], not [N_at])" ) - elif inp[k].shape[-1] != ir.dim: + elif inp[k].shape[-1] != ir.dim and inp[k].numel() > 0: raise ValueError( f"Field {k} in input to module {mname} has last dimension {inp[k].shape[-1]} but its irreps {ir} indicate last dimension {ir.dim}" ) @@ -366,11 +366,11 @@ def post_hook(mod: GraphModuleMixin, _, out): if k not in out: pass elif isinstance(out[k], torch.Tensor) and isinstance(ir, o3.Irreps): - if out[k].ndim == 1: + if out[k].ndim == 1 and out[k].numel() > 0: raise ValueError( f"Field {k} in output from module {mname} has only one dimension (assumed to be batch-like); it must have a second irreps dimension even if irreps.dim == 1 (i.e. 
a single per atom scalar must have shape [N_at, 1], not [N_at])" ) - elif out[k].shape[-1] != ir.dim: + elif out[k].shape[-1] != ir.dim and out[k].numel() > 0: raise ValueError( f"Field {k} in output from {mname} has last dimension {out[k].shape[-1]} but its irreps {ir} indicate last dimension {ir.dim}" ) From 1c056cf7cfc7da88fcbf4ef97a5b5637618bff1a Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Sun, 12 Feb 2023 21:14:25 -0500 Subject: [PATCH 082/157] test force smoothness --- nequip/utils/unittests/model_tests.py | 44 +++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/nequip/utils/unittests/model_tests.py b/nequip/utils/unittests/model_tests.py index a889701e..8fd47e4b 100644 --- a/nequip/utils/unittests/model_tests.py +++ b/nequip/utils/unittests/model_tests.py @@ -19,6 +19,7 @@ from nequip.data.transforms import TypeMapper from nequip.model import model_from_config from nequip.nn import GraphModuleMixin +from nequip.utils import Config from nequip.utils.test import assert_AtomicData_equivariant @@ -55,7 +56,9 @@ def make_model(config, device, initialize: bool = True, deploy: bool = False): "types_names": ["H", "C", "O"], } ) - model = model_from_config(config, initialize=initialize, deploy=deploy) + model = model_from_config( + Config.from_dict(config), initialize=initialize, deploy=deploy + ) model = model.to(device) return model @@ -248,8 +251,10 @@ def test_embedding_cutoff(self, model, config, device): edge_embed = instance(AtomicData.to_AtomicDataDict(data)) if AtomicDataDict.EDGE_FEATURES_KEY in edge_embed: key = AtomicDataDict.EDGE_FEATURES_KEY - else: + elif AtomicDataDict.EDGE_EMBEDDING_KEY in edge_embed: key = AtomicDataDict.EDGE_EMBEDDING_KEY + else: + pytest.skip() edge_embed = edge_embed[key] data.pos[2, 1] = r_max # put it past the cutoff edge_embed2 = instance(AtomicData.to_AtomicDataDict(data))[key] @@ -464,3 +469,38 @@ def test_partial_forces(self, config, atomic_batch, device, strict_locality): AtomicDataDict.BATCH_KEY ].view(1, -1) assert torch.equal(adjacency, torch.any(partial_forces != 0, dim=-1)) + + def test_force_smoothness(self, model, config, device): + instance, out_fields = model + if AtomicDataDict.FORCE_KEY not in out_fields: + pytest.skip() + # see test_embedding_cutoff + with torch.no_grad(): + all_params = list(instance.parameters()) + old_state = [p.detach().clone() for p in all_params] + for p in all_params: + p.uniform_(-2.0, 2.0) + config, out_fields = config + r_max = config["r_max"] + + # make a synthetic three atom example + data = AtomicData( + atom_types=np.random.choice([0, 1, 2], size=3), + pos=np.array([[0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [r_max, 0.0, 0.0]]), + edge_index=np.array([[0, 1, 0, 2], [1, 0, 2, 0]]), + ) + data = data.to(device) + out = instance(AtomicData.to_AtomicDataDict(data)) + forces = out[AtomicDataDict.FORCE_KEY] + assert ( + forces[:2].abs().sum() > 1e-4 + ) # some nonzero terms on the two connected atoms + assert torch.allclose( + forces[2], + torch.zeros(1, device=device, dtype=forces.dtype), + ) # the atom at the cutoff should be zero + + # restore previous model state + with torch.no_grad(): + for p, v in zip(all_params, old_state): + p.copy_(v) From 9a71e1526d2df6983cf98ab78ae41f69479af270 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Sun, 12 Feb 2023 21:14:36 -0500 Subject: [PATCH 083/157] test ZBL thoroughly --- tests/unit/model/test_pair/test_zbl.py | 111 
++++++++++++++++--------- 1 file changed, 71 insertions(+), 40 deletions(-) diff --git a/tests/unit/model/test_pair/test_zbl.py b/tests/unit/model/test_pair/test_zbl.py index 320bbec3..20289ce3 100644 --- a/tests/unit/model/test_pair/test_zbl.py +++ b/tests/unit/model/test_pair/test_zbl.py @@ -10,55 +10,86 @@ import torch from nequip.data.transforms import TypeMapper +from nequip.data import AtomicDataDict from nequip.model import model_from_config from nequip.ase import NequIPCalculator from nequip.nn import GraphModel from nequip.utils import Config +from nequip.utils.unittests.model_tests import BaseEnergyModelTests -@pytest.mark.parametrize("do_scale", [False, True]) -def test_zbl(do_scale: bool): - """Confirm our ZBL implementation matches LAMMPS.""" - if torch.get_default_dtype() != torch.float64: - pytest.skip() - chemical_symbols_to_type = {"H": 0, "O": 1, "C": 2, "N": 3, "Cu": 4, "Au": 5} - r_max: float = 8.0 # see zbl_data.lmps - ZBL_model: GraphModel = model_from_config( - config=Config.from_dict( +class TestNequIPModel(BaseEnergyModelTests): + @pytest.fixture + def strict_locality(self): + return True + + @pytest.fixture( + params=[False, True], + scope="class", + ) + def config(self, request): + do_scale = request.param + config = { + "model_builders": [ + "PairPotential", + "ForceOutput", + "RescaleEnergyEtc", + ], + "global_rescale_scale": 3.7777 if do_scale else None, + "pair_style": "ZBL", + "units": "metal", + "r_max": 5.0, + "chemical_symbol_to_type": {"H": 0, "C": 1, "O": 2}, + } + return config, [ + AtomicDataDict.TOTAL_ENERGY_KEY, + AtomicDataDict.FORCE_KEY, + AtomicDataDict.PER_ATOM_ENERGY_KEY, + ] + + def test_lammps_repro(self, config): + if torch.get_default_dtype() != torch.float64: + pytest.skip() + config, _ = config + config = config.copy() + r_max: float = 8.0 # see zbl_data.lmps + config.update( { "model_dtype": "float64", - "model_builders": [ - "PairPotential", - "StressForceOutput", - "RescaleEnergyEtc", - ], - "global_rescale_scale": 3.7777 if do_scale else None, - "pair_style": "ZBL", - "units": "metal", - "num_types": len(chemical_symbols_to_type), - "chemical_symbol_to_type": chemical_symbols_to_type, "r_max": r_max + 1, # To make cutoff envelope irrelevant "PolynomialCutoff_p": 80, # almost a step function } ) - ) - tm = TypeMapper(chemical_symbol_to_type=chemical_symbols_to_type) - # make test system of two atoms: - atoms = ase.Atoms(positions=np.zeros((2, 3)), symbols=["H", "H"]) - atoms.calc = NequIPCalculator(ZBL_model, r_max=r_max, device="cpu", transform=tm) - # == load precomputed reference data == - # To regenerate this data, run - # $ lmp -in zbl_data.lmps - # $ python -c "import numpy as np; d = np.loadtxt('zbl.dat', skiprows=1); np.save('zbl.npy', d)" - refdata = np.load(Path(__file__).parent / "zbl.npy") - for (r, Zi, Zj, pe, fxi, fxj) in refdata: - if r >= r_max: - continue - atoms.positions[1, 0] = r - atoms.set_atomic_numbers([int(Zi), int(Zj)]) - # ZBL blows up for atoms being close, so the numerics differ to ours - # 1e-5 == 0.01 meV / Å - assert np.allclose(atoms.get_forces()[0, 0], fxi, atol=1e-5) - assert np.allclose(atoms.get_forces()[1, 0], fxj, atol=1e-5) - # 1e-4 == 0.1 meV system, 0.05 meV / atom - assert np.allclose(atoms.get_potential_energy(), pe, atol=1e-4) + config["chemical_symbol_to_type"] = { + "H": 0, + "O": 1, + "C": 2, + "N": 3, + "Cu": 4, + "Au": 5, + } + tm = TypeMapper(chemical_symbol_to_type=config["chemical_symbol_to_type"]) + config["num_types"] = tm.num_types + ZBL_model = 
model_from_config(Config.from_dict(config), initialize=True) + ZBL_model.eval() + # make test system of two atoms: + atoms = ase.Atoms(positions=np.zeros((2, 3)), symbols=["H", "H"]) + atoms.calc = NequIPCalculator( + ZBL_model, r_max=r_max, device="cpu", transform=tm + ) + # == load precomputed reference data == + # To regenerate this data, run + # $ lmp -in zbl_data.lmps + # $ python -c "import numpy as np; d = np.loadtxt('zbl.dat', skiprows=1); np.save('zbl.npy', d)" + refdata = np.load(Path(__file__).parent / "zbl.npy") + for (r, Zi, Zj, pe, fxi, fxj) in refdata: + if r >= r_max: + continue + atoms.positions[1, 0] = r + atoms.set_atomic_numbers([int(Zi), int(Zj)]) + # ZBL blows up for atoms being close, so the numerics differ to ours + # 1e-5 == 0.01 meV / Å + assert np.allclose(atoms.get_forces()[0, 0], fxi, atol=1e-5) + assert np.allclose(atoms.get_forces()[1, 0], fxj, atol=1e-5) + # 1e-4 == 0.1 meV system, 0.05 meV / atom + assert np.allclose(atoms.get_potential_energy(), pe, atol=1e-4) From a232c88ccbca6b11a8e8d04d59f2af3f1b8985b5 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Sun, 12 Feb 2023 21:18:34 -0500 Subject: [PATCH 084/157] Test with pair potential --- tests/unit/model/test_nequip_model.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/tests/unit/model/test_nequip_model.py b/tests/unit/model/test_nequip_model.py index 86de6ee8..ee4d9ab5 100644 --- a/tests/unit/model/test_nequip_model.py +++ b/tests/unit/model/test_nequip_model.py @@ -10,7 +10,10 @@ COMMON_CONFIG = { "avg_num_neighbors": None, "num_types": 3, - "types_names": ["H", "C", "O"], + "chemical_symbol_to_type": {"H": 0, "C": 1, "O": 2}, + # Just in case for when that builder exists: + "pair_style": "ZBL", + "units": "metal", } r_max = 3 minimal_config1 = dict( @@ -78,15 +81,26 @@ def base_config(self, request): AtomicDataDict.FORCE_KEY, ], ), + # # Save some time in the tests + # ( + # ["EnergyModel"], + # [ + # AtomicDataDict.TOTAL_ENERGY_KEY, + # AtomicDataDict.PER_ATOM_ENERGY_KEY, + # ], + # ), ( - ["EnergyModel"], + ["EnergyModel", "StressForceOutput"], [ AtomicDataDict.TOTAL_ENERGY_KEY, AtomicDataDict.PER_ATOM_ENERGY_KEY, + AtomicDataDict.FORCE_KEY, + AtomicDataDict.STRESS_KEY, + AtomicDataDict.VIRIAL_KEY, ], ), ( - ["EnergyModel", "StressForceOutput"], + ["EnergyModel", "PairPotentialTerm", "StressForceOutput"], [ AtomicDataDict.TOTAL_ENERGY_KEY, AtomicDataDict.PER_ATOM_ENERGY_KEY, From 73e414268c442ddf1f6ed801dd7691e92c8eca12 Mon Sep 17 00:00:00 2001 From: Alby M <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 13 Feb 2023 14:49:32 -0500 Subject: [PATCH 085/157] GPU OOM offloading mode (#300) --- CHANGELOG.md | 1 + nequip/scripts/train.py | 30 ++++++++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 127b845a..27bbc9fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,7 @@ Most recent change on the bottom. 
 - `nequip-benchmark --no-compile` and `--verbose` and `--memory-summary`
 - `nequip-benchmark --pdb` for debugging model (builder) errors
 - More information in `nequip-deploy info`
+- GPU OOM offloading mode

 ### Changed
 - Minimum e3nn is now 0.4.4
diff --git a/nequip/scripts/train.py b/nequip/scripts/train.py
index 1795813b..724d974c 100644
--- a/nequip/scripts/train.py
+++ b/nequip/scripts/train.py
@@ -42,6 +42,7 @@
     model_debug_mode=False,
     equivariance_test=False,
     grad_anomaly_mode=False,
+    gpu_oom_offload=False,
     append=False,
     _jit_bailout_depth=2,  # avoid 20 iters of pain, see https://github.com/pytorch/pytorch/issues/52286
     # Quote from eelison in PyTorch slack:
     # https://pytorch.slack.com/archives/CDZD1FANA/p1644259272007529?thread_ts=1644064449.039479&cid=CDZD1FANA
@@ -77,7 +78,22 @@ def main(args=None, running_as_script: bool = True):

     # Train
     trainer.save()
-    trainer.train()
+    if config.get("gpu_oom_offload", False):
+        if not torch.cuda.is_available():
+            raise RuntimeError(
+                "CUDA is not available; --gpu-oom-offload doesn't make sense."
+            )
+        warnings.warn(
+            "! GPU OOM Offloading is ON:\n"
+            "This is meant for training models that would be impossible otherwise due to OOM.\n"
+            "Note that this comes at a speed cost and SHOULD NOT be used if your training fits in GPU memory without it.\n"
+            "Please also consider whether a smaller model is a more appropriate solution!\n"
+            "Also, a warning from PyTorch: 'If you overuse pinned memory, it can cause serious problems when running low on RAM!'"
+        )
+        with torch.autograd.graph.save_on_cpu(pin_memory=True):
+            trainer.train()
+    else:
+        trainer.train()

     return
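The offloading mechanism wrapped above is easy to see in isolation. A minimal sketch of what `torch.autograd.graph.save_on_cpu` does (illustrative only, not part of the patch; the model and shapes are made up, and a CUDA device is assumed):

import torch

# Stand-in for a large model; any autograd graph behaves the same way.
model = torch.nn.Linear(4096, 4096).cuda()
x = torch.randn(64, 4096, device="cuda", requires_grad=True)

# Inside this context, tensors saved for backward are moved to (pinned) host
# memory during the forward pass and copied back to the GPU during backward,
# trading speed for peak GPU memory:
with torch.autograd.graph.save_on_cpu(pin_memory=True):
    loss = model(x).relu().sum()
loss.backward()

@@ -106,6 +122,11 @@ def parse_command_line(args=None):
         help="enable PyTorch autograd anomaly mode to debug NaN gradients. Do not use for production training!",
         action="store_true",
     )
+    parser.add_argument(
+        "--gpu-oom-offload",
+        help="Use `torch.autograd.graph.save_on_cpu` to offload intermediate tensors to CPU (host) memory in order to train models that would be impossible otherwise due to OOM. Note that this comes at a speed cost and SHOULD NOT be used if your training fits in GPU memory without it.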
Please also consider whether a smaller model is a more appropriate solution.", + action="store_true", + ) parser.add_argument( "--log", help="log file to store all the screen logging", @@ -115,7 +136,12 @@ def parse_command_line(args=None): args = parser.parse_args(args=args) config = Config.from_file(args.config, defaults=default_config) - for flag in ("model_debug_mode", "equivariance_test", "grad_anomaly_mode"): + for flag in ( + "model_debug_mode", + "equivariance_test", + "grad_anomaly_mode", + "gpu_oom_offload", + ): config[flag] = getattr(args, flag) or config[flag] return config From f5a19f4e923abfe2f52093e4d0e6fe4688669558 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 13 Feb 2023 18:17:29 -0500 Subject: [PATCH 086/157] lint --- tests/unit/model/test_pair/test_zbl.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/model/test_pair/test_zbl.py b/tests/unit/model/test_pair/test_zbl.py index 20289ce3..b862b624 100644 --- a/tests/unit/model/test_pair/test_zbl.py +++ b/tests/unit/model/test_pair/test_zbl.py @@ -13,7 +13,6 @@ from nequip.data import AtomicDataDict from nequip.model import model_from_config from nequip.ase import NequIPCalculator -from nequip.nn import GraphModel from nequip.utils import Config from nequip.utils.unittests.model_tests import BaseEnergyModelTests From ead9c5def3140f5ae5e8fededddc5ba0f1485558 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 14 Feb 2023 13:57:09 -0500 Subject: [PATCH 087/157] --output-fields-from-original-dataset --- CHANGELOG.md | 1 + nequip/data/AtomicData.py | 12 +++++++++++ nequip/data/__init__.py | 2 ++ nequip/scripts/evaluate.py | 44 +++++++++++++++++++++++++++++++------- 4 files changed, 51 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f634fc9..dfd53b61 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ Most recent change on the bottom. 
- `AtomicInMemoryDataset.rdf()` - `type_to_chemical_symbol` - Pair potential terms +- `nequip-evaluate --output-fields-from-original-dataset` ### Changed - Always require explicit `seed` diff --git a/nequip/data/AtomicData.py b/nequip/data/AtomicData.py index 4b4bb50c..2fc35042 100644 --- a/nequip/data/AtomicData.py +++ b/nequip/data/AtomicData.py @@ -79,6 +79,7 @@ def register_fields( node_fields: set = set(node_fields) edge_fields: set = set(edge_fields) graph_fields: set = set(graph_fields) + long_fields: set = set(long_fields) allfields = node_fields.union(edge_fields, graph_fields) assert len(allfields) == len(node_fields) + len(edge_fields) + len(graph_fields) _NODE_FIELDS.update(node_fields) @@ -110,6 +111,17 @@ def deregister_fields(*fields: Sequence[str]) -> None: _GRAPH_FIELDS.discard(f) +def _register_field_prefix(prefix: str) -> None: + """Re-register all registered fields as the same type, but with `prefix` added on.""" + assert prefix.endswith("_") + register_fields( + node_fields=[prefix + e for e in _NODE_FIELDS], + edge_fields=[prefix + e for e in _EDGE_FIELDS], + graph_fields=[prefix + e for e in _GRAPH_FIELDS], + long_fields=[prefix + e for e in _LONG_FIELDS], + ) + + def _process_dict(kwargs, ignore_fields=[]): """Convert a dict of data into correct dtypes/shapes according to key""" # Deal with _some_ dtype issues diff --git a/nequip/data/__init__.py b/nequip/data/__init__.py index 377f84d7..21cfd3de 100644 --- a/nequip/data/__init__.py +++ b/nequip/data/__init__.py @@ -3,6 +3,7 @@ PBC, register_fields, deregister_fields, + _register_field_prefix, _NODE_FIELDS, _EDGE_FIELDS, _GRAPH_FIELDS, @@ -18,6 +19,7 @@ PBC, register_fields, deregister_fields, + _register_field_prefix, AtomicDataset, AtomicInMemoryDataset, NpzDataset, diff --git a/nequip/scripts/evaluate.py b/nequip/scripts/evaluate.py index 8837f4b0..092fa432 100644 --- a/nequip/scripts/evaluate.py +++ b/nequip/scripts/evaluate.py @@ -11,7 +11,13 @@ import torch -from nequip.data import AtomicData, Collater, dataset_from_config, register_fields +from nequip.data import ( + AtomicData, + Collater, + dataset_from_config, + register_fields, + _register_field_prefix, +) from nequip.scripts.deploy import load_deployed_model, R_MAX_KEY, TYPE_NAMES_KEY from nequip.scripts._logger import set_up_script_logger from nequip.scripts.train import default_config, check_code_version @@ -19,8 +25,8 @@ from nequip.train import Trainer, Loss, Metrics from nequip.utils import load_file, instantiate, Config - -ORIGINAL_DATASET_INDEX_KEY: str = "original_dataset_index" +ORIGINAL_DATASET_PREFIX: str = "original_dataset_" +ORIGINAL_DATASET_INDEX_KEY: str = ORIGINAL_DATASET_PREFIX + "index" register_fields(graph_fields=[ORIGINAL_DATASET_INDEX_KEY]) @@ -154,6 +160,12 @@ def main(args=None, running_as_script: bool = True): type=str, default="", ) + parser.add_argument( + "--output-fields-from-original-dataset", + help="Extra fields from the ORIGINAL REFERENCE DATASET (names comma separated with no spaces) to write to the `--output` with the added prefix `original_dataset_*`", + type=str, + default="", + ) parser.add_argument( "--log", help="log file to store all the metrics and screen logging.debug", @@ -206,9 +218,20 @@ def main(args=None, running_as_script: bool = True): if args.output is not None: if args.output.suffix != ".xyz": raise ValueError("Only .xyz format for `--output` is supported.") - args.output_fields = [e for e in args.output_fields.split(",") if e != ""] + [ - ORIGINAL_DATASET_INDEX_KEY + 
args.output_fields_from_original_dataset = [ + e for e in args.output_fields_from_original_dataset.split(",") if e != "" ] + args.output_fields = [e for e in args.output_fields.split(",") if e != ""] + ase_all_fields = ( + args.output_fields + + [ + ORIGINAL_DATASET_PREFIX + e + for e in args.output_fields_from_original_dataset + ] + + [ORIGINAL_DATASET_INDEX_KEY] + ) + if len(args.output_fields_from_original_dataset) > 0: + _register_field_prefix(ORIGINAL_DATASET_PREFIX) output_type = "xyz" else: assert args.output_fields == "" @@ -385,22 +408,27 @@ def main(args=None, running_as_script: bool = True): with torch.no_grad(): # Write output if output_type == "xyz": + output_out = out.copy() # add test frame to the output: - out[ORIGINAL_DATASET_INDEX_KEY] = torch.LongTensor( + output_out[ORIGINAL_DATASET_INDEX_KEY] = torch.LongTensor( this_batch_test_indexes ) + for field in args.output_fields_from_original_dataset: + # batch is from the original dataset + output_out[ORIGINAL_DATASET_PREFIX + field] = batch[field] # append to the file ase.io.write( output, - AtomicData.from_AtomicDataDict(out) + AtomicData.from_AtomicDataDict(output_out) .to(device="cpu") .to_ase( type_mapper=dataset.type_mapper, - extra_fields=args.output_fields, + extra_fields=ase_all_fields, ), format="extxyz", append=True, ) + del output_out # Accumulate metrics if do_metrics: From cb3a34795cdff5fddfe9a8737ff00e3477a4658b Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 14 Feb 2023 13:57:25 -0500 Subject: [PATCH 088/157] Add parity plot example script --- examples/parity_plot.py | 60 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 examples/parity_plot.py diff --git a/examples/parity_plot.py b/examples/parity_plot.py new file mode 100644 index 00000000..3e825c8a --- /dev/null +++ b/examples/parity_plot.py @@ -0,0 +1,60 @@ +"""Example script to make a parity plot from the results of `nequip-evaluate`. + +Thanks to Hongyu Yu for useful input: https://github.com/mir-group/nequip/discussions/223#discussioncomment-4923323 +""" + +import argparse +import numpy as np + +import matplotlib.pyplot as plt + +import ase.io + +# Parse arguments: +parser = argparse.ArgumentParser( + description="Make a parity plot from the results of `nequip-evaluate`." +) +parser.add_argument( + "xyzoutput", + help=".xyz file from running something like `nequip-evaluate ... 
--output out.xyz --output-fields-from-original-dataset total_energy,forces`",
+)
+parser.add_argument("--output", help="File to write plot to", default=None)
+args = parser.parse_args()
+
+forces = []
+true_forces = []
+energies = []
+true_energies = []
+for frame in ase.io.iread(args.xyzoutput):
+    forces.append(frame.get_forces().flatten())
+    true_forces.append(frame.arrays["original_dataset_forces"].flatten())
+    energies.append(frame.get_potential_energy())
+    true_energies.append(frame.info["original_dataset_total_energy"])
+forces = np.concatenate(forces, axis=0)
+true_forces = np.concatenate(true_forces, axis=0)
+energies = np.asarray(energies)
+true_energies = np.asarray(true_energies)
+
+fig, axs = plt.subplots(ncols=2, figsize=(8, 4))
+
+ax = axs[0]
+ax.set_xlabel("True force component")
+ax.set_ylabel("Model force component")
+ax.plot([0, 1], [0, 1], transform=ax.transAxes, linestyle="--", color="gray")
+ax.scatter(true_forces, forces)
+ax.set_aspect("equal")
+
+ax = axs[1]
+ax.set_xlabel("True energy")
+ax.set_ylabel("Model energy")
+ax.plot([0, 1], [0, 1], transform=ax.transAxes, linestyle="--", color="gray")
+ax.scatter(true_energies, energies)
+ax.set_aspect("equal")
+
+plt.suptitle("Parity Plots")
+
+plt.tight_layout()
+if args.output is None:
+    plt.show()
+else:
+    plt.savefig(args.output)
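A typical end-to-end use of the script added above looks roughly like the following (hypothetical paths; `--train-dir` is assumed from the surrounding tooling, while the output flags are those added in the previous commit):

nequip-evaluate --train-dir path/to/training/session --output out.xyz --output-fields-from-original-dataset total_energy,forces
python examples/parity_plot.py out.xyz --output parity.png

From 90d7c0c04031bc46c147edc50fda2ac112b688a6 Mon Sep 17 00:00:00 2001
From: Alby M <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Tue, 14 Feb 2023 22:56:03 -0500
Subject: [PATCH 089/157] Warn/error on unused keys (#301)

---
 CHANGELOG.md                    |  7 +++----
 nequip/model/_build.py          |  5 +++++
 nequip/scripts/deploy.py        |  2 +-
 nequip/scripts/train.py         | 34 ++++++++++++++++++---------------
 nequip/train/trainer.py         |  3 +++
 nequip/utils/_global_options.py |  3 ++-
 nequip/utils/auto_init.py       |  8 ++++++--
 nequip/utils/config.py          | 31 +++++++++++++++++++++++++++---
 nequip/utils/wandb.py           | 11 ++++++++---
 tests/integration/conftest.py   |  7 ++++---
 tests/integration/test_train.py |  3 ++-
 tests/unit/utils/test_config.py | 12 ++++++------
 12 files changed, 87 insertions(+), 39 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index dfd53b61..6f70714a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,13 +8,15 @@ Most recent change on the bottom.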
### Removed - [Breaking] `fixed_fields` machinery (`npz_fixed_field_keys` is still supported, but through a more straightforward implementation) -### Added -- add Tensorboard as logger option - ## [0.5.6] - 2022-12-19 ### Added - sklearn dependency removed diff --git a/nequip/model/_build.py b/nequip/model/_build.py index 8ddee2ac..372ea90b 100644 --- a/nequip/model/_build.py +++ b/nequip/model/_build.py @@ -13,6 +13,7 @@ torch_default_dtype, Config, ) +from nequip.utils.config import _GLOBAL_ALL_ASKED_FOR_KEYS def model_from_config( @@ -65,6 +66,10 @@ def model_from_config( config["num_types"] = type_mapper.num_types config["type_names"] = type_mapper.type_names config["type_to_chemical_symbol"] = type_mapper.type_to_chemical_symbol + # We added them, so they are by definition valid: + _GLOBAL_ALL_ASKED_FOR_KEYS.update( + {"num_types", "type_names", "type_to_chemical_symbol"} + ) default_dtype = torch.get_default_dtype() model_dtype: torch.dtype = dtype_from_name(config.get("model_dtype", default_dtype)) diff --git a/nequip/scripts/deploy.py b/nequip/scripts/deploy.py index 95f217e3..b99a92b6 100644 --- a/nequip/scripts/deploy.py +++ b/nequip/scripts/deploy.py @@ -252,7 +252,7 @@ def main(args=None): metadata[TF32_KEY] = str(int(config["allow_tf32"])) metadata[DEFAULT_DTYPE_KEY] = dtype_to_name(config["default_dtype"]) metadata[MODEL_DTYPE_KEY] = dtype_to_name(config["model_dtype"]) - metadata[CONFIG_KEY] = yaml.dump(dict(config)) + metadata[CONFIG_KEY] = yaml.dump(Config.as_dict(config)) metadata = {k: v.encode("ascii") for k, v in metadata.items()} torch.jit.save(model, args.out_file, _extra_files=metadata) diff --git a/nequip/scripts/train.py b/nequip/scripts/train.py index 724d974c..0e104eba 100644 --- a/nequip/scripts/train.py +++ b/nequip/scripts/train.py @@ -16,6 +16,7 @@ from nequip.utils import Config from nequip.data import dataset_from_config from nequip.utils import load_file +from nequip.utils.config import _GLOBAL_ALL_ASKED_FOR_KEYS from nequip.utils.test import assert_AtomicData_equivariant from nequip.utils.versions import check_code_version from nequip.utils._global_options import _set_global_options @@ -44,6 +45,7 @@ grad_anomaly_mode=False, gpu_oom_offload=False, append=False, + warn_unused=False, _jit_bailout_depth=2, # avoid 20 iters of pain, see https://github.com/pytorch/pytorch/issues/52286 # Quote from eelison in PyTorch slack: # https://pytorch.slack.com/archives/CDZD1FANA/p1644259272007529?thread_ts=1644064449.039479&cid=CDZD1FANA @@ -55,6 +57,8 @@ # even if the number of atoms is fixed: _jit_fusion_strategy=[("DYNAMIC", 3)], ) +# All default_config keys are valid / requested +_GLOBAL_ALL_ASKED_FOR_KEYS.update(default_config.keys()) def main(args=None, running_as_script: bool = True): @@ -133,6 +137,11 @@ def parse_command_line(args=None): type=Path, default=None, ) + parser.add_argument( + "--warn-unused", + help="Warn instead of error when the config contains unused keys", + action="store_true", + ) args = parser.parse_args(args=args) config = Config.from_file(args.config, defaults=default_config) @@ -140,6 +149,7 @@ def parse_command_line(args=None): "model_debug_mode", "equivariance_test", "grad_anomaly_mode", + "warn_unused", "gpu_oom_offload", ): config[flag] = getattr(args, flag) or config[flag] @@ -172,7 +182,7 @@ def fresh_start(config): else: from nequip.train.trainer import Trainer - trainer = Trainer(model=None, **dict(config)) + trainer = Trainer(model=None, **Config.as_dict(config)) # what is this # to update wandb data? 
@@ -199,9 +209,6 @@ def fresh_start(config): ) logging.info("Successfully built the network...") - # by doing this here we check also any keys custom builders may have added - _check_old_keys(config) - # Equivar test if config.equivariance_test > 0: n_train: int = len(trainer.dataset_train) @@ -229,6 +236,14 @@ def fresh_start(config): # Store any updated config information in the trainer trainer.update_kwargs(config) + unused = config._unused_keys() + if len(unused) > 0: + message = f"The following keys in the config file were not used, did you make a typo?: {', '.join(unused)}. (If this sounds wrong, please file an issue: the detection of unused keys is in beta. You can turn this error into a warning with `--warn-unused`.)" + if config.warn_unused: + warnings.warn(message) + else: + raise KeyError(message) + return trainer @@ -296,16 +311,5 @@ def restart(config): return trainer -def _check_old_keys(config) -> None: - """check ``config`` for old/depricated keys and emit corresponding errors/warnings""" - # compile_model - k = "compile_model" - if k in config: - if config[k]: - raise ValueError("the `compile_model` option has been removed") - else: - warnings.warn("the `compile_model` option has been removed") - - if __name__ == "__main__": main(running_as_script=True) diff --git a/nequip/train/trainer.py b/nequip/train/trainer.py index 1cb6a1ab..8211cfee 100644 --- a/nequip/train/trainer.py +++ b/nequip/train/trainer.py @@ -48,6 +48,7 @@ ) from nequip.utils.versions import check_code_version from nequip.model import model_from_config +from nequip.utils.config import _GLOBAL_ALL_ASKED_FOR_KEYS from .loss import Loss, LossStat from .metrics import Metrics @@ -279,6 +280,8 @@ def __init__( for key in self.init_keys: setattr(self, key, locals()[key]) _local_kwargs[key] = locals()[key] + # all init_keys of the Trainer are valid config keys + _GLOBAL_ALL_ASKED_FOR_KEYS.add(key) self.ema = None diff --git a/nequip/utils/_global_options.py b/nequip/utils/_global_options.py index bba409dd..2567564c 100644 --- a/nequip/utils/_global_options.py +++ b/nequip/utils/_global_options.py @@ -9,6 +9,7 @@ from .misc import dtype_from_name from .auto_init import instantiate from .test import set_irreps_debug +from .config import Config # for multiprocessing, we need to keep track of our latest global options so @@ -35,7 +36,7 @@ def _set_global_options(config, warn_on_override: bool = False) -> None: """ # update these options into the latest global config. 
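# (Aside: `Config.as_dict(config)` is used below instead of `dict(config)` because,
# as the `config.py` hunk later in this patch notes, `dict(self)` would call
# `__getitem__` on every key and thereby mark all of them as "asked for",
# silently defeating the unused-key detection this patch adds.)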
global _latest_global_config - _latest_global_config.update(dict(config)) + _latest_global_config.update(Config.as_dict(config)) # Set TF32 support # See https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices if torch.cuda.is_available() and "allow_tf32" in config: diff --git a/nequip/utils/auto_init.py b/nequip/utils/auto_init.py index 8a9a9917..b7f8230f 100644 --- a/nequip/utils/auto_init.py +++ b/nequip/utils/auto_init.py @@ -2,7 +2,7 @@ import inspect import logging -from .config import Config +from .config import Config, _GLOBAL_ALL_ASKED_FOR_KEYS def instantiate_from_cls_name( @@ -140,7 +140,7 @@ def instantiate( if k not in key_mapping["optional"] } - final_optional_args = dict(config) + final_optional_args = Config.as_dict(config) # for nested argument, it is possible that the positional args contain unnecesary keys if len(parent_builders) > 0: @@ -221,6 +221,10 @@ def instantiate( for t in key_mapping: for k, v in key_mapping[t].items(): string = f" {t:>10s}_args : {k:>50s}" + # key mapping tells us how values got from the + # users config (v) to the object being built (k) + # thus v is by definition a valid key + _GLOBAL_ALL_ASKED_FOR_KEYS.add(v) if k != v: string += f" <- {v:>50s}" logging.debug(string) diff --git a/nequip/utils/config.py b/nequip/utils/config.py index d13e0546..99bddeb6 100644 --- a/nequip/utils/config.py +++ b/nequip/utils/config.py @@ -34,6 +34,8 @@ If a parameter is updated, the updated value will be formatted back to the same type. """ +from typing import Set, Dict, Any, List + import inspect from copy import deepcopy @@ -42,7 +44,12 @@ from nequip.utils.savenload import save_file, load_file +_GLOBAL_ALL_ASKED_FOR_KEYS: Set[str] = set() + + class Config(object): + _items: Dict[str, Any] + def __init__( self, config: Optional[dict] = None, @@ -76,10 +83,20 @@ def keys(self): def _as_dict(self): return self._items - def as_dict(self): - return dict(self) + @staticmethod + def as_dict(obj): + # don't use `dict(self)`, since that + # calls __getitem__ + if isinstance(obj, dict): + return obj.copy() + elif isinstance(obj, Config): + return obj._items.copy() + else: + raise TypeError def __getitem__(self, key): + # any requested key is a valid key + _GLOBAL_ALL_ASKED_FOR_KEYS.add(key) return self._items[key] def get_type(self, key): @@ -115,7 +132,6 @@ def allow_list(self): return self._allow_list def __setitem__(self, key, val): - # typehint if key.endswith("_type") and key.startswith("_"): @@ -157,6 +173,7 @@ def __contains__(self, key): return key in self._items def pop(self, *args): + _GLOBAL_ALL_ASKED_FOR_KEYS.add(args[0]) return self._items.pop(*args) def update_w_prefix( @@ -227,6 +244,7 @@ def update(self, dictionary: dict, allow_val_change=None): return set(keys) - set([None]) def get(self, *args): + _GLOBAL_ALL_ASKED_FOR_KEYS.add(args[0]) return self._items.get(*args) def persist(self): @@ -338,3 +356,10 @@ def from_function(function, remove_kwargs=False): return Config(config=default_params, allow_list=param_keys) load = from_file + + def _get_nomark(self, key: str) -> Any: + return self._items.get(key) + + def _unused_keys(self) -> List[str]: + unused = [k for k in self.keys() if k not in _GLOBAL_ALL_ASKED_FOR_KEYS] + return unused diff --git a/nequip/utils/wandb.py b/nequip/utils/wandb.py index 310b0f31..7f0d5e10 100644 --- a/nequip/utils/wandb.py +++ b/nequip/utils/wandb.py @@ -1,11 +1,14 @@ -import wandb import logging import secrets + +from nequip.utils import Config + +import wandb from wandb.util import 
json_friendly_val def init_n_update(config): - conf_dict = dict(config) + conf_dict = Config.as_dict(config) # wandb mangles keys (in terms of type) as well, but we can't easily correct that because there are many ambiguous edge cases. (E.g. string "-1" vs int -1 as keys, are they different config keys?) if any(not isinstance(k, str) for k in conf_dict.keys()): raise TypeError( @@ -30,7 +33,9 @@ def init_n_update(config): skip = False if k in config.keys(): # double check the one sanitized by wandb - v_old = json_friendly_val(config[k]) + # because we're preprocessing the config and looping over + # _every_ key, don't mark accessed keys as valid => _get_nomark + v_old = json_friendly_val(config._get_nomark(k)) if repr(v_new) == repr(v_old): skip = True if skip: diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index b98ee2bc..ceb840b7 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -15,9 +15,9 @@ def _check_and_print(retcode): __tracebackhide__ = True if retcode.returncode: - if len(retcode.stdout) > 0: + if retcode.stdout is not None and len(retcode.stdout) > 0: print(retcode.stdout.decode("ascii")) - if len(retcode.stderr) > 0: + if retcode.stderr is not None and len(retcode.stderr) > 0: print(retcode.stderr.decode("ascii"), file=sys.stderr) retcode.check_returncode() @@ -136,7 +136,8 @@ def _training_session(conffile, model_dtype, builder, BENCHMARK_ROOT): ) retcode = subprocess.run( - ["nequip-train", "conf.yaml"], + # we use --warn-unused because we are using configs with many unused keys for testing + ["nequip-train", "conf.yaml", "--warn-unused"], cwd=tmpdir, env=env, stdout=subprocess.PIPE, diff --git a/tests/integration/test_train.py b/tests/integration/test_train.py index 83abacae..b9935b3c 100644 --- a/tests/integration/test_train.py +++ b/tests/integration/test_train.py @@ -161,7 +161,8 @@ def test_requeue(nequip_dataset, BENCHMARK_ROOT, conffile): ) retcode = subprocess.run( - ["nequip-train", "conf.yaml"], + # Supress the warning cause we use general config for all the fake models + ["nequip-train", "conf.yaml", "--warn-unused"], cwd=tmpdir, env=env, stdout=subprocess.PIPE, diff --git a/tests/unit/utils/test_config.py b/tests/unit/utils/test_config.py index 0cd3151e..35ae7b68 100644 --- a/tests/unit/utils/test_config.py +++ b/tests/unit/utils/test_config.py @@ -36,12 +36,12 @@ def test_init(self, config): @config_testlist def test_set_attr(self, config): - dict_config = dict(config) + dict_config = Config.as_dict(config) config.intv = 2 dict_config["intv"] = 2 - assert dict(config) == dict_config - print("dict", dict(config)) + assert Config.as_dict(config) == dict_config + print("dict", Config.as_dict(config)) @config_testlist def test_get_attr(self, config): @@ -69,7 +69,7 @@ def test_save_yaml(self, config): @one_test def test_load_yaml(self, config): config2 = config.load(filename=f"{self.filename}.yaml") - assert dict(config) == dict(config2) + assert Config.as_dict(config) == dict(config2) remove(f"{self.filename}.yaml") @@ -81,14 +81,14 @@ class TestConfigUpdate: @config_testlist def test_update(self, config): - dict_config = dict(config) + dict_config = Config.as_dict(config) dict_config["new_intv"] = 9 newdict = {"new_intv": 9} config.update(newdict) - assert dict(config) == dict_config + assert Config.as_dict(config) == dict_config @config_testlist def test_update_settype(self, config): From 38082494fedf064013317499ecff8e7e91337cf7 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp 
<1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Sun, 19 Feb 2023 22:13:43 -0500 Subject: [PATCH 090/157] fix unused error for LR, early stopping, etc. options --- nequip/scripts/train.py | 19 +++++++++++------- nequip/train/trainer.py | 3 +++ nequip/utils/auto_init.py | 41 +++++++++++++++++++++++---------------- 3 files changed, 39 insertions(+), 24 deletions(-) diff --git a/nequip/scripts/train.py b/nequip/scripts/train.py index 0e104eba..1ebe5ccb 100644 --- a/nequip/scripts/train.py +++ b/nequip/scripts/train.py @@ -236,13 +236,18 @@ def fresh_start(config): # Store any updated config information in the trainer trainer.update_kwargs(config) - unused = config._unused_keys() - if len(unused) > 0: - message = f"The following keys in the config file were not used, did you make a typo?: {', '.join(unused)}. (If this sounds wrong, please file an issue: the detection of unused keys is in beta. You can turn this error into a warning with `--warn-unused`.)" - if config.warn_unused: - warnings.warn(message) - else: - raise KeyError(message) + # Only run the unused check as a callback after the trainer has + # initialized everything (metrics, early stopping, etc.) + def _unused_check(): + unused = config._unused_keys() + if len(unused) > 0: + message = f"The following keys in the config file were not used, did you make a typo?: {', '.join(unused)}. (If this sounds wrong, please file an issue: the detection of unused keys is in beta. You can turn this error into a warning with `--warn-unused`.)" + if config.warn_unused: + warnings.warn(message) + else: + raise KeyError(message) + + trainer._post_init_callback = _unused_check return trainer diff --git a/nequip/train/trainer.py b/nequip/train/trainer.py index 8211cfee..68c2c1ec 100644 --- a/nequip/train/trainer.py +++ b/nequip/train/trainer.py @@ -771,6 +771,9 @@ def train(self): self.init_metrics() + if getattr(self, "_post_init_callback", None) is not None: + self._post_init_callback() + while not self.stop_cond: self.epoch_step() diff --git a/nequip/utils/auto_init.py b/nequip/utils/auto_init.py index b7f8230f..157c9ce4 100644 --- a/nequip/utils/auto_init.py +++ b/nequip/utils/auto_init.py @@ -213,25 +213,32 @@ def instantiate( for t in key_mapping: key_mapping[t].pop(key, None) + # debug info + if len(parent_builders) == 0: + # ^ we only want to log or consume arguments for the "unused keys" check + # if this is a root-level build. For subbuilders, we don't want to log + # or, worse, mark keys without prefixes as consumed. + logging.debug( + f"{'get args for' if return_args_only else 'instantiate'} {builder.__name__}" + ) + for t in key_mapping: + for k, v in key_mapping[t].items(): + string = f" {t:>10s}_args : {k:>50s}" + # key mapping tells us how values got from the + # users config (v) to the object being built (k) + # thus v is by definition a valid key + _GLOBAL_ALL_ASKED_FOR_KEYS.add(v) + if k != v: + string += f" <- {v:>50s}" + logging.debug(string) + logging.debug(f"...{builder.__name__}_param = dict(") + logging.debug(f"... optional_args = {final_optional_args},") + logging.debug(f"... 
positional_args = {positional_args})") + + # Short circuit for return_args_only if return_args_only: return key_mapping, final_optional_args - - # debug info - logging.debug(f"instantiate {builder.__name__}") - for t in key_mapping: - for k, v in key_mapping[t].items(): - string = f" {t:>10s}_args : {k:>50s}" - # key mapping tells us how values got from the - # users config (v) to the object being built (k) - # thus v is by definition a valid key - _GLOBAL_ALL_ASKED_FOR_KEYS.add(v) - if k != v: - string += f" <- {v:>50s}" - logging.debug(string) - logging.debug(f"...{builder.__name__}_param = dict(") - logging.debug(f"... optional_args = {final_optional_args},") - logging.debug(f"... positional_args = {positional_args})") - + # Otherwise, actually build the thing: try: instance = builder(**positional_args, **final_optional_args) except Exception as e: From e1ee4c6f0b756240aef37cb9cfc0c6fbc5c6ea79 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 20 Feb 2023 13:45:34 -0500 Subject: [PATCH 091/157] remove default run name --- CHANGELOG.md | 1 + nequip/scripts/train.py | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f70714a..13ed2ae9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ Most recent change on the bottom. ### Removed - [Breaking] `fixed_fields` machinery (`npz_fixed_field_keys` is still supported, but through a more straightforward implementation) +- Default run name/WandB project name of `NequIP`, they must now always be provided explicitly ## [0.5.6] - 2022-12-19 ### Added diff --git a/nequip/scripts/train.py b/nequip/scripts/train.py index 1ebe5ccb..4d81e7b8 100644 --- a/nequip/scripts/train.py +++ b/nequip/scripts/train.py @@ -24,10 +24,8 @@ default_config = dict( root="./", - run_name="NequIP", tensorboard=False, wandb=False, - wandb_project="NequIP", model_builders=[ "SimpleIrrepsConfig", "EnergyModel", From 29f089ad44f2097247ba6733cafb187a88284c24 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 20 Feb 2023 22:32:25 -0500 Subject: [PATCH 092/157] more aggressive test to compensate for nondet numerics --- nequip/utils/unittests/model_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nequip/utils/unittests/model_tests.py b/nequip/utils/unittests/model_tests.py index 8fd47e4b..bd311ce5 100644 --- a/nequip/utils/unittests/model_tests.py +++ b/nequip/utils/unittests/model_tests.py @@ -479,7 +479,7 @@ def test_force_smoothness(self, model, config, device): all_params = list(instance.parameters()) old_state = [p.detach().clone() for p in all_params] for p in all_params: - p.uniform_(-2.0, 2.0) + p.uniform_(-3.0, 3.0) config, out_fields = config r_max = config["r_max"] From 91e498fff287836ca663e00aded0887e1f3baa3f Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 20 Feb 2023 23:12:56 -0500 Subject: [PATCH 093/157] backward compatibility --- nequip/scripts/deploy.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/nequip/scripts/deploy.py b/nequip/scripts/deploy.py index b99a92b6..b16d5a49 100644 --- a/nequip/scripts/deploy.py +++ b/nequip/scripts/deploy.py @@ -10,6 +10,8 @@ import logging import yaml import itertools +import packaging.version +import warnings # This is a weird hack to avoid Intel MKL issues on the cluster when this is called as a subprocess of a process that has itself 
initialized PyTorch. # Since numpy gets imported later anyway for dataset stuff, this shouldn't affect performance. @@ -108,7 +110,18 @@ def load_deployed_model( if set_global_options: global_config_dict = {} global_config_dict["allow_tf32"] = bool(int(metadata[TF32_KEY])) - global_config_dict["default_dtype"] = str(metadata[DEFAULT_DTYPE_KEY]) + if DEFAULT_DTYPE_KEY in metadata: + default_dtype = metadata[DEFAULT_DTYPE_KEY] + else: + default_dtype = "float32" + warnings.warn( + "Models deployed before v0.6.0 don't contain information about their default_dtype; assuming the old default of float32, but this might not be right if you had explicitly set float64." + ) + # If there isn't a dtype, it should be older than 0.6.0: + assert packaging.version.parse( + metadata[NEQUIP_VERSION_KEY] + ) < packaging.version.parse("0.6.0") + global_config_dict["default_dtype"] = str(default_dtype) # JIT strategy strategy = metadata.get(JIT_FUSION_STRATEGY, "") if strategy != "": From 3ac367b3d68f976b067decb9d9cb0e0e0ccf89e6 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 21 Feb 2023 13:47:58 -0500 Subject: [PATCH 094/157] backwards compat, again --- nequip/scripts/deploy.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/nequip/scripts/deploy.py b/nequip/scripts/deploy.py index b16d5a49..7f16a7bb 100644 --- a/nequip/scripts/deploy.py +++ b/nequip/scripts/deploy.py @@ -105,23 +105,28 @@ def load_deployed_model( model = torch.jit.freeze(model) # Everything we store right now is ASCII, so decode for printing metadata = {k: v.decode("ascii") for k, v in metadata.items()} + # Update metadata for backward compatibility + if metadata[DEFAULT_DTYPE_KEY] == "": + # Default and model go together + assert metadata[MODEL_DTYPE_KEY] == "" + # If there isn't a dtype, it should be older than 0.6.0: + assert packaging.version.parse( + metadata[NEQUIP_VERSION_KEY] + ) < packaging.version.parse("0.6.0") + # i.e. no value due to L85 above + # The old pre-0.6.0 defaults: + metadata[DEFAULT_DTYPE_KEY] = "float32" + metadata[MODEL_DTYPE_KEY] = "float32" + warnings.warn( + "Models deployed before v0.6.0 don't contain information about their default_dtype or model_dtype; assuming the old default of float32 for both, but this might not be right if you had explicitly set default_dtype=float64." + ) + # Set up global settings: assert set_global_options in (True, False, "warn") if set_global_options: global_config_dict = {} global_config_dict["allow_tf32"] = bool(int(metadata[TF32_KEY])) - if DEFAULT_DTYPE_KEY in metadata: - default_dtype = metadata[DEFAULT_DTYPE_KEY] - else: - default_dtype = "float32" - warnings.warn( - "Models deployed before v0.6.0 don't contain information about their default_dtype; assuming the old default of float32, but this might not be right if you had explicitly set float64." 
- ) - # If there isn't a dtype, it should be older than 0.6.0: - assert packaging.version.parse( - metadata[NEQUIP_VERSION_KEY] - ) < packaging.version.parse("0.6.0") - global_config_dict["default_dtype"] = str(default_dtype) + global_config_dict["default_dtype"] = str(metadata[DEFAULT_DTYPE_KEY]) # JIT strategy strategy = metadata.get(JIT_FUSION_STRATEGY, "") if strategy != "": From 28c0643cf5483c8e8d9f70f736874cc339ec4556 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 22 Feb 2023 20:56:20 -0500 Subject: [PATCH 095/157] fix relaxed atol to be in both checks --- nequip/utils/unittests/model_tests.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/nequip/utils/unittests/model_tests.py b/nequip/utils/unittests/model_tests.py index bd311ce5..e0fd7cf6 100644 --- a/nequip/utils/unittests/model_tests.py +++ b/nequip/utils/unittests/model_tests.py @@ -79,11 +79,18 @@ def test_jit(self, model, atomic_batch, device): instance = instance.to(device=device) model_script = script(instance) + atol = { + # tight, but not that tight, since GPU nondet has to pass + # plus model insides are still float32 with global dtype float64 in the tests + torch.float32: 5e-6, + torch.float64: 5e-7, + }[torch.get_default_dtype()] + for out_field in out_fields: assert torch.allclose( instance(data)[out_field], model_script(data)[out_field], - atol=1e-6, + atol=atol, ) # - Try saving, loading in another process, and running - @@ -97,13 +104,6 @@ def test_jit(self, model, atomic_batch, device): load_model = torch.jit.load(tmpdir + "/model.pt") load_dat = torch.load(tmpdir + "/dat.pt") - atol = { - # tight, but not that tight, since GPU nondet has to pass - # plus model insides are still float32 with global dtype float64 in the tests - torch.float32: 5e-6, - torch.float64: 5e-7, - }[torch.get_default_dtype()] - for out_field in out_fields: assert torch.allclose( model_script(data)[out_field], From 6fa97f3b6f67d29ec4a32d118cc1953166fb982b Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 28 Feb 2023 11:39:51 -0500 Subject: [PATCH 096/157] add error --- nequip/nn/pair_potential.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nequip/nn/pair_potential.py b/nequip/nn/pair_potential.py index 59fc18a2..7c0a5769 100644 --- a/nequip/nn/pair_potential.py +++ b/nequip/nn/pair_potential.py @@ -241,6 +241,10 @@ def __init__( ase.data.atomic_numbers[type_to_chemical_symbol[type_i]] for type_i in range(num_types) ] + if min(atomic_numbers) < 1: + raise ValueError( + f"Your chemical symbols don't seem valid (minimum atomic number is {min(atomic_numbers)} < 1); did you try to use fake chemical symbols for arbitrary atom types? If so, instead provide atom_types directly in your dataset and specify `type_names` and `type_to_chemical_symbol` in your config. `type_to_chemical_symbol` then tells ZBL what atomic numbers to use for the various atom types in your system." + ) else: raise RuntimeError( "Either chemical_symbol_to_type or type_to_chemical_symbol is required." 
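For context, a minimal sketch of the `type_to_chemical_symbol` mapping that this error message points users toward; the three-type system here is made up for illustration:

```python
import ase.data

# Hypothetical mapping from model atom types (0, 1, ...) to real chemical
# symbols, so ZBL can look up valid atomic numbers:
type_to_chemical_symbol = {0: "H", 1: "C", 2: "O"}

atomic_numbers = [
    ase.data.atomic_numbers[type_to_chemical_symbol[type_i]]
    for type_i in range(len(type_to_chemical_symbol))
]
# The check added above rejects placeholder "elements" with Z < 1:
assert min(atomic_numbers) >= 1
```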
From 8f3e6f336086696129a1c386271849c26a7633bb Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 28 Feb 2023 11:40:42 -0500 Subject: [PATCH 097/157] StressForceOutput default --- CHANGELOG.md | 1 + nequip/scripts/train.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 13ed2ae9..4aa7d391 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ Most recent change on the bottom. - Will not reproduce previous versions' data shuffling order (for all practical purposes this does not matter, the `shuffle` option is unchanged) - [Breaking] `default_dtype` defaults to `float64` (`model_dtype` default `float32`) - `nequip-benchmark` now only uses `--n-data` frames to build the model +- [Breaking] By default models now use `StressForceOutput`, not `ForceOutput` ### Fixed - Work with `wandb>=0.13.8` diff --git a/nequip/scripts/train.py b/nequip/scripts/train.py index 4d81e7b8..7a3ef897 100644 --- a/nequip/scripts/train.py +++ b/nequip/scripts/train.py @@ -30,7 +30,7 @@ "SimpleIrrepsConfig", "EnergyModel", "PerSpeciesRescale", - "ForceOutput", + "StressForceOutput", "RescaleEnergyEtc", ], dataset_statistics_stride=1, From 373e1202e34cf9f3bbaf3bf87f597e618548f484 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 28 Feb 2023 13:46:00 -0500 Subject: [PATCH 098/157] better version parsing --- nequip/scripts/deploy.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/nequip/scripts/deploy.py b/nequip/scripts/deploy.py index 7f16a7bb..3215e82f 100644 --- a/nequip/scripts/deploy.py +++ b/nequip/scripts/deploy.py @@ -263,7 +263,11 @@ def main(args=None): metadata[TYPE_NAMES_KEY] = " ".join(type_names) metadata[JIT_BAILOUT_KEY] = str(config[JIT_BAILOUT_KEY]) - if int(torch.__version__.split(".")[1]) >= 11 and JIT_FUSION_STRATEGY in config: + if ( + packaging.version.parse(torch.__version__) + >= packaging.version.parse("1.11") + and JIT_FUSION_STRATEGY in config + ): metadata[JIT_FUSION_STRATEGY] = ";".join( "%s,%i" % e for e in config[JIT_FUSION_STRATEGY] ) From aecf025bc58bc2dfbf90a94c99b73c8adfc3e821 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 28 Feb 2023 13:47:03 -0500 Subject: [PATCH 099/157] global options fuse --- nequip/scripts/train.py | 2 ++ nequip/utils/_global_options.py | 22 +++++++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/nequip/scripts/train.py b/nequip/scripts/train.py index 7a3ef897..d01d78de 100644 --- a/nequip/scripts/train.py +++ b/nequip/scripts/train.py @@ -54,6 +54,8 @@ # We default to DYNAMIC alone because the number of edges is always dynamic, # even if the number of atoms is fixed: _jit_fusion_strategy=[("DYNAMIC", 3)], + # Due to what appear to be ongoing bugs with nvFuser, we default to NNC (fuser1) for now: + _jit_fuser="fuser1", ) # All default_config keys are valid / requested _GLOBAL_ALL_ASKED_FOR_KEYS.update(default_config.keys()) diff --git a/nequip/utils/_global_options.py b/nequip/utils/_global_options.py index 2567564c..cd999766 100644 --- a/nequip/utils/_global_options.py +++ b/nequip/utils/_global_options.py @@ -1,4 +1,5 @@ import warnings +from packaging import version import torch @@ -49,7 +50,7 @@ def _set_global_options(config, warn_on_override: bool = False) -> None: torch.backends.cuda.matmul.allow_tf32 = config["allow_tf32"] torch.backends.cudnn.allow_tf32 = 
config["allow_tf32"] - if int(torch.__version__.split(".")[1]) >= 11: + if version.parse(torch.__version__) >= version.parse("1.11"): # PyTorch >= 1.11 k = "_jit_fusion_strategy" if k in config: @@ -71,6 +72,25 @@ def _set_global_options(config, warn_on_override: bool = False) -> None: f"Setting the GLOBAL value for jit bailout depth to `{new_depth}` which is different than the previous value of `{old_depth}`" ) + # Deal with fusers + # The default PyTorch fuser changed to nvFuser in 1.12 + # fuser1 is NNC, fuser2 is nvFuser + # See https://github.com/pytorch/pytorch/blob/master/torch/csrc/jit/OVERVIEW.md#fusers + # And https://github.com/pytorch/pytorch/blob/e0a0f37a11164f59b42bc80a6f95b54f722d47ce/torch/jit/_fuser.py#L46 + default_fuser = ( + "fuser2" + if version.parse(torch.__version__) >= version.parse("1.12") + else "fuser1" + ) + fuser = config.get("_jit_fuser", default_fuser) + # context manager just restores old fuser afterwards + torch.jit.fuser(fuser).__enter__() + if warn_on_override and fuser != default_fuser: + # ^ meh assumption, but better than hardcoding getting the old state + warnings.warn( + f"Setting the GLOBAL value for JIT fuser to `{fuser}`, which is different than the default for your current PyTorch version ({torch.__version__}) of `{default_fuser}`" + ) + # TODO: warn_on_override for the rest here? if config.get("model_debug_mode", False): set_irreps_debug(enabled=True) From 57789df9fc48dbfa36109487212b458fb7d35dd7 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 28 Feb 2023 13:49:36 -0500 Subject: [PATCH 100/157] absmax --- CHANGELOG.md | 1 + nequip/data/dataset.py | 3 +++ nequip/model/_scaling.py | 8 ++++---- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4aa7d391..09921e0e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ Most recent change on the bottom. 
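The fuser handling added in the patch above leans on PyTorch's context-manager API; a minimal sketch of the trick, assuming only that `torch.jit.fuser` behaves as documented:

```python
import torch

# torch.jit.fuser() swaps the active TorchScript fuser and restores the
# previous one on exit; calling only __enter__(), as the patch does, makes
# the selection effectively permanent for the process.
fuser_ctx = torch.jit.fuser("fuser1")  # "fuser1" = NNC, "fuser2" = nvFuser
fuser_ctx.__enter__()  # intentionally never exited
```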
- Pair potential terms
- `nequip-evaluate --output-fields-from-original-dataset`
- Error (or warn) on unused options in YAML that likely indicate typos
+- `dataset_*_absmax` statistics option

 ### Changed
 - Always require explicit `seed`
diff --git a/nequip/data/dataset.py b/nequip/data/dataset.py
index af53e734..2764a062 100644
--- a/nequip/data/dataset.py
+++ b/nequip/data/dataset.py
@@ -471,6 +471,9 @@ def statistics(
             std = torch.std(arr, dim=0, unbiased=unbiased)
             out.append((mean, std))

+        elif ana_mode == "absmax":
+            out.append((arr.abs().max(),))
+
         elif ana_mode.startswith("per_species_"):
             # per-species
             algorithm_kwargs = kwargs.pop(field + ana_mode, {})
diff --git a/nequip/model/_scaling.py b/nequip/model/_scaling.py
index f9b5d208..73d894a6 100644
--- a/nequip/model/_scaling.py
+++ b/nequip/model/_scaling.py
@@ -281,7 +281,7 @@ def _compute_stats(
     stat_strs = []
     ids = []
     tuple_ids = []
-    tuple_id_map = {"mean": 0, "std": 1, "rms": 0}
+    tuple_id_map = {"mean": 0, "std": 1, "rms": 0, "absmax": 0}
     input_kwargs = {}

     for name in str_names:
@@ -302,9 +302,9 @@ def _compute_stats(
         if stat in ["mean", "std"]:
             stat_mode = prefix + "mean_std"
             stat_str = field + prefix + "mean_std"
-        elif stat in ["rms"]:
-            stat_mode = prefix + "rms"
-            stat_str = field + prefix + "rms"
+        elif stat in ["rms", "absmax"]:
+            stat_mode = prefix + stat
+            stat_str = field + prefix + stat
         else:
             raise ValueError(f"Cannot handle {stat} type quantity")

From 190a9aabc659d4c1bfbd673d5c183261644f4138 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Tue, 28 Feb 2023 13:59:41 -0500
Subject: [PATCH 101/157] document absmax

---
 configs/full.yaml      | 6 +++++-
 nequip/data/dataset.py | 6 ++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/configs/full.yaml b/configs/full.yaml
index 5997b14f..1e248906 100644
--- a/configs/full.yaml
+++ b/configs/full.yaml
@@ -314,6 +314,7 @@ per_species_rescale_scales: dataset_forces_rms
 # initial atomic energy scale for each species. Optional.
 # the value can be a constant float value, an array for each species, or a string
 # string option include:
+# * "dataset_forces_absmax", which computes the dataset maximum force component magnitude
 # * "dataset_per_atom_total_energy_std", which computes the per atom energy std
 # * "dataset_per_species_total_energy_std", which uses the GP model uncertainty
 # * "dataset_per_species_forces_rms", which compute the force rms for each species
@@ -332,7 +333,10 @@ per_species_rescale_scales: dataset_forces_rms
 # Warning: if this value is not None, the model is no longer size extensive
 global_rescale_shift: null

-# global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_total_energy_std", the stdev of energies in the dataset. When null, disables the global scale. When a number, used directly.
+# global energy scale. When "dataset_force_rms", the RMS of force components in the dataset.
+# When "dataset_forces_absmax", the maximum force component magnitude in the dataset.
+# When "dataset_total_energy_std", the stdev of energies in the dataset.
+# When null, disables the global scale. When a number, used directly.
 # If not provided, defaults to either dataset_force_rms or dataset_total_energy_std, depending on whether forces are being trained.
 global_rescale_scale: dataset_forces_rms
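As a quick illustration of what the new option computes (a sketch with stand-in data, not code from the patch):

```python
import torch

# Stand-in forces field: 10 frames x 14 atoms x 3 components.
forces = torch.randn(10, 14, 3)

# "absmax" as implemented above reduces the whole field to the largest
# absolute value of any single component:
forces_absmax = forces.abs().max()
```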
diff --git a/nequip/data/dataset.py b/nequip/data/dataset.py
index 2764a062..580070bc 100644
--- a/nequip/data/dataset.py
+++ b/nequip/data/dataset.py
@@ -541,6 +541,8 @@ def _per_atom_statistics(
             return mean, std
         elif ana_mode == "rms":
             return (torch.sqrt(torch.mean(arr.square())),)
+        elif ana_mode == "absmax":
+            return (torch.max(arr.abs()),)
         else:
             raise NotImplementedError(
                 f"{ana_mode} for per-atom analysis is not implemented"
@@ -595,6 +597,10 @@ def _per_species_statistics(
                 for i in range(dims):
                     square = square.mean(axis=-1)
                 return (torch.sqrt(square),)
+            else:
+                raise NotImplementedError(
+                    f"Statistics mode {ana_mode} isn't yet implemented for per_species_"
+                )

         else:
             raise NotImplementedError

From 9f99f8e0d154410325f5fbd8872a4fe676cea3a6 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Wed, 1 Mar 2023 14:46:19 -0500
Subject: [PATCH 102/157] better statistics N<2 error than nan

---
 CHANGELOG.md           |  1 +
 nequip/data/dataset.py | 12 ++++++++++++
 2 files changed, 13 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 09921e0e..2b11bf64 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -31,6 +31,7 @@ Most recent change on the bottom.

 ### Fixed
 - Work with `wandb>=0.13.8`
+- Better error for standard deviation with too few data

 ### Removed
 - [Breaking] `fixed_fields` machinery (`npz_fixed_field_keys` is still supported, but through a more straightforward implementation)

diff --git a/nequip/data/dataset.py b/nequip/data/dataset.py
index 580070bc..bc513c16 100644
--- a/nequip/data/dataset.py
+++ b/nequip/data/dataset.py
@@ -467,6 +467,10 @@ def statistics(

         elif ana_mode == "mean_std":
             # mean and std
+            if len(arr) < 2:
+                raise ValueError(
+                    "Can't do standard deviation without at least two samples"
+                )
             mean = torch.mean(arr, dim=0)
             std = torch.std(arr, dim=0, unbiased=unbiased)
             out.append((mean, std))
@@ -536,6 +540,10 @@ def _per_atom_statistics(
         arr = arr / N
         assert arr.shape == (len(N),) + data_dim
         if ana_mode == "mean_std":
+            if len(arr) < 2:
+                raise ValueError(
+                    "Can't do standard deviation without at least two samples"
+                )
             mean = torch.mean(arr, dim=0)
             std = torch.std(arr, unbiased=unbiased, dim=0)
             return mean, std
@@ -583,6 +591,10 @@ def _per_species_statistics(
         arr = arr.type(self.dtype)

         if ana_mode == "mean_std":
+            if torch.any(N < 2):
+                raise ValueError(
+                    "Can't do per species standard deviation without at least two samples per species"
+                )
             mean = scatter_mean(arr, atom_types, dim=0)
             assert mean.shape[1:] == arr.shape[1:]  # [N, dims] -> [type, dims]
             assert len(mean) == N.shape[1]

From d88687dcf83bb7741d0fc65e9679d96280121499 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Wed, 1 Mar 2023 15:10:25 -0500
Subject: [PATCH 103/157] fix default

---
 nequip/scripts/benchmark.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nequip/scripts/benchmark.py b/nequip/scripts/benchmark.py
index 90ed0218..933af3dc 100644
--- a/nequip/scripts/benchmark.py
+++ b/nequip/scripts/benchmark.py
@@ -63,7 +63,7 @@ def main(args=None):
         "--n-data",
         help="Number of frames to use.",
         type=int,
-        default=1,
+        default=2,
     )
     parser.add_argument(
         "--timestep",

From 539a1a497e93a704468d8a8a209ee34cecb99262 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Wed, 1 Mar 2023 15:10:59 -0500
Subject: [PATCH 104/157] document

---
 configs/minimal_toy_emt.yaml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/configs/minimal_toy_emt.yaml b/configs/minimal_toy_emt.yaml
index 0bc62361..9a7ebcb6 100644
--- a/configs/minimal_toy_emt.yaml
+++ b/configs/minimal_toy_emt.yaml
@@ -20,7 +20,10 @@ num_features: 16
 num_layers: 4

 # data set
-dataset: EMTTest # type of data set, can be npz or ase
+# A toy dataset of metallic bulks using ASE's toy EMT potential
+# Useful for quick tests, but NOT REAL DATA
+dataset: EMTTest
+dataset_supercell: [4, 4, 4]
 dataset_element: Cu
 dataset_num_frames: 50
 chemical_symbols:

From ff2d2c6648864e7adf226118db5960bcbbadd510 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Wed, 1 Mar 2023 21:35:44 -0500
Subject: [PATCH 105/157] fix on CPU

---
 nequip/scripts/train.py         | 2 ++
 nequip/utils/_global_options.py | 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/nequip/scripts/train.py b/nequip/scripts/train.py
index d01d78de..c7316b01 100644
--- a/nequip/scripts/train.py
+++ b/nequip/scripts/train.py
@@ -55,6 +55,8 @@
     # even if the number of atoms is fixed:
     _jit_fusion_strategy=[("DYNAMIC", 3)],
     # Due to what appear to be ongoing bugs with nvFuser, we default to NNC (fuser1) for now:
+    # TODO: still default to NNC on CPU regardless even if change this for GPU
+    # TODO: default for ROCm?
     _jit_fuser="fuser1",
 )
 # All default_config keys are valid / requested

diff --git a/nequip/utils/_global_options.py b/nequip/utils/_global_options.py
index cd999766..2959df90 100644
--- a/nequip/utils/_global_options.py
+++ b/nequip/utils/_global_options.py
@@ -78,7 +78,9 @@ def _set_global_options(config, warn_on_override: bool = False) -> None:
     # See https://github.com/pytorch/pytorch/blob/master/torch/csrc/jit/OVERVIEW.md#fusers
     # And https://github.com/pytorch/pytorch/blob/e0a0f37a11164f59b42bc80a6f95b54f722d47ce/torch/jit/_fuser.py#L46
     default_fuser = (
-        "fuser2"
+        "fuser2"  # TODO: does this make sense for ROCm?
+        if torch.cuda.is_available()
+        else "fuser1"  # default to NNC on CPU for now no matter what
         if version.parse(torch.__version__) >= version.parse("1.12")
         else "fuser1"
     )

From 3a6ae34a41e7eca4445d6f6357a447695a5b6417 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Thu, 2 Mar 2023 11:59:40 -0500
Subject: [PATCH 106/157] fix data count errors

---
 nequip/data/dataset.py           | 4 +++-
 tests/integration/test_deploy.py | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/nequip/data/dataset.py b/nequip/data/dataset.py
index bc513c16..1d40e3f7 100644
--- a/nequip/data/dataset.py
+++ b/nequip/data/dataset.py
@@ -591,7 +591,9 @@ def _per_species_statistics(
         arr = arr.type(self.dtype)

         if ana_mode == "mean_std":
-            if torch.any(N < 2):
+            # There need to be at least two occurrences of each atom type in the
+            # WHOLE dataset, not in any given frame:
+            if torch.any(N.sum(dim=0) < 2):
                 raise ValueError(
                     "Can't do per species standard deviation without at least two samples per species"
                 )
diff --git a/tests/integration/test_deploy.py b/tests/integration/test_deploy.py
index 93f2d69e..51d6b936 100644
--- a/tests/integration/test_deploy.py
+++ b/tests/integration/test_deploy.py
@@ -49,7 +49,7 @@ def test_deploy(BENCHMARK_ROOT, device, model_dtype):
         true_config["default_dtype"] = dtype
         true_config["model_dtype"] = model_dtype
         true_config["max_epochs"] = 1
-        true_config["n_train"] = 1
+        true_config["n_train"] = 2
         true_config["n_val"] = 1
         config_path = "conf.yaml"
         full_config_path = f"{tmpdir}/{config_path}"

From 08a35be5714b30607a97d635609d3d38f43070e0 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Fri, 3 Mar 2023 18:14:16 -0500
Subject: [PATCH 107/157] fix load_model_state with CUDA to CPU

---
 CHANGELOG.md                 | 1 +
 nequip/model/_weight_init.py | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2b11bf64..20c47aa6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -32,6 +32,7 @@ Most recent change on the bottom.
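A small sketch of the corrected count check from the patch above, with a made-up count matrix:

```python
import torch

# N counts atoms of each type per frame, shape [n_frames, n_types]; the
# per-species standard deviation is taken over the whole dataset, so each
# type only needs >= 2 occurrences in total, not >= 2 in every frame.
N = torch.tensor([[1, 0], [1, 3]])  # two frames, two types
assert not torch.any(N.sum(dim=0) < 2)  # totals: type 0 -> 2, type 1 -> 3
```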
### Fixed - Work with `wandb>=0.13.8` - Better error for standard deviation with too few data +- `load_model_state` GPU -> CPU ### Removed - [Breaking] `fixed_fields` machinery (`npz_fixed_field_keys` is still supported, but through a more straightforward implementation) diff --git a/nequip/model/_weight_init.py b/nequip/model/_weight_init.py index d0f7003d..1ba57c5d 100644 --- a/nequip/model/_weight_init.py +++ b/nequip/model/_weight_init.py @@ -53,7 +53,10 @@ def load_model_state( raise KeyError( f"initialize_from_state requires the `{_prefix}` option specifying the state to initialize from" ) - state = torch.load(config[_prefix]) + # Make sure we map to CPU if there is no GPU, otherwise just leave it alone + state = torch.load( + config[_prefix], map_location=None if torch.cuda.is_available() else "cpu" + ) graph_model.load_state_dict(state, strict=config.get(_prefix + "_strict", True)) return graph_model From 54805d24352922d4fabe0f766aef2e691cf612bd Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 6 Mar 2023 11:53:35 -0800 Subject: [PATCH 108/157] Fix torchscript when no shifts/scales --- nequip/nn/_atomwise.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nequip/nn/_atomwise.py b/nequip/nn/_atomwise.py index a03ccbf1..bd238072 100644 --- a/nequip/nn/_atomwise.py +++ b/nequip/nn/_atomwise.py @@ -184,6 +184,8 @@ def __init__( self.shifts = torch.nn.Parameter(shifts) else: self.register_buffer("shifts", shifts) + else: + self.register_buffer("shifts", torch.Tensor()) self.has_scales = scales is not None if scales is not None: @@ -199,6 +201,8 @@ def __init__( self.scales = torch.nn.Parameter(scales) else: self.register_buffer("scales", scales) + else: + self.register_buffer("scales", torch.Tensor()) self.arguments_in_dataset_units = arguments_in_dataset_units From 3d14cbe28c185fece543260d14ed490ac61f2899 Mon Sep 17 00:00:00 2001 From: Peter Eastman Date: Fri, 17 Mar 2023 13:25:59 -0700 Subject: [PATCH 109/157] Add HDF5 based dataset option (#227) Co-authored-by: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> --- .github/workflows/tests.yml | 1 + .github/workflows/tests_develop.yml | 1 + CHANGELOG.md | 1 + docs/howto/dataset.rst | 2 +- nequip/data/__init__.py | 9 +- nequip/data/_dataset/__init__.py | 6 + nequip/data/_dataset/_ase_dataset.py | 238 ++++++++++++ .../_base_datasets.py} | 367 +----------------- nequip/data/_dataset/_hdf5_dataset.py | 171 ++++++++ nequip/data/_dataset/_npz_dataset.py | 141 +++++++ tests/unit/data/test_dataset.py | 65 +++- 11 files changed, 618 insertions(+), 384 deletions(-) create mode 100644 nequip/data/_dataset/__init__.py create mode 100644 nequip/data/_dataset/_ase_dataset.py rename nequip/data/{dataset.py => _dataset/_base_datasets.py} (67%) create mode 100644 nequip/data/_dataset/_hdf5_dataset.py create mode 100644 nequip/data/_dataset/_npz_dataset.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 79df1094..cd8c3d3f 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -32,6 +32,7 @@ jobs: python -m pip install --upgrade pip pip install setuptools wheel pip install torch==${TORCH} -f https://download.pytorch.org/whl/cpu/torch_stable.html + pip install h5py pip install --upgrade-strategy only-if-needed . 
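The shifts/scales fix in patch 108 above uses a common TorchScript pattern: always register the buffer, with an empty tensor standing in for "absent", so the attribute exists with a fixed type for the compiler. A minimal sketch (the `Rescale` module here is hypothetical, not the NequIP class):

```python
import torch


class Rescale(torch.nn.Module):
    def __init__(self, scales=None):
        super().__init__()
        self.has_scales = scales is not None
        # Register the buffer unconditionally so TorchScript always sees a
        # Tensor attribute, even when there are no scales:
        self.register_buffer(
            "scales", scales if scales is not None else torch.Tensor()
        )

    def forward(self, x):
        if self.has_scales:
            x = x * self.scales
        return x


# Compiles whether or not scales were provided:
torch.jit.script(Rescale())
torch.jit.script(Rescale(scales=torch.ones(3)))
```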
- name: Install pytest run: | diff --git a/.github/workflows/tests_develop.yml b/.github/workflows/tests_develop.yml index 5aa1ab08..27bdd93f 100644 --- a/.github/workflows/tests_develop.yml +++ b/.github/workflows/tests_develop.yml @@ -32,6 +32,7 @@ jobs: python -m pip install --upgrade pip pip install setuptools wheel pip install torch==${TORCH} -f https://download.pytorch.org/whl/cpu/torch_stable.html + pip install h5py pip install --upgrade-strategy only-if-needed . - name: Install pytest run: | diff --git a/CHANGELOG.md b/CHANGELOG.md index 20c47aa6..0ecb77ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ Most recent change on the bottom. - `nequip-evaluate --output-fields-from-original-dataset` - Error (or warn) on unused options in YAML that likely indicate typos - `dataset_*_absmax` statistics option +- `HDF5Dataset` (#227) ### Changed - Always require explicit `seed` diff --git a/docs/howto/dataset.rst b/docs/howto/dataset.rst index 7b18073e..7c535d47 100644 --- a/docs/howto/dataset.rst +++ b/docs/howto/dataset.rst @@ -37,7 +37,7 @@ For example, ``dataset_file_name`` is used for training data and ``validation_da Python interface ~~~~~~~~~~~~~~~~ -See ``nequip.data.dataset.AtomicInMemoryDataset``. +See ``nequip.data.AtomicInMemoryDataset``. Prepare dataset and specify in yaml config ------------------------------------------ diff --git a/nequip/data/__init__.py b/nequip/data/__init__.py index 21cfd3de..02c41d55 100644 --- a/nequip/data/__init__.py +++ b/nequip/data/__init__.py @@ -9,7 +9,13 @@ _GRAPH_FIELDS, _LONG_FIELDS, ) -from .dataset import AtomicDataset, AtomicInMemoryDataset, NpzDataset, ASEDataset +from ._dataset import ( + AtomicDataset, + AtomicInMemoryDataset, + NpzDataset, + ASEDataset, + HDF5Dataset, +) from .dataloader import DataLoader, Collater, PartialSampler from ._build import dataset_from_config from ._test_data import EMTTestDataset @@ -24,6 +30,7 @@ AtomicInMemoryDataset, NpzDataset, ASEDataset, + HDF5Dataset, DataLoader, Collater, PartialSampler, diff --git a/nequip/data/_dataset/__init__.py b/nequip/data/_dataset/__init__.py new file mode 100644 index 00000000..9948e377 --- /dev/null +++ b/nequip/data/_dataset/__init__.py @@ -0,0 +1,6 @@ +from ._base_datasets import AtomicDataset, AtomicInMemoryDataset +from ._ase_dataset import ASEDataset +from ._npz_dataset import NpzDataset +from ._hdf5_dataset import HDF5Dataset + +__all__ = [ASEDataset, AtomicDataset, AtomicInMemoryDataset, NpzDataset, HDF5Dataset] diff --git a/nequip/data/_dataset/_ase_dataset.py b/nequip/data/_dataset/_ase_dataset.py new file mode 100644 index 00000000..3246d791 --- /dev/null +++ b/nequip/data/_dataset/_ase_dataset.py @@ -0,0 +1,238 @@ +import tempfile +import functools +import itertools +from os.path import dirname, basename, abspath +from typing import Dict, Any, List, Union, Optional, Sequence + +import ase +import ase.io + +import torch +import torch.multiprocessing as mp + + +from nequip.utils.multiprocessing import num_tasks +from .. 
import AtomicData +from ..transforms import TypeMapper +from ._base_datasets import AtomicInMemoryDataset + + +def _ase_dataset_reader( + rank: int, + world_size: int, + tmpdir: str, + ase_kwargs: dict, + atomicdata_kwargs: dict, + include_frames, + global_options: dict, +) -> Union[str, List[AtomicData]]: + """Parallel reader for all frames in file.""" + if world_size > 1: + from nequip.utils._global_options import _set_global_options + + # ^ avoid import loop + # we only `multiprocessing` if world_size > 1 + _set_global_options(global_options) + # interleave--- in theory it is better for performance for the ranks + # to read consecutive blocks, but the way ASE is written the whole + # file gets streamed through all ranks anyway, so just trust the OS + # to cache things sanely, which it will. + # ASE handles correctly the case where there are no frames in index + # and just gives an empty list, so that will succeed: + index = slice(rank, None, world_size) + if include_frames is None: + # count includes 0, 1, ..., inf + include_frames = itertools.count() + + datas = [] + # stream them from ase too using iread + for i, atoms in enumerate(ase.io.iread(**ase_kwargs, index=index, parallel=False)): + global_index = rank + (world_size * i) + datas.append( + ( + global_index, + AtomicData.from_ase(atoms=atoms, **atomicdata_kwargs) + if global_index in include_frames + # in-memory dataset will ignore this later, but needed for indexing to work out + else None, + ) + ) + # Save to a tempfile--- + # there can be a _lot_ of tensors here, and rather than dealing with + # the complications of running out of file descriptors and setting + # sharing methods, since this is a one time thing, just make it simple + # and avoid shared memory entirely. + if world_size > 1: + path = f"{tmpdir}/rank{rank}.pth" + torch.save(datas, path) + return path + else: + return datas + + +class ASEDataset(AtomicInMemoryDataset): + """ + + Args: + ase_args (dict): arguments for ase.io.read + include_keys (list): in addition to forces and energy, the keys that needs to + be parsed into dataset + The data stored in ase.atoms.Atoms.array has the lowest priority, + and it will be overrided by data in ase.atoms.Atoms.info + and ase.atoms.Atoms.calc.results. Optional + key_mapping (dict): rename some of the keys to the value str. 
Optional + + Example: Given an atomic data stored in "H2.extxyz" that looks like below: + + ```H2.extxyz + 2 + Properties=species:S:1:pos:R:3 energy=-10 user_label=2.0 pbc="F F F" + H 0.00000000 0.00000000 0.00000000 + H 0.00000000 0.00000000 1.02000000 + ``` + + The yaml input should be + + ``` + dataset: ase + dataset_file_name: H2.extxyz + ase_args: + format: extxyz + include_keys: + - user_label + key_mapping: + user_label: label0 + chemical_symbols: + - H + ``` + + for VASP parser, the yaml input should be + ``` + dataset: ase + dataset_file_name: OUTCAR + ase_args: + format: vasp-out + key_mapping: + free_energy: total_energy + chemical_symbols: + - H + ``` + + """ + + def __init__( + self, + root: str, + ase_args: dict = {}, + file_name: Optional[str] = None, + url: Optional[str] = None, + AtomicData_options: Dict[str, Any] = {}, + include_frames: Optional[List[int]] = None, + type_mapper: TypeMapper = None, + key_mapping: Optional[dict] = None, + include_keys: Optional[List[str]] = None, + ): + self.ase_args = {} + self.ase_args.update(getattr(type(self), "ASE_ARGS", dict())) + self.ase_args.update(ase_args) + assert "index" not in self.ase_args + assert "filename" not in self.ase_args + + self.include_keys = include_keys + self.key_mapping = key_mapping + + super().__init__( + file_name=file_name, + url=url, + root=root, + AtomicData_options=AtomicData_options, + include_frames=include_frames, + type_mapper=type_mapper, + ) + + @classmethod + def from_atoms_list(cls, atoms: Sequence[ase.Atoms], **kwargs): + """Make an ``ASEDataset`` from a list of ``ase.Atoms`` objects. + + If `root` is not provided, a temporary directory will be used. + + Please note that this is a convinience method that does NOT avoid a round-trip to disk; the provided ``atoms`` will be written out to a file. + + Ignores ``kwargs["file_name"]`` if it is provided. + + Args: + atoms + **kwargs: passed through to the constructor + Returns: + The constructed ``ASEDataset``. + """ + if "root" not in kwargs: + tmpdir = tempfile.TemporaryDirectory() + kwargs["root"] = tmpdir.name + else: + tmpdir = None + kwargs["file_name"] = tmpdir.name + "/atoms.xyz" + atoms = list(atoms) + # Write them out + ase.io.write(kwargs["file_name"], atoms, format="extxyz") + # Read them in + obj = cls(**kwargs) + if tmpdir is not None: + # Make it keep a reference to the tmpdir to keep it alive + # When the dataset is garbage collected, the tmpdir will + # be too, and will (hopefully) get deleted eventually. + # Or at least by end of program... 
+ obj._tmpdir_ref = tmpdir + return obj + + @property + def raw_file_names(self): + return [basename(self.file_name)] + + @property + def raw_dir(self): + return dirname(abspath(self.file_name)) + + def get_data(self): + ase_args = {"filename": self.raw_dir + "/" + self.raw_file_names[0]} + ase_args.update(self.ase_args) + + # skip the None arguments + kwargs = dict( + include_keys=self.include_keys, + key_mapping=self.key_mapping, + ) + kwargs = {k: v for k, v in kwargs.items() if v is not None} + kwargs.update(self.AtomicData_options) + n_proc = num_tasks() + with tempfile.TemporaryDirectory() as tmpdir: + from nequip.utils._global_options import _get_latest_global_options + + # ^ avoid import loop + reader = functools.partial( + _ase_dataset_reader, + world_size=n_proc, + tmpdir=tmpdir, + ase_kwargs=ase_args, + atomicdata_kwargs=kwargs, + include_frames=self.include_frames, + # get the global options of the parent to initialize the worker correctly + global_options=_get_latest_global_options(), + ) + if n_proc > 1: + # things hang for some obscure OpenMP reason on some systems when using `fork` method + ctx = mp.get_context("forkserver") + with ctx.Pool(processes=n_proc) as p: + # map it over the `rank` argument + datas = p.map(reader, list(range(n_proc))) + # clean up the pool before loading the data + datas = [torch.load(d) for d in datas] + datas = sum(datas, []) + # un-interleave the datas + datas = sorted(datas, key=lambda e: e[0]) + else: + datas = reader(rank=0) + # datas here is already in order, stride 1 start 0 + # no need to un-interleave + # return list of AtomicData: + return [e[1] for e in datas] diff --git a/nequip/data/dataset.py b/nequip/data/_dataset/_base_datasets.py similarity index 67% rename from nequip/data/dataset.py rename to nequip/data/_dataset/_base_datasets.py index 1d40e3f7..d49b91d2 100644 --- a/nequip/data/dataset.py +++ b/nequip/data/_dataset/_base_datasets.py @@ -1,20 +1,13 @@ import numpy as np import logging -import tempfile import inspect -import functools import itertools import yaml import hashlib import math -from os.path import dirname, basename, abspath -from typing import Tuple, Dict, Any, List, Callable, Union, Optional, Sequence - -import ase -import ase.io +from typing import Tuple, Dict, Any, List, Callable, Union, Optional import torch -import torch.multiprocessing as mp from torch_runstats.scatter import scatter_std, scatter_mean @@ -28,13 +21,11 @@ _NODE_FIELDS, _EDGE_FIELDS, _GRAPH_FIELDS, - _LONG_FIELDS, ) from nequip.utils.batch_ops import bincount from nequip.utils.regressor import solver from nequip.utils.savenload import atomic_write -from nequip.utils.multiprocessing import num_tasks -from .transforms import TypeMapper +from ..transforms import TypeMapper class AtomicDataset(Dataset): @@ -670,357 +661,3 @@ def rdf( results[(type2, type1)] = results[(type1, type2)] return results - - -class NpzDataset(AtomicInMemoryDataset): - """Load data from an npz file. - - To avoid loading unneeded data, keys are ignored by default unless they are in ``key_mapping``, ``include_keys``, - or ``npz_fixed_fields_keys``. - - Args: - key_mapping (Dict[str, str]): mapping of npz keys to ``AtomicData`` keys. Optional - include_keys (list): the attributes to be processed and stored. Optional - npz_fixed_field_keys: the attributes that only have one instance but apply to all frames. 
Optional - Note that the mapped keys (as determined by the _values_ in ``key_mapping``) should be used in - ``npz_fixed_field_keys``, not the original npz keys from before mapping. If an npz key is not - present in ``key_mapping``, it is mapped to itself, and this point is not relevant. - - Example: Given a npz file with 10 configurations, each with 14 atoms. - - position: (10, 14, 3) - force: (10, 14, 3) - energy: (10,) - Z: (14) - user_label1: (10) # per config - user_label2: (10, 14, 3) # per atom - - The input yaml should be - - ```yaml - dataset: npz - dataset_file_name: example.npz - include_keys: - - user_label1 - - user_label2 - npz_fixed_field_keys: - - cell - - atomic_numbers - key_mapping: - position: pos - force: forces - energy: total_energy - Z: atomic_numbers - graph_fields: - - user_label1 - node_fields: - - user_label2 - ``` - - """ - - def __init__( - self, - root: str, - key_mapping: Dict[str, str] = { - "positions": AtomicDataDict.POSITIONS_KEY, - "energy": AtomicDataDict.TOTAL_ENERGY_KEY, - "force": AtomicDataDict.FORCE_KEY, - "forces": AtomicDataDict.FORCE_KEY, - "Z": AtomicDataDict.ATOMIC_NUMBERS_KEY, - "atomic_number": AtomicDataDict.ATOMIC_NUMBERS_KEY, - }, - include_keys: List[str] = [], - npz_fixed_field_keys: List[str] = [], - file_name: Optional[str] = None, - url: Optional[str] = None, - AtomicData_options: Dict[str, Any] = {}, - include_frames: Optional[List[int]] = None, - type_mapper: TypeMapper = None, - ): - self.key_mapping = key_mapping - self.npz_fixed_field_keys = npz_fixed_field_keys - self.include_keys = include_keys - - super().__init__( - file_name=file_name, - url=url, - root=root, - AtomicData_options=AtomicData_options, - include_frames=include_frames, - type_mapper=type_mapper, - ) - - @property - def raw_file_names(self): - return [basename(self.file_name)] - - @property - def raw_dir(self): - return dirname(abspath(self.file_name)) - - def get_data(self): - - data = np.load(self.raw_dir + "/" + self.raw_file_names[0], allow_pickle=True) - - # only the keys explicitly mentioned in the yaml file will be parsed - keys = set(list(self.key_mapping.keys())) - keys.update(self.npz_fixed_field_keys) - keys.update(self.include_keys) - keys = keys.intersection(set(list(data.keys()))) - - mapped = {self.key_mapping.get(k, k): data[k] for k in keys} - - for intkey in _LONG_FIELDS: - if intkey in mapped: - mapped[intkey] = mapped[intkey].astype(np.int64) - - fields = {k: v for k, v in mapped.items() if k not in self.npz_fixed_field_keys} - num_examples, num_atoms, n_dim = fields[AtomicDataDict.POSITIONS_KEY].shape - assert n_dim == 3 - - # now we replicate and add the fixed fields: - for fixed_field in self.npz_fixed_field_keys: - orig = mapped[fixed_field] - if fixed_field in _NODE_FIELDS: - assert orig.ndim >= 1 # [n_atom, feature_dims] - assert orig.shape[0] == num_atoms - replicated = np.expand_dims(orig, 0) - replicated = np.tile( - replicated, - (num_examples,) + (1,) * len(replicated.shape[1:]), - ) # [n_example, n_atom, feature_dims] - elif fixed_field in _GRAPH_FIELDS: - # orig is [feature_dims] - replicated = np.expand_dims(orig, 0) - replicated = np.tile( - replicated, - (num_examples,) + (1,) * len(replicated.shape[1:]), - ) # [n_example, feature_dims] - else: - raise KeyError( - f"npz_fixed_field_keys contains `{fixed_field}`, but it isn't registered as a node or graph field" - ) - fields[fixed_field] = replicated - return fields - - -def _ase_dataset_reader( - rank: int, - world_size: int, - tmpdir: str, - ase_kwargs: dict, - 
atomicdata_kwargs: dict, - include_frames, - global_options: dict, -) -> Union[str, List[AtomicData]]: - """Parallel reader for all frames in file.""" - if world_size > 1: - from nequip.utils._global_options import _set_global_options - - # ^ avoid import loop - # we only `multiprocessing` if world_size > 1 - _set_global_options(global_options) - # interleave--- in theory it is better for performance for the ranks - # to read consecutive blocks, but the way ASE is written the whole - # file gets streamed through all ranks anyway, so just trust the OS - # to cache things sanely, which it will. - # ASE handles correctly the case where there are no frames in index - # and just gives an empty list, so that will succeed: - index = slice(rank, None, world_size) - if include_frames is None: - # count includes 0, 1, ..., inf - include_frames = itertools.count() - - datas = [] - # stream them from ase too using iread - for i, atoms in enumerate(ase.io.iread(**ase_kwargs, index=index, parallel=False)): - global_index = rank + (world_size * i) - datas.append( - ( - global_index, - AtomicData.from_ase(atoms=atoms, **atomicdata_kwargs) - if global_index in include_frames - # in-memory dataset will ignore this later, but needed for indexing to work out - else None, - ) - ) - # Save to a tempfile--- - # there can be a _lot_ of tensors here, and rather than dealing with - # the complications of running out of file descriptors and setting - # sharing methods, since this is a one time thing, just make it simple - # and avoid shared memory entirely. - if world_size > 1: - path = f"{tmpdir}/rank{rank}.pth" - torch.save(datas, path) - return path - else: - return datas - - -class ASEDataset(AtomicInMemoryDataset): - """ - - Args: - ase_args (dict): arguments for ase.io.read - include_keys (list): in addition to forces and energy, the keys that needs to - be parsed into dataset - The data stored in ase.atoms.Atoms.array has the lowest priority, - and it will be overrided by data in ase.atoms.Atoms.info - and ase.atoms.Atoms.calc.results. Optional - key_mapping (dict): rename some of the keys to the value str. 
Optional - - Example: Given an atomic data stored in "H2.extxyz" that looks like below: - - ```H2.extxyz - 2 - Properties=species:S:1:pos:R:3 energy=-10 user_label=2.0 pbc="F F F" - H 0.00000000 0.00000000 0.00000000 - H 0.00000000 0.00000000 1.02000000 - ``` - - The yaml input should be - - ``` - dataset: ase - dataset_file_name: H2.extxyz - ase_args: - format: extxyz - include_keys: - - user_label - key_mapping: - user_label: label0 - chemical_symbols: - - H - ``` - - for VASP parser, the yaml input should be - ``` - dataset: ase - dataset_file_name: OUTCAR - ase_args: - format: vasp-out - key_mapping: - free_energy: total_energy - chemical_symbols: - - H - ``` - - """ - - def __init__( - self, - root: str, - ase_args: dict = {}, - file_name: Optional[str] = None, - url: Optional[str] = None, - AtomicData_options: Dict[str, Any] = {}, - include_frames: Optional[List[int]] = None, - type_mapper: TypeMapper = None, - key_mapping: Optional[dict] = None, - include_keys: Optional[List[str]] = None, - ): - self.ase_args = {} - self.ase_args.update(getattr(type(self), "ASE_ARGS", dict())) - self.ase_args.update(ase_args) - assert "index" not in self.ase_args - assert "filename" not in self.ase_args - - self.include_keys = include_keys - self.key_mapping = key_mapping - - super().__init__( - file_name=file_name, - url=url, - root=root, - AtomicData_options=AtomicData_options, - include_frames=include_frames, - type_mapper=type_mapper, - ) - - @classmethod - def from_atoms_list(cls, atoms: Sequence[ase.Atoms], **kwargs): - """Make an ``ASEDataset`` from a list of ``ase.Atoms`` objects. - - If `root` is not provided, a temporary directory will be used. - - Please note that this is a convinience method that does NOT avoid a round-trip to disk; the provided ``atoms`` will be written out to a file. - - Ignores ``kwargs["file_name"]`` if it is provided. - - Args: - atoms - **kwargs: passed through to the constructor - Returns: - The constructed ``ASEDataset``. - """ - if "root" not in kwargs: - tmpdir = tempfile.TemporaryDirectory() - kwargs["root"] = tmpdir.name - else: - tmpdir = None - kwargs["file_name"] = tmpdir.name + "/atoms.xyz" - atoms = list(atoms) - # Write them out - ase.io.write(kwargs["file_name"], atoms, format="extxyz") - # Read them in - obj = cls(**kwargs) - if tmpdir is not None: - # Make it keep a reference to the tmpdir to keep it alive - # When the dataset is garbage collected, the tmpdir will - # be too, and will (hopefully) get deleted eventually. - # Or at least by end of program... 
- obj._tmpdir_ref = tmpdir - return obj - - @property - def raw_file_names(self): - return [basename(self.file_name)] - - @property - def raw_dir(self): - return dirname(abspath(self.file_name)) - - def get_data(self): - ase_args = {"filename": self.raw_dir + "/" + self.raw_file_names[0]} - ase_args.update(self.ase_args) - - # skip the None arguments - kwargs = dict( - include_keys=self.include_keys, - key_mapping=self.key_mapping, - ) - kwargs = {k: v for k, v in kwargs.items() if v is not None} - kwargs.update(self.AtomicData_options) - n_proc = num_tasks() - with tempfile.TemporaryDirectory() as tmpdir: - from nequip.utils._global_options import _get_latest_global_options - - # ^ avoid import loop - reader = functools.partial( - _ase_dataset_reader, - world_size=n_proc, - tmpdir=tmpdir, - ase_kwargs=ase_args, - atomicdata_kwargs=kwargs, - include_frames=self.include_frames, - # get the global options of the parent to initialize the worker correctly - global_options=_get_latest_global_options(), - ) - if n_proc > 1: - # things hang for some obscure OpenMP reason on some systems when using `fork` method - ctx = mp.get_context("forkserver") - with ctx.Pool(processes=n_proc) as p: - # map it over the `rank` argument - datas = p.map(reader, list(range(n_proc))) - # clean up the pool before loading the data - datas = [torch.load(d) for d in datas] - datas = sum(datas, []) - # un-interleave the datas - datas = sorted(datas, key=lambda e: e[0]) - else: - datas = reader(rank=0) - # datas here is already in order, stride 1 start 0 - # no need to un-interleave - # return list of AtomicData: - return [e[1] for e in datas] diff --git a/nequip/data/_dataset/_hdf5_dataset.py b/nequip/data/_dataset/_hdf5_dataset.py new file mode 100644 index 00000000..5fce39e2 --- /dev/null +++ b/nequip/data/_dataset/_hdf5_dataset.py @@ -0,0 +1,171 @@ +from typing import Dict, Any, List, Callable, Union, Optional +from collections import defaultdict +import numpy as np + +import torch + +from .. import ( + AtomicData, + AtomicDataDict, +) +from ..transforms import TypeMapper +from ._base_datasets import AtomicDataset + + +class HDF5Dataset(AtomicDataset): + """A dataset that loads data from a HDF5 file. + + This class is useful for very large datasets that cannot fit in memory. It + efficiently loads data from disk as needed without everything needing to be + in memory at once. + + To use this, ``file_name`` should point to the HDF5 file, or alternatively a + semicolon separated list of multiple files. Each group in the file contains + samples that all have the same number of atoms. Typically there is one + group for each unique number of atoms, but that is not required. Each group + should contain arrays whose length equals the number of samples, one for each + type of data. The names of the arrays can be specified with ``key_mapping``. + + Args: + key_mapping (Dict[str, str]): mapping of array names in the HDF5 file to ``AtomicData`` keys + file_name (string): a semicolon separated list of HDF5 files. 
+ """ + + def __init__( + self, + root: str, + key_mapping: Dict[str, str] = { + "pos": AtomicDataDict.POSITIONS_KEY, + "energy": AtomicDataDict.TOTAL_ENERGY_KEY, + "forces": AtomicDataDict.FORCE_KEY, + "atomic_numbers": AtomicDataDict.ATOMIC_NUMBERS_KEY, + "types": AtomicDataDict.ATOM_TYPE_KEY, + }, + file_name: Optional[str] = None, + AtomicData_options: Dict[str, Any] = {}, + type_mapper: Optional[TypeMapper] = None, + ): + super().__init__(root=root, type_mapper=type_mapper) + self.key_mapping = key_mapping + self.key_list = list(key_mapping.keys()) + self.value_list = list(key_mapping.values()) + self.file_name = file_name + self.r_max = AtomicData_options["r_max"] + self.index = None + self.num_frames = 0 + import h5py + + files = [h5py.File(f, "r") for f in self.file_name.split(";")] + for file in files: + for group_name in file: + for key in self.key_list: + if key in file[group_name]: + self.num_frames += len(file[group_name][key]) + break + file.close() + + def setup_index(self): + import h5py + + files = [h5py.File(f, "r") for f in self.file_name.split(";")] + self.has_forces = False + self.index = [] + for file in files: + for group_name in file: + group = file[group_name] + values = [None] * len(self.key_list) + samples = 0 + for i, key in enumerate(self.key_list): + if key in group: + values[i] = group[key] + samples = len(values[i]) + for i in range(samples): + self.index.append(tuple(values + [i])) + + def len(self) -> int: + return self.num_frames + + def get(self, idx: int) -> AtomicData: + if self.index is None: + self.setup_index() + data = self.index[idx] + i = data[-1] + args = {"r_max": self.r_max} + for j, value in enumerate(self.value_list): + if data[j] is not None: + args[value] = data[j][i] + return AtomicData.from_points(**args) + + def statistics( + self, + fields: List[Union[str, Callable]], + modes: List[str], + stride: int = 1, + unbiased: bool = True, + kwargs: Optional[Dict[str, dict]] = {}, + ) -> List[tuple]: + assert len(modes) == len(fields) + # TODO: use RunningStats + if len(fields) == 0: + return [] + if self.index is None: + self.setup_index() + results = [] + indices = self.indices() + if stride != 1: + indices = list(indices)[::stride] + for field, mode in zip(fields, modes): + count = 0 + if mode == "rms": + total = 0.0 + elif mode in ("mean_std", "per_atom_mean_std"): + total = [0.0, 0.0] + elif mode == "count": + counts = defaultdict(int) + else: + raise NotImplementedError(f"Analysis mode '{mode}' is not implemented") + for index in indices: + data = self.index[index] + i = data[-1] + if field in self.value_list: + values = data[self.value_list.index(field)][i] + elif callable(field): + values, _ = field(self.get(index)) + values = np.asarray(values) + else: + raise RuntimeError( + f"The field key `{field}` is not present in this dataset" + ) + length = len(values.flatten()) + if length == 1: + values = np.array([values]) + if mode == "rms": + total += np.sum(values * values) + count += length + elif mode == "count": + for v in values: + counts[v] += 1 + else: + if mode == "per_atom_mean_std": + values /= len(data[0][i]) + for v in values: + count += 1 + delta1 = v - total[0] + total[0] += delta1 / count + delta2 = v - total[0] + total[1] += delta1 * delta2 + if mode == "rms": + results.append(torch.tensor((np.sqrt(total / count),))) + elif mode == "count": + values = sorted(counts.keys()) + results.append( + (torch.tensor(values), torch.tensor([counts[v] for v in values])) + ) + else: + results.append( + ( + torch.tensor(total[0]), + 
torch.tensor(np.sqrt(total[1] / (count - 1))), + ) + ) + return results diff --git a/nequip/data/_dataset/_npz_dataset.py b/nequip/data/_dataset/_npz_dataset.py new file mode 100644 index 00000000..3b28daaf --- /dev/null +++ b/nequip/data/_dataset/_npz_dataset.py @@ -0,0 +1,141 @@ +import numpy as np +from os.path import dirname, basename, abspath +from typing import Dict, Any, List, Optional + + +from .. import AtomicDataDict, _LONG_FIELDS, _NODE_FIELDS, _GRAPH_FIELDS +from ..transforms import TypeMapper +from ._base_datasets import AtomicInMemoryDataset + + +class NpzDataset(AtomicInMemoryDataset): + """Load data from an npz file. + + To avoid loading unneeded data, keys are ignored by default unless they are in ``key_mapping``, ``include_keys``, + or ``npz_fixed_fields_keys``. + + Args: + key_mapping (Dict[str, str]): mapping of npz keys to ``AtomicData`` keys. Optional + include_keys (list): the attributes to be processed and stored. Optional + npz_fixed_field_keys: the attributes that only have one instance but apply to all frames. Optional + Note that the mapped keys (as determined by the _values_ in ``key_mapping``) should be used in + ``npz_fixed_field_keys``, not the original npz keys from before mapping. If an npz key is not + present in ``key_mapping``, it is mapped to itself, and this point is not relevant. + + Example: Given a npz file with 10 configurations, each with 14 atoms. + + position: (10, 14, 3) + force: (10, 14, 3) + energy: (10,) + Z: (14) + user_label1: (10) # per config + user_label2: (10, 14, 3) # per atom + + The input yaml should be + + ```yaml + dataset: npz + dataset_file_name: example.npz + include_keys: + - user_label1 + - user_label2 + npz_fixed_field_keys: + - cell + - atomic_numbers + key_mapping: + position: pos + force: forces + energy: total_energy + Z: atomic_numbers + graph_fields: + - user_label1 + node_fields: + - user_label2 + ``` + + """ + + def __init__( + self, + root: str, + key_mapping: Dict[str, str] = { + "positions": AtomicDataDict.POSITIONS_KEY, + "energy": AtomicDataDict.TOTAL_ENERGY_KEY, + "force": AtomicDataDict.FORCE_KEY, + "forces": AtomicDataDict.FORCE_KEY, + "Z": AtomicDataDict.ATOMIC_NUMBERS_KEY, + "atomic_number": AtomicDataDict.ATOMIC_NUMBERS_KEY, + }, + include_keys: List[str] = [], + npz_fixed_field_keys: List[str] = [], + file_name: Optional[str] = None, + url: Optional[str] = None, + AtomicData_options: Dict[str, Any] = {}, + include_frames: Optional[List[int]] = None, + type_mapper: TypeMapper = None, + ): + self.key_mapping = key_mapping + self.npz_fixed_field_keys = npz_fixed_field_keys + self.include_keys = include_keys + + super().__init__( + file_name=file_name, + url=url, + root=root, + AtomicData_options=AtomicData_options, + include_frames=include_frames, + type_mapper=type_mapper, + ) + + @property + def raw_file_names(self): + return [basename(self.file_name)] + + @property + def raw_dir(self): + return dirname(abspath(self.file_name)) + + def get_data(self): + + data = np.load(self.raw_dir + "/" + self.raw_file_names[0], allow_pickle=True) + + # only the keys explicitly mentioned in the yaml file will be parsed + keys = set(list(self.key_mapping.keys())) + keys.update(self.npz_fixed_field_keys) + keys.update(self.include_keys) + keys = keys.intersection(set(list(data.keys()))) + + mapped = {self.key_mapping.get(k, k): data[k] for k in keys} + + for intkey in _LONG_FIELDS: + if intkey in mapped: + mapped[intkey] = mapped[intkey].astype(np.int64) + + fields = {k: v for k, v in mapped.items() if k not in 
self.npz_fixed_field_keys} + num_examples, num_atoms, n_dim = fields[AtomicDataDict.POSITIONS_KEY].shape + assert n_dim == 3 + + # now we replicate and add the fixed fields: + for fixed_field in self.npz_fixed_field_keys: + orig = mapped[fixed_field] + if fixed_field in _NODE_FIELDS: + assert orig.ndim >= 1 # [n_atom, feature_dims] + assert orig.shape[0] == num_atoms + replicated = np.expand_dims(orig, 0) + replicated = np.tile( + replicated, + (num_examples,) + (1,) * len(replicated.shape[1:]), + ) # [n_example, n_atom, feature_dims] + elif fixed_field in _GRAPH_FIELDS: + # orig is [feature_dims] + replicated = np.expand_dims(orig, 0) + replicated = np.tile( + replicated, + (num_examples,) + (1,) * len(replicated.shape[1:]), + ) # [n_example, feature_dims] + else: + raise KeyError( + f"npz_fixed_field_keys contains `{fixed_field}`, but it isn't registered as a node or graph field" + ) + fields[fixed_field] = replicated + return fields diff --git a/tests/unit/data/test_dataset.py b/tests/unit/data/test_dataset.py index 365dc320..001f0c3c 100644 --- a/tests/unit/data/test_dataset.py +++ b/tests/unit/data/test_dataset.py @@ -14,6 +14,7 @@ AtomicInMemoryDataset, NpzDataset, ASEDataset, + HDF5Dataset, dataset_from_config, register_fields, deregister_fields, @@ -64,6 +65,27 @@ def npz_dataset(npz_data, temp_data): yield a +@pytest.fixture(scope="function") +def hdf5_dataset(npz, temp_data): + try: + import h5py + except ModuleNotFoundError: + pytest.skip("h5py is not installed") + + with tempfile.NamedTemporaryFile(suffix=".hdf5") as path: + f = h5py.File(path.name, "w") + group = f.create_group("samples") + group.create_dataset("atomic_numbers", data=npz["Z"], dtype=np.int8) + group.create_dataset("pos", data=npz["positions"], dtype=np.float32) + group.create_dataset("energy", data=npz["energy"], dtype=np.float32) + group.create_dataset("forces", data=npz["force"], dtype=np.float32) + yield HDF5Dataset( + file_name=path.name, + root=temp_data + "/test_dataset", + AtomicData_options={"r_max": 3}, + ) + + @pytest.fixture(scope="function") def root(): with tempfile.TemporaryDirectory(prefix="datasetroot") as path: @@ -118,9 +140,10 @@ def test_callable(self, npz_dataset, npz): # By default we follow torch convention of defaulting to the unbiased std assert np.allclose(np.std(f_raveled, ddof=1), f_std) - def test_statistics(self, npz_dataset, npz): - - (eng_mean, eng_std), (Z_unique, Z_count) = npz_dataset.statistics( + @pytest.mark.parametrize("dataset_type", ["npz_dataset", "hdf5_dataset"]) + def test_statistics(self, dataset_type, npz, request): + dataset = request.getfixturevalue(dataset_type) + (eng_mean, eng_std), (Z_unique, Z_count) = dataset.statistics( fields=[AtomicDataDict.TOTAL_ENERGY_KEY, AtomicDataDict.ATOMIC_NUMBERS_KEY], modes=["mean_std", "count"], ) @@ -138,9 +161,9 @@ def test_statistics(self, npz_dataset, npz): assert np.all(Z_unique == uniq) assert np.all(Z_count == count) - def test_with_subset(self, npz_dataset, npz): - - dataset = npz_dataset.index_select([0]) + @pytest.mark.parametrize("dataset_type", ["npz_dataset", "hdf5_dataset"]) + def test_with_subset(self, dataset_type, npz, request): + dataset = request.getfixturevalue(dataset_type).index_select([0]) ((Z_unique, Z_count), (force_rms,)) = dataset.statistics( [AtomicDataDict.ATOMIC_NUMBERS_KEY, AtomicDataDict.FORCE_KEY], @@ -155,8 +178,10 @@ def test_with_subset(self, npz_dataset, npz): force_rms.numpy(), np.sqrt(np.mean(np.square(npz["force"][0]))) ) - def test_atom_types(self, npz_dataset): - ((avg_num_neigh, _),) = 
npz_dataset.statistics( + @pytest.mark.parametrize("dataset_type", ["npz_dataset", "hdf5_dataset"]) + def test_atom_types(self, dataset_type, request): + dataset = request.getfixturevalue(dataset_type) + ((avg_num_neigh, _),) = dataset.statistics( fields=[ lambda data: ( torch.unique( @@ -170,11 +195,13 @@ def test_atom_types(self, npz_dataset): # They are all homogenous in this dataset: assert ( avg_num_neigh - == torch.bincount(npz_dataset[0][AtomicDataDict.EDGE_INDEX_KEY][0])[0] + == torch.bincount(dataset[0][AtomicDataDict.EDGE_INDEX_KEY][0])[0] ) - def test_edgewise_stats(self, npz_dataset): - ((avg_edge_length, std_edge_len),) = npz_dataset.statistics( + @pytest.mark.parametrize("dataset_type", ["npz_dataset", "hdf5_dataset"]) + def test_edgewise_stats(self, dataset_type, request): + dataset = request.getfixturevalue(dataset_type) + ((avg_edge_length, std_edge_len),) = dataset.statistics( fields=[ lambda data: ( ( @@ -190,15 +217,21 @@ def test_edgewise_stats(self, npz_dataset): ], modes=["mean_std"], ) - collater = Collater.for_dataset(npz_dataset) - all_data = collater([npz_dataset[i] for i in range(len(npz_dataset))]) + collater = Collater.for_dataset(dataset) + all_data = collater([dataset[i] for i in range(len(dataset))]) all_data = AtomicData.to_AtomicDataDict(all_data) all_data = AtomicDataDict.with_edge_vectors(all_data, with_lengths=True) assert torch.allclose( - avg_edge_length, torch.mean(all_data[AtomicDataDict.EDGE_LENGTH_KEY]) + avg_edge_length, + torch.mean(all_data[AtomicDataDict.EDGE_LENGTH_KEY]).to( + avg_edge_length.dtype + ), ) assert torch.allclose( - std_edge_len, torch.std(all_data[AtomicDataDict.EDGE_LENGTH_KEY]) + std_edge_len, + torch.std(all_data[AtomicDataDict.EDGE_LENGTH_KEY]).to( + avg_edge_length.dtype + ), ) @@ -277,7 +310,6 @@ def test_per_node_field(self, npz_dataset, mode, subset): @pytest.mark.parametrize("full_rank", [True, False]) @pytest.mark.parametrize("subset", [True, False]) def test_per_graph_field(self, npz_dataset, alpha, full_rank, subset): - if alpha <= 1e-4 and not full_rank: return @@ -444,7 +476,6 @@ def generate_E(N, mean_min, mean_max, std): def set_up_transformer(npz_dataset, full_rank, subset): - if full_rank: unique = torch.unique(npz_dataset.data[AtomicDataDict.ATOMIC_NUMBERS_KEY]) npz_dataset.transform = TypeMapper( From 93f2112037e75e6eaa8881d078aa260761c16345 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Sun, 19 Mar 2023 19:00:34 -0400 Subject: [PATCH 110/157] slightly relax test numerics in float32 --- nequip/utils/test.py | 2 +- nequip/utils/unittests/model_tests.py | 20 +++++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/nequip/utils/test.py b/nequip/utils/test.py index 7597f226..a7d7c9f3 100644 --- a/nequip/utils/test.py +++ b/nequip/utils/test.py @@ -13,7 +13,7 @@ ) -PERMUTATION_FLOAT_TOLERANCE = {torch.float32: 1e-5, torch.float64: 1e-10} +PERMUTATION_FLOAT_TOLERANCE = {torch.float32: 5e-5, torch.float64: 1e-10} # https://discuss.pytorch.org/t/how-to-quickly-inverse-a-permutation-by-using-pytorch/116205/4 diff --git a/nequip/utils/unittests/model_tests.py b/nequip/utils/unittests/model_tests.py index e0fd7cf6..57d0bc96 100644 --- a/nequip/utils/unittests/model_tests.py +++ b/nequip/utils/unittests/model_tests.py @@ -82,16 +82,19 @@ def test_jit(self, model, atomic_batch, device): atol = { # tight, but not that tight, since GPU nondet has to pass # plus model insides are still float32 with global dtype float64 in the tests - 
torch.float32: 5e-6, + torch.float32: 5e-5, torch.float64: 5e-7, }[torch.get_default_dtype()] + out_instance = instance(data.copy()) + out_script = model_script(data.copy()) + for out_field in out_fields: assert torch.allclose( - instance(data)[out_field], - model_script(data)[out_field], + out_instance[out_field], + out_script[out_field], atol=atol, - ) + ), f"JIT didn't repro non-JIT on field {out_field} with max error {(out_instance[out_field] - out_script[out_field]).abs().max().item()}" # - Try saving, loading in another process, and running - with tempfile.TemporaryDirectory() as tmpdir: @@ -104,12 +107,15 @@ def test_jit(self, model, atomic_batch, device): load_model = torch.jit.load(tmpdir + "/model.pt") load_dat = torch.load(tmpdir + "/dat.pt") + out_script = model_script(data.copy()) + out_load = load_model(load_dat.copy()) + for out_field in out_fields: assert torch.allclose( - model_script(data)[out_field], - load_model(load_dat)[out_field], + out_script[out_field], + out_load[out_field], atol=atol, - ) + ), f"JIT didn't repro save-and-loaded JIT on field {out_field} with max error {(out_script[out_field] - out_load[out_field]).abs().max().item()}" def test_forward(self, model, atomic_batch, device): instance, out_fields = model From a3f7536ff97829be827c534eaa20393260b5f585 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Sun, 19 Mar 2023 22:10:46 -0400 Subject: [PATCH 111/157] fix tests --- nequip/nn/_graph_mixin.py | 4 ++++ nequip/nn/_graph_model.py | 1 + nequip/utils/test.py | 3 ++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/nequip/nn/_graph_mixin.py b/nequip/nn/_graph_mixin.py index 2eb1f64b..806dd566 100644 --- a/nequip/nn/_graph_mixin.py +++ b/nequip/nn/_graph_mixin.py @@ -217,6 +217,7 @@ def from_parameters( OrderedDict(zip(layers.keys(), built_modules)), ) + @torch.jit.unused def append(self, name: str, module: GraphModuleMixin) -> None: r"""Append a module to the SequentialGraphNetwork. 
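These `@torch.jit.unused` markers are safe because `append` and friends are Python-side construction helpers that never run inside the compiled model. For orientation, a rough sketch of how `append_from_parameters` is used; `seqnn` and `config` are assumed to already exist, and the layer name and parameters are borrowed from the pair-potential builder later in this series:

from nequip.nn import AtomwiseReduce
from nequip.data import AtomicDataDict

# instantiate AtomwiseReduce from `config` plus the explicit `params`,
# then append it to the network under the name "total_energy_sum"
seqnn.append_from_parameters(
    shared_params=config,
    name="total_energy_sum",
    builder=AtomwiseReduce,
    params=dict(
        reduce="sum",
        field=AtomicDataDict.PER_ATOM_ENERGY_KEY,
        out_field=AtomicDataDict.TOTAL_ENERGY_KEY,
    ),
)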
@@ -229,6 +230,7 @@ def append(self, name: str, module: GraphModuleMixin) -> None: self.irreps_out = dict(module.irreps_out) return + @torch.jit.unused def append_from_parameters( self, shared_params: Mapping, @@ -254,6 +256,7 @@ def append_from_parameters( self.append(name, instance) return + @torch.jit.unused def insert( self, name: str, @@ -311,6 +314,7 @@ def insert( return + @torch.jit.unused def insert_from_parameters( self, shared_params: Mapping, diff --git a/nequip/nn/_graph_model.py b/nequip/nn/_graph_model.py index 7d543816..d33ad378 100644 --- a/nequip/nn/_graph_model.py +++ b/nequip/nn/_graph_model.py @@ -114,5 +114,6 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: # == Helpers == + @torch.jit.unused def get_device(self) -> torch.device: return _get_device(self) diff --git a/nequip/utils/test.py b/nequip/utils/test.py index a7d7c9f3..7c0bde3f 100644 --- a/nequip/utils/test.py +++ b/nequip/utils/test.py @@ -13,7 +13,8 @@ ) -PERMUTATION_FLOAT_TOLERANCE = {torch.float32: 5e-5, torch.float64: 1e-10} +# This has to be somewhat large because of float32 sum reductions over many edges/atoms +PERMUTATION_FLOAT_TOLERANCE = {torch.float32: 1e-4, torch.float64: 1e-10} # https://discuss.pytorch.org/t/how-to-quickly-inverse-a-permutation-by-using-pytorch/116205/4 From 12d3da94cadedf2c6e8794e7ff9c3b8fa3b869c6 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 22 Mar 2023 21:51:49 -0400 Subject: [PATCH 112/157] better message --- nequip/scripts/evaluate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nequip/scripts/evaluate.py b/nequip/scripts/evaluate.py index 092fa432..a09c1641 100644 --- a/nequip/scripts/evaluate.py +++ b/nequip/scripts/evaluate.py @@ -86,7 +86,7 @@ def main(args=None, running_as_script: bool = True): Prints only the final result in `name = num` format to stdout; all other information is `logging.debug`ed to stderr. - WARNING: Please note that results of CUDA models are rarely exactly reproducible, and that even CPU models can be nondeterministic. + Please note that results of CUDA models are rarely exactly reproducible, and that even CPU models can be nondeterministic. This is very rarely important in practice, but can be unintuitive. 
""" ) ) @@ -250,7 +250,7 @@ def main(args=None, running_as_script: bool = True): logger.info(f"Using device: {device}") if device.type == "cuda": logger.info( - "WARNING: please note that models running on CUDA are usually nondeterministc and that this manifests in the final test errors; for a _more_ deterministic result, please use `--device cpu`", + "Please note that _all_ machine learning models running on CUDA hardware are generally somewhat nondeterministic and that this can manifest in small, generally unimportant variation in the final test errors.", ) if args.use_deterministic_algorithms: From 73b0c6f9da78edf6e50f6fb8287ce507584bed1d Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Thu, 23 Mar 2023 21:58:13 -0400 Subject: [PATCH 113/157] fix adjacency test --- nequip/utils/unittests/model_tests.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nequip/utils/unittests/model_tests.py b/nequip/utils/unittests/model_tests.py index 57d0bc96..37e9dcb6 100644 --- a/nequip/utils/unittests/model_tests.py +++ b/nequip/utils/unittests/model_tests.py @@ -474,7 +474,8 @@ def test_partial_forces(self, config, atomic_batch, device, strict_locality): adjacency = data[AtomicDataDict.BATCH_KEY].view(-1, 1) == data[ AtomicDataDict.BATCH_KEY ].view(1, -1) - assert torch.equal(adjacency, torch.any(partial_forces != 0, dim=-1)) + # for non-adjacent atoms, all partial forces must be zero + assert torch.all(partial_forces[~adjacency] == 0) def test_force_smoothness(self, model, config, device): instance, out_fields = model From 327a25053cf5b4ce88ed2f438bc08001fe5e7d6e Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Thu, 23 Mar 2023 22:55:13 -0400 Subject: [PATCH 114/157] allow registered extra metadata --- nequip/scripts/deploy.py | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/nequip/scripts/deploy.py b/nequip/scripts/deploy.py index 3215e82f..959a6390 100644 --- a/nequip/scripts/deploy.py +++ b/nequip/scripts/deploy.py @@ -1,9 +1,9 @@ import sys if sys.version_info[1] >= 8: - from typing import Final + from typing import Final, Optional else: - from typing_extensions import Final + from typing_extensions import Final, Optional from typing import Tuple, Dict, Union import argparse import pathlib @@ -59,6 +59,24 @@ ] +def _register_metadata_key(key: str) -> None: + _ALL_METADATA_KEYS.append(key) + + +_current_metadata: Optional[dict] = None + + +def _set_deploy_metadata(key: str, value) -> None: + # TODO: not thread safe but who cares? 
+ global _current_metadata + if _current_metadata is None: + pass # not deploying right now + elif key in _current_metadata: + raise RuntimeError(f"{key} already set in the deployment metadata") + else: + _current_metadata[key] = value + + + def _compile_for_deploy(model): model.eval() @@ -233,6 +251,8 @@ def main(args=None): check_code_version(config) # -- load model -- + global _current_metadata + _current_metadata = {} if args.train_dir is not None: model, _ = Trainer.load_model_from_training_session( args.train_dir, model_name="best_model.pth", device="cpu" @@ -276,7 +296,14 @@ def main(args=None): metadata[MODEL_DTYPE_KEY] = dtype_to_name(config["model_dtype"]) metadata[CONFIG_KEY] = yaml.dump(Config.as_dict(config)) + for k, v in _current_metadata.items(): + if k in metadata: + raise RuntimeError(f"Custom deploy key {k} was already set") + metadata[k] = v + _current_metadata = None + metadata = {k: v.encode("ascii") for k, v in metadata.items()} + torch.jit.save(model, args.out_file, _extra_files=metadata) else: raise ValueError From e30ce3e312f85782ba05ad17e1ff896aa040bc2e Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Thu, 23 Mar 2023 23:04:35 -0400 Subject: [PATCH 115/157] remove stress warning --- nequip/nn/_grad_output.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/nequip/nn/_grad_output.py b/nequip/nn/_grad_output.py index bfc1462f..c03ec350 100644 --- a/nequip/nn/_grad_output.py +++ b/nequip/nn/_grad_output.py @@ -1,5 +1,4 @@ from typing import List, Union, Optional -import warnings import torch @@ -193,10 +192,6 @@ def __init__( ): super().__init__() - warnings.warn( - "!! Stresses in NequIP are in BETA and UNDER DEVELOPMENT: _please_ carefully check the sanity of your results and report any (potential) issues on the GitHub" - ) - if not do_forces: raise NotImplementedError self.do_forces = do_forces From 0d8a5674c96409829155f65c00fe137c1a73a219 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Sun, 26 Mar 2023 20:05:27 -0400 Subject: [PATCH 116/157] fix type conversion for type_to_chemical_symbol --- nequip/data/transforms.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/nequip/data/transforms.py b/nequip/data/transforms.py index e39aeb98..fc4afe51 100644 --- a/nequip/data/transforms.py +++ b/nequip/data/transforms.py @@ -39,6 +39,14 @@ def __init__( chemical_symbol_to_type = {k: i for i, k in enumerate(chemical_symbols)} del chemical_symbols + if type_to_chemical_symbol is not None: + type_to_chemical_symbol = { + int(k): v for k, v in type_to_chemical_symbol.items() + } + assert all( + v in ase.data.chemical_symbols for v in type_to_chemical_symbol.values() + ) + # Build from chem->type mapping, if provided self.chemical_symbol_to_type = chemical_symbol_to_type if self.chemical_symbol_to_type is not None: @@ -98,11 +106,9 @@ def __init__( self.num_types = len(type_names) # Check type_names self.type_names = type_names - if type_to_chemical_symbol is not None: + self.type_to_chemical_symbol = type_to_chemical_symbol + if self.type_to_chemical_symbol is not None: assert set(type_to_chemical_symbol.keys()) == set(range(self.num_types)) - self.type_to_chemical_symbol = type_to_chemical_symbol - else: - self.type_to_chemical_symbol = None def __call__( self, data: Union[AtomicDataDict.Type, AtomicData], types_required: bool = True From c3ab69706153174a59a9d3cb21483ac92bc5dd83 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp
<1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Sun, 26 Mar 2023 20:27:19 -0400 Subject: [PATCH 117/157] consistency with minimal.yaml --- configs/minimal_pair.yaml | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/configs/minimal_pair.yaml b/configs/minimal_pair.yaml index ad178d1e..5ea7a2b9 100644 --- a/configs/minimal_pair.yaml +++ b/configs/minimal_pair.yaml @@ -65,13 +65,11 @@ wandb: false # verbose: debug # training -n_train: 150 # number of training data -n_val: 50 # number of validation data -learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best - this is often one of the most important hyperparameters to tune -batch_size: 5 # batch size, we found it important to keep this small for most applications including forces (1-5); for energy-only training, higher batch sizes work better -validation_batch_size: 10 # batch size for evaluating the model during validation. This does not affect the training results, but using the highest value possible (<=n_val) without running out of memory will speed up your training. -max_epochs: 100000 -append: true +n_train: 5 +n_val: 5 +batch_size: 1 +validation_batch_size: 5 +max_epochs: 10 # loss function loss_coeffs: From bc162cca3c84d49e8932e87160ec0e55b1dde8dc Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Sun, 26 Mar 2023 21:33:00 -0400 Subject: [PATCH 118/157] add NEQUIP_ERROR_ON_NO_EDGES --- nequip/data/AtomicData.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/nequip/data/AtomicData.py b/nequip/data/AtomicData.py index 2fc35042..4c40ffa0 100644 --- a/nequip/data/AtomicData.py +++ b/nequip/data/AtomicData.py @@ -7,6 +7,7 @@ from copy import deepcopy from typing import Union, Tuple, Dict, Optional, List, Set, Sequence from collections.abc import Mapping +import os import numpy as np import ase.neighborlist @@ -693,6 +694,11 @@ def without_nodes(self, which_nodes): return type(self)(**new_dict) +_ERROR_ON_NO_EDGES: bool = os.environ.get("NEQUIP_ERROR_ON_NO_EDGES", "true").lower() +assert _ERROR_ON_NO_EDGES in ("true", "false") +_ERROR_ON_NO_EDGES = _ERROR_ON_NO_EDGES == "true" + + def neighbor_list_and_relative_vec( pos, r_max, @@ -785,7 +791,7 @@ def neighbor_list_and_relative_vec( bad_edge = first_idex == second_idex bad_edge &= np.all(shifts == 0, axis=1) keep_edge = ~bad_edge - if not np.any(keep_edge): + if _ERROR_ON_NO_EDGES and (not np.any(keep_edge)): raise ValueError( f"Every single atom has no neighbors within the cutoff r_max={r_max} (after eliminating self edges, no edges remain in this system)" ) From 18c37a2b99c97650e953dfff534cd314668f2dc4 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 28 Mar 2023 18:52:34 -0400 Subject: [PATCH 119/157] doc --- configs/minimal_pair.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/minimal_pair.yaml b/configs/minimal_pair.yaml index 5ea7a2b9..fe7b46b3 100644 --- a/configs/minimal_pair.yaml +++ b/configs/minimal_pair.yaml @@ -37,7 +37,7 @@ num_features: 16 # lj_delta_trainable: false # lj_epsilon_trainable: true pair_style: ZBL -units: real # Ang and kcal/mol +units: real # Ang and kcal/mol, LAMMPS unit names; allowed values "metal" and "real" # data set # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys From f1e0b743962bf5690847ff30fd4e31958d2d0e51 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp 
<1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Thu, 6 Apr 2023 16:28:57 -0400 Subject: [PATCH 120/157] add freeze option --- nequip/scripts/evaluate.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nequip/scripts/evaluate.py b/nequip/scripts/evaluate.py index a09c1641..1358495e 100644 --- a/nequip/scripts/evaluate.py +++ b/nequip/scripts/evaluate.py @@ -31,7 +31,7 @@ def _load_deployed_or_traindir( - path: Path, device + path: Path, device, freeze: bool = True ) -> Tuple[torch.nn.Module, bool, float, List[str]]: loaded_deployed_model: bool = False model_r_max = None @@ -41,6 +41,7 @@ def _load_deployed_or_traindir( path, device=device, set_global_options=True, # don't warn that setting + freeze=freeze, ) # the global settings for a deployed model are set by # set_global_options in the call to load_deployed_model From 671c369feba4e0140e50b9e34730c62e4023958a Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 7 Apr 2023 17:01:16 -0400 Subject: [PATCH 121/157] EDGE_CUTOFF_KEY --- nequip/data/AtomicData.py | 1 + nequip/data/_keys.py | 2 ++ nequip/model/_pair_potential.py | 2 ++ nequip/nn/embedding/__init__.py | 13 ++++++++++-- nequip/nn/embedding/_edge.py | 33 +++++++++++++++++++++++++++---- nequip/nn/pair_potential.py | 35 +++++---------------------------- 6 files changed, 50 insertions(+), 36 deletions(-) diff --git a/nequip/data/AtomicData.py b/nequip/data/AtomicData.py index 4c40ffa0..f98b42a2 100644 --- a/nequip/data/AtomicData.py +++ b/nequip/data/AtomicData.py @@ -50,6 +50,7 @@ AtomicDataDict.EDGE_ATTRS_KEY, AtomicDataDict.EDGE_EMBEDDING_KEY, AtomicDataDict.EDGE_FEATURES_KEY, + AtomicDataDict.EDGE_CUTOFF_KEY, } _DEFAULT_GRAPH_FIELDS: Set[str] = { AtomicDataDict.TOTAL_ENERGY_KEY, diff --git a/nequip/data/_keys.py b/nequip/data/_keys.py index 9ebb6e19..d61d44f1 100644 --- a/nequip/data/_keys.py +++ b/nequip/data/_keys.py @@ -45,6 +45,8 @@ # [n_edge, dim] invariant embedding of the edges EDGE_EMBEDDING_KEY: Final[str] = "edge_embedding" EDGE_FEATURES_KEY: Final[str] = "edge_features" +# [n_edge, 1] invariant of the radial cutoff envelope for each edge, allows reuse of cutoff envelopes +EDGE_CUTOFF_KEY: Final[str] = "edge_cutoff" NODE_FEATURES_KEY: Final[str] = "node_features" NODE_ATTRS_KEY: Final[str] = "node_attrs" diff --git a/nequip/model/_pair_potential.py b/nequip/model/_pair_potential.py index 0aa66482..c1538759 100644 --- a/nequip/model/_pair_potential.py +++ b/nequip/model/_pair_potential.py @@ -1,4 +1,5 @@ from nequip.nn import SequentialGraphNetwork, AtomwiseReduce +from nequip.nn.embedding import AddRadialCutoffToData from nequip.data import AtomicDataDict from nequip.nn.pair_potential import LennardJones, ZBL @@ -22,6 +23,7 @@ def PairPotential(config) -> SequentialGraphNetwork: return SequentialGraphNetwork.from_parameters( shared_params=config, layers={ + "cutoff": AddRadialCutoffToData, "pair_potential": {"LJ": LennardJones, "ZBL": ZBL}[config.pair_style], "total_energy_sum": ( AtomwiseReduce, diff --git a/nequip/nn/embedding/__init__.py b/nequip/nn/embedding/__init__.py index dfc9b710..9a0c0d86 100644 --- a/nequip/nn/embedding/__init__.py +++ b/nequip/nn/embedding/__init__.py @@ -1,4 +1,13 @@ from ._one_hot import OneHotAtomEncoding -from ._edge import SphericalHarmonicEdgeAttrs, RadialBasisEdgeEncoding +from ._edge import ( + SphericalHarmonicEdgeAttrs, + RadialBasisEdgeEncoding, + AddRadialCutoffToData, +) -__all__ = [OneHotAtomEncoding, SphericalHarmonicEdgeAttrs, 
RadialBasisEdgeEncoding] +__all__ = [ + OneHotAtomEncoding, + SphericalHarmonicEdgeAttrs, + RadialBasisEdgeEncoding, + AddRadialCutoffToData, +] diff --git a/nequip/nn/embedding/_edge.py b/nequip/nn/embedding/_edge.py index 3705ae35..4585fec7 100644 --- a/nequip/nn/embedding/_edge.py +++ b/nequip/nn/embedding/_edge.py @@ -76,14 +76,39 @@ def __init__( self.out_field = out_field self._init_irreps( irreps_in=irreps_in, - irreps_out={self.out_field: o3.Irreps([(self.basis.num_basis, (0, 1))])}, + irreps_out={ + self.out_field: o3.Irreps([(self.basis.num_basis, (0, 1))]), + AtomicDataDict.EDGE_CUTOFF_KEY: "0e", + }, ) def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: data = AtomicDataDict.with_edge_vectors(data, with_lengths=True) edge_length = data[AtomicDataDict.EDGE_LENGTH_KEY] - edge_length_embedded = ( - self.basis(edge_length) * self.cutoff(edge_length)[:, None] - ) + cutoff = self.cutoff(edge_length).unsqueeze(-1) + edge_length_embedded = self.basis(edge_length) * cutoff data[self.out_field] = edge_length_embedded + data[AtomicDataDict.EDGE_CUTOFF_KEY] = cutoff + return data + + +@compile_mode("script") +class AddRadialCutoffToData(GraphModuleMixin, torch.nn.Module): + def __init__( + self, + cutoff=PolynomialCutoff, + cutoff_kwargs={}, + irreps_in=None, + ): + super().__init__() + self.cutoff = cutoff(**cutoff_kwargs) + self._init_irreps( + irreps_in=irreps_in, irreps_out={AtomicDataDict.EDGE_CUTOFF_KEY: "0e"} + ) + + def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: + data = AtomicDataDict.with_edge_vectors(data, with_lengths=True) + edge_length = data[AtomicDataDict.EDGE_LENGTH_KEY] + cutoff = self.cutoff(edge_length).unsqueeze(-1) + data[AtomicDataDict.EDGE_CUTOFF_KEY] = cutoff return data diff --git a/nequip/nn/pair_potential.py b/nequip/nn/pair_potential.py index 7c0a5769..b3dcfdb6 100644 --- a/nequip/nn/pair_potential.py +++ b/nequip/nn/pair_potential.py @@ -43,8 +43,6 @@ def __init__( lj_exponent: Optional[float] = None, lj_per_type: bool = True, lj_style: str = "lj", - cutoff=PolynomialCutoff, - cutoff_kwargs={}, irreps_in=None, ) -> None: super().__init__() @@ -86,12 +84,6 @@ def __init__( lj_exponent = 6.0 self.exponent = lj_exponent - self._has_cutoff = cutoff is not None - if self._has_cutoff: - self.cutoff = cutoff(**cutoff_kwargs) - else: - self.cutoff = torch.nn.Identity() - def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: data = AtomicDataDict.with_edge_vectors(data, with_lengths=True) edge_center = data[AtomicDataDict.EDGE_INDEX_KEY][0] @@ -127,9 +119,8 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: # TODO: this is probably broken with NaNs at delta lj_eng = lj_eng * (edge_len < (2 ** (1.0 / self.exponent) + delta)) - if self._has_cutoff: - # apply the cutoff for smoothness - lj_eng = lj_eng * self.cutoff(edge_len) + # apply the cutoff for smoothness + lj_eng = lj_eng * data[AtomicDataDict.EDGE_CUTOFF_KEY] # sum edge LJ energies onto atoms atomic_eng = scatter( @@ -169,8 +160,6 @@ def _zbl( r: torch.Tensor, atom_types: torch.Tensor, edge_index: torch.Tensor, - r_max: float, - p: float, qqr2exesquare: float, ) -> torch.Tensor: # from LAMMPS pair_zbl_const.h @@ -199,15 +188,7 @@ def _zbl( + c4 * (d4 * x).exp() ) eng = qqr2exesquare * ((Zi * Zj) / r) * psi - - # compute cutoff envelope - r = r / r_max - cutoff = 1.0 - (((p + 1.0) * (p + 2.0) / 2.0) * torch.pow(r, p)) - cutoff = cutoff + (p * (p + 2.0) * torch.pow(r, p + 1.0)) - cutoff = cutoff - ((p * (p + 1.0) / 2) * torch.pow(r, p + 2.0)) 
- cutoff = cutoff * (r < 1.0) - - return cutoff * eng + return eng @compile_mode("script") @@ -219,16 +200,12 @@ class ZBL(GraphModuleMixin, torch.nn.Module): """ num_types: int - r_max: float - PolynomialCutoff_p: float def __init__( self, num_types: int, - r_max: float, units: str, type_to_chemical_symbol: Optional[Dict[int, str]] = None, - PolynomialCutoff_p: float = 6.0, irreps_in=None, ): super().__init__() @@ -278,8 +255,6 @@ def __init__( ) * 0.5, # Put half the energy on each of ij, ji ) - self.r_max = float(r_max) - self.PolynomialCutoff_p = float(PolynomialCutoff_p) def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: data = AtomicDataDict.with_edge_vectors(data, with_lengths=True) @@ -290,10 +265,10 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: r=data[AtomicDataDict.EDGE_LENGTH_KEY], atom_types=data[AtomicDataDict.ATOM_TYPE_KEY], edge_index=data[AtomicDataDict.EDGE_INDEX_KEY], - r_max=self.r_max, - p=self.PolynomialCutoff_p, qqr2exesquare=self._qqr2exesquare, ).unsqueeze(-1) + # apply cutoff + zbl_edge_eng = zbl_edge_eng * data[AtomicDataDict.EDGE_CUTOFF_KEY] atomic_eng = scatter( zbl_edge_eng, edge_center, From 8310052ccc022fe4ddf8231933f037c525051a5c Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 7 Apr 2023 18:41:40 -0400 Subject: [PATCH 122/157] lint --- nequip/nn/pair_potential.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nequip/nn/pair_potential.py b/nequip/nn/pair_potential.py index b3dcfdb6..7d0f0e22 100644 --- a/nequip/nn/pair_potential.py +++ b/nequip/nn/pair_potential.py @@ -9,7 +9,6 @@ from nequip.data import AtomicDataDict from nequip.nn import GraphModuleMixin, RescaleOutput -from nequip.nn.cutoffs import PolynomialCutoff @torch.jit.script From 6967c1bbbf7e5472d9ad3c8f5eadeba00f5b3ddd Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Thu, 13 Apr 2023 17:31:04 -0700 Subject: [PATCH 123/157] fix typo --- nequip/model/_weight_init.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nequip/model/_weight_init.py b/nequip/model/_weight_init.py index 1ba57c5d..bd177d95 100644 --- a/nequip/model/_weight_init.py +++ b/nequip/model/_weight_init.py @@ -27,7 +27,7 @@ def initialize_from_state( return graph_model # do nothing return load_model_state( config=config, - model=graph_model, + graph_model=graph_model, initialize=initialize, _prefix="initial_model_state", ) From 621f57cbb87cdac7e10f56ef4f2fe7e0ffde4eaf Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Sat, 15 Apr 2023 18:32:11 -0700 Subject: [PATCH 124/157] typo --- nequip/scripts/evaluate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nequip/scripts/evaluate.py b/nequip/scripts/evaluate.py index 1358495e..20382eef 100644 --- a/nequip/scripts/evaluate.py +++ b/nequip/scripts/evaluate.py @@ -70,7 +70,7 @@ def _load_deployed_or_traindir( model_r_max = model_config["r_max"] type_names = model_config["type_names"] model.eval() - return model, load_deployed_model, model_r_max, type_names + return model, loaded_deployed_model, model_r_max, type_names def main(args=None, running_as_script: bool = True): From a9c96a45759e5af78dbc5f21e2db4dec4950811a Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Sun, 16 Apr 2023 14:19:00 -0700 Subject: [PATCH 125/157] fix lr sched docs --- configs/full.yaml | 3 ++- 1 file changed, 2 
insertions(+), 1 deletion(-) diff --git a/configs/full.yaml b/configs/full.yaml index 1e248906..61548a8f 100644 --- a/configs/full.yaml +++ b/configs/full.yaml @@ -282,8 +282,9 @@ optimizer_weight_decay: 0 # setting to inf or null disables it max_gradient_norm: null -# lr scheduler, currently only supports the two options listed below, if you need more please file an issue +# lr scheduler # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch +# you can also set other options of the underlying PyTorch scheduler, for example lr_scheduler_threshold lr_scheduler_name: ReduceLROnPlateau lr_scheduler_patience: 100 lr_scheduler_factor: 0.5 From da3c2b781bfe9dc6002e688f5235633f03201db8 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 21 Apr 2023 15:51:23 -0400 Subject: [PATCH 126/157] add logging --- nequip/model/_weight_init.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/nequip/model/_weight_init.py b/nequip/model/_weight_init.py index bd177d95..31d9624d 100644 --- a/nequip/model/_weight_init.py +++ b/nequip/model/_weight_init.py @@ -1,4 +1,5 @@ import math +import logging import torch @@ -57,7 +58,11 @@ def load_model_state( state = torch.load( config[_prefix], map_location=None if torch.cuda.is_available() else "cpu" ) - graph_model.load_state_dict(state, strict=config.get(_prefix + "_strict", True)) + strict: bool = config.get(_prefix + "_strict", True) + graph_model.load_state_dict(state, strict=strict) + logging.info( + f"Loaded model state {'' if strict else ' with strict=False'} (parameters/weights/persistent buffers) from state {_prefix}={config[_prefix]}" + ) return graph_model From 26e26459267c7eea3aeea1fda1cd46a62f09720c Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 8 May 2023 17:30:06 -0400 Subject: [PATCH 127/157] better error message --- nequip/train/_loss.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/nequip/train/_loss.py b/nequip/train/_loss.py index 1b00dfe2..6442c0d4 100644 --- a/nequip/train/_loss.py +++ b/nequip/train/_loss.py @@ -4,7 +4,7 @@ import torch.nn from torch_runstats.scatter import scatter, scatter_mean -from nequip.data import AtomicDataDict +from nequip.data import AtomicDataDict, _GRAPH_FIELDS from nequip.utils import instantiate_from_cls_name @@ -72,6 +72,10 @@ def __call__( key: str, mean: bool = True, ): + if key not in _GRAPH_FIELDS: + raise RuntimeError( + f"Doesn't make sense to do a `PerAtom` loss on field `{key}`, which isn't registered as a graph (global) field. If it is a graph-level field, register it with `graph_fields: [\"{key}\"]`; otherwise you don't need to specify `PerAtom` for loss on per-node fields." 
+ ) ref_dict = ref ref = ref[key] # make sure prediction is promoted to dtype of reference From 4673bc4fefc5a6c0ff9dbc88f0cf09da6cdb29df Mon Sep 17 00:00:00 2001 From: Albert Zhu <55062951+albertzhu01@users.noreply.github.com> Date: Fri, 12 May 2023 14:12:05 -0400 Subject: [PATCH 128/157] GMM Uncertainty Quantification (#310) Also: * `get_default_device_name` refactor * don't try to set CUDA fuser on CPU * add config include baseline with `include_file_as_baseline_config` * add `nequip-deploy --using-dataset` --------- Co-authored-by: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> --- .github/workflows/tests.yml | 2 +- .github/workflows/tests_develop.yml | 2 +- CHANGELOG.md | 3 + configs/minimal.yaml | 7 ++ configs/minimal_gmm.yaml | 45 +++++++++ examples/gmm_script.py | 86 +++++++++++++++++ nequip/model/__init__.py | 2 + nequip/model/_gmm.py | 96 +++++++++++++++++++ nequip/nn/__init__.py | 40 ++++++-- nequip/nn/_gmm.py | 61 ++++++++++++ nequip/nn/_graph_mixin.py | 14 ++- nequip/scripts/deploy.py | 37 ++++++-- nequip/scripts/train.py | 2 + nequip/train/trainer.py | 3 +- nequip/utils/_global_options.py | 3 +- nequip/utils/config.py | 29 +++++- nequip/utils/gmm.py | 142 ++++++++++++++++++++++++++++ nequip/utils/misc.py | 4 + tests/unit/utils/test_gmm.py | 123 ++++++++++++++++++++++++ 19 files changed, 673 insertions(+), 28 deletions(-) create mode 100644 configs/minimal_gmm.yaml create mode 100644 examples/gmm_script.py create mode 100644 nequip/model/_gmm.py create mode 100644 nequip/nn/_gmm.py create mode 100644 nequip/utils/gmm.py create mode 100644 tests/unit/utils/test_gmm.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index cd8c3d3f..2e206edf 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -32,7 +32,7 @@ jobs: python -m pip install --upgrade pip pip install setuptools wheel pip install torch==${TORCH} -f https://download.pytorch.org/whl/cpu/torch_stable.html - pip install h5py + pip install h5py scikit-learn # install packages that aren't required dependencies but that the tests do need pip install --upgrade-strategy only-if-needed . - name: Install pytest run: | diff --git a/.github/workflows/tests_develop.yml b/.github/workflows/tests_develop.yml index 27bdd93f..1444a128 100644 --- a/.github/workflows/tests_develop.yml +++ b/.github/workflows/tests_develop.yml @@ -32,7 +32,7 @@ jobs: python -m pip install --upgrade pip pip install setuptools wheel pip install torch==${TORCH} -f https://download.pytorch.org/whl/cpu/torch_stable.html - pip install h5py + pip install h5py scikit-learn # install packages that aren't required dependencies but that the tests do need pip install --upgrade-strategy only-if-needed . - name: Install pytest run: | diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ecb77ac..2d63a9e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,9 @@ Most recent change on the bottom. 
- Error (or warn) on unused options in YAML that likely indicate typos - `dataset_*_absmax` statistics option - `HDF5Dataset` (#227) +- `include_file_as_baseline_config` for simple modifications of existing configs +- `nequip-deploy --using-dataset` to support data-dependent deployment steps +- Support for Gaussian Mixture Model uncertainty quantification (https://doi.org/10.1063/5.0136574) ### Changed - Always require explicit `seed` diff --git a/configs/minimal.yaml b/configs/minimal.yaml index 6a636e42..31956922 100644 --- a/configs/minimal.yaml +++ b/configs/minimal.yaml @@ -10,6 +10,13 @@ run_name: minimal seed: 123 dataset_seed: 456 +model_builders: + - SimpleIrrepsConfig # update the config with all the irreps for the network if using the simplified `l_max` / `num_features` / `parity` syntax + - EnergyModel # build a full NequIP model + - PerSpeciesRescale # add per-atom / per-species scaling and shifting to the NequIP model before the total energy sum + - StressForceOutput # wrap the energy model in a module that uses autodifferentiation to compute the forces + - RescaleEnergyEtc + # network num_basis: 8 r_max: 4.0 diff --git a/configs/minimal_gmm.yaml b/configs/minimal_gmm.yaml new file mode 100644 index 00000000..c31602cf --- /dev/null +++ b/configs/minimal_gmm.yaml @@ -0,0 +1,45 @@ +# Example config for DEPLOYING a TRAINED model with an additional Gaussian Mixture Model (GMM) for uncertainty quantification +# See https://doi.org/10.1063/5.0136574 +# This config assumes you have run `configs/minimal.yaml` to train a model; the paths must be updated appropriately for real use +# You can use this config to build a model as +# +# nequip-deploy build --using-dataset --model configs/minimal_gmm.yaml deployed-with-gmm.pth +# + +# This special key loads ALL KEYS from the specified YAML file +# We use it here to load all the hyperparameters, etc. of the model we're trying to deploy from its training dir +# This guarantees they can't get out of sync/disagree +include_file_as_baseline_config: results/aspirin/minimal/config.yaml +# \_____________________/ | +# root + run_name \- autogenerated file containing the FINAL config of the trained model +# (train dir) different than the original config you trained from + +# Then, later in this file, we can override selected keys from config.yaml +# In particular, we need to override the `model_builders` to add the GMM: +# !! IMPORTANT !!: this should MATCH YOUR ORIGINAL TRAINING; these are just the right ones for `minimal.yaml`: + +model_builders: + # from minimal.yaml + - SimpleIrrepsConfig # update the config with all the irreps for the network if using the simplified `l_max` / `num_features` / `parity` syntax + - EnergyModel # build a full NequIP model + - PerSpeciesRescale # add per-atom / per-species scaling and shifting to the NequIP model before the total energy sum + - StressForceOutput # wrap the energy model in a module that uses autodifferentiation to compute the forces + - RescaleEnergyEtc + # !! NEW !!
+ - load_model_state + - GaussianMixtureModelUncertainty # add, AND FIT a GMM + +# we have to tell the `load_model_state` model builder what state/weights to load +# we want to load the best model from the original training: +load_model_state: results/aspirin/minimal/best_model.pth +# (note best_model.pth is not a deployed model, it's just a weight checkpoint from the original training directory `results/aspirin/minimal/`) + +# GaussianMixtureModelUncertainty requires the training dataset to fit on, but remember: +# `include_file_as_baseline_config` included ALL options from `minimal.yaml`, including +# the definition of the training set. + +# We can also set options that were never set in the original config to begin with, +# like this for the GMM: +# gmm_n_components can be either an int, or `null`, in which case the Bayesian Information Criterion +# will be used to choose the number of components. +gmm_n_components: null diff --git a/examples/gmm_script.py b/examples/gmm_script.py new file mode 100644 index 00000000..239c6e99 --- /dev/null +++ b/examples/gmm_script.py @@ -0,0 +1,86 @@ +"""Example script to plot GMM uncertainties vs. atomic force errors from the results of `nequip-evaluate` + +To obtain GMM uncertainties for each atom in a system, a NequIP model must be trained +(e.g., using `nequip-train configs/minimal.yaml`) and then deployed. To fit a GMM +during deployment, run + + nequip-deploy build --using-dataset --model deployment.yaml deployed_model.pth + +where deployment.yaml is a config file that adds and fits a GMM to the deployed model +(for an example, see configs/minimal_gmm.yaml). Lastly, to obtain negative log +likelihoods (NLLs) on some test data, the NequIP model must be evaluated on a data set using +`nequip-evaluate` with `--output-fields node_features_nll` and +`--output-fields-from-original-dataset forces`. For example, running + + nequip-evaluate --dataset-config path/to/dataset-config.yaml --model deployed_model.pth --output out.xyz --output-fields node_features_nll --output-fields-from-original-dataset forces + +will evaluate deployed_model.pth (which includes the fitted GMM) on the data set in the config at +path/to/dataset-config.yaml and will write the NLLs and the true atomic forces (along +with the typical outputs of `nequip-evaluate`) to out.xyz. + +IMPORTANT: The data set config must contain the lines + + node_fields: + - node_features_nll + +in order for nequip-evaluate to recognize `node_features_nll` as a legitimate output. + +This script can then use out.xyz to create a plot of NLL vs. atomic force RMSE: + + python gmm_script.py out.xyz --output plot.png +""" + +import argparse + +import numpy as np +import matplotlib as mpl +import matplotlib.pyplot as plt + +from ase.io import read + +# Parse arguments +parser = argparse.ArgumentParser( + description="Make a plot of GMM NLL uncertainty vs. atomic force RMSE from the results of `nequip-evaluate`." +) +parser.add_argument( + "xyzoutput", + help=".xyz file from running `nequip-evaluate ... 
--output out.xyz --output-fields node_features_nll --output-fields-from-original-dataset forces", +) +parser.add_argument("--output", help="File to write plot to", default=None) +args = parser.parse_args() + +pred_forces = [] +true_forces = [] +nlls = [] + +# Extract predicted forces, true forces, and per-atom NLLs from evaluation +for frame in read(args.xyzoutput, index=":", format="extxyz"): + pred_forces.append(frame.get_forces()) + true_forces.append(frame.get_array("original_dataset_forces")) + nlls.append(frame.get_array("node_features_nll")) +pred_forces = np.concatenate(pred_forces, axis=0) +true_forces = np.concatenate(true_forces, axis=0) +nlls = np.concatenate(nlls, axis=0) + +# Compute force RMSE for each atom +force_rmses = np.sqrt(np.mean(np.square(true_forces - pred_forces), axis=-1)) + +# Plot per-atom NLL vs. per-atom force RMSE +f = plt.figure(figsize=(6, 6)) +plt.hist2d( + force_rmses, + nlls, + bins=(100, 100), + cmap="viridis", + norm=mpl.colors.LogNorm(), + cmin=1, +) +plt.title("NLL vs. Atomic Force RMSE") +plt.xlabel("Per-atom Force RMSE [force units]") +plt.ylabel("Per-atom Negative Log Likelihood (NLL) [unitless]") +plt.grid(linestyle="--") +plt.tight_layout() +if args.output is None: + plt.show() +else: + plt.savefig(args.output) diff --git a/nequip/model/__init__.py b/nequip/model/__init__.py index 26fafc36..30031146 100644 --- a/nequip/model/__init__.py +++ b/nequip/model/__init__.py @@ -6,6 +6,7 @@ initialize_from_state, load_model_state, ) +from ._gmm import GaussianMixtureModelUncertainty from ._pair_potential import PairPotential, PairPotentialTerm from ._build import model_from_config @@ -23,6 +24,7 @@ uniform_initialize_FCs, initialize_from_state, load_model_state, + GaussianMixtureModelUncertainty, model_from_config, PairPotential, PairPotentialTerm, diff --git a/nequip/model/_gmm.py b/nequip/model/_gmm.py new file mode 100644 index 00000000..196ab360 --- /dev/null +++ b/nequip/model/_gmm.py @@ -0,0 +1,96 @@ +from typing import Optional + +from tqdm.auto import tqdm + +import torch + +from nequip.nn import GraphModel, SequentialGraphNetwork +from nequip.nn import ( + GaussianMixtureModelUncertainty as GaussianMixtureModelUncertaintyModule, +) +from nequip.data import AtomicDataDict, AtomicData, AtomicDataset, Collater +from nequip.utils import find_first_of_type + + +def GaussianMixtureModelUncertainty( + graph_model: GraphModel, + config, + deploy: bool, + initialize: bool, + dataset: Optional[AtomicDataset] = None, + feature_field: str = AtomicDataDict.NODE_FEATURES_KEY, + out_field: Optional[str] = None, +): + r"""Use a GMM on some latent features to predict an uncertainty. + + Only for deployment time! See `configs/minimal_gmm.yaml`. + """ + # it only makes sense to add or fit a GMM to a deployment model whose features are already trained + if (not deploy) or initialize: + raise RuntimeError( + "GaussianMixtureModelUncertainty can only be used at deployment time, see `configs/minimal_gmm.yaml`." + ) + + # = add GMM = + if out_field is None: + out_field = feature_field + "_nll" + + # TODO: this is VERY brittle!!!! 
+ seqnn: SequentialGraphNetwork = find_first_of_type( + graph_model, SequentialGraphNetwork + ) + + gmm: GaussianMixtureModelUncertaintyModule = seqnn.append_from_parameters( + builder=GaussianMixtureModelUncertaintyModule, + name=feature_field + "_gmm", + shared_params=config, + params=dict(feature_field=feature_field, out_field=out_field), + ) + + if dataset is None: + raise RuntimeError( + "GaussianMixtureModelUncertainty requires a dataset to fit the GMM on; did you specify `nequip-deploy --using-dataset`?" + ) + + # = evaluate features = + # set up model + prev_training: bool = graph_model.training + prev_device: torch.device = graph_model.get_device() + device = config.get("device", None) + graph_model.eval() + graph_model.to(device=device) + # evaluate + features = [] + collater = Collater.for_dataset(dataset=dataset) + batch_size: int = config.get("validation_batch_size", config.batch_size) + stride: int = config.get("dataset_statistics_stride", 1) + # TODO: guard TQDM on interactive? + for batch_start_i in tqdm( + range(0, len(dataset), stride * batch_size), + desc="GMM eval features on train set", + ): + batch = collater( + [dataset[batch_start_i + i * stride] for i in range(batch_size)] + ) + # TODO: !! assumption that final value of feature_field is what the + # GMM gets is very brittle, should really be extracting it + # from the GMM module somehow... not sure how that works. + # give it a training mode and exfiltrate it through a buffer? + # it is correct, however, for NequIP and Allegro energy models + features.append( + graph_model(AtomicData.to_AtomicDataDict(batch.to(device=device)))[ + feature_field + ] + .detach() + .to("cpu") # offload to not run out of GPU RAM + ) + features = torch.cat(features, dim=0) + assert features.ndim == 2 + # restore model + graph_model.train(mode=prev_training) + graph_model.to(device=prev_device) + # fit GMM + gmm.fit(features, seed=config["seed"]) + del features + + return graph_model diff --git a/nequip/nn/__init__.py b/nequip/nn/__init__.py index fac27f93..6585e698 100644 --- a/nequip/nn/__init__.py +++ b/nequip/nn/__init__.py @@ -1,14 +1,34 @@ -from ._graph_mixin import GraphModuleMixin, SequentialGraphNetwork # noqa: F401 -from ._graph_model import GraphModel # noqa: F401 -from ._atomwise import ( # noqa: F401 +from ._graph_mixin import GraphModuleMixin, SequentialGraphNetwork +from ._graph_model import GraphModel +from ._atomwise import ( AtomwiseOperation, AtomwiseReduce, AtomwiseLinear, PerSpeciesScaleShift, -) # noqa: F401 -from ._interaction_block import InteractionBlock # noqa: F401 -from ._grad_output import GradientOutput, PartialForceOutput, StressOutput # noqa: F401 -from ._rescale import RescaleOutput # noqa: F401 -from ._convnetlayer import ConvNetLayer # noqa: F401 -from ._util import SaveForOutput # noqa: F401 -from ._concat import Concat # noqa: F401 +) +from ._interaction_block import InteractionBlock +from ._grad_output import GradientOutput, PartialForceOutput, StressOutput +from ._rescale import RescaleOutput +from ._convnetlayer import ConvNetLayer +from ._util import SaveForOutput +from ._concat import Concat +from ._gmm import GaussianMixtureModelUncertainty + +__all__ = [ + GraphModel, + GraphModuleMixin, + SequentialGraphNetwork, + AtomwiseOperation, + AtomwiseReduce, + AtomwiseLinear, + PerSpeciesScaleShift, + InteractionBlock, + GradientOutput, + PartialForceOutput, + StressOutput, + RescaleOutput, + ConvNetLayer, + SaveForOutput, + Concat, + GaussianMixtureModelUncertainty, +] diff --git a/nequip/nn/_gmm.py 
b/nequip/nn/_gmm.py new file mode 100644 index 00000000..51882dcd --- /dev/null +++ b/nequip/nn/_gmm.py @@ -0,0 +1,61 @@ +from typing import Optional + +import torch + +from e3nn import o3 + +from nequip.data import AtomicDataDict + + +from ._graph_mixin import GraphModuleMixin +from nequip.utils.gmm import GaussianMixture + + +class GaussianMixtureModelUncertainty(GraphModuleMixin, torch.nn.Module): + """Compute GMM NLL uncertainties based on some input featurization. + + Args: + gmm_n_components (int or None): if None, use the BIC to determine the number of components. + """ + + feature_field: str + out_field: str + + def __init__( + self, + feature_field: str, + out_field: str, + gmm_n_components: Optional[int] = None, + gmm_covariance_type: str = "full", + irreps_in=None, + ): + super().__init__() + self.feature_field = feature_field + self.out_field = out_field + self._init_irreps( + irreps_in=irreps_in, + required_irreps_in=[feature_field], + irreps_out={out_field: "0e"}, + ) + feature_irreps = self.irreps_in[self.feature_field].simplify() + if not (len(feature_irreps) == 1 and feature_irreps[0].ir == o3.Irrep("0e")): + raise ValueError( + f"GaussianMixtureModelUncertainty feature_field={feature_field} must be only scalars, instead got {feature_irreps}" + ) + # GaussianMixture already correctly registers things as parameters, + # so they will get saved & loaded in state dicts + self.gmm = GaussianMixture( + n_components=gmm_n_components, + n_features=feature_irreps.num_irreps, + covariance_type=gmm_covariance_type, + ) + + @torch.jit.unused + def fit(self, X, seed=None) -> None: + self.gmm.fit(X, rng=seed) + + def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: + if self.gmm.is_fit(): + nll_scores = self.gmm(data[self.feature_field]) + data[self.out_field] = nll_scores + return data diff --git a/nequip/nn/_graph_mixin.py b/nequip/nn/_graph_mixin.py index 806dd566..eef7d571 100644 --- a/nequip/nn/_graph_mixin.py +++ b/nequip/nn/_graph_mixin.py @@ -237,7 +237,7 @@ def append_from_parameters( name: str, builder: Callable, params: Dict[str, Any] = {}, - ) -> None: + ) -> GraphModuleMixin: r"""Build a module from parameters and append it. Args: @@ -245,6 +245,9 @@ def append_from_parameters( name (str): the name for the module builder (callable): a class or function to build a module params (dict, optional): extra specific parameters for this module that take priority over those in ``shared_params`` + + Returns: + the built module """ instance, _ = instantiate( builder=builder, @@ -254,7 +257,7 @@ def append_from_parameters( all_args=shared_params, ) self.append(name, instance) - return + return instance @torch.jit.unused def insert( @@ -323,7 +326,7 @@ def insert_from_parameters( params: Dict[str, Any] = {}, after: Optional[str] = None, before: Optional[str] = None, - ) -> None: + ) -> GraphModuleMixin: r"""Build a module from parameters and insert it after ``after``.
Args: @@ -333,6 +336,9 @@ def insert_from_parameters( params (dict, optional): extra specific parameters for this module that take priority over those in ``shared_params`` after: the name of the module to insert after before: the name of the module to insert before + + Returns: + the inserted module """ if (before is None) is (after is None): raise ValueError("Only one of before or after argument needs to be defined") @@ -351,7 +357,7 @@ def insert_from_parameters( all_args=shared_params, ) self.insert(after=after, before=before, name=name, module=instance) - return + return instance # Copied from https://pytorch.org/docs/stable/_modules/torch/nn/modules/container.html#Sequential # with type annotations added diff --git a/nequip/scripts/deploy.py b/nequip/scripts/deploy.py index 959a6390..e90466c2 100644 --- a/nequip/scripts/deploy.py +++ b/nequip/scripts/deploy.py @@ -22,7 +22,7 @@ from e3nn.util.jit import script from nequip.model import model_from_config -from nequip.train import Trainer +from nequip.data import dataset_from_config from nequip.utils import Config from nequip.utils.versions import check_code_version, get_config_code_versions from nequip.scripts.train import default_config @@ -204,6 +204,13 @@ def main(args=None): help="Path to a working directory from a training session to deploy.", type=pathlib.Path, ) + build_parser.add_argument( + "--using-dataset", + help="Allow model builders to use a dataset during deployment. By default uses the training dataset, but can point to a YAML file for another dataset.", + type=pathlib.Path, + const=True, + nargs="?", + ) build_parser.add_argument( "out_file", help="Output file for deployed model.", @@ -236,11 +243,15 @@ def main(args=None): logging.debug(f"Model had config:\n{config}") elif args.command == "build": + state_dict = None if args.model and args.train_dir: raise ValueError("--model and --train-dir cannot both be specified.") if args.train_dir is not None: logging.info("Loading best_model from training session...") config = Config.from_file(str(args.train_dir / "config.yaml")) + state_dict = torch.load( + str(args.train_dir / "best_model.pth"), map_location="cpu" + ) elif args.model is not None: logging.info("Building model from config...") config = Config.from_file(str(args.model), defaults=default_config) @@ -251,16 +262,24 @@ def main(args=None): check_code_version(config) # -- load model -- + # figure out first if a dataset is involved + dataset = None + if args.using_dataset: + dataset_config = config + if args.using_dataset is not True: + dataset_config = Config.from_file(str(args.using_dataset)) + dataset = dataset_from_config(dataset_config) + if args.using_dataset is True: + # we're using the one from training config + # downselect to training set + dataset = dataset.index_select(config.train_idcs) + # build the actual model + # reset the global metadata dict so that model builders can fill it: global _current_metadata _current_metadata = {} - if args.train_dir is not None: - model, _ = Trainer.load_model_from_training_session( - args.train_dir, model_name="best_model.pth", device="cpu" - ) - elif args.model is not None: - model = model_from_config(config, deploy=True) - else: - raise AssertionError + model = model_from_config(config, dataset=dataset, deploy=True) + if state_dict is not None: + model.load_state_dict(state_dict, strict=True) # -- compile -- model = _compile_for_deploy(model) diff --git a/nequip/scripts/train.py b/nequip/scripts/train.py index c7316b01..694b70bd 100644 --- a/nequip/scripts/train.py
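Two usage notes on the deployment changes above, both sketches rather than exact recipes. The new flag either reuses the training dataset or takes an explicit dataset YAML (paths here are illustrative):

nequip-deploy build --using-dataset --model deploy.yaml deployed.pth
nequip-deploy build --using-dataset other_dataset.yaml --model deploy.yaml deployed.pth

And a data-dependent model builder can record extra deployment metadata through the helpers added earlier in this series; the builder and key name below are hypothetical, and values must be ASCII strings because they are later `.encode("ascii")`-ed into the TorchScript extra files:

from nequip.scripts.deploy import _register_metadata_key, _set_deploy_metadata

_register_metadata_key("my_custom_key")  # allow-list the key once at import time

def MyBuilder(model, config):
    # recorded during `nequip-deploy`; outside of deployment,
    # _current_metadata is None and this call is a silent no-op
    _set_deploy_metadata("my_custom_key", "some_ascii_value")
    return model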
+++ b/nequip/scripts/train.py @@ -19,6 +19,7 @@ from nequip.utils.config import _GLOBAL_ALL_ASKED_FOR_KEYS from nequip.utils.test import assert_AtomicData_equivariant from nequip.utils.versions import check_code_version +from nequip.utils.misc import get_default_device_name from nequip.utils._global_options import _set_global_options from nequip.scripts._logger import set_up_script_logger @@ -34,6 +35,7 @@ "RescaleEnergyEtc", ], dataset_statistics_stride=1, + device=get_default_device_name(), default_dtype="float64", model_dtype="float32", allow_tf32=False, diff --git a/nequip/train/trainer.py b/nequip/train/trainer.py index 68c2c1ec..986e7874 100644 --- a/nequip/train/trainer.py +++ b/nequip/train/trainer.py @@ -49,6 +49,7 @@ from nequip.utils.versions import check_code_version from nequip.model import model_from_config from nequip.utils.config import _GLOBAL_ALL_ASKED_FOR_KEYS +from nequip.utils.misc import get_default_device_name from .loss import Loss, LossStat from .metrics import Metrics @@ -225,7 +226,7 @@ def __init__( self, model, model_builders: Optional[list] = [], - device: str = "cuda" if torch.cuda.is_available() else "cpu", + device: str = get_default_device_name(), seed: Optional[int] = None, dataset_seed: Optional[int] = None, loss_coeffs: Union[dict, str] = AtomicDataDict.TOTAL_ENERGY_KEY, diff --git a/nequip/utils/_global_options.py b/nequip/utils/_global_options.py index 2959df90..7a78d36f 100644 --- a/nequip/utils/_global_options.py +++ b/nequip/utils/_global_options.py @@ -86,7 +86,8 @@ def _set_global_options(config, warn_on_override: bool = False) -> None: ) fuser = config.get("_jit_fuser", default_fuser) # context manager just restores old fuser afterwards - torch.jit.fuser(fuser).__enter__() + if torch.cuda.is_available(): + torch.jit.fuser(fuser).__enter__() if warn_on_override and fuser != default_fuser: # ^ meh assumption, but better than hardcoding getting the old state warnings.warn( diff --git a/nequip/utils/config.py b/nequip/utils/config.py index 99bddeb6..18e8dbce 100644 --- a/nequip/utils/config.py +++ b/nequip/utils/config.py @@ -272,7 +272,17 @@ def save(self, filename: str, format: Optional[str] = None): @staticmethod def from_file(filename: str, format: Optional[str] = None, defaults: dict = {}): - """Load arguments from file""" + """Load arguments from file + + Has support for including another config file as a baseline with: + ``` + # example of using another config as a baseline and overriding only selected options + # this option will read in configs/minimal.yaml and take ALL keys from that file + include_file_as_baseline_config: configs/minimal.yaml + # keys specified in this file WILL OVERRIDE keys from the `include_file_as_baseline_config` file + l_max: 1 # overrides l_max: 2 in minimal.yaml + ``` + """ supported_formats = {"yaml": ("yml", "yaml"), "json": "json"} dictionary = load_file( @@ -280,6 +290,23 @@ def from_file(filename: str, format: Optional[str] = None, defaults: dict = {}): filename=filename, enforced_format=format, ) + k: str = "include_file_as_baseline_config" + if k in dictionary: + # allow one level of subloading + baseline_fname = dictionary.pop(k) + dictionary_baseline = load_file( + supported_formats=supported_formats, + filename=baseline_fname, + enforced_format=format, + ) + if k in dictionary_baseline: + raise NotImplementedError( + f"Multiple levels of `{k}` are not allowed, but {baseline_fname} contained `{k}`" + ) + # override baseline options with the main config + dictionary_baseline.update(dictionary) + dictionary 
= dictionary_baseline + del dictionary_baseline, baseline_fname return Config.from_dict(dictionary, defaults) @staticmethod diff --git a/nequip/utils/gmm.py b/nequip/utils/gmm.py new file mode 100644 index 00000000..2610dc60 --- /dev/null +++ b/nequip/utils/gmm.py @@ -0,0 +1,142 @@ +from typing import Optional, Union + +import math +import torch +import numpy as np +from sklearn import mixture +from e3nn.util.jit import compile_mode + + +@torch.jit.script +def _compute_log_det_cholesky(matrix_chol: torch.Tensor, n_features: int): + """Compute the log-det of the cholesky decomposition of matrices.""" + + n_components = matrix_chol.size(dim=0) + + # https://github.com/scikit-learn/scikit-learn/blob/d9cfe3f6b1c58dd253dc87cb676ce5171ff1f8a1/sklearn/mixture/_gaussian_mixture.py#L379 + log_det_chol = torch.sum( + torch.log(matrix_chol.view(n_components, -1)[:, :: n_features + 1]), dim=1 + ) + + return log_det_chol + + +@torch.jit.script +def _estimate_log_gaussian_prob( + X: torch.Tensor, means: torch.Tensor, precisions_chol: torch.Tensor +): + """Estimate the log Gaussian probability.""" + + n_features = X.size(dim=1) + + # https://github.com/scikit-learn/scikit-learn/blob/d9cfe3f6b1c58dd253dc87cb676ce5171ff1f8a1/sklearn/mixture/_gaussian_mixture.py#L423 + log_det = _compute_log_det_cholesky(precisions_chol, n_features) + + # dim(X) = [n_sample, n_feature] + # dim(precisions_chol) = [n_component, n_feature, n_feature] + # [n_sample, 1, n_feature] - [1, n_component, n_feature] = [n_sample, n_component, n_feature] + # dim(X_centered) = [n_sample, n_component, n_feature] + X_centered = X.unsqueeze(-2) - means.unsqueeze(0) + log_prob = ( + torch.einsum("zci,cij->zcj", X_centered, precisions_chol).square().sum(dim=-1) + ) + + # https://github.com/scikit-learn/scikit-learn/blob/d9cfe3f6b1c58dd253dc87cb676ce5171ff1f8a1/sklearn/mixture/_gaussian_mixture.py#L454 + return -0.5 * (n_features * math.log(2 * math.pi) + log_prob) + log_det + + +@compile_mode("script") +class GaussianMixture(torch.nn.Module): + """Calculate NLL of samples under a Gaussian Mixture Model (GMM). + + Supports fitting the GMM outside of PyTorch using `sklearn`. 
+ """ + + covariance_type: str + n_components: int + n_features: int + seed: int + + def __init__( + self, + n_features: int, + n_components: Optional[int] = 0, + covariance_type: str = "full", + ): + super(GaussianMixture, self).__init__() + assert covariance_type in ( + "full", + ), f"covariance type was {covariance_type}, should be full" + self.covariance_type = covariance_type + self.n_components = n_components + self.n_features = n_features + + self.register_buffer("means", torch.Tensor()) + self.register_buffer("weights", torch.Tensor()) + self.register_buffer("covariances", torch.Tensor()) + self.register_buffer("precisions_cholesky", torch.Tensor()) + + @torch.jit.export + def is_fit(self) -> bool: + return self.weights.numel() != 0 + + def forward(self, X: torch.Tensor) -> torch.Tensor: + """Compute the NLL of samples ``X`` under the GMM.""" + + # Check if model has been fitted + assert self.is_fit(), "model has not been fitted" + + estimated_log_probs = _estimate_log_gaussian_prob( + X, self.means, self.precisions_cholesky + ) + estimated_weights = torch.log(self.weights) + return -torch.logsumexp(estimated_log_probs + estimated_weights, dim=1) + + @torch.jit.unused + def fit( + self, + X: torch.Tensor, + max_components: int = 50, + rng: Optional[Union[torch.Generator, int]] = None, + ) -> None: + """Fit the GMM to the samples `X` using sklearn.""" + + # if RNG is an int, just use it as a seed; + # if RNG is None, use the current torch random state; + # if RNG is a torch.Generator, use that to generate an int seed for sklearn + # this way, this is by default seeded by torch without setting the numpy or sklearn seeds + random_state = ( + rng + if isinstance(rng, int) + else torch.randint(2**16, (1,), generator=rng).item() + ) + + gmm_kwargs = dict( + covariance_type=self.covariance_type, + random_state=random_state, + ) + + # If self.n_components is not provided (i.e, 0), set number of Gaussian + # components using BIC. The number of components should not exceed the + # number of samples in X and is capped at a heuristic of max_components + if not self.n_components: + components = list(range(1, min(max_components, X.size(dim=0)))) + gmms = [ + mixture.GaussianMixture(n_components=n, **gmm_kwargs) + for n in components + ] + bics = [model.fit(X).bic(X) for model in gmms] + self.n_components = components[np.argmin(bics)] + del gmms, bics, components + + # Fit GMM + gmm = mixture.GaussianMixture(n_components=self.n_components, **gmm_kwargs) + gmm.fit(X) + + # Save info from GMM into the register buffers + self.register_buffer("means", torch.from_numpy(gmm.means_)) + self.register_buffer("weights", torch.from_numpy(gmm.weights_)) + self.register_buffer("covariances", torch.from_numpy(gmm.covariances_)) + self.register_buffer( + "precisions_cholesky", torch.from_numpy(gmm.precisions_cholesky_) + ) diff --git a/nequip/utils/misc.py b/nequip/utils/misc.py index 2bbc9257..34b04f7f 100644 --- a/nequip/utils/misc.py +++ b/nequip/utils/misc.py @@ -16,6 +16,10 @@ def dtype_to_name(name: Union[str, torch.dtype]) -> torch.dtype: return {torch.float32: "float32", torch.float64: "float64"}[name] +def get_default_device_name() -> str: + return "cuda" if torch.cuda.is_available() else "cpu" + + @contextlib.contextmanager def torch_default_dtype(dtype): """Set `torch.get_default_dtype()` for the duration of a with block, cleaning up with a `finally`. 
diff --git a/tests/unit/utils/test_gmm.py b/tests/unit/utils/test_gmm.py new file mode 100644 index 00000000..84628ddd --- /dev/null +++ b/tests/unit/utils/test_gmm.py @@ -0,0 +1,123 @@ +import torch +import pytest +import numpy as np +from nequip.utils import gmm +from sklearn import mixture +from e3nn.util.test import assert_auto_jitable + + +class TestGMM: + # Seed for tests + @pytest.fixture + def seed(self): + return 678912345 + + # Data sets for fitting GMMs and scoring NLLs + @pytest.fixture(params=[[10, 8], [500, 32]]) + def feature_data(self, seed, request): + fit_data = 2 * ( + torch.randn( + request.param[0], + request.param[1], + generator=torch.Generator().manual_seed(seed), + ) + - 0.5 + ) + test_data = 2 * ( + torch.randn( + request.param[0] * 2, + request.param[1], + generator=torch.Generator().manual_seed(seed - 123456789), + ) + - 0.5 + ) + return {"fit_data": fit_data, "test_data": test_data} + + # Sklearn GMM for tests + @pytest.fixture + def gmm_sklearn(self, seed): + return mixture.GaussianMixture( + n_components=8, covariance_type="full", random_state=seed + ) + + # Torch GMM for small data set tests + @pytest.fixture + def gmm_torch(self, feature_data): + return gmm.GaussianMixture( + n_features=feature_data["fit_data"].size(dim=1), n_components=8 + ) + + # Test compilation + def test_compile(self, gmm_torch): + assert_auto_jitable(gmm_torch) + + # Test agreement between sklearn and torch GMMs + def test_fit_forward(self, seed, gmm_sklearn, gmm_torch, feature_data): + gmm_sklearn.fit(feature_data["fit_data"].numpy()) + gmm_torch.fit(feature_data["fit_data"], rng=seed) + + assert torch.allclose(torch.from_numpy(gmm_sklearn.means_), gmm_torch.means) + assert torch.allclose( + torch.from_numpy(gmm_sklearn.covariances_), gmm_torch.covariances + ) + assert torch.allclose(torch.from_numpy(gmm_sklearn.weights_), gmm_torch.weights) + assert torch.allclose( + torch.from_numpy(gmm_sklearn.precisions_cholesky_), + gmm_torch.precisions_cholesky, + ) + + sklearn_nll = gmm_sklearn.score_samples(feature_data["test_data"].numpy()) + torch_nll = gmm_torch(feature_data["test_data"]) + + assert torch.allclose(-torch.from_numpy(sklearn_nll), torch_nll) + + # Test agreement between sklearn and torch using BIC + def test_fit_forward_bic(self, seed, feature_data): + components = list(range(1, min(50, feature_data["fit_data"].size(dim=0)))) + gmms = [ + mixture.GaussianMixture( + n_components=n, covariance_type="full", random_state=seed + ) + for n in components + ] + bics = [ + model.fit(feature_data["fit_data"]).bic(feature_data["fit_data"]) + for model in gmms + ] + + gmm_sklearn = mixture.GaussianMixture( + n_components=components[np.argmin(bics)], + covariance_type="full", + random_state=seed, + ) + gmm_torch = gmm.GaussianMixture(n_features=feature_data["fit_data"].size(dim=1)) + + gmm_sklearn.fit(feature_data["fit_data"].numpy()) + gmm_torch.fit(feature_data["fit_data"], rng=seed) + + assert torch.allclose(torch.from_numpy(gmm_sklearn.means_), gmm_torch.means) + assert torch.allclose( + torch.from_numpy(gmm_sklearn.covariances_), gmm_torch.covariances + ) + assert torch.allclose(torch.from_numpy(gmm_sklearn.weights_), gmm_torch.weights) + assert torch.allclose( + torch.from_numpy(gmm_sklearn.precisions_cholesky_), + gmm_torch.precisions_cholesky, + ) + + sklearn_nll = gmm_sklearn.score_samples(feature_data["test_data"].numpy()) + torch_nll = gmm_torch(feature_data["test_data"]) + + assert torch.allclose(-torch.from_numpy(sklearn_nll), torch_nll) + + # Test assertion error for 
covariance type other than "full" + def test_full_cov(self): + with pytest.raises(AssertionError) as excinfo: + _ = gmm.GaussianMixture(n_features=2, covariance_type="tied") + assert "covariance type was tied, should be full" in str(excinfo.value) + + # Test assertion error for evaluating unfitted GMM + def test_unfitted_gmm(self, gmm_torch, feature_data): + with pytest.raises(AssertionError) as excinfo: + _ = gmm_torch(feature_data["test_data"]) + assert "model has not been fitted" in str(excinfo.value) From f156438b468de03e9c2ee828c01d9b40f08c0e66 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 12 May 2023 14:20:14 -0400 Subject: [PATCH 129/157] allow_tf32 default true --- CHANGELOG.md | 2 +- configs/example.yaml | 6 +++++- configs/full.yaml | 8 +++++--- nequip/scripts/train.py | 2 +- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d63a9e2..b06dd586 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,7 +29,7 @@ Most recent change on the bottom. - Don't log as often by default - [Breaking] Default nonlinearities are `silu` (`e`) and `tanh` (`o`) - Will not reproduce previous versions' data shuffling order (for all practical purposes this does not matter, the `shuffle` option is unchanged) -- [Breaking] `default_dtype` defaults to `float64` (`model_dtype` default `float32`) +- [Breaking] `default_dtype` defaults to `float64` (`model_dtype` default `float32`, `allow_tf32: true` by default--- see https://arxiv.org/abs/2304.10061) - `nequip-benchmark` now only uses `--n-data` frames to build the model - [Breaking] By default models now use `StressForceOutput`, not `ForceOutput` diff --git a/configs/example.yaml b/configs/example.yaml index 127e4c6e..170304a8 100644 --- a/configs/example.yaml +++ b/configs/example.yaml @@ -9,7 +9,11 @@ run_name: example-run-toluene seed: 123 # model seed dataset_seed: 456 # data set seed append: true # set true if a restarted run should append to the previous log file -default_dtype: float32 # type of float to use, e.g. float32 and float64 + +# see https://arxiv.org/abs/2304.10061 for discussion of numerical precision +default_dtype: float64 +model_dtype: float32 +allow_tf32: true # consider setting to false if you plan to mix training/inference over any devices that are not NVIDIA Ampere or later # network r_max: 4.0 # cutoff radius in length units, here Angstrom, this is an important hyperparamter to scan diff --git a/configs/full.yaml b/configs/full.yaml index 61548a8f..f43b3d49 100644 --- a/configs/full.yaml +++ b/configs/full.yaml @@ -13,9 +13,11 @@ run_name: example-run-toluene seed: 123 # model seed dataset_seed: 456 # data set seed append: true # set true if a restarted run should append to the previous log file -default_dtype: float32 # type of float to use, e.g. float32 and float64 -allow_tf32: false # whether to use TensorFloat32 if it is available -# device: cuda # which device to use. 
Default: automatically detected cuda or "cpu" + +# see https://arxiv.org/abs/2304.10061 for discussion of numerical precision +default_dtype: float64 +model_dtype: float32 +allow_tf32: true # consider setting to false if you plan to mix training/inference over any devices that are not NVIDIA Ampere or later # == network == diff --git a/nequip/scripts/train.py b/nequip/scripts/train.py index 694b70bd..9c9653fa 100644 --- a/nequip/scripts/train.py +++ b/nequip/scripts/train.py @@ -38,7 +38,7 @@ device=get_default_device_name(), default_dtype="float64", model_dtype="float32", - allow_tf32=False, + allow_tf32=True, verbose="INFO", model_debug_mode=False, equivariance_test=False, From 00f4da8cd6343204e7602ea3fc6b3acaaa355eb1 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 12 May 2023 14:41:15 -0400 Subject: [PATCH 130/157] remove _params suffix --- CHANGELOG.md | 1 + nequip/scripts/train.py | 2 +- nequip/utils/config.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b06dd586..601d7943 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,7 @@ Most recent change on the bottom. ### Removed - [Breaking] `fixed_fields` machinery (`npz_fixed_field_keys` is still supported, but through a more straightforward implementation) - Default run name/WandB project name of `NequIP`, they must now always be provided explicitly +- [Breaking] Removed `_params` as an allowable subconfiguration suffix (i.e. instead of `optimizer_params` now only `optimizer_kwargs` is valid, not both) ## [0.5.6] - 2022-12-19 ### Added diff --git a/nequip/scripts/train.py b/nequip/scripts/train.py index 9c9653fa..3d10049b 100644 --- a/nequip/scripts/train.py +++ b/nequip/scripts/train.py @@ -245,7 +245,7 @@ def fresh_start(config): def _unused_check(): unused = config._unused_keys() if len(unused) > 0: - message = f"The following keys in the config file were not used, did you make a typo?: {', '.join(unused)}. (If this sounds wrong, please file an issue: the detection of unused keys is in beta. You can turn this error into a warning with `--warn-unused`.)" + message = f"The following keys in the config file were not used, did you make a typo?: {', '.join(unused)}. (If this sounds wrong, please file an issue. 
You can turn this error into a warning with `--warn-unused`, but please make sure that the key really is correctly spelled and used!)"
         if config.warn_unused:
             warnings.warn(message)
         else:
diff --git a/nequip/utils/config.py b/nequip/utils/config.py
index 18e8dbce..ca79f576 100644
--- a/nequip/utils/config.py
+++ b/nequip/utils/config.py
@@ -204,7 +204,7 @@ def update_w_prefix(
         keys = self.update(prefix_dict, allow_val_change=allow_val_change)
         keys = {k: f"{prefix}_{k}" for k in keys}
 
-        for suffix in ["params", "kwargs"]:
+        for suffix in ["kwargs"]:
             if f"{prefix}_{suffix}" in dictionary:
                 key3 = self.update(
                     dictionary[f"{prefix}_{suffix}"],

From 47375ef1060fc7699ac64891b1eb382f0f8e8d83 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Fri, 12 May 2023 15:38:27 -0400
Subject: [PATCH 131/157] docs updates

---
 docs/commandline/commands.rst            | 134 +------------------
 docs/conf.py                             |   7 +-
 docs/errors/errors.md                    |  22 ++++
 docs/errors/errors.rst                   |  12 --
 docs/faq/FAQ.md                          |   8 ++
 docs/faq/FAQ.rst                         |  14 ---
 docs/howto/conventions.md                |  29 +++++
 docs/howto/conventions.rst               |   5 -
 docs/howto/howto.rst                     |   2 +-
 docs/index.rst                           |   7 +-
 docs/installation/install.rst            |  39 ------
 docs/{lammps => integrations}/all.rst    |   0
 docs/{lammps => integrations}/ase.rst    |   0
 docs/{lammps => integrations}/lammps.rst |   0
 docs/introduction/intro.md               |   5 +
 docs/introduction/intro.rst              |   4 -
 docs/options/HOWTO.md                    |  32 -----
 docs/options/dataset.rst                 |  68 -----------
 docs/options/general.rst                 |  28 -----
 docs/options/logging.rst                 |   8 --
 docs/options/model.rst                   | 149 -----------------------
 docs/options/options.rst                 |  10 --
 docs/options/training.rst                |   8 --
 docs/yaml/yaml.rst                       |   4 -
 24 files changed, 74 insertions(+), 521 deletions(-)
 create mode 100644 docs/errors/errors.md
 delete mode 100644 docs/errors/errors.rst
 create mode 100644 docs/faq/FAQ.md
 delete mode 100644 docs/faq/FAQ.rst
 create mode 100644 docs/howto/conventions.md
 delete mode 100644 docs/howto/conventions.rst
 delete mode 100644 docs/installation/install.rst
 rename docs/{lammps => integrations}/all.rst (100%)
 rename docs/{lammps => integrations}/ase.rst (100%)
 rename docs/{lammps => integrations}/lammps.rst (100%)
 create mode 100644 docs/introduction/intro.md
 delete mode 100644 docs/introduction/intro.rst
 delete mode 100644 docs/options/HOWTO.md
 delete mode 100644 docs/options/dataset.rst
 delete mode 100644 docs/options/general.rst
 delete mode 100644 docs/options/logging.rst
 delete mode 100644 docs/options/model.rst
 delete mode 100644 docs/options/options.rst
 delete mode 100644 docs/options/training.rst
 delete mode 100644 docs/yaml/yaml.rst

diff --git a/docs/commandline/commands.rst b/docs/commandline/commands.rst
index b58c87ab..f371dc2b 100644
--- a/docs/commandline/commands.rst
+++ b/docs/commandline/commands.rst
@@ -1,132 +1,2 @@
-Command-line Executables
-========================
-
-``nequip-train``
-----------------
-
-    .. code ::
-
-    usage: nequip-train [-h] [--equivariance-test] [--model-debug-mode] [--grad-anomaly-mode] [--log LOG] config
-
-Train (or restart training of) a NequIP model.
-
-positional arguments:
-  config               YAML file configuring the model, dataset, and other options
-
-optional arguments:
-  -h, --help           show this help message and exit
-  --equivariance-test  test the model's equivariance before training
-  --model-debug-mode   enable model debug mode, which can sometimes give much more useful error messages at the
-                       cost of some speed. Do not use for production training!
- --grad-anomaly-mode enable PyTorch autograd anomaly mode to debug NaN gradients. Do not use for production - training! - --log LOG log file to store all the screen logging - -``nequip-evaluate`` -------------------- - - .. code :: - - usage: nequip-evaluate [-h] [--train-dir TRAIN_DIR] [--model MODEL] [--dataset-config DATASET_CONFIG] - [--metrics-config METRICS_CONFIG] [--test-indexes TEST_INDEXES] [--batch-size BATCH_SIZE] - [--device DEVICE] [--output OUTPUT] [--log LOG] - -Compute the error of a model on a test set using various metrics. The model, metrics, dataset, etc. can specified -in individual YAML config files, or a training session can be indicated with ``--train-dir``. In order of priority, -the global settings (dtype, TensorFloat32, etc.) are taken from: (1) the model config (for a training session), (2) -the dataset config (for a deployed model), or (3) the defaults. Prints only the final result in ``name = num`` format -to stdout; all other information is ``logging.debug``ed to stderr. WARNING: Please note that results of CUDA models -are rarely exactly reproducible, and that even CPU models can be nondeterministic. - -optional arguments: - -h, --help show this help message and exit - --train-dir TRAIN_DIR - Path to a working directory from a training session. - --model MODEL A deployed or pickled NequIP model to load. If omitted, defaults to `best_model.pth` in - `train_dir`. - --dataset-config DATASET_CONFIG - A YAML config file specifying the dataset to load test data from. If omitted, `config.yaml` - in `train_dir` will be used - --metrics-config METRICS_CONFIG - A YAML config file specifying the metrics to compute. If omitted, `config.yaml` in - `train_dir` will be used. If the config does not specify `metrics_components`, the default - is to logging.debug MAEs and RMSEs for all fields given in the loss function. If the - literal string `None`, no metrics will be computed. - --test-indexes TEST_INDEXES - Path to a file containing the indexes in the dataset that make up the test set. If omitted, - all data frames *not* used as training or validation data in the training session - `train_dir` will be used. - --batch-size BATCH_SIZE - Batch size to use. Larger is usually faster on GPU. - --device DEVICE Device to run the model on. If not provided, defaults to CUDA if available and CPU - otherwise. - --output OUTPUT XYZ file to write out the test set and model predicted forces, energies, etc. to. - --log LOG log file to store all the metrics and screen logging.debug - -``nequip-deploy`` ------------------ - - .. code :: - - usage: nequip-deploy [-h] {info,build} ... - -Deploy and view information about previously deployed NequIP models. - -optional arguments: - -h, --help show this help message and exit - -commands: - {info,build} - info Get information from a deployed model file - build Build a deployment model - -``nequip-deploy info`` -~~~~~~~~~~~~~~~~~~~~~~ - - .. code :: - - usage: nequip-deploy info [-h] model_path - -positional arguments: - model_path Path to a deployed model file. - -optional arguments: - -h, --help show this help message and exit - - -``nequip-deploy build`` -~~~~~~~~~~~~~~~~~~~~~~~ - - .. code :: - - usage: nequip-deploy build [-h] train_dir out_file - -positional arguments: - train_dir Path to a working directory from a training session. - out_file Output file for deployed model. - -optional arguments: - -h, --help show this help message and exit - - -``nequip-benchmark`` --------------------- - - .. 
code ::
-
-    usage: nequip-benchmark [-h] [--profile PROFILE] [--device DEVICE] [-n N] [--n-data N_DATA] [--timestep TIMESTEP]
-                            config
-
-Benchmark the approximate MD performance of a given model configuration / dataset pair.
-
-positional arguments:
-  config               configuration file
-
-optional arguments:
-  -h, --help           show this help message and exit
-  --profile PROFILE    Profile instead of timing, creating and outputing a Chrome trace JSON to the given path.
-  --device DEVICE      Device to run the model on. If not provided, defaults to CUDA if available and CPU
-                       otherwise.
-  -n N                 Number of trials.
-  --n-data N_DATA      Number of frames to use.
-  --timestep TIMESTEP  MD timestep for ns/day esimation, in fs. Defauts to 1fs.
+Command-line tools
+==================
diff --git a/docs/conf.py b/docs/conf.py
index 11a5afca..ff701842 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -27,7 +27,12 @@
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
-extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon", "sphinx_rtd_theme"]
+extensions = [
+    "sphinx.ext.autodoc",
+    "sphinx.ext.napoleon",
+    "sphinx_rtd_theme",
+    "myst_parser",
+]
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ["_templates"]
diff --git a/docs/errors/errors.md b/docs/errors/errors.md
new file mode 100644
index 00000000..a4422fa0
--- /dev/null
+++ b/docs/errors/errors.md
@@ -0,0 +1,22 @@
+Common errors and warnings
+==========================
+
+#### Unused keys
+
+ - ```txt
+   KeyError: 'The following keys in the config file were not used, did you make a typo?: optimizer_params.
+   ```
+   Since >=0.6.0, using `prefix_params` style subdictionaries of options is no longer supported. Only `_kwargs` is supported, i.e. `optimizer_kwargs`. Please update your YAML configs.
+
+#### Out-of-memory errors
+
+ - ...with `nequip-evaluate`
+
+   Choose a lower ``--batch-size``; while the highest value that fits in your GPU memory is good for performance,
+   lowering this does *not* affect the final results (beyond numerics).
+
+#### Other
+
+ - Various shape errors
+
+   Check the sanity of the shapes in your dataset.
\ No newline at end of file
diff --git a/docs/errors/errors.rst b/docs/errors/errors.rst
deleted file mode 100644
index 576e553d..00000000
--- a/docs/errors/errors.rst
+++ /dev/null
@@ -1,12 +0,0 @@
-Errors
-======
-
-Common errors
--------------
-
-Various shape errors
-    Check the sanity of the shapes in your dataset.
-
-Out-of-memory errors with `nequip-evaluate`
-    Choose a lower ``--batch-size``; while the highest value that fits in your GPU memory is good for performance,
-    lowering this does *not* affect the final results (beyond numerics).
diff --git a/docs/faq/FAQ.md b/docs/faq/FAQ.md
new file mode 100644
index 00000000..bd00a4c2
--- /dev/null
+++ b/docs/faq/FAQ.md
@@ -0,0 +1,8 @@
+# FAQ
+
+## Loss functions
+
+ - Despite changing the coefficients in `loss_coeffs`, the magnitude of my training loss isn't changing!
+
+   Individual loss terms like `training_loss_f`, `training_loss_e`, etc. are reported **before** they are scaled by their coefficients for summing into the total loss.
+
diff --git a/docs/faq/FAQ.rst b/docs/faq/FAQ.rst
deleted file mode 100644
index 411e77c1..00000000
--- a/docs/faq/FAQ.rst
+++ /dev/null
@@ -1,14 +0,0 @@
-FAQ
-===
-
-How do I...
------------
-
-... continue to train a model that reached a stopping condition?
-    There will be an answer here.
-
-1. Reload the model trained with version 0.3.3 to the code in 0.4.
-   check out the migration note at :ref:`migration_note`.
-
-2. Specify my dataset for `nequip-train` and `nequip-eval`, see :ref:`_dataset_note`.
-
diff --git a/docs/howto/conventions.md b/docs/howto/conventions.md
new file mode 100644
index 00000000..3964fef2
--- /dev/null
+++ b/docs/howto/conventions.md
@@ -0,0 +1,29 @@
+# Conventions and units
+
+## Conventions
+ - Cell vectors are given in ASE style as the **rows** of the cell matrix
+ - The first index in an edge tuple (``edge_index[0]``) is the center atom, and the second (``edge_index[1]``) is the neighbor
+
+## Units
+
+`nequip` has no preferred system of units; models, errors, predictions, etc. will always be in the units of the original dataset used.
+
+```{warning}
+`nequip` cannot and does not check the consistency of units in inputs you provide, and it is your responsibility to ensure consistent treatment of input and output units
+```
+
+Losses (`training_loss_f`, `validation_loss_e`, etc.) do **not** have physical units. Errors (`training_f_rmse`, `validation_f_rmse`) are always reported in physical units.
+
+## Pressure / stress / virials
+
+`nequip` always expresses stress in the "consistent" units of `energy / length^3`, which are **not** the typical physical units used by many codes for stress.
+
+```{warning}
+Training labels for stress in the original dataset must be pre-processed by the user to be in consistent units.
+```
+
+Stress also includes an arbitrary sign convention, for which we adopt the choice that `virial = -stress x volume <=> stress = (-1/volume) * virial`.
+
+```{warning}
+Training labels for stress in the original dataset must be pre-processed by the user to be in **this sign convention**, which they may or may not already be depending on their origin.
+```
\ No newline at end of file
diff --git a/docs/howto/conventions.rst b/docs/howto/conventions.rst
deleted file mode 100644
index f4679a76..00000000
--- a/docs/howto/conventions.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-Conventions
-===========
-
- - Cells vectors are given in ASE style as the **rows** of the cell matrix
- - The first index in an edge tuple (``edge_index[0]``) is the center atom, and the second (``edge_index[1]``) is the neighbor
\ No newline at end of file
diff --git a/docs/howto/howto.rst b/docs/howto/howto.rst
index 07e84e84..eb376f54 100644
--- a/docs/howto/howto.rst
+++ b/docs/howto/howto.rst
@@ -3,5 +3,5 @@ How-to Tutorials
 
 .. toctree::
 
+   conventions
    dataset
-   migrate
diff --git a/docs/index.rst b/docs/index.rst
index d2edd1a6..0bd1922b 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -14,18 +14,13 @@ NequIP is an open-source package for creating, training, and using E(3)-equivari
 
    introduction/intro
    cite
-   installation/install
-   yaml/yaml
    howto/howto
    faq/FAQ
    commandline/commands
-   lammps/all
-   options/options
+   integrations/all
    api/nequip
    errors/errors
 
-
-
 Indices and tables
 ==================
diff --git a/docs/installation/install.rst b/docs/installation/install.rst
deleted file mode 100644
index 3e946815..00000000
--- a/docs/installation/install.rst
+++ /dev/null
@@ -1,39 +0,0 @@
-Installation
-============
-
-NequIP requires:
-
- * Python >= 3.6
- * PyTorch >= 1.8, <=1.11.*. PyTorch can be installed following the `instructions from their documentation `_. Note that neither ``torchvision`` nor ``torchaudio``, included in the default install command, are needed for NequIP.
-
-To install:
-
- * We use `Weights&Biases `_ to keep track of experiments.
This is not a strict requirement — you can use our package without it — but it may make your life easier. If you want to use it, create an account `here `_ and install the Python package:: - - pip install wandb - - * Install the latest stable NequIP:: - - pip install https://github.com/mir-group/nequip/archive/main.zip - -To install previous versions of NequIP, please clone the repository from GitHub and check out the appropriate tag (for example ``v0.3.3`` for version 0.3.3). - -To install the current **unstable** development version of NequIP, please clone our repository and check out the ``develop`` branch. - -Installation Issues -------------------- - -The easiest way to check if your installation is working is to train a _toy_ model:: - - nequip-train configs/minimal.yaml - -If you suspect something is wrong, encounter errors, or just want to confirm that everything is in working order, you can also run the unit tests:: - - pip install pytest - pytest tests/unit/ - -To run the full tests, including a set of longer/more intensive integration tests, run:: - - pytest tests/ - -If a GPU is present, the unit tests will use it. \ No newline at end of file diff --git a/docs/lammps/all.rst b/docs/integrations/all.rst similarity index 100% rename from docs/lammps/all.rst rename to docs/integrations/all.rst diff --git a/docs/lammps/ase.rst b/docs/integrations/ase.rst similarity index 100% rename from docs/lammps/ase.rst rename to docs/integrations/ase.rst diff --git a/docs/lammps/lammps.rst b/docs/integrations/lammps.rst similarity index 100% rename from docs/lammps/lammps.rst rename to docs/integrations/lammps.rst diff --git a/docs/introduction/intro.md b/docs/introduction/intro.md new file mode 100644 index 00000000..acdf9ada --- /dev/null +++ b/docs/introduction/intro.md @@ -0,0 +1,5 @@ +# Overview + +## Installation + +See [`README.md`](https://github.com/mir-group/nequip/) diff --git a/docs/introduction/intro.rst b/docs/introduction/intro.rst deleted file mode 100644 index e0dcc32c..00000000 --- a/docs/introduction/intro.rst +++ /dev/null @@ -1,4 +0,0 @@ -Overview -======== - -TODO diff --git a/docs/options/HOWTO.md b/docs/options/HOWTO.md deleted file mode 100644 index 44bc5508..00000000 --- a/docs/options/HOWTO.md +++ /dev/null @@ -1,32 +0,0 @@ -Add this code to `auto_init.py`: - -```python -f = open("auto_all_options.rst", "w") - - -def print_option(builder, file): - print(f"!! {builder.__name__}", file=f) - if inspect.isclass(builder): - builder = builder.__init__ - sig = inspect.signature(builder) - for k, v in sig.parameters.items(): - if k == "self": - continue - print(k, file=f) - print(len(k) * "^", file=f) - if v.default == inspect.Parameter.empty: - print(f" | Type:", file=f) - print( - f" | Default: n/a\n", - file=f, - ) - else: - typestr = type(v.default).__name__ - print(f" | Type: {typestr}", file=f) - print( - f" | Default: ``{str(v.default)}``\n", - file=f, - ) -``` - -and call the function in every `instantiate`. \ No newline at end of file diff --git a/docs/options/dataset.rst b/docs/options/dataset.rst deleted file mode 100644 index 356f549a..00000000 --- a/docs/options/dataset.rst +++ /dev/null @@ -1,68 +0,0 @@ -Dataset -======= - -Basic ------ - -r_max -^^^^^ - See :ref:`r_max_option`. 
- -type_names -^^^^^^^^^^ - | Type: NoneType - | Default: ``None`` - -chemical_symbols -^^^^^^^^^^^^^^^^ - | Type: NoneType - | Default: ``None`` - -chemical_symbol_to_type -^^^^^^^^^^^^^^^^^^^^^^^ - | Type: NoneType - | Default: ``None`` - -avg_num_neighbors -^^^^^^^^^^^^^^^^^ - | Type: NoneType - | Default: ``None`` - -key_mapping -^^^^^^^^^^^ - | Type: dict - | Default: ``{'positions': 'pos', 'energy': 'total_energy', 'force': 'forces', 'forces': 'forces', 'Z': 'atomic_numbers', 'atomic_number': 'atomic_numbers'}`` - -include_keys -^^^^^^^^ - | Type: list - | Default: ``[]`` - -npz_fixed_field_keys -^^^^^^^^^^^^^^^^^^^^ - | Type: list - | Default: ``[]`` - -file_name -^^^^^^^^^ - | Type: NoneType - | Default: ``None`` - -url -^^^ - | Type: NoneType - | Default: ``None`` - -include_frames -^^^^^^^^^^^^^^ - | Type: NoneType - | Default: ``None`` - -ase_args -^^^^^^^^ - | Type: dict - | Default: ``{}`` - -Advanced --------- -See tutorial on :ref:`../guide/_dataset_note`. diff --git a/docs/options/general.rst b/docs/options/general.rst deleted file mode 100644 index 1b75b6d9..00000000 --- a/docs/options/general.rst +++ /dev/null @@ -1,28 +0,0 @@ -General -======= - -Basic ------ - -root -^^^^ - | Type: - | Default: n/a - -run_name -^^^^^^^^ - | Type: path - | Default: n/a - - ``run_name`` specifies something about whatever - -Advanced --------- - -allow_tf32 -^^^^^^^^^^ - | Type: bool - | Default: ``False`` - - If ``False``, the use of NVIDIA's TensorFloat32 on Tensor Cores (Ampere architecture and later) will be disabled. - If ``True``, the PyTorch defaults (use anywhere possible) will remain. \ No newline at end of file diff --git a/docs/options/logging.rst b/docs/options/logging.rst deleted file mode 100644 index 675cdc45..00000000 --- a/docs/options/logging.rst +++ /dev/null @@ -1,8 +0,0 @@ -Logging -======= - -Basic ------ - -Advanced --------- \ No newline at end of file diff --git a/docs/options/model.rst b/docs/options/model.rst deleted file mode 100644 index b5659224..00000000 --- a/docs/options/model.rst +++ /dev/null @@ -1,149 +0,0 @@ -Model -===== - -Edge Basis -********** - -Basic ------ - -.. _r_max_option: - -r_max -^^^^^ - | Type: float - | Default: n/a - - The cutoff radius within which an atom is considered a neighbor. - -irreps_edge_sh -^^^^^^^^^^^^^^ - | Type: :ref:`Irreps` or int - | Default: n/a - - The irreps to use for the spherical harmonic projection of the edges. - If an integer, specifies all spherical harmonics up to and including that integer as :math:`\ell_{\text{max}}`. - If provided as explicit irreps, all multiplicities should be 1. - -num_basis -^^^^^^^^^ - | Type: int - | Default: ``8`` - - The number of radial basis functions to use. - -chemical_embedding_irreps_out -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | Type: :ref:`Irreps` - | Default: n/a - - The size of the linear embedding of the chemistry of an atom. - -Advanced --------- - -BesselBasis_trainable -^^^^^^^^^^^^^^^^^^^^^ - | Type: bool - | Default: ``True`` - - Whether the Bessel radial basis should be trainable. - -basis -^^^^^ - | Type: type - | Default: ```` - - The radial basis to use. - -Convolution -*********** - -Basic ------ - -num_layers -^^^^^^^^^^ - | Type: int - | Default: ``3`` - - The number of convolution layers. - - -feature_irreps_hidden -^^^^^^^^^^^^^^^^^^^^^ - | Type: :ref:`Irreps` - | Default: n/a - - Specifies the irreps and multiplicities of the hidden features. 
- Typically, include irreps with all :math:`\ell` values up to :math:`\ell_{\text{max}}` (see `irreps_edge_sh`_), each with both even and odd parity. - For example, for ``irreps_edge_sh: 1``, one might provide: ``feature_irreps_hidden: 16x0e + 16x0o + 16x1e + 16x1o``. - -Advanced --------- - -invariant_layers -^^^^^^^^^^^^^^^^ - | Type: int - | Default: ``1`` - - The number of hidden layers in the radial neural network. - -invariant_neurons -^^^^^^^^^^^^^^^^^ - | Type: int - | Default: ``8`` - - The width of the hidden layers of the radial neural network. - -resnet -^^^^^^ - | Type: bool - | Default: ``False`` - -nonlinearity_type -^^^^^^^^^^^^^^^^^ - | Type: str - | Default: ``gate`` - -nonlinearity_scalars -^^^^^^^^^^^^^^^^^^^^ - | Type: dict - | Default: ``{'e': 'silu', 'o': 'tanh'}`` - -nonlinearity_gates -^^^^^^^^^^^^^^^^^^ - | Type: dict - | Default: ``{'e': 'silu', 'o': 'tanh'}`` - -use_sc -^^^^^^ - | Type: bool - | Default: ``True`` - -Output block -************ - -Basic ------ - -conv_to_output_hidden_irreps_out -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | Type: :ref:`Irreps` - | Default: n/a - - The middle (hidden) irreps of the output block. Should only contain irreps that are contained in the output of the network (``0e`` for potentials). - -Advanced --------- - - - - - - - - - - - diff --git a/docs/options/options.rst b/docs/options/options.rst deleted file mode 100644 index 95ab66ea..00000000 --- a/docs/options/options.rst +++ /dev/null @@ -1,10 +0,0 @@ -All Options -=========== - - .. toctree:: - - general - dataset - model - training - logging diff --git a/docs/options/training.rst b/docs/options/training.rst deleted file mode 100644 index b8c1711b..00000000 --- a/docs/options/training.rst +++ /dev/null @@ -1,8 +0,0 @@ -Training -======== - -Basic ------ - -Advanced --------- \ No newline at end of file diff --git a/docs/yaml/yaml.rst b/docs/yaml/yaml.rst deleted file mode 100644 index fd804436..00000000 --- a/docs/yaml/yaml.rst +++ /dev/null @@ -1,4 +0,0 @@ -YAML input -========== - -TODO From 9e94e997d4ff381f6a6ef09a38e416a5ec4c92cd Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 12 May 2023 15:55:52 -0400 Subject: [PATCH 132/157] style docs --- docs/_static/custom.css | 48 ++++++++++++++++++++++++++++++++++++++++ docs/conf.py | 6 +++++ docs/logo.png | Bin 0 -> 51467 bytes 3 files changed, 54 insertions(+) create mode 100644 docs/_static/custom.css create mode 100644 docs/logo.png diff --git a/docs/_static/custom.css b/docs/_static/custom.css new file mode 100644 index 00000000..53c893b2 --- /dev/null +++ b/docs/_static/custom.css @@ -0,0 +1,48 @@ +/* MAIN BODY AREA ------------------------- */ + +/* change sidebar header color*/ +/* the color of the dropdown menu area */ +.wy-menu-vertical, +.wy-nav-side { + background: #2f2f2f; +} + +/* change sidebar header color*/ +.wy-side-nav-search, +.wy-nav-top { + background: #2f2f2f; +} + +/* LINK COLORS ---------------------- */ +/* link colors in general*/ +a:link { + color: #1e52b4; +} + +/* keep clicked link colors the same */ +a:visited { + color: #0040b6; +} + +/* link colors when moused over */ +a:hover { + color: #496cab; +} + +/* hippo color #1e52b4 */ +/* change "contents" label in the sidebar */ +.wy-menu-vertical header, +.wy-menu-vertical p.caption { + color: #1e52b4; +} + +/* change sidebar logo size */ +.wy-side-nav-search .wy-dropdown>a img.logo, +.wy-side-nav-search>a img.logo { + max-width: 90%; +} + +/* link colors in sidebar */ +.wy-menu-vertical a { + 
+    color: #d9d9d9;
+}
\ No newline at end of file
diff --git a/docs/conf.py b/docs/conf.py
index ff701842..88066baa 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -54,3 +54,9 @@
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = ["_static"]
+
+html_logo = "logo.png"
+
+
+def setup(app):
+    app.add_css_file("custom.css")
diff --git a/docs/logo.png b/docs/logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..deb4ee3b7406f75561af21b139ffefaf618b3e7e
GIT binary patch
literal 51467
[51467 bytes of base85-encoded binary data for the new docs/logo.png image omitted: not meaningfully representable as text]
zZ%u4_tp;@3tl~71JPYV*^6hNHObGpF;iRp#qbUjYPDHb z`SzJq-`cxRGHEH;AL?CQ38a_Bo|ZGDlIrtGZ>|Y^FXTD*3JgfhogbQ-f%jaye$bD& z5@aqnYbX~>D5){sEw?}Ax#x8qGudf(Euuz;C3Z`-XC__W&vPD&CX5Zha{CpN@Ij0J z&b4IH)9@q5R1ZDipPlv*Lgw3rlXO#{2s7zRZCHhBLWm1%LsPI#(vu1n(wF}4vW9RI z$E*5ThBM!vNQ&f!M#KfVAQv*j2V-S9gyRBzTrYNsmUG)2>(754QWEw}J$wI1bK=VW z$NFjA)wNaUd5|^x+?RsGyUgXz7JT}z3y*yPm1J1Y)7JO(w}K_%*1&qioq4liI@0&^ zjN49%S8cMj&m0j7E8(NvIs8GI8^ml!(wutK3I}r^A-+HKHYczbS9D$eEnQ8+c)Qqe zdH#v*Ji46gB*{Zseb56Kex`2haO(bb5z(*#nw9$ds5vC*qZ9Rn9(YV|~(s z)>u6Z$(*{L_mTl;wNJzEWWDQtxXL&`nF4AvrvEkx{b13Yi0CQUVcSdE0;%$&q=u=|T7ND9=AF z1siek>E}h+?@xP9^qa8wTK=Bj^>sVnT1n0u+!Y%4C7=#}DBg&*TzsHPMIs9KXAZ?C zZd!?r?d7q@J)Fujbd{6iz1a<*im0|Uy&}UWQ1h^&w-jB{`1U-x;cPQAchHcppYi22coB%pqPvB*)a_x=1HlKxrC5mMIa&?`?c)aTB|*L)6% z!~230NwED#%6CI{PQnd#db%j!c$}m!O}am^YXUXKr;+C~H!gjDsDDwqI!y z)$aLr@WTX17B^X%s-g!N@*y1`8Nhv6C5f19ha5luPqShax`lhJhCI46^?*T4R} z)im+yDLDh6Fef?rd0(lCC-TXHPoCC^@&e*lY^Md1-p)?b%oVld+z*dLhsi|>834Pd zbzZF0?EkG}G$C`lw2;ux@~NKpSJ{7&_3njY^vbZd{j9>yx=%fcm})MvS@sSst>me}uUQ8FoQG8Qm07mQFjD!ei4{#% zCeoM2Yo^kc6!r!<3tD-pPz!nA-QB}kDj=V$(MLI^e7%CHP!RRi?bEVv$B_?`rWMR+ zs7OdAAHTBkMZr}w?(ywc+-M?yl$ z&RTKlJu=LhDpwV8 z!f3A+cId=Pu*|Hl6!k>P4rz!&p^=LNm)|VB<;E^b2C|TuCxIJmbaC0!d^hi5Xg5FS z#LN-dp`Ds!nrwK`*dh>sAIb`$iz|DF-!ou(sch$zs(U`s*i@edMti$d-QxzCw1&J> z=98<=Pp0u73=*okI5YWCEUpf(jeQGs{&Y2q&B0aW%)SNws&jNNFZMIpdxd;9wL+55 zgO}R^52wE8{qzF@TW=e7|~f9V`{KMux?U^bEf8$52%@QfE8p3DSshkid0qI$? z@JLGhk)SztD-w*-?A(CsgG;DOw7_j@RC(cC7JkL zZ>-yXo7v^pK!W1uAG0k5E(~NncIoU7uKX;(eZib5>=a&iaT>?{Nd(0dO6p^gCvkFW zDZsUx?dEUHIr)b)&Dxd3@bK=hf>f8^j1Tyz^OM!dfRIkknYTP(`z#RF zKcmH+TS^(U(}0CCL#+>nu}^8u5!ocg4AQ5aD?9TV?)%0#@)1~ydZPYUq34&E<}?YX zU)BFg;tp$onIcvQj1$b|><|iKynNM%I)EIKe#tL-{P((fTHPeFhWqr}`ok8n4zLQ0 z(xujj#X$U>otdbxdY|~d%TUq}AF=~#j&^9IE-&AKB4;7! zOywaPyc?=zYWA-{%_G83yi3OY(k+h_x6gC}E)vpYgHXU{4kc}c7~16{4rLjc+76`2 z4BdyU0i9%?9ub>w6f@OpCGL?X*4YWjQzv%TvJ3(cAO^F210NF-1i%v%(FM`<*W?_-GPHvT}d9J`?oofvc$V1=MqW zyDCHz@;@gc`W^o976(k1pXA8={QFv|Nz{16)Ozu+w%ME19a4F)VJ=vLFntiKXP3mp znb(nQNB$0vbOEzoBc_FbzKz#iNjs$2U_iUB2L}03%(#J1R!US6Hv_e!&t1P-T(!c( z|0{(&V(+AsY~$aJZy{md^>DTQibv#LB|3U5H&!uXt?fkQ!}rQOYg$*WsTp%EE;gY7C>ObsE%;f*m|COryQ0zR`LNXCYCB98DE#d z&UCf0r&JOs+TWl`I1P+H;D)#H5{W z@lHFo_GpA%i(H;o`(^wGt+n8~UU(OieIP^NsMcuN?`e3BxZ8r}>sP!l2fr%B#5qoc`B!1E9&@8%wFBRW8 z71k4!TkpPc2_&phqH?n3$oAeNX+q@M`y#i$vvg~ z2_b7-&X+NE0^If?c~6!qzAGMW=BRIyJC&sbSX~;S?Y0ZhmWgRCg-xqJ3i=g~KJUjW z=gtUimlhTfEc`Af0?!VNDca!;mu$2C*D7wsvrCS#^Tw7NUoRUk{L#Z;jc*@b%djlu z=NXoNeu2`XROfb%(U!76{K!hLo5ZWy7B5AAfuL>v6^!oxzI62-55q+fUm+@s&mTwq zjFnq{?|ma@)>(VMe&rs}B=GJgvc>8y{yg8i9Y!8egX{PbZZlo+y#3)rwP1nUU01>g6QU*3{67#Pbm|xT$x!{N$LtP zt4&twe~%sekG$gDW#KyGZT1Wgn6rbEE9+r=D_(jxNERaF|rB}_Z@rDtMr>0;CC`QncDtlHE7C}k+D~!a{{ITA&9KK)d+R( z(O=S$AAofHfYpDKEz(C8?!S437|*6tg0(RSERji_SOX@)#$Ww}!h}0svVLZTCO-HM zF3p_X)In+>wVCR>S2X4Eq8BtL9gPI$)iYk!|$i* zKR=#0YtGd~uNQhuleb)gy(_4%?4B#+z5u`%;apd=q*2Hfgv~-h;IfQU7KBB{JSX%wio?}H|fJ;!vxy4#=$dkBG^i|g1 z4o;hcmGrdvbdmqNC|pE}-;dXCI!KMa6W767XbFfNDCuEVvU*NtO-BKp;iXcTHC|O$ z@L|h_4U)c2w!bD}K)pxM_t_GOsK&OjV>h`D(Qa82Oxz*>T*md0d!^`TYzvmxzAuW^ zMe7h~mxHB0sCL3(BDCJ>nvTklKd)}Sv2cxrs<_Fq7isa)=-W-OHx*4-ZjgFr%l4=5+%35UhSD8ONbI@Odn>X(9d6}0u6h-prPZ?E=!MlU3;HS6muMhj$ zGyBGF{#9EWZ?tG&X=kzsR3jsVrA?_FbT9+6Bik&3+lp4)tC)1gy`K?3LDhT!BuX_9JWsuGd9cpaES`UXVzTqlQFz>Yu@3D#5lccXOKco3N}W*Tv7WFOi3i}f#^p(X@hqZ2`CECN&qa#j-l}8fOR#Lt zoj0bBTj$k0@SQg*3tt9?rS7Z~guwL4igv-cOWp1Ze(?@_WJN;4u?Bl-LO?Uy@@w7>dkk`8S|;XJkRY|C^XiF-xEaD@z87u zq4@PUbBaCnr(|*GrDdRCio1m)>EL3sD!6hyecHjVncO z|FKb{Ok=QQ`~S<*hI)KE`INL1G}h?>H(c46NxD=}Ky5FHRn9Bgk5v836~{L 
zLrAw#PX}hkJ2}OvD<|PyH&#NKTSxp+)oug8&u;Xxjoggf_~5NDJ-=o>WsJ}Ge^X;o zjuqd9el}JFtEwOiKa$M^vM`pVAW4-{eD20RgVH+5f{Zo=G9OTL+S4 z#YW&tjN^IQ`3@zW#NUR-?*i56Xg_mT|BkmTX)8zk^eU4EZ!DzD#4Ssw|KU-Q?Kk}| z?ck{iZl~a6-4UTwtCdx}YR5+`jCv>A9-v$~ z7vHvOR%l;51q4gz8rsk;pE2Lfbh$`UXC1~a_^0E@kK*@iH*?+n-?%-aZC++7e$mAl z;}nI*2w&cOnTZaRA7dzaOB|{tDzJwQR^N}>mqj*D@Z#XA*w6d0=MYdAq%R{`#n8g) zsA$PD8-E&V@^mpB5&H#yB$0v=<$!f+0L1IP0XQ#!z<8$XFB&7)1-UJi=%hd0rH-02 zm5(Pb6Nk>ksMnHVWIPIQpDr9cBuPqvvOgfL`eFNqCQvk8OfU8zef?mF2hc+@6m*|c zlBqjjaC)fLkiaq_k{dJUm<#SEl|(YYVzhbq=>{T*?x^0go(6?TmJuLW) zx@M@CrRIS(mW!R9%|e?x9eY891+dyRk2P>8`>3rPT*+8167o#GBmdV65V`K$-$0s> z{!+!kRM=QZPk`%T0_@kcB)A}|DWvd}Bhd_t_qVQs)mvxuehG9DD<^GDmX(JlQKi6K zUgB=>tIl2D2lM0-{D(k;Vt>Hc$Bb&_&)#LnH^~xew$J3ktf}LR5fW)%rHLI z7PEc(z09-V^J^r1t1-3YcuQN|r8=d2Vv)EhU;5&=s>t4ebA?}302|m>V5@P9JUIMA z0!4+E)hrsJm+Or5BMSyd`ze;Ta@ESm!SAsb6SR`f--5$d$SsxI``lii@OeXw#p>+F zdV5?8kBMyb{zSChH8GM#on=^%@J};lHAJM9Lu!3e)pF8--J6t42JGD@os>5IF%-@? zzlhl`=Vd3ReJ`hH83nlNyD{vMe7Y<>4=z3mZ4lr%xBI9N|QwmNMsU(&W@ zVIsB|bQ+FkEO}Qi;3bPtaLF#PS^9& z{@s;rEEcYqNz8_20(N?oeZk>dQWFrvxk687C@)(pJ0jaS)?a}$%`oCi&5Q=AUYIrb zY^wseH@u;hsZf$47ng5aT8}g zwQ8_7Q5C#RFjB9P%7KGJlhA?3XICKD_3kpAAT6`T44bK2un)5s61z*cAZWz_OW^u1 z2>XdFrf!Y4ur~*y=~M(IT+_HlvKr_UAS;I@YT(1&5&~R4isB~wmTZqek#NiMi{nE! z=|GAi_>$Q7S*cG?ag$-)w0T$8RhR_X>v@><5L5M5Mjx<(N_c9)+b*HTj(=+2Q6l#K z(e{QpO*1|cGS%jFIXW@Jl5z^&)T1v=ZiC}WDB5}iXp_Y~2 zN!E$pu>#<4iBPbL0v}xRtE^^p-!gNo_tvxNe1s&@j~i+d0Uu@|=_%cMh4xVP569O4 zas8lW0_N$Pkyw1`<-aZXyf(Oqi<9ONP0mjJ0)vBqiAX_GWPG-u*u) zjlagBBfv#Kdgg54X0m$p&Tu!gE>1QzO}e)OJ~MVB$M23OpyIPm4KzXFfiKt)jtl5W9hEKCClKN0qMwTj!V=mI7d2n9lV zkG@i)+B15Vo44XR`c>t1HV!9)xkgf4A@80J8f^)tFVe9EYKR&$u2#O_)4fXoGSTfv1FT(Q~c|g5qsGFi8muGz(f%o>IFHciB z;X+n9zo86Fy!FZI;@<}VIXLlJ%$H*A?DmE?ADc21v+;(K+IViDUcWQLY!(6T2UvXE zSZ2vPWFbm~>P6rQDD>Aq`#wygd7P3#`#gajkOr0JdgvKdpsu5iM$u>O6}acvutU%B zZv5{GNP``s^{&A(-52ydemkhp>W`Qhqq|o7Fp@RB{>Q~8EgJaDl3+?G8J9ujn*)-{ zWDpr)uoZ!O_a?8~1R$ocC~YZ_Un3;6f9r9%ryME{$OxhqaacmFJol^B&4eTcxEwkc zR}Pxy!3>20)>O%an+*a-R{TS-UCd?%=Hl5h)JGIZB8vr~Wj7>|p9K_@*;SGTsefZM zG~9F$gA;ysrdCdct@GY7ev6O;m(B-pa3VbwWBh0Wb=49hT1GRd6Ci1z>?u7052a2j z&Y>?s_?1`#W}WAG55s!LFXX!|`SHF%rt%(MFOVDL&YELYiopZHEnf@z7%KP;nuxMQko11c3$BW^Qj&H8PZnH}6foVb zpgTmub+=_nQnfyt!X#H-1kCL)aJH3UcvYzHFREgu$Ww4aUq4Z%Y7KaoNz)>^W)7|Oh&yyflgrG3- zJ*Npw>)vmdE%=rqra<;O3*@OYyh74jm_?vpAc?L!&psL% zpHQ+~p}N5HZjgLiLGj1AWv?uc^frZh)1?deN#`1$L~wY(5&7 zIdq+uCV@b%a<01Wk_B*p=0`xxe9F+FH@UINESm;Mikw^Dx06wTWSNQxHc{R~+?Vz@ zk!ai!Juv^rwX6UEm)*K5RFtk8%)RKcSV72|Bby2Zl_SM~o>C*GaW?tXh^;BWJqbp3 zt$-Kf&tNto(^vD5znCyIRT_p+I2zhmpV|%bOh!lhU%b(;J;y?lU2B6rg+RId)SW#_ z17}1QFEl9xWCEccTN7D!ks{h=u@E@buACO#J&m6zB-N9LZhNQ0YOl#%DSaz~QYa+0br5UgNdWv}=!d1nammij4?V7MYn zA-d+hPuKm#nPWtWB^mV(1*LaORHf5FuE7cceyS|{Umvy0YP`yXrOBNy=#TzcqdW8$ z>Ht+L&s5QW1_UD{GJGr#JCxLdX{FZ{VgP!b3fSW!0na{olvruC;+O+dj3e_g}<%n;aC$Yz1mkO?fOF5(C=r=kPQajA3?mqy>0nQZE^1mJouF><) z@nO;mZ00uCAcOy5KE-Zuw&8Ue3<;z7%sxfF)IxsSU?Z8O6ysA-WRREyML&*lZ;GTQ zqE4oa+#iemXNYp2o7ECke+nX^&KvIW;TKwK40NxW-~Z z7)gW;kCI74%v#BM{XDm7ehbNMx9XDRTQtQicnc;gJsvZs{oftH?JzuCj8Cb91iMtt zgOGYTlwN5(hm|FU-ch{cF2=_fz;TZHgsQr?u$k^9{iQQ-UFY_I=AD)^m)%*4RB6|? 
z$8(zMxK8T$lSj#7g zIDU$}E050)q6Vtk|A!|p!Gl`I&VUBUYdz?E0-k~yVclHHpnsI7OdHiFKcIX0aSvBk z29IxiKaTG==yDp0pJ3~ymvLvrV9(~G38PGppVVii+Z7q!QGB2=~KZ!1J z>~L%IFU~G%V}6q(>a4C@Q+4oE=p_7Wh54dA-z;LZ?vyV>IGvB9f1dc$)XCZR;owon z3>(mTvP6=G`1Kbc}-uh9PMM2 zg^~!Jz{Q{dh11Wnyjj_D7^x;A2vafOe_*)FXNZR~Oq+lcA$!4tq*o9Mmh-q&;3l~Q zZBi-GQ#0TTsVPC!wL(&Jm45dS@a@;^{pIbjR`TAfwJLI$jjeL!AdR>JxG_1!Z~X4) zevnp}w%Fex9q{w~KH;)1(~N@k2MybdpL zaaBPs8y}2s+3IXw)wh`K5&SpRvp@)RI|3qVw&Bm9uJXr?bAA_?1NF-FT86g06pWXt zbpNw9NHX`_xEyq5kh!NsG5~E?+e`hXPce?IKCQ3SjW;%g0Q9k16ewkzzT7mhc~;hY z)^1k*2%|L0xX(BQURaJhck!Azv&lO{Jg4AYU-)~I*F{YEuV)xv-aH|N5o>_5+k9rG z_u1o^Pe&8N>Gb&du>Bd<(e?+no$Tf5X#|Dq;3f&K3C(AQU*tU%{(oWmZVh{C%=If|liEaVAeub&X+IvU*Vx&K z3G3Ey=aG-afWyHFf%bHqmv6;=^b86(|K#>SUAX*T?a8*@G+EL7NM@$5mGM<6;YST@ z=GKL$VYGJ>{eo9GEnf1uG@yx9_>{5X{DQ_tg-$2oaRrg~6y(#8IUF4cId3t!&$xV% z$uT*D8Bd@IC8rmN{nPvDM>-O2($3;wijDg+%zEFSh=?}+R}?=XFP2yj#N`M#K2$3cH*10z(AuEXbBTF2je!!b) zl8ozGNg}{6b}m7RN=ZFj1uR*xP4Px1xQ)Nw4SiiAux^dzjB{qpClX>;i%X2VsEO6v z>?==V7xN+I;H}v^scHfK$v>d5Bo-YB(DgLOo((be1LU4D9u|_mbbCePC z5y9+j(BI?JB`OxI7M*fxoceiY{v|UMF&afySdxz8Hrs1{o*59+QAirAb1 zbOd{4jhR?Sna`4dD-V;{#mVpR$`ftS;Y&-+262p4R8Xw$a z^KOPmc{VL3_p3?xj#~q4NwA(^vVJ4%99lIc{Lrrt<@9pzV$p}+MB|7yt;`<(o*AW$3&GkYwpPR%wrub z8>MMP_%Z(r*OSIO>@m#!=q_#k-?%D-S(lkQMJ~+B)~-IV`7;fVIxC4E5H`flUO6U2 z%)nKv`Rza<2gp#mt-Js8<~_w1?Ondv%Syr9Q%YrCV<~AST9e8m2i(B?_wLH+b?NcD zh5&t9!TWngt&z|`17Ib6J|Xsb`gi4rpSUOL^f;Rgt# zVmUiXCHJA)Gi9o~cbH~e9ssYO<{Ro&fnqjQ`!f{cWrS1_hLUs1MHBPG6~Ql=G>M{K z4RfT%Az8A`aBvXF{l~5C1Nu?OMfc@Yg#84x3!=DG{JWuPh+N5Afa9Xj1>LXO|40lbbJ-HloK2{(fuzQZX)&1_x`xY3UiVkP zMOAzlm0gZHlIZ*=NTFxo9|H~i&xz&2!~H+gtHt+4t|#V6DBfVq5`cr3%^PYYC=)yk z0zD_pVS!1>E*0ocK~Kk@mYAqMZO(0C=#4vnWa>}%8AbToEdi}OZJ8|QgWH((SU+wa3O=Hhgy#wWcH4P6{IGSwhJy_yRNOk+nb#V{U2R7r5c^o>l#ZL82 zW9RTZ*cIii<8O{gwAewNl83p~DQ}QBbl|AX7tyQBL47v1VK>}Y!rM4|V+wsJk#juf zD`@;a!w6v2zinH7&|ILVBKF-6jS^fTc$?tlxiGt3Z9Rm$XpH1mXFb`W>cZf7fB_Uc z(v%j~#B(a<_@{<{iI?Dg;WcFs?J87za}$*X!&n7YwClqIKvgQ@U};_wtAroLr-$Q@ zgolt*&)Oj52tQjW`%CUVZ}w&*67#DS7j zOlG%}i|Hfx_EghzpB!JB3n`ZLpI(GnnJGIi_xVc%P<#&_GKhId;0rT3@_~_~`L5TF~91<+wzpCQ7I+u2V%T@*BT9O8XH3 z?T9eUd%W;^^05rwUM-sVRg!qh)5N&(iR2y9OO@uUYCUa}t(YP|iG*PAwx!X4CsDT+ z2=nLoCIL>jIx3aoyfAiNtKS~1l3W7x0*Z#jj57OPNU}Tm*>ao`@9zQ{rm-k%)$onp zD8nS=VL?K$8@FVoZRI4vA}^=|8X;+Rk;7S4s}hXo`=`d9nxMLFL;6W3h3N$C)<=8d z6(|`n!&T~2;w5Q~umndZa@_M`Um(oC9o|MNmBAapelHmd=+HA0w~Anh>uBt^OE*0& zX7dqQ*!NqB$UL5A=))UL`00QDyA1nKvNs82CQCpUu}|Qr0w0$s6{rHL!HoXHuUai* zxdmI0h>+TsDAdO6Wv9?YvB+$x>*=JP!;5aA<_Zo#3$0g8qOG_Ns=J&oZo)2b?R#!jNeb~)CT4pWfJ?6o-Z5{+ z9>zZ$_-Whe-O6HmNPnw*tnoL;`$11Q5~xbY^-# z^z%17Sm+_2|4;sV^#6pIGF>l6W0e-^kb_9zVq3>E`FjJJjFfF~k~F zMJ$LqKnTSr_PY+WYRx%S7)1Qtxn+t!rw5!$tu?7@4PhD@S9RxN>qo+ygq=RQerD@UQf9}cmTlF{ZpD?JRY!LEC z2HvNi7~)4V@05Q=iMZoq+z`HLY|1msT^ zx5+gglDSU4_q|X5{7Rk^Ub-w^%mp^tw3^2e6+{$dKL@?$OJ_5Fdfy3DfuR-xs5}e~ zde-Hg4u5CZt~;lI0xFpR$3kY+3XDA8c;P9{rh8%Vv_cbvT= zm-N~^3#y!xdC%_PxqM8!*y9@@nLYfovB}ai=9)E70t#yT6FS_5U$#|^eoLA7u`lc> z(FpCOVw}a`$rGosSuhBe^-d8Q5s(yXePPeoE!(C!ujEsoY52WnK9!HH=Y&vtknwrs z!pZ5a3TbHS%CieDCV%qp8cvy1zJK8#$+j*j@nNQgF9g^7&JocX;;$$kH?LmuypI_i z7KQ5K+xnU5nSk6K#U}!kuY^d@4sWH0NaJ(Ebls4>ub_I!Y?h1~0NrEe#P56>Hf&$+ z>+dbK(#%zb1nzz>8WbEC9`pW}P=13EvX1rJ@7|MrqCGl~CNc$4&~e&$;(HwU^ON6`z+;fBTR=qT^{(czhE_=P zfd+qZvMYjSnSM8@Oe5m`_ubp%Xi~pTFnuUZYAv2=OWiAWnyLWzkUO`ZtTB`E zf*g|DD`xMz^%Q$$M}hwbJHI(tU8T79i4R&&8>f9f&|{p3)a$zx!m;S(M9^#O*Iv(nUsGiL}ot3v$amlBbJ~gY0gnx zK`e&&C zbZW4Y%n*-<3Yj}SrtfRt@u;e*6gj_ZJs!x3m6n)bv?WgvB02j>b zK%D&#@a0V+7VBCeT%_1)r$HzD9svEuhs0+ctn>vJ;$%C0+EB(hEVRA=hmQuhi@abV zhBP1@@1ByLVbOd$<)$xyy+N#5;&{3axlJ%TvMo9Mm1E7PT*@K@K+Y?Gm!UY?ww0b^ 
za_EW4R)q^4HBDYNa#N^82;a~KQdman;vg1$XgQ1nxx2TfYD3IF{Kvnz6}P zCKlH4(&4#64x#Hme%V~Jz7RPSq?37y@B(OUg^CX8ClVqy>vZ`!ek`tpQy|*_>5PK+ zCp!bS6V)AWS9%8n`R;5OY-F9;V)$%fhAfExa4{JB(2rnu%8e$4MBmQcDC_P9y&l_FQjoDZw1wmFJ6MN zR|0U|-w3hD`p-Pvjkczx2e#~fj!YVi*fU>v; znMvB)1WjlIoT(1gDLebWdt&pRR6C*Y7FM=0KhHCFPJo7!x+$4N4=Bhpn0`MtZvWYL zGj&Ff^=F!rq__3w9Scp8PQ16NLzYoDc+f5hdYoo3T3eVZ?7qHcF^yMFiGFQxO(&ba z93@108z(mp=Gb+<8qy@-5eJA*yRBSpSaM$SAb-p)(SkcjshPlP7}8{oH*yPIu$#>% zxc&-Ihw||4yhf~rji0++&PHj6S$kmUlb>0ChGd2Bt-cgA7fgax)NP9BuTDRK-XIdD zzMrvA)saW}0Em*A*AqoCI*#z0v)}OIB>=$Je4!}!=DbBl;+%%>8tW5zxyu<0IPJ@n zTco*)iVMSC4LD!Ugq-tWzaye<3)3ryKu=?>=aG~}CW7BB?`kAt_`l3n3c^1RJ}Kb& zF6dMwUDJ*|V(jVKgxU=sLV#>180&oTBZ4 z1Zf`N;9gU=uX#Dx{bL|S?P^`gfXW#5jxJAIc|GP#6y#W&&lG?7IZ|p$w)tVo+391c z3x>L{X^(ArPU1gLnaDL;15f#mBy2}s~D2RU=K6JCMjw^*<1l!szIt3-_HnP|* z;=xJdOzcHfGdfE|7X-zmF1Iq(A*(kYaVjU8V@k z{QI5K4eGP60x~L!?^Q)01?Pr9-J|-CClBC&B(n^>2*Ew_GAYLFZ+c^AlYf zVL64*uNUS-aVSJM$7js+i;6~ikHa_*r$Kk$zRm;2ACuzH8OeK^tPx^uiiVNr^gB7i z2d7F~0}3_ucbOY3T)*4DWK*C}+>i{^*z8JY%d;odxo^j>ATz4{`f4Q%Zx96&f5~^p zrY5ZwS`}C|2|(E4&> zwHL9)#1cw62+~9NvD%FVnWz20-(Nuby;e9M8|~oB5YM|pjn9X9DdW!)3R4WrI$Yw` z73|geg!+~YaNo-L`X*#Jk1I9=Cuf8$x<*r#UKrkH9B=P>nqzMUGIwN(N&TPRg~;&j;{1${Fl?y| z3z~>uTBUhvz{a^pz^n-CF69b!iN_3nJ$S(hr-NjLu>0wIV?qX{-yW+@7IU3YDV)i| z(hrs6)k`(M@SGV%%tt$acnq!cUlx$EeKSRPdfHqjye{`6k34t5qN>P=?MI~QIoW>T zrhG>=HR5Na0y{{R1HzdrYQ z#c5c;p2{u+;8V7s0sig2+=WOyt(*110zSop#}1!y3d%elC zexlU4?~hia3eZd)X$?@zzL4H&T%`RoYgBh@u}k*9df@V@N=^AF6tHbJiUQ4gnCF^X zdg4h`X~yP-22QyDuFkOnW`$tEgbOnuLLC2A$k6oKF&ZOKY1U(2VO16(C`Invf0S`qd3FIdH--u5HRbkujsvnH5QYA2i$uqkf&6MC7bUdW+Z#G zA^^O)tom$le0+TI5&P@6jl?l&KUU7%(`Q+~K^AWJ2Lymm-RUYC409RZ@?Q=t3qRYN zmp10jgC_J9{_%o2?H~D4Vl~e(B|ywVVF4?sYiswz#>daTu4w+MD@d44NC|mSobXrm zNu8$F;G=W4ii@73HEh!rB&%V1K6W$FBCql@h5P2(zh8j!9C)Fmu2?K@7X1GKj$ity literal 0 HcmV?d00001 From 5bddd740c078882d0666ba5b79a5289ceac54b3b Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 12 May 2023 16:01:24 -0400 Subject: [PATCH 133/157] favicon --- docs/conf.py | 5 ++++- docs/favicon.png | Bin 0 -> 38705 bytes 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 docs/favicon.png diff --git a/docs/conf.py b/docs/conf.py index 88066baa..cf91213e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -54,8 +54,11 @@ # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
html_static_path = ["_static"] - +html_favicon = "favicon.png" html_logo = "logo.png" +html_theme_options = { + "logo_only": True, +} def setup(app): diff --git a/docs/favicon.png b/docs/favicon.png new file mode 100644 index 0000000000000000000000000000000000000000..f66789ee170ff2fed17d089cd6b8dd6a4362dc0a GIT binary patch literal 38705 zcmeFXbx@q$@-B+IySux)I|O%!0S0#++%336umnkP2p&ARhv30AxI=I_lYCq6-RIQ( zRo(O7o~o&uS*!c$?x%b8yVhGPR$Wyd6^RH50s;b6Q9(u%0s?a5_ZI;UJQ6tDl>q@k z5#*<>@2Ls!p>TC~v9@yrQh55h0x5tXJ8K9C(9&wIfxB_LyX-3`5;|lccb*LXCL!p~ zlvws~wQ>;i_I?bH7Eh;22wuYalrGeLulwby?R7(qc&**ou(nwDTcOBh%pm$|@XO*O zSwPVGdg0Zx*4^XP=@2_rQtZ(6y=-ma(E0Pz0&y34_=VQ}SmeXQ!@$!cyWES&;LA*8 zmvcH#(J=bsk<|8)6k|{jW*3`zq20ZZ-{TR^&wG)e&esN%CyxhjJrci~@o#5AFTu`L zYiuj@10YR>f-}ZIREh&L_mR5~=c4CSaCwLRgIAp;XXW)2-A13tgK?TFKm1(p+IP{o z2#%S*RXg3Y-Mza>y`G_3AW!NJT#LMXEbAzJ9n1H4O`!|+?>eg!r+%=zS>IY;JNpI@9L^jh3IRtQ}c{SiVaaBIa^5NILT|6AE&5?rb}1WBDXpZ`ram?d?jlG^#$n zb0}|WW>Rm=H1w`+Trxaz;yEhcGxa;Ez7=ar5WsqGAV=zS%^5{_%Twdnu?!0R5zjp# z5|``M{9`Y>S)njP&%mo`Zb83!Tb1}}(W_#WezvRkVQfJ>#_;Z~VL-|>@^Vtyg`>lI7D8BBQ8s4XzB|R13MEE8qjgtwUNP0D z?0d+kQY=C%jd5@F{upyH%1q)e%e^i7qPHurOwKjC-2<#I?I|8etK#u1wQi3lv(2`) zoiTwG>T!qIb3+a+-Y>m6-C}Lx&s&9JFR$%WBsMKy4tJzr#8TNVG!^@ZKI_lFaiG~_ zJ&5PZFl$jI_J1EHx2xyS$L^6>e?^O7`Ng$zB2$f4^s*-KtGMN){+r-i-pfLtA$N<8 zPX2BGv<)KuDUFd^;)4$R* z!qRpg(8gT!CgL~Knv1hK%IeL_`RYBq=8T8|SyB%}a{*Q*(HimCr}3P74o~!|!STFI zK~2Pc3cGa7m3fw5S!#U=OS|Z3-Q_2rtNPTuPDbHQHz@kK84V<(FM!XC8Q>^}I#hqHlYMZ)r)JyKJL#PrpKz*{ z=Rh9p4@sA ze)`|EW{x|@m*KD4G8WaLj4LwQLQF3G%~FwZg*y|DKE?TKL4EK{clfSWHy|DqA-EDL zt-Ct@!DR~b4z&(v`>0C)FwPBg4H|^iJeO^-+>)24EF$qxBh7+}{5jz+Z>?#^1KdyafhN_6Z?w&)3~oH98V29Ef8F>? z(u6Z=D*3BQ(5pk=uqj>=P{zurI19&7Vk>6 z?MEf}fMr2d{EMznzYfz6X4(Q$kERKGZ29EZ#IQo6J@u|#n`NG${!vNd+!h8?3FjEe z8I3-1Pl6VQ0Ao0>1GP|^59dSgWevrKeT#2|cRS_9yTD-@c|RoKXQB+;pIZ*J6xT4z z9|sVmC=0f9O|fNcSiY2J8t@}!+$>TzZfPeNdscbc$F2NC@S%)W_<;(QgRCcSoY$k1`87|a%%u{f@$YyQ9pkyG#I)!|Tb++<7i@;3| zD`rO<_kH8HM02{um7j8UH+HTTSRan3j43;i1HH5AAtq=9Q2aBZC`VyN!e!M-``|;M zJNz4!cggw}&&y~CoWGrxz(`iK^WPx+%wv2);*Wf(Yy14`#`!ban^CH?pXa?0)c&f- zC-Csg<7gOh+4o)ctk{_692Ynfp=U=_=OO;qUM2Zo2#p6H`+n?&MPzlzecqCtym+=| z$mW9h5~(tew;~3Ma1&D|YMw!1T7iA|emG0KWC;~|obr+4<2J!AOM$&WpIio=jQN&1 z(i=hotXqh|vNnyYQ#n%~eRjB?3ePLN*%9=@B+wXK4T-r>h=yZr8mvv1RIZd~+*6ba z`vCyVFeTYK0S|8ILp14d^3y)tQD}3*4T@eN1%^H*gb%ZZz(B-Vb)&GVkYb&+@>$Zs zMD)72Hby_sb@8W%MW|^y(w}Yg$xB?LMV}f&M!9-ZV(js^>zs`MZTaWoseG4P`UA>CM_wv5tL0Q!xAO4i`qm4A1z9YhA0&`wyA*b zOgO4}2-Nyv&qG!e?KM>;9Dz75bnw!P!!eTA!_&M5&H`u73KFTKJIH0w{T3#?pDj3j zl$0gdr^AZz*ppOmRrxsd1@aRjuwpSL2W7c&!XgCH?@l%-wN{?cP%Ckc*nLODt<|HY zbv@lZL+Ow`R}>TsETX=NAL2YkyC(DCNL@w5Bgq^d@Jl#K@zp31jFU##*G}s@$UmWK zq9ZT>@LVItmmYC2&GhLBTV!-Yd+&MIgBl^raiR|?8Xn%2g`>pKLf}o0bFeV+NDCB; zPtCpEO)M#WF<0LqJ}hM-vc*PlS1{hA3yejgwNB}GqzMIVf;`q@h&*e?N#mj&RT-uh z^(`3iDl@)Xt!<*rzfFFgMfw$X2&Qq_tR@M{Zc%h2SP!??nvyV+2ql6w z`Z-K~Du6qt7Pe}fjxxc3+5;0gHUNIQnWmKjCM?QeD)4SwSdzGe%hc%Y3XdAyD1(~Z z2we#|fM^3Fwn!K1N<{NR9YsuJNrpR2TMQSL2xUcbO%qiU1MLnfL^w=HqB?I`o1zj2 z6QYo{t9EO|JD6e`cPS>6@MFX0`OF2izQ$+_ zYn%MKDVp9CP)^@7jn8l2VhUM%TSrfc!8A|uqsC5Jt!g%^jryslnuatED!IRE6tZMx zGFT~bAA|YQ%p95-nK8$91pM-@+6^%T!$K0dG+$ zc^I?a-3A3;lY{f+Eu>@sTQm~j0J#*^JUSinJ*8)MCgv45=QuSUUjX0h$=Q46ie3Sk};&@rt|%CY(ooFFYxk zJN(leO&Bd0C-zSk1%$S7N*!pjX^}t66@MU--C*^gt>71;A@&?&A` z^G4C#%CPLkUgIXIZ_qZ<4ZmgS9<}0J!NDXt&6d)Zbgc8I7PDMjlcM*2)PnKlwuymM zgN8GI# z^QSl~C3HIP161)UbCFx$mYs#+CX744^(!gludt_jcWy*f1oY1ZVEX#w^vzqc#AjKG z8Tc7Ds3DG$;0ujj@+2%p=uHKamL~GPDNfT6sr9mxNAi8V>U2s?_BD#)gR^2(rS+6& zf=gADI$_Vx>4ZU)drSreRtx8B;A)B*)R_i!hcUjCE^Bs=>3(r 
z8yQ1-OdDT&3JDeu?yd5wnXqIs8nJ)~#8ht`ywC*0{48b-D66DCX^&U`Db<01P%y|d z_VS*sJ)JNb3^hRNoPrdkOTFdU48)LbJwsZ9`BD5)u|7?ZJHNo=CPklU80UZ z1PD}7^U(_o#6V}~U{OH|8J?s02>~e09f}kcJ;)*aMQ8vCpFJ41kx-IH@+)Cre)4`v z;ims)8ATERjh+f}3r*mzypj@YfIC2f{0ZCbM~yxeDjBD6QlMl?QcSL#(thHN>hldS7occvAc9<}-Ry7bHU){QiT>OGdpBSWG&v_f(`t!?#Ww8hUfDHaxoy>|egpQ~#yhIR~$rk)dk+c`Y6IkNoou#O3-K7=G1_XNA#yX=qm?3-{TRaMRf zIK%-7E=~-dH24Y%ZpFT>HFogG3M>2jQD%xMjX7JosRZP*$rudphZT+xy)$JKrLtrD z!d4i~q+o`>YL&gmP!9OqL_3A!+~Fg!h(Bx`NIBIYkWP6B35a{Eq(;A!Yl@6OK?>^D zM{)4^bv5TA@}?>_-;&7+L2pxZ?W+%ly$ntK!5rJIK1_KfW9WPe<)V9!5e8&4b!RK# z%8-jX2BH~!LCSmQM@7sHp1Dxi(q@?GUkv96sg6p(MjK`p@3gb_vOuU9UF|T6;Tf9t zp5m%F9vl-2rIr54v>XQG_&7?0a-A2TxI|zor;=UO+f%#xgAGWTvt#` z)U(uCF9^uQ-SKI|6@9UXjGv`{ottYL2H;uoXJJZ~Y}eQ+h4Nk?gpq)l4n=%`xT#7S zsvWuY!riM7p27YWq3>mTHeKeTFs=$9QZ&yf^vnI+VT|7FYe>5l6Bvw_OnwO`3rmjS z>G_Tqr;X;;jlNYbr2pD6_%%~}r<3Da!>p+HO3@&H4$4rqld9NOgPV&CwHHsKwzeQ_ zXTyd~XVRHg+yI^=V3^x@X0wv{UGAwDaZ2!eqsjDGLw)U26`f{-K(TOv3q;=$-?HV} z0@6GqMci?+$QhrrnvG;kmKKq8Q>z4Ts_!W3bh_yGoV7dJuxesXnJy1p_-fqEm$sTUA*&%n=6w=Egyy>Z z_9(40+V>t_BrD^|kY2lOoVc`WRkEB}1?_zb>M>f*m*r4BeBT~WN*#i3R4A6b;7^Yf zGrgiZR;L;O_ORekWZEwyM{@DxNvuPM(avv+l+@ZUippU&h)#hjko(GOlH!`LATGS` zIeDy1A}UdDPF%cRQ!(WPuL<0Mi(_Ya-F*2$>(Z{lzG0UgF(V)NBr$l@GfupCVk7}h zQ`I{minKPSR82ui0`S~7&1g;N#qwN+?-a$Py$y$$4Nvx^);?NQGk%2t(MEKd*@L`K z8dKk12?Wrz>HjlbveIoh^f&tj{LKge-4;oNGoujqG zpXi}i5xL`X`?8#7Es*PkbmQqc;A307YFkX}$_&l{@bujL2{iNV@J`AOccR!RjU*9Iq1f+JE6D&b?$1ff0_e%xJIL}D)@`v9&)d6v#5KJ}cm)Y^D4-NZK{mgLi1TOJQ z@aR=&qrZ|+N6g5|m5QRaUYDj6Y|1rr7m%VLpdc9anW^>Pvqd2TNojAS2WZoZ{Xb{s z)9b_FKLed0yg}79d0tp>x)9St!-^mw6)D<-4eCjf_>w20D1-x58H3);If4bnFl|mh zK0r0X1JxPQsfMust+lCsg+>o9LL~l2wBwu)hh4h<1DU}rd>b7MV{giddRm$47st{n zzm>lz_jV;*o50peS0u1A!9z&sl2(Y9rr{w}VtZs|pbIpG^&CRMFjd+e!K$R2Et4}Z z@-!NHihcD=`=W?_CUK5ry%421f=gH)=m?eMZ-^pJndnNyVD{5%b_Qq5@~}seRKLEx z8Shqz1Bbt9-CT$XW&zzOEPP63S24BIA{lQ0jSO|zGlzc4(A&24r9cw0z8`mU9-(h0I3UQv+nONXz;1a8 z2~kot%7usKNG=QIhP`2i5M?AxZ640K;;b_G;<|go?oOc4_g&HlCA#8g`^kX=zUvcG-qM$a`h4`oS@#(@S zvymT#=b)^P8EskYySF<`rZ9_{6HmkEAdKaGp+4DqXq;pi%9Nt|c-%PW5#T{|ewrjl zdwWB-)F&D(JL>iU$yBa&gW7N*=#%HP=&<|n*^ zKaBHcj&?j@6jJaHtIdr|PI=?Jw=WDyk>r5CSC>s1S#-sBK5)*qKq&ft#)SYm+WV(N zPsNzjphHrIZCVwwhcAKK@em=_a#-`C;XrsDyw} z`j9;VzvyYGyIhIHhb4?MdnL?N$vSFXQh=6A`V9mPi{6!T1euhu2yk=tZEY-)rgTR> zXe(o-$HN@!E;lpIWhAKvNszD6ee%(-XNpwByGikv(}Hq)5_m(S1~Z4og2M-IzjTJg z@H4Dl!lB?!Iaa1*f?D65)l0w6F{jBa62(ag*Z(s?0gmY|R{G=7%I z*>}l@dn}^<;!krXyiue|WQW$D7Ty7wlP&Za@mmT5EPBNvbi4$0g)aPI4Etv7<_yo9pU^JVE~K}-NqvqX=N z>I=5D49$|#a}^yP+#ql3kH#VBHjk@>9?yNqOB)!Ox07qAO$vq>1z(*xo(6hiab|Ss z@w;kpVz2oF@Io(h^~Bw1vWDi_0CcNG(;!J!1Ew&nP|^^+K}B7u1ItdFSwbELtlK0` zwyqWicF0-0X-nH&T9A}P>IXwZ*%hmvPAHdbnCb*OORVd!APQPWm_*94plY+ zt(-<6hjU+bvm*tf7cdPtxg$SxzW)eIzdn8ta5#raU#k!v6QR!)%+01r?yUQUrwqjg zQRrxTYPpPFAiHD1_hXTFaYMp4S=F0qub0CtY%+6J%h*g$Wd#B#tA-WQVnz47Z)5_s z-a=vMveD4lDMW|Fj%iJW{!Ow{T9mjW^`#XE_Ia9;DTXt2L46$Xcpur}n=xSW#|{oh zLJ)1)lz?R|pYZUh*oJA}Ep7ke;~83>N=NC6a$LZSIsgS78Sh$8Me$DSUch!poXZ!b ze?=>25EN!h@WUt`b~NqrV$HA9-X28^y)G|M_kyxK2w6v2Ld#aP>Aa|!W7s_5 zI<8o?IT8J7Yf{KO{Yi60xXK}|(dquj3DXHB{Kr0mrIFcrc*v@(TrKvAzEm1FA(v>$ z@WnbLi$<1oWtv|*BeRlfIGcf_JJp>i6@(RSpL#-CWNyd|+^PgitGU(nPCgRmq0vfI z8FapL^RmT+;JN=?E)AjFaj2|OXu7;atu*GtBf#H>T^8Iims2s>_G=B(Cw-fN#ZwOQ zyt=P%sH^Ij5!4}0GL05OBQ)?K9p*073k?w!nSn;zPU~!Up8_8V>s!xmPTGCP?3obG zt)buP3CqE8Uf*PV<&3uMr?ZL=_{#oe4c>5`yAO60v32lYoB|z@tyDV+* zOvUwPF>Q5(MiDzpRk8eT4$VIUZj&vR03tuSxg|aeb46Upo}hM?&^II31HP1VMa_Rg zgD<9I!h(hoz44e&SWrHW!hu50S<_HU`+g0yzRB6B(Dc3)s^lz{`F6!CB-Im4tOVH@ zW{{=&)gdmNR^CQ=p{Eu%WN?-HW`3>{QN;8G0D3z){b4B9J}@e33zHXi)>z+OCM_xq 
zV#vbNi4f>7Lj#Kv^Rq>)`TW<`4SFy4f#pGVar6h!ry%3s1YAtbkGmRWj3MDjVN{!|_Fl zG%UdS8J`gBe&QaWYJ&cP!I*G&+NWQ3lxro`zA}V&Z$jm6=2UjR`Eh#dJ>r$;klJxZJPO z?6r2e_0lMdt^PA#K!}uxVQeTqV&O;;DSvPw)SQPT5vt54%4DcStD;)hslXiR83gKR z9GMG8OcH@g?Ohm3SKjMjq)+=sLt?O}ECP#af9*3aM;^$~+jzSP*?2T-gPf=@LpnPn zF04b4TTOicQ_~A~MsW7fyNvnp>e{jP?7w%>O+K=o+W5^YAID{StyXnmyWpK_Hic8V z-+%+RTw!i%l%V}|SM$5(TF*67U`l55HH%8tbfN_Cjs)HQ!I7SOPL*@u)AmU&&8QGT zr`HpKK!|#Q(mMtuTVLnE!TxsH&qwNCgc#3_D0eF-(deC;G;4%dA-p#DqeIj0L4iuW zvu~YncyuWpLR9E6l0#u4QVhP+U?gG_vb@ZvSO;!2K9msxCG0v(aH~bFV%izuQZJ=$ z?|Gya4pM)uI^Bndzmk)$c9Va8ZNR4@KAr!R`$|0ESINy=rWOUhmJnelEv>F7E&cbK z2;ln%d4VaS3VjkJ17=!pWSP)TD7KP62pM4DVj4Fh$(CW*N9vL)vpt8=MbQilXt=Tp z+KNhBQ9ZVl80kb(*H^uiN{Gsa=X3SIkYM)VXK{E?Uexa&N(+nTXTF@ zum7A`Du&{%T?EjYupu=)>g=vfKBU$Mv0yj&^xOAJJQg?@nJq%=Y`zPji=B^7u50v( zpX;lz3bB*2Jteo~YnaKp-PpdcoR4ScOpId=#SCb|Gn0;wev}iDB+=u|zl_WqV%lS! zG;A6)yxn1s$GP66t(hT_K|YDUvIE}?(N|FxvUG7`16a9O0NFrJuHc&^5D+5b zAXk8;1JIMg0%&9BEJ}6O-c3beXC+Fd$E(7j;wlZawNvnO2Wt7LYFqj_SPEKEiHjkL zfP}yRPC!op1<1+K*+U2ko^Fgq2+9}!OnQ7U~EbqZ-0cOV518xI=?t1QUQ zn~O>ei9*EP%34TMM(%GA;D4f2ww|7@LhS55K0a(d+-xrHHtd{&f`aTETmMyJ+5aZ#X=nXUvi{At z-!p&0`Bz84>i@v~H|f8}{zn)rrJ^Dv<6`Oc+dV}YQL5keg{)jG?W~0U`pGTG3;tNL zTJvxVu<}^)0$2rkt$0}ZtoXPEfz|*E0Z#6Jfl_q#@B}zp0)In+!P)G;?Mggtna%P|FkWn@vtW0d7GqPCi~f0X}{~ zLH>UV=>pw7z?JwLlaqsu`!C$zX%PYk111*myH3FXe>A|c2uZsG0iG`I+Ac1RqEx?K zqWG=(r@Sdd{z{61od;OL_jkqrt>(3WZh!szYY8~o{h6Ym_*1q*0L#COcmTYCR(}kE z_5PZ&v;{cZ0Kx0~Zw2-Dal8MNEG~X-ehX`UR%?DsepVhqE&)~m51%zFzlDGmH$R^> z7ts2zeE$R8!^PUu2jC8rv;jv7js{#nf1;sa_(LV*zx(223;fL!2NyRh2QMopmo_J# z5C?}4FCQxh5BLWa``-d)|GldJ9K2EclMje*+>xLdLRqh0-tv)?rS zU;O+n7ylP6fT90uBn{zt<9t*-yl^*>_ZeiYkUE~I}Rc!18} zU62p>I8)Hi&IUdT!CAbKmw_OI(1K`;0D5hJM-W{V3_Ktp*r(($Sfhj77*Cr zLFnw5m>5Xqe-r#O_DArK@*lxJ%6|m^DF21{pOpX4R{o#L|7R=zdl&zcmH)kq|H;b# z7b5;?waHLqpedb(;DHslNC37&Oo-wi0)a=Z7066I0-jM5b z+w2Jd1`K4_`)x%E+22`Zfulki4Dk|L#gN5hbrQ8b@+|6JkA=o1-9N#1t7|4+KR%0= zhFP)L2pU9zs(a4>b)Ta_kcl7JxT9cYGuUgzBT;KpOa%@~fqKOm{o%yf)9bdV<}?24 z>hq$}d;(i8w%OUfRHX@$ZOfPvbT~t$!U+vVnsJE67r7NsUwjXYUQhg@OD2jrOWXWL}z!in6D52~rkXs!KF0z+} zxz*?zXJMv_Hg&@vp`E*Y0cJ~aY|#SyqrLVRuD!O*)@lm9Wt)l$Y6;dJyuUsD(j^9O zxa6my`8}~sj=1^wOwP@ToT*)uC9jYZL1>)V`fJe_LIR`ou#yM`JS1>I4hw@*@%ale zZU*UMYXf1WkGF+cTpa9nu|OKym7J`_OBG5yVM^FO12S?X*rjVN)lbCmqegX>F5F^f z3t`U*s*5X6_v&xjbdnkpV~08{>(!GBAv#UJNdX|Rznkw~#4xZq4Mv@yf}W4$W`D@w zy)7h!Aw|NzZnX`9`>3KIZ>YrMDdfEdu3e2`k1rx+W%dJuCkdc~20xx(oSyL02isI` zzn+ZaS(PF=<2~rXK{AwF{}Qb0E+I7XZrK=1U|pO!w*CN<^L<3l*jtG%F()r_G20an z0UTW8=%#0JTQl@g^ZfSnoXW9GDP~TJpK%ih=@4mJ!cITKUs6St?dV5*3tMK)4i_fA z;>a?6k^!3?R(U-1_y^^jEEwQ+09zULkuBgV5O?ja4H8J|Xn}_7Mn}pfPDzNIG$t=c z6rK&KjI-m|2~P47Bm^cN^wxG2<-I7aVsB`@b-;xQZ)V_nRA!KBJZ6O^)wOWwCogb6OXne0y$g5cEA_;lEYcsZ#ZC~{;hS#v!dYI z>J*xP%+H$5br0rGMxiN$bUQX=@3u5P%}7ou#?h%A zy6p2?6rBbHjfctyrA2e&qbyweI60mU8j+Lrr0TFnKi3!{Emkp*;;AZz@b<=3X(Okm z6`7;VP_oh@;azWX4YJ28x`Fx63Rq1I* zmG{Z&-NmxUC1;>!=9W7p97hTZX?|eqYmUlYfXkW2>wFdbn!h@c@jEcs_1u=208R>4S|T`U*9nAO_b(bc%dtxjs_A;m z=ikv)%1$u=$kV%dsthPYPFYD(rrsj-*X9t|uu(LMi%l(%E#mFSxcO@>U&+!e2k+^D z7L)PD+3{by3o82#dT)uDE_QO^HI^`+q+>5FP=-~wEavY(Rv!&ga)`nkXBAY^Z#HWh zqCPqwGuwTQBAw9hY=_S<`8qc;L9y@p9IM#6(YP5PJ`O>5$=uhtt6VDGEJ7S}7GbWq!)U^L=GI zIsB;M#l(YQKu0+?>+@GFZnhZijRIyQPM{RswPU6=1a^7#^3NXRTBknoE(g7i+6*;S zSyk(RslcJkW-^w?9f*R54#xe&YKD=JqC5m;!u*&IzMKkg5~Z|vHjH;al}@8+L?YHsw)BdqOy)3tt%_u0T(_q&A z1u6K&M)0jszNDLSBqBh-+6sp*Nl{ADl*FQ#H73UVY?=HDB%{6HZ9ey4|8#XkH+zi| z&~hCn0Daow$pBuxeX54w#|QB3*gLnY!j=zJ(w*cjv@MDKRpnnXdz?->K__PZ7t<}Q z(1vMTM_&2@bXeSj^cHu?0wr|v+^&IBYv1D%mJiP#Y;+clh0oD77D#eKD%pv`6)JP+ zqal@P*aK<f7)Hv*8Nac+m=c* 
z)((4Bk)Ys{S;$9cwk-Iwz&7-xZaw020GptOpRu>5&_@+zS!+X<-riP;4)N<7!HYSc zUC?PbleRai@l2yZZQnD#mF0ItSdN@tkOlbJEARcj3Kn0a4%P(iHDU(Hz3q033XT1T zj-5!>JCNRnpTJP|b*rl`Nn&nF?4~p}=HQJD^v5CVp^7H>i(dFSXd%5@-!x0t$E}N) z#QO6Rj*wV1@~$kgMx9?zgWu^k%f1Fqe7?9_oIPmn2K>(2rocUlk>&|Hu6E!epBff?5&K?SwXvL#+VNEJ#A+;;ar*&i;sT?FP@ z-O_b#_FMJq-+=a?AE;ke18qKSZZ!-u-%Lrm9F+HOq*1U5?Kd;)^tIb7EfM|1$l!SxP4;~ayY0Y`f!l=3 z;C6|i6KF4utE25B@B7YmqX${z=LHLqHK(B>Y~*lfC3QFsjS#^+Hj(>FzABc3w5Dgo z$j#QxdN&Rt)csS@huWMUjwt4Lx|^E{r9&;Cy9CegVCtI?{s{_gm65t}VGI~%Q*?f|}q96_s~I#j4Z zywdHQ$JEc>TD+#9S`=X-s0N6kY4Rm=ALl_*y(AlXQE*Ij;1&+L&Umh04;2j@BPc2D zf4Y6uoAU%UnHXgSo3+i&`)sUrpXfNDcr+^Z4bOGXZszl#9&ito@+& zV$z)zx_4ozw$uE}bmb=Nguz+o6P(BWxPHK|9hAJlQH7{83QW7EjSj82^Hpwc*O&C{ zkbs|;C!(z<-%-n4j&~5vA+*Ytn=A3Y1+P*$EDNit4ONxb{WG zGW>{XIU`DFuPKa5r$M79il5f9Jm09Exp)b*L=ljt0*`cSqo#E|y zOcC8T;@tO)Wk38*KmiAk2FnF}bXfR>2G^CA1pa;}rYFT?KO=^=LrrVfFi;dz&%fRJ zY_3j>26do7fuA2Et#?%^0rc`y2c864>4{lnO8xa}zc$O!iJyc54+`k;onNo$MEq~H z`-f!Vj=wf_l_O772Qt(;$~$}_*6~{1_h{v$VJ7;?AsV$SPszy!C4Gez*7XVvogKEc zx1K)s4%Oba3bKV=jAvzkfu!u5?WsBahq+HHCT5K(gbTPT#DmmpZOH|{!-=$l2tKNf zZpuoDgGFSdTdm;bFdSo#3(%bO zU>bw=c0v z2bwSaD=t1;2f<_^t0#|L?nS8j?zaMY0yYRRp)D*U*v_057m?c+-wwnYYCoP|R6(&FZs=}TzYPx%@;OLBb+hGu^x4sIQG{n8J7vaMVn}5=N1QfkD z<>N+68?+{SF2aY=56GTrZg_LAb@%hcOJ5Io>)YNzAZps-PX*rdxxQFyzJ#;yb2w@8 znSKf8Qf1K&{*aR7{cEFB?RI2o{=jp)w$}NpsLOq@W{SMHW6w0Oony-6JwXqS_Awg% zOIOwS-pQ6SRE<#ga3yfYQ)m@4w=B4(q+Z_Oq-8yJFDv7*V|7Yj)U?Zm>(XP(7m!@R zHDh_vZ!+U{6lO)BZEAo1`Sz&Jq1L7I3A~rjB|jd#GHZdWLV0QIAah||AA_KKNXqvo zI`5NQr~lOYoTp4fduPK^G*fyU_&re+RSdmUspasZcQq{~A9E-!?5Wp+bE^ekttU+n zO|nt3lcCO^X63d;bpQ0CYhZ7geDorw_k*vyNMjWeaf>eMq9C{%O}Ty%0i95kVf`3X7zQPj4exIfKsV~eI7PP(QrqEKs zKOd;pfb0@fw@^8mh?`LjI-k>&Ewq`J)>TU z&}~i5bdcXZ_0}VSkG0T-YaKI;435jIFzrbhJb!yk({4)|EY{JrxHj310Uz|I66)fy z-HR{3{KrNZvxTV_KH=5YafsrXalmQScjQK0Wh7NV8zNgn%!D{NHEkB>cO^7+UdD^a zzDbakyVLb>w$JdnM@|c~6soNa-}U6OprbMdA<5bmkti+#z;7PV0L{SEnO0*tqbz z+9ik$QGV;#ZLJg34mu@e3fydhjL-P5Yv7aMW~2VoNdY7Hjm?&hQENaP;U}K3Gv4vK0?yI zrOFIEGeQN8>hChQ5uo^$3_doC7*fQk`}~cKBrq5EC)9>z^k$R*p@!whoVNFGmz4##pejl^f`)fSzf?Q_6iro z#zQS@lUtW^lGDIpvG!WaLL0lSUT|}hrMFf|yEVYi=R^uKg|{Sd5uE0W`w=ttjTku` z!2i;?a(kbvm*Km9e|Wv!5MU^^{s=iYbF_>V zs5MyL@*bq5Nkaz&1&0UT1vVUi|b%5tkL-1w2LMuuF{eD1GEl>evOw>HArhY{OSI*akq8+Sjq9EDY3=Hn}tU zjci^v)0+_{_XZ12&eccX4=tz8n~KyAIf66wm^PZXYb+%1HnJ=y6mHl;^qidT&su@t ztd=xAc91V|7fC#q6ZL`P-rqr~eSOKFn{fTIBH|_^jzw-WaNjUj`PmQ^s+aZUfE&Pv zY%0ofY=6lS!r0j4_vNWp5<(M;`q>?2hR)jW`P$S5`*Uqc4t?Uj%}P}{iDSjn1 z_crkX0gf=5MTumm;Cf2-a4+|nkH5{dHu~!rt@z_HTX!ZQcsa+@D4zeqi>u!UB-GuV=&NSu9NbJh6gr+r zshGg4QGEXvc2Ylc8D7Kp>_DZw#>*NPRDkskQhBNM3+!tRj<$Yse7V8eT%9l})x2~0 zwj$+2POPP7=R22sg~Sm~%ZGmDZZeyaf%g5YD`s=>QHI3qT{hLkizr?vh55ygHStb% zvsHxlzJWA_Yif-YJY!#QL(os-QU+c7q~Wsi9?qR9%D5^+kCpq{O87p8~}I9O#V4v75&|k z-}fbe{F^NJKs3Z68mTEn3|h<(v@kRT{)6NKa~aJJSj5kp9Q$6w3JDScdAM}NA?Z5| z1lM_%@8Bk@M#@2_%)X?!F5Qh7jEX5i;)9u!8z9i{*89%H_Uk37Jo$OSr!5ry=xW#_ zSY!W9*MK$$>=jex{+Bg;Tmn(58+wm9rPPRuCGT0@>-QaP#Kb;Ln-byJtKR5sCg=9u z@;W~u&c`lhtVNRvoF%+F$c9u^d&>Y9;8)+Wp4tNJJV@Cok&FZ>t@w|I0pv9;?oOO(~{ayz=G`9TY7-Ir2G#y8+ac;Ht2aVUYD$pK;(0z#DqWhm^&0r(99E_exb zzqWqgu;wCiy->_)y}Ah(b9tPu;4^l$$LE^1c=8Sazra>a^0_Vhv9lgy^_vi|8r%mQ zd4oI{LdNfq!XG1t(4{?cp_2kVuM>6SPS=W5tLKw+ac%*!Zj2$C#+=M($cYn@n^Q7u zXp%wPKw~Bp7wyqDezBJ}lm%0+?=D00E7sD$Xdjg+>$Ko?*@oYJt^ZEJICv`sUnYhR zDJG5={y6*E4>BuXm`Key(|=a+XSirU@M#9`iNN-2^XAgy{6@RiuB(t>C4MTAb0qGB?a}$V$T(jvv^Tqb51b@?aSEg|<>@T;Ws(5gg8eM)ljz5<6dR3cavNtY z3BAdSQH;c}bAUl9Y5pBnx>3UJ=O< zeDeb5Q{DD%+kM1Svn!-i+m$s(=ZBpsnbmAZefPJ{y;0S3L1wT~aM54>7fsh3Ray7< zvu)e9F>$I1Q%$yQo72rrn#`H3o15$=YqD+I?|FV}y?=J^y8CqY-sg)C_L<#PVSE}K 
z!oBnsDJPp8Aun(5OaSc-J$3j4@X4maNY>RR_sJEAb{$nuTWc;syrTu0EsfrIXmMIl zAk%7v6sy|blwVAzHsRvCeC_F&z!*7O&Bx$zT_6dAy3^a);67V>)65H^WG|~pAUin_ zT+*x014XX3Wm#JNxZnR&L-RF1aOVZJw!!0SgxIl-Peu{*91NU8o@?uV-!nQjQA)?* zB)AoL6Y#kaYLh`^p8r?s$&y!H`N^!t3;hgE0+g*dAIu07RTzG!R}S7BoO^uXuW}yM zcllkvLI^gSkc*LR^|)MRb5!lUFsc|VCm-?YNun}~R{LR{=yl=W(uwPEHUgqaCAA;G zCSUM-dlc%l`^?SY=W6rwD8hXgDRaw$CdN-sz8Y-k>Z8@rBk$C|Vx&LiI zVo=~RcCW*v#zF=D06sEg|4v{X^Wh7z;_0$SpTtf%5&g8Y8%I@IAGO#nwRCsj{^#nU z`TprPU);s~RHZomP7fK99UUoUCR+3Lswn5NQ}XHMU$(Vq#Qp&M?W2lVHH2_<+vR)& zbq%cl(in^)OEhQ+re8V(6Q3ATVoIte&6tnSn2RWX_GlARrbVAjAe-#9q-OX-*B|{Y zY>2wBYAxcPO<3P?t>EMRLfq9)nxxeFvrGpxG4u1(-mQw;RnFwuU{+A9tZ8T}KIgFO zY6H)!TTVM~gZOI`+KuI2pYdgfER#G|r;d6Ey4U?sAZ z(hA}0CthIjrn=wgbB&~bN98YlR<6SwTD;1fYE-usTZ8YOj3JVZ{;ZMtKkh-U-V91C zUHAr_UuRsUi;8-E>P%Q7iQvYM?~if=<#+T&OjHMk>-Ay~+sUL$eERmAc5{dZ0cv}P z3WFR!B=nUg+f@=0O|y9`#;M}+Rl&nIvGdx1nJ%Hc+oe9@vXCxsU`TeeWm?tSt)pG{ z^4Z}|4Bk1wjJ`U^o>_>r^X*cfhaPhtLEZS=z>sK{9SFhfU4Kx$I;#yI*@#G^PS{C> zH+eaM!c>Rwia?YLTIxCplbo27V(Z0DQC1}BF8BYn0MG#1xwBtEXR?<(>)eCmq50G2 zG|p8+sDEnH{q*0rrIo-x)v%sz58PWG54Sr#2TcW)eGhoe$p`yz_5&QiI#unzY!mBU z?OKhOpWQUnsy=pQ(NQwZj6vOp0&E$>z#G3xpPpBo*r}q~dikOI&aBgjHkfAgvfwI` zb2ujZZqD=DoG(Af%^`sP{jwjflI7}+cMP|F`v-nEB?!e>Er&+ld*^MzV0>QogSvE(A_5P)~py95wz|3A+Z$jRn{4vwv-4&rZ}h-jb_`Yv9e!vI(y|E>Zo ze(_sqJaN6)Ofnx*(VC33@X*)(;GgiqCW+;I_GSHVPNg@{Iamclqh4`C0Dg{FY1?Qb zI-x6og}FtydOZWFbFu&N9O62VoNqz!YT2*tM=nWx`E#JV z`^wdHv2ufO3VIi_BFAZP``(f9eJ&;9FS)K#>5~EkAjlz za%Xi3MyVWpCv-Vx=V!tH@w!+zo#_1_5D{>>$o-tNIvpVhhtFVFGVQ{7mT>r zC}P++f`{3K+Mcf#litL-KEM8PuLyprCXRU&nBcFB_>Y)wDI{^Kw*ATbmdD@uSO8g) ztNr(gahSfA6Epp%MgOzyK`?2b8NSudzb9(A*JTTcWcwtVtvK|1Q@yY{V#W!U%>i*m zX+uvaWQ~HseMkRG8OEQ1v$39ShYORs29BTI?D%KYOtmj}d4Vc;zQwz;#u4GsDnXfV zE4KpaKj1x>SC~6k9@j6L`05s4Df-@o7r}e4x|V??=`WPVKdfocgSdEbiJ0TpBK+3o zOuZMv?%n!Y>g3>rUD(TlB4OIRh$&iKnAh`c`)vQ!ANiRo!H|k|E&7hHKEr_nFFwz$ z1q8ezsjy7R4AF}XRdDE?YOzUtUftqGq5a99#JeE?Y*jSIG1$2l zwyxYHFBofv1y^c|%I<~lq$A3w3yG1|a`x~$<^bbY>Z^x%XpH#Iius$M?T>993%Lqr zJF@s#!RNj<*mTGDZnjV^aK_1td+HEApUo#3#kBYAvO5PR7@QIiq1L*7PtT+`Sw zM%lT^BZ(UqZ8=Irg(m8R0DBu^QB{#}$#%{jxib(E(fYk4@Hc>@1%VCIOh5UI_IgKc zVHWt|bmiPXwfYE8$g*A&5p{`F+FGEF2%!xlr|npc56je(bKi z&=&3ZCAufq`dt-XJGU6?lLos^ltnl>L*^>$zL~k}g~vEfL;=5^uZOT=4^QP~MexmH zO@w-{1f4+XGCL@1`WmT?gBJBU(DOflNx;DIAHRF|5msU@5wu-6VQ6|#k7ZVn_zl?XEG=TzBVSj|a}m^12S_3^s0%MAirI8lv=;2T5P@dVkde)ycNEFC ziyEuX`Pu*8_da!Wug56uy!&&TeffnjyKI6V8*zkLu{r#kGb)m3Xnxonm3V09`f|t# zn+1gWUr;b{4J*59LxLU~L1?Mmefr&Z8u`cD6Vvv`s#fM9=`5I4FbA#`8$rBWMHH+O zwB^{Pz^1Uzs41U^-_*Lvth*5m90#d}j|K7i23^;mgLHl5VwEvdiD&DTYTeb&2DP~W zHkRmq4}3gR>p(2zbU1zU@K^ZmpMjVLYs&otwhu4d!AGTyV0KL{#^s?@tNlLn?V|md z=RvmBmxs6VGxlYg%?UTI+QWVm4#Ga|ZX6ubU#SK$ymzF3$Kz*D9}a4$NS_$%$0HUr z8;92e6^aRy2Y=U61Z+CDUiOQ7=Kmfq8hMwcCw7_C=nJ!iB`ExmlUL(Q*3~QdGP{un zsrb3U#!uAV@o99KRg4*(4%<$@^YMCx%fC{sq@v=0ZnMr!mWndh%#-nvfpkJsv+Rtp zmY~~R8%Rv(T~(hkuNrU|k?S6MG!G zju5Z6)Ql7yT}(n0_c2vj4<5TdVTbA^nAHE{oE$?ts(Efi$>o>W8jY- zi7~ch#QNgUflSQe)aUO>`S72gNfDZ@>-oilNRPms&z-vWO_m*KLyD>;2lc=pJ>{MP z*;G`$eLqq$iIccF9Ts(CEc9A;)BZ?+hS(?jqTGFvvQFu#``m4v1R%t+l zDHw3{+MaXT_3CW>2(_1%(ZwMRV~E!7IH$-BC?WC7{8(7p{s&N3d)L!I7mt{6eO|kC zbr3W{ZkOXYw^e%*&EWwB@=NqQbQh9ApLM*Ns^F^HHsg9LPrBAz%BjI(^&QK~s5rba zVBxRbfF6769h=and=K83x_m(vu?!t8=Qdqcu@~;Tc0=8kMn^^ z;;E&gH=rtHH_s- z3s(8y>}p4#lREog7-%v;F_~#9mFp-x6J0)o8HXq~dN!}RAO#l#>K!M|&caDRI@QBZ z+u1ei5rYNj^LTaUF}GwD$(U*gm(CNNCy`&K)@CPTe)2H$3i?h3dckqLNgGJfGpz*J z&2S=glcpsE514+_ys5SZ>80?~uJzbDpveGY$D3lh(e=*XYjI9NrJLWsWLj_>ntYu? 
zVU=FiMv#_ps7O=!>O~`+mzQh^G>u{)fjS|;)Jp1{yO6b$FzbL)5()HpYN}`O< zZa8BJaRS@VBHwPrRsqBCQ@1htF=4wDkM4X7(%YtgfFvSh(Q{Vg#Z%@6oZ% zY1Gfa)1JzeYX>)|dTp!s9`0#QyG?z@{SR^9Z9112@`3>uCihK19#LsSVa|U84VX%r z#=ahb?L8BDjb}R7#e_5cwKtam3FPxDWx2nnhe65dLl?x<^cg~wn=!|s%luWR3Zstx z1Oy$b(E}0@*w%q6o6SKi7zm0%l=FL^WJ(@XlJLi))gADP1Ka9qVeFb3BgT9f{>I(> zA$&IO2*c;F(QF7!|U1lkPwCXfKWEqD~Pv|}^#@%;KdK<(%ADX7I>wP{;WKZu1+cWJrj`%!@# zjVE zz?3drNXuvlH=WrGY7y&#Yh{IVtR02;@wc|Y`%L1+cVd z=RcktjT~+JQV;F{v`hak;KbbXSjZ|co+kn2fr;_)BVB(lM5-7)qkJMI_7kEX z_bckJ=nD9#Q0Ri=SkR!jR#LY^hxVSzef>%1|EHYfixy8|@euaR#+?&?T>q5F$E`qo z{Oz0QR3knbs3*TtaB=>m+aYK@dQj|Qy(6%r4SxvOM zp6U6`^JyJUl!Y2<9)a6Qd4{IQD{-Z29HasVPUv~rD@PK!GZ6*CJZAfmCM2PY?5gwo z*KiQ=+p`Y96RZaKP*(f-Zm8IyF}`z7XI|M+e2 zl1W*@NjpYSD#T3)co4!Qy;Q`-p2~58bPPdkB~6)`V?Y-@M68qPOKaqGO$nnHju+Sa~n*r9Jbk$*Ea z`r_=Rq=X1S8FDZ)SV{!EEM%OCaOTBB5;Y5yBevlKrZdtRC+E)RGpUq-VpPg(F26Bmx> zXR-M?b1$8^6D$Q9qBebamlgTAhpc2eLvGz+j+`}{0W+XX#!2f&;|db00(wRYrWq`* z(dhwc(9Wt1b*=L{#_i(ikKap|f5 z?DT-{_9xp4h}LcygM~CJCJ9Br#Z_c5UJhu8St>Ctr5~RH?7@);{7)A9Kw<^g|JK_I z%k))Q>j*$=?<$v{Zvr&sfa>aU|G&{1=9z&C@1BCpP*-Tv%xBzpGxX>xU4V(~Nw5|m zeIu6oE%yKYTboT_-5Y_s_^dbQ$06`_etY(7DLHW-@p)R5D&Q|U_4-fe`aE~$?Ps>o zor53&vP)3u9~-tI5aU6lOQtl>Ob%1+T8k6Psw1*@P4Z>t2zZmKI(10MNgBge=!clc z1N38kg9@vcr|*!2n&!L+0&WT@@p*|6)6{C;jJ6ZdYKp6+rtbj>yP~WIp1Cz3rFuEp ztNLz>5v@68Z(7impGzY>r+z6rF@PcXTh9w2a0hjexMMbi#_OjorC!lSZjkpjR*l#1ryFl#3Jiezj)Kq+4 zGDRx<{Cq(piLy!Y&%tf;sc7YLB=bJlfBlEHC|gEe$d2VcMb!8hrG;)hAuLlBLZe?$ z^67UGAhrHT9zYb3dpXWZsS06LprS#X(tWf13gZk^Wn_Lf@M0S$AKxcR%rjiJfGNpJ z4_PEYLq*gSf}QO$^0cjHs=*K;!(?9IA6b8|^dYnlEdDXIUS(e_(P0>jM~p8e7W{=- zxArlSHIRHI3%h9OB8VG@yTrW5em{N9*oteUP456V-0V19Q6v68a|XG6GXE{0Bj+%2 zM^d5r4x{?*XRc6=a&pasq|kpOxm-c(nOAx4#0aZ=86c!bmC|RiqMnJ?Cv>AHh9N!I z7a@}UCDc6rGbgUjN>A8PTL56fyr~#zT13Kb{6O{)_-TAw+wjdJWb0dTHzm?LBW|)3 z%+x<~DtPFaW?})wsr^*}{p-yFnvAk$J5%1pRpjF~{8LBXWKQ0AvK(a}P+X7#*pKrQ z^~xJwC-9uN99%;w=B1!YYzhmoA~h4#T5EDOwHutxZEOT7!^P(aN;gi5Yp=zp(q8)p z0`&4=g7TjH?;o6UpD8JKXUqSLNgpwEFGhEun= zBVovffheLpqW&{0&VNMGmfdG6dWE1x`0AI5;eHt_oU&)IhTcY$7VcO#pO-G23YjVb zLI&Mo*j!c1)s;hOeVKUA;P+WmmvLAbKti;Qi)|VmQL;n+ka6{z<##zeVc0St#Y#V0 zr`U4eUXU&#hV7fGCnWY(EP3CQ%oY20RCzOs`q@QEZOW3^Qx3)%2tDT?CurZ?TQxQw zpbspzE5B14bGaV>{hFGLn;^axh!VK49s{_e`iEpGXcrw;wT z({_W@S6_%f$y=rMLu@vRs6k6G$ONe?6TnFrTQRE{0SatG?=FS=d;YwP0iQfZ_Z=;w z_6-Ad$kn;E|F}QoxQ=6`hO2|o8}Q|$$?1_|;0+++XRt|oIQKfAa%K? 
zbq9>-@hA}m$j(WlwzSv5KCm%rIRs6zkVPlQllBha{H-C%Y`waLZ>($TINHU@<|qjU zCPn4j7OAshN))egkTbCMH{$pCYX!N!N)(08B6d9H3@lr|naw1oVb%YXQy>AZAcqs!sWr_Dyka@A#A zI{drKxeE~qQD*9-dFg`PO(5biO?j#P0Ki590lfZZtmP@%g~L@9_PYKYU`F4UA@iD< z6{mJon&YMHQRqVpdVUqx22BM%)ix%{FN0O+eOOR@$j*g<9iHqNdB*`+qBeAqvEMHK4; z{6f!5E9_RT($MWRmKNS(K2z{ToRlPeI4tSJ9EdqxKVMiKfOZn~2B~SIm`SE7(2PqW zp~oF%UNGF3a}(|FB}jx`5fhNG~Yw`}?Z$7!B&0LgEX*Mo^l zvR94bxsWJom!KOCs1FnJ@WhK@eF*&O4F@Jtk-P{}-SS}FmUC>QAV<8?G+f*DI%)p5 zS){@G`K=8z3U0O8n}#k{`5=sPy350zDbIYEpx6q6*#l6c)&y=8dyBhWvkLs|$Yq4c z)bqP6Sx|`g4?`5v=VK<*%GC;g6uAFP@m%YerFSLPAOyo`4W*q#u^=1j+?54}4v}Y;0R$A-fg1p~0Yu=j~frsEZ%Y=ZN3l}ad_q&Wcj1ubz zo@pfCvGJ61&>*Dv?YbEK`g5xxaOF&bxZVQ!_QK(sri=2Pag~ejFIJN=|H09BM@xXV z5`PPi8aeteGHxRHA2e?XWXmp#I62mW)dn;@+g;tQ8=@(Ct{(MAmiSRXtrmGFoi&}0P0*1=a`Y)U@mG@ z0^aP=mc*c=J18R;jIzH+IJJLtmlU-vmXSPsk^aql7nd`l_i5IQu-o;NU8D0*vQQ== z5%$zy2Ka7GluTv?sEtsFf>?QsKX;ksT|p`Y z+2TsXWfHh?_A@~W$ko+yf7HL$ztDhIr#(6R}e;y6A9ndcg z;tt*udAb!yy$k%(d~6)Id7BIjyL%NG-Nb(qQYv>=BZ+qj(IhOrn7YTVoQZ)P7J|Kl zjcKE*WKdFgMjhu%GgyzfY^AUe1Os6g9wnL&+){bjc$u|d2LwXsJ+leq2OI$14!m%< z-OE!^jl24#X!Kj6A;)R`2nm8;wN!6UXrSr*prx9jN?#I#W}i@DCg_*EJ4g8|tA4vZvP78S{GNrIMEc!nB^RyNFuvKfM;qG}G< z`fXH)B`nN>XRBI&5JnI^x^#af%N?&;``B4>A~2!nYEfhV)q_u~{T@xQMIBMT$XP)8ytEJ{EVA{d#c;dcYYcr9}T zlfc0Nkm?2t1>Oj1YLYn6+V}9~96?qd=>OU4iDsR&-rI7LaC#tqzJ)N|8oI?KZO`#U zMly8sZZ4z!w_kyf56P0Jur}o9o70{N%=z!yxdQ7KpFWnU6DcdIVqo^t6|mD&zBSy~ z0;~Nau1y!$L7ax0$d}T6J{0|tUta!O3A~;iyd7{oUCL;x`vqoV={r;3bzk{%5FfF9-gC{I(&; zqMZC>Y$-d|N8s}ADVUb2VvXfL)zz>tZF)kGEU2|mpnhQB>)Q3eiv*h$QXBJ@-PFD) zEMdk(${{nAr@&q^no*f>5lWUTLXmX~<)Bf|{$uQ~;p4d~2W}x)3EmgiA@8Skh#`CU zw+xiO%^93*@~LkrO)b-NYeURgkqnW5qw4TjV;<6O4BYaAISmqV^OnZL@OSfyObV$v zHxH&Nhuz8HP!F4hD0q8^g;I+Ra88yTd(av+H->2Oqqdxr2_uIba*Qx7_JgVgKBFOmq@0D@U$^7 z$?#nFxF`%pSn#MV_cOrdiau2Zm22OZTiJFG=q{N`g3Br$2Mko4hW3}Gxa-E5IV4JE zYmFlV2X`=QtE88*QGZ?k!oagc_)|ytg9r?`I2xvp+rN6nV0q&5-vB?ixj1~2e}wvE z-Vy>Ub<3=~;#b?Dt7B1Nt?e)Ob3FL&h)g5Bbh%UPy&97&;V*Lz)HTn?tDSHlRwu-d zWJ}aPlvvWXfX6Ljyz2Dr0!&%3)GKJ*MerwMy1WD4&}b=PK78b<+T}RVvI4o<1jgqZL1XOip%U;aab0wzrZ8=GU)O8E&UoRR&kdv^e{<|(u zu#zk^p0ZXAUi;~bW<%qO_gpel;G%ZahFBv!>s!~Ui8%d6EbL80-H%8#sU(`pjiqvH z^7wr@w;Gm7Ol^fP`2cm-uf@6w`RRMQFgA0m zE~HinH~_d(=b+K-xPLDZbCOg?2GL9>0!(cjao!qy+rc5aU2|?yXumfza@-*T;N=uE zsIdPoRN>>CqA>*nL@ijW359PSdE@~aBW)q^1f2__9r!u%zb5~gVFatIY_=E`{`yZ8 zFfU{=3zf&m{2h|S+O624beZs^CXYA2D6Y}?xd_@qsbz1e14Mf8;`We}^?kQr4v%D7 zwNk&rU$%D`s6!z6Y4|beD;5by&mn&XxdeN`yh#H>vcQkA38S2?;|1W<2LTtpG-4W| zOtLJX&#tg1D4nCA5T_6Pnxgd;_P>yDG-OfMI*G5v^XpF|$O{e5|i|K>d8ZHwUGtH z8TwwDWPva3z6DRm3*!QJ;cUN)q;Jp&f1i5d=X%>PYo@IPET7lO2m+{SK|vXE+daRT zlRW-$ro*D7c*P$Xyubf5w_jce(j+xHQ35h-Un}S*Nr-@?b0zjfCJL!h~ z3{0pzL+veyp8mp*+E;1w6jr<%>RB+LGh6oqt9pIQLr}UMc*z!U;LOEOiQP7jRPWP( zjkMBO4g_y2jf&$fWC-SHKbHk!4S}ZzVwasTJFu$~p|8DXF3U{iot~{Z=psmQV#=iV zut7q@QG&P|&;&XP>aCGYUpSltZxi)s*Mp0y61H=`xf!VQwGfg<)L45D&~}UI)*1+C z+5ZDA@Nj=|*>ms$aGgWgpEI{z|6ENVJ(&doH4`0V%X1~mYmnaqDVa_AmN^f)qb@6oiOV zEq#w2!y*}X=pF3(ak)sKLenqP(8?*qN7|iBHj&P+yl&{6q1xN3WCPZgHWWlFFa1q~ z-sMh0Xu&TYIk_Mom4PixRn=etC}@Z<6WYBn-O|XUZDk>Y6zhD1Jr7>bWa)zDUUH_^ z5S!5*;j1)Pj*lnyy@bhDn^7g3_bHlnJBhcO)HTvQYY|9S;4hC|I6ORLEBzm6CUAJp zqx|>J-Z==ySp&&l*yhYOwz0hfB@qS_P9f;&QG@h_>7%J)Q7w0OfA$$SVbN z#8TlP!Dinnc>5iH)wAoJ!=mmta`dsq#lSv-R6(bQfp3*4nfp}L6c^%$&mD6){XSUb zUxbBoInBu7^9^TpZB2K4o7fY#o9ukT`?`~1%pi->?hkv7ILOQBa{pBk-qfCB=MG)j zY~^ThTn5EU%Vah3o`J0jr0nCsIQz57-HlASNW-}2Rv;Y4=lXLKH{378w3`tZt|A2b ziL|Yi-AvhgybrHa1#k=$w$oMLkIsU~=*#bFOTZz+fP0o-+=BO6NR=`8domOzA?6fQ zA3k`?MlQ&{KcPw>Jg2O&?&r(%7(;A(qv!<|+9#lie>xKsd6VDK+V-*Y6hl6Ysx?^S@coKl 
zYWBLde+(n>m1ne3(@+%zEib5jFVDwB>?kJrEfne|!f!JK(Ew156LdG7>-$0uih4m= z5+?9{V&9y__xq)U;D>kb_>st>7*FI>j&z=l6r$>WnK&`DkV(=17^C{%;)FJ2 z4wm;_k7rAOvv35M)e{GWE?bGYTT6W3_#|qCj#Xhgy@``)om5oVkCJ5()j?9Ez)MY{ z#%bA0BSuErA{Fv30?yo^`jd$Zt8#Wt_e8x0e|z%cU9ZsX?vlwDG`G6 zKoIO+tp4q{Q#QO`8E`OQ5QVBB;&dTnWJ0HD3JV~A(B%KFgtKD*_N#S0lsfI0h_q<| zn>@eQ4tWO?=Ww>XZ=o8BpoOp1<9ZhK!+`Mum2=)G%q#B#R-BxFUFlTO#fH~X#0%yi zhWI1X$(0?QC1V3V^zx&tC`U91mwCQqQ#;o|3E9^)MZ8!k79{)2l-gPXpA8o{6ya#m zvphQ01zPC26ntDe5sC8PbO%Jn5!x?5S!&b5Jm&2%_p7@3V2~dsclD!G6>VP z{{+3({`?Hr4&Em~Gv{-_Ela)Ges^z^2r|-&CBTRd=U~-0kv-O+mL~Q#7tK~h_6Wem zGJRXJ)ca(kgDSS{k2mSq5~=Q^KX5AB|5#d^xnvdMdJ}an!;j9$RF^Q|$#8jRY8R-% zUY3R7@F%4Vt|IKkZf)E7^VwITWbf5jF97=b44qK>GH}*w#MjEv4}}oJA_oMc+MnU} z^ys`o`^sN5Sng4%4n>?BDKlB^u=uI+H@Fmwz1;(Aqr|o9JPUDSi`3owwPykW?Y8rr zAZC7|x)bsvSGG#J>m2g$B#WePfj_kQ#2zu`5oJL@3VPl@`>C>eEe<;++vKjk_@nLU zqhukyvzM~`uM|@os8ZNc)3n-A$Qc?8W%U#7IQ1|jN^DX!Xw{c@o+i|ZI>3e_3%X;t z*WYbkC*CW>x=biA^fuQOdG!}1=nV4A10RHW&D(K0DbTl+?uy3Il3Q(!{qYA(;Oe@X zVFPxP{62w2AkA)Z`hicsn%)si8+D_4-hP2ye2m7(F(GnBgwQ?y%vp+DU6|vfY1S9~ zEo~i))SAy3+#;c7-huP$m^M_X%*BmgdhiM>0Wk}6#S?WyV36&{efNSN)wPV%={2h< zp*V3R)QgPc$YS-j+aWYh;S10xfCoJl82omsC+kWp;kMk^z$F)7gS`rw*I5v(>JML7 zSBdDcHVsRZFff4DY!4tYqIHX&j)QRuHta5R>3lTSQ9(H-hi=TwyCM+_#zlf1M6pW< zamX=e)0;^lIg{3fw|tP+=I@p*xKkJvW$*HhRxb~Hzs2`)``d2 zhb5~2jb*EiX?vplnY&g-^|rLfC{|H0{$eAMR`-R&XXMDKsIhj#e_vCUr6oR))vfX*-$;%D+i$eiY-@#T& zre5r=0Ie*9SXkq6Y)(q^HRJvKce0-FX+y zViC~Jffvxavi>b6OkT(PcAV!M!CuGKB++Tykky?fb*p@yqY`kk#BSrWuk!*D!Hh%< zS?#$s`BBs*N8W}fo2gS8q`v`>ZJ9R&LlV$_j_98M)z)f%m3DHBU5|~=nHWe(oBTT? z++H8U{N89CJf3^f^IK8m*|PPN@^#CMbh<_hhIkgWyo%fOF@_P@F(-);z9E^`!)(?eyo`9%U@IadN(M{& z8O*KJDSz{!R3>}iz?ja*NTx7hQ{S32L&W0ylFhYFrOwuF7I9b%V=jCTOx_X3U`OwEN zEBG>)sTv-o)efIzQVBioPz6#oBPxpUAcDvu@Z_aCRKs;}rp4sqjKxV)6d;v#asM&V zx(o5@=V&0Hdw`OaQOTX?t$B|(_D>C0IHN=j?t_UbyYjK>4F#fa2Cm~x15I27=wi>i zF`B6AmqLqb$a@atQ|nS5W&#|X9}czK1RhAmi_ChSW9se$_t9$QN6zQeoeX}UgeVa1k={Z361g5@2~cOjqGIE*}>@y;uWBa8pxC1v%NYs zVY0tMG%Ma%gKHjyGHL)FMI92N`%nZvHm6&D0&opX@8dO;Y(cNosv#uhlHHwDCBeEo zfqj(|l$?n`JeaRcuw%Jc2_~GrOCvnmG?h<0q`Zy#3RjaPf{9@@$$LgJ_U4tyP3`~S zAL8S}YMSk0^5~72xr4J(L2YC=R-S$JJ7MO`^>#;TqQZKGW(jB_v4e*!#R{yH5hP<#JPU%<$IU#p5BV%J? z?e-`X&l7e-+UMb7-X(>~&&f1gN{Y!A5#xEEu7=i;8%g!5I`HXxMydlHpgXMSh-~-7jmc6y50o;#t`;T99@DDNlE!B>7 zsaiHTwHAItEL8aEj~X6vkQV{T)OHzW+xi@S3hi^A%T#dSwTR2`>71+mQ|rHtAv4vn zy0PBZ3{8&W39Uls>8|zCs{PNClcuw(K6vd3_nJG?HZEwKMtVhR+goKh7HemAYv*eE zE#Jl>9%9zu>s>mvJI9$#-~M?&7o1O7f2-6th4@I6Ea-L!TQ-?~!~hm81}k?p{kz~O z+Lp8{nI8fFU#jRuFe70_(?QxBXLub<19Vre0aPyY&yG)6EPLgu({3MSxsjr zWZp%J*Fc3;u~%>c{((L=^dtM4XgPH;`sL5YG3boD^cE4qnsyHypQlxsJ@&o@s_iiL zq^oPK>+l){2+g-&q`oxag8Bw=1gm&a4(@fS&7ws_^b^iy1HaY(2n5P>Hfzql`2Etu zuO?JM+3dPtwV6v7|HXW+@FU2!I|8~Wz2VTx{`r`tyj)Q37U9;*P87c)zq?_L*UVX8xY41^Ue9Bsr6oI+foex3O+mS!KXZ6j6(HWbFO>EL?321ZrOhNMIstguSz z&dmk;uR1V_qc5b``tyb-gA0shJ#5}9J#0c}a>$JD6O%eE#D&N*;#zBXLo46$-cfAp z3=+v`%0W+6FGH`EgS3B1u%>DdUQ8rxh4SBdGIEmRgb);CTN=<>Jh<$hY^49@qGU%w z>1O;0)dQEq&Yj@-9zoJLxNWU^mqs3o?I6`;@0t5itWkNs&O-rqJ)1ZzBtY+izqv4p zX_fOM-`>u$%?{STwt={i>W!rHe4@x;ApJmjMD1scLU%K>A2rV+w)83FD1*otb{JNB zVuotyDDJ-HQ+FowOk#AJfFOpZC} zLAY7Dth>wVvzhZpQ&p>j&`IOvKc3AJbD#Z8Vpgp37&{KI7sfY*BB_P7c~U}wBvtA! 
zIuG}E-K0{#B>Ks~ssHA9H~Uzvz|URZSv^sH?dWslXOWv0wquYSLTcexjnApu=&%MZ z=6!GOoRE^JdaYn7gE1aR;cKyzrgtoY%U_=gE>k)08?LyHGYvc`utp(YoNzg*?1ZG= zKbLT96hZ%&Cq4@+x-IpoAEJp$6{q&&z`pYF5{fIUjQ`at)BuM7>mbC?z<-)=75cri z+Sy~e@o%kMQh56-(y3TV47%4ob@Q^&C~-N==Dr(cBB=;aJwfTD%OsTeW;ZPacMUMalM<-htpy%X&Yr!;s z!R%0L_D7A{WM|9OUc}!)?+=3tt*>-jzDm3JGE7_d!VqldI-BMh?N6x_5izm4TBK{KW9%-v1zLM@GC z`uIWgr7c1`Is%(4+3MzMkp4)(b;;Om=%5;RidPZDraEDbzZF&LtkTkIy|uHqdWZ>F zR0{G-v5xRZoe#|+J!!Rcs+wOuyMdle&Y=wo)EJi@tEsu$VEXXrhIRM-65P8f;E8uF zIWjqpt~%rxQw~z!5}5rc3**p|PUq`fP)s`00U z=b>}EScoVXXb?i7LLo{ukmRtYjDJD^2ZnsY33=_cB@xk!ilySV9UJ${os328f7{t1 zt>dS|2ZSfn`{wN}8l6R=VFobvzKob5@8@XhDhD%29FD{C=@#jF0zAuzPVB_y{Ls`< zkf&gznDWc3rW@_%ij-8!UyW{soWFzgK$-Xstf?AzEe~a3a?FWNpXMK{9kt)3jX+QP zo##8)j|tFKv^x_lSsOJ(4f{E0^OKjR9JPR;4Ulno zcHi|oy_M<=oJdT$bihw-EIK1MI;A~JLY z2gINldJBjX1PpBm9Rg9nY(Rto>19EI02w+l5UL^wLsLMCL}3&RRbeRZ&Ft>kbN1|> zvwy(8`n|ZnbMABRx#fAj=ef`4O18kR$CwnMB>IGS>t9d_Ybu-7d51s51}3c^BT06C zHCEvz_%9Qce0MsV(C^f#MpJ2r)L%HFLCmQT86Ma@QNy?$XnhRX*C*eJnp1Hm$2!Ix z8u`t*MiWYsWN-|FFZQP0dgtSPdhJJsB29%zp{>&M%pp3?NUXSTR{z=MY3Fo2a=OI* zb0`Cb)mhGE*G12h&84{ru zZpH>KXnba7-$!!2K%VVVdB9%7qvkMMt-I868K{JMoXq_$4|>(3sHmMWG{zA1p>FwS z#{^a1cVoh&fR4SkRw;&rgU6eL?%FEZ%YYi-#9{=f-D1!>cr5f5TT|W(pL~y|EXOw1 z@io6Tf)Bn|U;Ki=pxzb=ruVRgG2DVO&ocs}ts3l0gy=q76$q@q=r^`v7thK*3D7}%a1 zAb!wXCVPEbRNX~X%_3&@QJL8{JKxL|wEPsoIc>SO#S_pa@3hG@-QBh-pE$Dcg38EB zL+>XI$B%s0)~WZjuyvD4KC7!Rlh`+<8d(y+ITd||JLnk_sFhn%=5J!R5iz>zxrb@3YBTdtp$+S|Ow7u0$}F$GzrpsaYXzrYDOT0;Lt zmyDW!#@$e`K7NnY*2=MWLlIYJ@42W{cGXkRx_QXNUq$wC13+m^`axWQJzrR2joV1h z&kT>Fcf0ajbW}Q$)UnNguj)lW1gTkbtAZHy%cs(4y@DK~rotyzPw!uvPUhA;@0;bd zd?X#!MiUd)_;Pj9=r;)SER-!h8A$}#h1k-HNnY&8>0(Jqf?vQ9?5wRA{IdSjg5rHoB zCIT3;xs!bAs&CGaLXW#XKOdgcSg7bGMCW2j7DHgM#^AU05PtkRxrFM_(wL|82b@#B zBwgts%B6Nc{YD0S9GX`7REO@9;i0A_Z%C1D&*ZDykE*#L(QtnUxkz#3F(FSiEDC`- z_Y7W7h;w*ZQv~n#m#(lP?>KH#++Gpz<@=G(t_5(4(JrZtOPVnpQMu6*Euq*M?-jPM zF)YG>Mg|;`M>8av7y3{%YD+B%+;TMioWZh;iC}ba$>i7Znrj~P%7?c!)YGzpW%w(8 z_1hHz{Z&iQLFm`Jxlcz)(KPE*4BA2=Xmge&)$^h;{??qT9C6t<<58Ig!7m(GmHoGp z)>}w2b&(eEkeY*<&tsV|p;NC6+gTAxzT1|0O z6v0Dv-u_(n_W@$;@`Ns2`er4EaUT^$?gPCuq}pG735B0Z?=!S$thY0O*4v{h@0Yk?(ENMH^%P26`9 z1@^DnnWGI^+RaC~O%PmC-$^%q>MYCj}NrD#M`_@E7z!u Date: Fri, 12 May 2023 16:07:49 -0400 Subject: [PATCH 134/157] docs, again --- .readthedocs.yaml | 15 +++++++++++++++ docs/conf.py | 1 + docs/requirements.txt | 1 + 3 files changed, 17 insertions(+) create mode 100644 .readthedocs.yaml create mode 100644 docs/requirements.txt diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..661d3d3b --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,15 @@ +# .readthedocs.yaml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/conf.py + +# Optionally declare the Python requirements required to build your docs +python: + install: + - requirements: docs/requirements.txt \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index cf91213e..808e052e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -33,6 +33,7 @@ "sphinx_rtd_theme", "myst_parser", ] +source_suffix = [".rst", ".md"] # Add any paths that contain templates here, relative to this directory. 
templates_path = ["_templates"] diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..8085d79c --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1 @@ +myst-parser \ No newline at end of file From d1cddec704ebe99817bd71bc11abaf81ae05f1e9 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Thu, 25 May 2023 14:43:04 -0400 Subject: [PATCH 135/157] add more printed warnings --- nequip/scripts/benchmark.py | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/nequip/scripts/benchmark.py b/nequip/scripts/benchmark.py index 933af3dc..f7b469ce 100644 --- a/nequip/scripts/benchmark.py +++ b/nequip/scripts/benchmark.py @@ -65,12 +65,6 @@ def main(args=None): type=int, default=2, ) - parser.add_argument( - "--timestep", - help="MD timestep for ns/day esimation, in fs. Defauts to 1fs.", - type=float, - default=1, - ) parser.add_argument( "--no-compile", help="Don't compile the model to TorchScript", @@ -301,7 +295,13 @@ def trace_handler(p): f"PLEASE NOTE: these are speeds for the MODEL, evaluated on --n-data={args.n_data} configurations kept in memory." ) print( - " \\_ MD itself, memory copies, and other overhead will affect real-world performance." + "A variety of factors affect the performance in real molecular dynamics calculations:" + ) + print( + "!!! Molecular dynamics speeds should be measured in LAMMPS; speeds from nequip-benchmark should only be used as an estimate of RELATIVE speed among different hyperparameters." + ) + print( + "Please further note that relative speed ordering of hyperparameters is NOT NECESSARILY CONSISTENT across different classes of GPUs (i.e. A100 vs V100 vs consumer) or GPUs vs CPUs." ) print() trim_time = trim_sigfig(perloop.times[0], perloop.significant_figures) @@ -310,19 +310,6 @@ def trace_handler(p): trim_time / time_scale ) print(f"The average call took {time_str}{time_unit}") - print( - "Assuming linear scaling — which is ALMOST NEVER true in practice, especially on GPU —" - ) - per_atom_time = trim_time / n_atom - time_unit_per, time_scale_per = select_unit(per_atom_time) - print( - f" \\_ this comes out to {per_atom_time/time_scale_per:g} {time_unit_per}/atom/call" - ) - ns_day = (86400.0 / trim_time) * args.timestep * 1e-6 - # day in s^ s/step^ ^ fs / step ^ ns / fs - print( - f"For this system, at a {args.timestep:.2f}fs timestep, this comes out to {ns_day:.2f} ns/day" - ) if __name__ == "__main__": From 15f036d4a2c5d1e9d16bf589861aff95c327bcbf Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 30 May 2023 22:21:57 -0400 Subject: [PATCH 136/157] don't require sklearn for whole package --- nequip/utils/gmm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nequip/utils/gmm.py b/nequip/utils/gmm.py index 2610dc60..8a957826 100644 --- a/nequip/utils/gmm.py +++ b/nequip/utils/gmm.py @@ -3,7 +3,6 @@ import math import torch import numpy as np -from sklearn import mixture from e3nn.util.jit import compile_mode @@ -100,6 +99,7 @@ def fit( rng: Optional[Union[torch.Generator, int]] = None, ) -> None: """Fit the GMM to the samples `X` using sklearn.""" + from sklearn import mixture # if RNG is an int, just use it as a seed; # if RNG is None, use the current torch random state; From dfbce31dc0a0db7a89bd5974dd5a8e6cc3311120 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Thu, 1 Jun 2023 22:32:25 -0400 
Subject: [PATCH 137/157] warnings on version mismatch

---
 nequip/utils/_global_options.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/nequip/utils/_global_options.py b/nequip/utils/_global_options.py
index 7a78d36f..9fda98c4 100644
--- a/nequip/utils/_global_options.py
+++ b/nequip/utils/_global_options.py
@@ -50,7 +50,16 @@ def _set_global_options(config, warn_on_override: bool = False) -> None:
         torch.backends.cuda.matmul.allow_tf32 = config["allow_tf32"]
         torch.backends.cudnn.allow_tf32 = config["allow_tf32"]
 
-    if version.parse(torch.__version__) >= version.parse("1.11"):
+    # Temporary warning due to unresolved upstream issue
+    torch_version = version.parse(torch.__version__)
+    if torch_version < version.parse("1.11"):
+        warnings.warn("We currently recommend the use of PyTorch 1.11")
+    elif torch_version > version.parse("1.11"):
+        warnings.warn(
+            "!! Upstream issues in PyTorch versions >1.11 have been seen to cause unusual performance degradations on some CUDA systems that become worse over time; see https://github.com/mir-group/nequip/discussions/311. At present we *strongly* recommend the use of PyTorch 1.11 on CUDA devices; if, while using another version, you observe this problem, an unexpected absence of this problem, or other strange behavior, please post in the linked GitHub discussion."
+        )
+
+    if torch_version >= version.parse("1.11"):
         # PyTorch >= 1.11
         k = "_jit_fusion_strategy"
         if k in config:
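
A brief note on why the hunk above compares parsed versions rather than raw strings: `packaging.version` implements real version ordering, which lexicographic string comparison gets wrong. A minimal, self-contained illustration (not taken from the patch):

from packaging import version

# Lexicographic string comparison mis-orders release numbers:
assert "1.9" > "1.11"
# Parsed versions compare numerically, as intended:
assert version.parse("1.9") < version.parse("1.11")
# Local build suffixes like torch's "+cu117" are handled too; a local
# version sorts just after the same version without the suffix:
assert version.parse("1.11.0+cu117") > version.parse("1.11")

From 1473cc8ef12b6b5276dc5ba901d7ed4dfb16d3af Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Mon, 5 Jun 2023 16:02:13 -0400
Subject: [PATCH 138/157] Added `edge_energy` to `ALL_ENERGY_KEYS` subjecting it to global rescale

---
 CHANGELOG.md              | 1 +
 nequip/data/AtomicData.py | 1 +
 nequip/data/_keys.py      | 3 +++
 3 files changed, 5 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 601d7943..93b5cb55 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -32,6 +32,7 @@ Most recent change on the bottom.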
 - [Breaking] `default_dtype` defaults to `float64` (`model_dtype` default `float32`, `allow_tf32: true` by default--- see https://arxiv.org/abs/2304.10061)
 - `nequip-benchmark` now only uses `--n-data` frames to build the model
 - [Breaking] By default models now use `StressForceOutput`, not `ForceOutput`
+- Added `edge_energy` to `ALL_ENERGY_KEYS` subjecting it to global rescale
 
 ### Fixed
 - Work with `wandb>=0.13.8`
diff --git a/nequip/data/AtomicData.py b/nequip/data/AtomicData.py
index f98b42a2..75dd457b 100644
--- a/nequip/data/AtomicData.py
+++ b/nequip/data/AtomicData.py
@@ -51,6 +51,7 @@
     AtomicDataDict.EDGE_EMBEDDING_KEY,
     AtomicDataDict.EDGE_FEATURES_KEY,
     AtomicDataDict.EDGE_CUTOFF_KEY,
+    AtomicDataDict.EDGE_ENERGY_KEY,
 }
 _DEFAULT_GRAPH_FIELDS: Set[str] = {
     AtomicDataDict.TOTAL_ENERGY_KEY,
diff --git a/nequip/data/_keys.py b/nequip/data/_keys.py
index d61d44f1..edd04cbe 100644
--- a/nequip/data/_keys.py
+++ b/nequip/data/_keys.py
@@ -47,6 +47,8 @@
 EDGE_FEATURES_KEY: Final[str] = "edge_features"
 # [n_edge, 1] invariant of the radial cutoff envelope for each edge, allows reuse of cutoff envelopes
 EDGE_CUTOFF_KEY: Final[str] = "edge_cutoff"
+# edge energy as in Allegro
+EDGE_ENERGY_KEY: Final[str] = "edge_energy"
 
 NODE_FEATURES_KEY: Final[str] = "node_features"
 NODE_ATTRS_KEY: Final[str] = "node_attrs"
@@ -59,6 +61,7 @@
 VIRIAL_KEY: Final[str] = "virial"
 
 ALL_ENERGY_KEYS: Final[List[str]] = [
+    EDGE_ENERGY_KEY,
     PER_ATOM_ENERGY_KEY,
     TOTAL_ENERGY_KEY,
     FORCE_KEY,

From 32bad0cce8d66455957881f0d1bcb63cfdf7f9c7 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Thu, 22 Jun 2023 19:27:07 -0400
Subject: [PATCH 139/157] add simple LJ

---
 nequip/model/_pair_potential.py |  8 +++--
 nequip/nn/pair_potential.py     | 59 +++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/nequip/model/_pair_potential.py b/nequip/model/_pair_potential.py
index c1538759..25faf84a 100644
--- a/nequip/model/_pair_potential.py
+++ b/nequip/model/_pair_potential.py
@@ -1,7 +1,9 @@
 from nequip.nn import SequentialGraphNetwork, AtomwiseReduce
 from nequip.nn.embedding import AddRadialCutoffToData
 from nequip.data import AtomicDataDict
-from nequip.nn.pair_potential import LennardJones, ZBL
+from nequip.nn.pair_potential import SimpleLennardJones, LennardJones, ZBL
+
+_PAIR_STYLES = {"LJ": SimpleLennardJones, "LJ_fancy": LennardJones, "ZBL": ZBL}
 
 
 def PairPotentialTerm(
@@ -13,7 +15,7 @@ def PairPotentialTerm(
     model.insert_from_parameters(
         shared_params=config,
         name="pair_potential",
-        builder={"LJ": LennardJones, "ZBL": ZBL}[config.pair_style],
+        builder=_PAIR_STYLES[config.pair_style],
         before="total_energy_sum",
     )
     return model
@@ -24,7 +26,7 @@ def PairPotential(config) -> SequentialGraphNetwork:
         shared_params=config,
         layers={
             "cutoff": AddRadialCutoffToData,
-            "pair_potential": {"LJ": LennardJones, "ZBL": ZBL}[config.pair_style],
+            "pair_potential": _PAIR_STYLES[config.pair_style],
             "total_energy_sum": (
                 AtomwiseReduce,
                 dict(
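
Before reading the new class body in the hunk that follows, a note on the energy expression it implements: per directed edge at distance r, the code computes 2*eps*((sigma/r)^12 - (sigma/r)^6). Because the neighbor lists here contain both directed edges (i, j) and (j, i), each pair is visited twice, so summing edge energies recovers the textbook 4*eps prefactor; that is why the factor below is 2 rather than 4. A small self-contained sanity check of that bookkeeping (a sketch under that assumption, not repository code):

import torch

sigma, epsilon = 1.0, 0.25
r = torch.tensor([1.5])

# Conventional Lennard-Jones energy for one *pair* of atoms:
pair_e = 4.0 * epsilon * ((sigma / r) ** 12 - (sigma / r) ** 6)

# Per-directed-edge energy as computed in the patch (factor 2, not 4):
x6 = (sigma / r) ** 6.0
edge_e = 2.0 * epsilon * (x6.square() - x6)

# Both directed edges of the pair carry edge_e, so the totals agree:
assert torch.allclose(pair_e, 2.0 * edge_e)

diff --git a/nequip/nn/pair_potential.py b/nequip/nn/pair_potential.py
index 7d0f0e22..f448afc3 100644
--- a/nequip/nn/pair_potential.py
+++ b/nequip/nn/pair_potential.py
@@ -153,6 +153,65 @@ def update_for_rescale(self, rescale_module: RescaleOutput):
             self.epsilon.copy_(self.epsilon / rescale_module.scale_by.item())
 
 
+@compile_mode("script")
+class SimpleLennardJones(GraphModuleMixin, torch.nn.Module):
+    """Simple Lennard-Jones."""
+
+    lj_sigma: float
+    lj_epsilon: float
+    lj_use_cutoff: bool
+
+    def __init__(
+        self,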
+ lj_sigma: float, + lj_epsilon: float, + lj_use_cutoff: bool = False, + irreps_in=None, + ) -> None: + super().__init__() + self._init_irreps( + irreps_in=irreps_in, irreps_out={AtomicDataDict.PER_ATOM_ENERGY_KEY: "0e"} + ) + self.lj_sigma, self.lj_epsilon, self.lj_use_cutoff = ( + lj_sigma, + lj_epsilon, + lj_use_cutoff, + ) + + def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: + data = AtomicDataDict.with_edge_vectors(data, with_lengths=True) + edge_center = data[AtomicDataDict.EDGE_INDEX_KEY][0] + edge_len = data[AtomicDataDict.EDGE_LENGTH_KEY].unsqueeze(-1) + + lj_eng = (self.lj_sigma / edge_len) ** 6.0 + lj_eng = lj_eng.square() - lj_eng + lj_eng = 2 * self.lj_epsilon * lj_eng + + if self.lj_use_cutoff: + # apply the cutoff for smoothness + lj_eng = lj_eng * data[AtomicDataDict.EDGE_CUTOFF_KEY] + + # sum edge LJ energies onto atoms + atomic_eng = scatter( + lj_eng, + edge_center, + dim=0, + dim_size=len(data[AtomicDataDict.POSITIONS_KEY]), + ) + if AtomicDataDict.PER_ATOM_ENERGY_KEY in data: + atomic_eng = atomic_eng + data[AtomicDataDict.PER_ATOM_ENERGY_KEY] + data[AtomicDataDict.PER_ATOM_ENERGY_KEY] = atomic_eng + return data + + def update_for_rescale(self, rescale_module: RescaleOutput): + if AtomicDataDict.PER_ATOM_ENERGY_KEY not in rescale_module.scale_keys: + return + if not rescale_module.has_scale: + return + # Our energy will be scaled by scale_by later, so we have to divide here to cancel out: + self.lj_epsilon /= rescale_module.scale_by.item() + + @torch.jit.script def _zbl( Z: torch.Tensor, From 2f43aa84542df733bbe38cb9d6cca176b0e98054 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Thu, 22 Jun 2023 19:27:25 -0400 Subject: [PATCH 140/157] put the right versions in deployed models --- nequip/scripts/deploy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nequip/scripts/deploy.py b/nequip/scripts/deploy.py index e90466c2..bcee8255 100644 --- a/nequip/scripts/deploy.py +++ b/nequip/scripts/deploy.py @@ -24,7 +24,7 @@ from nequip.model import model_from_config from nequip.data import dataset_from_config from nequip.utils import Config -from nequip.utils.versions import check_code_version, get_config_code_versions +from nequip.utils.versions import check_code_version, get_current_code_versions from nequip.scripts.train import default_config from nequip.utils.misc import dtype_to_name from nequip.utils._global_options import _set_global_options @@ -287,7 +287,7 @@ def main(args=None): # Deploy metadata: dict = {} - code_versions, code_commits = get_config_code_versions(config) + code_versions, code_commits = get_current_code_versions(config) for code, version in code_versions.items(): metadata[code + "_version"] = version if len(code_commits) > 0: From 0b02c41cbd30ef9a2f58d95cc3dd41a8beb0ff5d Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 28 Jun 2023 09:43:26 -0600 Subject: [PATCH 141/157] No negative volumes in rare cases --- CHANGELOG.md | 1 + nequip/nn/_grad_output.py | 11 ++++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 93b5cb55..24812ad5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,7 @@ Most recent change on the bottom. 
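The volume computation changed in the diff below relies on the identity that det([a; b; c]) equals the scalar triple product a · (b × c), so `torch.linalg.det(cell).abs()` gives exactly the cell volume regardless of the handedness of the lattice vectors. A minimal NumPy check of that identity, with an illustrative cell (values not from the patch):

import numpy as np

# rows of the cell matrix are the lattice vectors a, b, c (illustrative values)
cell = np.array([[3.0, 0.0, 0.0], [0.0, 4.0, 0.0], [1.0, 0.0, 5.0]])
a, b, c = cell
triple = np.dot(a, np.cross(b, c))  # scalar triple product a . (b x c)
# |det| == |a . (b x c)| == volume (here 60.0), even for left-handed cells
assert np.isclose(np.abs(np.linalg.det(cell)), np.abs(triple))
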
- Work with `wandb>=0.13.8` - Better error for standard deviation with too few data - `load_model_state` GPU -> CPU +- No negative volumes in rare cases ### Removed - [Breaking] `fixed_fields` machinery (`npz_fixed_field_keys` is still supported, but through a more straightforward implementation) diff --git a/nequip/nn/_grad_output.py b/nequip/nn/_grad_output.py index c03ec350..ee0ce6f9 100644 --- a/nequip/nn/_grad_output.py +++ b/nequip/nn/_grad_output.py @@ -330,12 +330,13 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: # ^ can only scale by cell volume if we have one...: # Rescale stress tensor # See https://github.com/atomistic-machine-learning/schnetpack/blob/master/src/schnetpack/atomistic/output_modules.py#L180 + # See also https://en.wikipedia.org/wiki/Triple_product + # See also https://gitlab.com/ase/ase/-/blob/master/ase/cell.py, + # which uses np.abs(np.linalg.det(cell)) # First dim is batch, second is vec, third is xyz - volume = torch.einsum( - "zi,zi->z", - cell[:, 0, :], - torch.cross(cell[:, 1, :], cell[:, 2, :], dim=1), - ).unsqueeze(-1) + # Note the .abs(), since volume should always be positive + # det is equal to a dot (b cross c) + volume = torch.linalg.det(cell).abs().unsqueeze(-1) stress = virial / volume.view(num_batch, 1, 1) data[AtomicDataDict.CELL_KEY] = orig_cell else: From 3f03c7766b70f21bc9894d00db01c8e8909b90f6 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 28 Jul 2023 20:10:35 -0400 Subject: [PATCH 142/157] set PYTORCH_JIT_USE_NNC_NOT_NVFUSER by default --- nequip/utils/_global_options.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/nequip/utils/_global_options.py b/nequip/utils/_global_options.py index 9fda98c4..bc5bc2d9 100644 --- a/nequip/utils/_global_options.py +++ b/nequip/utils/_global_options.py @@ -1,5 +1,6 @@ import warnings from packaging import version +import os import torch @@ -86,22 +87,24 @@ def _set_global_options(config, warn_on_override: bool = False) -> None: # fuser1 is NNC, fuser2 is nvFuser # See https://github.com/pytorch/pytorch/blob/master/torch/csrc/jit/OVERVIEW.md#fusers # And https://github.com/pytorch/pytorch/blob/e0a0f37a11164f59b42bc80a6f95b54f722d47ce/torch/jit/_fuser.py#L46 - default_fuser = ( - "fuser2" # TODO: does this make sense for ROCm? - if torch.cuda.is_available() - else "fuser1" # default to NNC on CPU for now no matter what - if version.parse(torch.__version__) >= version.parse("1.12") - else "fuser1" - ) - fuser = config.get("_jit_fuser", default_fuser) - # context manager just restores old fuser afterwards - if torch.cuda.is_available(): - torch.jit.fuser(fuser).__enter__() - if warn_on_override and fuser != default_fuser: - # ^ meh assumption, but better than hardcoding getting the old state + # Also https://github.com/pytorch/pytorch/blob/main/torch/csrc/jit/codegen/cuda/README.md + # Also https://github.com/pytorch/pytorch/blob/66fb83293e6a6f527d3fde632e3547fda20becea/torch/csrc/jit/OVERVIEW.md?plain=1#L1201 + # https://github.com/search?q=repo%3Apytorch%2Fpytorch%20PYTORCH_JIT_USE_NNC_NOT_NVFUSER&type=code + # We follow the approach they have explicitly built for disabling nvFuser in favor of NNC: + # https://github.com/pytorch/pytorch/blob/66fb83293e6a6f527d3fde632e3547fda20becea/torch/csrc/jit/codegen/cuda/README.md?plain=1#L214 + # + # There are three ways to disable nvfuser. 
Listed below with descending priorities: + # - Force using NNC instead of nvfuser for GPU fusion with env variable `export PYTORCH_JIT_USE_NNC_NOT_NVFUSER=1`. + # - Disabling nvfuser with torch API `torch._C._jit_set_nvfuser_enabled(False)`. + # - Disable nvfuser with env variable `export PYTORCH_JIT_ENABLE_NVFUSER=0`. + # + k = "PYTORCH_JIT_USE_NNC_NOT_NVFUSER" + if k in os.environ: warnings.warn( - f"Setting the GLOBAL value for JIT fuser to `{fuser}`, which is different than the default for your current PyTorch version ({torch.__version__}) of `{default_fuser}`" + "Do NOT manually set PYTORCH_JIT_USE_NNC_NOT_NVFUSER=0 unless you know exactly what you're doing!" ) + else: + os.environ[k] = "1" # TODO: warn_on_override for the rest here? if config.get("model_debug_mode", False): From 4aabe9f4f6587fac8696eaacb50e2cbf9ff99dee Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 11 Oct 2023 14:42:37 -0400 Subject: [PATCH 143/157] Add nequip-deploy build --checkpoint --- CHANGELOG.md | 1 + nequip/scripts/deploy.py | 20 +++++++++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 24812ad5..321c2ee4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ Most recent change on the bottom. - `include_file_as_baseline_config` for simple modifications of existing configs - `nequip-deploy --using-dataset` to support data-dependent deployment steps - Support for Gaussian Mixture Model uncertainty quantification (https://doi.org/10.1063/5.0136574) +- `nequip-deploy build --checkpoint` to deploy specific checkpoints easily ### Changed - Always require explicit `seed` diff --git a/nequip/scripts/deploy.py b/nequip/scripts/deploy.py index bcee8255..ba238cba 100644 --- a/nequip/scripts/deploy.py +++ b/nequip/scripts/deploy.py @@ -204,6 +204,12 @@ def main(args=None): help="Path to a working directory from a training session to deploy.", type=pathlib.Path, ) + build_parser.add_argument( + "--checkpoint", + help="Which model checkpoint from --train-dir to deploy. Defaults to `best_model.pth`. If --train-dir is provided, this is a relative path; if --model is provided instead, this is an absolute path.", + type=str, + default=None, + ) build_parser.add_argument( "--using-dataset", help="Allow model builders to use a dataset during deployment. 
By default uses the training dataset, but can point to a YAML file for another dataset.",
        type=str,
        default=None,
    )
@@ -246,12 +252,13 @@ def main(args=None):
     state_dict = None
     if args.model and args.train_dir:
         raise ValueError("--model and --train-dir cannot both be specified.")
+    checkpoint_file = args.checkpoint
     if args.train_dir is not None:
-        logging.info("Loading best_model from training session...")
+        if checkpoint_file is None:
+            checkpoint_file = "best_model.pth"
+        logging.info(f"Loading {checkpoint_file} from training session...")
+        checkpoint_file = str(args.train_dir / checkpoint_file)
         config = Config.from_file(str(args.train_dir / "config.yaml"))
-        state_dict = torch.load(
-            str(args.train_dir / "best_model.pth"), map_location="cpu"
-        )
     elif args.model is not None:
         logging.info("Building model from config...")
         config = Config.from_file(str(args.model), defaults=default_config)
@@ -278,7 +285,10 @@ def main(args=None):
     global _current_metadata
     _current_metadata = {}
     model = model_from_config(config, dataset=dataset, deploy=True)
-    if state_dict is not None:
+    if checkpoint_file is not None:
+        state_dict = torch.load(
+            checkpoint_file, map_location="cpu"
+        )
         model.load_state_dict(state_dict, strict=True)
 
     # -- compile --

From 3fd2213ac3b35f9254ca6e934431d3a81fd64701 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Wed, 11 Oct 2023 15:34:23 -0400
Subject: [PATCH 144/157] nequip-deploy --override

---
 CHANGELOG.md             |  2 +-
 nequip/scripts/deploy.py | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 321c2ee4..f0025969 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,7 +22,7 @@ Most recent change on the bottom.
 - `include_file_as_baseline_config` for simple modifications of existing configs
 - `nequip-deploy --using-dataset` to support data-dependent deployment steps
 - Support for Gaussian Mixture Model uncertainty quantification (https://doi.org/10.1063/5.0136574)
-- `nequip-deploy build --checkpoint` to deploy specific checkpoints easily
+- `nequip-deploy build --checkpoint` and `--override` to avoid many largely duplicated YAML files
 
 ### Changed
 - Always require explicit `seed`
diff --git a/nequip/scripts/deploy.py b/nequip/scripts/deploy.py
index ba238cba..a0772df9 100644
--- a/nequip/scripts/deploy.py
+++ b/nequip/scripts/deploy.py
@@ -210,6 +210,12 @@ def main(args=None):
         type=str,
         default=None,
     )
+    build_parser.add_argument(
+        "--override",
+        help="Override top-level configuration keys from the `--train-dir`/`--model`'s config YAML file. This should be a valid YAML string. Unless you know why you need to, do not use this option.",
+        type=str,
+        default=None,
+    )
     build_parser.add_argument(
         "--using-dataset",
         help="Allow model builders to use a dataset during deployment.
By default uses the training dataset, but can point to a YAML file for another dataset.", @@ -265,6 +271,20 @@ def main(args=None): else: raise ValueError("one of --train-dir or --model must be given") + # Set override options before _set_global_options so that things like allow_tf32 are correctly handled + if args.override is not None: + override_options = yaml.load(args.override, Loader=yaml.Loader) + assert isinstance( + override_options, dict + ), "--override's YAML string must define a dictionary of top-level options" + overridden_keys = set(config.keys()).intersection(override_options.keys()) + set_keys = set(override_options.keys()) - set(overridden_keys) + logging.info( + f"--override: overrode keys {list(overridden_keys)} and set new keys {list(set_keys)}" + ) + config.update(override_options) + del override_options, overridden_keys, set_keys + _set_global_options(config) check_code_version(config) From 2185c7af7b2ced7d8366ad4c0d0d46254883f65a Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 29 Jan 2024 17:46:06 -0500 Subject: [PATCH 145/157] more complete memory summary --- nequip/scripts/benchmark.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/nequip/scripts/benchmark.py b/nequip/scripts/benchmark.py index f7b469ce..2ac1e607 100644 --- a/nequip/scripts/benchmark.py +++ b/nequip/scripts/benchmark.py @@ -7,6 +7,7 @@ import sys import pdb import traceback +import pickle import torch from torch.utils.benchmark import Timer, Measurement @@ -271,6 +272,14 @@ def trace_handler(p): ) del errstr else: + if args.memory_summary and torch.cuda.is_available(): + torch.cuda.memory._record_memory_history( + True, + # keep 100,000 alloc/free events from before the snapshot + trace_alloc_max_entries=100000, + # record stack information for the trace events + trace_alloc_record_context=True, + ) print("Warmup...") warmup_time = time.time() for _ in range(warmup): @@ -279,6 +288,7 @@ def trace_handler(p): print(f" {warmup} calls of warmup took {warmup_time:.4f}s") print("Benchmarking...") + # just time t = Timer( stmt="model(next(datas).copy())['total_energy'].item()", @@ -289,6 +299,10 @@ def trace_handler(p): if args.memory_summary and torch.cuda.is_available(): print("Memory usage summary:") print(torch.cuda.memory_summary()) + snapshot = torch.cuda.memory._snapshot() + + with open("snapshot.pickle", "wb") as f: + pickle.dump(snapshot, f) print(" -- Results --") print( From 04d272db539088ce6886a8e38ccf3d625f68407e Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 29 Jan 2024 17:48:05 -0500 Subject: [PATCH 146/157] warn on unsupported types in AtomicData --- nequip/data/AtomicData.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/nequip/data/AtomicData.py b/nequip/data/AtomicData.py index 75dd457b..86dfc724 100644 --- a/nequip/data/AtomicData.py +++ b/nequip/data/AtomicData.py @@ -157,6 +157,10 @@ def _process_dict(kwargs, ignore_fields=[]): # ^ this tensor is a scalar; we need to give it # a data dimension to play nice with irreps kwargs[k] = v + else: + warnings.warn( + f"Value for field {k} was of unsupported type {type(k)} (value was {v})" + ) if AtomicDataDict.BATCH_KEY in kwargs: num_frames = kwargs[AtomicDataDict.BATCH_KEY].max() + 1 @@ -229,7 +233,6 @@ class AtomicData(Data): def __init__( self, irreps: Dict[str, e3nn.o3.Irreps] = {}, _validate: bool = True, **kwargs ): - # empty init needed by get_example if len(kwargs) == 0 
and len(irreps) == 0: super().__init__() @@ -420,7 +423,6 @@ def from_ase( ) if atoms.calc is not None: - if isinstance( atoms.calc, (SinglePointCalculator, SinglePointDFTCalculator) ): From bf54de851df1e8c15cbd91b1930f9fba7c80c162 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 30 Jan 2024 18:16:00 -0500 Subject: [PATCH 147/157] fix type warning --- nequip/data/AtomicData.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nequip/data/AtomicData.py b/nequip/data/AtomicData.py index 86dfc724..8f32428f 100644 --- a/nequip/data/AtomicData.py +++ b/nequip/data/AtomicData.py @@ -157,9 +157,12 @@ def _process_dict(kwargs, ignore_fields=[]): # ^ this tensor is a scalar; we need to give it # a data dimension to play nice with irreps kwargs[k] = v + elif isinstance(v, torch.Tensor): + # This is a tensor, so we just don't do anything except avoid the warning in the `else` + pass else: warnings.warn( - f"Value for field {k} was of unsupported type {type(k)} (value was {v})" + f"Value for field {k} was of unsupported type {type(v)} (value was {v})" ) if AtomicDataDict.BATCH_KEY in kwargs: From bffd533c60a05ff61b4cf9a5b0af8e8d940b66d5 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 30 Jan 2024 18:16:20 -0500 Subject: [PATCH 148/157] add training blowup sanity threshold to example.yaml --- configs/example.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/configs/example.yaml b/configs/example.yaml index 170304a8..f7e799eb 100644 --- a/configs/example.yaml +++ b/configs/example.yaml @@ -99,6 +99,9 @@ early_stopping_patiences: early_stopping_lower_bounds: # stop early if a metric value is lower than the bound LR: 1.0e-5 +early_stopping_upper_bounds: # stop early if the training appears to have exploded + validation_loss: 1.0e4 + # loss function loss_coeffs: forces: 1 # if using PerAtomMSELoss, a default weight of 1:1 on each should work well From e96ccd4cd4ad02cd496b47c55bf5485882081650 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Se=C3=A1n=20Kavanagh?= <51478689+kavanase@users.noreply.github.com> Date: Fri, 19 Apr 2024 13:42:19 -0400 Subject: [PATCH 149/157] Update `.readthedocs.yaml` (#418) * Update .readthedocs.yaml * Bump `flake8` to avoid linting failure * Fix typo and reformat code to satisfy now-caught `flake8` linting --- .github/workflows/lint.yaml | 2 +- .readthedocs.yaml | 9 +++++++-- nequip/data/_dataset/_base_datasets.py | 5 +++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 070f2557..5f7c96cd 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -29,7 +29,7 @@ jobs: python-version: '3.x' - name: Install flake8 run: | - pip install flake8==4.0.1 + pip install flake8==7.0.0 - name: run flake8 run: | flake8 . 
--count --show-source --statistics diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 661d3d3b..70205bbd 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -1,10 +1,15 @@ # .readthedocs.yaml # Read the Docs configuration file -# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details +# See https://docs.readthedocs.io/en/stable/config-file for details # Required version: 2 +build: + os: ubuntu-22.04 + tools: + python: "3.9" + # Build documentation in the docs/ directory with Sphinx sphinx: configuration: docs/conf.py @@ -12,4 +17,4 @@ sphinx: # Optionally declare the Python requirements required to build your docs python: install: - - requirements: docs/requirements.txt \ No newline at end of file + - requirements: docs/requirements.txt diff --git a/nequip/data/_dataset/_base_datasets.py b/nequip/data/_dataset/_base_datasets.py index d49b91d2..bda86734 100644 --- a/nequip/data/_dataset/_base_datasets.py +++ b/nequip/data/_dataset/_base_datasets.py @@ -220,11 +220,12 @@ def process(self): # Check bad key combinations, but don't require that this be a graph yet. AtomicDataDict.validate_keys(all_keys, graph_required=False) - # check dimesionality + # check dimensionality num_examples = set([len(a) for a in fields.values()]) if not len(num_examples) == 1: + shape_dict = {f: v.shape for f, v in fields.items()} raise ValueError( - f"This dataset is invalid: expected all fields to have same length (same number of examples), but they had shapes { {f: v.shape for f, v in fields.items() } }" + f"This dataset is invalid: expected all fields to have same length (same number of examples), but they had shapes {shape_dict}" ) num_examples = next(iter(num_examples)) From 4bf88206d84398766448ca7225189d8888cdd9a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Se=C3=A1n=20Kavanagh?= <51478689+kavanase@users.noreply.github.com> Date: Fri, 19 Apr 2024 14:18:14 -0400 Subject: [PATCH 150/157] Fix docs dependencies (#420) --- docs/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 8085d79c..a36b74ed 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1 +1,2 @@ -myst-parser \ No newline at end of file +myst-parser +sphinx_rtd_theme From c310ad60bf0df92af918d983b5922123c8d5685a Mon Sep 17 00:00:00 2001 From: Chuin Wei Tan <87742566+cw-tan@users.noreply.github.com> Date: Tue, 30 Apr 2024 17:38:01 -0400 Subject: [PATCH 151/157] add matscipy neighborlist option (#1) --------- Co-authored-by: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> --- CHANGELOG.md | 1 + nequip/data/AtomicData.py | 37 +++++++++++++++++++++++++++---------- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f0025969..93fb1d25 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ Most recent change on the bottom. 
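The matscipy neighborlist switch added in this patch is read once, at import time of `nequip.data` (see the module-level `os.environ.get` in the diff below), so it must be set before the first `nequip` import. A minimal sketch of opting in, assuming only the behavior visible in the diff:

import os

# Must be set before `nequip.data` is imported; only "true"/"false" are accepted.
# Requires `matscipy` to be installed, since the flag triggers
# `import matscipy.neighbours` at module scope.
os.environ["NEQUIP_MATSCIPY_NL"] = "true"

import nequip.data  # noqa: E402  # reads NEQUIP_MATSCIPY_NL at import time
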
- `nequip-deploy --using-dataset` to support data-dependent deployment steps - Support for Gaussian Mixture Model uncertainty quantification (https://doi.org/10.1063/5.0136574) - `nequip-deploy build --checkpoint` and `--override` to avoid many largely duplicated YAML files +- matscipy neighborlist support enabled with `NEQUIP_MATSCIPY_NL` environment variable ### Changed - Always require explicit `seed` diff --git a/nequip/data/AtomicData.py b/nequip/data/AtomicData.py index 8f32428f..70c8fd2e 100644 --- a/nequip/data/AtomicData.py +++ b/nequip/data/AtomicData.py @@ -5,7 +5,7 @@ import warnings from copy import deepcopy -from typing import Union, Tuple, Dict, Optional, List, Set, Sequence +from typing import Union, Tuple, Dict, Optional, List, Set, Sequence, Final from collections.abc import Mapping import os @@ -705,6 +705,13 @@ def without_nodes(self, which_nodes): assert _ERROR_ON_NO_EDGES in ("true", "false") _ERROR_ON_NO_EDGES = _ERROR_ON_NO_EDGES == "true" +_NEQUIP_MATSCIPY_NL: Final[bool] = os.environ.get("NEQUIP_MATSCIPY_NL", "false").lower() +assert _NEQUIP_MATSCIPY_NL in ("true", "false") +_NEQUIP_MATSCIPY_NL = _NEQUIP_MATSCIPY_NL == "true" + +if _NEQUIP_MATSCIPY_NL: + import matscipy.neighbours + def neighbor_list_and_relative_vec( pos, @@ -783,15 +790,25 @@ def neighbor_list_and_relative_vec( # ASE dependent part temp_cell = ase.geometry.complete_cell(temp_cell) - first_idex, second_idex, shifts = ase.neighborlist.primitive_neighbor_list( - "ijS", - pbc, - temp_cell, - temp_pos, - cutoff=float(r_max), - self_interaction=strict_self_interaction, # we want edges from atom to itself in different periodic images! - use_scaled_positions=False, - ) + if _NEQUIP_MATSCIPY_NL: + assert strict_self_interaction and not self_interaction + first_idex, second_idex, shifts = matscipy.neighbours.neighbour_list( + "ijS", + pbc=pbc, + cell=temp_cell, + positions=temp_pos, + cutoff=float(r_max), + ) + else: + first_idex, second_idex, shifts = ase.neighborlist.primitive_neighbor_list( + "ijS", + pbc, + temp_cell, + temp_pos, + cutoff=float(r_max), + self_interaction=strict_self_interaction, # we want edges from atom to itself in different periodic images! + use_scaled_positions=False, + ) # Eliminate true self-edges that don't cross periodic boundaries if not self_interaction: From 9b5b17c6d1d0449b4353d2adffee8ac1f8b48b34 Mon Sep 17 00:00:00 2001 From: Alby M <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 30 Apr 2024 18:45:46 -0400 Subject: [PATCH 152/157] Fix dataset unit rescaling of per-species shifts (#2) --- CHANGELOG.md | 1 + configs/example.yaml | 13 ++--- configs/full.yaml | 13 +++-- configs/minimal_stress.yaml | 2 +- nequip/model/_scaling.py | 109 +++++++++++++++++++----------------- nequip/nn/_atomwise.py | 9 ++- 6 files changed, 78 insertions(+), 69 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 93fb1d25..11588117 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,7 @@ Most recent change on the bottom. - [Breaking] `fixed_fields` machinery (`npz_fixed_field_keys` is still supported, but through a more straightforward implementation) - Default run name/WandB project name of `NequIP`, they must now always be provided explicitly - [Breaking] Removed `_params` as an allowable subconfiguration suffix (i.e. 
instead of `optimizer_params` now only `optimizer_kwargs` is valid, not both) +- [Breaking] Removed `per_species_rescale_arguments_in_dataset_units` ## [0.5.6] - 2022-12-19 ### Added diff --git a/configs/example.yaml b/configs/example.yaml index f7e799eb..e9606f6f 100644 --- a/configs/example.yaml +++ b/configs/example.yaml @@ -100,7 +100,7 @@ early_stopping_lower_bounds: LR: 1.0e-5 early_stopping_upper_bounds: # stop early if the training appears to have exploded - validation_loss: 1.0e4 + validation_loss: 1.0e+4 # loss function loss_coeffs: @@ -145,17 +145,12 @@ lr_scheduler_factor: 0.5 # the default is to scale the atomic energy and forces by scaling them by the force standard deviation and to shift the energy by the mean atomic energy # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom -# whether the shifts and scales are trainable. Defaults to False. Optional -per_species_rescale_shifts_trainable: false -per_species_rescale_scales_trainable: false - # initial atomic energy shift for each species. default to the mean of per atom energy. Optional # the value can be a constant float value, an array for each species, or a string that defines a statistics over the training dataset +# if numbers are explicitly provided, they must be in the same energy units as the training data per_species_rescale_shifts: dataset_per_atom_total_energy_mean # initial atomic energy scale for each species. Optional. # the value can be a constant float value, an array for each species, or a string -per_species_rescale_scales: dataset_forces_rms - -# if explicit numbers are given for the shifts/scales, this parameter must specify whether the given numbers are unitless shifts/scales or are in the units of the dataset. If ``True``, any global rescalings will correctly be applied to the per-species values. -# per_species_rescale_arguments_in_dataset_units: True +# if numbers are explicitly provided, they must be in the same energy units as the training data +per_species_rescale_scales: null \ No newline at end of file diff --git a/configs/full.yaml b/configs/full.yaml index f43b3d49..45e14784 100644 --- a/configs/full.yaml +++ b/configs/full.yaml @@ -307,29 +307,32 @@ per_species_rescale_scales_trainable: false # whether the scales are trainable. Defaults to False. Optional per_species_rescale_shifts_trainable: false # whether the shifts are trainable. Defaults to False. Optional + per_species_rescale_shifts: dataset_per_atom_total_energy_mean # initial atomic energy shift for each species. default to the mean of per atom energy. Optional # the value can be a constant float value, an array for each species, or a string +# if numbers are explicitly provided, they must be in the same energy units as the training data # string option include: # * "dataset_per_atom_total_energy_mean", which computes the per atom average # * "dataset_per_species_total_energy_mean", which automatically compute the per atom energy mean using a GP model -per_species_rescale_scales: dataset_forces_rms + +per_species_rescale_scales: null # initial atomic energy scale for each species. Optional. 
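The per-species shifts and scales configured in these comments act per atom type, as an affine map applied to each atom's energy. A minimal PyTorch sketch of the assumed semantics (two hypothetical atom types; the numbers are illustrative and, per the surrounding comments, would be in dataset energy units):

import torch

shifts = torch.tensor([-13.6, -75.2])  # hypothetical per-type energy shifts
scales = torch.tensor([1.0, 1.0])      # per-type scales (a null config applies no scaling)
atom_types = torch.tensor([0, 1, 1])   # one type index per atom
per_atom_energy = torch.tensor([[0.1], [0.2], [0.3]])

# scale first, then shift, selected per atom by its type:
rescaled = scales[atom_types].unsqueeze(-1) * per_atom_energy + shifts[atom_types].unsqueeze(-1)
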
 # the value can be a constant float value, an array for each species, or a string
+# if numbers are explicitly provided, they must be in the same energy units as the training data
 # string options include:
 # * "dataset_forces_absmax", which computes the dataset maximum force component magnitude
 # * "dataset_per_atom_total_energy_std", which computes the per atom energy std
 # * "dataset_per_species_total_energy_std", which uses the GP model uncertainty
 # * "dataset_per_species_forces_rms", which computes the force rms for each species
-# If not provided, defaults to dataset_per_species_force_rms or dataset_per_atom_total_energy_std, depending on whether forces are being trained.
+# If not provided, defaults to null.
+
 # per_species_rescale_kwargs:
 #   total_energy:
 #     alpha: 0.001
 #     max_iteration: 20
 #     stride: 100
-# keywords for ridge regression decomposition of per specie energy. Optional. Defaults to 0.001. The value should be in the range of 1e-3 to 1e-2
-# per_species_rescale_arguments_in_dataset_units: True
-# if explicit numbers are given for the shifts/scales, this parameter must specify whether the given numbers are unitless shifts/scales or are in the units of the dataset. If ``True``, any global rescalings will correctly be applied to the per-species values.
+# keywords for ridge regression decomposition of per species energy. Optional. Defaults to 0.001. The value should be in the range of 1e-3 to 1e-2
 
 # global energy shift and scale
 # When "dataset_total_energy_mean", the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
diff --git a/configs/minimal_stress.yaml b/configs/minimal_stress.yaml
index 63aaf832..4de407be 100644
--- a/configs/minimal_stress.yaml
+++ b/configs/minimal_stress.yaml
@@ -40,7 +40,7 @@ dataset_include_frames: !!python/object/apply:builtins.range
 
 global_rescale_scale: dataset_total_energy_std
 per_species_rescale_shifts: dataset_per_atom_total_energy_mean
-per_species_rescale_scales: dataset_per_atom_total_energy_std
+per_species_rescale_scales: null
 
 # logging
 wandb: false
diff --git a/nequip/model/_scaling.py b/nequip/model/_scaling.py
index 73d894a6..d1faaa88 100644
--- a/nequip/model/_scaling.py
+++ b/nequip/model/_scaling.py
@@ -23,9 +23,11 @@ def RescaleEnergyEtc(
         dataset=dataset,
         initialize=initialize,
         module_prefix="global_rescale",
-        default_scale=f"dataset_{AtomicDataDict.FORCE_KEY}_rms"
-        if AtomicDataDict.FORCE_KEY in model.irreps_out
-        else f"dataset_{AtomicDataDict.TOTAL_ENERGY_KEY}_std",
+        default_scale=(
+            f"dataset_{AtomicDataDict.FORCE_KEY}_rms"
+            if AtomicDataDict.FORCE_KEY in model.irreps_out
+            else f"dataset_{AtomicDataDict.TOTAL_ENERGY_KEY}_std"
+        ),
         default_shift=None,
         default_scale_keys=AtomicDataDict.ALL_ENERGY_KEYS,
         default_shift_keys=[AtomicDataDict.TOTAL_ENERGY_KEY],
@@ -129,42 +131,60 @@ def PerSpeciesRescale(
     initialize: bool,
     dataset: Optional[AtomicDataset] = None,
 ):
-    """Add per-atom rescaling (and shifting) for energy.
-
-    If ``initialize`` is false, doesn't compute statistics.
-    """
+    """Add per-atom rescaling (and shifting) for per-atom energies."""
     module_prefix = "per_species_rescale"
-    # = Determine energy rescale type =
-    scales = config.get(
-        module_prefix + "_scales",
-        f"dataset_{AtomicDataDict.FORCE_KEY}_rms"
-        # if `train_on_keys` isn't provided, assume conservatively
-        # that we aren't "training" on anything (i.e.
take the - # most general defaults) - if AtomicDataDict.FORCE_KEY in config.get("train_on_keys", []) - else f"dataset_per_atom_{AtomicDataDict.TOTAL_ENERGY_KEY}_std", - ) - shifts = config.get( - module_prefix + "_shifts", - f"dataset_per_atom_{AtomicDataDict.TOTAL_ENERGY_KEY}_mean", - ) - # Check for common double shift mistake with defaults if "RescaleEnergyEtc" in config.get("model_builders", []): # if the defaults are enabled, then we will get bad double shift # THIS CHECK IS ONLY GOOD ENOUGH FOR EMITTING WARNINGS has_global_shift = config.get("global_rescale_shift", None) is not None if has_global_shift: - if shifts is not None: + if config.get(module_prefix + "_shifts", True) is not None: # using default of per_atom shift raise RuntimeError( "A global_rescale_shift was provided, but the default per-atom energy shift was not disabled." ) del has_global_shift - # = Determine what statistics need to be compute =\ - arguments_in_dataset_units = None + return _PerSpeciesRescale( + scales_default=None, + shifts_default=f"dataset_per_atom_{AtomicDataDict.TOTAL_ENERGY_KEY}_mean", + field=AtomicDataDict.PER_ATOM_ENERGY_KEY, + out_field=AtomicDataDict.PER_ATOM_ENERGY_KEY, + module_prefix=module_prefix, + insert_before="total_energy_sum", + model=model, + config=config, + initialize=initialize, + dataset=dataset, + ) + + +def _PerSpeciesRescale( + scales_default, + shifts_default, + field: str, + out_field: str, + module_prefix: str, + insert_before: str, + model: GraphModuleMixin, + config, + initialize: bool, + dataset: Optional[AtomicDataset] = None, +): + """Add per-atom rescaling (and shifting) for a field + + If ``initialize`` is false, doesn't compute statistics. + """ + scales = config.get(module_prefix + "_scales", scales_default) + shifts = config.get(module_prefix + "_shifts", shifts_default) + + # = Determine what statistics need to be compute = + assert config.get( + module_prefix + "_arguments_in_dataset_units", True + ), f"The PerSpeciesRescale builder is only compatible with {module_prefix + '_arguments_in_dataset_units'} set to True" + if initialize: str_names = [] for value in [scales, shifts]: @@ -181,20 +201,6 @@ def PerSpeciesRescale( else: raise ValueError(f"Invalid value `{value}` of type {type(value)}") - if len(str_names) == 2: - # Both computed from dataset - arguments_in_dataset_units = True - elif len(str_names) == 1: - if None in [scales, shifts]: - # if the one that isnt str is null, it's just disabled - # that has no units - # so it's ok to have just one and to be in dataset units - arguments_in_dataset_units = True - else: - assert config[ - module_prefix + "_arguments_in_dataset_units" - ], "Requested to set either the shifts or scales of the per_species_rescale using dataset values, but chose to provide the other in non-dataset units. 
Please give the explictly specified shifts/scales in dataset units and set per_species_rescale_arguments_in_dataset_units" - # = Compute shifts and scales = if len(str_names) > 0: computed_stats = _compute_stats( @@ -206,21 +212,24 @@ def PerSpeciesRescale( if isinstance(scales, str): s = scales - scales = computed_stats[str_names.index(scales)].squeeze(-1) # energy is 1D + # energy or other property is 1D: + scales = computed_stats[str_names.index(scales)].squeeze(-1) logging.info(f"Replace string {s} to {scales}") elif isinstance(scales, (list, float)): scales = torch.as_tensor(scales) if isinstance(shifts, str): s = shifts - shifts = computed_stats[str_names.index(shifts)].squeeze(-1) # energy is 1D + # energy or other property is 1D: + shifts = computed_stats[str_names.index(shifts)].squeeze(-1) logging.info(f"Replace string {s} to {shifts}") elif isinstance(shifts, (list, float)): shifts = torch.as_tensor(shifts) + # TODO kind of weird error to check for here if scales is not None and torch.min(scales) < RESCALE_THRESHOLD: raise ValueError( - f"Per species energy scaling was very low: {scales}. Maybe try setting {module_prefix}_scales = 1." + f"Per species scaling was very low: {scales}. Maybe try setting {module_prefix}_scales = 1." ) logging.info( @@ -234,22 +243,20 @@ def PerSpeciesRescale( # so this is fine regardless of whether its trainable. scales = 1.0 if scales is not None else None shifts = 0.0 if shifts is not None else None - # values correctly scaled according to where the come from - # will be brought from the state dict later, - # so what you set this to doesnt matter: - arguments_in_dataset_units = False + # values from the previously initialized model + # will be brought in from the state dict later, + # so these values (and rescaling them) doesn't matter # insert in per species shift params = dict( - field=AtomicDataDict.PER_ATOM_ENERGY_KEY, - out_field=AtomicDataDict.PER_ATOM_ENERGY_KEY, + field=field, + out_field=out_field, shifts=shifts, scales=scales, + arguments_in_dataset_units=True, ) - - params["arguments_in_dataset_units"] = arguments_in_dataset_units model.insert_from_parameters( - before="total_energy_sum", + before=insert_before, name=module_prefix, shared_params=config, builder=PerSpeciesScaleShift, diff --git a/nequip/nn/_atomwise.py b/nequip/nn/_atomwise.py index bd238072..b4020ec5 100644 --- a/nequip/nn/_atomwise.py +++ b/nequip/nn/_atomwise.py @@ -83,9 +83,11 @@ def __init__( self.out_field = f"{reduce}_{field}" if out_field is None else out_field self._init_irreps( irreps_in=irreps_in, - irreps_out={self.out_field: irreps_in[self.field]} - if self.field in irreps_in - else {}, + irreps_out=( + {self.out_field: irreps_in[self.field]} + if self.field in irreps_in + else {} + ), ) def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: @@ -204,6 +206,7 @@ def __init__( else: self.register_buffer("scales", torch.Tensor()) + assert isinstance(arguments_in_dataset_units, bool) self.arguments_in_dataset_units = arguments_in_dataset_units # we can use FMA for performance but its type promotion is broken until 1.13 From 7fcd45ddadc63f585dfec2a816eaf3162cfca06c Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 1 May 2024 18:07:07 -0400 Subject: [PATCH 153/157] remove unused --- nequip/train/loss.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/nequip/train/loss.py b/nequip/train/loss.py index 1420fc22..fe5144da 100644 --- a/nequip/train/loss.py +++ b/nequip/train/loss.py @@ -39,10 
+39,7 @@ class Loss:
     def __init__(
         self,
         coeffs: Union[dict, str, List[str]],
-        coeff_schedule: str = "constant",
     ):
-
-        self.coeff_schedule = coeff_schedule
         self.coeffs = {}
         self.funcs = {}
         self.keys = []

From 9ba1d5f8dea1c3afd1a9db98632b20d47b5cb52a Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Wed, 1 May 2024 18:11:08 -0400
Subject: [PATCH 154/157] Add SimpleLossSchedule

---
 CHANGELOG.md                            |  2 +
 configs/full.yaml                       | 16 +++---
 nequip/train/callbacks/loss_schedule.py | 54 +++++++++++++++++++++++++
 nequip/train/trainer.py                 |  6 +++
 4 files changed, 73 insertions(+), 5 deletions(-)
 create mode 100644 nequip/train/callbacks/loss_schedule.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 11588117..39092d0e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,6 +22,8 @@ Most recent change on the bottom.
 - `include_file_as_baseline_config` for simple modifications of existing configs
 - `nequip-deploy --using-dataset` to support data-dependent deployment steps
 - Support for Gaussian Mixture Model uncertainty quantification (https://doi.org/10.1063/5.0136574)
+- `start_of_epoch_callbacks`
+- `nequip.train.callbacks.loss_schedule.SimpleLossSchedule` for changing the loss coefficients at specified epochs
 - `nequip-deploy build --checkpoint` and `--override` to avoid many largely duplicated YAML files
 - matscipy neighborlist support enabled with `NEQUIP_MATSCIPY_NL` environment variable
 
diff --git a/configs/full.yaml b/configs/full.yaml
index 45e14784..8a670517 100644
--- a/configs/full.yaml
+++ b/configs/full.yaml
@@ -212,9 +212,9 @@ early_stopping_upper_bounds:
 
 # loss function
 loss_coeffs:
   # different weights to use in a weighted loss functions
-  forces: 1 # if using PerAtomMSELoss, a default weight of 1:1 on each should work well
+  forces: 1.0 # if using PerAtomMSELoss, a default weight of 1:1 on each should work well
   total_energy:
-    - 1
+    - 1.0
     - PerAtomMSELoss
 # note that the ratio between force and energy loss matters for the training process. One may consider using 1:1 with the PerAtomMSELoss. If the energy loss still significantly dominates the loss function at the initial epochs, tuning the energy loss weight lower helps the training a lot.
 
@@ -249,6 +249,15 @@ loss_coeffs:
 #   - L1Loss
 # forces: 1.0
 
+# You can schedule changes in the loss coefficients using a callback:
+# In the "schedule" key each entry is a two-element list of:
+#    - the 1-based epoch index at which to start the new loss coefficients
+#    - the new loss coefficients as a dict
+#
+# start_of_epoch_callbacks:
+#  - !!python/object:nequip.train.callbacks.loss_schedule.SimpleLossSchedule {"schedule": [[2, {"forces": 0.0, "total_energy": 1.0}]]}
+#
 
 # output metrics
 metrics_components:
   - - forces                               # key
diff --git a/nequip/train/callbacks/loss_schedule.py b/nequip/train/callbacks/loss_schedule.py
new file mode 100644
index 00000000..edd6f173
--- /dev/null
+++ b/nequip/train/callbacks/loss_schedule.py
@@ -0,0 +1,54 @@
+from typing import Dict, List, Tuple
+from dataclasses import dataclass
+import numpy as np
+
+from nequip.train import Trainer, Loss
+
+# Making this a dataclass takes care of equality operators, handling restart consistency checks
+
+
+@dataclass
+class SimpleLossSchedule:
+    """Schedule `loss_coeffs` through a training run.
+
+    To use this in a training, set in your YAML file:
+
+    start_of_epoch_callbacks:
+     - !!python/object:nequip.train.callbacks.loss_schedule.SimpleLossSchedule {"schedule": [[2, {"forces": 1.0, "total_energy": 0.0}], [30, {"forces": 0.0, "total_energy": 1.0}]]}
+
+    This funny syntax tells PyYAML to construct an object of this class.
+
+    Each entry in the schedule is a tuple of the 1-based epoch index at which that set of loss coefficients takes effect, and a dict of loss coefficients.
+    """
+
+    schedule: List[Tuple[int, Dict[str, float]]] = None
+
+    def __call__(self, trainer: Trainer):
+        assert (
+            self in trainer._start_of_epoch_callbacks
+        ), "must be start not end of epoch"
+        # user-facing 1 based indexing of epochs rather than internal zero based
+        iepoch: int = trainer.iepoch + 1
+        if iepoch < 1:  # initial validation epoch is 0 in user-facing indexing
+            return
+        loss_function: Loss = trainer.loss
+
+        assert self.schedule is not None
+        schedule_start_epochs = np.asarray([e[0] for e in self.schedule])
+        # make sure they are ascending
+        assert len(schedule_start_epochs) >= 1
+        assert schedule_start_epochs[0] >= 2, "schedule must start at epoch 2 or later"
+        assert np.all(
+            (schedule_start_epochs[1:] - schedule_start_epochs[:-1]) > 0
+        ), "schedule start epochs must be strictly ascending"
+        # we are running at _start_ of epoch, so we need to apply the right change for the current epoch
+        current_change_index = np.searchsorted(schedule_start_epochs, iepoch + 1) - 1
+        # ^ searchsorted 3 in [2, 10, 19] would return 1, for example
+        # but searching 2 in [2, 10, 19] gives 0, so we actually search iepoch + 1 to always be ahead of the start
+        # apply the current change to handle restarts
+        if current_change_index >= 0:
+            new_coeffs = self.schedule[current_change_index][1]
+            assert (
+                loss_function.coeffs.keys() == new_coeffs.keys()
+            ), "all coeff schedules must contain all loss terms"
+            loss_function.coeffs.update(new_coeffs)
diff --git a/nequip/train/trainer.py b/nequip/train/trainer.py
index 986e7874..bdfb4f17 100644
--- a/nequip/train/trainer.py
+++ b/nequip/train/trainer.py
@@ -258,6 +258,7 @@ def __init__(
         val_idcs: Optional[list] = None,
         train_val_split: str = "random",
         init_callbacks: list = [],
+        start_of_epoch_callbacks: list = [],
         end_of_epoch_callbacks: list = [],
         end_of_batch_callbacks: list = [],
         end_of_train_callbacks: list = [],
@@ -348,6 +349,9 @@ def __init__(
 
         # load all callbacks
         self._init_callbacks = [load_callable(callback) for callback in init_callbacks]
+        self._start_of_epoch_callbacks = [
+            load_callable(callback) for callback in start_of_epoch_callbacks
+        ]
         self._end_of_epoch_callbacks = [
             load_callable(callback) for callback in end_of_epoch_callbacks
         ]
@@ -887,6 +891,8 @@ def reset_metrics(self):
         self.metrics.to(self.torch_device)
 
     def epoch_step(self):
+        for callback in self._start_of_epoch_callbacks:
+            callback(self)
         dataloaders = {TRAIN: self.dl_train, VALIDATION: self.dl_val}
         categories = [TRAIN, VALIDATION] if self.iepoch >= 0 else [VALIDATION]
 
From f2a40fefe5feffd8b75230fc6f2c8e334e03f3a8 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Fri, 10 May 2024 16:51:54 -0400
Subject: [PATCH 155/157] Cleanup

---
 README.md | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index e2568aed..9c983f9f 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ NequIP is an open-source code for building E(3)-equivariant interatomic potentia
 NequIP requires:
 
 * Python >= 3.7
-* PyTorch == 1.10.*
or 1.13.*. PyTorch can be installed following the [instructions from their documentation](https://pytorch.org/get-started/locally/). Note that neither `torchvision` nor `torchaudio`, included in the default install command, are needed for NequIP. +* PyTorch == `1.11.*` or `1.13.*` or later (do **not** use `1.12`). (Some users have observed silent issues with PyTorch 2+, as reported in #311. Please report any similar issues you encounter.) PyTorch can be installed following the [instructions from their documentation](https://pytorch.org/get-started/locally/). Note that neither `torchvision` nor `torchaudio`, included in the default install command, are needed for NequIP. **You must install PyTorch before installing NequIP, however it is not marked as a dependency of `nequip` to prevent `pip` from trying to overwrite your PyTorch installation.** @@ -22,14 +22,9 @@ To install: * We use [Weights&Biases](https://wandb.ai) (or TensorBoard) to keep track of experiments. This is not a strict requirement — you can use our package without it — but it may make your life easier. If you want to use it, create an account [here](https://wandb.ai) and install the Python package: ``` - pip install wandb # tensorboard + pip install wandb ``` - * for TensorBoard users - * On your local computer, build an ssh tunnel to your compute node by `ssh -L 6006:127.0.0.1:6006 username@ip` - * On the compute node, go to the `{root}` folder specify in the config file, and run `tensorboard --logdir tb_summary` - * Use your local computer browser to log on `http://localhost:6006` - * Install NequIP NequIP can be installed from PyPI: @@ -142,7 +137,7 @@ For installation instructions, please see the [`pair_nequip` repository](https:/ `nequip` is a modular framework and extension packages can provide new model components, architectures, etc. The main extension package(s) currently available are: - [Allegro](https://github.com/mir-group/allegro): implements the highly parallelizable Allegro model architecture. -Details on writing and using plugins can be found in the [Allegro tutorial](https://colab.research.google.com/drive/1yq2UwnET4loJYg_Fptt9kpklVaZvoHnq). +Details on writing and using plugins can be found in the [Allegro tutorial](https://colab.research.google.com/drive/1yq2UwnET4loJYg_Fptt9kpklVaZvoHnq) and in [`nequip-example-extension`](https://github.com/mir-group/nequip-example-extension/). ## References & citing From 0cc2e31ea5626d6342de34017a8fa5a7e793d4dc Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 10 May 2024 16:53:54 -0400 Subject: [PATCH 156/157] Bump version CHANGELOG --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 39092d0e..c60ed185 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Most recent change on the bottom. 
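Given the version guidance in the README change above (PyTorch `1.11.*` or `1.13.*` or `2.*`, but not `1.12`), a minimal sketch of an environment check, using the `packaging` module the codebase already imports elsewhere; the bounds below simply restate the README text:

import torch
from packaging import version

v = version.parse(torch.__version__)
# accept >= 1.11 while excluding the 1.12 series the README warns against
recommended = v >= version.parse("1.11") and not (
    version.parse("1.12") <= v < version.parse("1.13")
)
assert recommended, f"PyTorch {torch.__version__} is outside the README-recommended versions"
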
-## Unreleased - 0.6.0 +## Unreleased + +## [0.6.0] - 2024-5-10 ### Added - add Tensorboard as logger option - [Breaking] Refactor overall model logic into `GraphModel` top-level module From ef79965ced43851233a1bec86efb3ddfef09ffd5 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 10 May 2024 16:58:51 -0400 Subject: [PATCH 157/157] Update PyTorch version for tests --- .github/workflows/tests.yml | 2 +- .github/workflows/tests_develop.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2e206edf..0fb33150 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -16,7 +16,7 @@ jobs: strategy: matrix: python-version: [3.9] - torch-version: [1.11.0, 1.13.1] + torch-version: [1.13.1, "2.*"] steps: - uses: actions/checkout@v2 diff --git a/.github/workflows/tests_develop.yml b/.github/workflows/tests_develop.yml index 1444a128..d399e426 100644 --- a/.github/workflows/tests_develop.yml +++ b/.github/workflows/tests_develop.yml @@ -16,7 +16,7 @@ jobs: strategy: matrix: python-version: [3.9] - torch-version: [1.13.1] + torch-version: ["2.*"] steps: - uses: actions/checkout@v2
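
For reference on `SimpleLossSchedule` from PATCH 154 above: the `np.searchsorted(..., iepoch + 1) - 1` lookup selects the last schedule entry whose start epoch has been reached, with `-1` meaning no change applies yet. A standalone illustration mirroring the example epochs in the code's own comments:

import numpy as np

schedule_start_epochs = np.asarray([2, 10, 19])  # the example from the comments in loss_schedule.py
for iepoch in (1, 2, 3, 10, 25):  # 1-based epoch indices
    idx = np.searchsorted(schedule_start_epochs, iepoch + 1) - 1
    print(iepoch, idx)  # -> (1, -1), (2, 0), (3, 0), (10, 1), (25, 2)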