Merge pull request #257 from cpnota/release/0.7.2
Release/0.7.2
cpnota authored Aug 5, 2021
2 parents 9c84581 + bb4fc1e commit aaa5403
Showing 6 changed files with 38 additions and 19 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
@@ -27,7 +27,7 @@ jobs:
       run: |
         sudo apt-get install swig
         sudo apt-get install unrar
-        pip install torch==1.8.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
+        pip install torch==1.9.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
         make install
         AutoROM -v
         python -m atari_py.import_roms $(python -c 'import site; print(site.getsitepackages()[0])')/multi_agent_ale_py/ROM
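(The -f flag above is pip's --find-links: it points pip at PyTorch's stable wheel index so that the CPU-only +cpu build resolves, sparing CI the much larger default CUDA wheel.)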
20 changes: 14 additions & 6 deletions all/approximation/approximation.py
@@ -103,7 +103,7 @@ def eval(self, *inputs):
         with torch.no_grad():
             # check current mode
             mode = self.model.training
-            # switch to eval mode
+            # switch model to eval mode
             self.model.eval()
             # run forward pass
             result = self.model(*inputs)
@@ -144,14 +144,11 @@ def step(self):
         Returns:
             self: The current Approximation object
         '''
-        if self._clip_grad != 0:
-            utils.clip_grad_norm_(self.model.parameters(), self._clip_grad)
+        self._clip_grad_norm()
         self._optimizer.step()
         self._optimizer.zero_grad()
+        self._step_lr_scheduler()
         self._target.update()
-        if self._scheduler:
-            self._writer.add_schedule(self._name + '/lr', self._optimizer.param_groups[0]['lr'])
-            self._scheduler.step()
         self._checkpointer()
         return self

@@ -164,3 +161,14 @@ def zero_grad(self):
         '''
         self._optimizer.zero_grad()
         return self
+
+    def _clip_grad_norm(self):
+        '''Clip the gradient norm, if a clip value is set. Raises RuntimeError if the norm is non-finite.'''
+        if self._clip_grad != 0:
+            utils.clip_grad_norm_(self.model.parameters(), self._clip_grad, error_if_nonfinite=True)
+
+    def _step_lr_scheduler(self):
+        '''Step the learning rate scheduler, if one is set, and log the current learning rate.'''
+        if self._scheduler:
+            self._writer.add_schedule(self._name + '/lr', self._optimizer.param_groups[0]['lr'])
+            self._scheduler.step()
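The new error_if_nonfinite=True argument makes clipping fail loudly when the total gradient norm is NaN or infinite, instead of silently rescaling garbage; the flag first shipped in PyTorch 1.9, which lines up with the torch bump elsewhere in this release. A minimal sketch of the behavior, assuming torch >= 1.9 (the toy model and NaN loss below are illustrative, not part of the library):

    import torch
    from torch import nn

    model = nn.Linear(4, 2)
    loss = model(torch.randn(1, 4)).sum() * float('nan')  # poison the gradients with NaN
    loss.backward()
    try:
        # the total gradient norm is NaN, so clipping raises instead of proceeding
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0, error_if_nonfinite=True)
    except RuntimeError:
        print('non-finite gradient norm detected')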
2 changes: 1 addition & 1 deletion all/policies/soft_deterministic.py
@@ -75,7 +75,7 @@ def _log_prob(self, normal, raw):
         '''
         log_prob = normal.log_prob(raw)
         log_prob -= torch.log(1 - torch.tanh(raw).pow(2) + 1e-6)
-        log_prob /= self._tanh_scale
+        log_prob -= torch.log(self._tanh_scale)
         return log_prob.sum(-1)

     def _squash(self, x):
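The one-line change above is the change-of-variables correction for rescaled actions. When a raw Gaussian sample x is squashed and scaled per dimension as a = c * tanh(x), the density transforms as

    \log p(a) = \log p(x) - \log(1 - \tanh^2(x)) - \log c

so the scale enters as a subtracted \log c; dividing the log-probability by the scale, as the old line did, is not a valid density transformation. (The 1e-6 in the preceding line just guards the log against tanh saturating at exactly ±1.)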
24 changes: 18 additions & 6 deletions all/policies/soft_deterministic_test.py
@@ -56,16 +56,28 @@ def test_converge(self):
         self.assertLess(loss, 0.2)

     def test_scaling(self):
-        self.space = Box(np.array([-10, -5, 100]), np.array([10, -2, 200]))
-        self.policy = SoftDeterministicPolicy(
+        torch.manual_seed(0)
+        state = State(torch.randn(1, STATE_DIM))
+        policy1 = SoftDeterministicPolicy(
             self.model,
             self.optimizer,
-            self.space
+            Box(np.array([-1., -1., -1.]), np.array([1., 1., 1.]))
         )
+        action1, log_prob1 = policy1(state)
+
+        # reset seed and sample same thing, but with different scaling
+        torch.manual_seed(0)
         state = State(torch.randn(1, STATE_DIM))
-        action, log_prob = self.policy(state)
-        tt.assert_allclose(action, torch.tensor([[-3.09055, -4.752777, 188.98222]]))
-        tt.assert_allclose(log_prob, torch.tensor([-0.397002]), rtol=1e-4)
+        policy2 = SoftDeterministicPolicy(
+            self.model,
+            self.optimizer,
+            Box(np.array([-2., -1., -1.]), np.array([2., 1., 1.]))
+        )
+        action2, log_prob2 = policy2(state)
+
+        # check scaling was correct
+        tt.assert_allclose(action1 * torch.tensor([2, 1, 1]), action2)
+        tt.assert_allclose(log_prob1 - np.log(2), log_prob2)


if __name__ == '__main__':
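The two assertions are the change-of-variables identity in miniature: policy2 doubles only the first dimension's half-width (2 vs. 1), so with the RNG reset the sampled action's first component doubles (action1 * [2, 1, 1]) and the joint log-probability drops by exactly log 2 ≈ 0.693, hence log_prob1 - np.log(2).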
3 changes: 1 addition & 2 deletions docs/source/conf.py
@@ -22,8 +22,7 @@
author = 'Chris Nota'

# The full version, including alpha/beta/rc tags
-release = '0.7.1'
-
+release = '0.7.2'

# -- General configuration ---------------------------------------------------

6 changes: 3 additions & 3 deletions setup.py
@@ -38,7 +38,7 @@

 setup(
     name="autonomous-learning-library",
-    version="0.7.1",
+    version="0.7.2",
     description=("A library for building reinforcement learning agents in Pytorch"),
     packages=find_packages(),
     url="https://github.com/cpnota/autonomous-learning-library.git",
@@ -61,8 +61,8 @@
         "gym~=0.18.0", # common environment interface
         "numpy>=1.18.0", # math library
         "matplotlib>=3.3.0", # plotting library
-        "opencv-python~=3.4.0", # used by atari wrappers
-        "torch~=1.8.0", # core deep learning library
+        "opencv-python~=3.4.0", # used by atari wrappers
+        "torch~=1.9.0", # core deep learning library
         "tensorboard>=2.3.0", # logging and visualization
         "tensorboardX>=2.1.0", # tensorboard/pytorch compatibility
         "cloudpickle>=1.2.0", # used to copy environments
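Under PEP 440's compatible-release operator, torch~=1.9.0 is equivalent to torch>=1.9.0,<1.10.0, so installs track 1.9.x patch releases while matching the 1.9.0+cpu wheel pinned in the CI workflow above.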
