Commit 12904eb: work
StoneT2000 committed Jan 21, 2024 · 1 parent 67b17ab
Showing 2 changed files with 8 additions and 3 deletions.
@@ -215,7 +215,7 @@ def get_action_and_value(self, x, action=None):
rewards = torch.zeros((args.num_steps, args.num_envs)).to(device)
dones = torch.zeros((args.num_steps, args.num_envs)).to(device)
values = torch.zeros((args.num_steps, args.num_envs)).to(device)

# TRY NOT TO MODIFY: start the game
global_step = 0
start_time = time.time()
@@ -234,6 +234,7 @@ def get_action_and_value(self, x, action=None):
def clip_action(action: torch.Tensor):
    return torch.clamp(action.detach(), action_space_low, action_space_high)
for iteration in range(1, args.num_iterations + 1):
+    timeout_bonus = torch.zeros((args.num_steps, args.num_envs), device=device)
    with torch.inference_mode():
        if iteration % 25 == 1:
            # evaluate
@@ -301,6 +302,9 @@ def clip_action(action: torch.Tensor):
        # next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(next_done).to(device)
        if truncations.any():
            # TODO make truncations a tensor, which should all be the same value really...
+            final_obs = next_obs
+            final_value = agent.get_value(final_obs)
+            timeout_bonus[step] = final_value.flatten()
            next_obs, _ = envs.reset()
        # writer.add_scalar("charts/episodic_is_grasped", is_grasped.mean().cpu().numpy(), global_step)
        # writer.add_scalar("charts/episodic_place_rew", place_rew.mean().cpu().numpy(), global_step)
@@ -316,11 +320,11 @@ def clip_action(action: torch.Tensor):
print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)

# bootstrap value if not done
with torch.no_grad():
next_value = agent.get_value(next_obs).reshape(1, -1)
advantages = torch.zeros_like(rewards).to(device)
rewards_ = rewards + timeout_bonus
lastgaelam = 0
for t in reversed(range(args.num_steps)):
if t == args.num_steps - 1:
@@ -329,7 +333,7 @@ def clip_action(action: torch.Tensor):
            else:
                nextnonterminal = 1.0 - dones[t + 1]
                nextvalues = values[t + 1]
-            delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t]
+            delta = rewards_[t] + args.gamma * nextvalues * nextnonterminal - values[t]
            advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam
        returns = advantages + values

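The change above implements what is often called partial-episode bootstrapping: when an episode ends because of a time limit (a truncation) rather than a real terminal state, the value of the final observation is added to that step's reward before GAE is computed, so the agent is not penalized for running out of time. Below is a minimal, self-contained sketch of the idea, separate from the training script; names such as gae_with_timeout_bonus and final_values are illustrative, not from the commit.

import torch

def gae_with_timeout_bonus(rewards, values, dones, truncations, final_values,
                           next_value, next_done, gamma=0.99, gae_lambda=0.95):
    """GAE where V(s_T) is folded into the reward at time-limit truncations.

    rewards, values, dones, truncations, final_values: (num_steps, num_envs)
    next_value, next_done: (num_envs,) describing the state after the last step
    """
    num_steps, _ = rewards.shape
    # add the bootstrap value wherever the episode was cut off by the time limit
    rewards_ = rewards + truncations.float() * final_values
    advantages = torch.zeros_like(rewards)
    lastgaelam = torch.zeros_like(next_value)
    for t in reversed(range(num_steps)):
        if t == num_steps - 1:
            nextnonterminal = 1.0 - next_done
            nextvalues = next_value
        else:
            nextnonterminal = 1.0 - dones[t + 1]
            nextvalues = values[t + 1]
        delta = rewards_[t] + gamma * nextvalues * nextnonterminal - values[t]
        lastgaelam = delta + gamma * gae_lambda * nextnonterminal * lastgaelam
        advantages[t] = lastgaelam
    returns = advantages + values
    return advantages, returns

Folding the bonus into the reward keeps the GAE recursion itself untouched: at a truncation boundary the nonterminal mask still zeroes the gamma * V(s_{t+1}) term, and the added V(s_T) restores the bootstrap that the mask removed.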
mani_skill2/envs/sapien_env.py (1 change: 1 addition & 0 deletions)
@@ -685,6 +685,7 @@ def _set_scene_config(self):
        # NOTE (fxiang): smaller contact_offset is faster as less contacts are considered, but some contacts may be missed if distance changes too fast
        # NOTE (fxiang): solver iterations 15 is recommended to balance speed and accuracy. If stable grasps are necessary >= 20 is preferred.
        # NOTE (fxiang): can try using more cpu_workers as it may also make it faster if there are a lot of collisions, collision filtering is on CPU
+        # NOTE (fxiang): enable_enhanced_determinism is for CPU probably. If there are 10 far apart sub scenes, this being True makes it so they do not impact each other at all
        physx.set_scene_config(
            cpu_workers=0,
            enable_pcm=True,
            ...
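The NOTE comments capture the main speed/accuracy trade-offs in the PhysX scene settings. A hedged sketch of how such a configuration might look, assuming a SAPIEN build whose physx.set_scene_config accepts these keywords: only cpu_workers and enable_pcm are visible in this diff; the other keyword names and values are illustrative guesses drawn from the comments, so check them against your SAPIEN version.

import sapien.physx as physx

physx.set_scene_config(
    cpu_workers=0,          # try more workers if there are many collisions: collision filtering runs on the CPU
    enable_pcm=True,        # persistent contact manifolds
    contact_offset=0.02,    # smaller is faster (fewer candidate contacts) but may miss fast-approaching contacts
    solver_iterations=15,   # ~15 balances speed and accuracy; >= 20 if stable grasps are needed
    enable_enhanced_determinism=False,  # True keeps far-apart sub-scenes from influencing each other (CPU)
)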
