Small fix for observation disturbance in quadrotor env

utiasDSL · Jul 30, 2024 · 0d9c780
1 parent 6281e1e
commit 0d9c780
Show file tree

Hide file tree

Showing 4 changed files with 29 additions and 23 deletions.
diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude/ppo_quadrotor_2D_attitude.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude/ppo_quadrotor_2D_attitude.yaml
@@ -14,15 +14,15 @@ algo_config:
   critic_lr: 0.001
 
   # runner args
-  max_env_steps: 480000
+  max_env_steps: 540000
   rollout_batch_size: 4
-  rollout_steps: 1000
+  rollout_steps: 540
   eval_batch_size: 50
 
   # misc
-  log_interval: 8000
-  save_interval: 0
+  log_interval: 10800
+  save_interval: 540000
   num_checkpoints: 0
-  eval_interval: 8000
+  eval_interval: 10800
   eval_save_best: True
   tensorboard: False
diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml
@@ -5,12 +5,12 @@ task_config:
   pyb_freq: 1200
   physics: pyb
   quad_type: 4
-  normalized_rl_action_space: True
+  normalized_rl_action_space: False
 
   init_state:
     init_x: 0
     init_x_dot: 0
-    init_z: 1.15
+    init_z: 1.0
     init_z_dot: 0
     init_theta: 0
     init_theta_dot: 0
@@ -20,20 +20,20 @@ task_config:
   init_state_randomization_info:
     init_x:
       distrib: 'uniform'
-      low: -0.01
-      high: 0.01
+      low: -0.02
+      high: 0.02
     init_x_dot:
       distrib: 'uniform'
-      low: -0.01
-      high: 0.01
+      low: -0.02
+      high: 0.02
     init_z:
       distrib: 'uniform'
-      low: -0.01
-      high: 0.01
+      low: -0.02
+      high: 0.02
     init_z_dot:
       distrib: 'uniform'
-      low: -0.01
-      high: 0.01
+      low: -0.02
+      high: 0.02
     init_theta:
       distrib: 'uniform'
       low: -0.02
@@ -60,10 +60,15 @@ task_config:
   obs_goal_horizon: 1
 
   # RL Reward
-  rew_state_weight: [1.0, 0.01, 1.0, 0.01, 0.1, 0.1]
+  rew_state_weight: [10.0, 0.1, 10.0, 0.1, 0.1, 0.001]
   rew_act_weight: 0.1
   rew_exponential: True
 
+  disturbances:
+    observation:
+      - disturbance_func: white_noise
+        std: [0.02, 0.02, 0.04, 0.04, 0.04, 0.1, 0., 0., 0., 0., 0., 0.]
+
 #  constraints:
 #    - constraint_form: default_constraint
 #      constrained_variable: state

diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude/sac_quadrotor_2D_attitude.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude/sac_quadrotor_2D_attitude.yaml
@@ -12,18 +12,18 @@ algo_config:
   entropy_lr: 0.001
 
   # runner args
-  max_env_steps: 200000
+  max_env_steps: 540000
   warm_up_steps: 1000
   rollout_batch_size: 4
   num_workers: 1
-  max_buffer_size: 50000
+  max_buffer_size: 54000
   deque_size: 50
   eval_batch_size: 50
 
   # misc
-  log_interval: 4000
-  save_interval: 0
+  log_interval: 10800
+  save_interval: 540000
   num_checkpoints: 0
-  eval_interval: 4000
+  eval_interval: 10800
   eval_save_best: True
   tensorboard: False
diff --git a/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py b/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py
@@ -1023,8 +1023,6 @@ def _get_observation(self):
 
         # Start from a copy of the state; the observation disturbance is applied below, after goal info is appended.
         obs = deepcopy(self.state)
-        if 'observation' in self.disturbances:
-            obs = self.disturbances['observation'].apply(obs, self)
 
         # Concatenate goal info (references state(s)) for RL.
         # Plus two because ctrl_step_counter has not incremented yet, and we want to return the obs (which would be
@@ -1034,6 +1032,9 @@ def _get_observation(self):
             obs = self.extend_obs(obs, 1)
         else:
             obs = self.extend_obs(obs, self.ctrl_step_counter + 2)
+
+        if 'observation' in self.disturbances:
+            obs = self.disturbances['observation'].apply(obs, self)
         return obs
 
     def _get_reward(self):