Skip to content

Commit

Permalink
Bug fix, reordering params, note for the future
Browse files Browse the repository at this point in the history
  • Loading branch information
RedTachyon committed May 24, 2023
1 parent 62103bf commit 39739d0
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 129 deletions.
60 changes: 31 additions & 29 deletions Assets/Scenes/NormalScene.unity
Original file line number Diff line number Diff line change
Expand Up @@ -591,36 +591,28 @@ MonoBehaviour:
m_Script: {fileID: 11500000, guid: e172f6c086e340a78b2a268ebb9264c3, type: 3}
m_Name:
m_EditorClassIdentifier:
numAgents: 1
numAgents: 20
dynamics: 3
observer: 0
observer: 2
initializer: 1
rewarder: 6
spawnNoiseScale: 1.5
spawnScale: 7
gridSpawn: 1
groupSpawnScale: 1.5
enableObstacles: 1
blockScale: 3
randomMass: 1
randomEnergy: 0
sharedGoal: 1
maxSpeed: 3
maxAcceleration: 5
rotationSpeed: 2
potential: 1
goal: 10
collision: -0.05
stepReward: -0.005
comfortSpeed: 1.33
comfortSpeedWeight: -0.75
comfortSpeedExponent: 1
standstillWeight: 0
standstillExponent: 0
goalSpeedThreshold: 0
comfortDistance: 0
comfortDistanceWeight: 0
familyGoalRadius: 0.5
energyWeight: 1
finalEnergyWeight: 1
potentialEnergyScale: 2
useComplexEnergy: 1
alignmentWeight: 1
rewardBMR: 1
rewardDrag: 1
rewardDynamics: 1
rewardPotential: 1
rewardDiffPotential: 1
rewardSpeedMatching: 1
rewardSpeedMatchingExp: 1
rewardSpeeding: 1
Expand All @@ -637,21 +629,30 @@ MonoBehaviour:
rayDegrees: 90
rayAgentVision: 1
destroyRaycasts: 0
spawnNoiseScale: 0
spawnScale: 7
gridSpawn: 1
groupSpawnScale: 1.5
enableObstacles: 0
blockScale: 3
randomMass: 1
randomEnergy: 0
sharedGoal: 1
evaluationMode: 0
savePath:
earlyFinish: 0
niceColors: 1
showAttention: 0
backwardsAllowed: 1
potential: 1
goal: 10
collision: -0.05
stepReward: -0.005
comfortSpeed: 1.33
comfortSpeedWeight: -0.75
comfortSpeedExponent: 1
standstillWeight: 0
standstillExponent: 0
goalSpeedThreshold: 0
comfortDistance: 0
comfortDistanceWeight: 0
familyGoalRadius: 0.5
energyWeight: 1
finalEnergyWeight: 1
potentialEnergyScale: 2
useComplexEnergy: 1
alignmentWeight: 1
--- !u!4 &218126829
Transform:
m_ObjectHideFlags: 0
Expand Down Expand Up @@ -6285,6 +6286,7 @@ MonoBehaviour:
r_drag: 0
r_dynamics: 0
r_potential: 0
r_diffPotential: 0
r_speedmatch: 0
r_speeding: 0
r_velocity: 0
Expand Down
7 changes: 4 additions & 3 deletions Assets/Scripts/MLUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -243,9 +243,10 @@ public static float FlatDistance(Vector3 a, Vector3 b)
/// <summary>
/// Estimates the energy required to cover the remaining flat distance between
/// <paramref name="position"/> and <paramref name="target"/>.
/// </summary>
/// <param name="position">Current position; only its flat distance to the target is used.</param>
/// <param name="target">Target position.</param>
/// <param name="e_s">Energy coefficient charged per unit of time.</param>
/// <param name="e_w">Energy coefficient charged per unit of time, scaled by speed squared.</param>
/// <returns><c>2 * sqrt(e_s * e_w) * distance</c>.</returns>
/// <remarks>
/// This is the closed form of <c>e_s * time + e_w * speed * speed * time</c> with
/// <c>speed = sqrt(e_s / e_w)</c> and <c>time = distance / speed</c>. Both terms reduce to
/// <c>sqrt(e_s * e_w) * distance</c>, hence the factor of two. Using the closed form directly
/// avoids the intermediate division by <c>e_w</c>.
/// </remarks>
public static float EnergyHeuristic(Vector3 position, Vector3 target, float e_s, float e_w)
{
    var distance = FlatDistance(position, target);

    return 2 * Mathf.Sqrt(e_s * e_w) * distance;
}
}
7 changes: 5 additions & 2 deletions Assets/Scripts/Managers/Manager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -417,9 +417,12 @@ private Dictionary<string, float> GetEpisodeStats()
energies.Add(agent.energySpent);
energiesComplex.Add(agent.energySpentComplex);

var finalDistance = MLUtils.FlatDistance(agent.transform.localPosition, agent.Goal.localPosition);
// var finalDistance = MLUtils.FlatDistance(agent.transform.localPosition, agent.Goal.localPosition);

var finalEnergy = 2 * Mathf.Sqrt(agent.e_s * agent.e_w * finalDistance);
// var finalEnergy = 2 * Mathf.Sqrt(agent.e_s * agent.e_w * finalDistance);

var finalEnergy = MLUtils.EnergyHeuristic(agent.transform.localPosition, agent.Goal.localPosition,
agent.e_s, agent.e_w);

energiesPlus.Add(agent.energySpent + finalEnergy);
energiesComplexPlus.Add(agent.energySpentComplex + finalEnergy);
Expand Down
184 changes: 93 additions & 91 deletions Assets/Scripts/Params.cs
Original file line number Diff line number Diff line change
Expand Up @@ -50,81 +50,46 @@ private void Awake()
public RewardersEnum rewarder = RewardersEnum.BaseRewarder;
public static RewardersEnum Rewarder => Enum.Parse<RewardersEnum>(Get("rewarder", Instance.rewarder.ToString()));

[Header("Physics")]
public float maxSpeed = 2f;
public static float MaxSpeed => Get("max_speed", Instance.maxSpeed);

public float maxAcceleration = 5f;
public static float MaxAcceleration => Get("max_acceleration", Instance.maxAcceleration);

public float rotationSpeed = 3f;
public static float RotationSpeed => Get("rotation_speed", Instance.rotationSpeed);



[Header("Reward settings")]
public float potential = 1f;
public static float Potential => Get("potential", Instance.potential);

public float goal = 10f;
public static float Goal => Get("goal", Instance.goal);

public float collision = -0.05f;
public static float Collision => Get("collision", Instance.collision);

public float stepReward = -0.005f;
public static float StepReward => Get("step_reward", Instance.stepReward);

// Spawn
[Header("Spawn settings")]
public float spawnNoiseScale = 0.5f;
public static float SpawnNoiseScale => Get("spawn_noise_scale", Instance.spawnNoiseScale);

public float comfortSpeed = 1.33f;
public static float ComfortSpeed => Get("comfort_speed", Instance.comfortSpeed);

public float comfortSpeedWeight = -0.75f;
public static float ComfortSpeedWeight => Get("comfort_speed_weight", Instance.comfortSpeedWeight);
public float spawnScale = 4f;
public static float SpawnScale => Get("spawn_scale", Instance.spawnScale);

public float comfortSpeedExponent = 1.0f;
public static float ComfortSpeedExponent => Get("comfort_speed_exponent", Instance.comfortSpeedExponent);
public bool gridSpawn = true;
public static bool GridSpawn => Convert.ToBoolean(Get("grid_spawn", Instance.gridSpawn ? 1f : 0f));

// Unused
public float groupSpawnScale = 1.5f;
public static float GroupSpawnScale => Get("group_spawn_scale", Instance.groupSpawnScale);

public float standstillWeight = 0f;
public static float StandstillWeight => Get("standstill_weight", Instance.standstillWeight);

public float standstillExponent = 0f;
public static float StandstillExponent => Get("standstill_exponent", Instance.standstillExponent);
public bool enableObstacles = true;
public static bool EnableObstacles => Convert.ToBoolean(Get("enable_obstacles", Instance.enableObstacles ? 1f : 0f));

public float goalSpeedThreshold = 0f;
public static float GoalSpeedThreshold => Get("goal_speed_threshold", Instance.goalSpeedThreshold);

public float blockScale = 1f;
public static float BlockScale => Get("block_scale", Instance.blockScale);

public float comfortDistance = 0f;
public static float ComfortDistance => Get("comfort_distance", Instance.comfortDistance);
public bool randomMass = false;
public static bool RandomMass => Convert.ToBoolean(Get("random_mass", Instance.randomMass ? 1f : 0f));

public float comfortDistanceWeight = 0f;
public static float ComfortDistanceWeight => Get("comfort_distance_weight", Instance.comfortDistanceWeight);


public float familyGoalRadius = 0.5f;
public static float FamilyGoalRadius => Get("family_goal_radius", Instance.familyGoalRadius);

// Energy rewarder
public bool randomEnergy = false;
public static bool RandomEnergy => Convert.ToBoolean(Get("random_energy", Instance.randomEnergy ? 1f : 0f));

public float energyWeight = 1f;
public static float EnergyWeight => Get("energy_weight", Instance.energyWeight);

public float finalEnergyWeight = 1f;
public static float FinalEnergyWeight => Get("final_energy_weight", Instance.finalEnergyWeight);

public float potentialEnergyScale = 2f;
public static float PotentialEnergyScale => Get("potential_energy_scale", Instance.potentialEnergyScale);
public bool sharedGoal;
public static bool SharedGoal => Convert.ToBoolean(Get("shared_goal", Instance.sharedGoal ? 1f : 0f));

public bool useComplexEnergy = true;
public static bool UseComplexEnergy => Convert.ToBoolean(Get("complex_energy", Instance.useComplexEnergy ? 1f : 0f));
[Header("Physics")]
public float maxSpeed = 2f;
public static float MaxSpeed => Get("max_speed", Instance.maxSpeed);

// Alignment
public float maxAcceleration = 5f;
public static float MaxAcceleration => Get("max_acceleration", Instance.maxAcceleration);

public float rotationSpeed = 3f;
public static float RotationSpeed => Get("rotation_speed", Instance.rotationSpeed);

public float alignmentWeight = 1f;
public static float AlignmentWeight => Get("alignment_weight", Instance.alignmentWeight);

[Header("Unified reward settings")]

Expand Down Expand Up @@ -198,35 +163,7 @@ private void Awake()
public bool destroyRaycasts = false; // Only at launch
public static bool DestroyRaycasts => Convert.ToBoolean(Get("destroy_raycasts", Instance.destroyRaycasts ? 1f : 0f));

// Spawn
[Header("Spawn settings")]
public float spawnNoiseScale = 0.5f;
public static float SpawnNoiseScale => Get("spawn_noise_scale", Instance.spawnNoiseScale);

public float spawnScale = 4f;
public static float SpawnScale => Get("spawn_scale", Instance.spawnScale);

public bool gridSpawn = true;
public static bool GridSpawn => Convert.ToBoolean(Get("grid_spawn", Instance.gridSpawn ? 1f : 0f));

public float groupSpawnScale = 1.5f;
public static float GroupSpawnScale => Get("group_spawn_scale", Instance.groupSpawnScale);

public bool enableObstacles = true;
public static bool EnableObstacles => Convert.ToBoolean(Get("enable_obstacles", Instance.enableObstacles ? 1f : 0f));

public float blockScale = 1f;
public static float BlockScale => Get("block_scale", Instance.blockScale);

public bool randomMass = false;
public static bool RandomMass => Convert.ToBoolean(Get("random_mass", Instance.randomMass ? 1f : 0f));

public bool randomEnergy = false;
public static bool RandomEnergy => Convert.ToBoolean(Get("random_energy", Instance.randomEnergy ? 1f : 0f));


public bool sharedGoal;
public static bool SharedGoal => Convert.ToBoolean(Get("shared_goal", Instance.sharedGoal ? 1f : 0f));

// Meta
[Header("Meta settings")]
Expand All @@ -247,6 +184,71 @@ private void Awake()

public bool backwardsAllowed = true;
public static bool BackwardsAllowed => Convert.ToBoolean(Get("backwards_allowed", Instance.backwardsAllowed ? 1f : 0f));


[Header("Reward settings")]
public float potential = 1f;
public static float Potential => Get("potential", Instance.potential);

public float goal = 10f;
public static float Goal => Get("goal", Instance.goal);

public float collision = -0.05f;
public static float Collision => Get("collision", Instance.collision);

public float stepReward = -0.005f;
public static float StepReward => Get("step_reward", Instance.stepReward);


public float comfortSpeed = 1.33f;
public static float ComfortSpeed => Get("comfort_speed", Instance.comfortSpeed);

public float comfortSpeedWeight = -0.75f;
public static float ComfortSpeedWeight => Get("comfort_speed_weight", Instance.comfortSpeedWeight);

public float comfortSpeedExponent = 1.0f;
public static float ComfortSpeedExponent => Get("comfort_speed_exponent", Instance.comfortSpeedExponent);

// Unused

public float standstillWeight = 0f;
public static float StandstillWeight => Get("standstill_weight", Instance.standstillWeight);

public float standstillExponent = 0f;
public static float StandstillExponent => Get("standstill_exponent", Instance.standstillExponent);

public float goalSpeedThreshold = 0f;
public static float GoalSpeedThreshold => Get("goal_speed_threshold", Instance.goalSpeedThreshold);


public float comfortDistance = 0f;
public static float ComfortDistance => Get("comfort_distance", Instance.comfortDistance);

public float comfortDistanceWeight = 0f;
public static float ComfortDistanceWeight => Get("comfort_distance_weight", Instance.comfortDistanceWeight);


public float familyGoalRadius = 0.5f;
public static float FamilyGoalRadius => Get("family_goal_radius", Instance.familyGoalRadius);

// Energy rewarder

public float energyWeight = 1f;
public static float EnergyWeight => Get("energy_weight", Instance.energyWeight);

public float finalEnergyWeight = 1f;
public static float FinalEnergyWeight => Get("final_energy_weight", Instance.finalEnergyWeight);

public float potentialEnergyScale = 2f;
public static float PotentialEnergyScale => Get("potential_energy_scale", Instance.potentialEnergyScale);

public bool useComplexEnergy = true;
public static bool UseComplexEnergy => Convert.ToBoolean(Get("complex_energy", Instance.useComplexEnergy ? 1f : 0f));

// Alignment

public float alignmentWeight = 1f;
public static float AlignmentWeight => Get("alignment_weight", Instance.alignmentWeight);



Expand Down
14 changes: 10 additions & 4 deletions Assets/Scripts/Rewards/DecisionRewarder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -149,13 +149,19 @@ public float FinishReward(Transform transform, bool success)
var agent = transform.GetComponent<AgentBasic>();
float reward = 0f;
if (success) return reward;

var penalty = -MLUtils.EnergyHeuristic(transform.localPosition, agent.Goal.localPosition, agent.e_s, agent.e_w);

var finalDistance = MLUtils.FlatDistance(transform.localPosition, agent.Goal.localPosition);
// var finalDistance = MLUtils.FlatDistance(transform.localPosition, agent.Goal.localPosition);

// var finalReward = -2 * Mathf.Sqrt(agent.e_s * agent.e_w * finalDistance);
// var finalReward = -2 * Mathf.Sqrt(agent.e_s * agent.e_w) * finalDistance;

var finalReward = -2 * Mathf.Sqrt(agent.e_s * agent.e_w * finalDistance);
reward += Params.RewFinal * penalty;
agent.AddRewardPart(penalty, "final");

reward += Params.RewFinal * finalReward;
agent.AddRewardPart(finalReward, "final");
// TODO: Instead of assuming the optimal velocity, use the average velocity across the trajectory so far
// TODO: Track both of them as a metric, but add a switch to choose which one to use for the reward

return reward;
}
Expand Down

0 comments on commit 39739d0

Please sign in to comment.