ray-project · sven1977 · Jun 13, 2024 · Jun 13, 2024 · Jun 13, 2024 · Oct 27, 2024
@@ -1,7 +1,7 @@
 .. note::
 
     Ray 2.10.0 introduces the alpha stage of RLlib's "new API stack".
-    The Ray Team plans to transition algorithms, example scripts, and documentation to the new code base
-    thereby incrementally replacing the "old API stack" (e.g., ModelV2, Policy, RolloutWorker) throughout the subsequent minor releases leading up to Ray 3.0.
+    The team is currently transitioning algorithms, example scripts, and documentation
+    to the new code base and will continue to do so throughout the subsequent minor releases leading up to Ray 3.0.
 
-    :doc:`See here </rllib/rllib-new-api-stack>` for more details on how to use the new API stack.
+    :doc:`See here </rllib/rllib-new-api-stack>` for more details on how to activate and use the new API stack.
@@ -136,7 +136,10 @@
     best_result = results.get_best_result(
         metric=f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}", mode="max"
     )
-    # Create new Algorithm and restore its state from the last checkpoint.
+    # Create new RLModule and restore its state from the last algo checkpoint.
+    # Note that the checkpoint for the RLModule can be found deeper inside the algo
+    # checkpoint's sub-directories ([algo dir] -> "learner/" -> "module_state/" ->
+    # "[module ID]"):
     rl_module = RLModule.from_checkpoint(
         os.path.join(
             best_result.checkpoint.path,
@@ -147,7 +150,7 @@
         )
     )
 
-    # Create the env to do inference in.
+    # Create a env to do inference in.
-    # Create a env to do inference in.
+    # Create an env to do inference in.
-    # Create a env to do inference in.
+    # Create an env to do inference in.
     env = gym.make(args.env)
     obs, info = env.reset()