diff --git a/docs/component/rl/quickstart.rst b/docs/component/rl/quickstart.rst
index 5e98e3baff..a77a99b327 100644
--- a/docs/component/rl/quickstart.rst
+++ b/docs/component/rl/quickstart.rst
@@ -54,9 +54,9 @@ QlibRL provides an example of an implementation of a single asset order executio
         # number of time indexes
         total_time: 240
         # start time index
-        default_start_time: 0
+        default_start_time_index: 0
         # end time index
-        default_end_time: 240
+        default_end_time_index: 240
         proc_data_dim: 6
       num_workers: 0
       queue_size: 20
diff --git a/examples/rl/experiment_config/backtest/config.yml b/examples/rl/experiment_config/backtest/config.yml
index 418780c2cc..0d6ad0bd1e 100644
--- a/examples/rl/experiment_config/backtest/config.yml
+++ b/examples/rl/experiment_config/backtest/config.yml
@@ -53,5 +53,5 @@ strategies:
       module_path: qlib.rl.order_execution.policy
       kwargs:
         lr: 1.0e-4
-        weight_file: ./checkpoints/latest.pth
+        weight_file: ./checkpoints/checkpoints/latest.pth
 concurrency: 5
diff --git a/examples/rl/experiment_config/training/config.yml b/examples/rl/experiment_config/training/config.yml
index 7e50d3eee0..43f06ec914 100644
--- a/examples/rl/experiment_config/training/config.yml
+++ b/examples/rl/experiment_config/training/config.yml
@@ -32,8 +32,8 @@ data:
     order_dir: ./data/training_order_split
     data_dir: ./data/pickle_dataframe/backtest
     total_time: 240
-    default_start_time: 0
-    default_end_time: 240
+    default_start_time_index: 0
+    default_end_time_index: 240
     proc_data_dim: 6
   num_workers: 0
   queue_size: 20
diff --git a/examples/rl/simple_example.ipynb b/examples/rl/simple_example.ipynb
index 1e655ff184..7d38b3b5db 100644
--- a/examples/rl/simple_example.ipynb
+++ b/examples/rl/simple_example.ipynb
@@ -323,7 +323,7 @@
     "simulator = SimpleSimulator(100.0, NSTEPS)\n",
     "state = simulator.get_state()\n",
     "obs = [{\"obs\": state_interpreter.interpret(state)}]\n",
-    "policy_out = policy(Batch(obs))\n",
+    "policy_out = policy(Batch(obs, info=None))\n",
     "act = float(action_interpreter.interpret(state, policy_out.act))\n",
     "\n",
     "simulator.step(act)\n",
diff --git a/qlib/rl/order_execution/strategy.py b/qlib/rl/order_execution/strategy.py
index b6f5e12b24..0b84613b67 100644
--- a/qlib/rl/order_execution/strategy.py
+++ b/qlib/rl/order_execution/strategy.py
@@ -527,7 +527,7 @@ def _generate_trade_decision(self, execute_result: list | None = None) -> BaseTr
             obs_batch.append({"obs": self._state_interpreter.interpret(state)})
 
         with torch.no_grad():
-            policy_out = self._policy(Batch(obs_batch))
+            policy_out = self._policy(Batch(obs_batch, info=None))
         act = policy_out.act.numpy() if torch.is_tensor(policy_out.act) else policy_out.act
         exec_vols = [self._action_interpreter.interpret(s, a) for s, a in zip(states, act)]
 
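Note: the `Batch(..., info=None)` change is the same fix applied in both the notebook and `SAOEIntStrategy._generate_trade_decision`; presumably the batch needs an explicit `info` field so that tianshou's `policy.forward` can access it. Below is a minimal sketch of the shared inference pattern, assuming a tianshou-style `policy` and the `state_interpreter` / `action_interpreter` objects used elsewhere in this diff (`states` here is a hypothetical list of simulator states, not a name from the patch):

```python
import torch
from tianshou.data import Batch

# One observation dict per state, produced by the state interpreter.
obs = [{"obs": state_interpreter.interpret(state)} for state in states]

# Pass an explicit info=None so the batch carries an `info` field,
# matching the calls patched above.
with torch.no_grad():
    policy_out = policy(Batch(obs, info=None))

# policy_out.act may be a tensor or an ndarray depending on the policy.
act = policy_out.act.numpy() if torch.is_tensor(policy_out.act) else policy_out.act

# Map each action back to an execution volume via the action interpreter.
exec_vols = [action_interpreter.interpret(s, a) for s, a in zip(states, act)]
```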