From ce6c073c29a93afc2147253991ddefac5ff87a73 Mon Sep 17 00:00:00 2001 From: Ammar Elsabe Date: Mon, 6 Mar 2023 13:00:11 +0400 Subject: [PATCH 1/3] Fix configuration example --- docs/component/rl/quickstart.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/component/rl/quickstart.rst b/docs/component/rl/quickstart.rst index 5e98e3baff..a77a99b327 100644 --- a/docs/component/rl/quickstart.rst +++ b/docs/component/rl/quickstart.rst @@ -54,9 +54,9 @@ QlibRL provides an example of an implementation of a single asset order executio # number of time indexes total_time: 240 # start time index - default_start_time: 0 + default_start_time_index: 0 # end time index - default_end_time: 240 + default_end_time_index: 240 proc_data_dim: 6 num_workers: 0 queue_size: 20 From 73216634a4c7a9687895c5e9e1d96539c80eabd8 Mon Sep 17 00:00:00 2001 From: Ammar Elsabe Date: Tue, 7 Mar 2023 01:51:01 +0400 Subject: [PATCH 2/3] Fixed configurations --- examples/rl/experiment_config/backtest/config.yml | 2 +- examples/rl/experiment_config/training/config.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/rl/experiment_config/backtest/config.yml b/examples/rl/experiment_config/backtest/config.yml index 418780c2cc..0d6ad0bd1e 100644 --- a/examples/rl/experiment_config/backtest/config.yml +++ b/examples/rl/experiment_config/backtest/config.yml @@ -53,5 +53,5 @@ strategies: module_path: qlib.rl.order_execution.policy kwargs: lr: 1.0e-4 - weight_file: ./checkpoints/latest.pth + weight_file: ./checkpoints/checkpoints/latest.pth concurrency: 5 diff --git a/examples/rl/experiment_config/training/config.yml b/examples/rl/experiment_config/training/config.yml index 7e50d3eee0..43f06ec914 100644 --- a/examples/rl/experiment_config/training/config.yml +++ b/examples/rl/experiment_config/training/config.yml @@ -32,8 +32,8 @@ data: order_dir: ./data/training_order_split data_dir: ./data/pickle_dataframe/backtest total_time: 240 - default_start_time: 0 - default_end_time: 240 + default_start_time_index: 0 + default_end_time_index: 240 proc_data_dim: 6 num_workers: 0 queue_size: 20 From ff5abbad30d51f7d031d9a28c0f30ed3f5f38e54 Mon Sep 17 00:00:00 2001 From: Ammar Elsabe Date: Tue, 7 Mar 2023 01:55:28 +0400 Subject: [PATCH 3/3] Fixes tianshou error AttributeError: 'dict' object has no attribute 'info' --- examples/rl/simple_example.ipynb | 2 +- qlib/rl/order_execution/strategy.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/rl/simple_example.ipynb b/examples/rl/simple_example.ipynb index 1e655ff184..7d38b3b5db 100644 --- a/examples/rl/simple_example.ipynb +++ b/examples/rl/simple_example.ipynb @@ -323,7 +323,7 @@ "simulator = SimpleSimulator(100.0, NSTEPS)\n", "state = simulator.get_state()\n", "obs = [{\"obs\": state_interpreter.interpret(state)}]\n", - "policy_out = policy(Batch(obs))\n", + "policy_out = policy(Batch(obs, info=None))\n", "act = float(action_interpreter.interpret(state, policy_out.act))\n", "\n", "simulator.step(act)\n", diff --git a/qlib/rl/order_execution/strategy.py b/qlib/rl/order_execution/strategy.py index b6f5e12b24..0b84613b67 100644 --- a/qlib/rl/order_execution/strategy.py +++ b/qlib/rl/order_execution/strategy.py @@ -527,7 +527,7 @@ def _generate_trade_decision(self, execute_result: list | None = None) -> BaseTr obs_batch.append({"obs": self._state_interpreter.interpret(state)}) with torch.no_grad(): - policy_out = self._policy(Batch(obs_batch)) + policy_out = self._policy(Batch(obs_batch, info=None)) act = policy_out.act.numpy() if torch.is_tensor(policy_out.act) else policy_out.act exec_vols = [self._action_interpreter.interpret(s, a) for s, a in zip(states, act)]