diff --git a/.github/workflows/test_release.yml b/.github/workflows/test_release.yml deleted file mode 100644 index 67ca365..0000000 --- a/.github/workflows/test_release.yml +++ /dev/null @@ -1,56 +0,0 @@ -name: Publish to TestPyPI - -on: - # Trigger this workflow when a new tag is pushed - push: - tags: - - 'test*' - -jobs: - build: - name: Build distribution 📦 - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: "3.x" - - name: Install pypa/build - run: >- - python3 -m - pip install - build - --user - - name: Build a binary wheel and a source tarball - run: python3 -m build - - name: Store the distribution packages - uses: actions/upload-artifact@v4 - with: - name: python-package-distributions - path: dist/ - - publish-to-testpypi: - name: Publish Python 🐍 distribution 📦 to TestPyPI - needs: - - build - runs-on: ubuntu-latest - - environment: - name: testpypi - url: https://test.pypi.org/p/GenerativeRL - - permissions: - id-token: write # IMPORTANT: mandatory for trusted publishing - - steps: - - name: Download all the dists - uses: actions/download-artifact@v4 - with: - name: python-package-distributions - path: dist/ - - name: Publish distribution 📦 to TestPyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - repository-url: https://test.pypi.org/legacy/ diff --git a/README.md b/README.md index 1feb2e5..544a611 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Generative Reinforcement Learning (GRL) +# Generative Reinforcement Learning [![Twitter](https://img.shields.io/twitter/url?style=social&url=https%3A%2F%2Ftwitter.com%2Fopendilab)](https://twitter.com/opendilab) [![GitHub stars](https://img.shields.io/github/stars/opendilab/GenerativeRL)](https://github.com/opendilab/GenerativeRL/stargazers) @@ -57,6 +57,7 @@ English | [简体中文(Simplified Chinese)](https://github.com/opendilab/Genera | Algo./Models | Diffusion Model | Flow Model | 
|---------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------- | +| [IDQL](https://arxiv.org/abs/2304.10573) | ✔ | 🚫 | | [QGPO](https://arxiv.org/abs/2304.12824) | ✔ | 🚫 | | [SRPO](https://arxiv.org/abs/2310.07297) | ✔ | 🚫 | | GMPO | ✔ [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1A79ueOdLvTfrytjOPyfxb6zSKXi1aePv) | ✔ | @@ -66,7 +67,7 @@ English | [简体中文(Simplified Chinese)](https://github.com/opendilab/Genera ## Installation ```bash -pip install grl +pip install GenerativeRL ``` Or, if you want to install from source: @@ -142,6 +143,18 @@ We offer some baseline experiments to evaluate the performance of generative rei We welcome contributions to GenerativeRL! If you are interested in contributing, please refer to the [Contributing Guide](CONTRIBUTING.md). +## Citation + +```latex +@misc{generative_rl, + title={GenerativeRL: A Python Library for Solving Reinforcement Learning Problems Using Generative Models}, + author={Zhang, Jinouwen and Xue, Rongkun and Niu, Yazhe and Chen, Yun and Chen, Xinyan and Wang, Ruiheng and Liu, Yu}, + publisher={GitHub}, + howpublished={\url{https://github.com/opendilab/GenerativeRL}}, + year={2024}, +} +``` + ## License GenerativeRL is licensed under the Apache License 2.0. See [LICENSE](LICENSE) for more details. 
diff --git a/README.zh.md b/README.zh.md index 00d7263..f413088 100644 --- a/README.zh.md +++ b/README.zh.md @@ -55,6 +55,7 @@ | 算法/模型 | 扩散模型 | 流模型 | |---------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------- | +| [IDQL](https://arxiv.org/abs/2304.10573) | ✔ | 🚫 | | [QGPO](https://arxiv.org/abs/2304.12824) | ✔ | 🚫 | | [SRPO](https://arxiv.org/abs/2310.07297) | ✔ | 🚫 | | GMPO | ✔ [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1A79ueOdLvTfrytjOPyfxb6zSKXi1aePv) | ✔ | @@ -63,7 +64,7 @@ ## 安装 ```bash -pip install grl +pip install GenerativeRL ``` 或者,如果你想从源码安装: @@ -139,6 +140,18 @@ if __name__ == '__main__': 我们欢迎所有对 GenerativeRL 的贡献和支持!请参考 [开源贡献指南](CONTRIBUTING.md)。 +## 引用 + +```latex +@misc{generative_rl, + title={GenerativeRL: A Python Library for Solving Reinforcement Learning Problems Using Generative Models}, + author={Zhang, Jinouwen and Xue, Rongkun and Niu, Yazhe and Chen, Yun and Chen, Xinyan and Wang, Ruiheng and Liu, Yu}, + publisher={GitHub}, + howpublished={\url{https://github.com/opendilab/GenerativeRL}}, + year={2024}, +} +``` + ## 开源协议 GenerativeRL 开源协议为 Apache License 2.0。更多信息和文档,请参考 [开源协议](LICENSE)。 diff --git a/docs/source/tutorials/installation/index.rst b/docs/source/tutorials/installation/index.rst index 000f97a..633d77c 100644 --- a/docs/source/tutorials/installation/index.rst +++ b/docs/source/tutorials/installation/index.rst @@ -5,7 +5,7 @@ GenerativeRL can be installed using pip: .. 
code-block:: console - $ pip install grl + $ pip install GenerativeRL You can also install the latest development version from GitHub: diff --git a/grl_pipelines/tutorials/README.md b/grl_pipelines/tutorials/README.md index 174408e..af6086b 100644 --- a/grl_pipelines/tutorials/README.md +++ b/grl_pipelines/tutorials/README.md @@ -2,22 +2,52 @@ English | [简体中文(Simplified Chinese)](https://github.com/opendilab/GenerativeRL/tree/main/grl_pipelines/tutorials/README.zh.md) -## Train a Generative Model +## Train a generative model -### Diffusion Model +### Diffusion model -We provide a simple colab notebook to demonstrate how to build a diffusion model using the `grl` library. You can access the notebook [here](https://colab.research.google.com/drive/18yHUAmcMh_7xq2U6TBCtcLKX2y4YvNyk#scrollTo=aqtDAvG6cQ1V). +We provide a simple colab notebook to demonstrate how to build a diffusion model using the `GenerativeRL` library. You can access the notebook [here](https://colab.research.google.com/drive/18yHUAmcMh_7xq2U6TBCtcLKX2y4YvNyk#scrollTo=aqtDAvG6cQ1V). -### Flow Model +### Flow model -We provide a simple colab notebook to demonstrate how to build a flow model using the `grl` library. You can access the notebook [here](https://colab.research.google.com/drive/1vrxREVXKsSbnsv9G2CnKPVvrbFZleElI?usp=drive_link). +We provide a simple colab notebook to demonstrate how to build a flow model using the `GenerativeRL` library. You can access the notebook [here](https://colab.research.google.com/drive/1vrxREVXKsSbnsv9G2CnKPVvrbFZleElI?usp=drive_link). -## Evaluate a Generative Model +## Evaluate a generative model -### Sample Generation +### Sample generation -We provide a simple colab notebook to demonstrate how to generate samples from a trained generative model using the `grl` library. You can access the notebook [here](https://colab.research.google.com/drive/16jQhf1BDjtToxMZ4lDxB4IwGdRmr074j?usp=sharing). 
+We provide a simple colab notebook to demonstrate how to generate samples from a trained generative model using the `GenerativeRL` library. You can access the notebook [here](https://colab.research.google.com/drive/16jQhf1BDjtToxMZ4lDxB4IwGdRmr074j?usp=sharing). -### Density Estimation +### Density estimation -We provide a simple colab notebook to demonstrate how to estimate the density of samples using a trained generative model using the `grl` library. You can access the notebook [here](https://colab.research.google.com/drive/1zHsW13n338YqX87AIWG26KLC4uKQL1ZP?usp=sharing). +We provide a simple colab notebook to demonstrate how to estimate the density of samples using a trained generative model using the `GenerativeRL` library. You can access the notebook [here](https://colab.research.google.com/drive/1zHsW13n338YqX87AIWG26KLC4uKQL1ZP?usp=sharing). + +## Tutorials via toy examples + +We provide several toy examples to demonstrate the features of the `GenerativeRL` library. You can access the examples [here](https://github.com/opendilab/GenerativeRL/tree/main/grl_pipelines/tutorials/). 
+ +### Diverse generative models + +- [Diffusion Model](https://github.com/opendilab/GenerativeRL/tree/main/grl_pipelines/tutorials/generative_models/swiss_roll_diffusion.py) +- [Energy Conditional Diffusion Model](https://github.com/opendilab/GenerativeRL/tree/main/grl_pipelines/tutorials/generative_models/swiss_roll_energy_condition.py) +- [Independent Conditional Flow Matching Model](https://github.com/opendilab/GenerativeRL/tree/main/grl_pipelines/tutorials/generative_models/swiss_roll_icfm.py) +- [Optimal Transport Conditional Flow Matching Model](https://github.com/opendilab/GenerativeRL/tree/main/grl_pipelines/tutorials/generative_models/swiss_roll_otcfm.py) +- [SF2M](https://github.com/opendilab/GenerativeRL/tree/main/grl_pipelines/tutorials/generative_models/swiss_roll_sf2m.py) + +### Generative model applications + +- [World Model](https://github.com/opendilab/GenerativeRL/tree/main/grl_pipelines/tutorials/applications/swiss_roll_world_model.py) + +### Generative model evaluation + +- [Likelihood Evaluation](https://github.com/opendilab/GenerativeRL/tree/main/grl_pipelines/tutorials/metrics/swiss_roll_likelihood.py) + +### ODE/SDE solver usage + +- [DPM Solver](https://github.com/opendilab/GenerativeRL/tree/main/grl_pipelines/tutorials/solvers/swiss_roll_dpmsolver.py) +- [SDE Solver](https://github.com/opendilab/GenerativeRL/tree/main/grl_pipelines/tutorials/solvers/swiss_roll_sdesolver.py) + +### Special usages in GenerativeRL + +- [Customized Neural Network Modules](https://github.com/opendilab/GenerativeRL/tree/main/grl_pipelines/tutorials/special_usages/customized_modules.py) +- [Dict-like Structure Data Generation](https://github.com/opendilab/GenerativeRL/tree/main/grl_pipelines/tutorials/special_usages/dict_tensor_ode.py) diff --git a/grl_pipelines/tutorials/README.zh.md b/grl_pipelines/tutorials/README.zh.md index 4991c1d..72e5252 100644 --- a/grl_pipelines/tutorials/README.zh.md +++ b/grl_pipelines/tutorials/README.zh.md @@ -6,18 +6,48 @@ ### 
扩散模型 -我们提供了一个简单的 colab 笔记本,演示如何使用 `grl` 库构建扩散模型。您可以在[这里](https://colab.research.google.com/drive/18yHUAmcMh_7xq2U6TBCtcLKX2y4YvNyk#scrollTo=aqtDAvG6cQ1V)访问笔记本。 +我们提供了一个简单的 colab 笔记本,演示如何使用 `GenerativeRL` 库构建扩散模型。您可以在[这里](https://colab.research.google.com/drive/18yHUAmcMh_7xq2U6TBCtcLKX2y4YvNyk#scrollTo=aqtDAvG6cQ1V)访问笔记本。 ### 流模型 -我们提供了一个简单的 colab 笔记本,演示如何使用 `grl` 库构建流模型。您可以在[这里](https://colab.research.google.com/drive/1vrxREVXKsSbnsv9G2CnKPVvrbFZleElI?usp=drive_link)访问笔记本。 +我们提供了一个简单的 colab 笔记本,演示如何使用 `GenerativeRL` 库构建流模型。您可以在[这里](https://colab.research.google.com/drive/1vrxREVXKsSbnsv9G2CnKPVvrbFZleElI?usp=drive_link)访问笔记本。 ## 评估生成模型 ### 采样生成 -我们提供了一个简单的 colab 笔记本,演示如何使用 `grl` 库从训练有素的生成模型生成样本。您可以在[这里](https://colab.research.google.com/drive/16jQhf1BDjtToxMZ4lDxB4IwGdRmr074j?usp=sharing)访问笔记本。 +我们提供了一个简单的 colab 笔记本,演示如何使用 `GenerativeRL` 库从训练有素的生成模型生成样本。您可以在[这里](https://colab.research.google.com/drive/16jQhf1BDjtToxMZ4lDxB4IwGdRmr074j?usp=sharing)访问笔记本。 ### 概率密度估计 -我们提供了一个简单的 colab 笔记本,演示如何使用 `grl` 库从训练有素的生成模型估计样本的概率密度。您可以在[这里](https://colab.research.google.com/drive/1zHsW13n338YqX87AIWG26KLC4uKQL1ZP?usp=sharing)访问笔记本。 +我们提供了一个简单的 colab 笔记本,演示如何使用 `GenerativeRL` 库从训练有素的生成模型估计样本的概率密度。您可以在[这里](https://colab.research.google.com/drive/1zHsW13n338YqX87AIWG26KLC4uKQL1ZP?usp=sharing)访问笔记本。 + +## 玩具示例教程 + +我们提供了几个玩具示例,演示了 `GenerativeRL` 库的特性。您可以在[这里](https://github.com/opendilab/GenerativeRL/tree/main/grl_pipelines/tutorials/)访问示例。 + +### 多种生成模型 + +- [扩散模型](https://github.com/opendilab/GenerativeRL/tree/main/grl_pipelines/tutorials/generative_models/swiss_roll_diffusion.py) +- [能量条件扩散模型](https://github.com/opendilab/GenerativeRL/tree/main/grl_pipelines/tutorials/generative_models/swiss_roll_energy_condition.py) +- [独立条件流匹配模型](https://github.com/opendilab/GenerativeRL/tree/main/grl_pipelines/tutorials/generative_models/swiss_roll_icfm.py) +- [最优输运条件流匹配模型](https://github.com/opendilab/GenerativeRL/tree/main/grl_pipelines/tutorials/generative_models/swiss_roll_otcfm.py) +- 
[SF2M](https://github.com/opendilab/GenerativeRL/tree/main/grl_pipelines/tutorials/generative_models/swiss_roll_sf2m.py) + +### 生成模型应用 + +- [世界模型](https://github.com/opendilab/GenerativeRL/tree/main/grl_pipelines/tutorials/applications/swiss_roll_world_model.py) + +### 生成模型评估 + +- [似然性评估](https://github.com/opendilab/GenerativeRL/tree/main/grl_pipelines/tutorials/metrics/swiss_roll_likelihood.py) + +### ODE/SDE 求解器用法 + +- [DPM 求解器](https://github.com/opendilab/GenerativeRL/tree/main/grl_pipelines/tutorials/solvers/swiss_roll_dpmsolver.py) +- [SDE 求解器](https://github.com/opendilab/GenerativeRL/tree/main/grl_pipelines/tutorials/solvers/swiss_roll_sdesolver.py) + +### GenerativeRL 的特殊用法 + +- [自定义神经网络模块](https://github.com/opendilab/GenerativeRL/tree/main/grl_pipelines/tutorials/special_usages/customized_modules.py) +- [类似字典结构的数据生成](https://github.com/opendilab/GenerativeRL/tree/main/grl_pipelines/tutorials/special_usages/dict_tensor_ode.py) diff --git a/grl_pipelines/tutorials/rl_examples/swiss_roll_world_model.py b/grl_pipelines/tutorials/applications/swiss_roll_world_model.py similarity index 100% rename from grl_pipelines/tutorials/rl_examples/swiss_roll_world_model.py rename to grl_pipelines/tutorials/applications/swiss_roll_world_model.py diff --git a/grl_pipelines/tutorials/toy_examples/swiss_roll/swiss_roll_diffusion.py b/grl_pipelines/tutorials/generative_models/swiss_roll_diffusion.py similarity index 100% rename from grl_pipelines/tutorials/toy_examples/swiss_roll/swiss_roll_diffusion.py rename to grl_pipelines/tutorials/generative_models/swiss_roll_diffusion.py diff --git a/grl_pipelines/tutorials/toy_examples/swiss_roll_discrete/swiss_roll_discrete_flow_model.py b/grl_pipelines/tutorials/generative_models/swiss_roll_discrete_flow_model.py similarity index 100% rename from grl_pipelines/tutorials/toy_examples/swiss_roll_discrete/swiss_roll_discrete_flow_model.py rename to grl_pipelines/tutorials/generative_models/swiss_roll_discrete_flow_model.py diff 
--git a/grl_pipelines/tutorials/toy_examples/swiss_roll/swiss_roll_energy_condition.py b/grl_pipelines/tutorials/generative_models/swiss_roll_energy_condition.py similarity index 100% rename from grl_pipelines/tutorials/toy_examples/swiss_roll/swiss_roll_energy_condition.py rename to grl_pipelines/tutorials/generative_models/swiss_roll_energy_condition.py diff --git a/grl_pipelines/tutorials/toy_examples/swiss_roll/swiss_roll_icfm.py b/grl_pipelines/tutorials/generative_models/swiss_roll_icfm.py similarity index 100% rename from grl_pipelines/tutorials/toy_examples/swiss_roll/swiss_roll_icfm.py rename to grl_pipelines/tutorials/generative_models/swiss_roll_icfm.py diff --git a/grl_pipelines/tutorials/toy_examples/swiss_roll/swiss_roll_icfm_with_mask.py b/grl_pipelines/tutorials/generative_models/swiss_roll_icfm_with_mask.py similarity index 100% rename from grl_pipelines/tutorials/toy_examples/swiss_roll/swiss_roll_icfm_with_mask.py rename to grl_pipelines/tutorials/generative_models/swiss_roll_icfm_with_mask.py diff --git a/grl_pipelines/tutorials/toy_examples/swiss_roll/swiss_roll_otcfm.py b/grl_pipelines/tutorials/generative_models/swiss_roll_otcfm.py similarity index 100% rename from grl_pipelines/tutorials/toy_examples/swiss_roll/swiss_roll_otcfm.py rename to grl_pipelines/tutorials/generative_models/swiss_roll_otcfm.py diff --git a/grl_pipelines/tutorials/toy_examples/swiss_roll/swiss_roll_sf2m.py b/grl_pipelines/tutorials/generative_models/swiss_roll_sf2m.py similarity index 100% rename from grl_pipelines/tutorials/toy_examples/swiss_roll/swiss_roll_sf2m.py rename to grl_pipelines/tutorials/generative_models/swiss_roll_sf2m.py diff --git a/grl_pipelines/tutorials/toy_examples/swiss_roll/swiss_roll_likelihood.py b/grl_pipelines/tutorials/metrics/swiss_roll_likelihood.py similarity index 100% rename from grl_pipelines/tutorials/toy_examples/swiss_roll/swiss_roll_likelihood.py rename to grl_pipelines/tutorials/metrics/swiss_roll_likelihood.py diff --git 
a/grl_pipelines/tutorials/toy_examples/swiss_roll/swiss_roll_dpmsolver.py b/grl_pipelines/tutorials/solvers/swiss_roll_dpmsolver.py similarity index 100% rename from grl_pipelines/tutorials/toy_examples/swiss_roll/swiss_roll_dpmsolver.py rename to grl_pipelines/tutorials/solvers/swiss_roll_dpmsolver.py diff --git a/grl_pipelines/tutorials/toy_examples/swiss_roll/swiss_roll_sdesolver.py b/grl_pipelines/tutorials/solvers/swiss_roll_sdesolver.py similarity index 100% rename from grl_pipelines/tutorials/toy_examples/swiss_roll/swiss_roll_sdesolver.py rename to grl_pipelines/tutorials/solvers/swiss_roll_sdesolver.py diff --git a/grl_pipelines/tutorials/customized_modules.py b/grl_pipelines/tutorials/special_usages/customized_modules.py similarity index 99% rename from grl_pipelines/tutorials/customized_modules.py rename to grl_pipelines/tutorials/special_usages/customized_modules.py index 3840d3f..e5db821 100644 --- a/grl_pipelines/tutorials/customized_modules.py +++ b/grl_pipelines/tutorials/special_usages/customized_modules.py @@ -100,7 +100,7 @@ lr=5e-3, data_num=10000, iterations=1000, - batch_size=2048, + batch_size=4096, clip_grad_norm=1.0, eval_freq=500, checkpoint_freq=100, diff --git a/grl_pipelines/tutorials/dict_tensor_ode.py b/grl_pipelines/tutorials/special_usages/dict_tensor_ode.py similarity index 99% rename from grl_pipelines/tutorials/dict_tensor_ode.py rename to grl_pipelines/tutorials/special_usages/dict_tensor_ode.py index 1d0eae4..6a951c9 100644 --- a/grl_pipelines/tutorials/dict_tensor_ode.py +++ b/grl_pipelines/tutorials/special_usages/dict_tensor_ode.py @@ -124,7 +124,7 @@ def forward( lr=5e-4, data_num=10000, iterations=3000, - batch_size=2048, + batch_size=4096, clip_grad_norm=1.0, eval_freq=2999, checkpoint_freq=100,