From e11c7b88fc0f2e75b1c6dc9cbf557fccce1ade2e Mon Sep 17 00:00:00 2001
From: co63oc
Date: Fri, 27 Oct 2023 08:10:32 +0800
Subject: [PATCH] Convert hydra example

---
 .../cylinder2d_unsteady_transformer_physx.md | 96 +++++----
 docs/zh/examples/lorenz.md | 94 +++++----
 docs/zh/examples/rossler.md | 96 +++++----
 .../transformer_physx/conf/enn.yaml | 54 +++++
 .../transformer_physx/conf/transformer.yaml | 65 ++++++
 .../transformer_physx/train_enn.py | 193 +++++++++++++-----
 .../transformer_physx/train_transformer.py | 170 +++++++++------
 examples/lorenz/conf/enn.yaml | 54 +++++
 examples/lorenz/conf/transformer.yaml | 65 ++++++
 examples/lorenz/train_enn.py | 191 ++++++++++++-----
 examples/lorenz/train_transformer.py | 162 +++++++++------
 examples/rossler/conf/enn.yaml | 54 +++++
 examples/rossler/conf/transformer.yaml | 65 ++++++
 examples/rossler/train_enn.py | 186 ++++++++++++-----
 examples/rossler/train_transformer.py | 166 +++++++++------
 15 files changed, 1273 insertions(+), 438 deletions(-)
 create mode 100644 examples/cylinder/2d_unsteady/transformer_physx/conf/enn.yaml
 create mode 100644 examples/cylinder/2d_unsteady/transformer_physx/conf/transformer.yaml
 create mode 100644 examples/lorenz/conf/enn.yaml
 create mode 100644 examples/lorenz/conf/transformer.yaml
 create mode 100644 examples/rossler/conf/enn.yaml
 create mode 100644 examples/rossler/conf/transformer.yaml

diff --git a/docs/zh/examples/cylinder2d_unsteady_transformer_physx.md b/docs/zh/examples/cylinder2d_unsteady_transformer_physx.md
index 500e8fd94..a85175f2f 100644
--- a/docs/zh/examples/cylinder2d_unsteady_transformer_physx.md
+++ b/docs/zh/examples/cylinder2d_unsteady_transformer_physx.md
@@ -2,6 +2,30 @@

 AI Studio快速体验

+=== "模型训练命令"
+
+    ``` sh
+    # linux
+    wget https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/cylinder_training.hdf5
+    wget https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/cylinder_valid.hdf5
+    # windows
+    # curl https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/cylinder_training.hdf5 --output cylinder_training.hdf5
+    # curl https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/cylinder_valid.hdf5 --output cylinder_valid.hdf5
+    python train_enn.py
+    ```
+
+=== "模型评估命令"
+
+    ``` sh
+    # linux
+    wget https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/cylinder_training.hdf5
+    wget https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/cylinder_valid.hdf5
+    # windows
+    # curl https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/cylinder_training.hdf5 --output cylinder_training.hdf5
+    # curl https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/cylinder_valid.hdf5 --output cylinder_valid.hdf5
+    python train_enn.py mode=eval EVAL.pretrained_model_path=https://paddle-org.bj.bcebos.com/paddlescience/models/cylinder/cylinder_pretrained.pdparams
+    ```
+
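+Hydra 还支持在命令行中覆盖 yaml 中的任意配置项。以下为一个简单示意(仅供参考,键名取自本次新增的 conf/enn.yaml):
+
+``` sh
+# 示意:覆盖训练轮数、训练批大小与输出目录
+python train_enn.py TRAIN.epochs=100 TRAIN.batch_size.train=32 output_dir=./my_output
+```
+
 ## 1. 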
背景简介

 圆柱绕流问题可以应用于很多领域。例如,在工业设计中,它可以被用来模拟和优化流体在各种设备中的流动,如风力发电机、汽车和飞机的流体动力学性能等。在环保领域,圆柱绕流问题也有应用,如预测和控制河流的洪水、研究污染物的扩散等。此外,在工程实践中,如流体动力学、流体静力学、热交换、空气动力学等领域,圆柱绕流问题也具有实际意义。

@@ -113,9 +137,9 @@ $$Re \sim(100, 750)$$

 首先展示代码中定义的各个参数变量,每个参数的具体含义会在下面使用到时进行解释。

-``` py linenums="50" title="examples/cylinder/2d_unsteady/transformer_physx/train_enn.py"
+``` py linenums="58" title="examples/cylinder/2d_unsteady/transformer_physx/train_enn.py"
 --8<--
-examples/cylinder/2d_unsteady/transformer_physx/train_enn.py:50:65
+examples/cylinder/2d_unsteady/transformer_physx/train_enn.py:58:59
 --8<--
 ```

@@ -123,9 +147,9 @@

 本案例基于数据驱动的方法求解问题,因此需要使用 PaddleScience 内置的 `SupervisedConstraint` 构建监督约束。在定义约束之前,需要首先指定监督约束中用于数据加载的各个参数,代码如下:

-``` py linenums="70" title="examples/cylinder/2d_unsteady/transformer_physx/train_enn.py"
+``` py linenums="61" title="examples/cylinder/2d_unsteady/transformer_physx/train_enn.py"
 --8<--
-examples/cylinder/2d_unsteady/transformer_physx/train_enn.py:70:87
+examples/cylinder/2d_unsteady/transformer_physx/train_enn.py:61:80
 --8<--
 ```

@@ -144,9 +168,9 @@

 定义监督约束的代码如下:

-``` py linenums="89" title="examples/cylinder/2d_unsteady/transformer_physx/train_enn.py"
+``` py linenums="82" title="examples/cylinder/2d_unsteady/transformer_physx/train_enn.py"
 --8<--
-examples/cylinder/2d_unsteady/transformer_physx/train_enn.py:89:97
+examples/cylinder/2d_unsteady/transformer_physx/train_enn.py:82:94
 --8<--
 ```

@@ -169,17 +193,17 @@

 用 PaddleScience 代码表示如下:

-``` py linenums="102" title="examples/cylinder/2d_unsteady/transformer_physx/train_enn.py"
+``` py linenums="104" title="examples/cylinder/2d_unsteady/transformer_physx/train_enn.py"
 --8<--
-examples/cylinder/2d_unsteady/transformer_physx/train_enn.py:102:108
+examples/cylinder/2d_unsteady/transformer_physx/train_enn.py:104:109
 --8<--
 ```

 其中,`CylinderEmbedding` 的前两个参数在前文中已有描述,这里不再赘述,网络模型的第三、四个参数是训练数据集的均值和方差,用于归一化输入数据。计算均值、方差的代码表示如下:

-``` py linenums="29" title="examples/cylinder/2d_unsteady/transformer_physx/train_enn.py"
+``` py linenums="32" title="examples/cylinder/2d_unsteady/transformer_physx/train_enn.py"
 --8<--
-examples/cylinder/2d_unsteady/transformer_physx/train_enn.py:29:46
+examples/cylinder/2d_unsteady/transformer_physx/train_enn.py:32:49
 --8<--
 ```

@@ -187,9 +211,9 @@

 本案例中使用的学习率方法为 `ExponentialDecay`,学习率大小设置为0.001。优化器使用 `Adam`,梯度裁剪使用了 Paddle 内置的 `ClipGradByGlobalNorm` 方法。用 PaddleScience 代码表示如下:

-``` py linenums="110" title="examples/cylinder/2d_unsteady/transformer_physx/train_enn.py"
+``` py linenums="111" title="examples/cylinder/2d_unsteady/transformer_physx/train_enn.py"
 --8<--
-examples/cylinder/2d_unsteady/transformer_physx/train_enn.py:110:124
+examples/cylinder/2d_unsteady/transformer_physx/train_enn.py:111:120
 --8<--
 ```

@@ -197,9 +221,9 @@

 本案例训练过程中会按照一定的训练轮数间隔,使用验证集评估当前模型的训练情况,需要使用 `SupervisedValidator` 构建评估器。代码如下:

-``` py linenums="126" title="examples/cylinder/2d_unsteady/transformer_physx/train_enn.py"
+``` py linenums="124" title="examples/cylinder/2d_unsteady/transformer_physx/train_enn.py"
 --8<--
-examples/cylinder/2d_unsteady/transformer_physx/train_enn.py:126:153
+examples/cylinder/2d_unsteady/transformer_physx/train_enn.py:124:143
 --8<--
 ```

@@ -209,9 +233,9 @@ 
examples/cylinder/2d_unsteady/transformer_physx/train_enn.py:126:153 完成上述设置之后,只需要将上述实例化的对象按顺序传递给 `ppsci.solver.Solver`,然后启动训练、评估。 -``` py linenums="156" title="examples/cylinder/2d_unsteady/transformer_physx/train_enn.py" +``` py linenums="153" title="examples/cylinder/2d_unsteady/transformer_physx/train_enn.py" --8<-- -examples/cylinder/2d_unsteady/transformer_physx/train_enn.py:156: +examples/cylinder/2d_unsteady/transformer_physx/train_enn.py:153:169 --8<-- ``` @@ -219,9 +243,9 @@ examples/cylinder/2d_unsteady/transformer_physx/train_enn.py:156: 上文介绍了如何构建 Embedding 模型的训练、评估,在本节中将介绍如何使用训练好的 Embedding 模型训练 Transformer 模型。因为训练 Transformer 模型的步骤与训练 Embedding 模型的步骤基本相似,因此本节在两者的重复部分的各个参数不再详细介绍。首先将代码中定义的各个参数变量展示如下,每个参数的具体含义会在下面使用到时进行解释。 -``` py linenums="57" title="examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py" +``` yaml linenums="26" title="examples/cylinder/2d_unsteady/transformer_physx/conf/transformer.yaml" --8<-- -examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:57:79 +examples/cylinder/2d_unsteady/transformer_physx/conf/transformer.yaml:26:33 --8<-- ``` @@ -229,9 +253,9 @@ examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:57:79 Transformer 模型同样基于数据驱动的方法求解问题,因此需要使用 PaddleScience 内置的 `SupervisedConstraint` 构建监督约束。在定义约束之前,需要首先指定监督约束中用于数据加载的各个参数,代码如下: -``` py linenums="87" title="examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py" +``` py linenums="68" title="examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py" --8<-- -examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:87:104 +examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:68:85 --8<-- ``` @@ -239,9 +263,9 @@ examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:87:104 定义监督约束的代码如下: -``` py linenums="106" title="examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py" +``` py linenums="87" title="examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py" --8<-- -examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:106:111 +examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:87:92 --8<-- ``` @@ -256,9 +280,9 @@ examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:106:111 用 PaddleScience 代码表示如下: -``` py linenums="116" title="examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py" +``` py linenums="98" title="examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py" --8<-- -examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:116:124 +examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:98:98 --8<-- ``` @@ -268,9 +292,9 @@ examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:116:124 本案例中使用的学习率方法为 `CosineWarmRestarts`,学习率大小设置为0.001。优化器使用 `Adam`,梯度裁剪使用了 Paddle 内置的 `ClipGradByGlobalNorm` 方法。用 PaddleScience 代码表示如下: -``` py linenums="126" title="examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py" +``` py linenums="100" title="examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py" --8<-- -examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:126:140 +examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:100:107 --8<-- ``` @@ -278,9 +302,9 @@ examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:126:140 训练过程中会按照一定的训练轮数间隔,使用验证集评估当前模型的训练情况,需要使用 `SupervisedValidator` 构建评估器。用 PaddleScience 代码表示如下: -``` py linenums="142" 
title="examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py" +``` py linenums="110" title="examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py" --8<-- -examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:142:168 +examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:110:127 --8<-- ``` @@ -290,15 +314,15 @@ examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:142:168 在本文中首先定义了对 Transformer 模型输出数据变换到物理状态空间的代码: -``` py linenums="33" title="examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py" +``` py linenums="35" title="examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py" --8<-- -examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:33:53 +examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:35:56 --8<-- ``` -``` py linenums="83" title="examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py" +``` py linenums="64" title="examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py" --8<-- -examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:83:84 +examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:64:65 --8<-- ``` @@ -306,9 +330,9 @@ examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:83:84 在定义好了以上代码之后,就可以实现可视化器代码的构建了: -``` py linenums="170" title="examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py" +``` py linenums="146" title="examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py" --8<-- -examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:170:197 +examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:146:164 --8<-- ``` @@ -318,9 +342,9 @@ examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:170:197 完成上述设置之后,只需要将上述实例化的对象按顺序传递给 `ppsci.solver.Solver`,然后启动训练、评估。 -``` py linenums="199" title="examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py" +``` py linenums="166" title="examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py" --8<-- -examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:199: +examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py:166:184 --8<-- ``` diff --git a/docs/zh/examples/lorenz.md b/docs/zh/examples/lorenz.md index 153e9c13c..556c8ea99 100644 --- a/docs/zh/examples/lorenz.md +++ b/docs/zh/examples/lorenz.md @@ -2,6 +2,30 @@ AI Studio快速体验 +=== "模型训练命令" + + ``` sh + # linux + wget https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/lorenz_training_rk.hdf5 + wget https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/lorenz_valid_rk.hdf5 + # windows + # curl https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/lorenz_training_rk.hdf5 --output lorenz_training_rk.hdf5 + # curl https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/lorenz_valid_rk.hdf5 --output lorenz_valid_rk.hdf5 + python train_enn.py + ``` + +=== "模型评估命令" + + ``` sh + # linux + wget https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/lorenz_training_rk.hdf5 + wget https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/lorenz_valid_rk.hdf5 + # windows + # curl https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/lorenz_training_rk.hdf5 --output lorenz_training_rk.hdf5 + # curl https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/lorenz_valid_rk.hdf5 --output lorenz_valid_rk.hdf5 + python 
train_enn.py mode=eval EVAL.pretrained_model_path=https://paddle-org.bj.bcebos.com/paddlescience/models/lorenz/lorenz_pretrained.pdparams
+    ```
+
 ## 1. 背景简介

 Lorenz System,中文名称可译作“洛伦兹系统”,又称“洛伦兹混沌系统”,最早由美国气象学家爱德华·洛伦兹(Edward N.Lorenz)在1963年的一篇文章中提出。著名的“蝴蝶效应”,即“一只南美洲亚马逊河流域热带雨林中的蝴蝶,偶尔扇动几下翅膀,可以在两周以后引起美国得克萨斯州的一场龙卷风”,也是最早起源于这篇文章。洛伦兹系统的特点是在一定参数条件下展现出复杂、不确定的动态行为,包括对初始条件的敏感性和长期行为的不可预测性。这种混沌行为在自然界和许多实际应用领域中都存在,例如气候变化、股票市场波动等。洛伦兹系统对数值扰动极为敏感,是评估机器学习(深度学习)模型准确性的良好基准。

@@ -58,9 +82,9 @@ $$x_{0} \sim(-20, 20), y_{0} \sim(-20, 20), z_{0} \sim(10, 40)$$

 首先展示代码中定义的各个参数变量,每个参数的具体含义会在下面使用到时进行解释。

-``` py linenums="40" title="examples/lorenz/train_enn.py"
+``` yaml linenums="26" title="examples/lorenz/conf/enn.yaml"
 --8<--
-examples/lorenz/train_enn.py:40:55
+examples/lorenz/conf/enn.yaml:26:34
 --8<--
 ```

@@ -68,9 +92,9 @@

 本案例基于数据驱动的方法求解问题,因此需要使用 PaddleScience 内置的 `SupervisedConstraint` 构建监督约束。在定义约束之前,需要首先指定监督约束中用于数据加载的各个参数,代码如下:

-``` py linenums="60" title="examples/lorenz/train_enn.py"
+``` py linenums="51" title="examples/lorenz/train_enn.py"
 --8<--
-examples/lorenz/train_enn.py:60:77
+examples/lorenz/train_enn.py:51:70
 --8<--
 ```

@@ -89,9 +113,9 @@

 定义监督约束的代码如下:

-``` py linenums="79" title="examples/lorenz/train_enn.py"
+``` py linenums="72" title="examples/lorenz/train_enn.py"
 --8<--
-examples/lorenz/train_enn.py:79:87
+examples/lorenz/train_enn.py:72:85
 --8<--
 ```

@@ -114,17 +138,17 @@

 用 PaddleScience 代码表示如下:

-``` py linenums="93" title="examples/lorenz/train_enn.py"
+``` py linenums="86" title="examples/lorenz/train_enn.py"
 --8<--
-examples/lorenz/train_enn.py:93:96
+examples/lorenz/train_enn.py:86:89
 --8<--
 ```

 其中,`LorenzEmbedding` 的前两个参数在前文中已有描述,这里不再赘述,网络模型的第三、四个参数是训练数据集的均值和方差,用于归一化输入数据。计算均值、方差的代码表示如下:

-``` py linenums="29" title="examples/lorenz/train_enn.py"
+``` py linenums="32" title="examples/lorenz/train_enn.py"
 --8<--
-examples/lorenz/train_enn.py:29:36
+examples/lorenz/train_enn.py:32:39
 --8<--
 ```

@@ -134,7 +158,7 @@

 ``` py linenums="99" title="examples/lorenz/train_enn.py"
 --8<--
-examples/lorenz/train_enn.py:99:112
+examples/lorenz/train_enn.py:99:108
 --8<--
 ```

@@ -142,9 +166,9 @@

 本案例训练过程中会按照一定的训练轮数间隔,使用验证集评估当前模型的训练情况,需要使用 `SupervisedValidator` 构建评估器。代码如下:

-``` py linenums="115" title="examples/lorenz/train_enn.py"
+``` py linenums="111" title="examples/lorenz/train_enn.py"
 --8<--
-examples/lorenz/train_enn.py:115:141
+examples/lorenz/train_enn.py:111:131
 --8<--
 ```

@@ -154,9 +178,9 @@

 完成上述设置之后,只需要将上述实例化的对象按顺序传递给 `ppsci.solver.Solver`,然后启动训练、评估。

-``` py linenums="144" title="examples/lorenz/train_enn.py"
+``` py linenums="142" title="examples/lorenz/train_enn.py"
 --8<--
-examples/lorenz/train_enn.py:144:
+examples/lorenz/train_enn.py:142:156
 --8<--
 ```

@@ -164,9 +188,9 @@ examples/lorenz/train_enn.py:144:

 上文介绍了如何构建 Embedding 模型的训练、评估,在本节中将介绍如何使用训练好的 Embedding 模型训练 Transformer 模型。因为训练 Transformer 模型的步骤与训练 Embedding 模型的步骤基本相似,因此本节在两者的重复部分的各个参数不再详细介绍。首先将代码中定义的各个参数变量展示如下,每个参数的具体含义会在下面使用到时进行解释。

-``` py linenums="57" title="examples/lorenz/train_transformer.py"
+``` yaml linenums="26" title="examples/lorenz/conf/transformer.yaml"
 --8<--
-examples/lorenz/train_transformer.py:57:79
+examples/lorenz/conf/transformer.yaml:26:42
 --8<--
 ```

@@ -174,9 +198,9 @@
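
+在使用上述配置之前,可以先直观感受配置项与模型构建的对应关系。以下为一个简单示意(假设 `cfg` 已由 Hydra 从上方 yaml 加载):
+
+``` py
+# 示意:MODEL 配置项会被直接展开为 PhysformerGPT2 的构造参数
+model = ppsci.arch.PhysformerGPT2(**cfg.MODEL)
+```
+
 Transformer 模型同样基于数据驱动的方法求解问题,因此需要使用 PaddleScience 内置的 `SupervisedConstraint` 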
构建监督约束。在定义约束之前,需要首先指定监督约束中用于数据加载的各个参数,代码如下: -``` py linenums="87" title="examples/lorenz/train_transformer.py" +``` py linenums="68" title="examples/lorenz/train_transformer.py" --8<-- -examples/lorenz/train_transformer.py:87:104 +examples/lorenz/train_transformer.py:68:85 --8<-- ``` @@ -184,9 +208,9 @@ examples/lorenz/train_transformer.py:87:104 定义监督约束的代码如下: -``` py linenums="106" title="examples/lorenz/train_transformer.py" +``` py linenums="87" title="examples/lorenz/train_transformer.py" --8<-- -examples/lorenz/train_transformer.py:106:111 +examples/lorenz/train_transformer.py:87:92 --8<-- ``` @@ -201,9 +225,9 @@ examples/lorenz/train_transformer.py:106:111 用 PaddleScience 代码表示如下: -``` py linenums="116" title="examples/lorenz/train_transformer.py" +``` py linenums="98" title="examples/lorenz/train_transformer.py" --8<-- -examples/lorenz/train_transformer.py:116:124 +examples/lorenz/train_transformer.py:98:98 --8<-- ``` @@ -213,9 +237,9 @@ examples/lorenz/train_transformer.py:116:124 本案例中使用的学习率方法为 `CosineWarmRestarts`,学习率大小设置为0.001。优化器使用 `Adam`,梯度裁剪使用了 Paddle 内置的 `ClipGradByGlobalNorm` 方法。用 PaddleScience 代码表示如下: -``` py linenums="126" title="examples/lorenz/train_transformer.py" +``` py linenums="101" title="examples/lorenz/train_transformer.py" --8<-- -examples/lorenz/train_transformer.py:126:140 +examples/lorenz/train_transformer.py:101:107 --8<-- ``` @@ -223,9 +247,9 @@ examples/lorenz/train_transformer.py:126:140 训练过程中会按照一定的训练轮数间隔,使用验证集评估当前模型的训练情况,需要使用 `SupervisedValidator` 构建评估器。用 PaddleScience 代码表示如下: -``` py linenums="142" title="examples/lorenz/train_transformer.py" +``` py linenums="110" title="examples/lorenz/train_transformer.py" --8<-- -examples/lorenz/train_transformer.py:142:168 +examples/lorenz/train_transformer.py:110:135 --8<-- ``` @@ -235,15 +259,15 @@ examples/lorenz/train_transformer.py:142:168 在本文中首先定义了对 Transformer 模型输出数据变换到物理状态空间的代码: -``` py linenums="32" title="examples/lorenz/train_transformer.py" +``` py linenums="34" title="examples/lorenz/train_transformer.py" --8<-- -examples/lorenz/train_transformer.py:32:50 +examples/lorenz/train_transformer.py:34:52 --8<-- ``` -``` py linenums="83" title="examples/lorenz/train_transformer.py" +``` py linenums="64" title="examples/lorenz/train_transformer.py" --8<-- -examples/lorenz/train_transformer.py:83:84 +examples/lorenz/train_transformer.py:64:65 --8<-- ``` @@ -251,9 +275,9 @@ examples/lorenz/train_transformer.py:83:84 在定义好了以上代码之后,就可以实现可视化器代码的构建了: -``` py linenums="170" title="examples/lorenz/train_transformer.py" +``` py linenums="138" title="examples/lorenz/train_transformer.py" --8<-- -examples/lorenz/train_transformer.py:170:188 +examples/lorenz/train_transformer.py:138:155 --8<-- ``` @@ -263,9 +287,9 @@ examples/lorenz/train_transformer.py:170:188 完成上述设置之后,只需要将上述实例化的对象按顺序传递给 `ppsci.solver.Solver`,然后启动训练、评估。 -``` py linenums="190" title="examples/lorenz/train_transformer.py" +``` py linenums="157" title="examples/lorenz/train_transformer.py" --8<-- -examples/lorenz/train_transformer.py:190: +examples/lorenz/train_transformer.py:157:175 --8<-- ``` diff --git a/docs/zh/examples/rossler.md b/docs/zh/examples/rossler.md index 42fbc9f56..a8ee06685 100644 --- a/docs/zh/examples/rossler.md +++ b/docs/zh/examples/rossler.md @@ -2,6 +2,30 @@ AI Studio快速体验 +=== "模型训练命令" + + ``` sh + # linux + wget https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/rossler_training.hdf5 + wget https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/rossler_valid.hdf5 + # windows + # curl 
https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/rossler_training.hdf5 --output rossler_training.hdf5
+    # curl https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/rossler_valid.hdf5 --output rossler_valid.hdf5
+    python train_enn.py
+    ```
+
+=== "模型评估命令"
+
+    ``` sh
+    # linux
+    wget https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/rossler_training.hdf5
+    wget https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/rossler_valid.hdf5
+    # windows
+    # curl https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/rossler_training.hdf5 --output rossler_training.hdf5
+    # curl https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/rossler_valid.hdf5 --output rossler_valid.hdf5
+    python train_enn.py mode=eval EVAL.pretrained_model_path=https://paddle-org.bj.bcebos.com/paddlescience/models/rossler/rossler_pretrained.pdparams
+    ```
+
 ## 1. 背景简介

 Rossler System,最早由德国科学家 Rossler 提出,也是常见的混沌系统。该系统在混沌理论的研究中具有重要地位,为混沌现象提供了一种数学描述和理解方法。同时由于该系统对数值扰动极为敏感,因此也是评估机器学习(深度学习)模型准确性的良好基准。

@@ -43,9 +67,9 @@ $$\omega = 1.0, \alpha = 0.165, \beta = 0.2, \gamma = 10$$

 首先展示代码中定义的各个参数变量,每个参数的具体含义会在下面使用到时进行解释。

-``` py linenums="44" title="examples/rossler/train_enn.py"
+``` yaml linenums="22" title="examples/rossler/conf/enn.yaml"
 --8<--
-examples/rossler/train_enn.py:44:59
+examples/rossler/conf/enn.yaml:22:34
 --8<--
 ```

@@ -53,9 +77,9 @@

 本案例基于数据驱动的方法求解问题,因此需要使用 PaddleScience 内置的 `SupervisedConstraint` 构建监督约束。在定义约束之前,需要首先指定监督约束中用于数据加载的各个参数,代码如下:

-``` py linenums="64" title="examples/rossler/train_enn.py"
+``` py linenums="55" title="examples/rossler/train_enn.py"
 --8<--
-examples/rossler/train_enn.py:64:81
+examples/rossler/train_enn.py:55:74
 --8<--
 ```

@@ -74,9 +98,9 @@

 定义监督约束的代码如下:

-``` py linenums="83" title="examples/rossler/train_enn.py"
+``` py linenums="76" title="examples/rossler/train_enn.py"
 --8<--
-examples/rossler/train_enn.py:83:91
+examples/rossler/train_enn.py:76:86
 --8<--
 ```

@@ -99,17 +123,17 @@

 用 PaddleScience 代码表示如下:

-``` py linenums="96" title="examples/rossler/train_enn.py"
+``` py linenums="93" title="examples/rossler/train_enn.py"
 --8<--
-examples/rossler/train_enn.py:96:100
+examples/rossler/train_enn.py:93:99
 --8<--
 ```

 其中,`RosslerEmbedding` 的前两个参数在前文中已有描述,这里不再赘述,网络模型的第三、四个参数是训练数据集的均值和方差,用于归一化输入数据。计算均值、方差的代码表示如下:

-``` py linenums="29" title="examples/rossler/train_enn.py"
+``` py linenums="32" title="examples/rossler/train_enn.py"
 --8<--
-examples/rossler/train_enn.py:29:40
+examples/rossler/train_enn.py:32:43
 --8<--
 ```

@@ -117,9 +141,9 @@

 本案例中使用的学习率方法为 `ExponentialDecay`,学习率大小设置为0.001。优化器使用 `Adam`,梯度裁剪使用了 Paddle 内置的 `ClipGradByGlobalNorm` 方法。用 PaddleScience 代码表示如下:

-``` py linenums="102" title="examples/rossler/train_enn.py"
+``` py linenums="101" title="examples/rossler/train_enn.py"
 --8<--
-examples/rossler/train_enn.py:102:116
+examples/rossler/train_enn.py:101:110
 --8<--
 ```

@@ -127,9 +151,9 @@

 本案例训练过程中会按照一定的训练轮数间隔,使用验证集评估当前模型的训练情况,需要使用 `SupervisedValidator` 构建评估器。代码如下:

-``` py linenums="118" title="examples/rossler/train_enn.py"
+``` py linenums="114" title="examples/rossler/train_enn.py"
 --8<--
-examples/rossler/train_enn.py:118:145
+examples/rossler/train_enn.py:114:133
 --8<--
 ```

@@ -139,9 +163,9 @@

 完成上述设置之后,只需要将上述实例化的对象按顺序传递给 
`ppsci.solver.Solver`,然后启动训练、评估。 -``` py linenums="147" title="examples/rossler/train_enn.py" +``` py linenums="143" title="examples/rossler/train_enn.py" --8<-- -examples/rossler/train_enn.py:147: +examples/rossler/train_enn.py:143:157 --8<-- ``` @@ -149,9 +173,9 @@ examples/rossler/train_enn.py:147: 上文介绍了如何构建 Embedding 模型的训练、评估,在本节中将介绍如何使用训练好的 Embedding 模型训练 Transformer 模型。因为训练 Transformer 模型的步骤与训练 Embedding 模型的步骤基本相似,因此本节在两者的重复部分的各个参数不再详细介绍。首先将代码中定义的各个参数变量展示如下,每个参数的具体含义会在下面使用到时进行解释。 -``` py linenums="54" title="examples/rossler/train_transformer.py" +``` yaml linenums="23" title="examples/rossler/conf/transformer.yaml" --8<-- -examples/rossler/train_transformer.py:54:76 +examples/rossler/conf/transformer.yaml:23:33 --8<-- ``` @@ -159,9 +183,9 @@ examples/rossler/train_transformer.py:54:76 Transformer 模型同样基于数据驱动的方法求解问题,因此需要使用 PaddleScience 内置的 `SupervisedConstraint` 构建监督约束。在定义约束之前,需要首先指定监督约束中用于数据加载的各个参数,代码如下: -``` py linenums="84" title="examples/rossler/train_transformer.py" +``` py linenums="67" title="examples/rossler/train_transformer.py" --8<-- -examples/rossler/train_transformer.py:84:101 +examples/rossler/train_transformer.py:65:82 --8<-- ``` @@ -169,9 +193,9 @@ examples/rossler/train_transformer.py:84:101 定义监督约束的代码如下: -``` py linenums="103" title="examples/rossler/train_transformer.py" +``` py linenums="84" title="examples/rossler/train_transformer.py" --8<-- -examples/rossler/train_transformer.py:103:108 +examples/rossler/train_transformer.py:84:89 --8<-- ``` @@ -186,9 +210,9 @@ examples/rossler/train_transformer.py:103:108 用 PaddleScience 代码表示如下: -``` py linenums="113" title="examples/rossler/train_transformer.py" +``` py linenums="95" title="examples/rossler/train_transformer.py" --8<-- -examples/rossler/train_transformer.py:113:121 +examples/rossler/train_transformer.py:95:95 --8<-- ``` @@ -198,9 +222,9 @@ examples/rossler/train_transformer.py:113:121 本案例中使用的学习率方法为 `CosineWarmRestarts`,学习率大小设置为0.001。优化器使用 `Adam`,梯度裁剪使用了 Paddle 内置的 `ClipGradByGlobalNorm` 方法。用 PaddleScience 代码表示如下: -``` py linenums="123" title="examples/rossler/train_transformer.py" +``` py linenums="97" title="examples/rossler/train_transformer.py" --8<-- -examples/rossler/train_transformer.py:123:137 +examples/rossler/train_transformer.py:97:104 --8<-- ``` @@ -208,9 +232,9 @@ examples/rossler/train_transformer.py:123:137 训练过程中会按照一定的训练轮数间隔,使用验证集评估当前模型的训练情况,需要使用 `SupervisedValidator` 构建评估器。用 PaddleScience 代码表示如下: -``` py linenums="139" title="examples/rossler/train_transformer.py" +``` py linenums="107" title="examples/rossler/train_transformer.py" --8<-- -examples/rossler/train_transformer.py:139:165 +examples/rossler/train_transformer.py:107:124 --8<-- ``` @@ -220,15 +244,15 @@ examples/rossler/train_transformer.py:139:165 在本文中首先定义了对 Transformer 模型输出数据变换到物理状态空间的代码: -``` py linenums="32" title="examples/rossler/train_transformer.py" +``` py linenums="34" title="examples/rossler/train_transformer.py" --8<-- -examples/rossler/train_transformer.py:32:50 +examples/rossler/train_transformer.py:34:52 --8<-- ``` -``` py linenums="80" title="examples/rossler/train_transformer.py" +``` py linenums="63" title="examples/rossler/train_transformer.py" --8<-- -examples/rossler/train_transformer.py:80:81 +examples/rossler/train_transformer.py:63:64 --8<-- ``` @@ -236,9 +260,9 @@ examples/rossler/train_transformer.py:80:81 在定义好了以上代码之后,就可以实现可视化器代码的构建了: -``` py linenums="167" title="examples/rossler/train_transformer.py" +``` py linenums="134" title="examples/rossler/train_transformer.py" --8<-- 
-examples/rossler/train_transformer.py:167:185
+examples/rossler/train_transformer.py:134:152
 --8<--
 ```

@@ -248,9 +272,9 @@

 完成上述设置之后,只需要将上述实例化的对象按顺序传递给 `ppsci.solver.Solver`,然后启动训练、评估。

-``` py linenums="187" title="examples/rossler/train_transformer.py"
+``` py linenums="154" title="examples/rossler/train_transformer.py"
 --8<--
-examples/rossler/train_transformer.py:187:
+examples/rossler/train_transformer.py:154:172
 --8<--
 ```
diff --git a/examples/cylinder/2d_unsteady/transformer_physx/conf/enn.yaml b/examples/cylinder/2d_unsteady/transformer_physx/conf/enn.yaml
new file mode 100644
index 000000000..49515738f
--- /dev/null
+++ b/examples/cylinder/2d_unsteady/transformer_physx/conf/enn.yaml
@@ -0,0 +1,54 @@
+hydra:
+  run:
+    # dynamic output directory according to running time and override name
+    dir: outputs_cylinder2d_unsteady_transformer_physx_enn/${now:%Y-%m-%d}/${now:%H-%M-%S}/${hydra.job.override_dirname}
+  job:
+    name: ${mode} # name of logfile
+    chdir: false # keep current working directory unchanged
+    config:
+      override_dirname:
+        exclude_keys:
+          - TRAIN.checkpoint_path
+          - TRAIN.pretrained_model_path
+          - EVAL.pretrained_model_path
+          - mode
+          - output_dir
+          - log_freq
+  sweep:
+    # output directory for multirun
+    dir: ${hydra.run.dir}
+    subdir: ./
+
+# general settings
+mode: train # running mode: train/eval
+seed: 42
+output_dir: ${hydra:run.dir}
+TRAIN_BLOCK_SIZE: 4
+VALID_BLOCK_SIZE: 32
+TRAIN_FILE_PATH: ./datasets/cylinder_training.hdf5
+VALID_FILE_PATH: ./datasets/cylinder_valid.hdf5
+
+# model settings
+MODEL:
+  input_keys: ["states", "visc"]
+  output_keys: ["pred_states", "recover_states"]
+
+# training settings
+TRAIN:
+  epochs: 300
+  batch_size:
+    train: 64
+    eval: 8
+  lr_scheduler:
+    epochs: ${TRAIN.epochs}
+    learning_rate: 0.001
+    gamma: 0.995
+    by_epoch: true
+  optimizer:
+    weight_decay: 1e-8
+  pretrained_model_path: null
+  checkpoint_path: null
+
+# evaluation settings
+EVAL:
+  pretrained_model_path: null
diff --git a/examples/cylinder/2d_unsteady/transformer_physx/conf/transformer.yaml b/examples/cylinder/2d_unsteady/transformer_physx/conf/transformer.yaml
new file mode 100644
index 000000000..c4adbaf97
--- /dev/null
+++ b/examples/cylinder/2d_unsteady/transformer_physx/conf/transformer.yaml
@@ -0,0 +1,65 @@
+hydra:
+  run:
+    # dynamic output directory according to running time and override name
+    dir: outputs_cylinder2d_unsteady_transformer_physx_transformer/${now:%Y-%m-%d}/${now:%H-%M-%S}/${hydra.job.override_dirname}
+  job:
+    name: ${mode} # name of logfile
+    chdir: false # keep current working directory unchanged
+    config:
+      override_dirname:
+        exclude_keys:
+          - TRAIN.checkpoint_path
+          - TRAIN.pretrained_model_path
+          - EVAL.pretrained_model_path
+          - mode
+          - output_dir
+          - log_freq
+  sweep:
+    # output directory for multirun
+    dir: ${hydra.run.dir}
+    subdir: ./
+
+# general settings
+mode: train # running mode: train/eval
+seed: 42
+output_dir: ${hydra:run.dir}
+TRAIN_BLOCK_SIZE: 16
+VALID_BLOCK_SIZE: 256
+TRAIN_FILE_PATH: ./datasets/cylinder_training.hdf5
+VALID_FILE_PATH: ./datasets/cylinder_valid.hdf5
+
+# set working condition
+EMBEDDING_MODEL_PATH: ./outputs_cylinder2d_unsteady_transformer_physx_enn/checkpoints/latest
+VIS_DATA_NUMS: 1
+
+# model settings
+MODEL:
+  input_keys: ["embeds"]
+  output_keys: ["pred_embeds"]
+  num_layers: 4
+  num_ctx: 16
+  embed_size: 128
+  num_heads: 4
+
+# training settings
+TRAIN:
+  epochs: 200
+  batch_size:
+    train: 4
+    eval: 16
+  lr_scheduler:
+    epochs: ${TRAIN.epochs}
+    learning_rate: 
0.001 + T_0: 14 + T_mult: 2 + eta_min: 1.0e-9 + optimizer: + weight_decay: 1.0e-8 + eval_during_train: true + eval_freq: 50 + pretrained_model_path: null + checkpoint_path: null + +# evaluation settings +EVAL: + pretrained_model_path: null diff --git a/examples/cylinder/2d_unsteady/transformer_physx/train_enn.py b/examples/cylinder/2d_unsteady/transformer_physx/train_enn.py index 21b7e89cc..4105d9810 100644 --- a/examples/cylinder/2d_unsteady/transformer_physx/train_enn.py +++ b/examples/cylinder/2d_unsteady/transformer_physx/train_enn.py @@ -18,11 +18,14 @@ # This file is for step1: training a embedding model. # This file is based on PaddleScience/ppsci API. +from os import path as osp + +import hydra import numpy as np import paddle +from omegaconf import DictConfig import ppsci -from ppsci.utils import config from ppsci.utils import logger @@ -46,52 +49,47 @@ def get_mean_std(data: np.ndarray, visc: np.ndarray): return mean, std -if __name__ == "__main__": - args = config.parse_args() +def train(cfg: DictConfig): # set random seed for reproducibility - ppsci.utils.misc.set_random_seed(42) - # set training hyper-parameters - EPOCHS = 300 if not args.epochs else args.epochs - TRAIN_BLOCK_SIZE = 4 - VALID_BLOCK_SIZE = 32 - - input_keys = ("states", "visc") - output_keys = ("pred_states", "recover_states") - weights = (10.0 * (TRAIN_BLOCK_SIZE - 1), 10.0 * TRAIN_BLOCK_SIZE) - regularization_key = "k_matrix" - - OUTPUT_DIR = "./output/cylinder_enn" if not args.output_dir else args.output_dir - TRAIN_FILE_PATH = "./datasets/cylinder_training.hdf5" - VALID_FILE_PATH = "./datasets/cylinder_valid.hdf5" + ppsci.utils.misc.set_random_seed(cfg.seed) # initialize logger - logger.init_logger("ppsci", f"{OUTPUT_DIR}/train.log", "info") + logger.init_logger("ppsci", osp.join(cfg.output_dir, f"{cfg.mode}.log"), "info") + weights = (10.0 * (cfg.TRAIN_BLOCK_SIZE - 1), 10.0 * cfg.TRAIN_BLOCK_SIZE) + regularization_key = "k_matrix" # manually build constraint(s) train_dataloader_cfg = { "dataset": { "name": "CylinderDataset", - "file_path": TRAIN_FILE_PATH, - "input_keys": input_keys, - "label_keys": output_keys, - "block_size": TRAIN_BLOCK_SIZE, + "file_path": cfg.TRAIN_FILE_PATH, + "input_keys": cfg.MODEL.input_keys, + "label_keys": cfg.MODEL.output_keys, + "block_size": cfg.TRAIN_BLOCK_SIZE, "stride": 16, - "weight_dict": {key: value for key, value in zip(output_keys, weights)}, + "weight_dict": { + key: value for key, value in zip(cfg.MODEL.output_keys, weights) + }, }, "sampler": { "name": "BatchSampler", "drop_last": True, "shuffle": True, }, - "batch_size": 64, + "batch_size": cfg.TRAIN.batch_size.train, "num_workers": 4, } sup_constraint = ppsci.constraint.SupervisedConstraint( train_dataloader_cfg, ppsci.loss.MSELossWithL2Decay( - regularization_dict={regularization_key: 1.0e-2 * (TRAIN_BLOCK_SIZE - 1)} + regularization_dict={ + regularization_key: 1.0e-2 * (cfg.TRAIN_BLOCK_SIZE - 1) + } ), - {key: lambda out, k=key: out[k] for key in output_keys + (regularization_key,)}, + { + key: lambda out, k=key: out[k] + for key in cfg.MODEL.output_keys + (regularization_key,) + }, name="Sup", ) constraint = {sup_constraint.name: sup_constraint} @@ -104,43 +102,43 @@ def get_mean_std(data: np.ndarray, visc: np.ndarray): sup_constraint.data_loader.dataset.data, sup_constraint.data_loader.dataset.visc ) model = ppsci.arch.CylinderEmbedding( - input_keys, output_keys + (regularization_key,), data_mean, data_std + cfg.MODEL.input_keys, + cfg.MODEL.output_keys + (regularization_key,), + data_mean, + data_std, ) # 
init optimizer and lr scheduler clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=0.1) lr_scheduler = ppsci.optimizer.lr_scheduler.ExponentialDecay( - EPOCHS, - ITERS_PER_EPOCH, - 0.001, - gamma=0.995, + iters_per_epoch=ITERS_PER_EPOCH, decay_steps=ITERS_PER_EPOCH, - by_epoch=True, + **cfg.TRAIN.lr_scheduler, )() optimizer = ppsci.optimizer.Adam( - lr_scheduler, - weight_decay=1e-8, - grad_clip=clip, + lr_scheduler, grad_clip=clip, **cfg.TRAIN.optimizer )(model) # manually build validator - weights = (10.0 * (VALID_BLOCK_SIZE - 1), 10.0 * VALID_BLOCK_SIZE) + weights = (10.0 * (cfg.VALID_BLOCK_SIZE - 1), 10.0 * cfg.VALID_BLOCK_SIZE) eval_dataloader_cfg = { "dataset": { "name": "CylinderDataset", - "file_path": VALID_FILE_PATH, - "input_keys": input_keys, - "label_keys": output_keys, - "block_size": VALID_BLOCK_SIZE, + "file_path": cfg.VALID_FILE_PATH, + "input_keys": cfg.MODEL.input_keys, + "label_keys": cfg.MODEL.output_keys, + "block_size": cfg.VALID_BLOCK_SIZE, "stride": 32, - "weight_dict": {key: value for key, value in zip(output_keys, weights)}, + "weight_dict": { + key: value for key, value in zip(cfg.MODEL.output_keys, weights) + }, }, "sampler": { "name": "BatchSampler", "drop_last": False, "shuffle": False, }, - "batch_size": 8, + "batch_size": cfg.TRAIN.batch_size.eval, "num_workers": 4, } @@ -156,10 +154,10 @@ def get_mean_std(data: np.ndarray, visc: np.ndarray): solver = ppsci.solver.Solver( model, constraint, - OUTPUT_DIR, + cfg.output_dir, optimizer, lr_scheduler, - EPOCHS, + cfg.TRAIN.epochs, ITERS_PER_EPOCH, eval_during_train=True, eval_freq=50, @@ -170,12 +168,111 @@ def get_mean_std(data: np.ndarray, visc: np.ndarray): # evaluate after finished training solver.eval() - # directly evaluate pretrained model(optional) - logger.init_logger("ppsci", f"{OUTPUT_DIR}/eval.log", "info") + +def evaluate(cfg: DictConfig): + # set random seed for reproducibility + ppsci.utils.misc.set_random_seed(cfg.seed) + # initialize logger + logger.init_logger("ppsci", osp.join(cfg.output_dir, f"{cfg.mode}.log"), "info") + + weights = (10.0 * (cfg.TRAIN_BLOCK_SIZE - 1), 10.0 * cfg.TRAIN_BLOCK_SIZE) + regularization_key = "k_matrix" + # manually build constraint(s) + train_dataloader_cfg = { + "dataset": { + "name": "CylinderDataset", + "file_path": cfg.TRAIN_FILE_PATH, + "input_keys": cfg.MODEL.input_keys, + "label_keys": cfg.MODEL.output_keys, + "block_size": cfg.TRAIN_BLOCK_SIZE, + "stride": 16, + "weight_dict": { + key: value for key, value in zip(cfg.MODEL.output_keys, weights) + }, + }, + "sampler": { + "name": "BatchSampler", + "drop_last": True, + "shuffle": True, + }, + "batch_size": cfg.TRAIN.batch_size.train, + "num_workers": 4, + } + + sup_constraint = ppsci.constraint.SupervisedConstraint( + train_dataloader_cfg, + ppsci.loss.MSELossWithL2Decay( + regularization_dict={ + regularization_key: 1.0e-2 * (cfg.TRAIN_BLOCK_SIZE - 1) + } + ), + { + key: lambda out, k=key: out[k] + for key in cfg.MODEL.output_keys + (regularization_key,) + }, + name="Sup", + ) + + # manually init model + data_mean, data_std = get_mean_std( + sup_constraint.data_loader.dataset.data, sup_constraint.data_loader.dataset.visc + ) + model = ppsci.arch.CylinderEmbedding( + cfg.MODEL.input_keys, + cfg.MODEL.output_keys + (regularization_key,), + data_mean, + data_std, + ) + + # manually build validator + weights = (10.0 * (cfg.VALID_BLOCK_SIZE - 1), 10.0 * cfg.VALID_BLOCK_SIZE) + eval_dataloader_cfg = { + "dataset": { + "name": "CylinderDataset", + "file_path": cfg.VALID_FILE_PATH, + "input_keys": cfg.MODEL.input_keys, 
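+            # NOTE: keys are taken from cfg.MODEL so that evaluation reads the
+            # same fields the embedding model was trained with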
+ "label_keys": cfg.MODEL.output_keys, + "block_size": cfg.VALID_BLOCK_SIZE, + "stride": 32, + "weight_dict": { + key: value for key, value in zip(cfg.MODEL.output_keys, weights) + }, + }, + "sampler": { + "name": "BatchSampler", + "drop_last": False, + "shuffle": False, + }, + "batch_size": cfg.TRAIN.batch_size.eval, + "num_workers": 4, + } + + mse_validator = ppsci.validate.SupervisedValidator( + eval_dataloader_cfg, + ppsci.loss.MSELoss(), + metric={"MSE": ppsci.metric.MSE()}, + name="MSE_Validator", + ) + validator = {mse_validator.name: mse_validator} + solver = ppsci.solver.Solver( model, - output_dir=OUTPUT_DIR, + output_dir=cfg.output_dir, validator=validator, - pretrained_model_path=f"{OUTPUT_DIR}/checkpoints/latest", + pretrained_model_path=cfg.EVAL.pretrained_model_path, ) solver.eval() + + +@hydra.main(version_base=None, config_path="./conf", config_name="enn.yaml") +def main(cfg: DictConfig): + if cfg.mode == "train": + train(cfg) + elif cfg.mode == "eval": + evaluate(cfg) + else: + raise ValueError(f"cfg.mode should in ['train', 'eval'], but got '{cfg.mode}'") + + +if __name__ == "__main__": + main() diff --git a/examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py b/examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py index 636751eb8..7bb1eaffb 100644 --- a/examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py +++ b/examples/cylinder/2d_unsteady/transformer_physx/train_transformer.py @@ -18,14 +18,16 @@ # This file is for step2: training a transformer model, based on frozen pretrained embedding model. # This file is based on PaddleScience/ppsci API. +from os import path as osp from typing import Dict +import hydra import numpy as np import paddle +from omegaconf import DictConfig import ppsci from ppsci.arch import base -from ppsci.utils import config from ppsci.utils import logger from ppsci.utils import save_load @@ -53,44 +55,23 @@ def __call__(self, x: Dict[str, paddle.Tensor]) -> Dict[str, paddle.Tensor]: return pred_states -if __name__ == "__main__": - args = config.parse_args() +def train(cfg: DictConfig): # set random seed for reproducibility - ppsci.utils.misc.set_random_seed(42) - # set training hyper-parameters - NUM_LAYERS = 6 - NUM_CTX = 16 - EMBED_SIZE = 128 - NUM_HEADS = 4 - - EPOCHS = 200 if not args.epochs else args.epochs - TRAIN_BLOCK_SIZE = 16 - VALID_BLOCK_SIZE = 256 - input_keys = ("embeds",) - output_keys = ("pred_embeds",) - - VIS_DATA_NUMS = 1 - - TRAIN_FILE_PATH = "./datasets/cylinder_training.hdf5" - VALID_FILE_PATH = "./datasets/cylinder_valid.hdf5" - EMBEDDING_MODEL_PATH = "./output/cylinder_enn/checkpoints/latest" - OUTPUT_DIR = ( - "./output/cylinder_transformer" if not args.output_dir else args.output_dir - ) + ppsci.utils.misc.set_random_seed(cfg.seed) # initialize logger - logger.init_logger("ppsci", f"{OUTPUT_DIR}/train.log", "info") + logger.init_logger("ppsci", osp.join(cfg.output_dir, f"{cfg.mode}.log"), "info") - embedding_model = build_embedding_model(EMBEDDING_MODEL_PATH) + embedding_model = build_embedding_model(cfg.EMBEDDING_MODEL_PATH) output_transform = OutputTransform(embedding_model) # manually build constraint(s) train_dataloader_cfg = { "dataset": { "name": "CylinderDataset", - "file_path": TRAIN_FILE_PATH, - "input_keys": input_keys, - "label_keys": output_keys, - "block_size": TRAIN_BLOCK_SIZE, + "file_path": cfg.TRAIN_FILE_PATH, + "input_keys": cfg.MODEL.input_keys, + "label_keys": cfg.MODEL.output_keys, + "block_size": cfg.TRAIN_BLOCK_SIZE, "stride": 4, "embedding_model": 
embedding_model, }, @@ -99,7 +80,7 @@ def __call__(self, x: Dict[str, paddle.Tensor]) -> Dict[str, paddle.Tensor]: "drop_last": True, "shuffle": True, }, - "batch_size": 4, + "batch_size": cfg.TRAIN.batch_size.train, "num_workers": 4, } @@ -114,39 +95,25 @@ def __call__(self, x: Dict[str, paddle.Tensor]) -> Dict[str, paddle.Tensor]: ITERS_PER_EPOCH = len(constraint["Sup"].data_loader) # manually init model - model = ppsci.arch.PhysformerGPT2( - input_keys, - output_keys, - NUM_LAYERS, - NUM_CTX, - EMBED_SIZE, - NUM_HEADS, - ) + model = ppsci.arch.PhysformerGPT2(**cfg.MODEL) # init optimizer and lr scheduler clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=0.1) lr_scheduler = ppsci.optimizer.lr_scheduler.CosineWarmRestarts( - EPOCHS, - ITERS_PER_EPOCH, - 0.001, - T_0=14, - T_mult=2, - eta_min=1e-9, + iters_per_epoch=ITERS_PER_EPOCH, **cfg.TRAIN.lr_scheduler )() optimizer = ppsci.optimizer.Adam( - lr_scheduler, - weight_decay=1e-8, - grad_clip=clip, + lr_scheduler, grad_clip=clip, **cfg.TRAIN.optimizer )(model) # manually build validator eval_dataloader_cfg = { "dataset": { "name": "CylinderDataset", - "file_path": VALID_FILE_PATH, - "input_keys": input_keys, - "label_keys": output_keys, - "block_size": VALID_BLOCK_SIZE, + "file_path": cfg.VALID_FILE_PATH, + "input_keys": cfg.MODEL.input_keys, + "label_keys": cfg.MODEL.output_keys, + "block_size": cfg.VALID_BLOCK_SIZE, "stride": 1024, "embedding_model": embedding_model, }, @@ -155,7 +122,7 @@ def __call__(self, x: Dict[str, paddle.Tensor]) -> Dict[str, paddle.Tensor]: "drop_last": False, "shuffle": False, }, - "batch_size": 16, + "batch_size": cfg.TRAIN.batch_size.eval, "num_workers": 4, } @@ -172,8 +139,8 @@ def __call__(self, x: Dict[str, paddle.Tensor]) -> Dict[str, paddle.Tensor]: embedding_data = mse_validator.data_loader.dataset.embedding_data vis_datas = { - "embeds": embedding_data[:VIS_DATA_NUMS, :-1], - "states": states[:VIS_DATA_NUMS, 1:], + "embeds": embedding_data[: cfg.VIS_DATA_NUMS, :-1], + "states": states[: cfg.VIS_DATA_NUMS, 1:], } visualizer = { @@ -199,13 +166,13 @@ def __call__(self, x: Dict[str, paddle.Tensor]) -> Dict[str, paddle.Tensor]: solver = ppsci.solver.Solver( model, constraint, - OUTPUT_DIR, + cfg.output_dir, optimizer, lr_scheduler, - EPOCHS, + cfg.TRAIN.epochs, ITERS_PER_EPOCH, - eval_during_train=True, - eval_freq=50, + eval_during_train=cfg.TRAIN.eval_during_train, + eval_freq=cfg.TRAIN.eval_freq, validator=validator, visualizer=visualizer, ) @@ -216,15 +183,94 @@ def __call__(self, x: Dict[str, paddle.Tensor]) -> Dict[str, paddle.Tensor]: # visualize prediction after finished training solver.visualize() + +def evaluate(cfg: DictConfig): # directly evaluate pretrained model(optional) - logger.init_logger("ppsci", f"{OUTPUT_DIR}/eval.log", "info") + logger.init_logger("ppsci", osp.join(cfg.output_dir, f"{cfg.mode}.log"), "info") + + embedding_model = build_embedding_model(cfg.EMBEDDING_MODEL_PATH) + output_transform = OutputTransform(embedding_model) + + # manually init model + model = ppsci.arch.PhysformerGPT2(**cfg.MODEL) + + # manually build validator + eval_dataloader_cfg = { + "dataset": { + "name": "CylinderDataset", + "file_path": cfg.VALID_FILE_PATH, + "input_keys": cfg.MODEL.input_keys, + "label_keys": cfg.MODEL.output_keys, + "block_size": cfg.VALID_BLOCK_SIZE, + "stride": 1024, + "embedding_model": embedding_model, + }, + "sampler": { + "name": "BatchSampler", + "drop_last": False, + "shuffle": False, + }, + "batch_size": cfg.TRAIN.batch_size.eval, + "num_workers": 4, + } + + mse_validator = 
ppsci.validate.SupervisedValidator(
+        eval_dataloader_cfg,
+        ppsci.loss.MSELoss(),
+        metric={"MSE": ppsci.metric.MSE()},
+        name="MSE_Validator",
+    )
+    validator = {mse_validator.name: mse_validator}
+
+    # set visualizer(optional)
+    states = mse_validator.data_loader.dataset.data
+    embedding_data = mse_validator.data_loader.dataset.embedding_data
+    vis_datas = {
+        "embeds": embedding_data[: cfg.VIS_DATA_NUMS, :-1],
+        "states": states[: cfg.VIS_DATA_NUMS, 1:],
+    }
+
+    visualizer = {
+        "visulzie_states": ppsci.visualize.Visualizer2DPlot(
+            vis_datas,
+            {
+                "target_ux": lambda d: d["states"][:, :, 0],
+                "pred_ux": lambda d: output_transform(d)[:, :, 0],
+                "target_uy": lambda d: d["states"][:, :, 1],
+                "pred_uy": lambda d: output_transform(d)[:, :, 1],
+                "target_p": lambda d: d["states"][:, :, 2],
+                "preds_p": lambda d: output_transform(d)[:, :, 2],
+            },
+            batch_size=1,
+            num_timestamps=10,
+            stride=20,
+            xticks=np.linspace(-2, 14, 9),
+            yticks=np.linspace(-4, 4, 5),
+            prefix="result_states",
+        )
+    }
+    solver = ppsci.solver.Solver(
+        model,
+        output_dir=cfg.output_dir,
+        validator=validator,
+        visualizer=visualizer,
+        pretrained_model_path=cfg.EVAL.pretrained_model_path,
+    )
+    solver.eval()
+    # visualize prediction for pretrained model(optional)
+    solver.visualize()
+
+
+@hydra.main(version_base=None, config_path="./conf", config_name="transformer.yaml")
+def main(cfg: DictConfig):
+    if cfg.mode == "train":
+        train(cfg)
+    elif cfg.mode == "eval":
+        evaluate(cfg)
+    else:
+        raise ValueError(f"cfg.mode should in ['train', 'eval'], but got '{cfg.mode}'")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/lorenz/conf/enn.yaml b/examples/lorenz/conf/enn.yaml
new file mode 100644
index 000000000..f14912233
--- /dev/null
+++ b/examples/lorenz/conf/enn.yaml
@@ -0,0 +1,54 @@
+hydra:
+  run:
+    # dynamic output directory according to running time and override name
+    dir: outputs_lorenz_enn/${now:%Y-%m-%d}/${now:%H-%M-%S}/${hydra.job.override_dirname}
+  job:
+    name: ${mode} # name of logfile
+    chdir: false # keep current working directory unchanged
+    config:
+      override_dirname:
+        exclude_keys:
+          - TRAIN.checkpoint_path
+          - TRAIN.pretrained_model_path
+          - EVAL.pretrained_model_path
+          - mode
+          - output_dir
+          - log_freq
+  sweep:
+    # output directory for multirun
+    dir: ${hydra.run.dir}
+    subdir: ./
+
+# general settings
+mode: train # running mode: train/eval
+seed: 42
+output_dir: ${hydra:run.dir}
+TRAIN_BLOCK_SIZE: 16
+VALID_BLOCK_SIZE: 32
+TRAIN_FILE_PATH: ./datasets/lorenz_training_rk.hdf5
+VALID_FILE_PATH: ./datasets/lorenz_valid_rk.hdf5
+
+# model settings
+MODEL:
+  input_keys: ["states"]
+  output_keys: ["pred_states", "recover_states"]
+
+# training settings
+TRAIN:
+  epochs: 300
+  batch_size:
+    train: 512
+    eval: 512
+  lr_scheduler:
+    epochs: ${TRAIN.epochs}
+    learning_rate: 0.001
+    gamma: 0.995
+    by_epoch: true
+  optimizer:
+    weight_decay: 1e-8
+  pretrained_model_path: null
+  checkpoint_path: null
+
+# evaluation settings
+EVAL:
+  pretrained_model_path: null
diff --git a/examples/lorenz/conf/transformer.yaml b/examples/lorenz/conf/transformer.yaml
new file mode 100644
index 000000000..b5cb2886e
--- /dev/null
+++ b/examples/lorenz/conf/transformer.yaml
@@ -0,0 +1,65 @@
+hydra:
+  run:
+    # dynamic output directory according to running time and override name
+    dir: outputs_lorenz_transformer/${now:%Y-%m-%d}/${now:%H-%M-%S}/${hydra.job.override_dirname}
+  job:
+    name: ${mode} # name of logfile
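+    # NOTE: with chdir kept false, relative paths in this file (e.g. the
+    # dataset paths below) are resolved from the directory you launch from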
+    chdir: false # keep current working directory unchanged
+    config:
+      override_dirname:
+        exclude_keys:
+          - TRAIN.checkpoint_path
+          - TRAIN.pretrained_model_path
+          - EVAL.pretrained_model_path
+          - mode
+          - output_dir
+          - log_freq
+  sweep:
+    # output directory for multirun
+    dir: ${hydra.run.dir}
+    subdir: ./
+
+# general settings
+mode: train # running mode: train/eval
+seed: 42
+output_dir: ${hydra:run.dir}
+TRAIN_BLOCK_SIZE: 64
+VALID_BLOCK_SIZE: 256
+TRAIN_FILE_PATH: ./datasets/lorenz_training_rk.hdf5
+VALID_FILE_PATH: ./datasets/lorenz_valid_rk.hdf5
+
+# set working condition
+EMBEDDING_MODEL_PATH: ./outputs_lorenz_enn/checkpoints/latest
+VIS_DATA_NUMS: 16
+
+# model settings
+MODEL:
+  input_keys: ["embeds"]
+  output_keys: ["pred_embeds"]
+  num_layers: 4
+  num_ctx: 64
+  embed_size: 32
+  num_heads: 4
+
+# training settings
+TRAIN:
+  epochs: 200
+  batch_size:
+    train: 16
+    eval: 16
+  lr_scheduler:
+    epochs: ${TRAIN.epochs}
+    learning_rate: 0.001
+    T_0: 14
+    T_mult: 2
+    eta_min: 1.0e-9
+  optimizer:
+    weight_decay: 1.0e-8
+  eval_during_train: true
+  eval_freq: 50
+  pretrained_model_path: null
+  checkpoint_path: null
+
+# evaluation settings
+EVAL:
+  pretrained_model_path: null
diff --git a/examples/lorenz/train_enn.py b/examples/lorenz/train_enn.py
index 16b676344..d41c5b44b 100644
--- a/examples/lorenz/train_enn.py
+++ b/examples/lorenz/train_enn.py
@@ -18,11 +18,14 @@
 # This file is for step1: training a embedding model.
 # This file is based on PaddleScience/ppsci API.

+from os import path as osp
+
+import hydra
 import numpy as np
 import paddle
+from omegaconf import DictConfig

 import ppsci
-from ppsci.utils import config
 from ppsci.utils import logger


@@ -36,52 +39,47 @@ def get_mean_std(data: np.ndarray):
     return mean, std


-if __name__ == "__main__":
-    args = config.parse_args()
+def train(cfg: DictConfig):
     # set random seed for reproducibility
-    ppsci.utils.misc.set_random_seed(42)
-    # set training hyper-parameters
-    EPOCHS = 300 if not args.epochs else args.epochs
-    TRAIN_BLOCK_SIZE = 16
-    VALID_BLOCK_SIZE = 32
-
-    input_keys = ("states",)
-    output_keys = ("pred_states", "recover_states")
-    weights = (1.0 * (TRAIN_BLOCK_SIZE - 1), 1.0e4 * TRAIN_BLOCK_SIZE)
-    regularization_key = "k_matrix"
-
-    OUTPUT_DIR = "./output/lorenz_enn" if not args.output_dir else args.output_dir
-    TRAIN_FILE_PATH = "./datasets/lorenz_training_rk.hdf5"
-    VALID_FILE_PATH = "./datasets/lorenz_valid_rk.hdf5"
+    ppsci.utils.misc.set_random_seed(cfg.seed)
     # initialize logger
-    logger.init_logger("ppsci", f"{OUTPUT_DIR}/train.log", "info")
+    logger.init_logger("ppsci", osp.join(cfg.output_dir, f"{cfg.mode}.log"), "info")

+    weights = (1.0 * (cfg.TRAIN_BLOCK_SIZE - 1), 1.0e4 * cfg.TRAIN_BLOCK_SIZE)
+    regularization_key = "k_matrix"
     # manually build constraint(s)
     train_dataloader_cfg = {
         "dataset": {
             "name": "LorenzDataset",
-            "file_path": TRAIN_FILE_PATH,
-            "input_keys": input_keys,
-            "label_keys": output_keys,
-            "block_size": TRAIN_BLOCK_SIZE,
+            "file_path": cfg.TRAIN_FILE_PATH,
+            "input_keys": cfg.MODEL.input_keys,
+            "label_keys": cfg.MODEL.output_keys,
+            "block_size": cfg.TRAIN_BLOCK_SIZE,
             "stride": 16,
-            "weight_dict": {key: value for key, value in zip(output_keys, weights)},
+            "weight_dict": {
+                key: value for key, value in zip(cfg.MODEL.output_keys, weights)
+            },
         },
         "sampler": {
             "name": "BatchSampler",
             "drop_last": True,
             "shuffle": True,
         },
-        "batch_size": 512,
+        "batch_size": cfg.TRAIN.batch_size.train,
         "num_workers": 4,
     }

     sup_constraint = ppsci.constraint.SupervisedConstraint(
         train_dataloader_cfg,
         ppsci.loss.MSELossWithL2Decay(
-            
regularization_dict={regularization_key: 1.0e-1 * (TRAIN_BLOCK_SIZE - 1)} + regularization_dict={ + regularization_key: 1.0e-1 * (cfg.TRAIN_BLOCK_SIZE - 1) + } ), - {key: lambda out, k=key: out[k] for key in output_keys + (regularization_key,)}, + { + key: lambda out, k=key: out[k] + for key in cfg.MODEL.output_keys + (regularization_key,) + }, name="Sup", ) constraint = {sup_constraint.name: sup_constraint} @@ -92,43 +90,43 @@ def get_mean_std(data: np.ndarray): # manually init model data_mean, data_std = get_mean_std(sup_constraint.data_loader.dataset.data) model = ppsci.arch.LorenzEmbedding( - input_keys, output_keys + (regularization_key,), data_mean, data_std + cfg.MODEL.input_keys, + cfg.MODEL.output_keys + (regularization_key,), + data_mean, + data_std, ) # init optimizer and lr scheduler clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=0.1) lr_scheduler = ppsci.optimizer.lr_scheduler.ExponentialDecay( - EPOCHS, - ITERS_PER_EPOCH, - 0.001, - gamma=0.995, + iters_per_epoch=ITERS_PER_EPOCH, decay_steps=ITERS_PER_EPOCH, - by_epoch=True, + **cfg.TRAIN.lr_scheduler, )() optimizer = ppsci.optimizer.Adam( - lr_scheduler, - weight_decay=1e-8, - grad_clip=clip, + lr_scheduler, grad_clip=clip, **cfg.TRAIN.optimizer )(model) # manually build validator - weights = (1.0 * (VALID_BLOCK_SIZE - 1), 1.0e4 * VALID_BLOCK_SIZE) + weights = (1.0 * (cfg.VALID_BLOCK_SIZE - 1), 1.0e4 * cfg.VALID_BLOCK_SIZE) eval_dataloader_cfg = { "dataset": { "name": "LorenzDataset", - "file_path": VALID_FILE_PATH, - "input_keys": input_keys, - "label_keys": output_keys, - "block_size": VALID_BLOCK_SIZE, + "file_path": cfg.VALID_FILE_PATH, + "input_keys": cfg.MODEL.input_keys, + "label_keys": cfg.MODEL.output_keys, + "block_size": cfg.VALID_BLOCK_SIZE, "stride": 32, - "weight_dict": {key: value for key, value in zip(output_keys, weights)}, + "weight_dict": { + key: value for key, value in zip(cfg.MODEL.output_keys, weights) + }, }, "sampler": { "name": "BatchSampler", "drop_last": False, "shuffle": False, }, - "batch_size": 512, + "batch_size": cfg.TRAIN.batch_size.eval, "num_workers": 4, } @@ -144,10 +142,10 @@ def get_mean_std(data: np.ndarray): solver = ppsci.solver.Solver( model, constraint, - OUTPUT_DIR, + cfg.output_dir, optimizer, lr_scheduler, - EPOCHS, + cfg.TRAIN.epochs, ITERS_PER_EPOCH, eval_during_train=True, validator=validator, @@ -157,12 +155,109 @@ def get_mean_std(data: np.ndarray): # evaluate after finished training solver.eval() - # directly evaluate pretrained model(optional) - logger.init_logger("ppsci", f"{OUTPUT_DIR}/eval.log", "info") + +def evaluate(cfg: DictConfig): + # set random seed for reproducibility + ppsci.utils.misc.set_random_seed(cfg.seed) + # initialize logger + logger.init_logger("ppsci", osp.join(cfg.output_dir, f"{cfg.mode}.log"), "info") + + weights = (1.0 * (cfg.TRAIN_BLOCK_SIZE - 1), 1.0e4 * cfg.TRAIN_BLOCK_SIZE) + regularization_key = "k_matrix" + # manually build constraint(s) + train_dataloader_cfg = { + "dataset": { + "name": "LorenzDataset", + "file_path": cfg.TRAIN_FILE_PATH, + "input_keys": cfg.MODEL.input_keys, + "label_keys": cfg.MODEL.output_keys, + "block_size": cfg.TRAIN_BLOCK_SIZE, + "stride": 16, + "weight_dict": { + key: value for key, value in zip(cfg.MODEL.output_keys, weights) + }, + }, + "sampler": { + "name": "BatchSampler", + "drop_last": True, + "shuffle": True, + }, + "batch_size": cfg.TRAIN.batch_size.train, + "num_workers": 4, + } + + sup_constraint = ppsci.constraint.SupervisedConstraint( + train_dataloader_cfg, + ppsci.loss.MSELossWithL2Decay( + 
regularization_dict={ + regularization_key: 1.0e-1 * (cfg.TRAIN_BLOCK_SIZE - 1) + } + ), + { + key: lambda out, k=key: out[k] + for key in cfg.MODEL.output_keys + (regularization_key,) + }, + name="Sup", + ) + + # manually init model + data_mean, data_std = get_mean_std(sup_constraint.data_loader.dataset.data) + model = ppsci.arch.LorenzEmbedding( + cfg.MODEL.input_keys, + cfg.MODEL.output_keys + (regularization_key,), + data_mean, + data_std, + ) + + # manually build validator + weights = (1.0 * (cfg.VALID_BLOCK_SIZE - 1), 1.0e4 * cfg.VALID_BLOCK_SIZE) + eval_dataloader_cfg = { + "dataset": { + "name": "LorenzDataset", + "file_path": cfg.VALID_FILE_PATH, + "input_keys": cfg.MODEL.input_keys, + "label_keys": cfg.MODEL.output_keys, + "block_size": cfg.VALID_BLOCK_SIZE, + "stride": 32, + "weight_dict": { + key: value for key, value in zip(cfg.MODEL.output_keys, weights) + }, + }, + "sampler": { + "name": "BatchSampler", + "drop_last": False, + "shuffle": False, + }, + "batch_size": cfg.TRAIN.batch_size.eval, + "num_workers": 4, + } + + mse_validator = ppsci.validate.SupervisedValidator( + eval_dataloader_cfg, + ppsci.loss.MSELoss(), + metric={"MSE": ppsci.metric.MSE()}, + name="MSE_Validator", + ) + validator = {mse_validator.name: mse_validator} + solver = ppsci.solver.Solver( model, - output_dir=OUTPUT_DIR, + output_dir=cfg.output_dir, validator=validator, - pretrained_model_path=f"{OUTPUT_DIR}/checkpoints/latest", + pretrained_model_path=cfg.EVAL.pretrained_model_path, ) solver.eval() + + +@hydra.main(version_base=None, config_path="./conf", config_name="enn.yaml") +def main(cfg: DictConfig): + if cfg.mode == "train": + train(cfg) + elif cfg.mode == "eval": + evaluate(cfg) + else: + raise ValueError(f"cfg.mode should in ['train', 'eval'], but got '{cfg.mode}'") + + +if __name__ == "__main__": + main() diff --git a/examples/lorenz/train_transformer.py b/examples/lorenz/train_transformer.py index 5b3b375f5..b3e2c0485 100644 --- a/examples/lorenz/train_transformer.py +++ b/examples/lorenz/train_transformer.py @@ -18,13 +18,15 @@ # This file is for step2: training a transformer model, based on frozen pretrained embedding model. # This file is based on PaddleScience/ppsci API. 
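+# NOTE: hyper-parameters previously hard-coded below are now read from conf/transformer.yaml
+# via hydra; they can be overridden on the command line, e.g. `python train_transformer.py mode=eval ...`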
+from os import path as osp from typing import Dict +import hydra import paddle +from omegaconf import DictConfig import ppsci from ppsci.arch import base -from ppsci.utils import config from ppsci.utils import logger from ppsci.utils import save_load @@ -50,47 +52,26 @@ def __call__(self, x: Dict[str, paddle.Tensor]): return pred_states -if __name__ == "__main__": +def train(cfg: DictConfig): # train time-series: 2048 time-steps: 256 block-size: 64 stride: 64 # valid time-series: 64 time-steps: 1024 block-size: 256 stride: 1024 # test time-series: 256 time-steps: 1024 - args = config.parse_args() # set random seed for reproducibility - ppsci.utils.misc.set_random_seed(42) - # set training hyper-parameters - NUM_LAYERS = 4 - NUM_CTX = 64 - EMBED_SIZE = 32 - NUM_HEADS = 4 - - EPOCHS = 200 if not args.epochs else args.epochs - TRAIN_BLOCK_SIZE = 64 - VALID_BLOCK_SIZE = 256 - input_keys = ("embeds",) - output_keys = ("pred_embeds",) - - VIS_DATA_NUMS = 16 - - TRAIN_FILE_PATH = "./datasets/lorenz_training_rk.hdf5" - VALID_FILE_PATH = "./datasets/lorenz_valid_rk.hdf5" - EMBEDDING_MODEL_PATH = "./output/lorenz_enn/checkpoints/latest" - OUTPUT_DIR = ( - "./output/lorenz_transformer" if not args.output_dir else args.output_dir - ) + ppsci.utils.misc.set_random_seed(cfg.seed) # initialize logger - logger.init_logger("ppsci", f"{OUTPUT_DIR}/train.log", "info") + logger.init_logger("ppsci", osp.join(cfg.output_dir, f"{cfg.mode}.log"), "info") - embedding_model = build_embedding_model(EMBEDDING_MODEL_PATH) + embedding_model = build_embedding_model(cfg.EMBEDDING_MODEL_PATH) output_transform = OutputTransform(embedding_model) # manually build constraint(s) train_dataloader_cfg = { "dataset": { "name": "LorenzDataset", - "input_keys": input_keys, - "label_keys": output_keys, - "file_path": TRAIN_FILE_PATH, - "block_size": TRAIN_BLOCK_SIZE, + "input_keys": cfg.MODEL.input_keys, + "label_keys": cfg.MODEL.output_keys, + "file_path": cfg.TRAIN_FILE_PATH, + "block_size": cfg.TRAIN_BLOCK_SIZE, "stride": 64, "embedding_model": embedding_model, }, @@ -99,7 +80,7 @@ def __call__(self, x: Dict[str, paddle.Tensor]): "drop_last": True, "shuffle": True, }, - "batch_size": 16, + "batch_size": cfg.TRAIN.batch_size.train, "num_workers": 4, } @@ -114,39 +95,25 @@ def __call__(self, x: Dict[str, paddle.Tensor]): ITERS_PER_EPOCH = len(constraint["Sup"].data_loader) # manually init model - model = ppsci.arch.PhysformerGPT2( - input_keys, - output_keys, - NUM_LAYERS, - NUM_CTX, - EMBED_SIZE, - NUM_HEADS, - ) + model = ppsci.arch.PhysformerGPT2(**cfg.MODEL) # init optimizer and lr scheduler clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=0.1) lr_scheduler = ppsci.optimizer.lr_scheduler.CosineWarmRestarts( - EPOCHS, - ITERS_PER_EPOCH, - 0.001, - T_0=14, - T_mult=2, - eta_min=1e-9, + iters_per_epoch=ITERS_PER_EPOCH, **cfg.TRAIN.lr_scheduler )() optimizer = ppsci.optimizer.Adam( - lr_scheduler, - weight_decay=1e-8, - grad_clip=clip, + lr_scheduler, grad_clip=clip, **cfg.TRAIN.optimizer )(model) # manually build validator eval_dataloader_cfg = { "dataset": { "name": "LorenzDataset", - "file_path": VALID_FILE_PATH, - "input_keys": input_keys, - "label_keys": output_keys, - "block_size": VALID_BLOCK_SIZE, + "file_path": cfg.VALID_FILE_PATH, + "input_keys": cfg.MODEL.input_keys, + "label_keys": cfg.MODEL.output_keys, + "block_size": cfg.VALID_BLOCK_SIZE, "stride": 1024, "embedding_model": embedding_model, }, @@ -155,7 +122,7 @@ def __call__(self, x: Dict[str, paddle.Tensor]): "drop_last": False, "shuffle": False, }, - "batch_size": 
16,
+        "batch_size": cfg.TRAIN.batch_size.eval,
         "num_workers": 4,
     }
 
@@ -171,8 +138,8 @@ def __call__(self, x: Dict[str, paddle.Tensor]):
     states = mse_validator.data_loader.dataset.data
     embedding_data = mse_validator.data_loader.dataset.embedding_data
     vis_datas = {
-        "embeds": embedding_data[:VIS_DATA_NUMS, :-1, :],
-        "states": states[:VIS_DATA_NUMS, 1:, :],
+        "embeds": embedding_data[: cfg.VIS_DATA_NUMS, :-1, :],
+        "states": states[: cfg.VIS_DATA_NUMS, 1:, :],
     }
 
     visualizer = {
@@ -190,13 +157,13 @@ def __call__(self, x: Dict[str, paddle.Tensor]):
     solver = ppsci.solver.Solver(
         model,
         constraint,
-        OUTPUT_DIR,
+        cfg.output_dir,
         optimizer,
         lr_scheduler,
-        EPOCHS,
+        cfg.TRAIN.epochs,
         ITERS_PER_EPOCH,
-        eval_during_train=True,
-        eval_freq=50,
+        eval_during_train=cfg.TRAIN.eval_during_train,
+        eval_freq=cfg.TRAIN.eval_freq,
         validator=validator,
         visualizer=visualizer,
     )
@@ -207,15 +174,86 @@ def __call__(self, x: Dict[str, paddle.Tensor]):
     # visualize prediction after finished training
     solver.visualize()
 
+
+def evaluate(cfg: DictConfig):
     # directly evaluate pretrained model(optional)
-    logger.init_logger("ppsci", f"{OUTPUT_DIR}/eval.log", "info")
+    logger.init_logger("ppsci", osp.join(cfg.output_dir, f"{cfg.mode}.log"), "info")
+
+    embedding_model = build_embedding_model(cfg.EMBEDDING_MODEL_PATH)
+    output_transform = OutputTransform(embedding_model)
+
+    # manually init model
+    model = ppsci.arch.PhysformerGPT2(**cfg.MODEL)
+
+    # manually build validator
+    eval_dataloader_cfg = {
+        "dataset": {
+            "name": "LorenzDataset",
+            "file_path": cfg.VALID_FILE_PATH,
+            "input_keys": cfg.MODEL.input_keys,
+            "label_keys": cfg.MODEL.output_keys,
+            "block_size": cfg.VALID_BLOCK_SIZE,
+            "stride": 1024,
+            "embedding_model": embedding_model,
+        },
+        "sampler": {
+            "name": "BatchSampler",
+            "drop_last": False,
+            "shuffle": False,
+        },
+        "batch_size": cfg.TRAIN.batch_size.eval,
+        "num_workers": 4,
+    }
+
+    mse_validator = ppsci.validate.SupervisedValidator(
+        eval_dataloader_cfg,
+        ppsci.loss.MSELoss(),
+        metric={"MSE": ppsci.metric.MSE()},
+        name="MSE_Validator",
+    )
+    validator = {mse_validator.name: mse_validator}
+
+    # set visualizer(optional)
+    states = mse_validator.data_loader.dataset.data
+    embedding_data = mse_validator.data_loader.dataset.embedding_data
+    vis_datas = {
+        "embeds": embedding_data[: cfg.VIS_DATA_NUMS, :-1, :],
+        "states": states[: cfg.VIS_DATA_NUMS, 1:, :],
+    }
+
+    visualizer = {
+        "visulzie_states": ppsci.visualize.VisualizerScatter3D(
+            vis_datas,
+            {
+                "pred_states": lambda d: output_transform(d),
+                "states": lambda d: d["states"],
+            },
+            num_timestamps=1,
+            prefix="result_states",
+        )
+    }
+
     solver = ppsci.solver.Solver(
         model,
-        output_dir=OUTPUT_DIR,
+        output_dir=cfg.output_dir,
         validator=validator,
         visualizer=visualizer,
-        pretrained_model_path=f"{OUTPUT_DIR}/checkpoints/latest",
+        pretrained_model_path=cfg.EVAL.pretrained_model_path,
     )
     solver.eval()
     # visualize prediction for pretrained model(optional)
     solver.visualize()
+
+
+@hydra.main(version_base=None, config_path="./conf", config_name="transformer.yaml")
+def main(cfg: DictConfig):
+    if cfg.mode == "train":
+        train(cfg)
+    elif cfg.mode == "eval":
+        evaluate(cfg)
+    else:
+        raise ValueError(f"cfg.mode should be in ['train', 'eval'], but got '{cfg.mode}'")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/rossler/conf/enn.yaml b/examples/rossler/conf/enn.yaml
new file mode 100644
index 000000000..8c3aab19c
--- /dev/null
+++ b/examples/rossler/conf/enn.yaml
@@ -0,0 +1,54 @@
+hydra:
+  run:
+    # dynamic output directory according to running time and override name
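+    # e.g. outputs_rossler_enn/2023-10-27/08-10-32/ (timestamps are illustrative)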
+    dir: outputs_rossler_enn/${now:%Y-%m-%d}/${now:%H-%M-%S}/${hydra.job.override_dirname}
+  job:
+    name: ${mode} # name of logfile
+    chdir: false # keep current working directory unchanged
+    config:
+      override_dirname:
+        exclude_keys:
+          - TRAIN.checkpoint_path
+          - TRAIN.pretrained_model_path
+          - EVAL.pretrained_model_path
+          - mode
+          - output_dir
+          - log_freq
+  sweep:
+    # output directory for multirun
+    dir: ${hydra.run.dir}
+    subdir: ./
+
+# general settings
+mode: train # running mode: train/eval
+seed: 6
+output_dir: ${hydra:run.dir}
+TRAIN_BLOCK_SIZE: 16
+VALID_BLOCK_SIZE: 32
+TRAIN_FILE_PATH: ./datasets/rossler_training.hdf5
+VALID_FILE_PATH: ./datasets/rossler_valid.hdf5
+
+# model settings
+MODEL:
+  input_keys: ["states"]
+  output_keys: ["pred_states", "recover_states"]
+
+# training settings
+TRAIN:
+  epochs: 300
+  batch_size:
+    train: 256
+    eval: 8
+  lr_scheduler:
+    epochs: ${TRAIN.epochs}
+    learning_rate: 0.001
+    gamma: 0.995
+    by_epoch: true
+  optimizer:
+    weight_decay: 1e-8
+  pretrained_model_path: null
+  checkpoint_path: null
+
+# evaluation settings
+EVAL:
+  pretrained_model_path: null
diff --git a/examples/rossler/conf/transformer.yaml b/examples/rossler/conf/transformer.yaml
new file mode 100644
index 000000000..032c3f2ff
--- /dev/null
+++ b/examples/rossler/conf/transformer.yaml
@@ -0,0 +1,65 @@
+hydra:
+  run:
+    # dynamic output directory according to running time and override name
+    dir: outputs_rossler_transformer/${now:%Y-%m-%d}/${now:%H-%M-%S}/${hydra.job.override_dirname}
+  job:
+    name: ${mode} # name of logfile
+    chdir: false # keep current working directory unchanged
+    config:
+      override_dirname:
+        exclude_keys:
+          - TRAIN.checkpoint_path
+          - TRAIN.pretrained_model_path
+          - EVAL.pretrained_model_path
+          - mode
+          - output_dir
+          - log_freq
+  sweep:
+    # output directory for multirun
+    dir: ${hydra.run.dir}
+    subdir: ./
+
+# general settings
+mode: train # running mode: train/eval
+seed: 42
+output_dir: ${hydra:run.dir}
+TRAIN_BLOCK_SIZE: 32
+VALID_BLOCK_SIZE: 256
+TRAIN_FILE_PATH: ./datasets/rossler_training.hdf5
+VALID_FILE_PATH: ./datasets/rossler_valid.hdf5
+
+# set working condition
+EMBEDDING_MODEL_PATH: ./outputs_rossler_enn/checkpoints/latest
+VIS_DATA_NUMS: 16
+
+# model settings
+MODEL:
+  input_keys: ["embeds"]
+  output_keys: ["pred_embeds"]
+  num_layers: 4
+  num_ctx: 64
+  embed_size: 32
+  num_heads: 4
+
+# training settings
+TRAIN:
+  epochs: 200
+  batch_size:
+    train: 64
+    eval: 16
+  lr_scheduler:
+    epochs: ${TRAIN.epochs}
+    learning_rate: 0.001
+    T_0: 14
+    T_mult: 2
+    eta_min: 1.0e-9
+  optimizer:
+    weight_decay: 1.0e-8
+  eval_during_train: true
+  eval_freq: 50
+  pretrained_model_path: null
+  checkpoint_path: null
+
+# evaluation settings
+EVAL:
+  pretrained_model_path: null
diff --git a/examples/rossler/train_enn.py b/examples/rossler/train_enn.py
index 6383e1b46..ccd937160 100644
--- a/examples/rossler/train_enn.py
+++ b/examples/rossler/train_enn.py
@@ -18,11 +18,14 @@
 # This file is for step1: training a embedding model.
 # This file is based on PaddleScience/ppsci API.
 
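+# Hyperparameters now live in conf/enn.yaml and are loaded via Hydra; any value
+# can be overridden from the CLI, e.g. (checkpoint path illustrative):
+#   python train_enn.py TRAIN.epochs=300
+#   python train_enn.py mode=eval EVAL.pretrained_model_path=./outputs_rossler_enn/<date>/<time>/checkpoints/latest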
+from os import path as osp + +import hydra import numpy as np import paddle +from omegaconf import DictConfig import ppsci -from ppsci.utils import config from ppsci.utils import logger @@ -40,52 +43,45 @@ def get_mean_std(data: np.ndarray): return mean, std -if __name__ == "__main__": - args = config.parse_args() +def train(cfg: DictConfig): # set random seed for reproducibility - ppsci.utils.misc.set_random_seed(6) - # set training hyper-parameters - EPOCHS = 300 if not args.epochs else args.epochs - TRAIN_BLOCK_SIZE = 16 - VALID_BLOCK_SIZE = 32 - - input_keys = ("states",) - output_keys = ("pred_states", "recover_states") - weights = (1.0 * (TRAIN_BLOCK_SIZE - 1), 1.0e3 * TRAIN_BLOCK_SIZE) - regularization_key = "k_matrix" - - OUTPUT_DIR = "./output/rossler_enn" if not args.output_dir else args.output_dir - TRAIN_FILE_PATH = "./datasets/rossler_training.hdf5" - VALID_FILE_PATH = "./datasets/rossler_valid.hdf5" + ppsci.utils.misc.set_random_seed(cfg.seed) # initialize logger - logger.init_logger("ppsci", f"{OUTPUT_DIR}/train.log", "info") + logger.init_logger("ppsci", osp.join(cfg.output_dir, f"{cfg.mode}.log"), "info") + weights = (1.0 * (cfg.TRAIN_BLOCK_SIZE - 1), 1.0e3 * cfg.TRAIN_BLOCK_SIZE) + regularization_key = "k_matrix" # manually build constraint(s) train_dataloader_cfg = { "dataset": { "name": "RosslerDataset", - "file_path": TRAIN_FILE_PATH, - "input_keys": input_keys, - "label_keys": output_keys, - "block_size": TRAIN_BLOCK_SIZE, + "file_path": cfg.TRAIN_FILE_PATH, + "input_keys": cfg.MODEL.input_keys, + "label_keys": cfg.MODEL.output_keys, + "block_size": cfg.TRAIN_BLOCK_SIZE, "stride": 16, - "weight_dict": {key: value for key, value in zip(output_keys, weights)}, + "weight_dict": { + key: value for key, value in zip(cfg.MODEL.output_keys, weights) + }, }, "sampler": { "name": "BatchSampler", "drop_last": True, "shuffle": True, }, - "batch_size": 256, + "batch_size": cfg.TRAIN.batch_size.train, "num_workers": 4, } sup_constraint = ppsci.constraint.SupervisedConstraint( train_dataloader_cfg, ppsci.loss.MSELossWithL2Decay( - regularization_dict={regularization_key: 1e-1 * (TRAIN_BLOCK_SIZE - 1)} + regularization_dict={regularization_key: 1e-1 * (cfg.TRAIN_BLOCK_SIZE - 1)} ), - {key: lambda out, k=key: out[k] for key in output_keys + (regularization_key,)}, + { + key: lambda out, k=key: out[k] + for key in cfg.MODEL.output_keys + (regularization_key,) + }, name="Sup", ) constraint = {sup_constraint.name: sup_constraint} @@ -96,43 +92,43 @@ def get_mean_std(data: np.ndarray): # manually init model data_mean, data_std = get_mean_std(sup_constraint.data_loader.dataset.data) model = ppsci.arch.RosslerEmbedding( - input_keys, output_keys + (regularization_key,), data_mean, data_std + cfg.MODEL.input_keys, + cfg.MODEL.output_keys + (regularization_key,), + data_mean, + data_std, ) # init optimizer and lr scheduler clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=0.1) lr_scheduler = ppsci.optimizer.lr_scheduler.ExponentialDecay( - EPOCHS, - ITERS_PER_EPOCH, - 0.001, - gamma=0.995, + iters_per_epoch=ITERS_PER_EPOCH, decay_steps=ITERS_PER_EPOCH, - by_epoch=True, + **cfg.TRAIN.lr_scheduler, )() optimizer = ppsci.optimizer.Adam( - lr_scheduler, - weight_decay=1e-8, - grad_clip=clip, + lr_scheduler, grad_clip=clip, **cfg.TRAIN.optimizer )(model) # manually build validator - weights = (1.0 * (VALID_BLOCK_SIZE - 1), 1.0e4 * VALID_BLOCK_SIZE) + weights = (1.0 * (cfg.VALID_BLOCK_SIZE - 1), 1.0e4 * cfg.VALID_BLOCK_SIZE) eval_dataloader_cfg = { "dataset": { "name": "RosslerDataset", - 
"file_path": VALID_FILE_PATH, - "input_keys": input_keys, - "label_keys": output_keys, - "block_size": VALID_BLOCK_SIZE, + "file_path": cfg.VALID_FILE_PATH, + "input_keys": cfg.MODEL.input_keys, + "label_keys": cfg.MODEL.output_keys, + "block_size": cfg.VALID_BLOCK_SIZE, "stride": 32, - "weight_dict": {key: value for key, value in zip(output_keys, weights)}, + "weight_dict": { + key: value for key, value in zip(cfg.MODEL.output_keys, weights) + }, }, "sampler": { "name": "BatchSampler", "drop_last": False, "shuffle": False, }, - "batch_size": 8, + "batch_size": cfg.TRAIN.batch_size.eval, "num_workers": 4, } @@ -147,10 +143,10 @@ def get_mean_std(data: np.ndarray): solver = ppsci.solver.Solver( model, constraint, - OUTPUT_DIR, + cfg.output_dir, optimizer, lr_scheduler, - EPOCHS, + cfg.TRAIN.epochs, ITERS_PER_EPOCH, eval_during_train=True, validator=validator, @@ -160,12 +156,106 @@ def get_mean_std(data: np.ndarray): # evaluate after finished training solver.eval() - # directly evaluate pretrained model(optional) - logger.init_logger("ppsci", f"{OUTPUT_DIR}/eval.log", "info") + +def evaluate(cfg: DictConfig): + # set random seed for reproducibility + ppsci.utils.misc.set_random_seed(cfg.seed) + # initialize logger + logger.init_logger("ppsci", osp.join(cfg.output_dir, f"{cfg.mode}.log"), "info") + + weights = (1.0 * (cfg.TRAIN_BLOCK_SIZE - 1), 1.0e3 * cfg.TRAIN_BLOCK_SIZE) + regularization_key = "k_matrix" + # manually build constraint(s) + train_dataloader_cfg = { + "dataset": { + "name": "RosslerDataset", + "file_path": cfg.TRAIN_FILE_PATH, + "input_keys": cfg.MODEL.input_keys, + "label_keys": cfg.MODEL.output_keys, + "block_size": cfg.TRAIN_BLOCK_SIZE, + "stride": 16, + "weight_dict": { + key: value for key, value in zip(cfg.MODEL.output_keys, weights) + }, + }, + "sampler": { + "name": "BatchSampler", + "drop_last": True, + "shuffle": True, + }, + "batch_size": cfg.TRAIN.batch_size.train, + "num_workers": 4, + } + + sup_constraint = ppsci.constraint.SupervisedConstraint( + train_dataloader_cfg, + ppsci.loss.MSELossWithL2Decay( + regularization_dict={regularization_key: 1e-1 * (cfg.TRAIN_BLOCK_SIZE - 1)} + ), + { + key: lambda out, k=key: out[k] + for key in cfg.MODEL.output_keys + (regularization_key,) + }, + name="Sup", + ) + + # manually init model + data_mean, data_std = get_mean_std(sup_constraint.data_loader.dataset.data) + model = ppsci.arch.RosslerEmbedding( + cfg.MODEL.input_keys, + cfg.MODEL.output_keys + (regularization_key,), + data_mean, + data_std, + ) + + # manually build validator + weights = (1.0 * (cfg.VALID_BLOCK_SIZE - 1), 1.0e4 * cfg.VALID_BLOCK_SIZE) + eval_dataloader_cfg = { + "dataset": { + "name": "RosslerDataset", + "file_path": cfg.VALID_FILE_PATH, + "input_keys": cfg.MODEL.input_keys, + "label_keys": cfg.MODEL.output_keys, + "block_size": cfg.VALID_BLOCK_SIZE, + "stride": 32, + "weight_dict": { + key: value for key, value in zip(cfg.MODEL.output_keys, weights) + }, + }, + "sampler": { + "name": "BatchSampler", + "drop_last": False, + "shuffle": False, + }, + "batch_size": cfg.TRAIN.batch_size.eval, + "num_workers": 4, + } + + mse_validator = ppsci.validate.SupervisedValidator( + eval_dataloader_cfg, + ppsci.loss.MSELoss(), + metric={"MSE": ppsci.metric.MSE()}, + name="MSE_Validator", + ) + validator = {mse_validator.name: mse_validator} solver = ppsci.solver.Solver( model, - output_dir=OUTPUT_DIR, + output_dir=cfg.output_dir, validator=validator, - pretrained_model_path=f"{OUTPUT_DIR}/checkpoints/latest", + pretrained_model_path=cfg.EVAL.pretrained_model_path, 
)
     solver.eval()
+
+
+@hydra.main(version_base=None, config_path="./conf", config_name="enn.yaml")
+def main(cfg: DictConfig):
+    if cfg.mode == "train":
+        train(cfg)
+    elif cfg.mode == "eval":
+        evaluate(cfg)
+    else:
+        raise ValueError(f"cfg.mode should be in ['train', 'eval'], but got '{cfg.mode}'")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/rossler/train_transformer.py b/examples/rossler/train_transformer.py
index 2450fccf4..34c79a8e3 100644
--- a/examples/rossler/train_transformer.py
+++ b/examples/rossler/train_transformer.py
@@ -18,13 +18,15 @@
 # This file is for step2: training a transformer model, based on frozen pretrained embedding model.
 # This file is based on PaddleScience/ppsci API.
 
+from os import path as osp
 from typing import Dict
 
+import hydra
 import paddle
+from omegaconf import DictConfig
 
 import ppsci
 from ppsci.arch import base
-from ppsci.utils import config
 from ppsci.utils import logger
 from ppsci.utils import save_load
 
@@ -50,44 +52,23 @@ def __call__(self, x: Dict[str, paddle.Tensor]):
     return pred_states
 
 
-if __name__ == "__main__":
-    args = config.parse_args()
+def train(cfg: DictConfig):
     # set random seed for reproducibility
-    ppsci.utils.misc.set_random_seed(42)
-    # set training hyper-parameters
-    NUM_LAYERS = 4
-    NUM_CTX = 64
-    EMBED_SIZE = 32
-    NUM_HEADS = 4
-
-    EPOCHS = 200 if not args.epochs else args.epochs
-    TRAIN_BLOCK_SIZE = 32
-    VALID_BLOCK_SIZE = 256
-    input_keys = ("embeds",)
-    output_keys = ("pred_embeds",)
-
-    VIS_DATA_NUMS = 16
-
-    TRAIN_FILE_PATH = "./datasets/rossler_training.hdf5"
-    VALID_FILE_PATH = "./datasets/rossler_valid.hdf5"
-    EMBEDDING_MODEL_PATH = "./output/rossler_enn/checkpoints/latest"
-    OUTPUT_DIR = (
-        "./output/rossler_transformer" if not args.output_dir else args.output_dir
-    )
+    ppsci.utils.misc.set_random_seed(cfg.seed)
     # initialize logger
-    logger.init_logger("ppsci", f"{OUTPUT_DIR}/train.log", "info")
+    logger.init_logger("ppsci", osp.join(cfg.output_dir, f"{cfg.mode}.log"), "info")
 
-    embedding_model = build_embedding_model(EMBEDDING_MODEL_PATH)
+    embedding_model = build_embedding_model(cfg.EMBEDDING_MODEL_PATH)
     output_transform = OutputTransform(embedding_model)
 
     # manually build constraint(s)
     train_dataloader_cfg = {
         "dataset": {
             "name": "RosslerDataset",
-            "file_path": TRAIN_FILE_PATH,
-            "input_keys": input_keys,
-            "label_keys": output_keys,
-            "block_size": TRAIN_BLOCK_SIZE,
+            "file_path": cfg.TRAIN_FILE_PATH,
+            "input_keys": cfg.MODEL.input_keys,
+            "label_keys": cfg.MODEL.output_keys,
+            "block_size": cfg.TRAIN_BLOCK_SIZE,
             "stride": 16,
             "embedding_model": embedding_model,
         },
@@ -96,7 +77,7 @@ def __call__(self, x: Dict[str, paddle.Tensor]):
             "drop_last": True,
             "shuffle": True,
         },
-        "batch_size": 64,
+        "batch_size": cfg.TRAIN.batch_size.train,
         "num_workers": 4,
     }
 
@@ -111,39 +92,25 @@ def __call__(self, x: Dict[str, paddle.Tensor]):
     ITERS_PER_EPOCH = len(constraint["Sup"].data_loader)
 
     # manually init model
-    model = ppsci.arch.PhysformerGPT2(
-        input_keys,
-        output_keys,
-        NUM_LAYERS,
-        NUM_CTX,
-        EMBED_SIZE,
-        NUM_HEADS,
-    )
+    model = ppsci.arch.PhysformerGPT2(**cfg.MODEL)
 
     # init optimizer and lr scheduler
     clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=0.1)
     lr_scheduler = ppsci.optimizer.lr_scheduler.CosineWarmRestarts(
-        EPOCHS,
-        ITERS_PER_EPOCH,
-        0.001,
-        T_0=14,
-        T_mult=2,
-        eta_min=1e-9,
+        iters_per_epoch=ITERS_PER_EPOCH, **cfg.TRAIN.lr_scheduler
     )()
     optimizer = ppsci.optimizer.Adam(
-        lr_scheduler,
-        weight_decay=1e-8,
-        grad_clip=clip,
+        lr_scheduler, grad_clip=clip, **cfg.TRAIN.optimizer
     )(model)
 
     # 
manually build validator eval_dataloader_cfg = { "dataset": { "name": "RosslerDataset", - "file_path": VALID_FILE_PATH, - "input_keys": input_keys, - "label_keys": output_keys, - "block_size": VALID_BLOCK_SIZE, + "file_path": cfg.VALID_FILE_PATH, + "input_keys": cfg.MODEL.input_keys, + "label_keys": cfg.MODEL.output_keys, + "block_size": cfg.VALID_BLOCK_SIZE, "stride": 1024, "embedding_model": embedding_model, }, @@ -152,7 +119,7 @@ def __call__(self, x: Dict[str, paddle.Tensor]): "drop_last": False, "shuffle": False, }, - "batch_size": 16, + "batch_size": cfg.TRAIN.batch_size.eval, "num_workers": 4, } @@ -168,8 +135,8 @@ def __call__(self, x: Dict[str, paddle.Tensor]): states = mse_validator.data_loader.dataset.data embedding_data = mse_validator.data_loader.dataset.embedding_data vis_datas = { - "embeds": embedding_data[:VIS_DATA_NUMS, :-1, :], - "states": states[:VIS_DATA_NUMS, 1:, :], + "embeds": embedding_data[: cfg.VIS_DATA_NUMS, :-1, :], + "states": states[: cfg.VIS_DATA_NUMS, 1:, :], } visualizer = { @@ -187,13 +154,13 @@ def __call__(self, x: Dict[str, paddle.Tensor]): solver = ppsci.solver.Solver( model, constraint, - OUTPUT_DIR, + cfg.output_dir, optimizer, lr_scheduler, - EPOCHS, + cfg.TRAIN.epochs, ITERS_PER_EPOCH, - eval_during_train=True, - eval_freq=50, + eval_during_train=cfg.TRAIN.eval_during_train, + eval_freq=cfg.TRAIN.eval_freq, validator=validator, visualizer=visualizer, ) @@ -204,15 +171,88 @@ def __call__(self, x: Dict[str, paddle.Tensor]): # visualize prediction after finished training solver.visualize() - # directly evaluate pretrained model(optional) - logger.init_logger("ppsci", f"{OUTPUT_DIR}/eval.log", "info") + +def evaluate(cfg: DictConfig): + # set random seed for reproducibility + ppsci.utils.misc.set_random_seed(cfg.seed) + # initialize logger + logger.init_logger("ppsci", osp.join(cfg.output_dir, f"{cfg.mode}.log"), "info") + + embedding_model = build_embedding_model(cfg.EMBEDDING_MODEL_PATH) + output_transform = OutputTransform(embedding_model) + + # manually init model + model = ppsci.arch.PhysformerGPT2(**cfg.MODEL) + + # manually build validator + eval_dataloader_cfg = { + "dataset": { + "name": "RosslerDataset", + "file_path": cfg.VALID_FILE_PATH, + "input_keys": cfg.MODEL.input_keys, + "label_keys": cfg.MODEL.output_keys, + "block_size": cfg.VALID_BLOCK_SIZE, + "stride": 1024, + "embedding_model": embedding_model, + }, + "sampler": { + "name": "BatchSampler", + "drop_last": False, + "shuffle": False, + }, + "batch_size": cfg.TRAIN.batch_size.eval, + "num_workers": 4, + } + + mse_validator = ppsci.validate.SupervisedValidator( + eval_dataloader_cfg, + ppsci.loss.MSELoss(), + metric={"MSE": ppsci.metric.MSE()}, + name="MSE_Validator", + ) + validator = {mse_validator.name: mse_validator} + + # set visualizer(optional) + states = mse_validator.data_loader.dataset.data + embedding_data = mse_validator.data_loader.dataset.embedding_data + vis_datas = { + "embeds": embedding_data[: cfg.VIS_DATA_NUMS, :-1, :], + "states": states[: cfg.VIS_DATA_NUMS, 1:, :], + } + + visualizer = { + "visulzie_states": ppsci.visualize.VisualizerScatter3D( + vis_datas, + { + "pred_states": lambda d: output_transform(d), + "states": lambda d: d["states"], + }, + num_timestamps=1, + prefix="result_states", + ) + } + solver = ppsci.solver.Solver( model, - output_dir=OUTPUT_DIR, + output_dir=cfg.output_dir, validator=validator, visualizer=visualizer, - pretrained_model_path=f"{OUTPUT_DIR}/checkpoints/latest", + pretrained_model_path=cfg.EVAL.pretrained_model_path, ) 
solver.eval()
     # visualize prediction for pretrained model(optional)
     solver.visualize()
+
+
+@hydra.main(version_base=None, config_path="./conf", config_name="transformer.yaml")
+def main(cfg: DictConfig):
+    if cfg.mode == "train":
+        train(cfg)
+    elif cfg.mode == "eval":
+        evaluate(cfg)
+    else:
+        raise ValueError(f"cfg.mode should be in ['train', 'eval'], but got '{cfg.mode}'")
+
+
+if __name__ == "__main__":
+    main()
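+
+# Typical invocations (checkpoint path illustrative):
+#   python train_transformer.py
+#   python train_transformer.py mode=eval EVAL.pretrained_model_path=./outputs_rossler_transformer/<date>/<time>/checkpoints/latest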