diff --git a/docs/api/paddle/optimizer/Adadelta_cn.rst b/docs/api/paddle/optimizer/Adadelta_cn.rst index db0d2270b87..f4c3b8fda71 100644 --- a/docs/api/paddle/optimizer/Adadelta_cn.rst +++ b/docs/api/paddle/optimizer/Adadelta_cn.rst @@ -41,19 +41,7 @@ Adadelta 优化器出自 `DECOUPLED WEIGHT DECAY REGULARIZATION 论文 `_ 的第二 代码示例 :::::::::::: -.. code-block:: python - - import paddle - import numpy as np - - inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") - linear = paddle.nn.Linear(10, 10) - inp = paddle.to_tensor(inp) - out = linear(inp) - loss = paddle.mean(out) - adam = paddle.optimizer.Adam(learning_rate=0.1, - parameters=linear.parameters()) - out.backward() - adam.step() - adam.clear_grad() - -.. code-block:: python - - # Adam with beta1/beta2 as Tensor and weight_decay as float - import paddle - import numpy as np - - inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") - linear = paddle.nn.Linear(10, 10) - inp = paddle.to_tensor(inp) - out = linear(inp) - loss = paddle.mean(out) - - beta1 = paddle.to_tensor([0.9], dtype="float32") - beta2 = paddle.to_tensor([0.99], dtype="float32") - - adam = paddle.optimizer.Adam(learning_rate=0.1, - parameters=linear.parameters(), - beta1=beta1, - beta2=beta2, - weight_decay=0.01) - out.backward() - adam.step() - adam.clear_grad() +COPY-FROM: paddle.optimizer.Adam:code-example1 + +COPY-FROM: paddle.optimizer.Adam:code-example2 方法 :::::::::::: @@ -104,20 +68,7 @@ step() **代码示例** -.. 
code-block:: python - - import paddle - import numpy as np - - value = np.arange(26).reshape(2, 13).astype("float32") - a = paddle.to_tensor(value) - linear = paddle.nn.Linear(13, 5) - adam = paddle.optimizer.Adam(learning_rate = 0.01, - parameters = linear.parameters()) - out = linear(a) - out.backward() - adam.step() - adam.clear_grad() +COPY-FROM: paddle.optimizer.Adam.step append_regularization_ops(parameters_and_grads, regularization=None) ''''''''' @@ -155,26 +106,7 @@ minimize(loss, startup_program=None, parameters=None, no_grad_set=None) **代码示例** -.. code-block:: python - - import paddle - import numpy as np - - inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") - linear = paddle.nn.Linear(10, 10) - inp = paddle.to_tensor(inp) - out = linear(inp) - loss = paddle.mean(out) - - beta1 = paddle.to_tensor([0.9], dtype="float32") - beta2 = paddle.to_tensor([0.99], dtype="float32") - - adam = paddle.optimizer.Adam(learning_rate=0.1, - parameters=linear.parameters(), - weight_decay=0.01) - out.backward() - adam.minimize(loss) - adam.clear_grad() +COPY-FROM: paddle.optimizer.Adam.minimize clear_grad() ''''''''' @@ -187,20 +119,7 @@ clear_grad() **代码示例** -.. code-block:: python - - import paddle - import numpy as np - - value = np.arange(26).reshape(2, 13).astype("float32") - a = paddle.to_tensor(value) - linear = paddle.nn.Linear(13, 5) - optimizer = paddle.optimizer.Adam(learning_rate=0.02, - parameters=linear.parameters()) - out = linear(a) - out.backward() - optimizer.step() - optimizer.clear_grad() +COPY-FROM: paddle.optimizer.Adam.clear_grad set_lr(value) ''''''''' @@ -221,26 +140,7 @@ set_lr(value) **代码示例** -.. 
code-block:: python - - import paddle - - linear = paddle.nn.Linear(10, 10) - - adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters()) - - # set learning rate manually by python float value - lr_list = [0.2, 0.3, 0.4, 0.5, 0.6] - for i in range(5): - adam.set_lr(lr_list[i]) - lr = adam.get_lr() - print("current lr is {}".format(lr)) - # Print: - # current lr is 0.2 - # current lr is 0.3 - # current lr is 0.4 - # current lr is 0.5 - # current lr is 0.6 +COPY-FROM: paddle.optimizer.Adam.set_lr get_lr() ''''''''' @@ -257,39 +157,7 @@ float,当前步骤的学习率。 **代码示例** -.. code-block:: python - - import numpy as np - import paddle - # example1: _LRScheduler is not used, return value is all the same - emb = paddle.nn.Embedding(10, 10, sparse=False) - adam = paddle.optimizer.Adam(0.001, parameters = emb.parameters()) - lr = adam.get_lr() - print(lr) # 0.001 - - # example2: StepDecay is used, return the step learning rate - inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") - linear = paddle.nn.Linear(10, 10) - inp = paddle.to_tensor(inp) - out = linear(inp) - loss = paddle.mean(out) - - bd = [2, 4, 6, 8] - value = [0.2, 0.4, 0.6, 0.8, 1.0] - scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=2, gamma=0.1) - adam = paddle.optimizer.Adam(scheduler, - parameters=linear.parameters()) - - # first step: learning rate is 0.2 - np.allclose(adam.get_lr(), 0.2, rtol=1e-06, atol=0.0) # True - - # learning rate for different steps - ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0] - for i in range(12): - adam.step() - lr = adam.get_lr() - scheduler.step() - np.allclose(lr, ret[i], rtol=1e-06, atol=0.0) # True +COPY-FROM: paddle.optimizer.Adam.get_lr set_state_dict(state_dict) ''''''''' @@ -306,25 +174,7 @@ set_state_dict(state_dict) **代码示例** -.. 
code-block:: python - - import paddle - - emb = paddle.nn.Embedding(10, 10) - - layer_state_dict = emb.state_dict() - paddle.save(layer_state_dict, "emb.pdparams") - - scheduler = paddle.optimizer.lr.NoamDecay( - d_model=0.01, warmup_steps=100, verbose=True) - adam = paddle.optimizer.Adam( - learning_rate=scheduler, - parameters=emb.parameters()) - opt_state_dict = adam.state_dict() - paddle.save(opt_state_dict, "adam.pdopt") - - opti_state_dict = paddle.load("adam.pdopt") - adam.set_state_dict(opti_state_dict) +COPY-FROM: paddle.optimizer.Adam.set_state_dict state_dict(state_dict) ''''''''' @@ -344,10 +194,4 @@ state_dict(dict) **代码示例** -.. code-block:: python - - import paddle - emb = paddle.nn.Embedding(10, 10) - - adam = paddle.optimizer.Adam(0.001, parameters=emb.parameters()) - state_dict = adam.state_dict() +COPY-FROM: paddle.optimizer.Adam.state_dict diff --git a/docs/api/paddle/optimizer/Adamax_cn.rst b/docs/api/paddle/optimizer/Adamax_cn.rst index 7125659ec43..51e28f354f1 100755 --- a/docs/api/paddle/optimizer/Adamax_cn.rst +++ b/docs/api/paddle/optimizer/Adamax_cn.rst @@ -48,22 +48,7 @@ Adamax 优化器是参考 `Adam 论文 `_ 第 7 代码示例 :::::::::::: -.. code-block:: python - - import paddle - import numpy as np - - inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") - linear = paddle.nn.Linear(10, 10) - inp = paddle.to_tensor(inp) - out = linear(inp) - loss = paddle.mean(out) - adam = paddle.optimizer.Adamax(learning_rate=0.1, - parameters=linear.parameters()) - out.backward() - adam.step() - adam.clear_grad() - +COPY-FROM: paddle.optimizer.Adamax 方法 :::::::::::: @@ -83,20 +68,7 @@ step() **代码示例** -.. 
code-block:: python - - import paddle - import numpy as np - - value = np.arange(26).reshape(2, 13).astype("float32") - a = paddle.to_tensor(value) - linear = paddle.nn.Linear(13, 5) - adam = paddle.optimizer.Adam(learning_rate = 0.01, - parameters = linear.parameters()) - out = linear(a) - out.backward() - adam.step() - adam.clear_grad() +COPY-FROM: paddle.optimizer.Adamax.step minimize(loss, startup_program=None, parameters=None, no_grad_set=None) ''''''''' @@ -116,26 +88,7 @@ minimize(loss, startup_program=None, parameters=None, no_grad_set=None) **代码示例** -.. code-block:: python - - import paddle - import numpy as np - - inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") - linear = paddle.nn.Linear(10, 10) - inp = paddle.to_tensor(inp) - out = linear(inp) - loss = paddle.mean(out) - - beta1 = paddle.to_tensor([0.9], dtype="float32") - beta2 = paddle.to_tensor([0.99], dtype="float32") - - adam = paddle.optimizer.Adamax(learning_rate=0.1, - parameters=linear.parameters(), - weight_decay=0.01) - out.backward() - adam.minimize(loss) - adam.clear_grad() +COPY-FROM: paddle.optimizer.Adamax.minimize clear_grad() @@ -150,20 +103,7 @@ clear_grad() **代码示例** -.. code-block:: python - - import paddle - import numpy as np - - value = np.arange(26).reshape(2, 13).astype("float32") - a = paddle.to_tensor(value) - linear = paddle.nn.Linear(13, 5) - optimizer = paddle.optimizer.Adamax(learning_rate=0.02, - parameters=linear.parameters()) - out = linear(a) - out.backward() - optimizer.step() - optimizer.clear_grad() +COPY-FROM: paddle.optimizer.Adamax.clear_grad set_lr(value) ''''''''' @@ -184,26 +124,7 @@ set_lr(value) **代码示例** -.. 
code-block:: python - - import paddle - - linear = paddle.nn.Linear(10, 10) - - adam = paddle.optimizer.Adamax(0.1, parameters=linear.parameters()) - - # set learning rate manually by python float value - lr_list = [0.2, 0.3, 0.4, 0.5, 0.6] - for i in range(5): - adam.set_lr(lr_list[i]) - lr = adam.get_lr() - print("current lr is {}".format(lr)) - # Print: - # current lr is 0.2 - # current lr is 0.3 - # current lr is 0.4 - # current lr is 0.5 - # current lr is 0.6 +COPY-FROM: paddle.optimizer.Adamax.set_lr get_lr() ''''''''' @@ -221,37 +142,4 @@ float,当前步骤的学习率。 **代码示例** -.. code-block:: python - - - import numpy as np - import paddle - # example1: _LRScheduler is not used, return value is all the same - emb = paddle.nn.Embedding(10, 10, sparse=False) - adam = paddle.optimizer.Adamax(0.001, parameters = emb.parameters()) - lr = adam.get_lr() - print(lr) # 0.001 - - # example2: StepDecay is used, return the step learning rate - inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") - linear = paddle.nn.Linear(10, 10) - inp = paddle.to_tensor(inp) - out = linear(inp) - loss = paddle.mean(out) - - bd = [2, 4, 6, 8] - value = [0.2, 0.4, 0.6, 0.8, 1.0] - scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=2, gamma=0.1) - adam = paddle.optimizer.Adamax(scheduler, - parameters=linear.parameters()) - - # first step: learning rate is 0.2 - np.allclose(adam.get_lr(), 0.2, rtol=1e-06, atol=0.0) # True - - # learning rate for different steps - ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0] - for i in range(12): - adam.step() - lr = adam.get_lr() - scheduler.step() - np.allclose(lr, ret[i], rtol=1e-06, atol=0.0) # True +COPY-FROM: paddle.optimizer.Adamax.get_lr diff --git a/docs/api/paddle/optimizer/Lamb_cn.rst b/docs/api/paddle/optimizer/Lamb_cn.rst index 10b7c8aabf9..a732c78915c 100755 --- a/docs/api/paddle/optimizer/Lamb_cn.rst +++ b/docs/api/paddle/optimizer/Lamb_cn.rst @@ -45,20 +45,7 @@ LAMB(Layer-wise Adaptive Moments 
optimizer for Batching training)优化器 代码示例 :::::::::::: -.. code-block:: python - - import paddle - - inp = paddle.uniform(shape=[10, 10], dtype='float32', min=-0.1, max=0.1) - linear = paddle.nn.Linear(10, 10) - out = linear(inp) - loss = paddle.mean(out) - beta1 = paddle.to_tensor([0.9], dtype="float32") - beta2 = paddle.to_tensor([0.85], dtype="float32") - lamb = paddle.optimizer.Lamb(learning_rate=0.002, parameters=linear.parameters(), lamb_weight_decay=0.01) - back = out.backward() - lamb.step() - lamb.clear_grad() +COPY-FROM: paddle.optimizer.Lamb 方法 :::::::::::: @@ -77,20 +64,7 @@ step() **代码示例** -.. code-block:: python - - import paddle - - value = paddle.arange(26, dtype='float32') - value = paddle.reshape(value, [2, 13]) - a = paddle.to_tensor(value) - linear = paddle.nn.Linear(13, 5) - lamb = paddle.optimizer.Lamb(learning_rate = 0.01, - parameters = linear.parameters()) - out = linear(a) - out.backward() - lamb.step() - lamb.clear_grad() +COPY-FROM: paddle.optimizer.Lamb.step minimize(loss, startup_program=None, parameters=None, no_grad_set=None) ''''''''' @@ -110,26 +84,7 @@ minimize(loss, startup_program=None, parameters=None, no_grad_set=None) **代码示例** -.. code-block:: python - - import paddle - - inp = paddle.uniform(shape=[10, 10], dtype="float32", min=-0.1, max=0.1) - linear = paddle.nn.Linear(10, 10) - inp = paddle.to_tensor(inp) - out = linear(inp) - loss = paddle.mean(out) - - beta1 = paddle.to_tensor([0.9], dtype="float32") - beta2 = paddle.to_tensor([0.99], dtype="float32") - - lamb = paddle.optimizer.Lamb(learning_rate=0.1, - lamb_weight_decay=0.01, - parameters=linear.parameters()) - out.backward() - lamb.minimize(loss) - lamb.clear_grad() - +COPY-FROM: paddle.optimizer.Lamb.minimize clear_grad() ''''''''' @@ -142,20 +97,7 @@ clear_grad() **代码示例** -.. 
code-block:: python - - import paddle - - value = paddle.arange(26, dtype="float32") - value = paddle.reshape(value, [2, 13]) - a = paddle.to_tensor(value) - linear = paddle.nn.Linear(13, 5) - optimizer = paddle.optimizer.Lamb(learning_rate=0.02, - parameters=linear.parameters()) - out = linear(a) - out.backward() - optimizer.step() - optimizer.clear_grad() +COPY-FROM: paddle.optimizer.Lamb.clear_grad set_lr(value) ''''''''' @@ -176,26 +118,7 @@ set_lr(value) **代码示例** -.. code-block:: python - - import paddle - - linear = paddle.nn.Linear(10, 10) - - lamb = paddle.optimizer.Lamb(0.1, parameters=linear.parameters()) - - # set learning rate manually by python float value - lr_list = [0.2, 0.3, 0.4, 0.5, 0.6] - for i in range(5): - lamb.set_lr(lr_list[i]) - lr = lamb.get_lr() - print("current lr is {}".format(lr)) - # Print: - # current lr is 0.2 - # current lr is 0.3 - # current lr is 0.4 - # current lr is 0.5 - # current lr is 0.6 +COPY-FROM: paddle.optimizer.Lamb.set_lr get_lr() ''''''''' @@ -213,38 +136,4 @@ float,当前步骤的学习率。 **代码示例** -.. 
code-block:: python - - - import paddle - import numpy as np - - # example1: _LRScheduler is not used, return value is all the same - emb = paddle.nn.Embedding(10, 10, sparse=False) - lamb = paddle.optimizer.Lamb(0.001, parameters = emb.parameters()) - lr = lamb.get_lr() - print(lr) # 0.001 - - # example2: StepDecay is used, return the step learning rate - inp = paddle.uniform(shape=[10, 10], dtype="float32", min=-0.1, max=0.1) - linear = paddle.nn.Linear(10, 10) - inp = paddle.to_tensor(inp) - out = linear(inp) - loss = paddle.mean(out) - - bd = [2, 4, 6, 8] - value = [0.2, 0.4, 0.6, 0.8, 1.0] - scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=2, gamma=0.1) - lamb = paddle.optimizer.Lamb(scheduler, - parameters=linear.parameters()) - - # first step: learning rate is 0.2 - np.allclose(lamb.get_lr(), 0.2, rtol=1e-06, atol=0.0) # True - - # learning rate for different steps - ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0] - for i in range(12): - lamb.step() - lr = lamb.get_lr() - scheduler.step() - np.allclose(lr, ret[i], rtol=1e-06, atol=0.0) # True +COPY-FROM: paddle.optimizer.Lamb.get_lr diff --git a/docs/api/paddle/optimizer/Momentum_cn.rst b/docs/api/paddle/optimizer/Momentum_cn.rst index 470a0f2ab5e..4bb914722ac 100644 --- a/docs/api/paddle/optimizer/Momentum_cn.rst +++ b/docs/api/paddle/optimizer/Momentum_cn.rst @@ -37,18 +37,7 @@ Momentum 代码示例 :::::::::::: -.. code-block:: python - - import paddle - - inp = paddle.uniform(min=-0.1, max=0.1, shape=[10, 10], dtype='float32') - linear = paddle.nn.Linear(10, 10) - out = linear(inp) - loss = paddle.mean(out) - momentum = paddle.optimizer.Momentum(learning_rate=0.1, parameters=linear.parameters(), weight_decay=0.01) - out.backward() - momentum.step() - momentum.clear_grad() +COPY-FROM: paddle.optimizer.Momentum 方法 @@ -69,17 +58,7 @@ step() **代码示例** -.. 
code-block:: python - - import paddle - value = paddle.arange(26, dtype='float32') - a = paddle.reshape(value, [2, 13]) - linear = paddle.nn.Linear(13, 5) - momentum = paddle.optimizer.Momentum(learning_rate=0.0003, parameters = linear.parameters()) - out = linear(a) - out.backward() - momentum.step() - momentum.clear_grad() +COPY-FROM: paddle.optimizer.Momentum.step minimize(loss, startup_program=None, parameters=None, no_grad_set=None) ''''''''' @@ -100,22 +79,7 @@ minimize(loss, startup_program=None, parameters=None, no_grad_set=None) **代码示例** -.. code-block:: python - - import paddle - - inp = paddle.uniform(min=-0.1, max=0.1, shape=[10, 10], dtype='float32') - linear = paddle.nn.Linear(10, 10) - out = linear(inp) - loss = paddle.mean(out) - - beta1 = paddle.to_tensor([0.9], dtype="float32") - beta2 = paddle.to_tensor([0.99], dtype="float32") - - momentum = paddle.optimizer.Momentum(learning_rate=0.1, parameters=linear.parameters(), weight_decay=0.01) - out.backward() - momentum.minimize(loss) - momentum.clear_grad() +COPY-FROM: paddle.optimizer.Momentum.minimize clear_grad() ''''''''' @@ -129,18 +93,7 @@ clear_grad() **代码示例** -.. code-block:: python - - import paddle - - value = paddle.arange(26, dtype='float32') - a = paddle.reshape(value, [2, 13]) - linear = paddle.nn.Linear(13, 5) - optimizer = paddle.optimizer.Momentum(learning_rate=0.1, parameters=linear.parameters(), weight_decay=0.01) - out = linear(a) - out.backward() - optimizer.step() - optimizer.clear_grad() +COPY-FROM: paddle.optimizer.Momentum.clear_grad set_lr(value) ''''''''' diff --git a/docs/api/paddle/optimizer/Optimizer_cn.rst b/docs/api/paddle/optimizer/Optimizer_cn.rst index 0cdb2933bc9..5a81ad86d24 100755 --- a/docs/api/paddle/optimizer/Optimizer_cn.rst +++ b/docs/api/paddle/optimizer/Optimizer_cn.rst @@ -25,22 +25,7 @@ Optimizer 代码示例 :::::::::::: -.. 
code-block:: python - - #以子类 Adam 为例 - import paddle - import numpy as np - - inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") - linear = paddle.nn.Linear(10, 10) - inp = paddle.to_tensor(inp) - out = linear(inp) - loss = paddle.mean(out) - adam = paddle.optimizer.Adam(learning_rate=0.1, - parameters=linear.parameters()) - loss.backward() - adam.step() - adam.clear_grad() +COPY-FROM: paddle.optimizer.Optimizer 方法 :::::::::::: @@ -60,21 +45,7 @@ step() **代码示例** -.. code-block:: python - - import paddle - import numpy as np - - value = np.arange(26).reshape(2, 13).astype("float32") - a = paddle.to_tensor(value) - linear = paddle.nn.Linear(13, 5) - # This can be any optimizer supported by dygraph. - adam = paddle.optimizer.Adam(learning_rate = 0.01, - parameters = linear.parameters()) - out = linear(a) - out.backward() - adam.step() - adam.clear_grad() +COPY-FROM: paddle.optimizer.Optimizer.step minimize(loss, startup_program=None, parameters=None, no_grad_set=None) ''''''''' @@ -95,26 +66,7 @@ minimize(loss, startup_program=None, parameters=None, no_grad_set=None) **代码示例** -.. code-block:: python - - import paddle - import numpy as np - - inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") - linear = paddle.nn.Linear(10, 10) - inp = paddle.to_tensor(inp) - out = linear(inp) - loss = paddle.mean(out) - - beta1 = paddle.to_tensor([0.9], dtype="float32") - beta2 = paddle.to_tensor([0.99], dtype="float32") - - adam = paddle.optimizer.Adam(learning_rate=0.1, - parameters=linear.parameters(), - weight_decay=0.01) - loss.backward() - adam.minimize(loss) - adam.clear_grad() +COPY-FROM: paddle.optimizer.Optimizer.minimize clear_grad() ''''''''' @@ -128,20 +80,7 @@ clear_grad() **代码示例** -.. 
code-block:: python - - import paddle - import numpy as np - - value = np.arange(26).reshape(2, 13).astype("float32") - a = paddle.to_tensor(value) - linear = paddle.nn.Linear(13, 5) - optimizer = paddle.optimizer.Adam(learning_rate=0.02, - parameters=linear.parameters()) - out = linear(a) - out.backward() - optimizer.step() - optimizer.clear_grad() +COPY-FROM: paddle.optimizer.Optimizer.clear_grad set_lr(value) ''''''''' @@ -162,26 +101,7 @@ set_lr(value) **代码示例** -.. code-block:: python - - import paddle - - linear = paddle.nn.Linear(10, 10) - - adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters()) - - # set learning rate manually by python float value - lr_list = [0.2, 0.3, 0.4, 0.5, 0.6] - for i in range(5): - adam.set_lr(lr_list[i]) - lr = adam.get_lr() - print("current lr is {}".format(lr)) - # Print: - # current lr is 0.2 - # current lr is 0.3 - # current lr is 0.4 - # current lr is 0.5 - # current lr is 0.6 +COPY-FROM: paddle.optimizer.Optimizer.set_lr get_lr() ''''''''' @@ -199,36 +119,4 @@ float,当前步骤的学习率。 **代码示例** -.. 
code-block:: python - - import numpy as np - import paddle - # example1: _LRScheduler is not used, return value is all the same - emb = paddle.nn.Embedding(10, 10, sparse=False) - adam = paddle.optimizer.Adam(0.001, parameters = emb.parameters()) - lr = adam.get_lr() - print(lr) # 0.001 - - # example2: StepDecay is used, return the step learning rate - inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") - linear = paddle.nn.Linear(10, 10) - inp = paddle.to_tensor(inp) - out = linear(inp) - loss = paddle.mean(out) - - bd = [2, 4, 6, 8] - value = [0.2, 0.4, 0.6, 0.8, 1.0] - scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=2, gamma=0.1) - adam = paddle.optimizer.Adam(scheduler, - parameters=linear.parameters()) - - # first step: learning rate is 0.2 - np.allclose(adam.get_lr(), 0.2, rtol=1e-06, atol=0.0) # True - - # learning rate for different steps - ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0] - for i in range(12): - adam.step() - lr = adam.get_lr() - scheduler.step() - np.allclose(lr, ret[i], rtol=1e-06, atol=0.0) # True +COPY-FROM: paddle.optimizer.Optimizer.get_lr diff --git a/docs/api/paddle/optimizer/RMSProp_cn.rst b/docs/api/paddle/optimizer/RMSProp_cn.rst index e1c06d497a6..f93e61a2c8e 100755 --- a/docs/api/paddle/optimizer/RMSProp_cn.rst +++ b/docs/api/paddle/optimizer/RMSProp_cn.rst @@ -48,21 +48,7 @@ RMSProp 代码示例 :::::::::::: -.. code-block:: python - - import paddle - - inp = paddle.rand([10,10], dtype="float32") - linear = paddle.nn.Linear(10, 10) - out = linear(inp) - loss = paddle.mean(out) - - rmsprop = paddle.optimizer.RMSProp(learning_rate=0.1, - parameters=linear.parameters(), - weight_decay=0.01) - out.backward() - rmsprop.step() - rmsprop.clear_grad() +COPY-FROM: paddle.optimizer.RMSProp 方法 :::::::::::: @@ -82,17 +68,7 @@ step() **代码示例** -.. 
code-block:: python - - import paddle - a = paddle.rand([2,13], dtype="float32") - linear = paddle.nn.Linear(13, 5) - rmsprop = paddle.optimizer.RMSProp(learning_rate = 0.01, - parameters = linear.parameters()) - out = linear(a) - out.backward() - rmsprop.step() - rmsprop.clear_grad() +COPY-FROM: paddle.optimizer.RMSProp.step minimize(loss, startup_program=None, parameters=None, no_grad_set=None) ''''''''' @@ -113,23 +89,9 @@ minimize(loss, startup_program=None, parameters=None, no_grad_set=None) **代码示例** -.. code-block:: python - - import paddle - - inp = paddle.rand([10,10], dtype="float32") - linear = paddle.nn.Linear(10, 10) - out = linear(inp) - loss = paddle.mean(out) - - rmsprop = paddle.optimizer.RMSProp(learning_rate=0.1, - parameters=linear.parameters(), - weight_decay=0.01) - out.backward() - rmsprop.step() - rmsprop.clear_grad() +COPY-FROM: paddle.optimizer.RMSProp.minimize -clear_gradients() +clear_grad(set_to_zero=True) ''''''''' .. note:: @@ -141,18 +103,7 @@ clear_gradients() **代码示例** -.. code-block:: python - - import paddle - - a = paddle.rand([2,13], dtype="float32") - linear = paddle.nn.Linear(13, 5) - rmsprop = paddle.optimizer.RMSProp(learning_rate=0.02, - parameters=linear.parameters()) - out = linear(a) - out.backward() - rmsprop.step() - rmsprop.clear_gradients() +COPY-FROM: paddle.optimizer.RMSProp.clear_grad set_lr(value) ''''''''' @@ -173,26 +124,7 @@ set_lr(value) **代码示例** -.. 
code-block:: python - - - import paddle - - linear = paddle.nn.Linear(10, 10) - rmsprop = paddle.optimizer.RMSProp(0.1, parameters=linear.parameters()) - - # set learning rate manually by python float value - lr_list = [0.2, 0.3, 0.4, 0.5, 0.6] - for i in range(5): - rmsprop.set_lr(lr_list[i]) - lr = rmsprop.get_lr() - print("current lr is {}".format(lr)) - # Print: - # current lr is 0.2 - # current lr is 0.3 - # current lr is 0.4 - # current lr is 0.5 - # current lr is 0.6 +COPY-FROM: paddle.optimizer.RMSProp.set_lr get_lr() ''''''''' @@ -210,35 +142,4 @@ float,当前步骤的学习率。 **代码示例** -.. code-block:: python - - import paddle - import numpy as np - # example1: _LRScheduler is not used, return value is all the same - emb = paddle.nn.Embedding(10, 10, sparse=False) - rmsprop = paddle.optimizer.RMSProp(0.001, parameters = emb.parameters()) - lr = rmsprop.get_lr() - print(lr) # 0.001 - - # example2: StepDecay is used, return the step learning rate - linear = paddle.nn.Linear(10, 10) - inp = paddle.rand([10,10], dtype="float32") - out = linear(inp) - loss = paddle.mean(out) - - bd = [2, 4, 6, 8] - value = [0.2, 0.4, 0.6, 0.8, 1.0] - scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=2, gamma=0.1) - rmsprop = paddle.optimizer.RMSProp(scheduler, - parameters=linear.parameters()) - - # first step: learning rate is 0.2 - np.allclose(rmsprop.get_lr(), 0.2, rtol=1e-06, atol=0.0) # True - - # learning rate for different steps - ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0] - for i in range(12): - rmsprop.step() - lr = rmsprop.get_lr() - scheduler.step() - np.allclose(lr, ret[i], rtol=1e-06, atol=0.0) # True +COPY-FROM: paddle.optimizer.RMSProp.get_lr diff --git a/docs/api/paddle/optimizer/SGD_cn.rst b/docs/api/paddle/optimizer/SGD_cn.rst index 20d7df05516..48dfec1b2d7 100644 --- a/docs/api/paddle/optimizer/SGD_cn.rst +++ b/docs/api/paddle/optimizer/SGD_cn.rst @@ -28,20 +28,7 @@ SGD 代码示例 :::::::::::: -.. 
code-block:: python - - import paddle - - inp = paddle.uniform(min=-0.1, max=0.1, shape=[10, 10], dtype='float32') - linear = paddle.nn.Linear(10, 10) - inp = paddle.to_tensor(inp) - out = linear(inp) - loss = paddle.mean(out) - sgd = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters(), weight_decay=0.01) - out.backward() - sgd.step() - sgd.clear_grad() - +COPY-FROM: paddle.optimizer.SGD 方法 :::::::::::: @@ -60,17 +47,7 @@ step() **代码示例** -.. code-block:: python - - import paddle - value = paddle.arange(26, dtype='float32') - a = paddle.reshape(value, [2, 13]) - linear = paddle.nn.Linear(13, 5) - sgd = paddle.optimizer.SGD(learning_rate=0.0003, parameters = linear.parameters()) - out = linear(a) - out.backward() - sgd.step() - sgd.clear_grad() +COPY-FROM: paddle.optimizer.SGD.step minimize(loss, startup_program=None, parameters=None, no_grad_set=None) ''''''''' @@ -91,22 +68,7 @@ minimize(loss, startup_program=None, parameters=None, no_grad_set=None) **代码示例** -.. code-block:: python - - import paddle - - inp = paddle.uniform(min=-0.1, max=0.1, shape=[10, 10], dtype='float32') - linear = paddle.nn.Linear(10, 10) - out = linear(inp) - loss = paddle.mean(out) - - beta1 = paddle.to_tensor([0.9], dtype="float32") - beta2 = paddle.to_tensor([0.99], dtype="float32") - - sgd = paddle.optimizer.SGD(learning_rate=0.0003, parameters=linear.parameters()) - out.backward() - sgd.minimize(loss) - sgd.clear_grad() +COPY-FROM: paddle.optimizer.SGD.minimize clear_grad() ''''''''' @@ -120,19 +82,7 @@ clear_grad() **代码示例** -.. 
code-block:: python - - import paddle - - value = paddle.arange(26, dtype='float32') - a = paddle.reshape(value, [2, 13]) - linear = paddle.nn.Linear(13, 5) - optimizer = paddle.optimizer.SGD(learning_rate=0.0003, - parameters=linear.parameters()) - out = linear(a) - out.backward() - optimizer.step() - optimizer.clear_grad() +COPY-FROM: paddle.optimizer.SGD.clear_grad set_lr(value) ''''''''' diff --git a/docs/api/paddle/optimizer/lr/LRScheduler_cn.rst b/docs/api/paddle/optimizer/lr/LRScheduler_cn.rst index 4448c498e64..7ec4bc52444 100644 --- a/docs/api/paddle/optimizer/lr/LRScheduler_cn.rst +++ b/docs/api/paddle/optimizer/lr/LRScheduler_cn.rst @@ -56,34 +56,7 @@ LRScheduler 代码示例 :::::::::::: -这里提供了重载基类 ``LRScheduler`` 并实现 ``StepLR`` 的示例,你可以根据你的需求来实现任意子类。 - -.. code-block:: python - - import paddle - from paddle.optimizer.lr import LRScheduler - - class StepDecay(LRScheduler): - def __init__(self, - learning_rate, - step_size, - gamma=0.1, - last_epoch=-1, - verbose=False): - if not isinstance(step_size, int): - raise TypeError( - "The type of 'step_size' must be 'int', but received %s." % - type(step_size)) - if gamma >= 1.0: - raise ValueError('gamma should be < 1.0.') - - self.step_size = step_size - self.gamma = gamma - super().__init__(learning_rate, last_epoch, verbose) - - def get_lr(self): - i = self.last_epoch // self.step_size - return self.base_lr * (self.gamma**i) +COPY-FROM: paddle.optimizer.lr.LRScheduler 方法 :::::::::::: @@ -102,27 +75,7 @@ step 函数需要在优化器的 `optimizer.step()` 函数之后调用,调用 **代码示例** -请参考 ``基类 LRScheduler`` 的任意子类实现,这里以 ``StepLR`` 为例进行了示例: - -.. 
code-block:: python - - import paddle - import numpy as np - - x = np.random.uniform(-1, 1, [10, 10]).astype("float32") - linear = paddle.nn.Linear(10, 10) - scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=5, gamma=0.8, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) - for epoch in range(20): - for batch_id in range(2): - x = paddle.to_tensor(x) - out = linear(x) - loss = paddle.mean(out) - loss.backward() - sgd.step() - sgd.clear_gradients() - scheduler.step() # If you update learning rate each step - # scheduler.step() # If you update learning rate each epoch +COPY-FROM: paddle.optimizer.lr.StepDecay get_lr() '''''''''