Modify COPY-FROM No.4 optimizer
Signed-off-by: jjyaoao <[email protected]>
jjyaoao committed Jul 11, 2023
1 parent 3354d86 commit aa366f9
Showing 10 changed files with 57 additions and 1,028 deletions.
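Every file in this commit follows the same pattern: a hand-maintained literal example under a ".. code-block:: python" directive is deleted, and a one-line "COPY-FROM:" directive naming the corresponding Python API is added in its place, which is where the 1,028 deleted lines against 57 added lines come from. The sketch below is illustrative only; it is not the PaddlePaddle/docs build tooling, the helper names are mine, and it only assumes (as the directive name suggests) that "COPY-FROM: <api>" is meant to be resolved against the example section of the named API's docstring instead of a duplicated copy kept in the .rst file.

.. code-block:: python

    # Illustrative sketch only (not the real docs build step): resolve a
    # "COPY-FROM: <api>" line by importing the named object and pulling the
    # example section out of its docstring.
    import importlib
    import inspect


    def _import_target(path):
        """Import the longest importable module prefix of path, then walk attributes."""
        parts = path.split(".")
        for i in range(len(parts), 0, -1):
            try:
                obj = importlib.import_module(".".join(parts[:i]))
                break
            except ModuleNotFoundError:
                continue
        else:
            raise ImportError("cannot import any prefix of " + path)
        for name in parts[i:]:
            obj = getattr(obj, name)  # e.g. Adadelta, then Adadelta.step
        return obj


    def resolve_copy_from(directive):
        """Return the text after 'Examples:' in the docstring of the named API."""
        target = directive.split(":", 1)[1].strip()  # e.g. "paddle.optimizer.Adadelta"
        doc = inspect.getdoc(_import_target(target)) or ""
        marker = "Examples:"
        # Assumes the docstring carries an Examples: section, as Paddle API
        # docstrings generally do; returns an empty string otherwise.
        return doc[doc.index(marker) + len(marker):] if marker in doc else ""


    if __name__ == "__main__":
        # Requires paddle to be installed.
        print(resolve_copy_from("COPY-FROM: paddle.optimizer.Adadelta"))

Whatever the real tooling does with these directives, the change under review is just that textual swap, repeated for each optimizer API page.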
117 changes: 6 additions & 111 deletions docs/api/paddle/optimizer/Adadelta_cn.rst
@@ -41,19 +41,7 @@ The Adadelta optimizer comes from the `DECOUPLED WEIGHT DECAY REGULARIZATION paper <https://
Code Example
::::::::::::

.. code-block:: python

    import paddle

    inp = paddle.uniform(min=-0.1, max=0.1, shape=[10, 10], dtype='float32')
    linear = paddle.nn.Linear(10, 10)
    out = linear(inp)
    loss = paddle.mean(out)
    adadelta = paddle.optimizer.Adadelta(learning_rate=0.0003, epsilon=1e-06, rho=0.95,
                                         parameters=linear.parameters())
    out.backward()
    adadelta.step()
    adadelta.clear_grad()

COPY-FROM: paddle.optimizer.Adadelta


Methods
@@ -73,20 +61,7 @@ step()



**Code Example**

.. code-block:: python

    import paddle

    value = paddle.arange(26, dtype='float32')
    a = paddle.reshape(value, [2, 13])
    linear = paddle.nn.Linear(13, 5)
    adadelta = paddle.optimizer.Adadelta(learning_rate=0.0003, epsilon=1e-06, rho=0.95,
                                         parameters=linear.parameters())
    out = linear(a)
    out.backward()
    adadelta.step()
    adadelta.clear_grad()

COPY-FROM: paddle.optimizer.Adadelta.step

minimize(loss, startup_program=None, parameters=None, no_grad_set=None)
'''''''''
@@ -107,23 +82,7 @@

**Code Example**

.. code-block:: python

    import paddle

    inp = paddle.uniform(min=-0.1, max=0.1, shape=[10, 10], dtype='float32')
    linear = paddle.nn.Linear(10, 10)
    out = linear(inp)
    loss = paddle.mean(out)
    beta1 = paddle.to_tensor([0.9], dtype="float32")
    beta2 = paddle.to_tensor([0.99], dtype="float32")
    adadelta = paddle.optimizer.Adadelta(learning_rate=0.0003, epsilon=1e-06, rho=0.95,
                                         parameters=linear.parameters())
    out.backward()
    adadelta.minimize(loss)
    adadelta.clear_grad()

COPY-FROM: paddle.optimizer.Adadelta.minimize

clear_grad()
'''''''''
@@ -137,19 +96,7 @@

**Code Example**

.. code-block:: python

    import paddle

    value = paddle.arange(26, dtype='float32')
    a = paddle.reshape(value, [2, 13])
    linear = paddle.nn.Linear(13, 5)
    optimizer = paddle.optimizer.Adadelta(learning_rate=0.0003, epsilon=1e-06, rho=0.95,
                                          parameters=linear.parameters())
    out = linear(a)
    out.backward()
    optimizer.step()
    optimizer.clear_grad()

COPY-FROM: paddle.optimizer.Adadelta.clear_grad

set_lr(value)
'''''''''
@@ -170,26 +117,7 @@

**Code Example**

.. code-block:: python

    import paddle

    linear = paddle.nn.Linear(10, 10)
    adadelta = paddle.optimizer.Adadelta(weight_decay=0.01,
                                         learning_rate=0.1, parameters=linear.parameters())

    # set learning rate manually by python float value
    lr_list = [0.2, 0.3, 0.4, 0.5, 0.6]
    for i in range(5):
        adadelta.set_lr(lr_list[i])
        lr = adadelta.get_lr()
        print("current lr is {}".format(lr))
    # Print:
    #    current lr is 0.2
    #    current lr is 0.3
    #    current lr is 0.4
    #    current lr is 0.5
    #    current lr is 0.6

COPY-FROM: paddle.optimizer.Adadelta.set_lr

get_lr()
'''''''''
@@ -207,37 +135,4 @@ float, the learning rate of the current step.

**Code Example**

.. code-block:: python

    import numpy as np
    import paddle

    # example1: _LRScheduler is not used, return value is all the same
    emb = paddle.nn.Embedding(10, 10, sparse=False)
    adadelta = paddle.optimizer.Adadelta(learning_rate=0.001, parameters=emb.parameters(), weight_decay=0.01)
    lr = adadelta.get_lr()
    print(lr)  # 0.001

    # example2: PiecewiseDecay is used, return the step learning rate
    inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
    linear = paddle.nn.Linear(10, 10)
    inp = paddle.to_tensor(inp)
    out = linear(inp)
    loss = paddle.mean(out)

    bd = [2, 4, 6, 8]
    value = [0.2, 0.4, 0.6, 0.8, 1.0]
    scheduler = paddle.optimizer.lr.PiecewiseDecay(bd, value, 0)
    adadelta = paddle.optimizer.Adadelta(scheduler,
                                         parameters=linear.parameters(),
                                         weight_decay=0.01)

    # first step: learning rate is 0.2
    np.allclose(adadelta.get_lr(), 0.2, rtol=1e-06, atol=0.0)  # True

    # learning rate for different steps
    ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
    for i in range(12):
        adadelta.step()
        lr = adadelta.get_lr()
        scheduler.step()
        np.allclose(lr, ret[i], rtol=1e-06, atol=0.0)  # True

COPY-FROM: paddle.optimizer.Adadelta.get_lr
144 changes: 6 additions & 138 deletions docs/api/paddle/optimizer/AdamW_cn.rst
@@ -49,49 +49,7 @@ The AdamW optimizer comes from `DECOUPLED WEIGHT DECAY REGULARIZATION <https://arxiv.org/
Code Example
::::::::::::

.. code-block:: python

    import paddle

    linear = paddle.nn.Linear(10, 10)
    inp = paddle.rand([10, 10], dtype="float32")
    out = linear(inp)
    loss = paddle.mean(out)
    beta1 = paddle.to_tensor([0.9], dtype="float32")
    beta2 = paddle.to_tensor([0.99], dtype="float32")
    adam = paddle.optimizer.AdamW(learning_rate=0.1,
                                  parameters=linear.parameters(),
                                  beta1=beta1,
                                  beta2=beta2,
                                  weight_decay=0.01)
    out.backward()
    adam.step()
    adam.clear_grad()

    # Note that the learning_rate of linear_2 is 0.01: the global learning_rate
    # of 0.1 is scaled by that parameter group's 'learning_rate' coefficient of 0.1.
    linear_1 = paddle.nn.Linear(10, 10)
    linear_2 = paddle.nn.Linear(10, 10)
    inp = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1)
    out = linear_1(inp)
    out = linear_2(out)
    loss = paddle.mean(out)
    adam = paddle.optimizer.AdamW(
        learning_rate=0.1,
        parameters=[{
            'params': linear_1.parameters()
        }, {
            'params': linear_2.parameters(),
            'weight_decay': 0.001,
            'learning_rate': 0.1,
            'beta1': 0.8
        }],
        weight_decay=0.01,
        beta1=0.9)
    out.backward()
    adam.step()
    adam.clear_grad()

COPY-FROM: paddle.optimizer.AdamW

Methods
::::::::::::
@@ -110,18 +68,7 @@ step()

**Code Example**

.. code-block:: python

    import paddle

    a = paddle.rand(shape=[2, 13], dtype="float32")
    linear = paddle.nn.Linear(13, 5)
    adam = paddle.optimizer.AdamW(learning_rate=0.01,
                                  weight_decay=0.01,
                                  parameters=linear.parameters())
    out = linear(a)
    out.backward()
    adam.step()
    adam.clear_grad()

COPY-FROM: paddle.optimizer.AdamW.step

minimize(loss, startup_program=None, parameters=None, no_grad_set=None)
'''''''''
@@ -142,24 +89,7 @@ tuple(optimize_ops, params_grads), where optimize_ops is the list of parameter-optimization OPs

**Code Example**

.. code-block:: python

    import paddle

    linear = paddle.nn.Linear(10, 10)
    inp = paddle.randn(shape=[10, 10], dtype="float32")
    out = linear(inp)
    loss = paddle.mean(out)
    beta1 = paddle.to_tensor([0.9], dtype="float32")
    beta2 = paddle.to_tensor([0.99], dtype="float32")
    adam = paddle.optimizer.AdamW(learning_rate=0.1,
                                  parameters=linear.parameters(),
                                  weight_decay=0.01)
    out.backward()
    adam.minimize(loss)
    adam.clear_grad()

COPY-FROM: paddle.optimizer.AdamW.minimize

clear_grad()
'''''''''
@@ -172,19 +102,7 @@

**Code Example**

.. code-block:: python

    import paddle

    a = paddle.rand(shape=[2, 13], dtype="float32")
    linear = paddle.nn.Linear(13, 5)
    optimizer = paddle.optimizer.AdamW(weight_decay=0.01,
                                       learning_rate=0.02,
                                       parameters=linear.parameters())
    out = linear(a)
    out.backward()
    optimizer.step()
    optimizer.clear_grad()

COPY-FROM: paddle.optimizer.AdamW.clear_grad

set_lr(value)
'''''''''
@@ -204,26 +122,7 @@

**Code Example**

.. code-block:: python

    import paddle

    linear = paddle.nn.Linear(10, 10)
    adam = paddle.optimizer.AdamW(weight_decay=0.01,
                                  learning_rate=0.1, parameters=linear.parameters())

    # set learning rate manually by python float value
    lr_list = [0.2, 0.3, 0.4, 0.5, 0.6]
    for i in range(5):
        adam.set_lr(lr_list[i])
        lr = adam.get_lr()
        print("current lr is {}".format(lr))
    # Print:
    #    current lr is 0.2
    #    current lr is 0.3
    #    current lr is 0.4
    #    current lr is 0.5
    #    current lr is 0.6

COPY-FROM: paddle.optimizer.AdamW.set_lr

get_lr()
'''''''''
@@ -240,35 +139,4 @@ float, the learning rate of the current step.

**Code Example**

.. code-block:: python

    import paddle

    # example1: _LRScheduler is not used, return value is all the same
    emb = paddle.nn.Embedding(10, 10, sparse=False)
    adam = paddle.optimizer.AdamW(learning_rate=0.001, parameters=emb.parameters(), weight_decay=0.01)
    lr = adam.get_lr()
    print(lr)  # 0.001

    # example2: PiecewiseDecay is used, return the step learning rate
    linear = paddle.nn.Linear(10, 10)
    inp = paddle.randn([10, 10], dtype="float32")
    out = linear(inp)
    loss = paddle.mean(out)

    bd = [2, 4, 6, 8]
    value = [0.2, 0.4, 0.6, 0.8, 1.0]
    scheduler = paddle.optimizer.lr.PiecewiseDecay(bd, value, 0)
    adam = paddle.optimizer.AdamW(scheduler,
                                  parameters=linear.parameters(),
                                  weight_decay=0.01)

    # learning rate is 0.2
    print(adam.get_lr())

    # learning rate for different steps
    ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
    for i in range(12):
        adam.step()
        lr = adam.get_lr()
        scheduler.step()
        print(lr, ret[i])

COPY-FROM: paddle.optimizer.AdamW.get_lr
(Diffs for the remaining eight changed files are not shown here.)
