PaddlePaddle · luotao1 · Jul 11, 2023 · Jul 6, 2023 · Jul 7, 2023
diff --git a/docs/api/paddle/io/DataLoader_cn.rst b/docs/api/paddle/io/DataLoader_cn.rst
@@ -52,66 +52,7 @@ DataLoader，迭代 ``dataset`` 数据的迭代器，迭代器返回的数据中
 代码示例
 ::::::::::::
 
-.. code-block:: python
-
-    import numpy as np
-
-    import paddle
-    import paddle.nn as nn
-    import paddle.nn.functional as F
-    from paddle.io import Dataset, BatchSampler, DataLoader
-
-    BATCH_NUM = 20
-    BATCH_SIZE = 16
-    EPOCH_NUM = 4
-
-    IMAGE_SIZE = 784
-    CLASS_NUM = 10
-
-    USE_GPU = False # whether use GPU to run model
-
-    # define a random dataset
-    class RandomDataset(Dataset):
-        def __init__(self, num_samples):
-            self.num_samples = num_samples
-
-        def __getitem__(self, idx):
-            image = np.random.random([IMAGE_SIZE]).astype('float32')
-            label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64')
-            return image, label
-
-        def __len__(self):
-            return self.num_samples
-
-    dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
-
-    class SimpleNet(nn.Layer):
-        def __init__(self):
-            super().__init__()
-            self.fc = nn.Linear(IMAGE_SIZE, CLASS_NUM)
-
-        def forward(self, image, label=None):
-            return self.fc(image)
-
-    simple_net = SimpleNet()
-    opt = paddle.optimizer.SGD(learning_rate=1e-3,
-                              parameters=simple_net.parameters())
-
-    loader = DataLoader(dataset,
-                        batch_size=BATCH_SIZE,
-                        shuffle=True,
-                        drop_last=True,
-                        num_workers=2)
-
-    for e in range(EPOCH_NUM):
-        for i, (image, label) in enumerate(loader()):
-            out = simple_net(image)
-            loss = F.cross_entropy(out, label)
-            avg_loss = paddle.mean(loss)
-            avg_loss.backward()
-            opt.minimize(avg_loss)
-            simple_net.clear_gradients()
-            print("Epoch {} batch {}: loss = {}".format(e, i, np.mean(loss.numpy())))
+COPY-FROM: paddle.io.DataLoader:data-loader-example
 
 方法
 ::::::::::::
@@ -150,204 +91,11 @@ from_generator(feed_list=None, capacity=None, use_double_buffer=True, iterable=T
 
 **代码示例 1**
 
-.. code-block:: python
-
-    '''
-    Example in static graph mode
-    '''
-    import numpy as np
-
-    import paddle
-    import paddle.static as static
-    import paddle.nn.functional as F
-
-
-    BATCH_NUM = 10
-    BATCH_SIZE = 16
-    EPOCH_NUM = 4
-
-    CLASS_NUM = 10
-
-    ITERABLE = True # whether the created DataLoader object is iterable
-    USE_GPU = False # whether to use GPU
-
-    DATA_FORMAT = 'batch_generator' # data format of data source user provides
-
-    paddle.enable_static()
-
-    def simple_net(image, label):
-        fc_tmp = static.nn.fc(image, size=CLASS_NUM)
-        cross_entropy = F.softmax_with_cross_entropy(image, label)
-        loss = paddle.mean(cross_entropy)
-        sgd = paddle.optimizer.SGD(learning_rate=1e-3)
-        sgd.minimize(loss)
-        return loss
-
-    def get_random_images_and_labels(image_shape, label_shape):
-        image = np.random.random(size=image_shape).astype('float32')
-        label = np.random.random(size=label_shape).astype('int64')
-        return image, label
-
-    # If the data generator yields one sample each time,
-    # use DataLoader.set_sample_generator to set the data source.
-    def sample_generator_creator():
-        def __reader__():
-            for _ in range(BATCH_NUM * BATCH_SIZE):
-                image, label = get_random_images_and_labels([784], [1])
-                yield image, label
-
-        return __reader__
-
-    # If the data generator yield list of samples each time,
-    # use DataLoader.set_sample_list_generator to set the data source.
-    def sample_list_generator_creator():
-        def __reader__():
-            for _ in range(BATCH_NUM):
-                sample_list = []
-                for _ in range(BATCH_SIZE):
-                    image, label = get_random_images_and_labels([784], [1])
-                    sample_list.append([image, label])
-
-                yield sample_list
-
-        return __reader__
-
-    # If the data generator yields a batch each time,
-    # use DataLoader.set_batch_generator to set the data source.
-    def batch_generator_creator():
-        def __reader__():
-            for _ in range(BATCH_NUM):
-                batch_image, batch_label = get_random_images_and_labels([BATCH_SIZE, 784], [BATCH_SIZE, 1])
-                yield batch_image, batch_label
-
-        return __reader__
-
-    # If DataLoader is iterable, use for loop to train the network
-    def train_iterable(exe, prog, loss, loader):
-        for _ in range(EPOCH_NUM):
-            for data in loader():
-                exe.run(prog, feed=data, fetch_list=[loss])
-
-    # If DataLoader is not iterable, use start() and reset() method to control the process
-    def train_non_iterable(exe, prog, loss, loader):
-        for _ in range(EPOCH_NUM):
-            loader.start() # call DataLoader.start() before each epoch starts
-            try:
-                while True:
-                    exe.run(prog, fetch_list=[loss])
-            except paddle.core.EOFException:
-                loader.reset() # call DataLoader.reset() after catching EOFException
-
-    def set_data_source(loader, places):
-        if DATA_FORMAT == 'sample_generator':
-            loader.set_sample_generator(sample_generator_creator(), batch_size=BATCH_SIZE, drop_last=True, places=places)
-        elif DATA_FORMAT == 'sample_list_generator':
-            loader.set_sample_list_generator(sample_list_generator_creator(), places=places)
-        elif DATA_FORMAT == 'batch_generator':
-            loader.set_batch_generator(batch_generator_creator(), places=places)
-        else:
-            raise ValueError('Unsupported data format')
-
-    image = static.data(name='image', shape=[None, 784], dtype='float32')
-    label = static.data(name='label', shape=[None, 1], dtype='int64')
-
-    # Define DataLoader
-    loader = paddle.io.DataLoader.from_generator(feed_list=[image, label], capacity=16, iterable=ITERABLE)
-
-    # Define network
-    loss = simple_net(image, label)
-
-    # Set data source of DataLoader
-    #
-    # If DataLoader is iterable, places must be given and the number of places must be the same with device number.
-    #  - If you are using GPU, call `paddle.static.cuda_places()` to get all GPU places.
-    #  - If you are using CPU, call `paddle.static.cpu_places()` to get all CPU places.
-    #
-    # If DataLoader is not iterable, places can be None.
-    places = static.cuda_places() if USE_GPU else static.cpu_places()
-    set_data_source(loader, places)
-
-    exe = static.Executor(places[0])
-    exe.run(static.default_startup_program())
-
-    prog = static.CompiledProgram(static.default_main_program())
-
-    if loader.iterable:
-        train_iterable(exe, prog, loss, loader)
-    else:
-        train_non_iterable(exe, prog, loss, loader)
-
+COPY-FROM: paddle.fluid.DataLoader.from_generator:static-data-loader-example-1
 
 **代码示例 2**
 
-.. code-block:: python
-
-    '''
-    Example in dynamic graph mode.
-    '''
-    import numpy as np
-
-    import paddle
-    import paddle.nn as nn
-    import paddle.optimizer as opt
-    import paddle.distributed as dist
-
-    BATCH_SIZE = 16
-    BATCH_NUM = 4
-    EPOCH_NUM = 4
-
-    IMAGE_SIZE = 784
-    CLASS_NUM = 1
-
-    USE_GPU = False # whether to use GPU
-
-    def _get_random_images_and_labels(image_shape, label_shape):
-            image = np.random.random(size=image_shape).astype('float32')
-            label = np.random.random(size=label_shape).astype('int64')
-            return image, label
-
-    def __reader__():
-            for _ in range(BATCH_NUM):
-                batch_image, batch_label = _get_random_images_and_labels(
-                    [BATCH_SIZE, IMAGE_SIZE], [BATCH_SIZE, CLASS_NUM])
-                yield batch_image, batch_label
-
-    def random_batch_reader():
-        return __reader__
-
-    class LinearNet(nn.Layer):
-        def __init__(self):
-            super().__init__()
-            self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM)
-
-        @paddle.jit.to_static
-        def forward(self, x):
-            return self._linear(x)
-
-    # set device
-    paddle.set_device('gpu' if USE_GPU else 'cpu')
-
-    # create network
-    layer = LinearNet()
-    dp_layer = paddle.DataParallel(layer)
-    loss_fn = nn.CrossEntropyLoss()
-    adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())
-
-    # create data loader
-    loader = paddle.io.DataLoader.from_generator(capacity=5)
-    loader.set_batch_generator(random_batch_reader())
-
-    for epoch_id in range(EPOCH_NUM):
-        for batch_id, (image, label) in enumerate(loader()):
-            out = layer(image)
-            loss = loss_fn(out, label)
-
-            loss.backward()
-
-            adam.step()
-            adam.clear_grad()
-            print("Epoch {} batch {}: loss = {}".format(
-                epoch_id, batch_id, np.mean(loss.numpy())))
+COPY-FROM: paddle.fluid.DataLoader.from_generator:static-data-loader-example-2
 
 **代码示例 3**
 
@@ -419,21 +167,4 @@ from_dataset(dataset, places, drop_last=True)
 
 **代码示例**
 
-.. code-block:: python
-
-    import paddle
-    import paddle.static as static
-
-    paddle.enable_static()
-
-    image = static.data(name='image', shape=[None, 784], dtype='float32')
-    label = static.data(name='label', shape=[None, 1], dtype='int64')
-
-    dataset = paddle.distributed.QueueDataset()
-    dataset.init(
-        batch_size=32,
-        pipe_command='cat',
-        use_var=[image, label])
-    dataset.set_filelist(['a.txt', 'b.txt', 'c.txt'])
-
-    loader = paddle.io.DataLoader.from_dataset(dataset, static.cpu_places())
+COPY-FROM: paddle.fluid.DataLoader.from_dataset
diff --git a/docs/api/paddle/io/DistributedBatchSampler_cn.rst b/docs/api/paddle/io/DistributedBatchSampler_cn.rst
@@ -45,27 +45,4 @@ set_epoch(epoch)
 
 **代码示例**
 
-.. code-block:: python
-
-    import numpy as np
-
-    from paddle.io import Dataset, DistributedBatchSampler
-
-    # init with dataset
-    class RandomDataset(Dataset):
-        def __init__(self, num_samples):
-            self.num_samples = num_samples
-
-        def __getitem__(self, idx):
-            image = np.random.random([784]).astype('float32')
-            label = np.random.randint(0, 9, (1, )).astype('int64')
-            return image, label
-
-        def __len__(self):
-            return self.num_samples
-
-    dataset = RandomDataset(100)
-    sampler = DistributedBatchSampler(dataset, batch_size=64)
-
-    for epoch in range(10):
-        sampler.set_epoch(epoch)
+COPY-FROM: paddle.io.DistributedBatchSampler.set_epoch