# syntax = docker/dockerfile:1.3.0-labs
# The top line is used by BuildKit. DO NOT ERASE IT.
# See the link below for documentation on BuildKit syntax.
# https://github.com/moby/buildkit/blob/master/frontend/dockerfile/docs/syntax.md
# Perhaps the BuildKit dependency is not a good idea since not everyone can use it.
# However, the Dockerfile in the official PyTorch repository also uses BuildKit.
# Do not make changes to the `build` layers unless absolutely necessary.
# If another library needs to be built, add another build layer.
# Users are free to customize the `train` and `deploy` layers as they please.
# All `ARG` variables must be redeclared in every stage that uses them;
# `ARG`s defined before the first `FROM` pass their values only to stages that redeclare them
# (see the commented example below).
# `ENV` and `LABEL` variables are inherited only by child stages.
# See https://docs.docker.com/engine/reference/builder on how to write Dockerfiles and
# https://docs.docker.com/develop/develop-images/dockerfile_best-practices
# for best practices.
# Style guide: variables specified in the Dockerfile are written as ${ARGUMENT}
# while variables not specified by ARG/ENV are written as $ARGUMENT.
# See https://hub.docker.com/r/nvidia/cuda for all CUDA images.
# Default image is nvidia/cuda:11.3.1-cudnn8-devel-ubuntu20.04.
# Currently, only Ubuntu images are implemented.
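# A minimal illustration of the `ARG` scoping rule mentioned above
# (hypothetical image and variable names, not part of this build):
#
#   ARG VERSION=1.0          # Declared before the first `FROM`: a global default only.
#   FROM alpine AS stage-a
#   ARG VERSION              # Redeclared without a value: inherits the default `1.0`.
#   RUN echo "${VERSION}"    # Prints `1.0`.
#   FROM alpine AS stage-b
#   RUN echo "${VERSION}"    # Prints an empty string: `VERSION` was not redeclared here.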
ARG USE_CUDA=1
ARG CUDA_VERSION=11.3.1
ARG CUDNN_VERSION=8
ARG LINUX_DISTRO=ubuntu
ARG DISTRO_VERSION=20.04
ARG TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0 7.5 8.0 8.6+PTX"
ARG BUILD_IMAGE=nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-${LINUX_DISTRO}${DISTRO_VERSION}
ARG TRAIN_IMAGE=nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-${LINUX_DISTRO}${DISTRO_VERSION}
ARG DEPLOY_IMAGE=nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-runtime-${LINUX_DISTRO}${DISTRO_VERSION}
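# Example BuildKit invocation (a sketch only; the image tag and argument values are
# placeholders, and the `UID`/`GID` build arguments required by the `train` stage
# are shown further below):
#
#   DOCKER_BUILDKIT=1 docker build \
#       --build-arg CUDA_VERSION=11.3.1 \
#       --build-arg PYTHON_VERSION=3.8 \
#       --target train -t cresset:train .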
# Build stages exist to build PyTorch and subsidiary libraries.
# They can be easily extended to include builds for other libraries.
# They are not used in the final image, which only copies
# the build outputs from the build stages.
FROM ${BUILD_IMAGE} AS build-base-ubuntu
# Change the default settings to keep the `apt` cache so that BuildKit cache mounts can reuse downloaded packages.
RUN rm -f /etc/apt/apt.conf.d/docker-clean; \
echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' \
> /etc/apt/apt.conf.d/keep-cache
RUN --mount=type=cache,id=apt-cache-build,target=/var/cache/apt \
--mount=type=cache,id=apt-lib-build,target=/var/lib/apt \
apt-get update && apt-get install -y --no-install-recommends \
build-essential \
ca-certificates \
ccache \
curl \
git && \
rm -rf /var/lib/apt/lists/*
# FROM ${BUILD_IMAGE} AS build-base-centos
# FROM ${BUILD_IMAGE} AS build-base-ubi
# To build images based on CentOS or UBI,
# simply implement the install for the
# libraries installed by `apt` in the Ubuntu layer.
FROM build-base-${LINUX_DISTRO} AS build-base
LABEL maintainer="[email protected]"
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
# Conda packages have higher priority than system packages during build.
ENV PATH=/opt/conda/bin:$PATH
# Python won’t try to write .pyc or .pyo files on the import of source modules.
ENV PYTHONDONTWRITEBYTECODE=1
# Force stdin, stdout and stderr to be totally unbuffered. Good for logging.
ENV PYTHONUNBUFFERED=1
# Allows UTF-8 characters as outputs in Docker.
ENV PYTHONIOENCODING=UTF-8
ARG PYTHON_VERSION=3.8
# Conda always uses the specified version of Python, regardless of Miniconda version.
# Use a different conda URL for different architectures. Default is x86_64.
ARG CONDA_URL=https://repo.anaconda.com/miniconda/Miniconda3-py39_4.10.3-Linux-x86_64.sh
RUN curl -fsSL -o ~/miniconda.sh ${CONDA_URL} && \
chmod +x ~/miniconda.sh && \
~/miniconda.sh -b -p /opt/conda && \
rm ~/miniconda.sh && \
conda install -y python=${PYTHON_VERSION} && \
conda clean -ya
# Include `conda` in dynamic linking. Setting $LD_LIBRARY_PATH directly is bad practice.
RUN echo /opt/conda/lib >> /etc/ld.so.conf.d/conda.conf && ldconfig
RUN /usr/sbin/update-ccache-symlinks
RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache && ccache --max-size 0
# Install everything required for build.
FROM build-base AS build-install
# The Magma version must match the CUDA version of the build image.
ARG MAGMA_VERSION=113
# Consider pinning versions for these libraries and sorting the packages alphabetically.
# Multiple conda installs may not be the best solution, but
# combining multiple channels in a single install would pull in older packages.
RUN --mount=type=cache,id=conda-build,target=/opt/conda/pkgs \
conda install -y \
astunparse \
numpy \
ninja \
pyyaml \
mkl \
mkl-include \
setuptools \
cmake \
cffi \
typing_extensions \
future \
six \
requests \
pillow \
pkgconfig && \
conda install -y -c pytorch \
magma-cuda${MAGMA_VERSION} && \
conda install -y -c conda-forge \
libpng \
libjpeg-turbo
WORKDIR /opt
# Using `--jobs 0` lets git choose a reasonable default for parallel submodule fetching.
RUN git clone --recursive --jobs 0 https://github.com/pytorch/pytorch.git
RUN git clone --recursive --jobs 0 https://github.com/pytorch/vision.git
RUN git clone --recursive --jobs 0 https://github.com/pytorch/text.git
RUN git clone --recursive --jobs 0 https://github.com/pytorch/audio.git
FROM build-install AS build-torch
ARG USE_CUDA
ARG PYTORCH_VERSION_TAG
# See https://developer.nvidia.com/cuda-gpus for the official list of
# CUDA Compute Capability (CC) versions for architectures.
# See https://pytorch.org/docs/stable/cpp_extension.html for an
# explanation of how to specify the `TORCH_CUDA_ARCH_LIST` variable.
# The `+PTX` means that PTX should be built for that CC.
# PyTorch will find the best CC for the host hardware even if
# `TORCH_CUDA_ARCH_LIST` is not given explicitly,
# but TorchVision and other subsidiary libraries cannot.
ARG TORCH_CUDA_ARCH_LIST
ARG TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
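# For reference, the CC of the host GPU can be checked with an existing CUDA-enabled
# PyTorch installation (an optional step on the host, not part of this build):
#
#   python -c "import torch; print(torch.cuda.get_device_capability())"
#
# The arch list can then be narrowed at build time to shorten compilation, e.g.,
# `--build-arg TORCH_CUDA_ARCH_LIST="8.6+PTX"` (the value here is only an example).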
# Checkout to specific version and update submodules.
WORKDIR /opt/pytorch
RUN if [ -n "${PYTORCH_VERSION_TAG}" ]; then \
git checkout ${PYTORCH_VERSION_TAG} && \
git submodule sync && \
git submodule update --init --recursive --jobs 0; \
fi
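# For example, `--build-arg PYTORCH_VERSION_TAG=v1.10.0` checks out the v1.10.0 release tag.
# The tag value is only an illustration; match it to the desired PyTorch release and to
# compatible tags for the subsidiary libraries below.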
# Build PyTorch. `USE_CUDA`, `USE_CUDNN`, and `USE_ROCM` are made explicit just in case.
RUN --mount=type=cache,target=/opt/ccache \
USE_CUDA=${USE_CUDA} USE_CUDNN=${USE_CUDA} USE_ROCM=0 \
TORCH_NVCC_FLAGS=${TORCH_NVCC_FLAGS} \
TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST} \
CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \
python setup.py bdist_wheel -d /tmp/dist
# Install PyTorch for subsidiary libraries.
RUN --mount=type=cache,target=/opt/ccache \
USE_CUDA=${USE_CUDA} USE_CUDNN=${USE_CUDA} USE_ROCM=0 \
TORCH_NVCC_FLAGS=${TORCH_NVCC_FLAGS} \
TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST} \
CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \
python setup.py install
FROM build-torch AS build-vision
ARG USE_CUDA
ARG TORCHVISION_VERSION_TAG
ARG TORCH_CUDA_ARCH_LIST
# Build TorchVision from source to satisfy PyTorch versioning requirements.
# Setting `FORCE_CUDA=1` creates bizarre errors unless CCs are specified explicitly.
# Fix this issue later if necessary by getting output from `torch.cuda.get_arch_list()`.
# Note that the `FORCE_CUDA` flag may be changed to `USE_CUDA` in later versions.
WORKDIR /opt/vision
RUN if [ -n "${TORCHVISION_VERSION_TAG}" ]; then \
git checkout ${TORCHVISION_VERSION_TAG} && \
git submodule sync && \
git submodule update --init --recursive --jobs 0; \
fi
RUN --mount=type=cache,target=/opt/ccache \
FORCE_CUDA=${USE_CUDA} TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST} \
python setup.py bdist_wheel -d /tmp/dist
FROM build-torch AS build-text
ARG TORCHTEXT_VERSION_TAG
WORKDIR /opt/text
RUN if [ -n "${TORCHTEXT_VERSION_TAG}" ]; then \
git checkout ${TORCHTEXT_VERSION_TAG} && \
git submodule sync && \
git submodule update --init --recursive --jobs 0; \
fi
# TorchText does not use CUDA.
RUN --mount=type=cache,target=/opt/ccache \
python setup.py bdist_wheel -d /tmp/dist
FROM build-torch AS build-audio
ARG USE_CUDA
ARG TORCHAUDIO_VERSION_TAG
ARG TORCH_CUDA_ARCH_LIST
WORKDIR /opt/audio
RUN if [ -n "${TORCHAUDIO_VERSION_TAG}" ]; then \
git checkout ${TORCHAUDIO_VERSION_TAG} && \
git submodule sync && \
git submodule update --init --recursive --jobs 0; \
fi
RUN --mount=type=cache,target=/opt/ccache \
BUILD_SOX=1 USE_CUDA=${USE_CUDA} \
TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST} \
python setup.py bdist_wheel -d /tmp/dist
FROM ${BUILD_IMAGE} AS train-builds
# A convenience layer to gather PyTorch and subsidiary builds required for training.
# If other source builds are included later on, gather them here as well.
# The train layer should not have any dependencies other than this layer.
COPY --from=build-install /opt/conda /opt/conda
COPY --from=build-vision /tmp/dist /tmp/dist
COPY --from=build-text /tmp/dist /tmp/dist
COPY --from=build-audio /tmp/dist /tmp/dist
FROM ${TRAIN_IMAGE} AS train
######### *Customize for your use case by editing from here* #########
# The `train` image is the one actually used for training.
# It is designed to be separate from the `build` image,
# with only the build artifacts (e.g., pip wheels) copied over.
LABEL maintainer="[email protected]"
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
ENV PYTHONIOENCODING=UTF-8
# Set as `ARG`s so that they apply during the build (e.g., preventing `.pyc` files, which reduces the image footprint) without persisting in the resulting image.
ARG PYTHONDONTWRITEBYTECODE=1
ARG PYTHONUNBUFFERED=1
# Change the default settings to keep the `apt` cache so that BuildKit cache mounts can reuse downloaded packages.
RUN rm -f /etc/apt/apt.conf.d/docker-clean; \
echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' \
> /etc/apt/apt.conf.d/keep-cache
# `tzdata` requires a timezone and noninteractive mode.
ENV TZ=Asia/Seoul
ARG DEBIAN_FRONTEND=noninteractive
# Speedups in `apt` and `pip` installs for Korean users. Change URLs for other locations.
# http://archive.ubuntu.com/ubuntu is specific to nvidia/cuda CUDA Ubuntu images.
# Check `/etc/apt/sources.list` of your base image to find your Ubuntu URL.
# The download optimization is placed here rather than in the install image for two reasons.
# 1. Install images should remain modular and should not depend on the timezone.
# 2. Installation is short compared to the build, but a speedup still helps when the build is already cached.
ARG DEB_OLD=http://archive.ubuntu.com
ARG DEB_NEW=http://mirror.kakao.com
ARG INDEX_URL=http://mirror.kakao.com/pypi/simple
ARG TRUSTED_HOST=mirror.kakao.com
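# The mirror `ARG`s above can be overridden at build time for other regions, e.g.
# (the Debian mirror URL below is a placeholder, not a recommendation):
#
#   --build-arg DEB_NEW=http://mirror.example.com \
#   --build-arg INDEX_URL=https://pypi.org/simple \
#   --build-arg TRUSTED_HOST=pypi.org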
# Overwrite any pre-existing global `pip` configuration.
RUN if [ "$TZ" = "Asia/Seoul" ]; then \
sed -i "s%${DEB_OLD}%${DEB_NEW}%g" /etc/apt/sources.list && \
printf "[global]\nindex-url=${INDEX_URL}\ntrusted-host=${TRUSTED_HOST}\n" \
> /etc/pip.conf; \
fi
RUN --mount=type=cache,id=apt-cache-train,target=/var/cache/apt \
--mount=type=cache,id=apt-lib-train,target=/var/lib/apt \
apt-get update && apt-get install -y --no-install-recommends \
git \
sudo \
nano \
tmux \
openssh-server \
tzdata && \
rm -rf /var/lib/apt/lists/*
ARG GID
ARG UID
ARG GRP=user
ARG USR=user
ARG PASSWD=ubuntu
# Create user with home directory and password-free sudo permissions.
# This may cause security issues. Use at your own risk.
RUN groupadd -g ${GID} ${GRP} && \
useradd --shell /bin/bash --create-home -u ${UID} -g ${GRP} \
-p $(openssl passwd -1 ${PASSWD}) ${USR} && \
echo "${GRP} ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers && \
usermod -aG sudo ${USR}
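# `UID` and `GID` have no default values, so pass the host user's IDs at build time so that
# files created in mounted volumes keep the correct ownership, e.g.:
#
#   --build-arg UID=$(id -u) --build-arg GID=$(id -g)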
USER ${USR}
# Enable colors on the bash terminal. This is a personal preference.
RUN sed -i 's/#force_color_prompt=yes/force_color_prompt=yes/' $HOME/.bashrc
COPY --from=train-builds --chown=${UID}:${GID} /opt/conda /opt/conda
# Paths created by `--mount` are owned by root unless created beforehand.
# This assumes that the home directory is in the default location.
ENV PIP_DOWNLOAD_CACHE=/home/${USR}/.cache/pip
WORKDIR ${PIP_DOWNLOAD_CACHE}
# `PROJECT_ROOT` is where the project code will reside.
ARG PROJECT_ROOT=/opt/project
# `PROJECT_ROOT` belongs to `USR` if created after `USER` has been set.
# Not so for pre-existing directories, which will still belong to root.
WORKDIR ${PROJECT_ROOT}
# Path order conveys precedence.
ENV PATH=${PROJECT_ROOT}:/opt/conda/bin:$PATH
ENV PYTHONPATH=${PROJECT_ROOT}
RUN conda config --set pip_interop_enabled True
# Install numpy from conda to use MKL.
RUN conda install -y \
numpy==1.20.3 && \
conda clean -ya
# Not using a `requirements.txt` file by design as this would create an external dependency.
# Also, the file would not be a true requirements file because of the source builds and conda installs.
# Preserving pip cache by omitting `--no-cache-dir`.
RUN --mount=type=cache,id=pip-train,target=${PIP_DOWNLOAD_CACHE} \
--mount=type=bind,from=train-builds,source=/tmp/dist,target=/tmp/dist \
python -m pip install \
/tmp/dist/*.whl \
torch_tb_profiler==0.2.1 \
jupyterlab==3.2.0 \
hydra-core==1.1.0 \
hydra_colorlog==1.1.0 \
accelerate==0.5.1 \
pytorch-lightning==1.5.2 \
seaborn==0.11.1 \
pandas==1.3.1 \
openpyxl==3.0.9 \
scikit-learn==1.0 \
wandb==0.12.4
CMD ["/bin/bash"]
# Create a deployment image as necessary.
# FROM ${DEPLOY_IMAGE} AS deploy