forked from NVIDIA/modulus
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile
141 lines (118 loc) · 6.57 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Base image; override at build time with --build-arg BASE_CONTAINER=<image>.
ARG BASE_CONTAINER=nvcr.io/nvidia/pytorch:23.12-py3
# Shared base stage: the ci and deploy stages further down both start FROM builder.
# The AS keyword is upper-cased to match FROM (BuildKit FromAsCasing check).
FROM ${BASE_CONTAINER} AS builder
# Declared inside the stage so the RUN steps below can branch on the target platform.
ARG TARGETPLATFORM
# Pin pip and setuptools to known versions. --no-cache-dir keeps pip's download cache
# out of the layer, matching the other dependency installs in this stage.
RUN pip install --no-cache-dir "pip==23.2.1" "setuptools==68.2.2"
# Install git-lfs, graphviz and libgl1 (libgl1 is a VTK dependency), then enable git-lfs.
# The apt package lists are deleted in the same layer that downloads them so they do not
# persist in the image.
RUN apt-get update && \
    apt-get install -y git-lfs graphviz libgl1 && \
    git lfs install && \
    rm -rf /var/lib/apt/lists/*
# NOTE(review): presumably read by the NVIDIA base image's CUDA compatibility setup as a
# timeout in seconds; nothing in this file consumes it - confirm against the base image docs.
ENV _CUDA_COMPAT_TIMEOUT=90
# Additional Python dependencies, installed as two separate layers.
RUN pip install --no-cache-dir \
        "h5py>=3.7.0" \
        "netcdf4>=1.6.3" \
        "ruamel.yaml>=0.17.22" \
        "scikit-learn>=1.0.2" \
        "cftime>=1.6.2" \
        "einops>=0.7.0" \
        "pyspng>=0.1.0"
RUN pip install --no-cache-dir \
        "hydra-core>=1.2.0" \
        "termcolor>=2.1.1" \
        "wandb>=0.13.7" \
        "mlflow>=2.1.1" \
        "pydantic>=1.10.2" \
        "imageio>=2.28.1" \
        "moviepy>=1.0.3" \
        "tqdm>=4.60.0" \
        "gcsfs==2024.2.0"
# Copy the build context (the modulus source tree) into the image. The RUN steps that
# follow look for prebuilt wheels under /modulus/deps/.
COPY . /modulus/
# Numcodecs is installed on its own because its ARM pip install has issues.
# Upstream fix in progress: https://github.com/zarr-developers/numcodecs/pull/315
# (no public release yet).
#   * linux/amd64                  -> install from PyPI
#   * linux/arm64 + bundled wheel  -> install the wheel shipped under /modulus/deps
#   * anything else                -> try PyPI, which may have to build from source
RUN NUMCODECS_WHL="/modulus/deps/numcodecs-0.11.0-cp310-cp310-linux_aarch64.whl"; \
    if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
        echo "Pip install for numcodecs for $TARGETPLATFORM exists, installing!" && \
        pip install --no-cache-dir numcodecs; \
    elif [ "$TARGETPLATFORM" = "linux/arm64" ] && [ -e "$NUMCODECS_WHL" ]; then \
        echo "Numcodecs wheel for $TARGETPLATFORM exists, installing!" && \
        pip install --force-reinstall --no-cache-dir "$NUMCODECS_WHL"; \
    else \
        echo "Numcodecs wheel for $TARGETPLATFORM is not present, attempting to build from pip, but might fail" && \
        pip install --no-cache-dir numcodecs; \
    fi
# Install VTK, then PyVista.
#   * linux/arm64 with a prebuilt wheel under /modulus/deps: install that wheel.
#   * linux/amd64: install from PyPI.
#   * anything else: build the v9.2.6 wheel from source.
# The source build hands cmake ".." (the checkout that was just cloned) rather than a
# hard-coded /workspace/vtk/, so it does not depend on the working directory being
# /workspace. The apt lists fetched for libgl1-mesa-dev are removed in the same layer.
RUN if [ "$TARGETPLATFORM" = "linux/arm64" ] && [ -e "/modulus/deps/vtk-9.2.6.dev0-cp310-cp310-linux_aarch64.whl" ]; then \
        echo "VTK wheel for $TARGETPLATFORM exists, installing!" && \
        pip install --no-cache-dir /modulus/deps/vtk-9.2.6.dev0-cp310-cp310-linux_aarch64.whl; \
    elif [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
        echo "Installing vtk for: $TARGETPLATFORM" && \
        pip install --no-cache-dir "vtk>=9.2.6"; \
    else \
        echo "Installing vtk for: $TARGETPLATFORM from source" && \
        apt-get update && apt-get install -y libgl1-mesa-dev && \
        rm -rf /var/lib/apt/lists/* && \
        git clone https://gitlab.kitware.com/vtk/vtk.git && cd vtk && git checkout tags/v9.2.6 && git submodule update --init --recursive && \
        mkdir build && cd build && cmake -GNinja -DVTK_WHEEL_BUILD=ON -DVTK_WRAP_PYTHON=ON .. && ninja && \
        python setup.py bdist_wheel && \
        pip install --no-cache-dir dist/vtk-9.2.6.dev0-cp310-cp310-linux_aarch64.whl && \
        cd ../../ && rm -r vtk; \
    fi
RUN pip install --no-cache-dir "pyvista>=0.40.1"
# DGL: the instructions below only work for containers with CUDA >= 12.1
# (https://www.dgl.ai/pages/start.html).
# The backend name can be overridden with --build-arg DGL_BACKEND=<name>; it is exported
# under both spellings, DGL_BACKEND and DGLBACKEND.
ARG DGL_BACKEND=pytorch
ENV DGL_BACKEND=${DGL_BACKEND} \
    DGLBACKEND=${DGL_BACKEND}
RUN pip install --no-cache-dir --no-deps \
        --find-links https://data.dgl.ai/wheels/cu121/repo.html \
        "dgl==2.0.0"
# Custom onnxruntime build.
# TODO: Find a fix to eliminate the custom build
# The wheel path is chosen per platform; if it is present under /modulus/deps it is
# force-reinstalled. Otherwise only numpy is pinned, which forces a numpy update that
# overrides the numba 0.56.4 maximum-numpy constraint.
RUN case "$TARGETPLATFORM" in \
        linux/amd64) ONNX_WHL="/modulus/deps/onnxruntime_gpu-1.15.1-cp310-cp310-linux_x86_64.whl" ;; \
        linux/arm64) ONNX_WHL="/modulus/deps/onnxruntime_gpu-1.15.1-cp310-cp310-linux_aarch64.whl" ;; \
        *) ONNX_WHL="" ;; \
    esac; \
    if [ -n "$ONNX_WHL" ] && [ -e "$ONNX_WHL" ]; then \
        echo "Custom onnx wheel for $TARGETPLATFORM exists, installing!" && \
        pip install --force-reinstall --no-cache-dir "$ONNX_WHL"; \
    else \
        echo "No custom wheel present, skipping" && \
        pip install --no-cache-dir "numpy==1.22.4"; \
    fi
# Cleanup of stage: delete the source tree that was copied to /modulus/ earlier in this stage.
# NOTE(review): this runs in its own layer, so the earlier COPY layer still carries the
# files; the delete hides them from the final filesystem but does not shrink the image.
RUN rm -rf /modulus/
# CI image: the builder stage plus the project's Python dependencies.
# The AS keyword is upper-cased to match FROM (BuildKit FromAsCasing check).
FROM builder AS ci
ARG TARGETPLATFORM
COPY . /modulus/
# An editable install with the makani extra pulls in the project's dependencies; the
# package itself is then uninstalled and the source tree removed, leaving only the
# dependencies behind. ".[makani]" is quoted so the shell cannot glob-expand the brackets,
# and --no-cache-dir keeps pip's cache out of the layer.
RUN cd /modulus/ && pip install --no-cache-dir -e ".[makani]" && pip uninstall nvidia-modulus -y && rm -rf /modulus/
# tensorflow and warp-lang are installed on linux/amd64 only; linux/arm64 just logs a
# message, and any other platform does nothing.
RUN case "$TARGETPLATFORM" in \
        linux/amd64) \
            echo "Installing tensorflow and warp-lang for: $TARGETPLATFORM" && \
            pip install --no-cache-dir "tensorflow==2.9.0" "warp-lang>=0.6.0" \
            ;; \
        linux/arm64) \
            echo "Installing tensorflow and warp-lang for: $TARGETPLATFORM is not supported presently" \
            ;; \
    esac
# Pinned formatting, docstring-coverage and test-coverage tools, plus a pinned protobuf.
RUN pip install --no-cache-dir \
        "black==22.10.0" \
        "interrogate==1.5.0" \
        "coverage==6.5.0" \
        "protobuf==3.20.3"
# TODO(akamenev): install Makani via direct URL, see comments in pyproject.toml.
# Editable install of Makani from its Git repository, pinned to the v0.1.0 tag. The URL is
# quoted so the shell passes the '#egg=' fragment through unchanged.
RUN pip install --no-cache-dir -e "git+https://github.com/NVIDIA/modulus-makani.git@v0.1.0#egg=makani"
# Deployment image: the builder stage plus a regular (non-editable) install of the package.
# The AS keyword is upper-cased to match FROM (BuildKit FromAsCasing check).
FROM builder AS deploy
COPY . /modulus/
# Install straight from the copied source directory (no `cd` needed) and keep pip's
# download cache out of the layer.
RUN pip install --no-cache-dir /modulus/
RUN pip install --no-cache-dir "protobuf==3.20.3"
# Set the Git hash as an environment variable.
# Supplied with --build-arg MODULUS_GIT_HASH=<hash>; when no value is provided the
# variable falls back to "unknown".
ARG MODULUS_GIT_HASH
ENV MODULUS_GIT_HASH=${MODULUS_GIT_HASH:-unknown}
# Clean up: delete the source tree copied to /modulus/ now that the package is installed.
# NOTE(review): this runs in its own layer, so the earlier COPY layer still carries the
# files; the delete hides them from the final filesystem but does not shrink the image.
RUN rm -rf /modulus/
# Docs image: built on top of the deploy stage.
# The AS keyword is upper-cased to match FROM (BuildKit FromAsCasing check).
FROM deploy AS docs
# Declared inside the stage so the RUN steps below can branch on the target platform.
ARG TARGETPLATFORM
# CI packages: a pinned protobuf, then tensorflow and warp-lang on linux/amd64 only.
# linux/arm64 just logs a message, and any other platform does nothing.
RUN pip install --no-cache-dir "protobuf==3.20.3"
RUN case "$TARGETPLATFORM" in \
        linux/amd64) \
            echo "Installing tensorflow and warp-lang for: $TARGETPLATFORM" && \
            pip install --no-cache-dir "tensorflow==2.9.0" "warp-lang>=0.6.0" \
            ;; \
        linux/arm64) \
            echo "Installing tensorflow and warp-lang for: $TARGETPLATFORM is not supported presently" \
            ;; \
    esac
# Pinned Python packages for the Sphinx documentation build.
RUN pip install --no-cache-dir \
        "recommonmark==0.7.1" \
        "sphinx==5.1.1" \
        "sphinx-rtd-theme==1.0.0" \
        "pydocstyle==6.1.1" \
        "nbsphinx==0.8.9" \
        "nbconvert==6.4.3" \
        "jinja2==3.0.3"
# Install pandoc 3.1.6.2 from the upstream release .deb. The package architecture is taken
# from dpkg, so the matching build is fetched instead of always the amd64 one, and the
# downloaded .deb is deleted in the same layer once it has been installed.
RUN PANDOC_DEB="pandoc-3.1.6.2-1-$(dpkg --print-architecture).deb" && \
    wget "https://github.com/jgm/pandoc/releases/download/3.1.6.2/${PANDOC_DEB}" && \
    dpkg -i "${PANDOC_DEB}" && \
    rm -f "${PANDOC_DEB}"