forked from dusty-nv/jetson-containers
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDockerfile.jp5
111 lines (86 loc) · 3.63 KB
/
Dockerfile.jp5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#
# build cuDF from source for JetPack 5 (see config.py for package configuration)
#
# Image to build on top of. It has no default here, so it has to be supplied
# with --build-arg.
ARG BASE_IMAGE
FROM ${BASE_IMAGE}
# GitHub "owner/repo" slug that the cuDF sources are cloned from.
ARG CUDF_REPO
# Branch name handed to `git clone --branch`; the Python package builds in
# this file also use it (with leading "v" characters stripped) as their
# version string.
ARG CUDF_VERSION
# Not referenced directly in this file; presumably read from the environment
# by cudf's build.sh -- TODO confirm.
ARG CUDF_CMAKE_CUDA_ARCHITECTURES
# Not referenced directly in this file either; presumably the install prefix
# honored by cudf's build.sh -- TODO confirm.
ARG INSTALL_PREFIX=/usr/local
# Directory that becomes the working directory for the build steps; the cuDF
# checkout is cloned beneath it.
ARG BUILD_DIR=/opt/rapids
WORKDIR ${BUILD_DIR}
#
# cudf bundles many of its dependencies, but some are still needed
# libssl for cudf, boost and liblz4 for ORC extensions
#
# Install the development packages listed below from apt, then remove apt's
# package lists and clean its archive cache within the same layer.
RUN apt-get update \
 && apt-get install --yes --no-install-recommends \
        libboost-filesystem-dev \
        libboost-system-dev \
        liblz4-dev \
        libssl-dev \
 && rm -rf /var/lib/apt/lists/* \
 && apt-get clean
# arrow gets confused if python 3.9 is present, so remove it when installed.
# The package glob is quoted so that apt-get, not the shell, interprets it:
# an unquoted pattern would be rewritten by /bin/sh if a matching file name
# existed in the working directory.
# Removal is best-effort: if the purge fails, the message is printed and the
# build continues.
RUN apt-get purge -y python3.9 'libpython3.9*' || echo "python3.9 not found, skipping removal"
# cudf.DataFrame.sort_values() - ValueError: Cannot convert value of type NotImplementedType to cudf scalar
# https://stackoverflow.com/questions/73928178/cannot-convert-value-of-type-notimplementedtype-to-cudf-scalar-appearing-on-tr
# numpy is therefore held below 1.23. --no-cache-dir keeps pip's download
# cache out of the image layer.
RUN pip3 install --no-cache-dir 'numpy<1.23'
#
# build libcudf (C++)
#
# Download the GitHub ref metadata for the requested branch into /tmp.
# NOTE(review): the downloaded file is not read anywhere in this file; it
# presumably serves as a cache-buster so that the clone below is redone when
# the branch moves -- confirm.
ADD https://api.github.com/repos/${CUDF_REPO}/git/refs/heads/${CUDF_VERSION} /tmp/cudf_version.json
# Shallow-clone the requested branch (including submodules) into ./cudf and
# build the libcudf target with the repository's build.sh.
# The \" sequences are escaped, so the shell passes them on as literal
# double-quote characters inside the arguments handed to build.sh.
# NOTE(review): presumably build.sh parses the quoted --cmake-args string
# itself -- confirm before changing this quoting.
RUN git clone --branch ${CUDF_VERSION} --depth=1 --recursive https://github.com/${CUDF_REPO} cudf && \
cd cudf && \
./build.sh libcudf -v --cmake-args=\"-DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON -DCUDF_ENABLE_ARROW_ORC=ON\"
#
# build rmm
#
# Build the rmm Python wheel from the rmm sources located under the libcudf
# build tree (cpp/build/_deps/rmm-src).
# The two sed edits replace the versioneer lookups in setup.py and
# rmm/__init__.py with the CUDF_VERSION string (leading "v" stripped).
# The wheel is copied to /opt and installed from there; --no-cache-dir keeps
# pip's download cache out of the image layer.
RUN cd cudf/cpp/build/_deps/rmm-src/python && \
    sed -i "s|versioneer.get_version()|\"${CUDF_VERSION}\".lstrip('v')|g" setup.py && \
    sed -i "s|get_versions().*|\"${CUDF_VERSION}\".lstrip('v')|g" rmm/__init__.py && \
    python3 setup.py bdist_wheel --verbose && \
    cp dist/rmm*.whl /opt && \
    pip3 install --no-cache-dir /opt/rmm*.whl

# Sanity check: rmm is registered with pip and can be imported.
RUN pip3 show rmm && python3 -c 'import rmm; print(rmm.__version__)'
#
# build pyarrow
#
# Build the pyarrow wheel from the Arrow sources located under the libcudf
# build tree (cpp/build/_deps/arrow-src).
# The PYARROW_WITH_* variables are exported for setup.py; presumably they
# switch on the corresponding optional pyarrow components (ORC, CUDA, HDFS,
# dataset, Parquet) -- see the Arrow Python build documentation.
# PYARROW_PARALLEL is set to the number of available CPUs, and ARROW_HOME is
# passed to CMake as /usr/local.
# The wheel is copied to /opt and installed from there; --no-cache-dir keeps
# pip's download cache out of the image layer.
RUN export PYARROW_WITH_ORC=1 && \
    export PYARROW_WITH_CUDA=1 && \
    export PYARROW_WITH_HDFS=1 && \
    export PYARROW_WITH_DATASET=1 && \
    export PYARROW_WITH_PARQUET=1 && \
    export PYARROW_PARALLEL=$(nproc) && \
    export PYARROW_CMAKE_OPTIONS="-DARROW_HOME=/usr/local" && \
    cd cudf/cpp/build/_deps/arrow-src/python && \
    python3 setup.py --verbose build_ext --inplace bdist_wheel && \
    cp dist/pyarrow*.whl /opt && \
    pip3 install --no-cache-dir /opt/pyarrow*.whl

# Sanity check: pyarrow is registered with pip and can be imported.
RUN pip3 show pyarrow && python3 -c 'import pyarrow; print(pyarrow.__version__)'
#
# build cudf (python)
#
# Build the cudf Python wheel from the checkout (python/cudf).
# The two sed edits replace the versioneer lookups in setup.py and
# cudf/__init__.py with the CUDF_VERSION string (leading "v" stripped).
# PARALLEL_LEVEL and -j are both set to the number of available CPUs.
# The wheel is copied to /opt and installed from there; --no-cache-dir on
# every pip call keeps pip's download cache out of the image layers.
RUN cd cudf/python/cudf && \
    sed -i "s|versioneer.get_version()|\"${CUDF_VERSION}\".lstrip('v')|g" setup.py && \
    sed -i "s|get_versions().*|\"${CUDF_VERSION}\".lstrip('v')|g" cudf/__init__.py && \
    PARALLEL_LEVEL=$(nproc) python3 setup.py --verbose build_ext --inplace -j$(nproc) bdist_wheel && \
    cp dist/cudf*.whl /opt && \
    pip3 install --no-cache-dir /opt/cudf*.whl

# cudf/utils/metadata/orc_column_statistics_pb2.py - your generated code is out of date and must be regenerated with protoc >= 3.19.0
RUN pip3 install --no-cache-dir 'protobuf<3.20'

# requests package needed for test_csv.py
RUN pip3 install --no-cache-dir requests

# make sure cudf loads
RUN pip3 show cudf && python3 -c 'import cudf; print(cudf.__version__)'
#
# build dask_cudf
#
# Build the dask_cudf wheel from the checkout (python/dask_cudf).
# The two sed edits replace the versioneer lookups in setup.py and
# dask_cudf/__init__.py with the CUDF_VERSION string (leading "v" stripped).
# The wheel is copied to /opt and installed from there; --no-cache-dir keeps
# pip's download cache out of the image layer.
RUN cd $BUILD_DIR/cudf/python/dask_cudf && \
    sed -i "s|versioneer.get_version()|\"${CUDF_VERSION}\".lstrip('v')|g" setup.py && \
    sed -i "s|get_versions().*|\"${CUDF_VERSION}\".lstrip('v')|g" dask_cudf/__init__.py && \
    PARALLEL_LEVEL=$(nproc) python3 setup.py --verbose build_ext --inplace -j$(nproc) bdist_wheel && \
    cp dist/dask_cudf*.whl /opt && \
    pip3 install --no-cache-dir /opt/dask_cudf*.whl

# Sanity check: dask_cudf is registered with pip and can be imported.
RUN pip3 show dask_cudf && python3 -c 'import dask_cudf; print(dask_cudf.__version__)'

# Reset the working directory to the filesystem root now that the builds are done.
WORKDIR /