From 9c6848deb9f5dd6790a16258ceed32826e231875 Mon Sep 17 00:00:00 2001 From: Chunmei Xu Date: Tue, 18 Jul 2023 15:58:41 +0800 Subject: [PATCH] build with cuda support Signed-off-by: Chunmei Xu --- pytorch.spec | 89 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 59 insertions(+), 30 deletions(-) diff --git a/pytorch.spec b/pytorch.spec index c33505e..3fde779 100644 --- a/pytorch.spec +++ b/pytorch.spec @@ -1,4 +1,4 @@ - +%define anolis_release 2 %global vcu_maj 12 %global vcu_min 1 @@ -12,7 +12,7 @@ Name: pytorch Version: 2.0.1 -Release: 1%{dist} +Release: %{anolis_release}%{dist} Summary: PyTorch Neural Network Package License: BSD @@ -64,12 +64,12 @@ BuildRequires: onnx-devel onnx-optimizer-devel BuildRequires: kineto-devel %endif -%define have_cuda 0 +%define have_cuda 1 %define have_tensorrt 0 %define have_cuda_gcc 0 %global toolchain gcc -%define gpu_target_arch "5.2+PTX 6.1 7.5 8.6 8.9 9.0" +%define gpu_target_arch "6.0 6.1 7.0 7.5 8.0 8.6" %global _lto_cflags %{nil} %global debug_package %{nil} @@ -79,7 +79,7 @@ BuildRequires: kineto-devel %undefine _find_debuginfo_dwz_opts %undefine _missing_build_ids_terminate_build -%bcond_with cuda +%bcond_without cuda %if %{without cuda} %global have_cuda 0 %endif @@ -111,7 +111,7 @@ BuildRequires: libcusparse-devel-%{vcu_maj}-%{vcu_min} BuildRequires: libcusolver-devel-%{vcu_maj}-%{vcu_min} BuildRequires: libnvjitlink-devel-%{vcu_maj}-%{vcu_min} BuildRequires: libnccl-devel -BuildRequires: libcudnn8-devel +BuildRequires: libcudnn-devel %if %{use_magma} BuildRequires: magma-devel %endif @@ -159,7 +159,7 @@ This package contains python files for pythorch. # % patch2 -p1 -b .gcc11~ # % patch3 -p1 -b .cpp~ %patch4 -p1 -b .xnn~ -#%patch5 -p1 -b .cu12~ +%patch5 -p1 -b .cu12~ # python version sed -i -e 's|VERSION_LESS 3.10)|VERSION_LESS 3.6)|g' cmake/Dependencies.cmake @@ -169,7 +169,7 @@ sed -i -e 's|PY_MAJOR_VERSION == 3|PY_MAJOR_VERSION == 3 \&\& PY_MINOR_VERSION > sed -i 's|CMAKE_CXX_STANDARD 14|CMAKE_CXX_STANDARD 17|' CMakeLists.txt # external fbgemm qnnpack gloo -sed -i -e 's|torch_cpu PUBLIC c10|torch_cpu PUBLIC c10 qnnpack gloo |' caffe2/CMakeLists.txt +sed -i -e 's|torch_cpu PUBLIC c10|torch_cpu PUBLIC c10 qnnpack gloo gloo_cuda |' caffe2/CMakeLists.txt # external pybind11 sed -i -e 's|USE_SYSTEM_BIND11|USE_SYSTEM_PYBIND11|g' cmake/Dependencies.cmake @@ -202,9 +202,9 @@ sed -i -e 's|Caffe2_DEPENDENCY_LIBS onnx_proto onnx|Caffe2_DEPENDENCY_LIBS onnx_ %endif # external tensorpipe -#mkdir -p third_party/tensorpipe -#echo '' >> third_party/tensorpipe/CMakeLists.txt -#sed -i '/add_dependencies(tensorpipe_agent tensorpipe)/d' caffe2/CMakeLists.txt +mkdir -p third_party/tensorpipe +echo '' >> third_party/tensorpipe/CMakeLists.txt +sed -i '/add_dependencies(tensorpipe_agent tensorpipe)/d' caffe2/CMakeLists.txt # external nnpack echo '' > cmake/External/nnpack.cmake @@ -325,7 +325,7 @@ export PYTHON_EXECUTABLE="%{__python3}" # -DUSE_NATIVE_ARCH=ON export LDFLAGS="-Wl,-lstdc++" export CFLAGS="${CFLAGS} -fPIC" -#export LD_LIBRARY_PATH="/usr/local/cuda-%{vcu_maj}.%{vcu_min}/%{_lib}/" +export LD_LIBRARY_PATH=/usr/local/cuda-%{vcu_maj}.%{vcu_min}/lib64 %cmake .. -Wno-dev \ -DCMAKE_SKIP_RPATH=ON \ -DCMAKE_VERBOSE_MAKEFILE=OFF \ @@ -362,7 +362,33 @@ export CFLAGS="${CFLAGS} -fPIC" -DUSE_SYSTEM_GLOO=ON \ -DUSE_SYSTEM_PYBIND11=ON \ -DUSE_SYSTEM_EIGEN_INSTALL=ON \ - -DUSE_CUDA=OFF \ +%if %{have_cuda} + -DUSE_CUDA=ON \ + -DUSE_CUDNN=ON \ + -DUSE_NVRTC=OFF \ + -DUSE_CUPTI_SO=ON \ + -DUSE_FAST_NVCC=ON \ + -DUSE_SYSTEM_NCCL=ON \ + -DCMAKE_CUDA_FLAGS="-fPIC" \ + -DCUDA_PROPAGATE_HOST_FLAGS=OFF \ + -DTORCH_CUDA_ARCH_LIST=%{gpu_target_arch} \ + -DCUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda-%{vcu_maj}.%{vcu_min}" \ + -DCMAKE_CUDA_COMPILER="/usr/local/cuda-%{vcu_maj}.%{vcu_min}/bin/nvcc" \ + -DCUDA_NVCC_FLAGS="--compiler-options;-fPIC;-Wno-deprecated-gpu-targets;-allow-unsupported-compiler;--fatbin-options;-compress-all" \ + -DCMAKE_CUDA_FLAGS="--compiler-options -fPIC -Wno-deprecated-gpu-targets -allow-unsupported-compiler --fatbin-options -compress-all" \ + -DNCCL_INCLUDE_DIR="%{_includedir}/nccl" \ +%if %{use_magma} + -DUSE_MAGMA=ON \ +%else + -DUSE_MAGMA=OFF \ +%endif + -DBUILD_SPLIT_CUDA=ON \ +%if %{have_tensorrt} + -DUSE_TENSORRT=ON \ +%else + -DUSE_TENSORRT=OFF \ +%endif +%endif -DBLAS="OpenBLAS" \ -DUSE_MPI=OFF \ -DUSE_OBSERVERS=OFF \ @@ -397,11 +423,7 @@ export CFLAGS="${CFLAGS} -fPIC" -DUSE_LMDB=ON \ -DUSE_REDIS=ON \ -DUSE_LEVELDB=ON \ -%if 0%{?fedora} >= 30 - -DUSE_ROCKSDB=ON \ -%else -DUSE_ROCKSDB=OFF \ -%endif -DUSE_FFMPEG=OFF \ -DUSE_OPENCV=ON \ -DUSE_METAL=OFF \ @@ -466,17 +488,17 @@ done # version.py -#cuver=$(/usr/local/cuda/bin/nvcc --version | grep release | cut -d',' -f2 | awk '{print $2}') -#echo '__version__ = "%{version}"' > %{buildroot}/%{python3_sitearch}/torch/version.py -#echo 'debug = False' >> %{buildroot}/%{python3_sitearch}/torch/version.py -#echo "cuda = \"$cuver\"" >> %{buildroot}/%{python3_sitearch}/torch/version.py -#echo 'hip = None' >> %{buildroot}/%{python3_sitearch}/torch/version.py +cuver=$(/usr/local/cuda/bin/nvcc --version | grep release | cut -d',' -f2 | awk '{print $2}') +echo '__version__ = "%{version}"' > %{buildroot}/%{python3_sitearch}/torch/version.py +echo 'debug = False' >> %{buildroot}/%{python3_sitearch}/torch/version.py +echo "cuda = \"$cuver\"" >> %{buildroot}/%{python3_sitearch}/torch/version.py +echo 'hip = None' >> %{buildroot}/%{python3_sitearch}/torch/version.py # install path -#mv -f %{buildroot}/%{_builddir}/pytorch/nvfuser/nvfuser.so \ -# %{buildroot}/%{_libdir}/ -#mv -f %{buildroot}/%{_builddir}/pytorch/torch/lib/libnvfuser_codegen.so \ -# %{buildroot}/%{_libdir}/ +mv -f %{buildroot}/%{_builddir}/pytorch-v%{version}/nvfuser/nvfuser.so \ + %{buildroot}/%{_libdir}/ +mv -f %{buildroot}/%{_builddir}/pytorch-v%{version}/torch/lib/libnvfuser_codegen.so \ + %{buildroot}/%{_libdir}/ # remove junk rm -rf %{buildroot}/%{_includedir}/clog.h || true @@ -488,9 +510,6 @@ rm -rf %{buildroot}/%{_builddir}/pytorch/nvfuser || true cp -r torch.egg-info %{buildroot}%{python3_sitearch}/ sed -i 's|[<=>].*||g' %{buildroot}%{python3_sitearch}/*.egg-info/requires.txt sed -i '/triton/d' %{buildroot}%{python3_sitearch}/*.egg-info/requires.txt -%if 0%{?rhel} -sed -i '/sympy/d' %{buildroot}%{python3_sitearch}/*.egg-info/requires.txt -%endif # strip elf set +x find %{buildroot} -type f -print | LC_ALL=C sort | @@ -510,18 +529,25 @@ set -x %{_bindir}/* %{_libdir}/libshm.so.* %{_libdir}/libc10.so.* +%{_libdir}/libc10_cuda.so %{_libdir}/libtorch.so.* %{_libdir}/libtorch_cpu.so.* +%{_libdir}/libtorch_cuda.so %{_libdir}/libtorch_global_deps.so.* %{_libdir}/libcaffe2_observers.so.* +%{_libdir}/libcaffe2_detectron_ops_gpu.so* +%{_libdir}/libcaffe2_nvrtc.so %{_libdir}/libnnapi_backend.so -%{_libdir}/libcaffe2_detectron_ops.so* %{_libdir}/libshm.so %{_libdir}/libc10.so %{_libdir}/libtorch.so %{_libdir}/libtorch_cpu.so %{_libdir}/libtorch_global_deps.so %{_libdir}/libcaffe2_observers.so +%{_libdir}/libtorch_cuda_linalg.so +%{_libdir}/nvfuser.so +%{_libdir}/libnvfuser_codegen.so + %files devel %{_includedir}/* @@ -533,5 +559,8 @@ set -x %changelog +* Tue Jul 18 2023 Chunmei Xu - 2.0.1-2 +- build with cuda support + * Fri Jun 16 2023 forrest_ly - 2.0.1-1 - init for anolis 23 -- Gitee