From 9c6848deb9f5dd6790a16258ceed32826e231875 Mon Sep 17 00:00:00 2001
From: Chunmei Xu <xuchunmei@linux.alibaba.com>
Date: Tue, 18 Jul 2023 15:58:41 +0800
Subject: [PATCH] build with cuda support

Signed-off-by: Chunmei Xu <xuchunmei@linux.alibaba.com>
---
 pytorch.spec | 89 ++++++++++++++++++++++++++++++++++------------------
 1 file changed, 59 insertions(+), 30 deletions(-)

diff --git a/pytorch.spec b/pytorch.spec
index c33505e..3fde779 100644
--- a/pytorch.spec
+++ b/pytorch.spec
@@ -1,4 +1,4 @@
-
+%define anolis_release 2
 %global vcu_maj 12
 %global vcu_min 1
 
@@ -12,7 +12,7 @@
 
 Name:           pytorch
 Version:        2.0.1 
-Release:        1%{dist}
+Release:        %{anolis_release}%{dist}
 Summary:        PyTorch Neural Network Package
 License:        BSD
 
@@ -64,12 +64,12 @@ BuildRequires:  onnx-devel onnx-optimizer-devel
 BuildRequires:  kineto-devel
 %endif
 
-%define have_cuda 0 
+%define have_cuda 1
 %define have_tensorrt 0
 %define have_cuda_gcc 0 
 %global toolchain gcc
 
-%define gpu_target_arch "5.2+PTX 6.1 7.5 8.6 8.9 9.0"
+%define gpu_target_arch "6.0 6.1 7.0 7.5 8.0 8.6"
 
 %global _lto_cflags %{nil}
 %global debug_package %{nil}
@@ -79,7 +79,7 @@ BuildRequires:  kineto-devel
 %undefine _find_debuginfo_dwz_opts
 %undefine _missing_build_ids_terminate_build
 
-%bcond_with cuda
+%bcond_without cuda
 %if %{without cuda}
 %global have_cuda 0
 %endif
@@ -111,7 +111,7 @@ BuildRequires:  libcusparse-devel-%{vcu_maj}-%{vcu_min}
 BuildRequires:  libcusolver-devel-%{vcu_maj}-%{vcu_min}
 BuildRequires:  libnvjitlink-devel-%{vcu_maj}-%{vcu_min}
 BuildRequires:  libnccl-devel
-BuildRequires:  libcudnn8-devel
+BuildRequires:  libcudnn-devel
 %if %{use_magma}
 BuildRequires:  magma-devel
 %endif
@@ -159,7 +159,7 @@ This package contains python files for pythorch.
 # % patch2 -p1 -b .gcc11~
 # % patch3 -p1 -b .cpp~
 %patch4 -p1 -b .xnn~
-#%patch5 -p1 -b .cu12~
+%patch5 -p1 -b .cu12~
 
 # python version
 sed -i -e 's|VERSION_LESS 3.10)|VERSION_LESS 3.6)|g' cmake/Dependencies.cmake
@@ -169,7 +169,7 @@ sed -i -e 's|PY_MAJOR_VERSION == 3|PY_MAJOR_VERSION == 3 \&\& PY_MINOR_VERSION >
 sed -i 's|CMAKE_CXX_STANDARD 14|CMAKE_CXX_STANDARD 17|' CMakeLists.txt
 
 # external fbgemm qnnpack gloo
-sed -i -e 's|torch_cpu PUBLIC c10|torch_cpu PUBLIC c10 qnnpack gloo |' caffe2/CMakeLists.txt
+sed -i -e 's|torch_cpu PUBLIC c10|torch_cpu PUBLIC c10 qnnpack gloo gloo_cuda |' caffe2/CMakeLists.txt
 # external pybind11
 sed -i -e 's|USE_SYSTEM_BIND11|USE_SYSTEM_PYBIND11|g' cmake/Dependencies.cmake
 
@@ -202,9 +202,9 @@ sed -i -e 's|Caffe2_DEPENDENCY_LIBS onnx_proto onnx|Caffe2_DEPENDENCY_LIBS onnx_
 %endif
 
 # external tensorpipe
-#mkdir -p third_party/tensorpipe
-#echo '' >> third_party/tensorpipe/CMakeLists.txt
-#sed -i '/add_dependencies(tensorpipe_agent tensorpipe)/d' caffe2/CMakeLists.txt
+mkdir -p third_party/tensorpipe
+echo '' >> third_party/tensorpipe/CMakeLists.txt
+sed -i '/add_dependencies(tensorpipe_agent tensorpipe)/d' caffe2/CMakeLists.txt
 
 # external nnpack
 echo '' > cmake/External/nnpack.cmake
@@ -325,7 +325,7 @@ export PYTHON_EXECUTABLE="%{__python3}"
 # -DUSE_NATIVE_ARCH=ON
 export LDFLAGS="-Wl,-lstdc++"
 export CFLAGS="${CFLAGS} -fPIC"
-#export LD_LIBRARY_PATH="/usr/local/cuda-%{vcu_maj}.%{vcu_min}/%{_lib}/"
+export LD_LIBRARY_PATH="/usr/local/cuda-%{vcu_maj}.%{vcu_min}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
 %cmake .. -Wno-dev \
        -DCMAKE_SKIP_RPATH=ON \
        -DCMAKE_VERBOSE_MAKEFILE=OFF \
@@ -362,7 +362,33 @@ export CFLAGS="${CFLAGS} -fPIC"
        -DUSE_SYSTEM_GLOO=ON \
        -DUSE_SYSTEM_PYBIND11=ON \
        -DUSE_SYSTEM_EIGEN_INSTALL=ON \
-       -DUSE_CUDA=OFF \
+%if %{have_cuda}
+       -DUSE_CUDA=ON \
+       -DUSE_CUDNN=ON \
+       -DUSE_NVRTC=OFF \
+       -DUSE_CUPTI_SO=ON \
+       -DUSE_FAST_NVCC=ON \
+       -DUSE_SYSTEM_NCCL=ON \
+       -DCMAKE_CUDA_FLAGS="-fPIC" \
+       -DCUDA_PROPAGATE_HOST_FLAGS=OFF \
+       -DTORCH_CUDA_ARCH_LIST=%{gpu_target_arch} \
+       -DCUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda-%{vcu_maj}.%{vcu_min}" \
+       -DCMAKE_CUDA_COMPILER="/usr/local/cuda-%{vcu_maj}.%{vcu_min}/bin/nvcc" \
+       -DCUDA_NVCC_FLAGS="--compiler-options;-fPIC;-Wno-deprecated-gpu-targets;-allow-unsupported-compiler;--fatbin-options;-compress-all" \
+       -DCMAKE_CUDA_FLAGS="--compiler-options -fPIC -Wno-deprecated-gpu-targets -allow-unsupported-compiler --fatbin-options -compress-all" \
+       -DNCCL_INCLUDE_DIR="%{_includedir}/nccl" \
+%if %{use_magma}
+       -DUSE_MAGMA=ON \
+%else
+       -DUSE_MAGMA=OFF \
+%endif
+       -DBUILD_SPLIT_CUDA=ON \
+%if %{have_tensorrt}
+       -DUSE_TENSORRT=ON \
+%else
+       -DUSE_TENSORRT=OFF \
+%endif
+%endif
        -DBLAS="OpenBLAS" \
        -DUSE_MPI=OFF \
        -DUSE_OBSERVERS=OFF \
@@ -397,11 +423,7 @@ export CFLAGS="${CFLAGS} -fPIC"
        -DUSE_LMDB=ON \
        -DUSE_REDIS=ON \
        -DUSE_LEVELDB=ON \
-%if 0%{?fedora} >= 30
-       -DUSE_ROCKSDB=ON \
-%else
        -DUSE_ROCKSDB=OFF \
-%endif
        -DUSE_FFMPEG=OFF \
        -DUSE_OPENCV=ON \
        -DUSE_METAL=OFF \
@@ -466,17 +488,17 @@ done
 
 
 # version.py
-#cuver=$(/usr/local/cuda/bin/nvcc --version | grep release | cut -d',' -f2 | awk '{print $2}')
-#echo '__version__ = "%{version}"' > %{buildroot}/%{python3_sitearch}/torch/version.py
-#echo 'debug = False' >> %{buildroot}/%{python3_sitearch}/torch/version.py
-#echo "cuda = \"$cuver\"" >> %{buildroot}/%{python3_sitearch}/torch/version.py
-#echo 'hip = None' >> %{buildroot}/%{python3_sitearch}/torch/version.py
+cuver=$(/usr/local/cuda-%{vcu_maj}.%{vcu_min}/bin/nvcc --version | grep release | cut -d',' -f2 | awk '{print $2}')
+echo '__version__ = "%{version}"' > %{buildroot}/%{python3_sitearch}/torch/version.py
+echo 'debug = False' >> %{buildroot}/%{python3_sitearch}/torch/version.py
+echo "cuda = \"$cuver\"" >> %{buildroot}/%{python3_sitearch}/torch/version.py
+echo 'hip = None' >> %{buildroot}/%{python3_sitearch}/torch/version.py
 
 # install path
-#mv -f %{buildroot}/%{_builddir}/pytorch/nvfuser/nvfuser.so \
-#      %{buildroot}/%{_libdir}/
-#mv -f %{buildroot}/%{_builddir}/pytorch/torch/lib/libnvfuser_codegen.so \
-#      %{buildroot}/%{_libdir}/
+mv -f %{buildroot}/%{_builddir}/pytorch-v%{version}/nvfuser/nvfuser.so \
+      %{buildroot}/%{_libdir}/
+mv -f %{buildroot}/%{_builddir}/pytorch-v%{version}/torch/lib/libnvfuser_codegen.so \
+      %{buildroot}/%{_libdir}/
 
 # remove junk
 rm -rf %{buildroot}/%{_includedir}/clog.h || true
@@ -488,9 +510,6 @@ rm -rf %{buildroot}/%{_builddir}/pytorch/nvfuser || true
 cp -r torch.egg-info %{buildroot}%{python3_sitearch}/
 sed -i 's|[<=>].*||g' %{buildroot}%{python3_sitearch}/*.egg-info/requires.txt
 sed -i '/triton/d' %{buildroot}%{python3_sitearch}/*.egg-info/requires.txt
-%if 0%{?rhel}
-sed -i '/sympy/d' %{buildroot}%{python3_sitearch}/*.egg-info/requires.txt
-%endif
 # strip elf
 set +x
 find %{buildroot} -type f -print | LC_ALL=C sort |
@@ -510,18 +529,25 @@ set -x
 %{_bindir}/*
 %{_libdir}/libshm.so.*
 %{_libdir}/libc10.so.*
+%{_libdir}/libc10_cuda.so
 %{_libdir}/libtorch.so.*
 %{_libdir}/libtorch_cpu.so.*
+%{_libdir}/libtorch_cuda.so
 %{_libdir}/libtorch_global_deps.so.*
 %{_libdir}/libcaffe2_observers.so.*
+%{_libdir}/libcaffe2_detectron_ops_gpu.so*
+%{_libdir}/libcaffe2_nvrtc.so
 %{_libdir}/libnnapi_backend.so
-%{_libdir}/libcaffe2_detectron_ops.so*
 %{_libdir}/libshm.so
 %{_libdir}/libc10.so
 %{_libdir}/libtorch.so
 %{_libdir}/libtorch_cpu.so
 %{_libdir}/libtorch_global_deps.so
 %{_libdir}/libcaffe2_observers.so
+%{_libdir}/libtorch_cuda_linalg.so
+%{_libdir}/nvfuser.so
+%{_libdir}/libnvfuser_codegen.so
+
 
 %files devel
 %{_includedir}/*
@@ -533,5 +559,8 @@ set -x
 
 
 %changelog
+* Tue Jul 18 2023 Chunmei Xu <xuchunmei@linux.alibaba.com> - 2.0.1-2
+- build with cuda support
+
 * Fri Jun 16 2023 forrest_ly <flin@linux.alibaba.com> - 2.0.1-1
 - init for anolis 23
-- 
Gitee