diff --git a/examples/mpi/deadlock/avoid_stuck_sr.F90 b/examples/mpi/deadlock/avoid_stuck_sr.F90
new file mode 100644
index 0000000000000000000000000000000000000000..f8f945892c9f0fd4cfc44e82cc368fe9f2ae692d
--- /dev/null
+++ b/examples/mpi/deadlock/avoid_stuck_sr.F90
@@ -0,0 +1,24 @@
+program main
+    use mpi
+    implicit none
+
+    integer :: rank, size, tag, ierr, status(MPI_STATUS_SIZE)
+    real :: sendbuf(100), recvbuf(100)
+    integer :: count
+    tag = 1
+    count = 100
+    call MPI_INIT(ierr)
+    call MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr)
+    call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)
+    ! Safe ordering: rank 0's send (A) is matched by rank 1's receive (D), so A completes even without system buffering, hence D completes too. Then rank 0's receive (C) is matched by rank 1's send (B), which likewise needs no buffering. There is no dependency cycle, so this exchange is deadlock-free; swapping A with C and D with B simultaneously is symmetric and equally safe.
+    IF (rank.EQ.0) THEN
+        CALL MPI_SEND(sendbuf, count, MPI_REAL, 1, tag, MPI_COMM_WORLD, ierr)
+        CALL MPI_RECV(recvbuf, count, MPI_REAL, 1, tag, MPI_COMM_WORLD, status, ierr)
+    ELSE IF( rank .EQ. 1) THEN
+        CALL MPI_RECV(recvbuf, count, MPI_REAL, 0, tag, MPI_COMM_WORLD, status, ierr)
+        CALL MPI_SEND(sendbuf, count, MPI_REAL, 0, tag, MPI_COMM_WORLD, ierr)
+    END IF
+    print *, 'Process', rank, 'of', size, 'is running'
+
+    call MPI_FINALIZE(ierr)
+end program main
diff --git a/examples/mpi/deadlock/maybe_stuck_sr.F90 b/examples/mpi/deadlock/maybe_stuck_sr.F90
new file mode 100644
index 0000000000000000000000000000000000000000..57eca02dff48f9b8cb713868c99160748af11ac0
--- /dev/null
+++ b/examples/mpi/deadlock/maybe_stuck_sr.F90
@@ -0,0 +1,24 @@
+program main
+    use mpi
+    implicit none
+
+    integer :: rank, size, tag, ierr, status(MPI_STATUS_SIZE)
+    real :: sendbuf(100), recvbuf(100)
+    integer :: count
+    tag = 1
+    count = 100
+    call MPI_INIT(ierr)
+    call MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr)
+    call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)
+    ! Unsafe ordering: both ranks send first, so completion depends on system buffering (see MPI's four communication modes). If buffers are insufficient, neither send can complete and so neither receive can be posted. When the message exceeds the buffer size, a standard send returns only after the whole message is delivered: A then depends on D being received, D depends on B completing, B waits for C to receive, and C depends on A — a mutual dependency that may deadlock.
+    IF (rank.EQ.0) THEN
+        CALL MPI_SEND(sendbuf, count, MPI_REAL, 1, tag, MPI_COMM_WORLD, ierr)
+        CALL MPI_RECV(recvbuf, count, MPI_REAL, 1, tag, MPI_COMM_WORLD, status, ierr)
+    ELSE IF( rank .EQ. 1) THEN
+        CALL MPI_SEND(sendbuf, count, MPI_REAL, 0, tag, MPI_COMM_WORLD, ierr)
+        CALL MPI_RECV(recvbuf, count, MPI_REAL, 0, tag, MPI_COMM_WORLD, status, ierr)
+    END IF
+    print *, 'Process', rank, 'of', size, 'is running'
+
+    call MPI_FINALIZE(ierr)
+end program main
diff --git a/examples/mpi/deadlock/reduce_stuck.F90 b/examples/mpi/deadlock/reduce_stuck.F90
new file mode 100644
index 0000000000000000000000000000000000000000..049cee9fc5b42abd55eba94d3d3641c687ffc129
--- /dev/null
+++ b/examples/mpi/deadlock/reduce_stuck.F90
@@ -0,0 +1,55 @@
+program mpi_example
+    use mpi
+    implicit none
+
+    integer :: rank, comm_size, i, j, ierr
+    integer, parameter :: BUFFER_SIZE = 100
+    real, allocatable :: data(:)
+
+    ! Initialize MPI environment
+    call MPI_Init(ierr)
+    call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr)
+    call MPI_Comm_size(MPI_COMM_WORLD, comm_size, ierr)
+
+    allocate(data(BUFFER_SIZE*comm_size)) ! one BUFFER_SIZE slab per rank; slab j holds rank j's contribution
+    ! initialize the data array
+    data = 0.0
+    ! Many point-to-point messages may cause deadlock: https://zhuanlan.zhihu.com/p/431147881
+    if (rank == 0) then
+        ! Master rank
+        do i = 1, BUFFER_SIZE
+            do j = 1, comm_size-1
+                call MPI_Recv(data(i + j*BUFFER_SIZE), 1, MPI_REAL, j, i, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
+            end do
+        end do
+
+        ! Aggregate data from other ranks
+        do i = 1, BUFFER_SIZE
+            do j = 1, comm_size-1
+                data(i) = data(i) + data(i + j*BUFFER_SIZE)
+            end do
+        end do
+
+        ! Send aggregated data back to other ranks
+        do i = 1, BUFFER_SIZE
+            do j = 1, comm_size-1
+                call MPI_Send(data(i), 1, MPI_REAL, j, i, MPI_COMM_WORLD, ierr)
+            end do
+        end do
+    else
+        ! Other ranks
+        do i = 1, BUFFER_SIZE
+            call MPI_Send(data(i), 1, MPI_REAL, 0, i, MPI_COMM_WORLD, ierr)
+        end do
+
+        ! Receive aggregated data from master rank
+        do i = 1, BUFFER_SIZE
+            call MPI_Recv(data(i), 1, MPI_REAL, 0, i, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
+        end do
+    end if
+    deallocate(data) ! free the data array
+    print *, 'Process', rank, 'is finished'
+    ! Finalize MPI environment
+    call MPI_Finalize(ierr)
+
+end program mpi_example
diff --git a/examples/mpi/deadlock/send_recv_must_stuck.F90 b/examples/mpi/deadlock/send_recv_must_stuck.F90
new file mode 100644
index 0000000000000000000000000000000000000000..07a798f8b1cde8f0ecfa9418701cc99a53d4d2db
--- /dev/null
+++ b/examples/mpi/deadlock/send_recv_must_stuck.F90
@@ -0,0 +1,24 @@
+program main
+    use mpi
+    implicit none
+
+    integer :: rank, size, tag, ierr, status(MPI_STATUS_SIZE)
+    real :: sendbuf(100), recvbuf(100)
+    integer :: count
+    tag = 1
+    count = 100
+    call MPI_INIT(ierr)
+    call MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr)
+    call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)
+    ! Guaranteed deadlock: rank 0's first receive (A) can only complete once rank 1's second statement, the send (D), executes — but D runs only after rank 1's first receive (B) completes, and B waits on rank 0's second statement, the send (C), which in turn runs only after A completes. A depends on D, D on B, B on C, C on A — a dependency cycle; both ranks wait forever.
+    IF (rank.EQ.0) THEN
+        CALL MPI_RECV(recvbuf, count, MPI_REAL, 1, tag, MPI_COMM_WORLD, status, ierr)
+        CALL MPI_SEND(sendbuf, count, MPI_REAL, 1, tag, MPI_COMM_WORLD, ierr)
+    ELSE IF( rank .EQ. 1) THEN
+        CALL MPI_RECV(recvbuf, count, MPI_REAL, 0, tag, MPI_COMM_WORLD, status, ierr)
+        CALL MPI_SEND(sendbuf, count, MPI_REAL, 0, tag, MPI_COMM_WORLD, ierr)
+    END IF
+    print *, 'Process', rank, 'of', size, 'is running'
+
+    call MPI_FINALIZE(ierr)
+end program main
diff --git a/init.sh b/init.sh
index c742945dbad8761cf3e557862622b2dbec7b5c64..99e53c73cc7654e8c9662523876f9b6a3be286bf 100644
--- a/init.sh
+++ b/init.sh
@@ -14,7 +14,7 @@ export JARVIS_UTILS=${CUR_PATH}/software/utils
 export JARVIS_DOWNLOAD=${CUR_PATH}/downloads
 export JARVIS_MODULES=${CUR_PATH}/software/modulefiles
 export JARVIS_MODULEDEPS=${CUR_PATH}/software/moduledeps
-export JARVIS_TMP=${CUR_PATH}/tmp
+export JARVIS_TMP=/tmp
 export JARVIS_TMP_DOWNLOAD=${CUR_PATH}/tmp
 export JARVIS_EXE=${CUR_PATH}/exe
 export JARVIS_PROXY=https://gh.ddlc.top/https://github.com
diff --git a/package/cmake/3.28.2/install.sh b/package/cmake/3.28.2/install.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6ab388a7075dcf9f6d7a6af988b688d2c3d692da
--- /dev/null
+++ b/package/cmake/3.28.2/install.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+set -e
+export cmake_ver='3.28.2'
+../meta.sh $1
diff --git a/package/cmake/meta.sh b/package/cmake/meta.sh
new file mode 100644
index 0000000000000000000000000000000000000000..d966397c054a2384382108e1bb395be6d4756a80
--- /dev/null
+++ b/package/cmake/meta.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+set -e
+. ${DOWNLOAD_TOOL} -u $JARVIS_PROXY/Kitware/CMake/releases/download/v${cmake_ver}/cmake-${cmake_ver}-linux-`arch`.tar.gz
+tar -xzvf ${JARVIS_DOWNLOAD}/cmake-${cmake_ver}-linux-`arch`.tar.gz -C $1 --strip-components=1
diff --git a/package/envcheck/envcheck.sh b/package/envcheck/envcheck.sh
new file mode 100644
index 0000000000000000000000000000000000000000..f23f79ebf9b1ccd20929f9a2c9c96abf2a6f594d
--- /dev/null
+++ b/package/envcheck/envcheck.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+#check NIC config: PF_LOG_BAR_SIZE should be 8
+printf "\n###网卡配置PF_LOG_BAR_SIZE是否为8###\n"
+#mst start
+mlxconfig q|grep PF_LOG_BAR_SIZE
+
+#check NIC config: PCI_WR_ORDERING should be 1
+printf "\n###检查网卡配置PCI_WR_ORDERING是否为1###\n"
+mlxconfig q |grep PCI_WR_ORDERING
+
+#check BIOS version
+printf "\n###检查BIOS版本###\n"
+dmidecode -s bios-version
+
+
+#HiSilicon patch
+printf "\n###检查海思patch 寄存器值是否修改###\n"
+sh /etc/init.d/run_dev.sh
+
+#check dpc mounts
+printf "\n###检查dpc挂载###\n"
+ls /share/home
+ls /share/software
+
+#check tuned service status
+printf "\n###检查tuned服务状态###\n"
+#systemctl status tuned.service |grep Active |awk '{print$3}'
+tuned-adm active
+
+#check numa_balancing
+printf "\n###检查numa_balancing是否为0###\n"
+cat /proc/sys/kernel/numa_balancing
+
+#check core isolation
+printf "\n####检查核隔离是否为空###\n"
+cat /sys/devices/system/cpu/nohz_full
+
+#check grub boot parameters
+printf "\n###检查grub启动参数###\n"
+
+printf "\n###检查grub启动参数###\n"
+#cat /proc/cmdline
+tail -c 20 /proc/cmdline
+
+#check for CPU frequency throttling
+printf "\n###检查是否降频###\n"
+cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_cur_freq | sort -r | tail -1 |cut -b -3
+
+#check memory speed
+printf "\n###检查内存频率###\n"
+dmidecode |grep -A20 "Memory Device" |egrep '^\s+Speed' |sort -r | tail -1
diff --git a/package/envcheck/readme b/package/envcheck/readme
new file mode 100644
index 0000000000000000000000000000000000000000..40ffdcd9a8d37a1515ed1751303670c72ceedfc0
--- /dev/null
+++ b/package/envcheck/readme
@@ -0,0 +1,3 @@
+功能:应用运行前的集群环境检查
+操作:
+clush --hostfile hostfile ./envcheck.sh
diff --git a/package/fftw/3.3.10/sve/install.sh b/package/fftw/3.3.10/sve/install.sh
new file mode 100644
index 0000000000000000000000000000000000000000..3f314b60b1f3f326da1834adcb9f525667308793
--- /dev/null
+++ b/package/fftw/3.3.10/sve/install.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+set -x
+set -e
+cd ${JARVIS_TMP}
+#rm -rf fftw-3.3.10
+${DOWNLOAD_TOOL} -u http://www.fftw.org/fftw-3.3.10.tar.gz
+tar -xvf ${JARVIS_DOWNLOAD}/fftw-3.3.10.tar.gz
+cd fftw-3.3.10
+CFLAGS="-O3 -fPIC"
+./configure --prefix=$1 --enable-fma --enable-generic-simd256 --enable-single --enable-float --enable-neon --enable-shared --enable-threads --enable-openmp --enable-mpi CFLAGS="$CFLAGS" FFLAGS="$CFLAGS" FCFLAGS="$CFLAGS"
+make -j 16 && make install
diff --git a/package/get-gdb-trace/get-gdb-trace.sh b/package/get-gdb-trace/get-gdb-trace.sh
new file mode 100644
index 0000000000000000000000000000000000000000..0a25c1721b8c3de3d48f2f8e25fba2a5595c2f67
--- /dev/null
+++ b/package/get-gdb-trace/get-gdb-trace.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+process=$(pidof $1)
+cur_time=$(date +%Y-%m-%d-%H-%M)
+root_dir=$(pwd)
+log_dir=$1/$cur_time
+mkdir -p $log_dir
+log_file=$log_dir/gdb-$HOSTNAME.log
+> $log_file
+for proc in $process
+do
+    echo "**************Process $proc**************" >> $log_file
+    # use GDB to collect the process's stack backtraces
+    gdb -ex "attach $proc" -ex "thread apply all bt" -ex "detach" -ex "quit" -batch >> $log_file
+    echo "******************************************">> $log_file
+done
+echo "
+
+" >> $log_file
diff --git a/package/get-gdb-trace/readme b/package/get-gdb-trace/readme
new file mode 100644
index 0000000000000000000000000000000000000000..15c0f261178249f27c3c94f1a35a0f88c033d6f7
--- /dev/null
+++ b/package/get-gdb-trace/readme
@@ -0,0 +1,3 @@
+功能:一键收集应用的所有堆栈信息
+操作:(以WRF举例)
+clush --hostfile hostfile ./get-gdb-trace.sh wrf.exe
diff --git a/package/openblas/0.3.24/install.sh b/package/openblas/0.3.24/install.sh
new file mode 100644
index 0000000000000000000000000000000000000000..08136ca3a68241dbf7d052858067de0dfc750c93
--- /dev/null
+++ b/package/openblas/0.3.24/install.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+set -x
+set -e
+ver="0.3.24"
+. ${DOWNLOAD_TOOL} -u $JARVIS_PROXY/xianyi/OpenBLAS/archive/refs/tags/v${ver}.tar.gz -f OpenBLAS-${ver}.tar.gz
+cd ${JARVIS_TMP}
+rm -rf OpenBLAS-${ver}
+tar -xzvf ${JARVIS_DOWNLOAD}/OpenBLAS-${ver}.tar.gz
+cd OpenBLAS-${ver}
+make -j 16
+make PREFIX=$1 install
diff --git a/setemail.sh b/setemail.sh
new file mode 100644
index 0000000000000000000000000000000000000000..c139ce8b8b16afa697be05d9b97512a657b12b36
--- /dev/null
+++ b/setemail.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+git config core.fileMode false
+git config --global user.name 'xxx'
+git config --global user.email 'xxx@qq.com'
+git remote add upstream https://gitee.com/openeuler/hpcrunner.git
+#git remote set-url upstream https://gitee.com/openeuler/hpcrunner.git
diff --git a/templates/gromacs/2023.3/gromacs.2023.3.config b/templates/gromacs/2023.3/gromacs.2023.3.config
new file mode 100644
index 0000000000000000000000000000000000000000..0ef5ce13ff0fb97dddceb49dcdc3065c71e8ed6e
--- /dev/null
+++ b/templates/gromacs/2023.3/gromacs.2023.3.config
@@ -0,0 +1,77 @@
+[SERVER]
+11.11.11.11
+
+[DOWNLOAD]
+gromacs/2023.3 http://ftp.gromacs.org/pub/gromacs/gromacs-2023.3.tar.gz
+Testcase https://repository.prace-ri.eu/ueabs/GROMACS/2.2/GROMACS_TestCaseC.tar.xz
+
+[DEPENDENCY]
+module purge
+module use ./software/modulefiles
+./jarvis -install bisheng/3.2.0 com
+module load bisheng/3.2.0
+./jarvis -install openblas/0.3.24 clang
+./jarvis -install hmpi/2.3.0 clang
+module load hmpi/2.3.0
+export CC=mpicc CXX=mpicxx FC=mpifort
+./jarvis -install fftw/3.3.10/sve clang+mpi
+if [ ! -d gromacs-2023.3 ]; then
+tar -xvf ${JARVIS_DOWNLOAD}/gromacs-2023.3.tar.gz
+fi
+
+[ENV]
+module purge
+module use ./software/modulefiles
+module use ./software/moduledeps
+module load bisheng/3.2.0
+module load hmpi/2.3.0
+module load fftw-sve/3.3.10
+module load openblas/0.3.24
+compile_dir=$(dirname $(dirname $(which clang)))
+#export MEMKIND_HBW_NODES=0
+export gromacs_dir=$JARVIS_ROOT/gromacs-2023.3
+
+[APP]
+app_name = gromacs
+build_dir = $gromacs_dir
+binary_dir =
+case_dir = $JARVIS_ROOT/workloads/Gromacs/GROMACS_TestCaseC
+
+[BUILD]
+#rm -rf build
+mkdir -p build
+cd build
+FLAGS="-mcpu=linxicore9100 -O3 -ffast-math -mllvm --aarch64-sched-inline-asm=false -mllvm -unroll-threshold-aggressive=600"
+LD_FLAGS="-mcpu=linxicore9100 -O3 -lbsmath -lflang -L${compile_dir}/lib/jemalloc-64kbpage -ljemalloc"
+
+CFLAGS=$FLAGS CXXFLAGS=$FLAGS LDFLAGS=$LD_FLAGS CC=mpicc CXX=mpicxx FC=mpifort cmake -DCMAKE_INSTALL_PREFIX=$JARVIS_ROOT/gromacs -DBUILD_SHARED_LIBS=on -DBUILD_TESTING=on -DREGRESSIONTEST_DOWNLOAD=off -DGMX_BUILD_OWN_FFTW=off -DGMX_SIMD=ARM_SVE -DGMX_SIMD_ARM_SVE_LENGTH=256 -DGMX_DOUBLE=off -DGMX_EXTERNAL_BLAS=on -DGMX_EXTERNAL_LAPACK=on -DGMX_FFT_LIBRARY=fftw3 -DGMX_BLAS_USER=${OPENBLAS_PATH}/lib/libopenblas.a -DGMX_LAPACK_USER=${OPENBLAS_PATH}/lib/libopenblas.a -DFFTWF_LIBRARY=$FFTW_SVE_PATH/lib/libfftw3f.so -DFFTWF_INCLUDE_DIR=$FFTW_SVE_PATH/include -DGMX_X11=off -DGMX_OPENMP=off -DGMX_MPI=on -DGMX_THREAD_MPI=off -DGMX_CYCLE_SUBCOUNTERS=off ../
+
+make -j40 V=1
+make install
+
+#-DGMX_BLAS_USER=$openblas_path/lib/libopenblas.a #-DGMX_LAPACK_USER=$openblas_path/lib/libopenblas.a
+#-DGMX_BLAS_USER=${KML_GCC_PATH}/lib/kblas/omp/libkblas.so -DGMX_LAPACK_USER=${KML_GCC_PATH}/lib/libklapack_full.so -DFFTWF_LIBRARY=$KFFT_PATH/lib/libfftw3f.so -DFFTWF_INCLUDE_DIR=$KFFT_PATH/include
+
+[CLEAN]
+./clean_build
+
+[RUN]
+run =
+binary = mpirun -np 574 --allow-run-as-root -x UCX_TLS=sm --bind-to cpulist:ordered -mca pml ucx -mca btl ^vader,tcp,openib,uct ${gromacs_dir}/build/bin/gmx_mpi mdrun -dlb yes -v -nsteps 4000 -noconfout -pin on -pinoffset 0 -ntomp 1 -npme 112 -g md_sve_0229-ucpg-bisheng-2P.log -s stmv.28M.tpr
+nodes = 1
+
+[JOB]
+#!/bin/bash
+#DSUB -n gmx
+#DSUB --job_type cosched
+#DSUB -N 1
+#DSUB -R "cpu=128"
+#DSUB -o gmx_%J.log
+#DSUB -e gmx_err_%J.log
+#DSUB -T '2h'
+
+export HOSTFILE=hostfile.gmx
+rm -f $HOSTFILE
+touch $HOSTFILE
+cat ${CCSCHEDULER_ALLOC_FILE} | sort > $HOSTFILE
+mpirun -np 128 --hostfile $HOSTFILE -x UCX_TLS=sm --bind-to cpulist:ordered -mca pml ucx -mca btl ^vader,tcp,openib,uct ${gromacs_dir}/build/bin/gmx_mpi mdrun -dlb yes -v -nsteps 4000 -noconfout -pin on -pinoffset 0 -ntomp 1 -npme 8 -g 3-5-pme8-128C.log -s stmv.28M.tpr