diff --git a/README.md b/README.md index b71739b3036aa3d88c4681a060fd725abbfff894..5b07db44db5c22e5ff796e50e7a0d8c5eb7eb9d3 100644 --- a/README.md +++ b/README.md @@ -12,10 +12,10 @@ - 所有配置仅用一个文件记录,HPC应用部署到不同的机器仅需修改配置文件. - 日志管理系统自动记录HPC应用部署过程中的所有信息. - 软件本身无需编译开箱即用,仅依赖Python环境. +- HPC应用容器化-目前QE已经实现,参考container目录. - (未来) 集成HPC领域常用性能调优手段、核心算法. - (未来) 集群性能分析工具. - (未来) 智能调优. -- (未来) HPC应用[容器化](https://catalog.ngc.nvidia.com/orgs/hpc/containers/quantum_espresso). ### 目录结构 @@ -30,7 +30,7 @@ | src | 贾维斯源码 | | | templates | 常用HPC应用的配置模板 | | | test | 贾维斯测试用例 | | -| workload | 常用HPC应用的算例合集 | | +| workloads | 常用HPC应用的算例合集 | | | init.sh | 贾维斯初始化文件 | | | jarvis | 贾维斯启动入口 | | @@ -60,25 +60,25 @@ 1.下载包解压之后初始化 ``` -source init.sh +source ./init.sh ``` 2.修改data.config或者套用现有模板,各配置项说明如下所示: -| 配置项 | 说明 | 示例 | -| :----------: | :--------------------------------------------------------- | :----------------------------------------------------------- | -| [SERVER] | 服务器节点列表,多节点时用于自动生成hostfile,每行一个节点 | 11.11.11.11 | -| [DOWNLOAD] | 每行一个软件的版本和下载链接,默认下载到downloads目录 | cmake/3.16.4 https://cmake.org/files/v3.16/cmake-3.16.4.tar.gz | -| [DEPENDENCY] | HPC应用依赖安装脚本 | ./jarvis -install gcc/9.3.1 com
module use ./software/modulefiles
module load gcc9 | -| [ENV] | HPC应用编译运行环境配置 | source env.sh | -| [APP] | HPC应用信息,包括应用名、构建路径、二进制路径、算例路径 | app_name = CP2K
build_dir = /home/cp2k-8.2/
binary_dir = /home/CP2K/cp2k-8.2/bin/
case_dir = /home/CP2K/cp2k-8.2/benchmarks/QS/ | -| [BUILD] | HPC应用构建脚本 | make -j 128 | -| [CLEAN] | HPC应用编译清理脚本 | make -j 128 clean | -| [RUN] | HPC应用运行配置,包括前置命令、应用命令和节点个数 | run = mpi
binary = cp2k.psmp H2O-256.inp
nodes = 1 | -| [BATCH] | HPC应用批量运行命令 | #!/bin/bash
nvidia-smi -pm 1
nvidia-smi -ac 1215,1410 | -| [PERF] | 性能工具额外参数 | | +| 配置项 | 说明 | 示例 | +| :----------: | :----------------------------------------------------------- | :----------------------------------------------------------- | +| [SERVER] | 服务器节点列表,多节点时用于自动生成hostfile,每行一个节点 | 11.11.11.11 | +| [DOWNLOAD] | 每行一个软件的版本和下载链接,默认下载到downloads目录(可设置别名) | cmake/3.16.4 https://cmake.org/files/v3.16/cmake-3.16.4.tar.gz 别名 | +| [DEPENDENCY] | HPC应用依赖安装脚本 | ./jarvis -install gcc/9.3.1 com
module use ./software/modulefiles
module load gcc9 | +| [ENV] | HPC应用编译运行环境配置 | source env.sh | +| [APP] | HPC应用信息,包括应用名、构建路径、二进制路径、算例路径 | app_name = CP2K
build_dir = /home/cp2k-8.2/
binary_dir = /home/CP2K/cp2k-8.2/bin/
case_dir = /home/CP2K/cp2k-8.2/benchmarks/QS/ | +| [BUILD] | HPC应用构建脚本 | make -j 128 | +| [CLEAN] | HPC应用编译清理脚本 | make -j 128 clean | +| [RUN] | HPC应用运行配置,包括前置命令、应用命令和节点个数 | run = mpi
binary = cp2k.psmp H2O-256.inp
nodes = 1 | +| [BATCH] | HPC应用批量运行命令 | #!/bin/bash
nvidia-smi -pm 1
nvidia-smi -ac 1215,1410 | +| [PERF] | 性能工具额外参数 | | -3.一键下载依赖(仅针对无需鉴权的链接,否则需要自行下载) +3.一键下载依赖(仅针对无需鉴权的链接,否则需要自行下载到downloads目录) ``` ./jarvis -d @@ -105,7 +105,7 @@ option支持列表如下所示 | icc | 使用当前icc进行编译 | software/libs/icc | | icc+mpi | 使用当前icc+当前mpi进行编译 | software/libs/icc/mpi | | com | 安装编译器 | software/compiler | -| any | 安装工具软件 | software/compiler/utils | +| any | 安装工具软件 | software/utils | 注意,如果软件为MPI通信软件(如hmpi、openmpi),会安装到software/mpi目录 @@ -117,8 +117,6 @@ eg: ./jarvis -install openmpi/4.1.2 gcc #使用gcc编译openmpi 4.1.2版本 ``` - - 5.一键安装所有依赖 ``` diff --git a/container/openeuler-kgcc9-openmpi4-qe-6.4.def b/container/openeuler-kgcc9-openmpi4-qe-6.4.def new file mode 100644 index 0000000000000000000000000000000000000000..b17dd62f2276b4014658b9d8adcc26ca93950cb7 --- /dev/null +++ b/container/openeuler-kgcc9-openmpi4-qe-6.4.def @@ -0,0 +1,41 @@ +BootStrap: docker +From: openeuler/openeuler + +%environment + source /etc/profile || true + cd /hpcrunner + source env.sh + +%post + # Install the necessary development environment + yum install -y environment-modules git dmidecode pciutils wget vim + # Install base gcc + yum install -y gcc gcc-c++ gcc-gfortran glibc-devel make libgfortran + # install network package + yum install tcsh tcl lsof tk -y + source /etc/profile || true + git config --global http.sslVerify false + git clone https://gitee.com/openeuler/hpcrunner + cd hpcrunner + source ./init.sh + ./jarvis -i + cp ./templates/qe/6.4/data.qe.container.config . + wget --no-check-certificate https://github.com/QEF/q-e/archive/refs/tags/qe-6.4.1.tar.gz + tar -xzvf qe-6.4.1.tar.gz + # Modify the case path + sed -i 's/pseudopotentials\.d/\/hpcrunner\/workloads\/QE\/qe-test\/pseudopotentials\.d/g' ./workloads/QE/qe-test/test_3.in + # Switch config + ./jarvis -use data.qe.container.config + # download dependency + ./jarvis -d + # install dependency + ./jarvis -dp + # build hpc + ./jarvis -b + # run hpc + ./jarvis -r + # clean tmp directory + rm -rf downloads tmp + +%labels + Author fang \ No newline at end of file diff --git a/package/openmpi/4.1.2/IB/install.sh b/package/openmpi/4.1.2/IB/install.sh new file mode 100644 index 0000000000000000000000000000000000000000..6b93da002460d131b4bd8651555e802edb349b31 --- /dev/null +++ b/package/openmpi/4.1.2/IB/install.sh @@ -0,0 +1,8 @@ +#!/bin/bash +set -x +set -e +cd ${JARVIS_TMP} +tar -xvf ${JARVIS_DOWNLOAD}/openmpi-4.1.2.tar.gz +cd openmpi-4.1.2 +./configure CC=gcc CXX=g++ FC=gfortran --prefix=$1 --enable-pretty-print-stacktrace --enable-orterun-prefix-by-default --with-knem=/opt/knem-1.1.4.90mlnx1/ --with-hcoll=/opt/mellanox/hcoll/ --with-cma --with-ucx --enable-mpi1-compatibility +make -j install diff --git a/package/openmpi/4.1.2/install.sh b/package/openmpi/4.1.2/install.sh index 6b93da002460d131b4bd8651555e802edb349b31..581eae2a5db545803b5598e7e57bd3783a53e600 100644 --- a/package/openmpi/4.1.2/install.sh +++ b/package/openmpi/4.1.2/install.sh @@ -4,5 +4,5 @@ set -e cd ${JARVIS_TMP} tar -xvf ${JARVIS_DOWNLOAD}/openmpi-4.1.2.tar.gz cd openmpi-4.1.2 -./configure CC=gcc CXX=g++ FC=gfortran --prefix=$1 --enable-pretty-print-stacktrace --enable-orterun-prefix-by-default --with-knem=/opt/knem-1.1.4.90mlnx1/ --with-hcoll=/opt/mellanox/hcoll/ --with-cma --with-ucx --enable-mpi1-compatibility +./configure CC=gcc CXX=g++ FC=gfortran --prefix=$1 --enable-pretty-print-stacktrace --enable-orterun-prefix-by-default --enable-mpi1-compatibility make -j install diff --git a/src/analysis.py b/src/analysis.py index 8e68366e0e28fb9aa41558445b7f795152412dba..d8a3bf689ff496304101db1bf7588a65eb11b388 100644 --- a/src/analysis.py +++ b/src/analysis.py @@ -154,8 +154,11 @@ class Install: return version.split('.')[0] def is_mpi_software(self, software_name): - mpis = ['hmpi', 'openmpi', 'hpcx'] - return software_name in mpis + mpis = ['hmpi', 'openmpi', 'hpcx', 'mpich'] + for mpi in mpis: + if software_name.startswith(mpi): + return True + return False def add_mpi_path(self, software_info, install_path): if not software_info['is_use_mpi']: @@ -492,6 +495,7 @@ class Download: self.ROOT = os.getcwd() self.download_list = self.tool.gen_list(Data.download_info) self.download_path = os.path.join(self.ROOT, 'downloads') + self.package_path = os.path.join(self.ROOT, 'package') def check_network(self): print(f"start network checking") @@ -510,40 +514,43 @@ yum makecache ''' self.exe.exec_raw(repo_cmd) - def gen_wget_url(self, out_dir='./downloads', url=''): + def gen_wget_url(self, out_dir='./downloads', url='', filename=''): head = "wget --no-check-certificate" - out_para = "-P" - download_url = f'{head} {out_para} {out_dir} {url}' + file_path = os.path.join(out_dir, filename) + download_url = f'{head} {url} -O {file_path}' return download_url def download(self): print(f"start download") - url_links = [] + filename_url_map = {} self.tool.mkdirs(self.download_path) download_flag = False # create directory for url_info in self.download_list: url_list = url_info.split(' ') - if len(url_list) != 2: + if len(url_list) < 2: continue software_info = url_list[0].strip() url_link = url_list[1].strip() - url_links.append(url_link) + filename = os.path.basename(url_link) + if len(url_list) == 3: + filename = url_list[2].strip() + filename_url_map[filename] = url_link # create software directory - software_path = os.path.join(self.ROOT, 'package', software_info) + software_path = os.path.join(self.package_path, software_info) self.tool.mkdirs(software_path) # create install script install_script = os.path.join(software_path, "install.sh") self.tool.mkfile(install_script) + print(filename_url_map) # start download - for url in url_links: + for filename, url in filename_url_map.items(): download_flag = True - filename = os.path.basename(url) file_path = os.path.join(self.download_path, filename) if os.path.exists(file_path): self.tool.prt_content(f"FILE {filename} already DOWNLOADED") continue - download_url = self.gen_wget_url(self.download_path, url) + download_url = self.gen_wget_url(self.download_path, url, filename) self.tool.prt_content("DOWNLOAD " + filename) output = os.popen(download_url) data = output.read() diff --git a/templates/qe/6.4/data.qe.container.config b/templates/qe/6.4/data.qe.container.config new file mode 100644 index 0000000000000000000000000000000000000000..b9ea99efe2f55c9c1ab475d3f6a93c55a194edc0 --- /dev/null +++ b/templates/qe/6.4/data.qe.container.config @@ -0,0 +1,45 @@ +[SERVER] +11.11.11.11 + +[DOWNLOAD] +kgcc/9.3.1 https://mirrors.huaweicloud.com/kunpeng/archive/compiler/kunpeng_gcc/gcc-9.3.1-2021.03-aarch64-linux.tar.gz +openmpi/4.1.2 https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.2.tar.gz + +[DEPENDENCY] +./jarvis -install kgcc/9.3.1 com +module purge +module use ./software/modulefiles +module load kgcc9/9.3.1 +export CC=`which gcc` +export CXX=`which g++` +export FC=`which gfortran` +./jarvis -install openmpi/4.1.2 gcc +module load openmpi4/4.1.2 +#test if mpi is normal +./jarvis -bench mpi + +[ENV] +module purge +module use ./software/modulefiles +module load kgcc9 +module load openmpi4/4.1.2 + +[APP] +app_name = QE +build_dir = /hpcrunner/q-e-qe-6.4.1/ +binary_dir = /hpcrunner/q-e-qe-6.4.1/bin/ +case_dir = /hpcrunner/workloads/QE/qe-test + +[BUILD] +./configure F90=gfortran F77=gfortran MPIF90=mpifort MPIF77=mpifort CC=mpicc FCFLAGS="-O3" CFLAGS="-O3" --with-scalapack=no --enable-openmp +make -j 96 pwall +make install + +[CLEAN] +make clean + +[RUN] +#container-run = mpirun --allow-run-as-root -x OMP_NUM_THREADS=1 -np 96 singularity exec xx.sif /hpcrunner/q-e-qe-6.4.1/bin pw.x -input /hpcrunner/workloads/QE/qe-test/test_3.in +run = mpirun --allow-run-as-root -x OMP_NUM_THREADS=1 -np 96 +binary = pw.x -input test_3.in +nodes = 1 \ No newline at end of file diff --git a/templates/qe/6.4/data.qe.test.config b/templates/qe/6.4/data.qe.test.config index 59254e0e8b21c6888b0d364119129e0c3721e7a5..b46531a8738f5e287285fb69a2bffcd92f281df0 100644 --- a/templates/qe/6.4/data.qe.test.config +++ b/templates/qe/6.4/data.qe.test.config @@ -1,6 +1,10 @@ [SERVER] 11.11.11.11 +[DOWNLOAD] +kgcc/9.3.1 https://mirrors.huaweicloud.com/kunpeng/archive/compiler/kunpeng_gcc/gcc-9.3.1-2021.03-aarch64-linux.tar.gz +openmpi/4.1.2 https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.2.tar.gz + [DEPENDENCY] ./jarvis -install kgcc/9.3.1 com module purge @@ -9,7 +13,7 @@ module load kgcc9/9.3.1 export CC=`which gcc` export CXX=`which g++` export FC=`which gfortran` -./jarvis -install openmpi/4.1.2/ gcc +./jarvis -install openmpi/4.1.2 gcc module load openmpi4/4.1.2 #test if mpi is normal ./jarvis -bench mpi diff --git a/templates/qe/6.4/data.qe.test.opt.config b/templates/qe/6.4/data.qe.test.opt.config index b191dcb5b6b7a92e60b412fba6ed046f2da15061..78cf7e01af1340095e60369ba277a622123887fa 100644 --- a/templates/qe/6.4/data.qe.test.opt.config +++ b/templates/qe/6.4/data.qe.test.opt.config @@ -1,6 +1,13 @@ [SERVER] 11.11.11.11 +[DOWNLOAD] +bisheng/2.1.0 https://mirrors.huaweicloud.com/kunpeng/archive/compiler/bisheng_compiler/bisheng-compiler-2.1.0-aarch64-linux.tar.gz +hmpi/1.1.1 https://github.com/kunpengcompute/hucx/archive/refs/tags/v1.1.1-huawei.zip hucx-1.1.1-huawei.zip +hmpi/1.1.1 https://github.com/kunpengcompute/hmpi/archive/refs/tags/v1.1.1-huawei.zip hmpi-1.1.1-huawei.zip +hmpi/1.1.1 https://github.com/kunpengcompute/xucg/archive/refs/tags/v1.1.1-huawei.zip xucg-1.1.1-huawei.zip +openblas/0.3.18 https://github.com/xianyi/OpenBLAS/releases/download/v0.3.18/OpenBLAS-0.3.18.tar.gz + [DEPENDENCY] set -x set -e diff --git a/templates/singularity/singularity.config b/templates/singularity/singularity.config index 00846a2fa829d236585303eee71e4d02d85d1d78..04400f4a769cd24415e4089d29d8e40d3edd487f 100644 --- a/templates/singularity/singularity.config +++ b/templates/singularity/singularity.config @@ -1,20 +1,26 @@ [SERVER] 11.11.11.11 +[DOWNLOAD] +go/1.18 https://go.dev/dl/go1.18.linux-arm64.tar.gz +singrarity/3.9.6 https://github.com/sylabs/singularity/archive/refs/tags/v3.9.6.tar.gz singrarity-3.9.6.tar.gz + [DEPENDENCY] +set -x set -e # Install RPM packages for dependencies sudo yum install -y libseccomp-devel squashfs-tools cryptsetup ./jarvis -install go/1.18 gcc export PATH=/usr/local/go/bin:$PATH # Install singrarity from https://github.com/sylabs/singularity.git +tar -xzvf singrarity-3.9.6.tar.gz [ENV] export PATH=/usr/local/go/bin:$PATH [APP] app_name = singularity -build_dir = /tmp/hpcrunner/singularity-ce-3.9.6/ +build_dir = ./singularity-ce-3.9.6/ binary_dir = case_dir = diff --git a/wechat-group-qr.png b/wechat-group-qr.png index 6e758332d01a5f5df5ef516f12791e175e3ee326..9efb322f64d0f8b09c486cb759e40a97b112d1be 100644 Binary files a/wechat-group-qr.png and b/wechat-group-qr.png differ