diff --git a/.gitignore b/.gitignore index 2a95443a1ca7562ce1d0e2983c47a8812634678c..2d9cfd6d3d43ce1059fc03591aff2ee595b90a66 100644 --- a/.gitignore +++ b/.gitignore @@ -56,6 +56,8 @@ pip-wheel-metadata perf.data.* kperf.data.* env.sh +build.sh +test.py hostfile .vscode tmp diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000000000000000000000000000000000000..1d83794d4b7b8e225684638bc88c0eb6a4d8e033 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,57 @@ +## **GITEE前提工作** + +**1.设置SSH** + +​ cd ~/.ssh + +​ ssh-keygen + +​ cat id_rsa.pub + +​ 复制内容到Gitee-设置-SSH Key里面 + +**2.设置用户名和邮箱** + +​ \#忽略文件模式变化 + + git config core.fileMode false + +​ git config --global user.name "XXX" + +​ git config --global user.email "XXX" + +**3.Fork主仓库,克隆个人仓库(以iotwins为例,以实际的为准)** + +​ git clone git@gitee.com:iotwins/hpcrunner.git + +**4.建立个人仓库和远程主仓库的联系** + +git remote add upstream git@gitee.com:openeuler/hpcrunner.git + + + +## **社区提交代码流程** + +**1.先切回master分支,拉取最新社区代码** + +git checkout master + +git pull upstream master + +**2.新建需求分支(分支名自定义),并切换到新分支** + +git checkout -b new_branch + +**3.修改代码** + +**4.提交修改的代码** + +git add . + +git commit --no-verify -m "Add XXX function" + +git push origin new_branch + +**5.在gitee创建PR** + +https://gitee.com/iotwins/hpcrunner diff --git a/README.md b/README.md index cbd233561c7d26b949a04fba73db1434e50c736e..8db7efcfb3ebd73bb325ce8d2d8ed8dc754492e0 100644 --- a/README.md +++ b/README.md @@ -74,9 +74,9 @@ source ./init.sh | [APP] | HPC应用信息,包括应用名、构建路径、二进制路径、算例路径 | app_name = CP2K
build_dir = /home/cp2k-8.2/
binary_dir = /home/CP2K/cp2k-8.2/bin/
case_dir = /home/CP2K/cp2k-8.2/benchmarks/QS/ | | [BUILD] | HPC应用构建脚本 | make -j 128 | | [CLEAN] | HPC应用编译清理脚本 | make -j 128 clean | -| [RUN] | HPC应用运行配置,包括前置命令、应用命令和节点个数 | run = mpi
binary = cp2k.psmp H2O-256.inp
nodes = 1 | +| [RUN] | HPC应用运行配置,包括前置命令、应用命令和节点个数 | run = mpirun -np 2
binary = cp2k.psmp H2O-256.inp
nodes = 1 | | [BATCH] | HPC应用批量运行命令 | #!/bin/bash
nvidia-smi -pm 1
nvidia-smi -ac 1215,1410 | -| [PERF] | 性能工具额外参数 | | +| [PERF] | 性能工具额外参数 | perf= -o
nsys=
ncu=--target-processes all --launch-skip 71434 --launch-count 1 | 3.一键下载依赖(仅针对无需鉴权的链接,否则需要自行下载到downloads目录) diff --git a/data.config b/data.config index 730dff95d7d722bb7a955e3b55ccf419c9d8e16d..f4076fc25867599d689e123f9b2301dfafd5779c 100644 --- a/data.config +++ b/data.config @@ -31,4 +31,9 @@ make clean [RUN] run = hpctool -o ./output -l detail binary = pw.x -input scf.in -nodes = 1 \ No newline at end of file +nodes = 1 + +[PERF] +perf = top -g +nsys = -y 5s -d 150s +ncu = --target-processes all --launch-skip 71434 --launch-count 1 \ No newline at end of file diff --git a/src/dataService.py b/src/dataService.py index 99a202f46857aa91779d2a5e60d9c9bd52ee317b..32b8162881ce5d6a3953cb3260ec047520503c01 100644 --- a/src/dataService.py +++ b/src/dataService.py @@ -30,7 +30,6 @@ class DataService: root_path = os.getcwd() download_info = '' #perf info - kperf_para = '' perf_para = '' nsys_para = '' ncu_para = '' @@ -54,8 +53,8 @@ class DataService: contents = file_obj.read() return contents.strip() - def is_empty(self, str): - return len(str) == 0 or str.isspace() or str == '\n' + def is_empty(self, content): + return len(content) == 0 or content.isspace() or content == '\n' def read_rows(self, rows, start_row): data = '' @@ -89,7 +88,6 @@ class DataService: DataService.case_dir = data['case_dir'] def set_perf_info(self, data): - DataService.kperf_para = data['kperf'] DataService.perf_para = data['perf'] DataService.nsys_para = data['nsys'] DataService.ncu_para = data['ncu'] diff --git a/src/perfService.py b/src/perfService.py index ee4c2d75e10abfd6ea1e9c98baa900448b334a99..4a7c0d03d34cda5fe256634f20d5020f23aac614 100644 --- a/src/perfService.py +++ b/src/perfService.py @@ -50,20 +50,26 @@ perf report -i ./perf.data -F period,sample,overhead,symbol,dso,comm -s overhea def gpu_perf(self): print(f"start gpu perf") run_cmd = self.hpc_data.get_run() + nsys_para = '-y 5s -d 100s' + if DataService.nsys_para != '': + nsys_para = DataService.nsys_para gperf_cmd = f''' 
{self.hpc_data.get_env()} cd {DataService.case_dir} -nsys profile -y 5s -d 100s {DataService.nsys_para} -o nsys-{self.get_arch()}-{self.get_cur_time()} {run_cmd} - ''' +nsys profile {nsys_para} -o nsys-{self.get_arch()}-{self.get_cur_time()} {run_cmd} +''' self.exe.exec_raw(gperf_cmd) def ncu_perf(self, kernel): print(f"start ncu perf") run_cmd = self.hpc_data.get_run() + ncu_para = '--launch-skip 1735 --launch-count 1' + if DataService.ncu_para != '': + ncu_para = DataService.ncu_para ncu_cmd = f''' {self.hpc_data.get_env()} cd {DataService.case_dir} -ncu --export ncu-{self.get_arch()}-{self.get_cur_time()} {DataService.ncu_para} --import-source=yes --set full --kernel-name {kernel} --launch-skip 1735 --launch-count 1 {run_cmd} +ncu --export ncu-{self.get_arch()}-{self.get_cur_time()} --import-source=yes --set full --kernel-name {kernel} {ncu_para} {run_cmd} ''' self.exe.exec_raw(ncu_cmd) diff --git a/wechat-group-qr.png b/wechat-group-qr.png index 9efb322f64d0f8b09c486cb759e40a97b112d1be..f6361bcf32555ac5100c50227e23a5a70029b9b4 100644 Binary files a/wechat-group-qr.png and b/wechat-group-qr.png differ