From c60041b9e772b9c90cc2ed577e7fa038ab56b601 Mon Sep 17 00:00:00 2001 From: binshuo zu Date: Fri, 16 Dec 2022 00:14:36 +0800 Subject: [PATCH 1/2] feat: add imb Signed-off-by: binshuo --- .../openeuler-bisheng2-hmpi1-imb-2021.3.def | 35 + .../openeuler-gcc-9.3.0-hmpi1-imb-2021.3.def | 35 + ...25\346\212\245\345\221\212\343\200\213.md" | 6560 +++++++++++++++++ ...15\346\214\207\345\215\227\343\200\213.md" | 287 + templates/imb/2021.3/data.imb.amd.cpu.config | 44 + templates/imb/2021.3/data.imb.arm.cpu.config | 42 + test/test-imb.sh | 16 + 7 files changed, 7019 insertions(+) create mode 100644 container/imb/openeuler-bisheng2-hmpi1-imb-2021.3.def create mode 100644 container/imb/openeuler-gcc-9.3.0-hmpi1-imb-2021.3.def create mode 100644 "doc/imb/2021.3/\343\200\212\345\237\272\344\272\216openEuler\347\232\204imb\350\275\257\344\273\266\346\265\213\350\257\225\346\212\245\345\221\212\343\200\213.md" create mode 100644 "doc/imb/2021.3/\343\200\212\345\237\272\344\272\216openEuler\347\232\204imb\350\275\257\344\273\266\347\247\273\346\244\215\346\214\207\345\215\227\343\200\213.md" create mode 100644 templates/imb/2021.3/data.imb.amd.cpu.config create mode 100644 templates/imb/2021.3/data.imb.arm.cpu.config create mode 100755 test/test-imb.sh diff --git a/container/imb/openeuler-bisheng2-hmpi1-imb-2021.3.def b/container/imb/openeuler-bisheng2-hmpi1-imb-2021.3.def new file mode 100644 index 0000000..af93037 --- /dev/null +++ b/container/imb/openeuler-bisheng2-hmpi1-imb-2021.3.def @@ -0,0 +1,35 @@ +BootStrap: docker +From: openeuler/openeuler + +%environment + source /etc/profile || true + source /etc/profile.d/modules.sh + cd /hpcrunner + source init.sh + source env.sh + +%post + # Install the necessary development environment + yum install -y environment-modules git wget unzip make flex tar + source /etc/profile || true + git config --global http.sslVerify false + git clone https://gitee.com/openeuler/hpcrunner + cd hpcrunner + source ./init.sh + ./jarvis -i + # Switch 
config + ./jarvis -use templates/imb/2021.3/data.imb.arm.cpu.config + # download dependency + ./jarvis -d + # install dependency + ./jarvis -dp + # build imb + ./jarvis -b + # run test + ./jarvis -r + # clean cache and downloads directory + yum clean all + rm -rf downloads + +%labels + Author Zu diff --git a/container/imb/openeuler-gcc-9.3.0-hmpi1-imb-2021.3.def b/container/imb/openeuler-gcc-9.3.0-hmpi1-imb-2021.3.def new file mode 100644 index 0000000..13c917f --- /dev/null +++ b/container/imb/openeuler-gcc-9.3.0-hmpi1-imb-2021.3.def @@ -0,0 +1,35 @@ +BootStrap: docker +From: openeuler/openeuler + +%environment + source /etc/profile || true + source /etc/profile.d/modules.sh + cd /hpcrunner + source env.sh + +%post + # Install the necessary development environment + yum install -y environment-modules git flex wget vim tar unzip coreutils + # Install base gcc + yum install -y gcc gcc-c++ gcc-gfortran glibc-devel make libgfortran + source /etc/profile || true + git config --global http.sslVerify false + git clone https://gitee.com/openeuler/hpcrunner.git + cd hpcrunner + source ./init.sh + ./jarvis -i + # Switch config + ./jarvis -use templates/imb/2021.3/data.imb.amd.cpu.config + # download imb + ./jarvis -d + # install dependency + ./jarvis -dp + # build imb + ./jarvis -b + # run imb + ./jarvis -r + # clean downloads directory + rm -rf downloads + +%labels + Author Zu \ No newline at end of file diff --git "a/doc/imb/2021.3/\343\200\212\345\237\272\344\272\216openEuler\347\232\204imb\350\275\257\344\273\266\346\265\213\350\257\225\346\212\245\345\221\212\343\200\213.md" "b/doc/imb/2021.3/\343\200\212\345\237\272\344\272\216openEuler\347\232\204imb\350\275\257\344\273\266\346\265\213\350\257\225\346\212\245\345\221\212\343\200\213.md" new file mode 100644 index 0000000..34109de --- /dev/null +++ 
"b/doc/imb/2021.3/\343\200\212\345\237\272\344\272\216openEuler\347\232\204imb\350\275\257\344\273\266\346\265\213\350\257\225\346\212\245\345\221\212\343\200\213.md" @@ -0,0 +1,6560 @@ +# 《基于openEuler的imb软件测试报告》 + +## 1.规范性自检 + +使用Clang-Format对项目文件进行格式化 + +Clang-Format是一个广泛使用的C++代码格式化器。我们在使用编辑器的缩进(TAB)功能时,由于不同编辑器的差别,有的插入的是制表符,有的是2个空格,有的是4个空格。这样如果别人用另一个编辑器来阅读程序时,可能会由于缩进的不同,导致阅读效果一团糟。为了解决这个问题,使用C++开发了一个插件,它可以自动重新缩进,并手动指定空格的数量,自动格式化源文件。它可以通过命令行使用,也可以作为插件,在其他IDE中使用。 + +文件格式化配置参考文件`.clang-format`,文件内容如下 + +```clang-format +--- +BasedOnStyle: LLVM +IndentWidth: 4 +UseTab: false +AllowShortIfStatementsOnASingleLine: false +IndentCaseLabels: false +AccessModifierOffset: -4 +``` + +### 1.1.选择统计文件类型 + +统计项目文件类型及其文件数量 + +使用python编写脚本文件 + +```python +# -*- coding: utf-8 -*- + +import os + +print (os.getcwd()) + +def getAllFiles(targetDir): + files = [] + listFiles = os.listdir(targetDir) + for i in range(0, len(listFiles)): + path = os.path.join(targetDir, listFiles[i]) + if os.path.isdir(path): + files.extend(getAllFiles(path)) + elif os.path.isfile(path): + files.append(path) + return files + +all_files=getAllFiles(os.curdir) +type_dict=dict() + +for each_file in all_files: + if os.path.isdir(each_file): + type_dict.setdefault("文件夹",0) + type_dict["文件夹"]+=1 + else: + ext=os.path.splitext(each_file)[1] + type_dict.setdefault(ext,0) + type_dict[ext]+=1 + +for each_type in type_dict.keys(): + print ("当前文件夹下共有[%s]的文件%d个" %(each_type,type_dict[each_type])) +``` + +在imb项目根目录下运行,运行结果如下 + +```bash +[root@dc6-80-067 imb]# python count.py +/root/mpi-benchmarks +当前文件夹下共有[.c]的文件61个 +当前文件夹下共有[.h]的文件53个 +当前文件夹下共有[.cpp]的文件24个 +当前文件夹下共有[.rc]的文件21个 +当前文件夹下共有[.sln]的文件21个 +当前文件夹下共有[.vcxproj]的文件21个 +当前文件夹下共有[]的文件18个 +当前文件夹下共有[.mk]的文件16个 +当前文件夹下共有[.sample]的文件13个 +当前文件夹下共有[.txt]的文件5个 +当前文件夹下共有[.md]的文件1个 +当前文件夹下共有[.idx]的文件1个 +当前文件夹下共有[.pack]的文件1个 +当前文件夹下共有[.sh]的文件1个 +``` + +查看上述结果可知主要源码文件后缀名为 `cpp`,`c`,`h`. 
+ +### 1.2.统计源码总行数 + +统计所有源码文件的代码行数 + +```bash + find ./ -regex ".*\.c\|.*\.h\|.*\.cpp" | xargs wc -l +``` + +统计结果 + +```bash + 36684 total +``` + +### 1.3.统计不符合要求的总行数 + +对文件后缀名为`cpp`,`c`,`h`的所有文件进行格式化, +通过git与clang-format结合的方式进行统计 + +```bash +[root@host- src]# find . -regex '.*\.\(cpp\|hpp\)' -exec clang-format -style=./src/.clang-format -i {} \; +[root@host- src]# git commit -m "fomat update" +[master 81a84d6] format + 99 files changed, 9500 insertions(+), 7625 deletions(-) + rewrite src_c/Makefile_win (76%) + rewrite src_cpp/HALO/halo_benchmark.h (74%) + rewrite src_cpp/IO/IO_benchmark.cpp (68%) + rewrite src_cpp/MT/MT_benchmark.h (78%) + rewrite src_cpp/args_parser.h (64%) + rewrite src_cpp/helpers/helper_IMB_functions.h (81%) +``` + +### 1.4.统计结果 + +综上信息,项目中代码规范性自检检查结果为 + +通过率 : 79.21% (1-7625/36684)*100% + +不通过率 : 20.79% 7625/36684*100% + +## 2.功能性测试 + +### 2.1.所选测试案例 + +imb提供了运行的不同二进制Benchmark文件,本次选取IMB-RMA基准测试,通过在CPPFLAGS编译选项加入-DCHECK可对MPI以及程序的运行情况进行检测。 + +**请注意:-DCHECK的结果作为真实的基准数据是无效的。停用-DCHECK并重新编译以获得正确的结果。** + +测试文件列表树如下 + +```bash +root@zubinshuo-PC 09:33:42 ~/imb/tmp/mpi-benchmarks-IMB-v2021.3 |imb → origin U:146 ?:3 ✗| → ll +total 19M +-rwxr-xr-x 1 root root 2.7M Dec 15 16:26 IMB-EXT* +-rwxr-xr-x 1 root root 3.1M Dec 15 16:26 IMB-IO* +-rwxr-xr-x 1 root root 3.0M Dec 15 16:26 IMB-MPI1* +-rwxr-xr-x 1 root root 4.2M Dec 15 16:27 IMB-MT* +-rwxr-xr-x 1 root root 3.0M Dec 15 16:26 IMB-NBC* +-rwxr-xr-x 1 root root 47K Dec 15 16:26 IMB-P2P* +-rwxr-xr-x 1 root root 2.9M Dec 15 16:26 IMB-RMA* +``` + +在项目根目录下执行命令来运行Benchmark + +```bash +export OMPI_ALLOW_RUN_AS_ROOT=1; +export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1; +mpirun -n 4 IMB-RMA +``` + +### 2.2.运行结果 + +```bash +[root@dc6-80-067 mpi-benchmarks-IMB-v2021.3]# mpirun -n 4 IMB-RMA +#---------------------------------------------------------------- +# Intel(R) MPI Benchmarks 2021.3, MPI-RMA part +#---------------------------------------------------------------- +# Date : Thu Dec 15 21:52:32 2022 +# Machine : x86_64 +# 
System : Linux +# Release : 5.15.79.1-microsoft-standard-WSL2 +# Version : #1 SMP Wed Nov 23 01:01:46 UTC 2022 +# MPI Version : 3.1 +# MPI Thread Environment: + + +# Calling sequence was: + +# /root/imb/software/libs/IMB-v2021.3/IMB-RMA + +# Minimum message length in bytes: 0 +# Maximum message length in bytes: 4194304 +# +# MPI_Datatype : MPI_BYTE +# MPI_Datatype for reductions : MPI_FLOAT +# MPI_Op : MPI_SUM +# +# + +# List of Benchmarks to run: + +# Unidir_put +# Bidir_put +# Unidir_get +# Bidir_get +# Put_local +# Put_all_local +# One_put_all +# All_put_all +# One_get_all +# All_get_all +# Exchange_put +# Exchange_get +# Accumulate +# Get_accumulate +# Fetch_and_op +# Compare_and_swap +# Uses MPI_INT data type +# Truly_passive_put +# The benchmark measures execution time of MPI_Put for 2 cases: +# 1) The target is waiting in MPI_Barrier call (t_pure value) +# 2) The target performs computation and then enters MPI_Barrier routine (t_ovrl value) + +#---------------------------------------------------------------- +# Benchmarking Unidir_put +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- +# +# MODE: NON-AGGREGATE +# + #bytes #repetitions t[usec] Mbytes/sec defects + 0 100 0.05 0.00 0.00 + 1 100 1.81 0.55 0.00 + 2 100 2.82 0.71 0.00 + 4 100 2.98 1.34 0.00 + 8 100 3.69 2.17 0.00 + 16 100 1.98 8.07 0.00 + 32 100 1.47 21.84 0.00 + 64 100 1.95 32.79 0.00 + 128 100 2.39 53.49 0.00 + 256 100 4.29 59.73 0.00 + 512 100 2.01 254.60 0.00 + 1024 100 2.04 502.70 0.00 + 2048 100 4.94 414.24 0.00 + 4096 100 2.66 1536.96 0.00 + 8192 100 2.98 2750.84 0.00 + 16384 100 5.07 3229.65 0.00 + 32768 100 6.93 4729.79 0.00 + 65536 100 12.66 5176.62 0.00 + 131072 100 13.11 9995.58 0.00 + 262144 100 25.27 10374.14 0.00 + 524288 80 58.61 8944.99 0.00 + 1048576 40 82.22 12753.30 0.00 + 2097152 20 190.29 11020.83 0.00 + 4194304 10 409.09 10252.77 0.00 + 
+#---------------------------------------------------------------- +# Benchmarking Unidir_put +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- +# +# MODE: AGGREGATE +# + #bytes #repetitions t[usec] Mbytes/sec defects + 0 1000 0.04 0.00 0.00 + 1 1000 0.23 4.31 0.00 + 2 1000 0.25 8.05 0.00 + 4 1000 0.31 13.05 0.00 + 8 1000 0.30 26.73 0.00 + 16 1000 0.30 53.87 0.00 + 32 1000 0.32 99.94 0.00 + 64 1000 0.33 191.85 0.00 + 128 1000 0.33 392.04 0.00 + 256 1000 0.33 785.76 0.00 + 512 1000 0.29 1750.43 0.00 + 1024 1000 0.43 2404.33 0.00 + 2048 1000 0.39 5312.59 0.00 + 4096 1000 0.50 8180.56 0.00 + 8192 1000 0.63 13067.50 0.00 + 16384 1000 1.06 15499.02 0.00 + 32768 1000 2.24 14639.04 0.00 + 65536 640 5.66 11583.28 0.00 + 131072 320 8.60 15247.03 0.00 + 262144 160 19.82 13227.91 0.00 + 524288 80 38.82 13505.62 0.00 + 1048576 40 85.04 12329.67 0.00 + 2097152 20 195.70 10715.89 0.00 + 4194304 10 356.44 11767.22 0.00 + +#---------------------------------------------------------------- +# Benchmarking Bidir_put +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- +# +# MODE: NON-AGGREGATE +# + #bytes #repetitions t[usec] Mbytes/sec defects + 0 100 0.06 0.00 0.00 + 1 100 0.40 2.52 0.00 + 2 100 0.40 5.03 0.00 + 4 100 0.40 10.05 0.00 + 8 100 0.40 20.15 0.00 + 16 100 0.41 38.93 0.00 + 32 100 0.42 76.56 0.00 + 64 100 0.42 152.74 0.00 + 128 100 0.43 299.77 0.00 + 256 100 0.45 568.89 0.00 + 512 100 0.50 1026.05 0.00 + 1024 100 0.65 1585.14 0.00 + 2048 100 0.80 2556.80 0.00 + 4096 100 1.13 3624.78 0.00 + 8192 100 1.65 4952.87 0.00 + 16384 100 2.33 7043.85 0.00 + 32768 100 81.28 403.17 0.00 + 65536 100 8.95 7319.19 0.00 + 131072 100 18.31 7157.71 0.00 + 262144 100 30.93 8475.40 0.00 + 524288 80 64.37 8144.76 0.00 + 1048576 40 163.21 6424.71 0.00 + 2097152 20 348.78 6012.74 0.00 + 4194304 10 996.92 4207.27 0.00 + 
+#---------------------------------------------------------------- +# Benchmarking Bidir_put +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- +# +# MODE: AGGREGATE +# + #bytes #repetitions t[usec] Mbytes/sec defects + 0 1000 0.04 0.00 0.00 + 1 1000 0.31 3.28 0.00 + 2 1000 0.30 6.61 0.00 + 4 1000 0.31 12.94 0.00 + 8 1000 0.38 21.33 0.00 + 16 1000 0.35 45.33 0.00 + 32 1000 0.36 88.54 0.00 + 64 1000 0.39 163.35 0.00 + 128 1000 0.37 349.82 0.00 + 256 1000 0.39 656.75 0.00 + 512 1000 0.42 1216.44 0.00 + 1024 1000 0.54 1911.87 0.00 + 2048 1000 0.64 3176.18 0.00 + 4096 1000 1.03 3974.00 0.00 + 8192 1000 1.23 6653.68 0.00 + 16384 1000 2.35 6985.00 0.00 + 32768 1000 4.32 7581.50 0.00 + 65536 640 9.47 6918.67 0.00 + 131072 320 21.35 6139.30 0.00 + 262144 160 42.06 6231.88 0.00 + 524288 80 85.24 6150.91 0.00 + 1048576 40 160.04 6551.76 0.00 + 2097152 20 357.99 5858.13 0.00 + 4194304 10 858.86 4883.57 0.00 + +#---------------------------------------------------------------- +# Benchmarking Unidir_get +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- +# +# MODE: NON-AGGREGATE +# + #bytes #repetitions t[usec] Mbytes/sec defects + 0 100 0.06 0.00 0.00 + 1 100 1.38 0.72 0.00 + 2 100 1.28 1.56 0.00 + 4 100 1.28 3.12 0.00 + 8 100 1.29 6.20 0.00 + 16 100 1.70 9.41 0.00 + 32 100 1.29 24.77 0.00 + 64 100 1.46 43.75 0.00 + 128 100 1.49 85.91 0.00 + 256 100 1.89 135.59 0.00 + 512 100 1.34 382.66 0.00 + 1024 100 1.41 728.31 0.00 + 2048 100 1.39 1476.57 0.00 + 4096 100 1.44 2838.55 0.00 + 8192 100 1.66 4949.85 0.00 + 16384 100 3.28 4999.71 0.00 + 32768 100 4.23 7744.76 0.00 + 65536 100 6.39 10251.21 0.00 + 131072 100 10.33 12684.81 0.00 + 262144 100 16.94 15473.95 0.00 + 524288 80 35.01 14975.92 0.00 + 1048576 40 68.18 15378.41 0.00 + 2097152 20 153.08 13699.27 0.00 + 4194304 10 425.90 9848.10 0.00 + 
+#---------------------------------------------------------------- +# Benchmarking Unidir_get +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- +# +# MODE: AGGREGATE +# + #bytes #repetitions t[usec] Mbytes/sec defects + 0 1000 0.02 0.00 0.00 + 1 1000 0.19 5.19 0.00 + 2 1000 0.19 10.40 0.00 + 4 1000 0.19 21.18 0.00 + 8 1000 0.21 37.49 0.00 + 16 1000 0.19 84.08 0.00 + 32 1000 0.19 168.87 0.00 + 64 1000 0.35 184.49 0.00 + 128 1000 0.29 441.53 0.00 + 256 1000 0.26 966.40 0.00 + 512 1000 0.26 1947.51 0.00 + 1024 1000 0.23 4423.33 0.00 + 2048 1000 0.37 5571.27 0.00 + 4096 1000 0.37 11221.92 0.00 + 8192 1000 0.57 14316.67 0.00 + 16384 1000 1.02 16047.01 0.00 + 32768 1000 2.39 13727.69 0.00 + 65536 640 4.47 14673.61 0.00 + 131072 320 9.05 14490.60 0.00 + 262144 160 16.96 15455.46 0.00 + 524288 80 33.89 15469.15 0.00 + 1048576 40 68.17 15382.34 0.00 + 2097152 20 166.99 12558.17 0.00 + 4194304 10 373.11 11241.47 0.00 + +#---------------------------------------------------------------- +# Benchmarking Bidir_get +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- +# +# MODE: NON-AGGREGATE +# + #bytes #repetitions t[usec] Mbytes/sec defects + 0 100 0.10 0.00 0.00 + 1 100 0.41 2.44 0.00 + 2 100 0.40 4.96 0.00 + 4 100 0.57 7.01 0.00 + 8 100 0.39 20.36 0.00 + 16 100 0.39 40.82 0.00 + 32 100 0.39 82.05 0.00 + 64 100 0.41 158.02 0.00 + 128 100 0.43 294.93 0.00 + 256 100 0.43 596.74 0.00 + 512 100 0.47 1084.75 0.00 + 1024 100 0.57 1783.97 0.00 + 2048 100 0.73 2820.94 0.00 + 4096 100 1.02 4011.75 0.00 + 8192 100 1.77 4623.02 0.00 + 16384 100 2.85 5742.73 0.00 + 32768 100 4.26 7683.00 0.00 + 65536 100 10.72 6116.29 0.00 + 131072 100 17.15 7641.79 0.00 + 262144 100 41.65 6294.73 0.00 + 524288 80 80.13 6542.97 0.00 + 1048576 40 142.77 7344.77 0.00 + 2097152 20 292.84 7161.43 0.00 + 4194304 10 965.31 4345.03 0.00 + 
+#---------------------------------------------------------------- +# Benchmarking Bidir_get +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- +# +# MODE: AGGREGATE +# + #bytes #repetitions t[usec] Mbytes/sec defects + 0 1000 0.25 0.00 0.00 + 1 1000 0.48 2.08 0.00 + 2 1000 0.40 5.05 0.00 + 4 1000 0.34 11.69 0.00 + 8 1000 0.32 24.85 0.00 + 16 1000 0.38 42.51 0.00 + 32 1000 0.30 105.65 0.00 + 64 1000 0.33 196.14 0.00 + 128 1000 0.36 359.05 0.00 + 256 1000 0.34 749.85 0.00 + 512 1000 0.44 1167.62 0.00 + 1024 1000 0.52 1964.70 0.00 + 2048 1000 0.76 2707.56 0.00 + 4096 1000 0.84 4901.87 0.00 + 8192 1000 1.15 7119.14 0.00 + 16384 1000 2.22 7396.84 0.00 + 32768 1000 4.55 7206.19 0.00 + 65536 640 9.05 7241.17 0.00 + 131072 320 19.42 6749.01 0.00 + 262144 160 36.27 7228.44 0.00 + 524288 80 76.88 6819.90 0.00 + 1048576 40 157.74 6647.39 0.00 + 2097152 20 331.71 6322.15 0.00 + 4194304 10 781.92 5364.11 0.00 + +#---------------------------------------------------------------- +# Benchmarking Put_local +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- +# +# MODE: NON-AGGREGATE +# + #bytes #repetitions t[usec] Mbytes/sec defects + 0 100 0.06 0.00 0.00 + 1 100 1.32 0.76 0.00 + 2 100 2.14 0.93 0.00 + 4 100 1.85 2.17 0.00 + 8 100 2.57 3.12 0.00 + 16 100 2.35 6.81 0.00 + 32 100 2.42 13.23 0.00 + 64 100 2.42 26.47 0.00 + 128 100 2.56 49.94 0.00 + 256 100 2.45 104.58 0.00 + 512 100 2.47 207.20 0.00 + 1024 100 2.53 404.10 0.00 + 2048 100 2.63 778.71 0.00 + 4096 100 3.33 1228.19 0.00 + 8192 100 4.10 1998.54 0.00 + 16384 100 4.82 3398.47 0.00 + 32768 100 5.25 6237.96 0.00 + 65536 100 8.73 7502.69 0.00 + 131072 100 16.91 7749.78 0.00 + 262144 100 23.02 11386.18 0.00 + 524288 80 37.46 13997.81 0.00 + 1048576 40 87.65 11963.56 0.00 + 2097152 20 191.52 10950.04 0.00 + 4194304 10 364.03 11521.86 0.00 + 
+#---------------------------------------------------------------- +# Benchmarking Put_local +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- +# +# MODE: AGGREGATE +# + #bytes #repetitions t[usec] Mbytes/sec defects + 0 1000 0.04 0.00 0.00 + 1 1000 0.25 4.01 0.00 + 2 1000 0.25 8.05 0.00 + 4 1000 0.24 16.37 0.00 + 8 1000 0.30 26.85 0.00 + 16 1000 0.23 68.46 0.00 + 32 1000 0.27 117.69 0.00 + 64 1000 0.30 215.20 0.00 + 128 1000 0.35 366.45 0.00 + 256 1000 0.27 957.01 0.00 + 512 1000 0.32 1587.60 0.00 + 1024 1000 0.40 2549.80 0.00 + 2048 1000 0.55 3732.46 0.00 + 4096 1000 0.36 11293.08 0.00 + 8192 1000 0.64 12828.06 0.00 + 16384 1000 1.41 11645.46 0.00 + 32768 1000 2.74 11944.31 0.00 + 65536 640 4.41 14867.62 0.00 + 131072 320 9.92 13207.49 0.00 + 262144 160 19.41 13502.58 0.00 + 524288 80 38.57 13594.48 0.00 + 1048576 40 70.44 14886.09 0.00 + 2097152 20 173.46 12089.77 0.00 + 4194304 10 573.76 7310.21 0.00 + +#---------------------------------------------------------------- +# Benchmarking Put_all_local +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t[usec] Mbytes/sec defects + 0 1000 0.02 0.00 0.00 + 1 1000 0.23 4.34 0.00 + 2 1000 0.30 6.61 0.00 + 4 1000 0.29 13.94 0.00 + 8 1000 0.26 31.20 0.00 + 16 1000 0.25 63.24 0.00 + 32 1000 0.28 113.64 0.00 + 64 1000 0.26 244.84 0.00 + 128 1000 0.26 498.44 0.00 + 256 1000 0.31 834.15 0.00 + 512 1000 0.33 1567.67 0.00 + 1024 1000 0.33 3065.87 0.00 + 2048 1000 0.40 5099.60 0.00 + 4096 1000 0.37 11091.25 0.00 + 8192 1000 0.60 13607.97 0.00 + 16384 1000 1.24 13187.38 0.00 + 32768 1000 2.74 11950.84 0.00 + 65536 640 4.44 14764.00 0.00 + 131072 320 10.09 12993.91 0.00 + 262144 160 19.23 13632.92 0.00 + 524288 80 39.88 13148.29 0.00 + 1048576 40 80.73 12989.08 0.00 + 2097152 20 155.82 13458.81 0.00 + 4194304 10 393.83 10650.04 0.00 + 
+#---------------------------------------------------------------- +# Benchmarking Put_all_local +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t[usec] Mbytes/sec defects + 0 1000 0.06 0.00 0.00 + 1 1000 0.49 2.03 0.00 + 2 1000 0.47 4.21 0.00 + 4 1000 0.62 6.42 0.00 + 8 1000 0.55 14.64 0.00 + 16 1000 0.63 25.52 0.00 + 32 1000 0.65 49.18 0.00 + 64 1000 0.61 104.15 0.00 + 128 1000 0.67 190.96 0.00 + 256 1000 0.71 362.30 0.00 + 512 1000 0.73 704.85 0.00 + 1024 1000 0.76 1341.37 0.00 + 2048 1000 0.93 2198.60 0.00 + 4096 1000 1.10 3721.61 0.00 + 8192 1000 2.08 3929.39 0.00 + 16384 1000 3.17 5169.11 0.00 + 32768 1000 7.44 4403.95 0.00 + 65536 640 13.66 4798.37 0.00 + 131072 320 27.23 4813.13 0.00 + 262144 160 55.79 4699.08 0.00 + 524288 80 119.80 4376.32 0.00 + 1048576 40 266.09 3940.65 0.00 + 2097152 20 462.12 4538.11 0.00 + 4194304 10 1222.62 3430.59 0.00 + +#---------------------------------------------------------------- +# Benchmarking One_put_all +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t[usec] Mbytes/sec defects + 0 1000 0.02 0.00 0.00 + 1 1000 0.23 4.31 0.00 + 2 1000 0.20 10.05 0.00 + 4 1000 0.24 16.63 0.00 + 8 1000 0.20 40.88 0.00 + 16 1000 0.19 83.33 0.00 + 32 1000 0.20 161.78 0.00 + 64 1000 0.27 235.73 0.00 + 128 1000 0.28 456.98 0.00 + 256 1000 0.21 1232.55 0.00 + 512 1000 0.23 2249.56 0.00 + 1024 1000 0.25 4076.43 0.00 + 2048 1000 0.29 7175.89 0.00 + 4096 1000 0.46 8912.10 0.00 + 8192 1000 0.57 14331.70 0.00 + 16384 1000 1.12 14628.57 0.00 + 32768 1000 2.36 13866.53 0.00 + 65536 640 7.16 9158.27 0.00 + 131072 320 8.17 16049.22 0.00 + 262144 160 29.40 8917.41 0.00 + 524288 80 35.97 14576.21 0.00 + 1048576 40 80.89 12962.59 0.00 + 2097152 20 159.66 13135.11 0.00 + 4194304 10 495.50 8464.79 0.00 + +#---------------------------------------------------------------- +# Benchmarking 
One_put_all +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t[usec] Mbytes/sec defects + 0 1000 0.06 0.00 0.00 + 1 1000 0.46 2.16 0.00 + 2 1000 0.47 4.21 0.00 + 4 1000 0.58 6.89 0.00 + 8 1000 0.46 17.53 0.00 + 16 1000 0.48 33.17 0.00 + 32 1000 0.51 62.97 0.00 + 64 1000 0.49 131.28 0.00 + 128 1000 0.47 270.33 0.00 + 256 1000 0.60 429.39 0.00 + 512 1000 0.56 917.56 0.00 + 1024 1000 0.70 1459.94 0.00 + 2048 1000 0.93 2204.76 0.00 + 4096 1000 1.12 3645.10 0.00 + 8192 1000 1.69 4840.18 0.00 + 16384 1000 3.82 4284.52 0.00 + 32768 1000 7.29 4494.25 0.00 + 65536 640 13.71 4778.47 0.00 + 131072 320 28.19 4648.92 0.00 + 262144 160 59.86 4379.06 0.00 + 524288 80 113.88 4603.76 0.00 + 1048576 40 251.16 4174.85 0.00 + 2097152 20 492.87 4254.94 0.00 + 4194304 10 1178.36 3559.44 0.00 + +#----------------------------------------------------------------------------- +# Benchmarking All_put_all +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] defects + 0 1000 0.02 0.04 0.03 0.00 + 1 1000 0.36 0.36 0.36 0.00 + 2 1000 0.24 0.29 0.26 0.00 + 4 1000 0.36 0.36 0.36 0.00 + 8 1000 0.39 0.39 0.39 0.00 + 16 1000 0.32 0.38 0.35 0.00 + 32 1000 0.33 0.37 0.35 0.00 + 64 1000 0.36 0.59 0.47 0.00 + 128 1000 0.46 0.46 0.46 0.00 + 256 1000 0.38 0.38 0.38 0.00 + 512 1000 0.42 0.42 0.42 0.00 + 1024 1000 0.52 0.52 0.52 0.00 + 2048 1000 0.61 0.67 0.64 0.00 + 4096 1000 0.81 0.88 0.85 0.00 + 8192 1000 1.61 1.67 1.64 0.00 + 16384 1000 2.58 2.71 2.65 0.00 + 32768 1000 5.17 5.52 5.35 0.00 + 65536 640 9.36 9.51 9.44 0.00 + 131072 320 19.46 19.61 19.53 0.00 + 262144 160 39.86 42.13 40.99 0.00 + 524288 80 82.06 82.06 82.06 0.00 + 1048576 40 162.40 165.79 164.10 0.00 + 2097152 20 388.38 392.87 390.63 0.00 + 4194304 10 1047.79 1081.29 1064.54 0.00 + 
+#----------------------------------------------------------------------------- +# Benchmarking All_put_all +# #processes = 4 +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] defects + 0 1000 0.06 0.11 0.09 0.00 + 1 1000 0.81 1.41 1.09 0.00 + 2 1000 1.45 1.71 1.64 0.00 + 4 1000 1.50 1.69 1.64 0.00 + 8 1000 1.47 1.51 1.49 0.00 + 16 1000 1.55 1.57 1.56 0.00 + 32 1000 0.48 1.79 1.30 0.00 + 64 1000 1.85 1.91 1.88 0.00 + 128 1000 0.43 1.49 1.15 0.00 + 256 1000 1.47 1.51 1.49 0.00 + 512 1000 0.48 1.72 1.21 0.00 + 1024 1000 1.59 2.07 1.94 0.00 + 2048 1000 1.89 2.04 1.99 0.00 + 4096 1000 2.36 2.63 2.47 0.00 + 8192 1000 2.36 4.36 3.57 0.00 + 16384 1000 4.45 6.04 5.45 0.00 + 32768 1000 14.23 15.55 14.71 0.00 + 65536 640 18.62 30.05 26.55 0.00 + 131072 320 63.39 67.78 65.28 0.00 + 262144 160 138.47 151.95 144.42 0.00 + 524288 80 0.51 197.57 130.15 0.00 + 1048576 40 262.02 589.78 469.19 0.00 + 2097152 20 978.80 1228.84 1125.50 0.00 + 4194304 10 4717.21 4801.83 4754.16 0.00 + +#---------------------------------------------------------------- +# Benchmarking One_get_all +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t[usec] Mbytes/sec defects + 0 1000 0.02 0.00 0.00 + 1 1000 0.19 5.36 0.00 + 2 1000 0.19 10.58 0.00 + 4 1000 0.19 21.11 0.00 + 8 1000 0.19 42.02 0.00 + 16 1000 0.19 84.61 0.00 + 32 1000 0.19 170.48 0.00 + 64 1000 0.19 331.61 0.00 + 128 1000 0.19 666.67 0.00 + 256 1000 0.19 1340.31 0.00 + 512 1000 0.20 2581.95 0.00 + 1024 1000 0.26 3990.65 0.00 + 2048 1000 0.39 5279.71 0.00 + 4096 1000 0.46 8894.68 0.00 + 8192 1000 0.92 8926.66 0.00 + 16384 1000 1.09 14985.82 0.00 + 32768 1000 2.08 15717.57 0.00 + 65536 640 3.84 17069.44 0.00 + 131072 320 9.14 14337.54 0.00 + 262144 160 17.26 15191.25 0.00 + 524288 80 37.32 14046.56 0.00 + 1048576 40 71.54 14656.68 0.00 + 2097152 20 
145.87 14376.86 0.00 + 4194304 10 450.93 9301.45 0.00 + +#---------------------------------------------------------------- +# Benchmarking One_get_all +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t[usec] Mbytes/sec defects + 0 1000 0.06 0.00 0.00 + 1 1000 0.81 1.23 0.00 + 2 1000 0.57 3.50 0.00 + 4 1000 0.56 7.08 0.00 + 8 1000 0.81 9.94 0.00 + 16 1000 0.54 29.77 0.00 + 32 1000 0.55 57.91 0.00 + 64 1000 0.60 106.00 0.00 + 128 1000 0.70 183.14 0.00 + 256 1000 0.62 409.80 0.00 + 512 1000 0.68 754.49 0.00 + 1024 1000 0.71 1447.15 0.00 + 2048 1000 0.92 2231.91 0.00 + 4096 1000 1.10 3715.53 0.00 + 8192 1000 1.50 5477.03 0.00 + 16384 1000 3.32 4937.32 0.00 + 32768 1000 5.53 5922.93 0.00 + 65536 640 9.36 7000.31 0.00 + 131072 320 20.48 6401.37 0.00 + 262144 160 41.20 6362.81 0.00 + 524288 80 75.76 6920.84 0.00 + 1048576 40 150.12 6984.80 0.00 + 2097152 20 311.11 6740.98 0.00 + 4194304 10 791.33 5300.32 0.00 + +#----------------------------------------------------------------------------- +# Benchmarking All_get_all +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] defects + 0 1000 0.04 0.04 0.04 0.00 + 1 1000 0.29 0.35 0.32 0.00 + 2 1000 0.26 0.26 0.26 0.00 + 4 1000 0.26 0.33 0.30 0.00 + 8 1000 0.26 0.33 0.29 0.00 + 16 1000 0.27 0.27 0.27 0.00 + 32 1000 0.52 0.57 0.54 0.00 + 64 1000 0.27 0.29 0.28 0.00 + 128 1000 0.29 0.29 0.29 0.00 + 256 1000 0.31 0.31 0.31 0.00 + 512 1000 0.33 0.33 0.33 0.00 + 1024 1000 0.44 0.44 0.44 0.00 + 2048 1000 0.37 0.49 0.43 0.00 + 4096 1000 0.52 0.68 0.60 0.00 + 8192 1000 0.85 1.09 0.97 0.00 + 16384 1000 2.25 2.26 2.26 0.00 + 32768 1000 3.00 4.60 3.80 0.00 + 65536 640 5.46 9.60 7.53 0.00 + 131072 320 10.26 18.66 14.46 0.00 + 262144 160 31.25 31.27 31.26 0.00 + 524288 80 44.18 71.27 57.73 0.00 + 1048576 40 89.14 149.06 119.10 0.00 
+ 2097152 20 185.91 288.22 237.07 0.00 + 4194304 10 638.24 760.67 699.45 0.00 + +#----------------------------------------------------------------------------- +# Benchmarking All_get_all +# #processes = 4 +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] defects + 0 1000 0.06 0.06 0.06 0.00 + 1 1000 0.38 1.89 1.33 0.00 + 2 1000 1.14 1.66 1.46 0.00 + 4 1000 1.29 1.67 1.51 0.00 + 8 1000 0.88 1.69 1.28 0.00 + 16 1000 1.65 2.20 1.82 0.00 + 32 1000 2.43 2.93 2.61 0.00 + 64 1000 1.10 1.53 1.33 0.00 + 128 1000 0.96 1.63 1.35 0.00 + 256 1000 1.56 2.33 2.04 0.00 + 512 1000 1.36 2.02 1.67 0.00 + 1024 1000 2.03 3.40 2.63 0.00 + 2048 1000 2.20 3.54 2.79 0.00 + 4096 1000 3.59 4.77 4.36 0.00 + 8192 1000 0.41 5.24 3.76 0.00 + 16384 1000 7.33 9.12 7.92 0.00 + 32768 1000 12.77 14.96 13.89 0.00 + 65536 640 0.39 24.45 17.09 0.00 + 131072 320 29.66 57.55 45.48 0.00 + 262144 160 94.48 96.77 95.49 0.00 + 524288 80 89.92 224.58 185.13 0.00 + 1048576 40 516.14 588.10 548.04 0.00 + 2097152 20 1261.54 1441.99 1356.90 0.00 + 4194304 10 3114.40 3350.92 3259.18 0.00 + +#----------------------------------------------------------------------------- +# Benchmarking Exchange_put +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] defects + 0 1000 0.08 0.08 0.08 0.00 + 1 1000 0.75 0.76 0.76 0.00 + 2 1000 0.55 0.56 0.56 0.00 + 4 1000 0.50 0.50 0.50 0.00 + 8 1000 0.53 0.53 0.53 0.00 + 16 1000 0.51 0.51 0.51 0.00 + 32 1000 0.52 0.52 0.52 0.00 + 64 1000 0.59 0.59 0.59 0.00 + 128 1000 0.88 0.90 0.89 0.00 + 256 1000 0.78 0.79 0.78 0.00 + 512 1000 0.74 0.74 0.74 0.00 + 1024 1000 1.72 1.72 1.72 0.00 + 2048 1000 1.30 1.31 1.30 0.00 + 4096 1000 1.89 1.90 1.90 0.00 + 8192 1000 2.22 2.22 2.22 0.00 + 16384 1000 4.37 4.37 4.37 0.00 + 32768 1000 8.28 8.28 8.28 0.00 + 65536 
640 18.30 18.30 18.30 0.00 + 131072 320 32.77 32.77 32.77 0.00 + 262144 160 70.17 70.18 70.17 0.00 + 524288 80 162.44 162.52 162.48 0.00 + 1048576 40 444.59 445.09 444.84 0.00 + 2097152 20 1125.26 1127.89 1126.58 0.00 + 4194304 10 2425.59 2433.33 2429.46 0.00 + +#----------------------------------------------------------------------------- +# Benchmarking Exchange_put +# #processes = 4 +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] defects + 0 1000 0.12 0.22 0.18 0.00 + 1 1000 1.34 2.49 1.93 0.00 + 2 1000 0.85 1.10 0.97 0.00 + 4 1000 0.93 0.99 0.96 0.00 + 8 1000 0.88 0.94 0.92 0.00 + 16 1000 0.94 1.11 1.01 0.00 + 32 1000 0.95 1.11 1.04 0.00 + 64 1000 1.16 1.34 1.25 0.00 + 128 1000 1.05 1.10 1.08 0.00 + 256 1000 0.85 1.02 0.95 0.00 + 512 1000 0.83 0.93 0.89 0.00 + 1024 1000 1.10 1.25 1.20 0.00 + 2048 1000 1.36 1.38 1.37 0.00 + 4096 1000 1.97 2.42 2.27 0.00 + 8192 1000 3.30 3.43 3.36 0.00 + 16384 1000 4.95 6.35 5.67 0.00 + 32768 1000 10.55 10.76 10.65 0.00 + 65536 640 23.93 26.79 25.74 0.00 + 131072 320 37.41 40.07 38.46 0.00 + 262144 160 90.34 135.90 121.06 0.00 + 524288 80 320.75 330.12 325.64 0.00 + 1048576 40 784.84 809.35 794.28 0.00 + 2097152 20 1533.42 1618.72 1581.42 0.00 + 4194304 10 3195.19 3344.94 3272.19 0.00 + +#----------------------------------------------------------------------------- +# Benchmarking Exchange_get +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] defects + 0 1000 0.09 0.10 0.09 0.00 + 1 1000 0.74 0.74 0.74 0.00 + 2 1000 0.70 0.70 0.70 0.00 + 4 1000 0.70 0.71 0.70 0.00 + 8 1000 0.74 0.75 0.74 0.00 + 16 1000 0.71 0.74 0.72 0.00 + 32 1000 0.72 0.75 0.74 0.00 + 64 1000 0.78 0.87 0.82 0.00 + 128 1000 1.03 1.08 1.06 0.00 + 256 1000 0.77 0.91 0.84 0.00 + 512 1000 0.76 0.77 0.77 0.00 + 1024 1000 
0.97 1.03 1.00 0.00 + 2048 1000 1.25 2.19 1.72 0.00 + 4096 1000 1.77 2.09 1.93 0.00 + 8192 1000 2.93 2.94 2.93 0.00 + 16384 1000 4.45 4.45 4.45 0.00 + 32768 1000 11.16 11.18 11.17 0.00 + 65536 640 18.58 19.57 19.07 0.00 + 131072 320 40.79 40.80 40.80 0.00 + 262144 160 77.12 77.13 77.13 0.00 + 524288 80 155.84 156.87 156.35 0.00 + 1048576 40 315.84 340.65 328.24 0.00 + 2097152 20 831.45 923.38 877.42 0.00 + 4194304 10 1699.25 2066.91 1883.08 0.00 + +#----------------------------------------------------------------------------- +# Benchmarking Exchange_get +# #processes = 4 +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] defects + 0 1000 0.13 0.24 0.18 0.00 + 1 1000 0.91 1.02 0.97 0.00 + 2 1000 0.90 0.92 0.91 0.00 + 4 1000 0.95 1.11 1.04 0.00 + 8 1000 0.91 1.08 0.98 0.00 + 16 1000 0.96 1.15 1.06 0.00 + 32 1000 1.04 1.19 1.13 0.00 + 64 1000 0.88 0.90 0.89 0.00 + 128 1000 0.87 0.95 0.92 0.00 + 256 1000 0.87 1.06 0.97 0.00 + 512 1000 1.25 1.46 1.35 0.00 + 1024 1000 1.29 1.51 1.38 0.00 + 2048 1000 1.63 1.83 1.75 0.00 + 4096 1000 2.28 3.11 2.71 0.00 + 8192 1000 4.11 4.52 4.36 0.00 + 16384 1000 6.98 7.42 7.15 0.00 + 32768 1000 11.14 13.22 12.13 0.00 + 65536 640 19.22 25.50 22.35 0.00 + 131072 320 35.62 41.10 38.23 0.00 + 262144 160 90.70 101.27 97.57 0.00 + 524288 80 203.66 215.85 210.68 0.00 + 1048576 40 568.14 612.77 582.10 0.00 + 2097152 20 1213.53 1452.92 1347.19 0.00 + 4194304 10 2456.24 2920.88 2720.98 0.00 + +#---------------------------------------------------------------- +# Benchmarking Accumulate +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- +# +# MODE: NON-AGGREGATE +# + #bytes #repetitions t[usec] Mbytes/sec defects + 0 100 2.91 0.00 0.00 + 4 100 6.08 0.66 0.00 + 8 100 5.44 1.47 0.00 + 16 100 5.70 2.81 0.00 + 32 100 6.00 5.33 0.00 + 64 100 5.96 10.73 0.00 + 128 100 6.00 21.33 0.00 + 
256 100 6.61 38.74 0.00 + 512 100 6.54 78.28 0.00 + 1024 100 6.87 149.05 0.00 + 2048 100 11.08 184.90 0.00 + 4096 100 6.57 623.63 0.00 + 8192 100 12.72 644.08 0.00 + 16384 100 13.63 1201.88 0.00 + 32768 100 19.12 1713.90 0.00 + 65536 100 26.33 2489.21 0.00 + 131072 100 47.90 2736.31 0.00 + 262144 100 64.57 4059.78 0.00 + 524288 80 127.19 4122.04 0.00 + 1048576 40 256.60 4086.42 0.00 + 2097152 20 454.22 4617.04 0.00 + 4194304 10 1114.79 3762.42 0.00 + +#---------------------------------------------------------------- +# Benchmarking Accumulate +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- +# +# MODE: AGGREGATE +# + #bytes #repetitions t[usec] Mbytes/sec defects + 0 1000 3.70 0.00 0.00 + 4 1000 7.87 0.51 0.00 + 8 1000 7.74 1.03 0.00 + 16 1000 6.37 2.51 0.00 + 32 1000 9.76 3.28 0.00 + 64 1000 6.06 10.56 0.00 + 128 1000 9.02 14.20 0.00 + 256 1000 6.81 37.61 0.00 + 512 1000 6.93 73.93 0.00 + 1024 1000 10.31 99.34 0.00 + 2048 1000 7.12 287.79 0.00 + 4096 1000 8.98 456.02 0.00 + 8192 1000 13.50 606.93 0.00 + 16384 1000 12.71 1289.53 0.00 + 32768 1000 15.87 2064.59 0.00 + 65536 640 25.04 2616.93 0.00 + 131072 320 35.84 3657.49 0.00 + 262144 160 70.61 3712.66 0.00 + 524288 80 123.50 4245.25 0.00 + 1048576 40 236.82 4427.69 0.00 + 2097152 20 499.69 4196.91 0.00 + 4194304 10 1273.88 3292.54 0.00 + +#---------------------------------------------------------------- +# Benchmarking Get_accumulate +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- +# +# MODE: NON-AGGREGATE +# + #bytes #repetitions t[usec] Mbytes/sec defects + 0 100 2.74 0.00 0.00 + 4 100 5.55 0.72 0.00 + 8 100 5.63 1.42 0.00 + 16 100 5.71 2.80 0.00 + 32 100 6.05 5.29 0.00 + 64 100 7.31 8.76 0.00 + 128 100 7.28 17.58 0.00 + 256 100 6.00 42.66 0.00 + 512 100 6.12 83.62 0.00 + 1024 100 6.91 148.11 0.00 + 2048 100 6.45 317.67 0.00 + 4096 100 
6.75 606.72 0.00 + 8192 100 10.27 797.74 0.00 + 16384 100 20.32 806.30 0.00 + 32768 100 24.84 1319.22 0.00 + 65536 100 31.28 2095.14 0.00 + 131072 100 47.50 2759.41 0.00 + 262144 100 91.02 2880.16 0.00 + 524288 80 157.68 3325.12 0.00 + 1048576 40 336.36 3117.47 0.00 + 2097152 20 798.06 2627.83 0.00 + 4194304 10 1884.25 2225.98 0.00 + +#---------------------------------------------------------------- +# Benchmarking Get_accumulate +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- +# +# MODE: AGGREGATE +# + #bytes #repetitions t[usec] Mbytes/sec defects + 0 1000 4.17 0.00 0.00 + 4 1000 10.10 0.40 0.00 + 8 1000 7.28 1.10 0.00 + 16 1000 8.68 1.84 0.00 + 32 1000 7.14 4.48 0.00 + 64 1000 8.47 7.55 0.00 + 128 1000 7.69 16.64 0.00 + 256 1000 8.99 28.49 0.00 + 512 1000 7.77 65.91 0.00 + 1024 1000 7.17 142.91 0.00 + 2048 1000 9.57 213.95 0.00 + 4096 1000 8.26 495.87 0.00 + 8192 1000 11.10 738.24 0.00 + 16384 1000 17.52 934.92 0.00 + 32768 1000 20.67 1584.99 0.00 + 65536 640 32.93 1989.91 0.00 + 131072 320 54.05 2425.10 0.00 + 262144 160 105.53 2484.06 0.00 + 524288 80 166.54 3148.07 0.00 + 1048576 40 327.03 3206.38 0.00 + 2097152 20 737.11 2845.12 0.00 + 4194304 10 1618.51 2591.46 0.00 + +#---------------------------------------------------------------- +# Benchmarking Fetch_and_op +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- +# +# MODE: NON-AGGREGATE +# + #bytes #repetitions t[usec] Mbytes/sec defects + 4 100 7.04 0.57 0.00 + +#---------------------------------------------------------------- +# Benchmarking Fetch_and_op +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- +# +# MODE: AGGREGATE +# + #bytes #repetitions t[usec] Mbytes/sec defects + 4 1000 7.93 0.50 0.00 + 
+#---------------------------------------------------------------- +# Benchmarking Compare_and_swap +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- +# +# MODE: NON-AGGREGATE +# + #bytes #repetitions t[usec] Mbytes/sec defects + 4 100 7.72 0.52 0.00 + +#---------------------------------------------------------------- +# Benchmarking Compare_and_swap +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- +# +# MODE: AGGREGATE +# + #bytes #repetitions t[usec] Mbytes/sec defects + 4 1000 5.79 0.69 0.00 + +#---------------------------------------------------------------- +# Benchmarking Truly_passive_put +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_pure[usec] t_ovrl[usec] defects + 0 1000 0.44 0.00 0.00 + 1 1000 2.13 0.00 0.00 + 2 1000 2.77 0.00 0.00 + 4 1000 3.29 0.00 0.00 + 8 1000 2.69 0.00 0.00 + 16 1000 2.23 0.00 0.00 + 32 1000 2.83 0.00 0.00 + 64 1000 2.05 0.00 0.00 + 128 1000 2.02 0.00 0.00 + 256 1000 2.03 0.00 0.00 + 512 1000 3.22 0.00 0.00 + 1024 1000 2.93 0.00 0.00 + 2048 1000 3.23 0.00 0.00 + 4096 1000 2.71 0.00 0.00 + 8192 1000 2.99 0.00 0.00 + 16384 1000 5.11 0.00 0.00 + 32768 1000 6.88 0.00 0.00 + 65536 640 12.70 0.00 0.00 + 131072 320 15.32 0.00 0.00 + 262144 160 24.10 0.00 0.00 + 524288 80 47.63 0.00 0.00 + 1048576 40 92.26 0.00 0.00 + 2097152 20 148.32 0.00 0.00 + 4194304 10 468.06 0.00 0.00 + + +# All processes entering MPI_Finalize +``` + +测试结果 + +benchmark运行正常,且defects列值均为0,说明各类型函数和功能都响应正常。测试通过。 + +## 3.性能测试 + +### 3.1.测试平台信息对比 + +| | arm信息 | x86信息 | +| -------- | -------------------------------- | --------------------- | +| 操作系统 | openEuler 22.03 (LTS) | openEuler 22.03 (LTS) | +| 内核版本 | 5.10.0-60.18.0.50.oe2203.aarch64 | 5.15.79.1.oe1.x86_64 | + +### 3.2.测试软件环境信息对比 + 
+| | arm信息 | x86信息 | +| --- | ------------- | --------- | +| gcc | bisheng 2.1.0 | gcc 9.3.0 | +| mpi | hmpi1.1.1 | hmpi1.1.1 | +| imb | 2021.3 | 2021.3 | + +### 3.3.测试硬件性能信息对比 + +| | arm信息 | x86信息 | +| ------ | ----------- | -------- | +| cpu | Kunpeng 920 | | +| 核心数 | 16 | 4 | +| 内存 | 32 GB | 8 GB | +| 磁盘io | 1.3 GB/s | 400 MB/s | +| 虚拟化 | KVM | KVM | + +### 3.4.测试选择的案例 + +IMB-MPI1测试mpi通信性能 + +### 3.6.ARM运行结果 + +#### HMPI + +```shell +#---------------------------------------------------------------- +# Intel(R) MPI Benchmarks 2021.3, MPI-1 part +#---------------------------------------------------------------- +# Date : Thu Dec 15 22:50:26 2022 +# Machine : aarch64 +# System : Linux +# Release : 5.10.0-60.18.0.50.oe2203.aarch64 +# Version : #1 SMP Wed Mar 30 02:43:08 UTC 2022 +# MPI Version : 3.1 +# MPI Thread Environment: + + +# Calling sequence was: + +# IMB-MPI1 + +# Minimum message length in bytes: 0 +# Maximum message length in bytes: 4194304 +# +# MPI_Datatype : MPI_BYTE +# MPI_Datatype for reductions : MPI_FLOAT +# MPI_Op : MPI_SUM +# +# + +# List of Benchmarks to run: + +# PingPong +# PingPing +# Sendrecv +# Exchange +# Allreduce +# Reduce +# Reduce_local +# Reduce_scatter +# Reduce_scatter_block +# Allgather +# Allgatherv +# Gather +# Gatherv +# Scatter +# Scatterv +# Alltoall +# Alltoallv +# Bcast +# Barrier + +#--------------------------------------------------- +# Benchmarking PingPong +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#--------------------------------------------------- + #bytes #repetitions t[usec] Mbytes/sec + 0 1000 0.19 0.00 + 1 1000 0.19 5.21 + 2 1000 0.19 10.43 + 4 1000 0.19 20.96 + 8 1000 0.19 41.95 + 16 1000 0.19 84.15 + 32 1000 0.22 148.08 + 64 1000 0.19 331.34 + 128 1000 0.32 400.88 + 256 1000 0.38 673.66 + 512 1000 0.45 1146.35 + 1024 1000 0.73 1409.70 + 2048 1000 0.84 2442.73 + 4096 1000 1.19 3428.66 + 8192 1000 1.65 4967.87 + 16384 1000 2.65 6171.51 + 32768 1000 4.47 7325.67 + 65536 640 8.02 8169.28 + 
131072 320 15.11 8671.75 + 262144 160 32.31 8114.34 + 524288 80 73.19 7163.79 + 1048576 40 161.43 6495.65 + 2097152 20 336.46 6232.95 + 4194304 10 763.35 5494.58 + +#--------------------------------------------------- +# Benchmarking PingPing +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#--------------------------------------------------- + #bytes #repetitions t[usec] Mbytes/sec + 0 1000 0.24 0.00 + 1 1000 0.24 4.15 + 2 1000 0.24 8.30 + 4 1000 0.24 16.44 + 8 1000 0.24 33.32 + 16 1000 0.24 66.82 + 32 1000 0.26 124.16 + 64 1000 0.26 245.49 + 128 1000 0.40 319.59 + 256 1000 0.44 584.50 + 512 1000 0.54 954.40 + 1024 1000 0.69 1475.36 + 2048 1000 0.86 2367.93 + 4096 1000 1.21 3380.77 + 8192 1000 1.98 4132.59 + 16384 1000 4.39 3733.45 + 32768 1000 6.06 5410.46 + 65536 640 9.37 6994.63 + 131072 320 16.49 7950.13 + 262144 160 33.35 7861.30 + 524288 80 76.79 6827.34 + 1048576 40 168.30 6230.47 + 2097152 20 356.88 5876.28 + 4194304 10 889.85 4713.50 + +#----------------------------------------------------------------------------- +# Benchmarking Sendrecv +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec + 0 1000 0.41 0.41 0.41 0.00 + 1 1000 0.42 0.42 0.42 4.80 + 2 1000 0.41 0.41 0.41 9.70 + 4 1000 0.42 0.42 0.42 19.25 + 8 1000 0.43 0.43 0.43 37.48 + 16 1000 0.41 0.41 0.41 77.29 + 32 1000 0.43 0.43 0.43 150.00 + 64 1000 0.43 0.43 0.43 296.35 + 128 1000 0.59 0.59 0.59 434.61 + 256 1000 0.60 0.60 0.60 847.88 + 512 1000 0.71 0.71 0.71 1444.63 + 1024 1000 0.87 0.87 0.87 2355.40 + 2048 1000 1.05 1.05 1.05 3900.99 + 4096 1000 1.29 1.29 1.29 6371.43 + 8192 1000 1.86 1.86 1.86 8798.20 + 16384 1000 3.56 3.56 3.56 9191.84 + 32768 1000 7.08 7.08 7.08 9254.44 + 65536 640 13.75 13.75 13.75 9531.16 + 131072 320 26.62 26.62 26.62 9846.42 + 262144 160 33.30 33.30 33.30 15742.79 + 524288 80 75.16 
75.17 75.17 13949.81 + 1048576 40 164.38 164.39 164.38 12757.19 + 2097152 20 355.53 355.55 355.54 11796.70 + 4194304 10 926.00 926.13 926.07 9057.69 + +#----------------------------------------------------------------------------- +# Benchmarking Sendrecv +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec + 0 1000 0.39 0.39 0.39 0.00 + 1 1000 0.41 0.42 0.42 4.82 + 2 1000 0.40 0.40 0.40 9.96 + 4 1000 0.40 0.40 0.40 20.06 + 8 1000 0.40 0.40 0.40 40.16 + 16 1000 0.40 0.40 0.40 80.25 + 32 1000 0.41 0.41 0.41 155.38 + 64 1000 0.41 0.41 0.41 313.96 + 128 1000 0.54 0.54 0.54 470.66 + 256 1000 0.59 0.59 0.59 861.85 + 512 1000 0.68 0.68 0.68 1512.42 + 1024 1000 0.83 0.83 0.83 2458.61 + 2048 1000 1.01 1.01 1.01 4042.60 + 4096 1000 1.33 1.33 1.33 6179.94 + 8192 1000 1.97 1.97 1.97 8316.41 + 16384 1000 3.72 3.72 3.72 8811.23 + 32768 1000 7.39 7.39 7.39 8866.11 + 65536 640 14.35 14.35 14.35 9130.97 + 131072 320 28.76 28.80 28.78 9103.70 + 262144 160 34.29 34.30 34.30 15284.84 + 524288 80 80.36 80.37 80.36 13047.47 + 1048576 40 181.51 181.85 181.73 11532.02 + 2097152 20 397.29 399.40 398.56 10501.53 + 4194304 10 1244.32 1248.97 1246.96 6716.43 + +#----------------------------------------------------------------------------- +# Benchmarking Sendrecv +# #processes = 8 +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec + 0 1000 0.39 0.39 0.39 0.00 + 1 1000 0.39 0.39 0.39 5.18 + 2 1000 0.38 0.38 0.38 10.40 + 4 1000 0.39 0.39 0.39 20.67 + 8 1000 0.38 0.39 0.38 41.54 + 16 1000 0.41 0.41 0.41 78.02 + 32 1000 0.40 0.40 0.40 158.56 + 64 1000 0.40 0.40 0.40 318.49 + 128 1000 0.54 0.54 0.54 476.70 + 256 1000 0.58 0.58 0.58 887.69 + 512 1000 0.71 0.71 0.71 1438.34 + 1024 1000 0.90 0.90 0.90 2279.28 + 2048 1000 1.06 1.06 1.06 
3874.31 + 4096 1000 1.45 1.45 1.45 5661.17 + 8192 1000 2.31 2.32 2.32 7073.53 + 16384 1000 4.33 4.34 4.34 7553.97 + 32768 1000 9.19 9.21 9.20 7119.00 + 65536 640 18.31 18.34 18.33 7146.30 + 131072 320 36.87 37.07 36.97 7071.30 + 262144 160 35.54 35.58 35.56 14735.78 + 524288 80 87.74 87.99 87.86 11917.63 + 1048576 40 228.96 231.42 230.39 9061.95 + 2097152 20 696.36 709.10 703.27 5915.00 + 4194304 10 1985.67 2044.60 2022.89 4102.82 + +#----------------------------------------------------------------------------- +# Benchmarking Exchange +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec + 0 1000 0.48 0.48 0.48 0.00 + 1 1000 0.52 0.52 0.52 7.73 + 2 1000 0.49 0.49 0.49 16.40 + 4 1000 0.48 0.48 0.48 33.09 + 8 1000 0.48 0.48 0.48 66.17 + 16 1000 0.50 0.50 0.50 128.26 + 32 1000 0.48 0.48 0.48 263.96 + 64 1000 0.49 0.49 0.49 527.44 + 128 1000 0.82 0.82 0.82 623.11 + 256 1000 0.87 0.87 0.87 1172.32 + 512 1000 1.10 1.10 1.10 1864.90 + 1024 1000 1.34 1.34 1.34 3050.23 + 2048 1000 1.61 1.61 1.61 5080.06 + 4096 1000 2.13 2.13 2.13 7677.53 + 8192 1000 3.22 3.22 3.22 10177.22 + 16384 1000 8.40 8.40 8.40 7800.60 + 32768 1000 11.89 11.89 11.89 11025.67 + 65536 640 18.88 18.88 18.88 13884.65 + 131072 320 34.64 34.64 34.64 15133.26 + 262144 160 75.50 75.51 75.50 13886.89 + 524288 80 158.19 158.19 158.19 13256.82 + 1048576 40 340.45 340.47 340.46 12319.03 + 2097152 20 700.72 700.84 700.78 11969.34 + 4194304 10 2203.09 2203.37 2203.23 7614.34 + +#----------------------------------------------------------------------------- +# Benchmarking Exchange +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec + 0 1000 0.77 0.77 0.77 0.00 + 1 1000 0.77 0.77 0.77 5.20 
+ 2 1000 0.77 0.77 0.77 10.44 + 4 1000 0.77 0.77 0.77 20.90 + 8 1000 0.77 0.77 0.77 41.67 + 16 1000 0.75 0.75 0.75 85.35 + 32 1000 0.88 0.88 0.88 145.55 + 64 1000 0.87 0.87 0.87 295.71 + 128 1000 1.05 1.05 1.05 487.77 + 256 1000 1.12 1.12 1.12 917.85 + 512 1000 1.40 1.41 1.41 1457.15 + 1024 1000 1.60 1.61 1.60 2551.61 + 2048 1000 1.91 1.92 1.91 4277.54 + 4096 1000 2.55 2.55 2.55 6429.54 + 8192 1000 3.91 3.91 3.91 8379.06 + 16384 1000 10.10 10.10 10.10 6488.27 + 32768 1000 13.47 13.47 13.47 9728.21 + 65536 640 21.71 21.71 21.71 12074.41 + 131072 320 41.59 41.60 41.60 12603.23 + 262144 160 93.21 93.22 93.22 11248.08 + 524288 80 198.57 198.65 198.61 10556.85 + 1048576 40 421.62 422.27 421.95 9932.69 + 2097152 20 1035.20 1040.14 1037.67 8064.88 + 4194304 10 3061.07 3092.08 3082.04 5425.86 + +#----------------------------------------------------------------------------- +# Benchmarking Exchange +# #processes = 8 +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec + 0 1000 0.77 0.77 0.77 0.00 + 1 1000 0.84 0.84 0.84 4.74 + 2 1000 0.79 0.79 0.79 10.15 + 4 1000 0.80 0.80 0.80 19.95 + 8 1000 0.79 0.79 0.79 40.61 + 16 1000 0.81 0.81 0.81 79.11 + 32 1000 0.88 0.88 0.88 144.85 + 64 1000 0.88 0.88 0.88 289.34 + 128 1000 1.06 1.07 1.06 480.60 + 256 1000 1.14 1.14 1.14 894.54 + 512 1000 1.40 1.40 1.40 1462.32 + 1024 1000 1.72 1.72 1.72 2375.42 + 2048 1000 2.07 2.07 2.07 3956.40 + 4096 1000 2.82 2.82 2.82 5807.29 + 8192 1000 4.46 4.47 4.47 7333.03 + 16384 1000 10.45 10.45 10.45 6270.48 + 32768 1000 14.15 14.15 14.15 9259.96 + 65536 640 22.30 22.32 22.31 11743.20 + 131072 320 41.25 41.27 41.26 12704.24 + 262144 160 102.72 102.92 102.82 10188.22 + 524288 80 235.85 237.17 236.64 8842.36 + 1048576 40 593.86 600.68 597.88 6982.54 + 2097152 20 1860.25 1868.78 1866.15 4488.81 + 4194304 10 4225.70 4252.34 4236.68 3945.40 + +#---------------------------------------------------------------- +# 
Benchmarking Allreduce +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 4 1000 0.57 0.65 0.61 + 8 1000 0.58 0.66 0.62 + 16 1000 0.58 0.67 0.63 + 32 1000 0.58 0.67 0.63 + 64 1000 0.60 0.69 0.64 + 128 1000 0.77 0.86 0.81 + 256 1000 0.74 0.92 0.83 + 512 1000 1.05 1.11 1.08 + 1024 1000 1.32 1.40 1.36 + 2048 1000 1.78 1.85 1.82 + 4096 1000 2.50 2.59 2.55 + 8192 1000 3.60 3.69 3.64 + 16384 1000 5.75 5.86 5.81 + 32768 1000 10.85 10.96 10.91 + 65536 640 21.22 21.32 21.27 + 131072 320 42.70 44.17 43.44 + 262144 160 94.59 94.95 94.77 + 524288 80 203.73 204.72 204.23 + 1048576 40 470.98 471.29 471.14 + 2097152 20 1122.94 1155.79 1139.36 + 4194304 10 2715.22 2927.84 2821.53 + +#---------------------------------------------------------------- +# Benchmarking Allreduce +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 4 1000 0.96 1.25 1.10 + 8 1000 1.00 1.24 1.11 + 16 1000 0.98 1.24 1.10 + 32 1000 1.06 1.41 1.23 + 64 1000 1.07 1.40 1.23 + 128 1000 1.55 1.83 1.68 + 256 1000 1.75 2.07 1.90 + 512 1000 2.01 2.27 2.13 + 1024 1000 2.41 2.75 2.57 + 2048 1000 3.10 3.47 3.28 + 4096 1000 4.44 4.81 4.61 + 8192 1000 6.21 6.46 6.33 + 16384 1000 9.78 10.01 9.88 + 32768 1000 16.98 17.31 17.15 + 65536 640 32.35 33.07 32.78 + 131072 320 64.75 69.63 66.91 + 262144 160 145.17 160.05 152.37 + 524288 80 322.27 355.67 342.87 + 1048576 40 717.91 764.14 738.44 + 2097152 20 1802.42 1816.75 1807.00 + 4194304 10 5184.73 5351.95 5267.76 + +#---------------------------------------------------------------- +# Benchmarking Allreduce +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 
+ 0 1000 0.05 0.05 0.05 + 4 1000 1.32 2.15 1.71 + 8 1000 1.31 2.18 1.73 + 16 1000 1.31 2.18 1.72 + 32 1000 1.44 2.30 1.85 + 64 1000 1.42 2.32 1.85 + 128 1000 1.98 2.73 2.32 + 256 1000 2.99 3.39 3.16 + 512 1000 3.20 3.51 3.32 + 1024 1000 3.67 4.01 3.82 + 2048 1000 4.48 4.78 4.59 + 4096 1000 5.75 6.12 5.89 + 8192 1000 8.12 8.60 8.33 + 16384 1000 12.69 13.31 12.97 + 32768 1000 22.49 23.25 22.92 + 65536 640 41.84 43.13 42.62 + 131072 320 83.84 87.97 85.94 + 262144 160 185.44 200.78 193.20 + 524288 80 493.02 544.50 520.64 + 1048576 40 1109.07 1193.86 1149.72 + 2097152 20 2877.06 3014.58 2945.01 + 4194304 10 7059.88 7147.18 7111.93 + +#---------------------------------------------------------------- +# Benchmarking Reduce +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.06 0.06 + 4 1000 0.20 0.42 0.31 + 8 1000 0.23 0.69 0.46 + 16 1000 0.23 0.68 0.46 + 32 1000 0.19 0.42 0.30 + 64 1000 0.19 0.42 0.31 + 128 1000 0.30 0.63 0.46 + 256 1000 0.31 0.70 0.50 + 512 1000 0.35 0.84 0.59 + 1024 1000 0.43 1.13 0.78 + 2048 1000 0.55 1.66 1.11 + 4096 1000 0.69 2.36 1.53 + 8192 1000 0.89 3.83 2.36 + 16384 1000 1.54 6.51 4.03 + 32768 1000 2.85 12.04 7.45 + 65536 640 5.72 23.10 14.41 + 131072 320 10.87 44.93 27.90 + 262144 160 36.87 109.83 73.35 + 524288 80 80.07 221.53 150.80 + 1048576 40 166.22 459.74 312.98 + 2097152 20 355.49 997.70 676.59 + 4194304 10 783.35 2145.13 1464.24 + +#---------------------------------------------------------------- +# Benchmarking Reduce +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.06 0.06 + 4 1000 0.21 1.35 0.68 + 8 1000 0.22 1.36 0.69 + 16 1000 0.21 1.40 0.69 + 32 1000 0.22 1.43 0.74 + 64 1000 0.21 1.41 0.73 + 128 1000 0.31 1.58 0.83 
+ 256 1000 0.31 1.81 0.92 + 512 1000 0.36 2.16 1.08 + 1024 1000 0.45 2.78 1.36 + 2048 1000 0.53 3.91 1.82 + 4096 1000 0.57 6.79 3.77 + 8192 1000 0.74 10.98 6.05 + 16384 1000 1.49 20.01 11.07 + 32768 1000 2.71 36.89 20.42 + 65536 640 5.42 47.53 21.91 + 131072 320 10.62 99.12 44.27 + 262144 160 40.90 354.53 170.54 + 524288 80 81.64 478.30 243.33 + 1048576 40 184.78 1546.00 762.26 + 2097152 20 414.77 3321.51 1654.52 + 4194304 10 1160.80 8267.85 4245.48 + +#---------------------------------------------------------------- +# Benchmarking Reduce +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.06 0.06 + 4 1000 0.22 1.93 0.83 + 8 1000 0.22 2.10 0.90 + 16 1000 0.21 2.06 0.87 + 32 1000 0.22 2.06 0.90 + 64 1000 0.22 2.15 0.93 + 128 1000 0.36 2.78 1.19 + 256 1000 0.36 2.82 1.21 + 512 1000 0.41 3.57 1.50 + 1024 1000 0.52 4.51 1.82 + 2048 1000 0.59 6.28 2.43 + 4096 1000 0.73 9.42 3.45 + 8192 1000 0.85 12.03 4.26 + 16384 1000 1.59 21.07 7.61 + 32768 1000 3.06 38.69 14.08 + 65536 640 6.24 75.54 27.76 + 131072 320 11.73 158.19 55.95 + 262144 160 42.56 375.95 152.97 + 524288 80 84.17 767.32 309.35 + 1048576 40 181.33 1619.35 655.09 + 2097152 20 546.93 4094.36 1769.36 + 4194304 10 1303.91 9534.03 4166.76 + +#---------------------------------------------------------------- +# Benchmarking Reduce_local +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 4 1000 0.07 0.08 0.08 + 8 1000 0.08 0.08 0.08 + 16 1000 0.08 0.08 0.08 + 32 1000 0.08 0.08 0.08 + 64 1000 0.09 0.09 0.09 + 128 1000 0.11 0.11 0.11 + 256 1000 0.13 0.13 0.13 + 512 1000 0.19 0.20 0.19 + 1024 1000 0.32 0.32 0.32 + 2048 1000 0.54 0.54 0.54 + 4096 1000 0.99 0.99 0.99 + 8192 1000 1.88 1.88 1.88 + 16384 1000 3.67 3.68 3.68 + 32768 1000 7.38 7.39 7.38 + 
65536 640 14.95 14.99 14.97 + 131072 320 29.15 30.08 29.61 + 262144 160 63.34 64.24 63.79 + 524288 80 143.14 153.41 148.27 + 1048576 40 296.69 324.50 310.59 + 2097152 20 589.16 625.06 607.11 + 4194304 10 1310.58 1401.33 1355.96 + +#---------------------------------------------------------------- +# Benchmarking Reduce_local +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 4 1000 0.08 0.08 0.08 + 8 1000 0.08 0.08 0.08 + 16 1000 0.08 0.08 0.08 + 32 1000 0.08 0.08 0.08 + 64 1000 0.09 0.09 0.09 + 128 1000 0.11 0.11 0.11 + 256 1000 0.13 0.13 0.13 + 512 1000 0.19 0.20 0.19 + 1024 1000 0.31 0.31 0.31 + 2048 1000 0.54 0.54 0.54 + 4096 1000 0.98 1.00 0.99 + 8192 1000 1.87 1.88 1.87 + 16384 1000 3.66 3.70 3.68 + 32768 1000 7.39 7.40 7.39 + 65536 640 14.90 15.00 14.96 + 131072 320 29.12 30.07 29.70 + 262144 160 64.68 66.70 65.80 + 524288 80 146.58 154.67 150.49 + 1048576 40 294.40 322.76 308.93 + 2097152 20 587.09 716.17 660.54 + 4194304 10 1741.57 2165.96 1954.01 + +#---------------------------------------------------------------- +# Benchmarking Reduce_local +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 4 1000 0.07 0.11 0.08 + 8 1000 0.07 0.08 0.08 + 16 1000 0.08 0.08 0.08 + 32 1000 0.08 0.11 0.08 + 64 1000 0.08 0.09 0.09 + 128 1000 0.10 0.12 0.11 + 256 1000 0.13 0.15 0.14 + 512 1000 0.19 0.21 0.19 + 1024 1000 0.31 0.31 0.31 + 2048 1000 0.54 0.56 0.55 + 4096 1000 0.98 0.99 0.98 + 8192 1000 1.87 1.88 1.87 + 16384 1000 3.68 3.72 3.69 + 32768 1000 7.37 7.41 7.40 + 65536 640 14.93 15.04 14.99 + 131072 320 29.14 30.25 29.90 + 262144 160 62.15 67.51 64.97 + 524288 80 140.20 167.16 151.83 + 1048576 40 291.21 379.49 333.51 + 2097152 20 833.91 1082.37 939.43 + 4194304 10 2167.61 2772.70 
2437.27 + +#---------------------------------------------------------------- +# Benchmarking Reduce_scatter +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.07 0.09 0.08 + 4 1000 0.84 0.85 0.85 + 8 1000 0.83 0.85 0.84 + 16 1000 0.83 0.85 0.84 + 32 1000 0.86 0.87 0.87 + 64 1000 0.88 0.90 0.89 + 128 1000 1.07 1.11 1.09 + 256 1000 1.15 1.17 1.16 + 512 1000 1.36 1.38 1.37 + 1024 1000 1.83 1.85 1.84 + 2048 1000 2.32 2.39 2.35 + 4096 1000 3.24 3.29 3.27 + 8192 1000 5.21 5.31 5.26 + 16384 1000 9.87 10.07 9.97 + 32768 1000 18.83 18.93 18.88 + 65536 640 38.59 38.85 38.72 + 131072 320 87.84 89.86 88.85 + 262144 160 176.99 187.55 182.27 + 524288 80 413.72 419.87 416.80 + 1048576 40 875.63 888.52 882.07 + 2097152 20 2109.60 2227.64 2168.62 + 4194304 10 5347.82 5361.50 5354.66 + +#---------------------------------------------------------------- +# Benchmarking Reduce_scatter +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.06 0.06 + 4 1000 1.56 1.74 1.66 + 8 1000 1.59 1.73 1.66 + 16 1000 1.65 1.82 1.73 + 32 1000 1.64 1.84 1.73 + 64 1000 1.83 1.97 1.90 + 128 1000 1.96 2.16 2.06 + 256 1000 2.24 2.48 2.35 + 512 1000 2.65 3.12 2.94 + 1024 1000 3.41 3.91 3.69 + 2048 1000 4.83 5.27 5.03 + 4096 1000 7.69 8.04 7.85 + 8192 1000 14.32 14.61 14.46 + 16384 1000 27.19 27.45 27.33 + 32768 1000 55.81 59.02 57.20 + 65536 640 127.09 137.00 132.10 + 131072 320 280.68 314.87 301.32 + 262144 160 616.51 656.35 640.36 + 524288 80 1373.20 1473.30 1421.93 + 1048576 40 3665.46 3791.18 3741.53 + 2097152 20 7725.84 8105.98 7989.92 + 4194304 10 15819.50 16689.39 16411.02 + +#---------------------------------------------------------------- +# Benchmarking Reduce_scatter +# #processes = 8 
+#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.07 0.06 + 4 1000 2.31 2.62 2.43 + 8 1000 2.31 2.61 2.41 + 16 1000 2.34 2.68 2.47 + 32 1000 2.58 2.84 2.66 + 64 1000 2.77 3.07 2.90 + 128 1000 3.28 3.64 3.43 + 256 1000 4.11 4.40 4.23 + 512 1000 4.86 5.21 5.00 + 1024 1000 6.56 7.02 6.78 + 2048 1000 10.05 10.56 10.30 + 4096 1000 17.17 17.72 17.42 + 8192 1000 32.57 33.84 33.18 + 16384 1000 71.14 73.64 72.23 + 32768 1000 160.04 167.83 164.44 + 65536 640 377.10 403.47 392.88 + 131072 320 888.41 977.49 943.70 + 262144 160 2314.91 2386.38 2353.07 + 524288 80 5131.17 5323.45 5245.81 + 1048576 40 11028.41 11359.50 11216.35 + 2097152 20 23466.98 24070.23 23770.65 + 4194304 10 60077.05 60907.31 60570.72 + +#---------------------------------------------------------------- +# Benchmarking Reduce_scatter_block +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 4 1000 0.89 0.91 0.90 + 8 1000 0.84 0.84 0.84 + 16 1000 0.83 0.85 0.84 + 32 1000 0.84 0.87 0.86 + 64 1000 0.84 0.86 0.85 + 128 1000 1.07 1.12 1.09 + 256 1000 1.18 1.21 1.19 + 512 1000 1.35 1.39 1.37 + 1024 1000 1.83 1.89 1.86 + 2048 1000 2.32 2.41 2.37 + 4096 1000 3.24 3.32 3.28 + 8192 1000 5.18 5.33 5.25 + 16384 1000 9.93 10.05 9.99 + 32768 1000 18.66 18.77 18.71 + 65536 640 39.08 40.03 39.56 + 131072 320 141.53 142.42 141.98 + 262144 160 285.93 286.50 286.21 + 524288 80 681.96 682.71 682.33 + 1048576 40 1218.21 1219.25 1218.73 + 2097152 20 2664.59 2665.86 2665.23 + 4194304 10 7524.28 7525.24 7524.76 + +#---------------------------------------------------------------- +# Benchmarking Reduce_scatter_block +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions 
t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 4 1000 1.43 1.57 1.52 + 8 1000 5.30 5.53 5.41 + 16 1000 5.30 5.59 5.45 + 32 1000 1.63 1.81 1.71 + 64 1000 1.72 1.96 1.84 + 128 1000 1.93 2.16 2.03 + 256 1000 2.12 2.40 2.26 + 512 1000 2.63 2.97 2.79 + 1024 1000 3.30 3.77 3.53 + 2048 1000 4.70 5.13 4.91 + 4096 1000 7.53 7.80 7.67 + 8192 1000 14.32 14.58 14.43 + 16384 1000 26.77 27.06 26.91 + 32768 1000 54.68 55.27 54.88 + 65536 640 122.66 134.93 129.55 + 131072 320 270.67 278.57 274.39 + 262144 160 577.01 599.49 588.65 + 524288 80 1374.00 1421.97 1400.86 + 1048576 40 9101.81 9466.87 9329.57 + 2097152 20 19580.69 20583.60 20207.63 + 4194304 10 48262.96 53410.58 50546.86 + +#---------------------------------------------------------------- +# Benchmarking Reduce_scatter_block +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 4 1000 2.16 2.50 2.29 + 8 1000 2.16 2.44 2.26 + 16 1000 2.17 2.48 2.27 + 32 1000 2.56 2.82 2.65 + 64 1000 2.76 3.07 2.91 + 128 1000 3.12 3.42 3.21 + 256 1000 3.91 4.25 4.03 + 512 1000 4.72 5.19 4.89 + 1024 1000 6.34 6.76 6.54 + 2048 1000 9.96 10.37 10.13 + 4096 1000 17.11 17.42 17.26 + 8192 1000 32.63 33.61 33.07 + 16384 1000 69.05 71.15 70.36 + 32768 1000 172.62 186.24 180.52 + 65536 640 365.65 397.95 381.76 + 131072 320 846.93 879.06 861.27 + 262144 160 2355.40 2501.92 2428.62 + 524288 80 10059.40 10179.26 10125.62 + 1048576 40 21291.06 23103.67 22321.53 + 2097152 20 56558.18 60645.54 58841.52 + 4194304 10 66069.97 67751.50 67201.83 + +#---------------------------------------------------------------- +# Benchmarking Allgather +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 1 1000 0.76 0.81 0.78 + 2 1000 0.74 0.77 0.75 + 4 1000 0.75 0.79 0.77 + 8 
1000 0.75 0.79 0.77 + 16 1000 0.74 0.79 0.76 + 32 1000 0.77 0.82 0.80 + 64 1000 0.79 0.83 0.81 + 128 1000 0.94 0.98 0.96 + 256 1000 1.00 1.03 1.02 + 512 1000 1.11 1.14 1.13 + 1024 1000 1.23 1.36 1.29 + 2048 1000 1.40 1.52 1.46 + 4096 1000 1.78 1.88 1.83 + 8192 1000 2.47 2.62 2.54 + 16384 1000 4.45 4.62 4.54 + 32768 1000 8.48 8.77 8.63 + 65536 640 16.92 16.98 16.95 + 131072 320 36.75 38.47 37.61 + 262144 160 56.56 56.96 56.76 + 524288 80 130.89 130.90 130.89 + 1048576 40 272.66 275.33 274.00 + 2097152 20 569.83 575.91 572.87 + 4194304 10 1433.97 1540.65 1487.31 + +#---------------------------------------------------------------- +# Benchmarking Allgather +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 1 1000 1.42 1.57 1.49 + 2 1000 1.45 1.60 1.52 + 4 1000 1.45 1.60 1.51 + 8 1000 1.44 1.59 1.50 + 16 1000 1.49 1.66 1.56 + 32 1000 1.47 1.66 1.54 + 64 1000 1.62 1.76 1.68 + 128 1000 1.67 1.88 1.77 + 256 1000 1.85 2.11 1.96 + 512 1000 2.02 2.37 2.21 + 1024 1000 2.36 2.75 2.56 + 2048 1000 2.93 3.30 3.08 + 4096 1000 3.88 4.28 4.06 + 8192 1000 7.26 7.53 7.39 + 16384 1000 13.34 13.66 13.53 + 32768 1000 25.52 26.12 25.88 + 65536 640 51.26 53.09 52.16 + 131072 320 104.75 105.91 105.32 + 262144 160 185.50 186.71 186.09 + 524288 80 396.77 408.16 402.50 + 1048576 40 894.79 949.85 922.48 + 2097152 20 2601.28 2757.88 2679.45 + 4194304 10 5686.41 6016.88 5851.97 + +#---------------------------------------------------------------- +# Benchmarking Allgather +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 1 1000 2.16 2.45 2.28 + 2 1000 2.11 2.38 2.23 + 4 1000 2.11 2.48 2.28 + 8 1000 2.10 2.43 2.23 + 16 1000 2.15 2.42 2.25 + 32 1000 2.26 2.63 2.40 + 64 1000 2.43 2.74 2.55 + 128 1000 2.78 3.18 2.92 + 
256 1000 3.16 3.55 3.30 + 512 1000 3.64 4.05 3.78 + 1024 1000 4.43 4.80 4.56 + 2048 1000 5.70 6.13 5.89 + 4096 1000 9.00 9.37 9.20 + 8192 1000 17.15 17.97 17.59 + 16384 1000 33.32 35.50 34.51 + 32768 1000 67.49 72.23 69.68 + 65536 640 134.20 160.82 148.07 + 131072 320 279.70 310.00 294.87 + 262144 160 537.27 594.07 564.29 + 524288 80 1528.31 1564.63 1541.19 + 1048576 40 3683.15 3910.18 3785.97 + 2097152 20 7876.44 8092.11 7966.26 + 4194304 10 15811.30 15965.65 15862.23 + +#---------------------------------------------------------------- +# Benchmarking Allgatherv +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.06 0.06 + 1 1000 0.81 0.83 0.82 + 2 1000 0.79 0.81 0.80 + 4 1000 0.77 0.80 0.78 + 8 1000 0.77 0.79 0.78 + 16 1000 0.78 0.81 0.80 + 32 1000 0.78 0.80 0.79 + 64 1000 0.83 0.85 0.84 + 128 1000 0.97 0.99 0.98 + 256 1000 1.01 1.04 1.03 + 512 1000 1.15 1.18 1.16 + 1024 1000 1.33 1.35 1.34 + 2048 1000 1.45 1.57 1.51 + 4096 1000 1.83 1.88 1.85 + 8192 1000 2.59 2.67 2.63 + 16384 1000 4.52 4.64 4.58 + 32768 1000 8.59 8.63 8.61 + 65536 640 16.89 16.89 16.89 + 131072 320 33.12 33.22 33.17 + 262144 160 57.08 57.41 57.24 + 524288 80 132.93 133.27 133.10 + 1048576 40 281.55 282.21 281.88 + 2097152 20 598.63 602.74 600.69 + 4194304 10 1531.98 1601.40 1566.69 + +#---------------------------------------------------------------- +# Benchmarking Allgatherv +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.06 0.06 + 1 1000 5.45 5.58 5.52 + 2 1000 3.09 3.19 3.14 + 4 1000 3.56 3.65 3.61 + 8 1000 3.04 3.15 3.11 + 16 1000 3.10 3.21 3.17 + 32 1000 3.32 3.46 3.40 + 64 1000 3.37 3.59 3.49 + 128 1000 3.64 3.94 3.81 + 256 1000 3.51 3.89 3.73 + 512 1000 3.71 4.05 3.91 + 
1024 1000 4.03 4.49 4.21 + 2048 1000 4.67 5.03 4.84 + 4096 1000 4.73 5.41 5.19 + 8192 1000 9.01 9.35 9.24 + 16384 1000 17.30 18.66 17.76 + 32768 1000 29.82 31.49 30.68 + 65536 640 57.79 59.58 58.92 + 131072 320 121.03 148.41 135.39 + 262144 160 280.67 284.00 282.24 + 524288 80 602.66 618.98 611.29 + 1048576 40 1428.87 1460.81 1444.53 + 2097152 20 3569.04 3679.01 3623.94 + 4194304 10 7720.38 7983.05 7851.60 + +#---------------------------------------------------------------- +# Benchmarking Allgatherv +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.07 0.06 + 1 1000 6.44 6.55 6.49 + 2 1000 4.17 4.27 4.21 + 4 1000 6.56 6.74 6.63 + 8 1000 4.14 4.26 4.21 + 16 1000 4.39 4.68 4.53 + 32 1000 4.32 4.61 4.46 + 64 1000 4.50 5.06 4.73 + 128 1000 5.30 6.08 5.70 + 256 1000 5.65 6.80 6.23 + 512 1000 6.60 8.04 7.26 + 1024 1000 10.61 11.10 10.85 + 2048 1000 12.25 12.90 12.57 + 4096 1000 15.19 16.31 15.75 + 8192 1000 23.51 24.94 24.25 + 16384 1000 40.72 43.39 41.95 + 32768 1000 76.32 83.98 79.96 + 65536 640 161.55 188.67 179.64 + 131072 320 390.54 462.38 430.67 + 262144 160 885.78 1084.31 990.28 + 524288 80 2238.41 2702.11 2429.73 + 1048576 40 5013.73 5837.52 5415.62 + 2097152 20 10665.54 12292.41 11359.28 + 4194304 10 21274.52 24384.26 22481.50 + +#---------------------------------------------------------------- +# Benchmarking Gather +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.06 0.06 + 1 1000 0.47 1.17 0.82 + 2 1000 0.18 0.41 0.29 + 4 1000 0.19 0.34 0.26 + 8 1000 0.19 0.34 0.26 + 16 1000 0.19 0.37 0.28 + 32 1000 0.21 0.40 0.31 + 64 1000 0.21 0.40 0.31 + 128 1000 0.29 0.53 0.41 + 256 1000 0.29 0.64 0.46 + 512 1000 0.34 0.70 0.52 + 1024 1000 0.44 0.82 0.63 + 2048 1000 0.54 1.06 0.80 + 4096 1000 0.69 1.39 1.04 
+ 8192 1000 0.89 1.91 1.40 + 16384 1000 1.61 2.79 2.20 + 32768 1000 2.88 4.84 3.86 + 65536 640 5.86 9.77 7.81 + 131072 320 11.21 23.87 17.54 + 262144 160 40.10 58.94 49.52 + 524288 80 81.42 123.08 102.25 + 1048576 40 174.01 260.28 217.14 + 2097152 20 370.26 560.44 465.35 + 4194304 10 880.43 1333.76 1107.10 + +#---------------------------------------------------------------- +# Benchmarking Gather +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.06 0.06 + 1 1000 0.18 0.78 0.46 + 2 1000 0.18 0.68 0.41 + 4 1000 0.18 0.68 0.42 + 8 1000 0.18 0.67 0.41 + 16 1000 0.18 0.80 0.47 + 32 1000 0.19 0.83 0.49 + 64 1000 0.19 0.86 0.49 + 128 1000 0.32 1.03 0.63 + 256 1000 0.31 1.22 0.71 + 512 1000 0.33 1.37 0.80 + 1024 1000 0.57 1.63 0.87 + 2048 1000 0.68 2.02 1.06 + 4096 1000 0.76 2.87 1.36 + 8192 1000 0.86 4.88 2.62 + 16384 1000 1.54 7.17 4.11 + 32768 1000 3.53 12.26 5.79 + 65536 640 6.99 24.13 11.36 + 131072 320 13.94 51.57 23.49 + 262144 160 45.32 152.99 105.09 + 524288 80 88.48 305.35 207.55 + 1048576 40 183.34 639.10 431.83 + 2097152 20 414.82 1441.11 967.10 + 4194304 10 1280.17 4482.35 3000.00 + +#---------------------------------------------------------------- +# Benchmarking Gather +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.06 0.05 + 1 1000 0.18 1.62 0.70 + 2 1000 0.18 1.54 0.66 + 4 1000 0.18 1.61 0.69 + 8 1000 0.18 1.62 0.69 + 16 1000 0.18 1.70 0.71 + 32 1000 0.19 1.78 0.74 + 64 1000 0.19 1.90 0.74 + 128 1000 0.28 2.03 0.85 + 256 1000 0.29 2.35 0.96 + 512 1000 0.31 2.85 1.12 + 1024 1000 0.37 3.58 1.38 + 2048 1000 0.46 4.68 1.75 + 4096 1000 0.57 6.43 2.32 + 8192 1000 0.77 10.23 3.68 + 16384 1000 1.63 16.74 6.44 + 32768 1000 2.99 30.61 12.03 + 65536 640 5.93 72.44 27.66 + 131072 320 12.02 162.67 
65.99 + 262144 160 61.40 340.16 157.60 + 524288 80 141.32 777.54 357.54 + 1048576 40 382.92 2178.24 983.22 + 2097152 20 1219.19 5509.11 2658.79 + 4194304 10 2385.13 16033.09 8163.96 + +#---------------------------------------------------------------- +# Benchmarking Gatherv +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.07 0.11 0.09 + 1 1000 0.17 0.35 0.26 + 2 1000 0.18 0.38 0.28 + 4 1000 0.17 0.33 0.25 + 8 1000 0.17 0.33 0.25 + 16 1000 0.17 0.32 0.25 + 32 1000 0.19 0.37 0.28 + 64 1000 0.19 0.37 0.28 + 128 1000 0.24 0.49 0.36 + 256 1000 0.25 0.53 0.39 + 512 1000 0.29 0.63 0.46 + 1024 1000 0.38 0.84 0.61 + 2048 1000 0.53 1.04 0.79 + 4096 1000 0.65 1.35 1.00 + 8192 1000 0.87 1.86 1.36 + 16384 1000 1.55 2.69 2.12 + 32768 1000 2.88 4.96 3.92 + 65536 640 5.73 9.70 7.72 + 131072 320 10.91 19.36 15.13 + 262144 160 54.33 54.51 54.42 + 524288 80 117.54 118.03 117.78 + 1048576 40 257.64 258.28 257.96 + 2097152 20 569.20 570.27 569.74 + 4194304 10 1397.74 1399.36 1398.55 + +#---------------------------------------------------------------- +# Benchmarking Gatherv +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.07 0.07 + 1 1000 0.32 0.79 0.47 + 2 1000 0.32 0.77 0.46 + 4 1000 0.32 0.80 0.47 + 8 1000 0.32 0.79 0.47 + 16 1000 0.32 0.81 0.47 + 32 1000 0.36 0.85 0.51 + 64 1000 0.35 0.86 0.52 + 128 1000 0.43 1.07 0.61 + 256 1000 0.43 1.18 0.64 + 512 1000 0.49 1.44 0.76 + 1024 1000 0.57 1.62 0.87 + 2048 1000 0.73 1.95 1.05 + 4096 1000 0.75 2.63 1.23 + 8192 1000 1.02 3.91 1.75 + 16384 1000 1.93 6.72 3.16 + 32768 1000 3.46 12.29 5.75 + 65536 640 6.92 24.66 11.45 + 131072 320 13.63 51.70 23.31 + 262144 160 65.09 147.30 116.39 + 524288 80 133.50 295.79 234.53 + 1048576 40 
277.78 625.09 494.70 + 2097152 20 616.22 1376.88 1093.93 + 4194304 10 1977.94 4454.09 3537.94 + +#---------------------------------------------------------------- +# Benchmarking Gatherv +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.09 0.07 + 1 1000 0.51 1.61 0.75 + 2 1000 0.55 1.65 0.78 + 4 1000 0.51 1.61 0.74 + 8 1000 0.51 1.64 0.75 + 16 1000 0.55 1.60 0.75 + 32 1000 0.55 1.65 0.77 + 64 1000 0.60 1.70 0.81 + 128 1000 0.60 2.04 0.87 + 256 1000 0.64 2.29 0.91 + 512 1000 0.68 2.70 1.01 + 1024 1000 0.77 3.12 1.19 + 2048 1000 0.90 3.60 1.35 + 4096 1000 0.98 5.02 1.57 + 8192 1000 1.19 7.75 2.12 + 16384 1000 2.49 14.50 4.10 + 32768 1000 5.01 27.58 7.92 + 65536 640 10.04 56.33 15.92 + 131072 320 26.38 114.02 37.58 + 262144 160 69.21 325.57 213.86 + 524288 80 148.38 692.13 454.97 + 1048576 40 320.66 1506.92 984.42 + 2097152 20 1021.75 4931.44 3215.65 + 4194304 10 2228.91 10661.00 6981.97 + +#---------------------------------------------------------------- +# Benchmarking Scatter +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.06 0.06 + 1 1000 0.33 0.34 0.34 + 2 1000 0.25 0.26 0.25 + 4 1000 0.25 0.28 0.27 + 8 1000 0.25 0.26 0.26 + 16 1000 0.25 0.26 0.25 + 32 1000 0.27 0.29 0.28 + 64 1000 0.27 0.30 0.28 + 128 1000 0.37 0.46 0.42 + 256 1000 0.38 0.51 0.44 + 512 1000 0.45 0.64 0.54 + 1024 1000 0.56 0.81 0.68 + 2048 1000 0.70 1.04 0.87 + 4096 1000 0.88 1.37 1.12 + 8192 1000 1.18 1.99 1.59 + 16384 1000 2.10 3.47 2.78 + 32768 1000 4.23 5.67 4.95 + 65536 640 8.51 10.64 9.58 + 131072 320 20.00 20.30 20.15 + 262144 160 41.90 41.96 41.93 + 524288 80 106.56 106.58 106.57 + 1048576 40 229.74 229.81 229.78 + 2097152 20 490.18 490.43 490.30 + 4194304 10 1217.89 1217.98 1217.93 + 
+#---------------------------------------------------------------- +# Benchmarking Scatter +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.06 0.05 + 1 1000 0.38 0.53 0.45 + 2 1000 0.37 0.49 0.43 + 4 1000 0.36 0.48 0.43 + 8 1000 0.36 0.47 0.42 + 16 1000 0.37 0.49 0.43 + 32 1000 0.37 0.49 0.43 + 64 1000 0.45 0.62 0.54 + 128 1000 0.54 0.75 0.65 + 256 1000 0.60 0.89 0.76 + 512 1000 0.78 1.18 1.01 + 1024 1000 1.00 1.59 1.31 + 2048 1000 0.99 1.69 1.38 + 4096 1000 1.58 3.24 2.51 + 8192 1000 1.86 3.41 2.68 + 16384 1000 3.47 6.34 4.96 + 32768 1000 11.36 13.46 12.33 + 65536 640 19.15 22.02 20.90 + 131072 320 38.57 42.43 40.87 + 262144 160 47.63 58.55 54.72 + 524288 80 177.56 190.81 186.43 + 1048576 40 218.75 543.12 419.82 + 2097152 20 546.07 1284.67 1006.97 + 4194304 10 1816.91 4180.41 3301.82 + +#---------------------------------------------------------------- +# Benchmarking Scatter +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.06 0.05 + 1 1000 0.43 0.67 0.55 + 2 1000 0.42 0.68 0.55 + 4 1000 0.41 0.66 0.54 + 8 1000 0.43 0.65 0.54 + 16 1000 0.42 0.66 0.54 + 32 1000 0.50 0.72 0.61 + 64 1000 0.46 0.87 0.71 + 128 1000 0.60 1.13 0.89 + 256 1000 0.91 1.72 1.27 + 512 1000 1.15 2.34 1.76 + 1024 1000 1.47 2.94 2.15 + 2048 1000 1.87 3.86 2.84 + 4096 1000 3.10 6.00 4.49 + 8192 1000 5.78 9.77 7.76 + 16384 1000 6.07 16.07 12.22 + 32768 1000 11.95 25.19 20.86 + 65536 640 18.67 41.13 36.07 + 131072 320 31.17 57.38 47.86 + 262144 160 50.82 136.33 119.97 + 524288 80 185.54 219.76 205.28 + 1048576 40 228.96 1230.46 786.49 + 2097152 20 809.61 4005.84 2582.19 + 4194304 10 1999.27 10081.93 6533.26 + +#---------------------------------------------------------------- +# Benchmarking Scatterv +# #processes = 2 +# ( 6 
additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.09 0.08 + 1 1000 0.22 0.23 0.23 + 2 1000 0.25 0.25 0.25 + 4 1000 0.22 0.23 0.23 + 8 1000 0.22 0.23 0.22 + 16 1000 0.21 0.21 0.21 + 32 1000 0.26 0.27 0.27 + 64 1000 0.24 0.25 0.25 + 128 1000 0.39 0.43 0.41 + 256 1000 0.39 0.51 0.45 + 512 1000 0.43 0.64 0.53 + 1024 1000 0.57 0.78 0.68 + 2048 1000 0.68 1.05 0.87 + 4096 1000 0.90 1.38 1.14 + 8192 1000 1.18 2.00 1.59 + 16384 1000 2.25 3.55 2.90 + 32768 1000 4.26 5.83 5.04 + 65536 640 8.50 10.76 9.63 + 131072 320 16.76 20.02 18.39 + 262144 160 43.04 43.21 43.12 + 524288 80 103.74 103.75 103.74 + 1048576 40 228.60 228.67 228.63 + 2097152 20 491.96 492.15 492.05 + 4194304 10 1196.97 1196.98 1196.98 + +#---------------------------------------------------------------- +# Benchmarking Scatterv +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.09 0.07 + 1 1000 0.20 0.39 0.32 + 2 1000 0.21 0.39 0.31 + 4 1000 0.21 0.39 0.31 + 8 1000 0.21 0.39 0.31 + 16 1000 0.20 0.38 0.31 + 32 1000 0.21 0.39 0.32 + 64 1000 0.22 0.40 0.32 + 128 1000 0.41 0.68 0.59 + 256 1000 0.48 0.75 0.65 + 512 1000 0.57 0.92 0.79 + 1024 1000 0.82 1.38 1.16 + 2048 1000 0.95 1.77 1.41 + 4096 1000 1.33 2.30 1.86 + 8192 1000 1.88 3.47 2.72 + 16384 1000 3.54 6.42 5.02 + 32768 1000 6.12 12.38 9.60 + 65536 640 10.82 23.27 18.09 + 131072 320 24.17 50.67 40.06 + 262144 160 43.65 108.05 83.80 + 524288 80 99.34 245.09 191.13 + 1048576 40 220.04 530.35 414.41 + 2097152 20 527.48 1237.40 971.40 + 4194304 10 1814.85 4251.36 3335.76 + +#---------------------------------------------------------------- +# Benchmarking Scatterv +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions 
t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.08 0.07 + 1 1000 0.19 0.73 0.41 + 2 1000 0.19 0.73 0.41 + 4 1000 0.18 0.73 0.41 + 8 1000 0.18 0.73 0.39 + 16 1000 0.19 0.73 0.39 + 32 1000 0.19 0.77 0.41 + 64 1000 0.19 0.76 0.40 + 128 1000 0.35 1.27 0.76 + 256 1000 0.43 1.30 0.84 + 512 1000 0.58 1.71 1.12 + 1024 1000 0.89 2.82 1.98 + 2048 1000 0.99 3.37 2.30 + 4096 1000 1.43 4.53 3.16 + 8192 1000 2.26 7.18 5.00 + 16384 1000 3.67 13.16 8.92 + 32768 1000 6.48 25.71 17.13 + 65536 640 13.70 57.35 38.04 + 131072 320 26.37 117.12 77.47 + 262144 160 42.97 244.36 155.50 + 524288 80 102.04 545.69 348.12 + 1048576 40 236.83 1271.41 817.11 + 2097152 20 806.97 4005.28 2613.71 + 4194304 10 2008.31 9991.36 6460.41 + +#---------------------------------------------------------------- +# Benchmarking Alltoall +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 1 1000 0.90 0.95 0.93 + 2 1000 0.86 0.92 0.89 + 4 1000 0.88 0.92 0.90 + 8 1000 0.88 0.92 0.90 + 16 1000 0.90 0.93 0.91 + 32 1000 0.87 0.92 0.89 + 64 1000 0.87 0.92 0.90 + 128 1000 1.10 1.16 1.13 + 256 1000 1.19 1.20 1.20 + 512 1000 1.24 1.27 1.25 + 1024 1000 1.44 1.50 1.47 + 2048 1000 1.53 1.65 1.59 + 4096 1000 2.01 2.08 2.05 + 8192 1000 2.78 2.89 2.83 + 16384 1000 4.94 4.99 4.96 + 32768 1000 8.53 8.67 8.60 + 65536 640 13.32 13.65 13.48 + 131072 320 25.11 25.33 25.22 + 262144 160 59.55 61.51 60.53 + 524288 80 129.35 131.42 130.39 + 1048576 40 267.03 268.53 267.78 + 2097152 20 597.33 611.16 604.24 + 4194304 10 1831.55 1871.67 1851.61 + +#---------------------------------------------------------------- +# Benchmarking Alltoall +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 1 1000 2.37 2.92 
2.70 + 2 1000 2.37 2.94 2.70 + 4 1000 2.38 2.88 2.69 + 8 1000 2.36 2.93 2.70 + 16 1000 2.36 2.98 2.71 + 32 1000 2.36 2.86 2.63 + 64 1000 2.46 2.91 2.69 + 128 1000 2.72 3.28 3.04 + 256 1000 2.81 3.39 3.16 + 512 1000 3.03 3.86 3.51 + 1024 1000 3.39 4.36 3.99 + 2048 1000 4.09 5.05 4.68 + 4096 1000 5.14 6.11 5.75 + 8192 1000 7.72 8.62 8.32 + 16384 1000 17.50 17.98 17.72 + 32768 1000 24.15 24.39 24.28 + 65536 640 42.23 43.43 42.75 + 131072 320 78.29 102.78 93.54 + 262144 160 158.36 200.73 180.10 + 524288 80 359.38 388.63 374.87 + 1048576 40 925.76 1078.66 1013.74 + 2097152 20 2825.88 2915.38 2879.14 + 4194304 10 5636.74 5791.84 5736.39 + +#---------------------------------------------------------------- +# Benchmarking Alltoall +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 1 1000 3.78 4.16 3.99 + 2 1000 3.80 4.32 4.06 + 4 1000 3.79 4.30 4.07 + 8 1000 3.83 4.35 4.08 + 16 1000 3.75 4.12 3.98 + 32 1000 3.78 4.16 4.01 + 64 1000 3.78 4.22 4.03 + 128 1000 4.59 5.13 4.92 + 256 1000 4.86 5.44 5.23 + 512 1000 5.70 6.62 6.33 + 1024 1000 6.94 8.08 7.62 + 2048 1000 8.33 9.59 9.18 + 4096 1000 11.65 12.86 12.46 + 8192 1000 18.94 20.54 19.67 + 16384 1000 42.86 49.18 45.97 + 32768 1000 62.84 75.71 69.95 + 65536 640 101.08 164.90 143.39 + 131072 320 197.06 293.37 256.89 + 262144 160 471.71 714.79 617.43 + 524288 80 1763.17 2130.64 2018.75 + 1048576 40 3899.54 4127.30 4045.85 + 2097152 20 7933.11 8198.15 8091.14 + 4194304 10 15999.19 16457.20 16225.69 + +#---------------------------------------------------------------- +# Benchmarking Alltoallv +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 1.14 1.21 1.17 + 1 1000 1.17 1.23 1.20 + 2 1000 1.17 1.23 1.20 + 4 1000 1.16 1.22 1.19 + 8 1000 1.16 1.22 1.19 + 16 1000 
1.19 1.25 1.22 + 32 1000 1.21 1.26 1.24 + 64 1000 1.19 1.26 1.23 + 128 1000 1.37 1.42 1.40 + 256 1000 1.39 1.44 1.42 + 512 1000 1.53 1.58 1.55 + 1024 1000 1.71 1.79 1.75 + 2048 1000 1.90 1.92 1.91 + 4096 1000 2.29 2.33 2.31 + 8192 1000 3.08 3.10 3.09 + 16384 1000 5.20 5.28 5.24 + 32768 1000 9.64 9.74 9.69 + 65536 640 18.14 18.30 18.22 + 131072 320 35.08 36.57 35.82 + 262144 160 57.04 57.14 57.09 + 524288 80 140.28 140.60 140.44 + 1048576 40 279.14 279.42 279.28 + 2097152 20 620.88 621.87 621.38 + 4194304 10 1915.84 1917.58 1916.71 + +#---------------------------------------------------------------- +# Benchmarking Alltoallv +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 3.06 3.35 3.22 + 1 1000 3.10 3.45 3.28 + 2 1000 3.14 3.47 3.30 + 4 1000 3.12 3.50 3.31 + 8 1000 3.12 3.39 3.26 + 16 1000 3.08 3.49 3.30 + 32 1000 3.13 3.44 3.29 + 64 1000 3.12 3.43 3.27 + 128 1000 3.53 3.88 3.70 + 256 1000 3.61 4.02 3.82 + 512 1000 3.98 4.40 4.21 + 1024 1000 4.32 4.87 4.63 + 2048 1000 4.94 5.44 5.23 + 4096 1000 6.19 6.62 6.44 + 8192 1000 8.35 8.94 8.65 + 16384 1000 19.95 21.86 20.88 + 32768 1000 26.60 30.68 29.13 + 65536 640 46.06 53.34 50.24 + 131072 320 81.01 100.50 91.71 + 262144 160 170.23 200.68 185.41 + 524288 80 337.33 422.76 380.59 + 1048576 40 930.24 1101.65 1030.94 + 2097152 20 2758.14 2848.18 2822.03 + 4194304 10 5663.37 5771.31 5730.97 + +#---------------------------------------------------------------- +# Benchmarking Alltoallv +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 4.81 5.01 4.90 + 1 1000 4.93 5.23 5.05 + 2 1000 4.89 5.12 5.00 + 4 1000 4.92 5.20 5.02 + 8 1000 4.88 5.15 4.99 + 16 1000 4.95 5.13 5.06 + 32 1000 4.97 5.22 5.09 + 64 1000 4.97 5.17 5.07 + 128 1000 5.75 6.03 5.91 + 256 1000 6.03 6.46 6.26 + 512 
1000 7.18 7.58 7.39 + 1024 1000 8.43 9.05 8.73 + 2048 1000 9.79 10.49 10.18 + 4096 1000 13.10 13.66 13.36 + 8192 1000 20.04 21.42 20.70 + 16384 1000 46.21 58.16 52.50 + 32768 1000 65.23 88.61 78.79 + 65536 640 102.67 171.24 147.07 + 131072 320 194.09 293.40 259.61 + 262144 160 512.03 768.36 665.36 + 524288 80 1793.31 2150.38 2040.99 + 1048576 40 3879.39 4100.21 4021.65 + 2097152 20 7905.51 8225.90 8105.91 + 4194304 10 15965.63 16403.05 16162.66 + +#---------------------------------------------------------------- +# Benchmarking Bcast +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 1 1000 0.29 0.36 0.33 + 2 1000 0.29 0.36 0.32 + 4 1000 0.30 0.35 0.33 + 8 1000 0.30 0.36 0.33 + 16 1000 0.29 0.36 0.33 + 32 1000 0.30 0.37 0.33 + 64 1000 0.30 0.36 0.33 + 128 1000 0.37 0.48 0.43 + 256 1000 0.34 0.46 0.40 + 512 1000 0.39 0.57 0.48 + 1024 1000 0.49 0.67 0.58 + 2048 1000 0.59 0.88 0.74 + 4096 1000 0.81 1.27 1.04 + 8192 1000 1.01 1.80 1.40 + 16384 1000 1.72 2.61 2.16 + 32768 1000 3.14 4.54 3.84 + 65536 640 6.24 8.35 7.29 + 131072 320 11.94 14.51 13.22 + 262144 160 24.06 27.95 26.00 + 524288 80 52.21 56.15 54.18 + 1048576 40 113.50 119.47 116.49 + 2097152 20 232.43 238.56 235.50 + 4194304 10 478.86 496.16 487.51 + +#---------------------------------------------------------------- +# Benchmarking Bcast +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 1 1000 0.36 0.46 0.41 + 2 1000 0.36 0.46 0.41 + 4 1000 0.36 0.45 0.41 + 8 1000 0.36 0.46 0.41 + 16 1000 0.36 0.46 0.41 + 32 1000 0.37 0.46 0.42 + 64 1000 0.37 0.50 0.45 + 128 1000 0.60 0.64 0.63 + 256 1000 0.45 0.68 0.59 + 512 1000 0.53 0.80 0.68 + 1024 1000 0.71 1.03 0.89 + 2048 1000 0.93 
1.45 1.22 + 4096 1000 1.22 2.00 1.63 + 8192 1000 1.71 3.41 2.69 + 16384 1000 3.13 4.95 4.13 + 32768 1000 5.74 8.44 7.02 + 65536 640 11.63 15.37 13.50 + 131072 320 23.10 28.90 25.93 + 262144 160 46.82 60.35 54.43 + 524288 80 108.96 142.33 128.11 + 1048576 40 225.80 274.10 254.67 + 2097152 20 505.56 583.67 553.06 + 4194304 10 1190.03 1326.45 1269.11 + +#---------------------------------------------------------------- +# Benchmarking Bcast +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 1 1000 0.43 0.56 0.50 + 2 1000 0.43 0.57 0.51 + 4 1000 0.43 0.54 0.50 + 8 1000 0.43 0.58 0.51 + 16 1000 0.44 0.55 0.50 + 32 1000 0.50 0.59 0.56 + 64 1000 0.49 0.58 0.55 + 128 1000 0.81 0.94 0.88 + 256 1000 0.55 0.90 0.72 + 512 1000 0.70 1.14 0.88 + 1024 1000 0.94 1.53 1.19 + 2048 1000 1.41 2.42 1.81 + 4096 1000 1.66 3.08 2.27 + 8192 1000 2.24 4.56 3.41 + 16384 1000 4.37 7.67 6.02 + 32768 1000 8.32 14.39 11.36 + 65536 640 17.08 27.35 22.26 + 131072 320 35.70 53.67 44.40 + 262144 160 72.45 108.26 92.67 + 524288 80 165.32 245.11 219.04 + 1048576 40 378.95 542.68 488.54 + 2097152 20 925.22 1183.89 1088.28 + 4194304 10 2403.77 2730.41 2584.13 + +#--------------------------------------------------- +# Benchmarking Barrier +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#--------------------------------------------------- + #repetitions t_min[usec] t_max[usec] t_avg[usec] + 1000 0.63 0.63 0.63 + +#--------------------------------------------------- +# Benchmarking Barrier +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#--------------------------------------------------- + #repetitions t_min[usec] t_max[usec] t_avg[usec] + 1000 1.18 1.18 1.18 + +#--------------------------------------------------- +# Benchmarking Barrier +# #processes = 8 +#--------------------------------------------------- + #repetitions t_min[usec] 
t_max[usec] t_avg[usec] + 1000 2.09 2.09 2.09 + + +# All processes entering MPI_Finalize +``` + +#### OpenMPI + +```shell +#---------------------------------------------------------------- +# Intel(R) MPI Benchmarks 2021.3, MPI-1 part +#---------------------------------------------------------------- +# Date : Thu Dec 15 23:59:55 2022 +# Machine : aarch64 +# System : Linux +# Release : 5.10.0-60.18.0.50.oe2203.aarch64 +# Version : #1 SMP Wed Mar 30 02:43:08 UTC 2022 +# MPI Version : 3.1 +# MPI Thread Environment: + + +# Calling sequence was: + +# IMB-MPI1 + +# Minimum message length in bytes: 0 +# Maximum message length in bytes: 4194304 +# +# MPI_Datatype : MPI_BYTE +# MPI_Datatype for reductions : MPI_FLOAT +# MPI_Op : MPI_SUM +# +# + +# List of Benchmarks to run: + +# PingPong +# PingPing +# Sendrecv +# Exchange +# Allreduce +# Reduce +# Reduce_local +# Reduce_scatter +# Reduce_scatter_block +# Allgather +# Allgatherv +# Gather +# Gatherv +# Scatter +# Scatterv +# Alltoall +# Alltoallv +# Bcast +# Barrier + +#--------------------------------------------------- +# Benchmarking PingPong +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#--------------------------------------------------- + #bytes #repetitions t[usec] Mbytes/sec + 0 1000 0.29 0.00 + 1 1000 0.34 2.93 + 2 1000 0.34 5.90 + 4 1000 0.36 11.19 + 8 1000 0.33 24.46 + 16 1000 0.35 45.56 + 32 1000 0.35 90.28 + 64 1000 0.38 167.75 + 128 1000 0.44 290.98 + 256 1000 0.48 532.95 + 512 1000 0.74 687.46 + 1024 1000 0.88 1164.25 + 2048 1000 1.06 1933.53 + 4096 1000 2.76 1485.75 + 8192 1000 2.97 2759.49 + 16384 1000 3.97 4123.52 + 32768 1000 6.01 5448.51 + 65536 640 8.90 7367.68 + 131072 320 15.57 8418.43 + 262144 160 31.99 8195.12 + 524288 80 74.53 7034.12 + 1048576 40 161.56 6490.34 + 2097152 20 352.80 5944.27 + 4194304 10 752.06 5577.12 + +#--------------------------------------------------- +# Benchmarking PingPing +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) 
+#--------------------------------------------------- + #bytes #repetitions t[usec] Mbytes/sec + 0 1000 0.40 0.00 + 1 1000 0.44 2.30 + 2 1000 0.43 4.61 + 4 1000 0.43 9.35 + 8 1000 0.43 18.76 + 16 1000 0.44 36.34 + 32 1000 0.43 73.74 + 64 1000 0.45 140.95 + 128 1000 0.48 265.22 + 256 1000 0.52 490.81 + 512 1000 1.09 470.45 + 1024 1000 1.12 912.78 + 2048 1000 1.36 1505.41 + 4096 1000 2.94 1394.81 + 8192 1000 3.52 2325.00 + 16384 1000 4.46 3676.29 + 32768 1000 6.08 5387.21 + 65536 640 9.43 6948.09 + 131072 320 16.19 8094.78 + 262144 160 34.00 7710.29 + 524288 80 77.55 6760.39 + 1048576 40 171.11 6128.05 + 2097152 20 359.13 5839.58 + 4194304 10 786.46 5333.11 + +#----------------------------------------------------------------------------- +# Benchmarking Sendrecv +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec + 0 1000 0.39 0.39 0.39 0.00 + 1 1000 0.43 0.43 0.43 4.68 + 2 1000 0.42 0.42 0.42 9.41 + 4 1000 0.42 0.42 0.42 18.89 + 8 1000 0.42 0.42 0.42 38.10 + 16 1000 0.43 0.43 0.43 74.61 + 32 1000 0.43 0.43 0.43 148.29 + 64 1000 0.46 0.46 0.46 280.90 + 128 1000 0.49 0.49 0.49 523.50 + 256 1000 0.52 0.52 0.52 977.32 + 512 1000 1.06 1.06 1.06 965.49 + 1024 1000 1.13 1.13 1.13 1810.26 + 2048 1000 1.37 1.37 1.37 2996.58 + 4096 1000 2.90 2.90 2.90 2822.25 + 8192 1000 3.50 3.50 3.50 4685.27 + 16384 1000 4.27 4.27 4.27 7671.15 + 32768 1000 6.09 6.09 6.09 10753.32 + 65536 640 9.62 9.62 9.62 13626.83 + 131072 320 16.61 16.61 16.61 15779.39 + 262144 160 33.02 33.02 33.02 15879.61 + 524288 80 75.81 75.81 75.81 13831.63 + 1048576 40 169.11 169.12 169.12 12400.16 + 2097152 20 366.55 366.55 366.55 11442.56 + 4194304 10 803.60 803.65 803.63 10438.15 + +#----------------------------------------------------------------------------- +# Benchmarking Sendrecv +# #processes = 4 +# ( 4 additional processes waiting in 
MPI_Barrier) +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec + 0 1000 0.39 0.39 0.39 0.00 + 1 1000 0.44 0.45 0.45 4.40 + 2 1000 0.44 0.44 0.44 9.18 + 4 1000 0.44 0.44 0.44 18.10 + 8 1000 0.44 0.44 0.44 36.40 + 16 1000 0.45 0.45 0.45 71.16 + 32 1000 0.45 0.45 0.45 142.69 + 64 1000 0.48 0.48 0.48 268.16 + 128 1000 0.53 0.53 0.53 485.86 + 256 1000 0.58 0.58 0.58 882.55 + 512 1000 1.05 1.05 1.05 976.28 + 1024 1000 1.23 1.23 1.23 1658.61 + 2048 1000 1.54 1.54 1.54 2657.36 + 4096 1000 4.81 4.82 4.81 1701.21 + 8192 1000 5.71 5.71 5.71 2869.69 + 16384 1000 7.63 7.63 7.63 4295.20 + 32768 1000 10.94 10.95 10.95 5984.61 + 65536 640 17.78 17.79 17.79 7366.43 + 131072 320 32.08 32.14 32.11 8157.55 + 262144 160 63.62 63.84 63.74 8212.56 + 524288 80 145.62 146.60 146.13 7152.80 + 1048576 40 327.36 331.82 329.71 6320.13 + 2097152 20 652.20 668.91 661.07 6270.37 + 4194304 10 1999.39 2102.76 2052.02 3989.34 + +#----------------------------------------------------------------------------- +# Benchmarking Sendrecv +# #processes = 8 +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec + 0 1000 0.42 0.42 0.42 0.00 + 1 1000 0.45 0.45 0.45 4.46 + 2 1000 0.49 0.49 0.49 8.13 + 4 1000 0.54 0.54 0.54 14.74 + 8 1000 0.45 0.45 0.45 35.48 + 16 1000 0.48 0.48 0.48 66.43 + 32 1000 0.47 0.47 0.47 136.45 + 64 1000 0.50 0.50 0.50 254.01 + 128 1000 0.54 0.54 0.54 474.19 + 256 1000 0.61 0.61 0.61 844.40 + 512 1000 1.07 1.07 1.07 960.92 + 1024 1000 1.18 1.18 1.18 1741.02 + 2048 1000 1.44 1.44 1.44 2842.55 + 4096 1000 5.06 5.06 5.06 1618.06 + 8192 1000 6.30 6.31 6.30 2596.90 + 16384 1000 8.27 8.28 8.28 3956.77 + 32768 1000 11.11 11.13 11.12 5888.52 + 65536 640 17.40 17.42 17.41 7522.98 + 131072 320 31.24 31.39 31.32 8351.09 + 262144 160 66.88 67.18 67.02 7804.59 + 524288 80 152.13 153.88 153.01 6814.25 + 
1048576 40 324.90 337.11 331.17 6220.97 + 2097152 20 919.44 950.86 933.63 4411.08 + 4194304 10 2663.93 2959.19 2861.76 2834.77 + +#----------------------------------------------------------------------------- +# Benchmarking Exchange +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec + 0 1000 0.65 0.65 0.65 0.00 + 1 1000 0.73 0.73 0.73 5.47 + 2 1000 0.78 0.78 0.78 10.23 + 4 1000 0.73 0.73 0.73 22.06 + 8 1000 0.74 0.74 0.74 43.01 + 16 1000 0.78 0.78 0.78 82.14 + 32 1000 0.78 0.78 0.78 164.02 + 64 1000 0.84 0.84 0.84 306.01 + 128 1000 0.97 0.97 0.97 525.44 + 256 1000 1.10 1.10 1.10 926.91 + 512 1000 1.97 1.97 1.97 1040.19 + 1024 1000 2.18 2.18 2.18 1876.99 + 2048 1000 2.55 2.55 2.55 3210.86 + 4096 1000 6.21 6.21 6.21 2638.73 + 8192 1000 7.00 7.00 7.00 4681.76 + 16384 1000 8.85 8.85 8.85 7407.78 + 32768 1000 12.39 12.39 12.39 10579.14 + 65536 640 19.45 19.45 19.45 13480.01 + 131072 320 35.58 35.58 35.58 14734.74 + 262144 160 74.71 74.71 74.71 14034.77 + 524288 80 175.07 175.09 175.08 11977.43 + 1048576 40 379.82 379.83 379.82 11042.58 + 2097152 20 802.35 802.42 802.38 10454.17 + 4194304 10 1836.56 1837.14 1836.85 9132.27 + +#----------------------------------------------------------------------------- +# Benchmarking Exchange +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec + 0 1000 0.75 0.75 0.75 0.00 + 1 1000 0.82 0.82 0.82 4.89 + 2 1000 0.82 0.82 0.82 9.72 + 4 1000 0.84 0.84 0.84 19.01 + 8 1000 0.81 0.81 0.81 39.53 + 16 1000 0.86 0.86 0.86 74.41 + 32 1000 0.84 0.84 0.84 152.95 + 64 1000 0.88 0.88 0.88 289.38 + 128 1000 1.01 1.02 1.01 504.34 + 256 1000 1.15 1.15 1.15 889.07 + 512 1000 2.13 2.13 2.13 962.59 + 1024 1000 2.34 2.34 2.34 
1748.81 + 2048 1000 2.91 2.91 2.91 2810.32 + 4096 1000 7.50 7.50 7.50 2183.84 + 8192 1000 8.23 8.23 8.23 3980.99 + 16384 1000 10.05 10.06 10.05 6517.47 + 32768 1000 13.46 13.46 13.46 9737.24 + 65536 640 21.49 21.49 21.49 12197.47 + 131072 320 43.01 43.03 43.02 12185.10 + 262144 160 95.24 95.29 95.27 11003.79 + 524288 80 207.83 207.95 207.91 10085.02 + 1048576 40 442.18 443.07 442.66 9466.53 + 2097152 20 1054.36 1059.52 1057.65 7917.35 + 4194304 10 3013.99 3016.51 3015.46 5561.80 + +#----------------------------------------------------------------------------- +# Benchmarking Exchange +# #processes = 8 +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec + 0 1000 0.72 0.72 0.72 0.00 + 1 1000 0.85 0.85 0.85 4.73 + 2 1000 1.02 1.02 1.02 7.87 + 4 1000 0.82 0.82 0.82 19.52 + 8 1000 0.86 0.86 0.86 37.23 + 16 1000 0.83 0.84 0.83 76.63 + 32 1000 0.88 0.88 0.88 145.10 + 64 1000 0.87 0.87 0.87 292.58 + 128 1000 1.04 1.05 1.05 489.36 + 256 1000 1.12 1.12 1.12 915.22 + 512 1000 2.15 2.15 2.15 950.90 + 1024 1000 2.46 2.46 2.46 1665.38 + 2048 1000 2.98 2.99 2.99 2743.01 + 4096 1000 7.66 7.66 7.66 2138.27 + 8192 1000 8.69 8.69 8.69 3769.41 + 16384 1000 10.61 10.62 10.61 6172.63 + 32768 1000 13.95 13.95 13.95 9396.01 + 65536 640 21.87 21.88 21.87 11983.10 + 131072 320 46.64 46.73 46.69 11219.54 + 262144 160 98.56 98.78 98.72 10615.56 + 524288 80 234.52 235.20 234.82 8916.54 + 1048576 40 566.01 568.39 567.60 7379.33 + 2097152 20 1909.20 1924.11 1917.09 4359.74 + 4194304 10 4166.54 4212.88 4187.93 3982.36 + +#---------------------------------------------------------------- +# Benchmarking Allreduce +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.06 0.06 + 4 1000 0.62 0.64 0.63 + 8 1000 0.60 0.63 0.61 + 16 1000 0.63 0.66 0.64 + 
32 1000 0.62 0.65 0.64 + 64 1000 0.65 0.67 0.66 + 128 1000 0.72 0.75 0.73 + 256 1000 0.79 0.81 0.80 + 512 1000 1.37 1.40 1.38 + 1024 1000 1.61 1.66 1.63 + 2048 1000 2.21 2.25 2.23 + 4096 1000 3.58 3.59 3.59 + 8192 1000 7.71 8.21 7.96 + 16384 1000 13.20 13.22 13.21 + 32768 1000 18.38 18.41 18.40 + 65536 640 29.27 29.34 29.31 + 131072 320 54.60 54.61 54.60 + 262144 160 115.96 116.24 116.10 + 524288 80 234.84 235.34 235.09 + 1048576 40 496.25 496.35 496.30 + 2097152 20 1032.53 1032.83 1032.68 + 4194304 10 2311.92 2312.22 2312.07 + +#---------------------------------------------------------------- +# Benchmarking Allreduce +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.06 0.06 + 4 1000 1.03 1.19 1.13 + 8 1000 1.10 1.26 1.20 + 16 1000 1.07 1.24 1.17 + 32 1000 1.07 1.24 1.17 + 64 1000 1.10 1.28 1.21 + 128 1000 1.17 1.35 1.28 + 256 1000 1.35 1.54 1.46 + 512 1000 2.46 2.65 2.56 + 1024 1000 2.92 3.22 3.05 + 2048 1000 4.07 4.35 4.18 + 4096 1000 11.41 12.54 11.80 + 8192 1000 12.17 13.55 12.84 + 16384 1000 22.26 23.85 23.03 + 32768 1000 30.67 32.18 31.38 + 65536 640 48.56 49.43 48.97 + 131072 320 88.03 90.54 89.22 + 262144 160 184.16 188.78 186.45 + 524288 80 398.72 424.35 411.64 + 1048576 40 825.49 848.12 836.89 + 2097152 20 1756.75 2033.79 1895.31 + 4194304 10 4392.95 4929.54 4661.40 + +#---------------------------------------------------------------- +# Benchmarking Allreduce +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.06 0.06 + 4 1000 1.62 1.93 1.76 + 8 1000 1.66 1.73 1.70 + 16 1000 1.69 1.80 1.75 + 32 1000 1.67 1.79 1.73 + 64 1000 1.75 1.91 1.82 + 128 1000 1.96 2.20 2.07 + 256 1000 2.19 2.35 2.26 + 512 1000 3.66 3.94 3.79 + 1024 1000 4.40 4.72 4.56 + 2048 1000 6.16 6.39 6.26 + 4096 1000 16.80 17.85 
17.31 + 8192 1000 16.01 16.89 16.41 + 16384 1000 27.79 29.12 28.41 + 32768 1000 44.12 47.28 45.87 + 65536 640 69.10 75.12 72.43 + 131072 320 124.71 134.54 130.23 + 262144 160 257.00 278.63 267.87 + 524288 80 564.58 590.49 574.97 + 1048576 40 1194.46 1273.95 1236.12 + 2097152 20 3211.13 3371.30 3287.60 + 4194304 10 8332.93 8577.72 8453.29 + +#---------------------------------------------------------------- +# Benchmarking Reduce +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.06 0.06 + 4 1000 0.23 0.49 0.36 + 8 1000 0.23 0.54 0.39 + 16 1000 0.24 0.53 0.39 + 32 1000 0.22 0.49 0.35 + 64 1000 0.22 0.57 0.40 + 128 1000 0.25 0.65 0.45 + 256 1000 0.28 0.77 0.52 + 512 1000 0.97 0.98 0.97 + 1024 1000 1.11 1.22 1.17 + 2048 1000 1.27 1.68 1.47 + 4096 1000 2.82 3.41 3.12 + 8192 1000 3.20 4.84 4.02 + 16384 1000 4.14 7.79 5.97 + 32768 1000 5.85 13.54 9.70 + 65536 640 9.33 25.24 17.28 + 131072 320 19.02 52.40 35.71 + 262144 160 39.45 114.69 77.07 + 524288 80 81.93 236.13 159.03 + 1048576 40 167.36 486.18 326.77 + 2097152 20 368.92 1040.56 704.74 + 4194304 10 794.56 2259.31 1526.94 + +#---------------------------------------------------------------- +# Benchmarking Reduce +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.06 0.06 + 4 1000 0.22 1.10 0.59 + 8 1000 0.22 1.11 0.59 + 16 1000 0.23 1.13 0.61 + 32 1000 0.23 1.15 0.62 + 64 1000 0.24 1.20 0.65 + 128 1000 0.26 1.33 0.71 + 256 1000 0.30 1.54 0.81 + 512 1000 1.00 2.36 1.63 + 1024 1000 1.07 2.90 1.88 + 2048 1000 1.28 4.16 2.49 + 4096 1000 3.10 12.25 8.63 + 8192 1000 4.07 17.96 12.25 + 16384 1000 5.95 29.03 19.26 + 32768 1000 8.22 47.49 30.02 + 65536 640 10.71 57.22 30.27 + 131072 320 21.50 119.01 61.44 + 262144 
160 44.85 367.55 179.63 + 524288 80 80.10 509.71 254.95 + 1048576 40 187.97 1684.75 818.32 + 2097152 20 389.83 3447.95 1677.23 + 4194304 10 1040.27 8221.24 4113.45 + +#---------------------------------------------------------------- +# Benchmarking Reduce +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.06 0.06 + 4 1000 0.21 1.69 0.72 + 8 1000 0.21 1.70 0.74 + 16 1000 0.22 1.74 0.75 + 32 1000 0.22 1.77 0.76 + 64 1000 0.22 1.85 0.78 + 128 1000 0.23 2.18 0.88 + 256 1000 0.24 2.44 0.98 + 512 1000 0.95 3.42 1.93 + 1024 1000 1.09 4.53 2.39 + 2048 1000 1.28 6.61 3.15 + 4096 1000 2.78 15.27 7.70 + 8192 1000 3.53 16.84 8.53 + 16384 1000 4.59 27.21 13.07 + 32768 1000 6.64 45.64 20.24 + 65536 640 12.21 85.58 36.12 + 131072 320 22.72 186.89 78.28 + 262144 160 55.06 413.80 172.62 + 524288 80 102.12 905.52 367.40 + 1048576 40 214.92 1859.27 767.21 + 2097152 20 634.37 4558.31 1938.31 + 4194304 10 1467.55 10127.63 4371.52 + +#---------------------------------------------------------------- +# Benchmarking Reduce_local +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.06 0.06 + 4 1000 0.07 0.07 0.07 + 8 1000 0.10 0.10 0.10 + 16 1000 0.07 0.07 0.07 + 32 1000 0.08 0.08 0.08 + 64 1000 0.08 0.08 0.08 + 128 1000 0.10 0.10 0.10 + 256 1000 0.13 0.14 0.13 + 512 1000 0.19 0.19 0.19 + 1024 1000 0.34 0.34 0.34 + 2048 1000 0.59 0.60 0.60 + 4096 1000 1.08 1.09 1.08 + 8192 1000 2.08 2.08 2.08 + 16384 1000 4.06 4.07 4.06 + 32768 1000 8.10 8.12 8.11 + 65536 640 16.18 16.23 16.20 + 131072 320 32.37 32.40 32.38 + 262144 160 71.38 71.50 71.44 + 524288 80 143.42 143.79 143.61 + 1048576 40 309.17 325.78 317.47 + 2097152 20 644.86 661.11 652.99 + 4194304 10 1343.07 1384.49 1363.78 + 
+#---------------------------------------------------------------- +# Benchmarking Reduce_local +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.06 0.06 + 4 1000 0.07 0.07 0.07 + 8 1000 0.07 0.07 0.07 + 16 1000 0.07 0.08 0.07 + 32 1000 0.08 0.09 0.08 + 64 1000 0.08 0.09 0.09 + 128 1000 0.10 0.11 0.10 + 256 1000 0.13 0.14 0.13 + 512 1000 0.19 0.20 0.20 + 1024 1000 0.33 0.34 0.33 + 2048 1000 0.59 0.59 0.59 + 4096 1000 1.07 1.09 1.08 + 8192 1000 2.08 2.10 2.09 + 16384 1000 4.07 4.08 4.07 + 32768 1000 8.10 8.12 8.11 + 65536 640 16.19 16.20 16.19 + 131072 320 32.31 32.72 32.52 + 262144 160 67.84 72.20 70.40 + 524288 80 148.33 152.96 151.10 + 1048576 40 309.40 348.34 325.88 + 2097152 20 641.74 724.06 679.40 + 4194304 10 1634.60 2100.81 1849.57 + +#---------------------------------------------------------------- +# Benchmarking Reduce_local +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.06 0.06 + 4 1000 0.07 0.07 0.07 + 8 1000 0.07 0.07 0.07 + 16 1000 0.07 0.10 0.08 + 32 1000 0.08 0.08 0.08 + 64 1000 0.08 0.09 0.09 + 128 1000 0.10 0.11 0.10 + 256 1000 0.13 0.14 0.13 + 512 1000 0.19 0.21 0.20 + 1024 1000 0.33 0.34 0.34 + 2048 1000 0.61 0.62 0.61 + 4096 1000 1.08 1.13 1.09 + 8192 1000 2.08 2.10 2.09 + 16384 1000 4.06 4.12 4.09 + 32768 1000 8.08 8.12 8.11 + 65536 640 16.24 16.26 16.26 + 131072 320 32.29 33.87 32.68 + 262144 160 69.27 73.07 70.91 + 524288 80 146.56 161.39 153.44 + 1048576 40 304.89 404.12 357.00 + 2097152 20 826.19 1091.31 979.27 + 4194304 10 2551.18 3116.42 2710.79 + +#---------------------------------------------------------------- +# Benchmarking Reduce_scatter +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) 
+#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.08 0.07 + 4 1000 0.68 0.69 0.69 + 8 1000 0.69 0.69 0.69 + 16 1000 0.69 0.70 0.70 + 32 1000 0.71 0.71 0.71 + 64 1000 0.74 0.77 0.76 + 128 1000 0.81 0.81 0.81 + 256 1000 0.90 0.90 0.90 + 512 1000 1.40 1.47 1.43 + 1024 1000 1.81 1.85 1.83 + 2048 1000 2.50 2.51 2.50 + 4096 1000 5.96 6.03 6.00 + 8192 1000 8.42 8.42 8.42 + 16384 1000 12.24 12.48 12.36 + 32768 1000 20.37 20.66 20.52 + 65536 640 39.21 39.33 39.27 + 131072 320 85.72 86.25 85.98 + 262144 160 182.52 185.07 183.79 + 524288 80 412.71 426.05 419.38 + 1048576 40 979.02 1024.08 1001.55 + 2097152 20 2326.63 2390.25 2358.44 + 4194304 10 5646.02 5767.22 5706.62 + +#---------------------------------------------------------------- +# Benchmarking Reduce_scatter +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.06 0.06 + 4 1000 1.12 1.36 1.26 + 8 1000 1.15 1.40 1.30 + 16 1000 1.18 1.40 1.30 + 32 1000 1.22 1.46 1.36 + 64 1000 1.35 1.64 1.51 + 128 1000 1.49 1.79 1.67 + 256 1000 2.09 2.40 2.26 + 512 1000 2.84 3.40 3.14 + 1024 1000 3.70 4.20 3.96 + 2048 1000 7.44 8.04 7.76 + 4096 1000 13.48 14.28 13.89 + 8192 1000 19.18 20.16 19.69 + 16384 1000 31.37 32.39 31.87 + 32768 1000 56.07 57.62 56.84 + 65536 640 138.87 141.22 140.21 + 131072 320 281.61 285.70 283.76 + 262144 160 605.11 633.59 620.05 + 524288 80 1453.61 1500.78 1472.48 + 1048576 40 3753.98 3930.06 3858.45 + 2097152 20 7856.96 8071.99 7998.25 + 4194304 10 16642.70 17522.67 17221.63 + +#---------------------------------------------------------------- +# Benchmarking Reduce_scatter +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.08 0.08 + 4 1000 1.77 1.84 1.82 + 8 
1000 1.76 1.81 1.78 + 16 1000 1.82 1.88 1.85 + 32 1000 1.92 2.04 1.97 + 64 1000 2.13 2.24 2.18 + 128 1000 2.76 2.99 2.85 + 256 1000 3.87 3.97 3.91 + 512 1000 5.44 5.57 5.50 + 1024 1000 9.28 9.64 9.49 + 2048 1000 16.30 16.76 16.53 + 4096 1000 26.00 26.69 26.30 + 8192 1000 40.30 41.26 40.76 + 16384 1000 72.48 73.38 72.89 + 32768 1000 172.28 176.42 174.12 + 65536 640 350.60 355.45 353.50 + 131072 320 745.88 764.27 752.33 + 262144 160 2275.30 2335.52 2303.22 + 524288 80 5193.01 5412.87 5314.74 + 1048576 40 11482.75 11891.51 11731.67 + 2097152 20 23773.59 24157.58 23993.54 + 4194304 10 61121.65 61635.76 61448.39 + +#---------------------------------------------------------------- +# Benchmarking Reduce_scatter_block +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.06 0.06 + 4 1000 0.67 0.68 0.67 + 8 1000 0.65 0.65 0.65 + 16 1000 0.69 0.70 0.69 + 32 1000 0.68 0.68 0.68 + 64 1000 0.73 0.74 0.73 + 128 1000 0.77 0.77 0.77 + 256 1000 0.91 0.91 0.91 + 512 1000 1.45 1.46 1.46 + 1024 1000 1.99 2.01 2.00 + 2048 1000 2.57 2.60 2.58 + 4096 1000 5.82 5.87 5.85 + 8192 1000 8.24 8.24 8.24 + 16384 1000 11.76 11.89 11.82 + 32768 1000 19.91 20.08 19.99 + 65536 640 38.71 39.13 38.92 + 131072 320 149.72 150.35 150.04 + 262144 160 303.06 303.68 303.37 + 524288 80 611.70 612.75 612.22 + 1048576 40 1297.23 1298.46 1297.84 + 2097152 20 2759.81 2761.52 2760.66 + 4194304 10 6865.86 6867.13 6866.50 + +#---------------------------------------------------------------- +# Benchmarking Reduce_scatter_block +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.06 0.06 + 4 1000 1.10 1.28 1.20 + 8 1000 2.74 2.89 2.82 + 16 1000 2.73 2.93 2.83 + 32 1000 1.28 1.46 1.38 + 64 1000 1.32 1.52 
1.43 + 128 1000 1.48 1.73 1.62 + 256 1000 2.11 2.29 2.20 + 512 1000 3.19 3.31 3.26 + 1024 1000 4.01 4.12 4.06 + 2048 1000 8.11 8.31 8.24 + 4096 1000 14.38 14.58 14.50 + 8192 1000 20.35 20.61 20.49 + 16384 1000 31.51 31.74 31.67 + 32768 1000 55.65 56.04 55.83 + 65536 640 114.87 115.57 115.35 + 131072 320 244.63 251.21 247.06 + 262144 160 544.56 560.38 552.98 + 524288 80 1296.50 1341.59 1319.70 + 1048576 40 9079.51 9455.21 9313.61 + 2097152 20 19765.17 20749.83 20388.18 + 4194304 10 47071.34 49910.14 48694.50 + +#---------------------------------------------------------------- +# Benchmarking Reduce_scatter_block +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.06 0.06 + 4 1000 1.65 1.70 1.67 + 8 1000 1.63 1.68 1.65 + 16 1000 1.74 1.83 1.78 + 32 1000 1.97 2.12 2.03 + 64 1000 2.21 2.31 2.25 + 128 1000 2.67 2.85 2.73 + 256 1000 3.74 3.85 3.80 + 512 1000 5.26 5.52 5.37 + 1024 1000 9.45 9.59 9.52 + 2048 1000 16.06 16.44 16.24 + 4096 1000 25.80 26.11 25.98 + 8192 1000 39.76 40.03 39.90 + 16384 1000 72.04 72.94 72.37 + 32768 1000 149.89 151.72 150.87 + 65536 640 346.32 359.78 351.97 + 131072 320 796.66 830.59 814.32 + 262144 160 2419.97 2468.37 2441.90 + 524288 80 10435.75 10594.26 10523.62 + 1048576 40 21979.09 23777.69 23020.45 + 2097152 20 53645.10 57481.53 55823.11 + 4194304 10 66291.25 68106.70 67504.31 + +#---------------------------------------------------------------- +# Benchmarking Allgather +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 1 1000 0.53 0.54 0.54 + 2 1000 0.56 0.56 0.56 + 4 1000 0.53 0.54 0.53 + 8 1000 0.54 0.54 0.54 + 16 1000 0.54 0.54 0.54 + 32 1000 0.56 0.57 0.57 + 64 1000 0.56 0.56 0.56 + 128 1000 0.60 0.60 0.60 + 256 1000 0.65 0.66 0.66 + 512 1000 1.13 1.15 1.14 + 
1024 1000 1.24 1.26 1.25 + 2048 1000 1.48 1.50 1.49 + 4096 1000 3.32 3.46 3.39 + 8192 1000 3.92 3.92 3.92 + 16384 1000 5.14 5.24 5.19 + 32768 1000 7.49 7.54 7.51 + 65536 640 12.27 12.29 12.28 + 131072 320 23.92 24.89 24.40 + 262144 160 61.39 62.12 61.76 + 524288 80 139.46 142.19 140.83 + 1048576 40 283.06 292.53 287.79 + 2097152 20 606.65 629.88 618.26 + 4194304 10 1538.72 1621.72 1580.22 + +#---------------------------------------------------------------- +# Benchmarking Allgather +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 1 1000 1.01 1.22 1.12 + 2 1000 1.00 1.19 1.10 + 4 1000 0.98 1.20 1.10 + 8 1000 1.02 1.24 1.14 + 16 1000 1.03 1.21 1.13 + 32 1000 1.04 1.25 1.15 + 64 1000 1.12 1.37 1.25 + 128 1000 1.23 1.43 1.34 + 256 1000 1.75 1.95 1.87 + 512 1000 2.34 2.62 2.49 + 1024 1000 2.68 3.02 2.86 + 2048 1000 5.60 5.86 5.75 + 4096 1000 10.02 10.26 10.11 + 8192 1000 12.86 15.65 14.24 + 16384 1000 17.15 17.71 17.40 + 32768 1000 26.46 29.44 27.91 + 65536 640 46.52 47.93 47.16 + 131072 320 121.97 127.09 124.51 + 262144 160 232.77 238.85 235.79 + 524288 80 494.04 539.73 516.80 + 1048576 40 1271.68 1315.60 1293.59 + 2097152 20 2882.50 3696.15 3288.84 + 4194304 10 6976.11 7849.88 7412.90 + +#---------------------------------------------------------------- +# Benchmarking Allgather +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 1 1000 1.49 1.59 1.54 + 2 1000 1.52 1.60 1.56 + 4 1000 1.51 1.60 1.56 + 8 1000 1.59 1.67 1.62 + 16 1000 1.56 1.67 1.61 + 32 1000 1.60 1.84 1.72 + 64 1000 1.72 2.09 1.88 + 128 1000 2.11 2.54 2.31 + 256 1000 2.97 3.38 3.16 + 512 1000 3.88 4.28 4.05 + 1024 1000 6.51 7.41 6.91 + 2048 1000 12.83 14.02 13.39 + 4096 1000 20.41 21.54 20.82 + 8192 1000 27.15 29.20 
27.87 + 16384 1000 42.89 46.73 45.03 + 32768 1000 82.30 87.79 84.65 + 65536 640 158.80 189.33 173.91 + 131072 320 337.23 382.72 356.89 + 262144 160 774.30 872.74 801.39 + 524288 80 1825.55 2219.59 2000.64 + 1048576 40 4768.49 5385.96 5083.29 + 2097152 20 8757.75 11249.27 10474.50 + 4194304 10 19070.29 22437.27 20189.12 + +#---------------------------------------------------------------- +# Benchmarking Allgatherv +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 1 1000 0.54 0.55 0.55 + 2 1000 0.54 0.55 0.54 + 4 1000 0.56 0.56 0.56 + 8 1000 0.55 0.55 0.55 + 16 1000 0.55 0.56 0.55 + 32 1000 0.55 0.56 0.56 + 64 1000 0.57 0.58 0.58 + 128 1000 0.63 0.63 0.63 + 256 1000 0.69 0.69 0.69 + 512 1000 1.20 1.23 1.22 + 1024 1000 1.41 1.45 1.43 + 2048 1000 1.57 1.59 1.58 + 4096 1000 4.10 4.13 4.11 + 8192 1000 3.94 3.95 3.94 + 16384 1000 5.15 5.16 5.15 + 32768 1000 7.47 7.49 7.48 + 65536 640 12.10 12.11 12.11 + 131072 320 24.27 25.00 24.63 + 262144 160 61.17 62.89 62.03 + 524288 80 136.09 139.58 137.83 + 1048576 40 284.03 290.88 287.46 + 2097152 20 609.60 630.71 620.15 + 4194304 10 1406.38 1492.43 1449.40 + +#---------------------------------------------------------------- +# Benchmarking Allgatherv +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.06 0.05 + 1 1000 1.74 2.00 1.87 + 2 1000 1.72 2.04 1.89 + 4 1000 1.77 2.10 1.94 + 8 1000 1.74 2.04 1.90 + 16 1000 1.72 2.01 1.87 + 32 1000 1.80 2.16 2.00 + 64 1000 1.87 2.27 2.08 + 128 1000 2.59 3.41 3.00 + 256 1000 2.13 2.67 2.38 + 512 1000 2.84 3.29 3.05 + 1024 1000 3.22 3.63 3.40 + 2048 1000 5.70 6.60 6.13 + 4096 1000 12.81 14.58 13.90 + 8192 1000 12.92 14.23 13.56 + 16384 1000 18.73 23.95 21.40 + 32768 
1000 28.02 36.71 32.42 + 65536 640 46.66 63.08 54.88 + 131072 320 122.47 135.40 128.91 + 262144 160 228.77 229.55 229.13 + 524288 80 542.91 552.78 547.96 + 1048576 40 1214.31 1256.24 1235.39 + 2097152 20 3643.14 3764.00 3703.50 + 4194304 10 7830.25 7919.12 7874.60 + +#---------------------------------------------------------------- +# Benchmarking Allgatherv +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 1 1000 2.26 3.61 3.13 + 2 1000 2.22 3.68 3.19 + 4 1000 2.23 3.68 3.19 + 8 1000 2.60 3.32 3.04 + 16 1000 2.75 3.39 3.12 + 32 1000 2.77 3.61 3.27 + 64 1000 3.39 4.52 3.99 + 128 1000 3.99 5.12 4.61 + 256 1000 4.58 6.24 5.48 + 512 1000 8.89 16.51 13.35 + 1024 1000 8.43 10.19 9.42 + 2048 1000 13.72 17.71 16.03 + 4096 1000 20.35 26.48 23.64 + 8192 1000 29.72 36.92 33.60 + 16384 1000 42.51 57.28 50.70 + 32768 1000 81.87 99.21 91.00 + 65536 640 188.81 204.05 194.29 + 131072 320 374.61 433.82 405.88 + 262144 160 828.02 973.71 897.71 + 524288 80 2093.38 2283.19 2216.32 + 1048576 40 4929.89 5665.13 5178.81 + 2097152 20 10466.70 11589.78 10796.70 + 4194304 10 19420.43 23233.37 20954.27 + +#---------------------------------------------------------------- +# Benchmarking Gather +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 1 1000 0.76 1.06 0.91 + 2 1000 0.20 0.51 0.35 + 4 1000 0.22 0.52 0.37 + 8 1000 0.22 0.51 0.37 + 16 1000 0.23 0.53 0.38 + 32 1000 0.23 0.51 0.37 + 64 1000 0.23 0.58 0.41 + 128 1000 0.26 0.65 0.45 + 256 1000 0.28 0.73 0.50 + 512 1000 0.94 1.09 1.01 + 1024 1000 1.02 1.16 1.09 + 2048 1000 1.20 1.34 1.27 + 4096 1000 2.43 2.77 2.60 + 8192 1000 3.02 3.38 3.20 + 16384 1000 4.17 4.48 4.32 + 32768 1000 6.44 6.78 6.61 + 65536 640 11.19 11.54 11.37 + 131072 320 17.20 24.21 20.70 + 
262144 160 40.35 58.22 49.28 + 524288 80 81.82 122.63 102.23 + 1048576 40 170.85 267.03 218.94 + 2097152 20 364.31 581.91 473.11 + 4194304 10 870.73 1429.28 1150.01 + +#---------------------------------------------------------------- +# Benchmarking Gather +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 1 1000 0.22 1.14 0.58 + 2 1000 0.23 1.18 0.59 + 4 1000 0.22 1.14 0.57 + 8 1000 0.23 1.24 0.62 + 16 1000 0.23 1.14 0.59 + 32 1000 0.23 1.17 0.60 + 64 1000 0.24 1.22 0.63 + 128 1000 0.26 1.29 0.67 + 256 1000 0.29 1.60 0.94 + 512 1000 0.96 2.09 1.54 + 1024 1000 1.13 2.01 1.52 + 2048 1000 1.44 2.49 1.89 + 4096 1000 2.76 6.89 5.40 + 8192 1000 3.55 9.93 7.17 + 16384 1000 4.86 14.05 10.03 + 32768 1000 7.07 17.26 13.52 + 65536 640 13.49 32.69 25.66 + 131072 320 31.73 73.58 58.03 + 262144 160 45.19 151.86 104.18 + 524288 80 90.55 306.89 208.12 + 1048576 40 176.36 653.32 433.38 + 2097152 20 408.34 1522.99 1016.42 + 4194304 10 1101.35 4032.47 2667.83 + +#---------------------------------------------------------------- +# Benchmarking Gather +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 1 1000 0.23 1.56 0.72 + 2 1000 0.23 1.53 0.71 + 4 1000 0.23 1.56 0.71 + 8 1000 0.23 1.57 0.72 + 16 1000 0.24 1.64 0.75 + 32 1000 0.24 1.64 0.74 + 64 1000 0.24 1.78 0.79 + 128 1000 0.27 2.18 0.94 + 256 1000 0.27 2.54 1.24 + 512 1000 0.96 3.07 1.78 + 1024 1000 1.12 5.10 2.49 + 2048 1000 1.46 7.66 3.83 + 4096 1000 2.98 11.78 6.62 + 8192 1000 3.54 16.74 9.25 + 16384 1000 4.92 24.75 13.01 + 32768 1000 7.94 40.97 20.24 + 65536 640 15.25 83.16 39.57 + 131072 320 27.01 172.64 81.11 + 262144 160 61.73 375.44 179.36 + 524288 80 147.63 843.56 408.33 + 1048576 40 394.80 2182.24 1037.42 + 2097152 20 1143.25 5427.61 
2659.98 + 4194304 10 2148.27 15940.86 8123.50 + +#---------------------------------------------------------------- +# Benchmarking Gatherv +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.07 0.06 + 1 1000 0.21 0.47 0.34 + 2 1000 0.21 0.47 0.34 + 4 1000 0.21 0.47 0.34 + 8 1000 0.21 0.47 0.34 + 16 1000 0.22 0.48 0.35 + 32 1000 0.22 0.46 0.34 + 64 1000 0.22 0.56 0.39 + 128 1000 0.24 0.61 0.43 + 256 1000 0.26 0.70 0.48 + 512 1000 0.84 1.00 0.92 + 1024 1000 1.01 1.15 1.08 + 2048 1000 1.20 1.33 1.26 + 4096 1000 2.40 2.74 2.57 + 8192 1000 3.08 3.36 3.22 + 16384 1000 4.23 4.57 4.40 + 32768 1000 6.47 6.84 6.66 + 65536 640 11.15 11.56 11.36 + 131072 320 24.00 24.42 24.21 + 262144 160 58.22 58.26 58.24 + 524288 80 123.57 123.72 123.65 + 1048576 40 259.35 260.21 259.78 + 2097152 20 564.88 565.72 565.30 + 4194304 10 1425.50 1426.33 1425.92 + +#---------------------------------------------------------------- +# Benchmarking Gatherv +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.07 0.06 + 1 1000 0.20 1.10 0.43 + 2 1000 0.21 1.07 0.43 + 4 1000 0.20 1.07 0.42 + 8 1000 0.20 1.08 0.42 + 16 1000 0.21 1.11 0.44 + 32 1000 0.21 1.10 0.44 + 64 1000 0.21 1.16 0.45 + 128 1000 0.23 1.29 0.50 + 256 1000 0.24 1.44 0.55 + 512 1000 1.04 1.65 1.27 + 1024 1000 1.15 1.88 1.44 + 2048 1000 1.37 2.36 1.78 + 4096 1000 2.81 6.83 5.38 + 8192 1000 3.51 8.42 6.67 + 16384 1000 4.61 11.21 8.84 + 32768 1000 7.01 16.99 13.33 + 65536 640 14.61 34.27 27.03 + 131072 320 31.17 73.72 57.86 + 262144 160 64.96 149.56 117.84 + 524288 80 133.87 299.98 237.81 + 1048576 40 284.18 641.25 506.26 + 2097152 20 650.63 1482.27 1169.43 + 4194304 10 1835.06 4025.14 3196.35 + 
+#---------------------------------------------------------------- +# Benchmarking Gatherv +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.08 0.06 + 1 1000 0.18 1.62 0.37 + 2 1000 0.18 1.59 0.37 + 4 1000 0.18 1.59 0.37 + 8 1000 0.18 1.58 0.37 + 16 1000 0.18 1.66 0.38 + 32 1000 0.18 1.62 0.38 + 64 1000 0.19 1.76 0.41 + 128 1000 0.19 2.11 0.45 + 256 1000 0.21 2.32 0.49 + 512 1000 1.07 2.88 1.62 + 1024 1000 1.16 3.26 1.81 + 2048 1000 1.49 4.30 2.34 + 4096 1000 4.08 15.75 10.60 + 8192 1000 4.87 19.24 12.90 + 16384 1000 6.30 26.20 17.41 + 32768 1000 10.52 44.48 29.41 + 65536 640 19.89 90.30 59.60 + 131072 320 35.56 169.64 110.98 + 262144 160 70.52 339.48 221.34 + 524288 80 153.19 720.41 468.83 + 1048576 40 348.89 1668.01 1093.76 + 2097152 20 940.43 4477.95 2943.18 + 4194304 10 2010.53 10030.67 6560.24 + +#---------------------------------------------------------------- +# Benchmarking Scatter +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 1 1000 0.29 0.46 0.38 + 2 1000 0.24 0.44 0.34 + 4 1000 0.25 0.45 0.35 + 8 1000 0.24 0.43 0.33 + 16 1000 0.25 0.44 0.35 + 32 1000 0.26 0.46 0.36 + 64 1000 0.26 0.53 0.39 + 128 1000 0.29 0.59 0.44 + 256 1000 0.33 0.69 0.51 + 512 1000 0.97 1.10 1.03 + 1024 1000 1.13 1.27 1.20 + 2048 1000 1.36 1.49 1.42 + 4096 1000 2.54 2.83 2.69 + 8192 1000 3.18 3.47 3.33 + 16384 1000 4.19 4.49 4.34 + 32768 1000 6.50 6.81 6.65 + 65536 640 11.23 11.53 11.38 + 131072 320 20.25 20.65 20.45 + 262144 160 43.39 43.68 43.54 + 524288 80 101.70 101.88 101.79 + 1048576 40 227.26 227.40 227.33 + 2097152 20 527.22 527.26 527.24 + 4194304 10 1194.86 1195.07 1194.97 + +#---------------------------------------------------------------- +# Benchmarking Scatter +# #processes = 4 +# ( 4 
additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 1 1000 0.35 0.74 0.56 + 2 1000 0.35 0.74 0.56 + 4 1000 0.35 0.74 0.56 + 8 1000 0.35 0.76 0.57 + 16 1000 0.39 0.79 0.61 + 32 1000 0.37 0.79 0.60 + 64 1000 0.38 0.83 0.63 + 128 1000 0.46 0.89 0.70 + 256 1000 1.08 1.61 1.30 + 512 1000 1.63 2.05 1.86 + 1024 1000 2.02 2.40 2.24 + 2048 1000 1.27 3.86 2.78 + 4096 1000 6.89 7.83 7.37 + 8192 1000 3.61 11.07 8.08 + 16384 1000 4.80 13.71 10.17 + 32768 1000 10.59 13.25 12.02 + 65536 640 20.25 23.03 21.79 + 131072 320 40.39 43.58 42.08 + 262144 160 81.85 86.00 84.24 + 524288 80 115.67 140.46 133.42 + 1048576 40 237.11 550.49 433.30 + 2097152 20 530.58 1239.98 974.26 + 4194304 10 1556.54 3385.34 2709.75 + +#---------------------------------------------------------------- +# Benchmarking Scatter +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 1 1000 0.46 1.54 1.06 + 2 1000 0.46 1.54 1.06 + 4 1000 0.47 1.53 1.06 + 8 1000 0.49 1.57 1.10 + 16 1000 0.49 1.58 1.09 + 32 1000 0.50 1.61 1.12 + 64 1000 0.55 1.71 1.19 + 128 1000 1.18 1.96 1.58 + 256 1000 1.82 2.33 2.06 + 512 1000 2.82 3.19 3.02 + 1024 1000 4.62 5.06 4.87 + 2048 1000 8.11 9.14 8.63 + 4096 1000 13.29 15.11 14.11 + 8192 1000 16.90 18.88 17.75 + 16384 1000 5.59 15.12 11.34 + 32768 1000 9.41 23.55 19.03 + 65536 640 22.81 41.88 36.81 + 131072 320 56.50 77.46 71.97 + 262144 160 51.34 127.52 105.28 + 524288 80 127.52 253.38 206.42 + 1048576 40 244.23 1239.74 798.55 + 2097152 20 698.19 3295.44 2147.93 + 4194304 10 1926.42 9732.45 6320.09 + +#---------------------------------------------------------------- +# Benchmarking Scatterv +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + 
#bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.06 0.06 + 1 1000 0.25 0.44 0.34 + 2 1000 0.25 0.44 0.35 + 4 1000 0.24 0.43 0.34 + 8 1000 0.25 0.43 0.34 + 16 1000 0.28 0.47 0.38 + 32 1000 0.26 0.46 0.36 + 64 1000 0.29 0.52 0.40 + 128 1000 0.29 0.58 0.44 + 256 1000 0.33 0.72 0.53 + 512 1000 0.91 1.05 0.98 + 1024 1000 1.07 1.17 1.12 + 2048 1000 1.34 1.46 1.40 + 4096 1000 2.52 2.81 2.67 + 8192 1000 3.00 3.30 3.15 + 16384 1000 4.01 4.30 4.16 + 32768 1000 6.29 6.59 6.44 + 65536 640 10.85 11.16 11.00 + 131072 320 19.74 20.05 19.89 + 262144 160 42.79 43.11 42.95 + 524288 80 105.75 105.86 105.80 + 1048576 40 226.19 226.25 226.22 + 2097152 20 501.92 502.06 501.99 + 4194304 10 1210.22 1210.47 1210.34 + +#---------------------------------------------------------------- +# Benchmarking Scatterv +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.07 0.06 + 1 1000 0.44 0.45 0.44 + 2 1000 0.43 0.45 0.44 + 4 1000 0.43 0.44 0.44 + 8 1000 0.46 0.48 0.47 + 16 1000 0.43 0.49 0.47 + 32 1000 0.47 0.50 0.49 + 64 1000 0.50 0.53 0.51 + 128 1000 0.55 0.66 0.61 + 256 1000 0.64 0.78 0.72 + 512 1000 0.88 2.98 2.03 + 1024 1000 1.07 3.42 2.37 + 2048 1000 1.34 4.04 2.84 + 4096 1000 3.12 9.63 7.00 + 8192 1000 3.62 10.95 8.02 + 16384 1000 4.65 13.45 9.96 + 32768 1000 6.71 18.68 13.98 + 65536 640 11.64 30.33 23.15 + 131072 320 20.58 52.92 40.56 + 262144 160 43.27 108.02 83.82 + 524288 80 102.82 247.31 193.32 + 1048576 40 235.86 560.58 437.01 + 2097152 20 519.43 1214.32 952.60 + 4194304 10 1513.09 3311.39 2649.82 + +#---------------------------------------------------------------- +# Benchmarking Scatterv +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.06 0.08 0.06 + 1 1000 0.55 1.16 0.88 + 2 1000 0.55 1.15 0.88 + 4 
1000 0.58 1.20 0.92 + 8 1000 0.54 1.15 0.87 + 16 1000 0.55 1.25 0.94 + 32 1000 0.56 1.26 0.94 + 64 1000 0.57 1.30 0.97 + 128 1000 0.60 1.46 1.09 + 256 1000 0.65 1.73 1.29 + 512 1000 1.12 6.11 3.95 + 1024 1000 1.14 6.88 4.40 + 2048 1000 1.36 8.40 5.33 + 4096 1000 3.40 22.06 13.90 + 8192 1000 3.92 25.17 15.89 + 16384 1000 4.90 30.77 19.49 + 32768 1000 7.21 43.67 27.77 + 65536 640 11.81 68.70 43.80 + 131072 320 21.11 117.13 74.98 + 262144 160 43.57 240.01 154.07 + 524288 80 106.31 547.97 356.41 + 1048576 40 239.36 1218.89 785.98 + 2097152 20 719.55 3352.21 2193.69 + 4194304 10 1888.30 9448.60 6154.39 + +#---------------------------------------------------------------- +# Benchmarking Alltoall +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 1 1000 0.56 0.58 0.57 + 2 1000 0.53 0.54 0.54 + 4 1000 0.55 0.55 0.55 + 8 1000 0.55 0.56 0.56 + 16 1000 0.55 0.55 0.55 + 32 1000 0.56 0.56 0.56 + 64 1000 0.59 0.59 0.59 + 128 1000 0.65 0.65 0.65 + 256 1000 0.67 0.67 0.67 + 512 1000 1.20 1.21 1.20 + 1024 1000 1.35 1.37 1.36 + 2048 1000 1.61 1.64 1.62 + 4096 1000 3.31 3.44 3.37 + 8192 1000 3.90 4.12 4.01 + 16384 1000 5.19 5.43 5.31 + 32768 1000 7.58 7.66 7.62 + 65536 640 12.45 12.49 12.47 + 131072 320 25.03 25.07 25.05 + 262144 160 56.27 56.61 56.44 + 524288 80 120.23 124.47 122.35 + 1048576 40 260.07 269.74 264.90 + 2097152 20 586.10 629.46 607.78 + 4194304 10 1690.73 1694.47 1692.60 + +#---------------------------------------------------------------- +# Benchmarking Alltoall +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 1 1000 1.13 1.24 1.19 + 2 1000 1.10 1.20 1.16 + 4 1000 1.12 1.23 1.18 + 8 1000 1.14 1.25 1.20 + 16 1000 1.14 1.26 1.19 + 32 
1000 1.13 1.25 1.18 + 64 1000 1.15 1.26 1.21 + 128 1000 1.29 1.46 1.37 + 256 1000 1.43 1.59 1.50 + 512 1000 2.64 3.19 2.90 + 1024 1000 2.98 3.76 3.36 + 2048 1000 3.77 4.42 4.06 + 4096 1000 10.74 12.98 11.89 + 8192 1000 12.39 14.87 13.65 + 16384 1000 15.43 18.31 16.91 + 32768 1000 21.54 25.24 23.36 + 65536 640 40.73 43.85 42.13 + 131072 320 77.35 84.20 81.92 + 262144 160 143.48 169.84 160.38 + 524288 80 453.34 530.39 491.94 + 1048576 40 877.71 1011.52 972.65 + 2097152 20 2912.02 2997.24 2958.93 + 4194304 10 5591.99 5966.73 5845.99 + +#---------------------------------------------------------------- +# Benchmarking Alltoall +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 1 1000 2.37 2.47 2.39 + 2 1000 2.30 2.43 2.35 + 4 1000 2.30 2.40 2.34 + 8 1000 2.32 2.44 2.35 + 16 1000 2.39 2.50 2.42 + 32 1000 2.39 2.49 2.42 + 64 1000 2.44 2.57 2.48 + 128 1000 2.61 2.78 2.70 + 256 1000 2.71 3.16 3.00 + 512 1000 6.29 6.93 6.64 + 1024 1000 7.60 8.12 7.85 + 2048 1000 9.69 10.18 9.88 + 4096 1000 26.30 29.29 28.10 + 8192 1000 31.54 34.79 33.32 + 16384 1000 40.30 43.57 42.50 + 32768 1000 60.66 69.68 65.01 + 65536 640 112.31 144.91 131.77 + 131072 320 206.99 300.95 277.81 + 262144 160 532.89 725.08 678.88 + 524288 80 1754.84 2100.12 2005.81 + 1048576 40 3959.25 4060.83 4000.43 + 2097152 20 7857.85 8247.39 8084.97 + 4194304 10 15815.18 16292.21 16105.19 + +#---------------------------------------------------------------- +# Benchmarking Alltoallv +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.51 0.52 0.51 + 1 1000 0.57 0.58 0.57 + 2 1000 0.58 0.58 0.58 + 4 1000 0.57 0.58 0.57 + 8 1000 0.57 0.59 0.58 + 16 1000 0.58 0.60 0.59 + 32 1000 0.59 0.59 0.59 + 64 1000 0.61 0.61 0.61 + 128 1000 0.64 0.65 0.65 + 256 1000 
0.74 0.75 0.74 + 512 1000 1.27 1.28 1.27 + 1024 1000 1.36 1.37 1.36 + 2048 1000 1.60 1.63 1.62 + 4096 1000 3.38 3.43 3.41 + 8192 1000 4.01 4.02 4.01 + 16384 1000 5.18 5.22 5.20 + 32768 1000 7.47 7.50 7.48 + 65536 640 12.71 12.83 12.77 + 131072 320 24.91 24.92 24.91 + 262144 160 56.69 56.72 56.70 + 524288 80 130.17 130.79 130.48 + 1048576 40 292.26 292.75 292.50 + 2097152 20 635.92 636.37 636.15 + 4194304 10 1768.15 1770.96 1769.55 + +#---------------------------------------------------------------- +# Benchmarking Alltoallv +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 1.11 1.17 1.15 + 1 1000 2.32 2.38 2.35 + 2 1000 2.28 2.35 2.32 + 4 1000 2.36 2.42 2.40 + 8 1000 2.34 2.41 2.36 + 16 1000 2.38 2.47 2.42 + 32 1000 2.32 2.39 2.35 + 64 1000 2.31 2.39 2.34 + 128 1000 2.48 2.64 2.55 + 256 1000 2.60 2.78 2.68 + 512 1000 2.81 3.05 2.95 + 1024 1000 3.18 3.39 3.29 + 2048 1000 3.93 4.20 4.07 + 4096 1000 11.46 12.70 12.04 + 8192 1000 13.57 14.92 14.20 + 16384 1000 16.46 17.71 17.07 + 32768 1000 23.18 24.87 24.03 + 65536 640 37.56 42.97 40.33 + 131072 320 74.97 83.86 79.91 + 262144 160 161.95 173.94 169.42 + 524288 80 351.85 402.74 387.72 + 1048576 40 950.10 1000.32 975.24 + 2097152 20 2835.93 2944.44 2902.52 + 4194304 10 5753.04 5999.40 5888.17 + +#---------------------------------------------------------------- +# Benchmarking Alltoallv +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 2.38 2.52 2.44 + 1 1000 4.94 5.38 5.22 + 2 1000 4.97 5.42 5.22 + 4 1000 4.98 5.43 5.24 + 8 1000 5.06 5.58 5.36 + 16 1000 4.99 5.48 5.29 + 32 1000 4.97 5.51 5.29 + 64 1000 4.97 5.46 5.24 + 128 1000 5.41 5.98 5.72 + 256 1000 5.78 6.23 6.06 + 512 1000 6.50 6.92 6.70 + 1024 1000 7.39 7.87 7.71 + 2048 1000 9.15 9.99 9.67 + 4096 1000 26.47 29.42 
28.35 + 8192 1000 31.10 34.60 33.09 + 16384 1000 38.63 43.08 41.43 + 32768 1000 58.69 67.33 63.35 + 65536 640 106.86 122.64 117.33 + 131072 320 206.41 229.73 223.89 + 262144 160 457.18 549.83 508.25 + 524288 80 1771.45 2019.90 1924.10 + 1048576 40 3855.04 4160.41 4000.38 + 2097152 20 7813.22 8271.91 8003.98 + 4194304 10 15631.01 16441.34 16097.38 + +#---------------------------------------------------------------- +# Benchmarking Bcast +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.05 0.05 + 1 1000 0.24 0.49 0.37 + 2 1000 0.24 0.49 0.37 + 4 1000 0.24 0.48 0.36 + 8 1000 0.24 0.49 0.36 + 16 1000 0.25 0.49 0.37 + 32 1000 0.25 0.51 0.38 + 64 1000 0.25 0.54 0.40 + 128 1000 0.28 0.58 0.43 + 256 1000 0.31 0.66 0.48 + 512 1000 0.92 1.06 0.99 + 1024 1000 0.95 1.09 1.02 + 2048 1000 1.21 1.34 1.27 + 4096 1000 2.31 2.66 2.49 + 8192 1000 2.66 2.99 2.82 + 16384 1000 3.44 3.77 3.60 + 32768 1000 5.24 5.58 5.41 + 65536 640 8.44 8.77 8.60 + 131072 320 15.50 15.86 15.68 + 262144 160 32.36 32.68 32.52 + 524288 80 76.48 76.56 76.52 + 1048576 40 162.56 162.85 162.70 + 2097152 20 336.47 337.14 336.80 + 4194304 10 736.64 738.10 737.37 + +#---------------------------------------------------------------- +# Benchmarking Bcast +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.04 0.04 + 1 1000 0.32 0.66 0.50 + 2 1000 0.32 0.66 0.50 + 4 1000 0.32 0.64 0.50 + 8 1000 0.33 0.65 0.50 + 16 1000 0.34 0.66 0.51 + 32 1000 0.35 0.67 0.52 + 64 1000 0.34 0.73 0.54 + 128 1000 0.39 0.77 0.59 + 256 1000 0.43 0.85 0.65 + 512 1000 1.09 1.81 1.45 + 1024 1000 1.26 2.00 1.62 + 2048 1000 1.57 2.38 1.97 + 4096 1000 3.71 7.33 5.68 + 8192 1000 4.32 6.76 5.60 + 16384 1000 6.24 9.30 8.04 + 32768 
1000 11.32 14.95 13.49 + 65536 640 22.57 25.92 24.53 + 131072 320 44.46 48.13 46.60 + 262144 160 92.97 96.36 94.90 + 524288 80 195.21 200.22 198.14 + 1048576 40 171.46 518.32 390.29 + 2097152 20 346.97 1083.19 809.12 + 4194304 10 784.44 2463.68 1831.47 + +#---------------------------------------------------------------- +# Benchmarking Bcast +# #processes = 8 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.05 0.05 + 1 1000 0.58 1.18 0.90 + 2 1000 0.58 1.19 0.90 + 4 1000 0.58 1.17 0.89 + 8 1000 0.36 1.62 1.13 + 16 1000 0.38 1.67 1.15 + 32 1000 0.38 1.67 1.16 + 64 1000 0.63 1.28 0.97 + 128 1000 0.72 1.45 1.09 + 256 1000 0.84 1.62 1.23 + 512 1000 1.34 2.77 2.00 + 1024 1000 1.47 3.22 2.31 + 2048 1000 1.86 3.88 2.89 + 4096 1000 5.47 12.65 9.67 + 8192 1000 7.27 16.21 12.47 + 16384 1000 7.38 23.73 16.20 + 32768 1000 10.01 25.79 21.14 + 65536 640 23.63 44.60 39.39 + 131072 320 61.09 82.33 76.83 + 262144 160 139.06 159.94 153.90 + 524288 80 312.25 330.97 320.48 + 1048576 40 652.60 699.52 679.42 + 2097152 20 1302.10 1471.78 1399.16 + 4194304 10 2732.73 3041.29 2953.37 + +#--------------------------------------------------- +# Benchmarking Barrier +# #processes = 2 +# ( 6 additional processes waiting in MPI_Barrier) +#--------------------------------------------------- + #repetitions t_min[usec] t_max[usec] t_avg[usec] + 1000 0.41 0.41 0.41 + +#--------------------------------------------------- +# Benchmarking Barrier +# #processes = 4 +# ( 4 additional processes waiting in MPI_Barrier) +#--------------------------------------------------- + #repetitions t_min[usec] t_max[usec] t_avg[usec] + 1000 0.96 0.96 0.96 + +#--------------------------------------------------- +# Benchmarking Barrier +# #processes = 8 +#--------------------------------------------------- + #repetitions t_min[usec] t_max[usec] t_avg[usec] + 1000 1.42 1.42 1.42 + + +# All processes entering MPI_Finalize +``` + 
+### 3.7.x86运行结果 + +#### HMPI + +```shell +#---------------------------------------------------------------- +# Intel(R) MPI Benchmarks 2021.3, MPI-1 part +#---------------------------------------------------------------- +# Date : Thu Dec 15 22:38:41 2022 +# Machine : x86_64 +# System : Linux +# Release : 5.15.79.1-microsoft-standard-WSL2 +# Version : #1 SMP Wed Nov 23 01:01:46 UTC 2022 +# MPI Version : 3.1 +# MPI Thread Environment: + + +# Calling sequence was: + +# IMB-MPI1 + +# Minimum message length in bytes: 0 +# Maximum message length in bytes: 4194304 +# +# MPI_Datatype : MPI_BYTE +# MPI_Datatype for reductions : MPI_FLOAT +# MPI_Op : MPI_SUM +# +# + +# List of Benchmarks to run: + +# PingPong +# PingPing +# Sendrecv +# Exchange +# Allreduce +# Reduce +# Reduce_local +# Reduce_scatter +# Reduce_scatter_block +# Allgather +# Allgatherv +# Gather +# Gatherv +# Scatter +# Scatterv +# Alltoall +# Alltoallv +# Bcast +# Barrier + +#--------------------------------------------------- +# Benchmarking PingPong +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#--------------------------------------------------- + #bytes #repetitions t[usec] Mbytes/sec + 0 1000 0.11 0.00 + 1 1000 0.13 7.56 + 2 1000 0.19 10.68 + 4 1000 0.12 34.71 + 8 1000 0.13 63.75 + 16 1000 0.12 129.14 + 32 1000 0.16 203.63 + 64 1000 0.14 444.91 + 128 1000 0.24 525.02 + 256 1000 0.30 862.39 + 512 1000 0.28 1834.80 + 1024 1000 0.39 2623.29 + 2048 1000 0.59 3484.77 + 4096 1000 0.99 4117.62 + 8192 1000 1.72 4763.62 + 16384 1000 2.06 7951.85 + 32768 1000 3.74 8770.41 + 65536 640 8.21 7981.70 + 131072 320 13.81 9490.45 + 262144 160 25.67 10212.95 + 524288 80 73.69 7114.66 + 1048576 40 145.01 7231.31 + 2097152 20 220.73 9500.98 + 4194304 10 506.66 8278.34 + +#--------------------------------------------------- +# Benchmarking PingPing +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#--------------------------------------------------- + #bytes #repetitions 
t[usec] Mbytes/sec + 0 1000 0.21 0.00 + 1 1000 0.63 1.59 + 2 1000 0.23 8.73 + 4 1000 0.21 19.48 + 8 1000 0.22 36.40 + 16 1000 0.20 79.44 + 32 1000 0.29 109.74 + 64 1000 0.15 438.06 + 128 1000 0.23 564.37 + 256 1000 0.34 756.05 + 512 1000 0.28 1803.45 + 1024 1000 0.44 2337.90 + 2048 1000 0.64 3193.51 + 4096 1000 1.01 4041.04 + 8192 1000 1.39 5885.48 + 16384 1000 2.10 7785.22 + 32768 1000 4.18 7847.49 + 65536 640 5.61 11681.02 + 131072 320 10.72 12228.65 + 262144 160 20.82 12588.32 + 524288 80 63.39 8270.51 + 1048576 40 78.02 13439.84 + 2097152 20 149.31 14045.15 + 4194304 10 669.72 6262.77 + +#----------------------------------------------------------------------------- +# Benchmarking Sendrecv +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec + 0 1000 0.27 0.27 0.27 0.00 + 1 1000 0.27 0.27 0.27 7.46 + 2 1000 0.26 0.26 0.26 15.67 + 4 1000 0.27 0.27 0.27 29.96 + 8 1000 0.28 0.28 0.28 57.37 + 16 1000 0.28 0.28 0.28 115.69 + 32 1000 0.32 0.32 0.32 199.56 + 64 1000 0.31 0.31 0.31 417.48 + 128 1000 0.39 0.39 0.39 662.70 + 256 1000 0.36 0.36 0.36 1437.39 + 512 1000 0.31 0.31 0.31 3269.48 + 1024 1000 0.62 0.62 0.62 3306.43 + 2048 1000 0.75 0.75 0.75 5473.01 + 4096 1000 1.02 1.02 1.02 8015.66 + 8192 1000 1.60 1.60 1.60 10242.56 + 16384 1000 2.30 2.30 2.30 14261.22 + 32768 1000 3.87 3.87 3.87 16937.87 + 65536 640 7.82 7.82 7.82 16758.45 + 131072 320 18.72 18.73 18.72 13999.45 + 262144 160 23.89 23.89 23.89 21944.20 + 524288 80 41.94 41.95 41.94 24998.83 + 1048576 40 83.80 83.81 83.81 25022.69 + 2097152 20 151.56 151.65 151.60 27658.70 + 4194304 10 489.31 489.47 489.39 17138.15 + +#----------------------------------------------------------------------------- +# Benchmarking Sendrecv +# #processes = 4 +#----------------------------------------------------------------------------- + #bytes #repetitions 
t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec + 0 1000 0.30 0.30 0.30 0.00 + 1 1000 0.29 0.29 0.29 6.94 + 2 1000 0.28 0.28 0.28 14.14 + 4 1000 0.29 0.29 0.29 27.25 + 8 1000 0.23 0.23 0.23 70.77 + 16 1000 0.21 0.21 0.21 153.40 + 32 1000 0.22 0.22 0.22 290.51 + 64 1000 0.20 0.21 0.21 624.09 + 128 1000 0.26 0.26 0.26 979.72 + 256 1000 0.28 0.28 0.28 1797.12 + 512 1000 0.31 0.31 0.31 3308.56 + 1024 1000 0.41 0.41 0.41 5029.47 + 2048 1000 0.66 0.66 0.66 6178.91 + 4096 1000 0.98 0.98 0.98 8355.77 + 8192 1000 1.68 1.68 1.68 9740.21 + 16384 1000 2.88 2.89 2.89 11352.55 + 32768 1000 7.93 7.96 7.95 8233.17 + 65536 640 8.32 8.33 8.33 15730.80 + 131072 320 25.44 25.50 25.47 10281.67 + 262144 160 21.10 21.17 21.15 24768.54 + 524288 80 48.82 49.02 48.92 21391.87 + 1048576 40 95.16 97.38 96.44 21535.20 + 2097152 20 481.27 484.35 483.17 8659.57 + 4194304 10 1323.31 1333.49 1330.38 6290.72 + +#----------------------------------------------------------------------------- +# Benchmarking Exchange +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec + 0 1000 0.25 0.25 0.25 0.00 + 1 1000 0.38 0.38 0.38 10.60 + 2 1000 0.33 0.33 0.33 24.20 + 4 1000 0.34 0.34 0.34 47.52 + 8 1000 0.34 0.34 0.34 94.45 + 16 1000 0.35 0.35 0.35 183.33 + 32 1000 0.30 0.30 0.30 430.11 + 64 1000 0.27 0.27 0.27 961.68 + 128 1000 0.38 0.38 0.38 1340.31 + 256 1000 0.40 0.40 0.40 2537.17 + 512 1000 0.51 0.51 0.51 4026.74 + 1024 1000 1.10 1.10 1.10 3716.88 + 2048 1000 1.13 1.13 1.13 7270.79 + 4096 1000 1.48 1.48 1.48 11055.33 + 8192 1000 2.36 2.36 2.36 13892.40 + 16384 1000 5.75 5.75 5.75 11403.12 + 32768 1000 7.33 7.33 7.33 17879.14 + 65536 640 9.49 9.49 9.49 27617.27 + 131072 320 23.61 23.61 23.61 22205.89 + 262144 160 39.03 39.04 39.04 26862.03 + 524288 80 72.49 72.51 72.50 28921.25 + 1048576 40 155.36 155.40 155.38 26990.81 + 2097152 20 370.82 
370.88 370.85 22617.81 + 4194304 10 1138.46 1139.03 1138.75 14729.39 + +#----------------------------------------------------------------------------- +# Benchmarking Exchange +# #processes = 4 +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec + 0 1000 0.48 0.48 0.48 0.00 + 1 1000 0.49 0.49 0.49 8.14 + 2 1000 0.49 0.49 0.49 16.47 + 4 1000 0.32 0.32 0.32 49.35 + 8 1000 0.43 0.44 0.43 73.56 + 16 1000 0.43 0.43 0.43 149.32 + 32 1000 0.35 0.35 0.35 362.30 + 64 1000 0.34 0.35 0.34 742.03 + 128 1000 0.78 0.78 0.78 659.20 + 256 1000 0.74 0.74 0.74 1378.20 + 512 1000 0.64 0.64 0.64 3196.50 + 1024 1000 1.15 1.15 1.15 3568.88 + 2048 1000 1.55 1.55 1.55 5287.55 + 4096 1000 2.33 2.33 2.33 7024.83 + 8192 1000 3.72 3.73 3.72 8796.54 + 16384 1000 3.96 3.96 3.96 16564.97 + 32768 1000 6.61 6.61 6.61 19831.45 + 65536 640 9.23 9.23 9.23 28390.25 + 131072 320 28.62 28.63 28.63 18311.94 + 262144 160 39.23 39.24 39.23 26724.25 + 524288 80 75.91 76.16 76.09 27535.23 + 1048576 40 357.60 358.45 358.01 11701.06 + 2097152 20 948.37 950.49 949.48 8825.56 + 4194304 10 2796.74 2845.13 2831.96 5896.82 + +#---------------------------------------------------------------- +# Benchmarking Allreduce +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.03 0.03 0.03 + 4 1000 0.40 0.54 0.47 + 8 1000 0.33 0.42 0.38 + 16 1000 0.32 0.42 0.37 + 32 1000 0.45 0.58 0.52 + 64 1000 0.39 0.68 0.53 + 128 1000 0.43 0.46 0.45 + 256 1000 0.63 0.70 0.66 + 512 1000 0.61 0.63 0.62 + 1024 1000 0.76 0.83 0.80 + 2048 1000 0.94 1.00 0.97 + 4096 1000 1.41 1.50 1.46 + 8192 1000 2.13 2.14 2.14 + 16384 1000 3.70 3.80 3.75 + 32768 1000 6.67 6.72 6.69 + 65536 640 12.25 12.26 12.26 + 131072 320 22.13 22.26 22.19 + 262144 160 41.17 42.69 41.93 + 524288 80 93.93 98.98 96.45 + 1048576 
40 308.80 311.73 310.27 + 2097152 20 780.81 799.76 790.29 + 4194304 10 1935.58 2049.08 1992.33 + +#---------------------------------------------------------------- +# Benchmarking Allreduce +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.03 0.05 0.04 + 4 1000 0.49 0.79 0.63 + 8 1000 0.50 0.75 0.63 + 16 1000 0.45 0.71 0.59 + 32 1000 0.53 0.85 0.69 + 64 1000 0.49 0.76 0.63 + 128 1000 1.16 1.37 1.29 + 256 1000 1.01 1.12 1.07 + 512 1000 1.24 1.39 1.33 + 1024 1000 1.66 1.79 1.74 + 2048 1000 2.08 2.23 2.15 + 4096 1000 2.55 2.82 2.68 + 8192 1000 3.89 4.04 3.96 + 16384 1000 6.07 6.23 6.15 + 32768 1000 12.16 12.57 12.34 + 65536 640 19.63 19.73 19.69 + 131072 320 64.43 68.24 66.55 + 262144 160 177.83 202.87 192.17 + 524288 80 146.31 152.52 149.19 + 1048576 40 525.75 530.23 528.61 + 2097152 20 1563.05 1581.81 1573.21 + 4194304 10 3786.31 3917.41 3835.28 + +#---------------------------------------------------------------- +# Benchmarking Reduce +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.04 0.04 + 4 1000 0.10 0.27 0.19 + 8 1000 0.12 0.32 0.22 + 16 1000 0.13 0.38 0.25 + 32 1000 0.10 0.32 0.21 + 64 1000 0.10 0.30 0.20 + 128 1000 0.18 0.42 0.30 + 256 1000 0.12 0.37 0.24 + 512 1000 0.20 0.34 0.27 + 1024 1000 0.19 0.39 0.29 + 2048 1000 0.23 0.55 0.39 + 4096 1000 0.30 0.88 0.59 + 8192 1000 0.59 2.13 1.36 + 16384 1000 1.18 2.95 2.07 + 32768 1000 2.45 5.22 3.83 + 65536 640 4.45 9.27 6.86 + 131072 320 8.71 16.55 12.63 + 262144 160 16.22 28.08 22.15 + 524288 80 39.04 78.31 58.67 + 1048576 40 68.08 127.46 97.77 + 2097152 20 213.82 387.31 300.57 + 4194304 10 436.79 791.39 614.09 + +#---------------------------------------------------------------- +# Benchmarking Reduce +# #processes = 4 
+#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.05 0.04 + 4 1000 0.09 0.88 0.42 + 8 1000 0.09 0.80 0.40 + 16 1000 0.09 0.84 0.42 + 32 1000 0.09 0.87 0.41 + 64 1000 0.08 0.70 0.33 + 128 1000 0.18 1.04 0.56 + 256 1000 0.20 1.02 0.54 + 512 1000 0.25 1.21 0.66 + 1024 1000 0.29 1.61 0.84 + 2048 1000 0.25 1.86 0.91 + 4096 1000 0.31 3.47 2.00 + 8192 1000 0.48 6.11 3.39 + 16384 1000 1.12 10.65 6.16 + 32768 1000 2.49 17.47 10.50 + 65536 640 4.69 19.15 10.60 + 131072 320 11.60 40.19 23.63 + 262144 160 24.96 141.44 86.91 + 524288 80 35.92 135.20 81.28 + 1048576 40 89.15 478.39 271.23 + 2097152 20 235.92 1253.73 721.56 + 4194304 10 693.91 3518.80 1998.92 + +#---------------------------------------------------------------- +# Benchmarking Reduce_local +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.04 0.04 + 4 1000 0.05 0.05 0.05 + 8 1000 0.05 0.05 0.05 + 16 1000 0.05 0.07 0.06 + 32 1000 0.05 0.08 0.06 + 64 1000 0.05 0.08 0.06 + 128 1000 0.05 0.08 0.07 + 256 1000 0.05 0.09 0.07 + 512 1000 0.08 0.10 0.09 + 1024 1000 0.08 0.11 0.09 + 2048 1000 0.10 0.11 0.10 + 4096 1000 0.16 0.16 0.16 + 8192 1000 0.26 0.27 0.27 + 16384 1000 0.69 0.71 0.70 + 32768 1000 1.24 1.58 1.41 + 65536 640 3.04 3.52 3.28 + 131072 320 4.93 7.37 6.15 + 262144 160 12.76 19.06 15.91 + 524288 80 24.59 35.90 30.25 + 1048576 40 52.04 53.14 52.59 + 2097152 20 130.88 134.17 132.53 + 4194304 10 468.87 490.46 479.67 + +#---------------------------------------------------------------- +# Benchmarking Reduce_local +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.07 0.05 + 4 1000 0.04 0.07 0.06 + 8 1000 0.05 0.06 0.06 + 16 1000 0.05 0.06 0.06 + 32 1000 0.05 0.07 
0.06 + 64 1000 0.05 0.07 0.06 + 128 1000 0.05 0.07 0.06 + 256 1000 0.05 0.06 0.05 + 512 1000 0.05 0.09 0.07 + 1024 1000 0.07 0.09 0.08 + 2048 1000 0.13 0.15 0.14 + 4096 1000 0.15 0.19 0.16 + 8192 1000 0.27 0.62 0.46 + 16384 1000 0.66 1.25 0.94 + 32768 1000 1.38 2.29 1.80 + 65536 640 2.56 5.08 3.88 + 131072 320 5.34 5.65 5.43 + 262144 160 12.79 21.23 17.18 + 524288 80 32.15 50.21 42.13 + 1048576 40 130.92 181.02 157.94 + 2097152 20 684.78 709.62 694.64 + 4194304 10 1617.89 1759.07 1673.24 + +#---------------------------------------------------------------- +# Benchmarking Reduce_scatter +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.06 0.05 + 4 1000 0.37 0.76 0.56 + 8 1000 0.51 0.64 0.57 + 16 1000 0.35 0.36 0.36 + 32 1000 0.37 0.40 0.38 + 64 1000 0.38 0.41 0.39 + 128 1000 0.59 0.73 0.66 + 256 1000 0.50 0.51 0.51 + 512 1000 0.50 0.51 0.51 + 1024 1000 0.88 1.17 1.02 + 2048 1000 1.35 1.39 1.37 + 4096 1000 1.62 1.64 1.63 + 8192 1000 3.16 3.33 3.25 + 16384 1000 4.44 4.57 4.50 + 32768 1000 9.51 9.85 9.68 + 65536 640 17.73 18.24 17.99 + 131072 320 35.83 38.85 37.34 + 262144 160 78.64 79.92 79.28 + 524288 80 159.00 162.06 160.53 + 1048576 40 550.81 565.27 558.04 + 2097152 20 1576.94 1620.94 1598.94 + 4194304 10 3623.67 3630.93 3627.30 + +#---------------------------------------------------------------- +# Benchmarking Reduce_scatter +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.05 0.04 + 4 1000 1.00 1.11 1.06 + 8 1000 1.02 1.19 1.09 + 16 1000 0.77 0.84 0.80 + 32 1000 1.00 1.33 1.13 + 64 1000 1.08 1.52 1.26 + 128 1000 0.96 1.16 1.06 + 256 1000 1.30 1.44 1.36 + 512 1000 1.98 2.16 2.05 + 1024 1000 1.57 1.67 1.62 + 2048 1000 3.54 4.11 3.82 + 4096 1000 4.83 5.39 5.09 + 8192 1000 8.85 9.08 8.95 + 16384 1000 
15.91 17.12 16.67 + 32768 1000 29.70 30.98 30.18 + 65536 640 64.85 71.35 69.63 + 131072 320 111.64 116.09 114.00 + 262144 160 339.68 346.54 344.11 + 524288 80 1319.32 1328.49 1323.81 + 1048576 40 3326.78 3366.44 3349.66 + 2097152 20 6838.31 6963.41 6915.80 + 4194304 10 14746.30 15056.83 14877.57 + +#---------------------------------------------------------------- +# Benchmarking Reduce_scatter_block +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.03 0.03 0.03 + 4 1000 0.39 0.49 0.44 + 8 1000 0.64 0.70 0.67 + 16 1000 0.48 0.54 0.51 + 32 1000 0.38 0.40 0.39 + 64 1000 0.44 0.46 0.45 + 128 1000 0.47 0.48 0.47 + 256 1000 0.50 0.52 0.51 + 512 1000 0.81 1.01 0.91 + 1024 1000 0.83 0.91 0.87 + 2048 1000 1.12 1.23 1.17 + 4096 1000 1.78 1.80 1.79 + 8192 1000 2.66 2.90 2.78 + 16384 1000 6.99 7.07 7.03 + 32768 1000 11.20 11.25 11.23 + 65536 640 20.20 21.22 20.71 + 131072 320 50.11 51.09 50.60 + 262144 160 91.89 92.26 92.07 + 524288 80 178.50 178.61 178.56 + 1048576 40 552.97 553.13 553.05 + 2097152 20 1839.21 1839.31 1839.26 + 4194304 10 3427.87 3430.03 3428.95 + +#---------------------------------------------------------------- +# Benchmarking Reduce_scatter_block +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.05 0.05 + 4 1000 0.84 1.13 0.99 + 8 1000 3.58 4.62 4.06 + 16 1000 4.47 5.10 4.72 + 32 1000 1.45 1.67 1.56 + 64 1000 1.05 1.38 1.21 + 128 1000 1.22 1.38 1.29 + 256 1000 1.27 1.51 1.36 + 512 1000 1.64 1.88 1.79 + 1024 1000 1.72 1.94 1.84 + 2048 1000 2.37 2.65 2.50 + 4096 1000 5.04 5.59 5.20 + 8192 1000 7.54 8.80 8.25 + 16384 1000 13.49 15.30 14.48 + 32768 1000 32.00 33.79 32.77 + 65536 640 81.04 82.73 81.85 + 131072 320 160.46 162.07 161.01 + 262144 160 339.70 345.34 343.08 + 524288 80 1203.91 1234.44 
1216.74 + 1048576 40 3069.81 3338.49 3238.48 + 2097152 20 6681.52 7225.47 7016.97 + 4194304 10 14691.20 15989.75 15435.48 + +#---------------------------------------------------------------- +# Benchmarking Allgather +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.06 0.05 + 1 1000 0.33 0.34 0.33 + 2 1000 0.36 0.37 0.36 + 4 1000 0.34 0.35 0.35 + 8 1000 0.36 0.37 0.36 + 16 1000 0.37 0.38 0.38 + 32 1000 0.34 0.36 0.35 + 64 1000 0.39 0.41 0.40 + 128 1000 0.44 0.49 0.47 + 256 1000 0.47 0.47 0.47 + 512 1000 0.48 0.73 0.61 + 1024 1000 0.59 0.64 0.61 + 2048 1000 0.80 0.83 0.82 + 4096 1000 1.44 1.49 1.47 + 8192 1000 1.74 1.78 1.76 + 16384 1000 2.88 2.89 2.88 + 32768 1000 6.06 6.54 6.30 + 65536 640 12.63 16.47 14.55 + 131072 320 26.43 27.23 26.83 + 262144 160 29.65 29.74 29.70 + 524288 80 56.23 96.49 76.36 + 1048576 40 134.14 224.22 179.18 + 2097152 20 673.28 699.62 686.45 + 4194304 10 1369.36 1387.71 1378.53 + +#---------------------------------------------------------------- +# Benchmarking Allgather +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.03 0.05 0.04 + 1 1000 0.66 0.89 0.77 + 2 1000 0.62 0.70 0.67 + 4 1000 0.69 0.77 0.74 + 8 1000 0.69 0.97 0.79 + 16 1000 0.81 1.06 0.94 + 32 1000 0.80 1.09 0.94 + 64 1000 0.90 1.17 1.03 + 128 1000 1.32 1.84 1.55 + 256 1000 1.34 1.71 1.47 + 512 1000 1.04 1.42 1.20 + 1024 1000 1.87 1.95 1.90 + 2048 1000 2.38 2.57 2.47 + 4096 1000 3.41 3.69 3.60 + 8192 1000 5.56 5.92 5.76 + 16384 1000 8.05 8.66 8.35 + 32768 1000 15.01 16.49 15.87 + 65536 640 33.18 36.75 34.21 + 131072 320 64.78 70.35 67.64 + 262144 160 95.68 99.57 97.62 + 524288 80 270.16 280.74 275.34 + 1048576 40 1097.00 1102.94 1099.89 + 2097152 20 2532.39 2539.87 2536.29 + 4194304 10 5664.18 5669.25 5666.18 + 
+#---------------------------------------------------------------- +# Benchmarking Allgatherv +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.04 0.04 + 1 1000 0.34 0.39 0.36 + 2 1000 0.37 0.43 0.40 + 4 1000 0.35 0.35 0.35 + 8 1000 0.39 0.40 0.39 + 16 1000 0.38 0.39 0.38 + 32 1000 0.55 0.61 0.58 + 64 1000 0.40 0.70 0.55 + 128 1000 0.54 0.55 0.55 + 256 1000 0.62 0.62 0.62 + 512 1000 0.74 0.77 0.75 + 1024 1000 0.68 0.68 0.68 + 2048 1000 1.27 1.63 1.45 + 4096 1000 1.41 1.70 1.55 + 8192 1000 1.90 2.18 2.04 + 16384 1000 2.75 2.85 2.80 + 32768 1000 6.26 10.59 8.43 + 65536 640 11.48 18.73 15.10 + 131072 320 19.48 21.74 20.61 + 262144 160 27.87 28.09 27.98 + 524288 80 48.91 97.14 73.03 + 1048576 40 138.25 216.02 177.14 + 2097152 20 606.24 608.93 607.59 + 4194304 10 1384.62 1459.10 1421.86 + +#---------------------------------------------------------------- +# Benchmarking Allgatherv +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.05 0.04 + 1 1000 4.07 4.56 4.37 + 2 1000 2.43 2.83 2.63 + 4 1000 3.19 3.75 3.41 + 8 1000 1.68 2.08 1.92 + 16 1000 2.63 2.76 2.69 + 32 1000 2.89 3.17 3.03 + 64 1000 1.76 1.85 1.80 + 128 1000 2.22 2.82 2.58 + 256 1000 1.94 2.17 2.06 + 512 1000 1.85 2.04 1.94 + 1024 1000 2.97 3.57 3.31 + 2048 1000 3.27 3.95 3.58 + 4096 1000 3.76 4.06 3.87 + 8192 1000 5.58 6.62 6.24 + 16384 1000 10.97 12.21 11.49 + 32768 1000 18.62 20.46 19.55 + 65536 640 37.15 39.97 38.20 + 131072 320 76.42 81.20 78.98 + 262144 160 138.67 141.36 140.03 + 524288 80 440.65 459.47 450.45 + 1048576 40 1137.11 1140.43 1138.78 + 2097152 20 2673.57 2704.45 2687.61 + 4194304 10 5306.04 5358.06 5330.34 + +#---------------------------------------------------------------- +# Benchmarking Gather +# #processes = 2 +# ( 2 
additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.04 0.04 + 1 1000 0.24 0.78 0.51 + 2 1000 0.10 0.21 0.16 + 4 1000 0.11 0.26 0.19 + 8 1000 0.07 0.22 0.15 + 16 1000 0.07 0.22 0.15 + 32 1000 0.08 0.26 0.17 + 64 1000 0.08 0.26 0.17 + 128 1000 0.11 0.35 0.23 + 256 1000 0.14 0.39 0.27 + 512 1000 0.19 0.49 0.34 + 1024 1000 0.13 0.51 0.32 + 2048 1000 0.19 0.66 0.42 + 4096 1000 0.28 1.09 0.68 + 8192 1000 0.47 1.61 1.04 + 16384 1000 1.06 2.45 1.76 + 32768 1000 2.22 4.21 3.21 + 65536 640 3.97 6.38 5.18 + 131072 320 9.41 16.19 12.80 + 262144 160 18.77 31.45 25.11 + 524288 80 29.63 48.71 39.17 + 1048576 40 88.98 228.98 158.98 + 2097152 20 169.41 274.94 222.18 + 4194304 10 512.83 855.75 684.29 + +#---------------------------------------------------------------- +# Benchmarking Gather +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.22 0.09 + 1 1000 0.10 0.57 0.29 + 2 1000 0.13 4.23 1.45 + 4 1000 0.10 0.78 0.37 + 8 1000 0.11 0.53 0.30 + 16 1000 0.11 0.52 0.30 + 32 1000 0.16 1.21 0.61 + 64 1000 0.11 0.78 0.41 + 128 1000 0.16 1.07 0.51 + 256 1000 0.16 0.73 0.43 + 512 1000 0.20 0.84 0.50 + 1024 1000 0.23 1.07 0.48 + 2048 1000 0.33 1.32 0.60 + 4096 1000 0.43 2.13 0.91 + 8192 1000 0.71 4.71 2.27 + 16384 1000 1.27 6.41 3.58 + 32768 1000 2.85 9.51 4.63 + 65536 640 4.88 17.04 8.10 + 131072 320 11.75 34.45 18.21 + 262144 160 15.97 60.32 39.99 + 524288 80 35.96 135.27 89.55 + 1048576 40 92.69 412.46 241.20 + 2097152 20 279.95 992.28 653.97 + 4194304 10 626.42 2315.46 1510.07 + +#---------------------------------------------------------------- +# Benchmarking Gatherv +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] 
t_avg[usec] + 0 1000 0.04 0.05 0.04 + 1 1000 0.08 0.23 0.16 + 2 1000 0.06 0.22 0.14 + 4 1000 0.06 0.21 0.13 + 8 1000 0.06 0.24 0.15 + 16 1000 0.06 0.22 0.14 + 32 1000 0.07 0.25 0.16 + 64 1000 0.09 0.22 0.16 + 128 1000 0.20 0.29 0.24 + 256 1000 0.18 0.28 0.23 + 512 1000 0.19 0.30 0.25 + 1024 1000 0.21 0.35 0.28 + 2048 1000 0.18 0.72 0.45 + 4096 1000 0.27 0.96 0.61 + 8192 1000 0.45 1.60 1.03 + 16384 1000 1.05 2.45 1.75 + 32768 1000 2.17 3.75 2.96 + 65536 640 4.21 6.89 5.55 + 131072 320 9.16 13.37 11.26 + 262144 160 24.05 24.18 24.12 + 524288 80 54.15 54.34 54.25 + 1048576 40 210.34 210.41 210.38 + 2097152 20 330.37 330.38 330.37 + 4194304 10 844.66 846.72 845.69 + +#---------------------------------------------------------------- +# Benchmarking Gatherv +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.03 0.06 0.05 + 1 1000 0.12 0.40 0.20 + 2 1000 0.13 0.84 0.32 + 4 1000 0.13 0.46 0.23 + 8 1000 0.12 0.44 0.21 + 16 1000 0.14 0.47 0.23 + 32 1000 0.12 0.50 0.23 + 64 1000 0.14 0.57 0.26 + 128 1000 0.18 0.61 0.30 + 256 1000 0.21 0.67 0.34 + 512 1000 0.21 0.89 0.38 + 1024 1000 0.20 0.99 0.41 + 2048 1000 0.25 1.27 0.53 + 4096 1000 0.36 2.45 0.91 + 8192 1000 0.54 3.53 1.34 + 16384 1000 1.26 5.63 2.63 + 32768 1000 2.48 8.81 4.23 + 65536 640 4.65 15.92 7.56 + 131072 320 10.10 28.75 15.97 + 262144 160 25.86 62.30 48.66 + 524288 80 50.71 120.89 93.52 + 1048576 40 220.55 384.71 323.42 + 2097152 20 461.44 1006.43 802.66 + 4194304 10 1110.01 2305.93 1863.30 + +#---------------------------------------------------------------- +# Benchmarking Scatter +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.03 0.04 0.04 + 1 1000 0.14 0.18 0.16 + 2 1000 0.11 0.14 0.13 + 4 1000 0.08 0.11 0.10 + 8 1000 0.08 0.12 0.10 + 16 1000 0.08 0.11 
0.10 + 32 1000 0.10 0.12 0.11 + 64 1000 0.10 0.13 0.11 + 128 1000 0.13 0.22 0.18 + 256 1000 0.14 0.22 0.18 + 512 1000 0.14 0.25 0.19 + 1024 1000 0.19 0.48 0.33 + 2048 1000 0.24 0.54 0.39 + 4096 1000 0.46 0.62 0.54 + 8192 1000 0.77 1.08 0.93 + 16384 1000 1.44 2.72 2.08 + 32768 1000 2.83 4.11 3.47 + 65536 640 6.23 6.69 6.46 + 131072 320 14.86 15.97 15.42 + 262144 160 24.16 24.18 24.17 + 524288 80 115.47 115.71 115.59 + 1048576 40 126.39 126.43 126.41 + 2097152 20 707.59 713.88 710.74 + 4194304 10 1096.86 1098.16 1097.51 + +#---------------------------------------------------------------- +# Benchmarking Scatter +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.03 0.04 0.03 + 1 1000 0.20 0.47 0.31 + 2 1000 0.19 0.50 0.32 + 4 1000 0.20 0.44 0.32 + 8 1000 0.19 0.33 0.27 + 16 1000 0.20 0.37 0.29 + 32 1000 0.22 0.50 0.35 + 64 1000 0.28 0.52 0.39 + 128 1000 0.29 0.60 0.45 + 256 1000 0.30 0.50 0.40 + 512 1000 0.33 0.64 0.50 + 1024 1000 1.31 2.00 1.69 + 2048 1000 0.82 1.58 1.15 + 4096 1000 1.12 2.34 1.76 + 8192 1000 1.93 3.24 2.48 + 16384 1000 2.84 5.41 4.09 + 32768 1000 8.40 10.42 9.13 + 65536 640 13.90 17.89 15.14 + 131072 320 36.02 40.42 37.53 + 262144 160 77.14 113.94 92.65 + 524288 80 106.52 142.50 130.29 + 1048576 40 200.42 456.76 360.97 + 2097152 20 431.43 939.13 748.42 + 4194304 10 1126.20 2453.30 1945.94 + +#---------------------------------------------------------------- +# Benchmarking Scatterv +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 1 1000 0.11 0.17 0.14 + 2 1000 0.09 0.12 0.11 + 4 1000 0.09 0.12 0.10 + 8 1000 0.13 0.16 0.14 + 16 1000 0.12 0.15 0.14 + 32 1000 0.13 0.19 0.16 + 64 1000 0.08 0.12 0.10 + 128 1000 0.13 0.21 0.17 + 256 1000 0.14 0.22 0.18 + 512 1000 0.14 0.28 0.21 + 1024 
1000 0.21 0.45 0.33 + 2048 1000 0.24 0.56 0.40 + 4096 1000 0.45 1.04 0.74 + 8192 1000 0.76 1.70 1.23 + 16384 1000 1.75 3.14 2.45 + 32768 1000 2.87 3.84 3.35 + 65536 640 6.79 8.25 7.52 + 131072 320 13.76 15.95 14.85 + 262144 160 25.63 26.02 25.82 + 524288 80 117.74 119.76 118.75 + 1048576 40 111.86 114.78 113.32 + 2097152 20 384.75 396.65 390.70 + 4194304 10 1038.59 1039.23 1038.91 + +#---------------------------------------------------------------- +# Benchmarking Scatterv +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.09 0.06 + 1 1000 0.15 0.25 0.19 + 2 1000 0.17 0.24 0.20 + 4 1000 0.18 0.24 0.20 + 8 1000 0.18 0.24 0.21 + 16 1000 0.17 0.23 0.21 + 32 1000 0.18 0.26 0.23 + 64 1000 0.15 0.28 0.23 + 128 1000 0.23 0.38 0.33 + 256 1000 0.37 0.53 0.46 + 512 1000 0.42 0.62 0.54 + 1024 1000 0.37 0.68 0.52 + 2048 1000 0.35 1.16 0.79 + 4096 1000 0.97 1.82 1.35 + 8192 1000 1.44 3.33 2.30 + 16384 1000 2.85 5.29 4.01 + 32768 1000 4.06 8.60 6.54 + 65536 640 9.74 19.45 15.45 + 131072 320 15.85 35.48 27.73 + 262144 160 27.17 63.36 49.52 + 524288 80 53.32 126.73 98.59 + 1048576 40 159.08 355.21 280.27 + 2097152 20 530.42 1142.96 911.24 + 4194304 10 1099.13 2292.91 1836.58 + +#---------------------------------------------------------------- +# Benchmarking Alltoall +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.03 0.03 0.03 + 1 1000 0.40 0.42 0.41 + 2 1000 0.37 0.39 0.38 + 4 1000 0.39 0.43 0.41 + 8 1000 0.39 0.41 0.40 + 16 1000 0.59 0.61 0.60 + 32 1000 0.53 0.56 0.54 + 64 1000 0.51 0.51 0.51 + 128 1000 0.60 0.74 0.67 + 256 1000 0.61 0.74 0.67 + 512 1000 0.56 0.60 0.58 + 1024 1000 0.83 0.86 0.85 + 2048 1000 1.09 1.10 1.09 + 4096 1000 1.39 1.67 1.53 + 8192 1000 2.16 2.22 2.19 + 16384 1000 2.78 2.86 2.82 + 32768 1000 
5.44 5.58 5.51 + 65536 640 7.96 7.99 7.97 + 131072 320 20.12 20.58 20.35 + 262144 160 33.06 33.46 33.26 + 524288 80 63.99 124.33 94.16 + 1048576 40 135.68 141.00 138.34 + 2097152 20 614.43 633.47 623.95 + 4194304 10 1724.02 1736.35 1730.19 + +#---------------------------------------------------------------- +# Benchmarking Alltoall +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.05 0.05 + 1 1000 1.62 1.97 1.84 + 2 1000 1.57 1.83 1.70 + 4 1000 1.81 1.96 1.88 + 8 1000 1.85 2.46 2.16 + 16 1000 1.34 1.87 1.58 + 32 1000 1.68 2.02 1.83 + 64 1000 1.56 1.70 1.62 + 128 1000 2.37 2.98 2.72 + 256 1000 2.80 3.49 3.09 + 512 1000 1.82 2.66 2.27 + 1024 1000 2.55 3.00 2.81 + 2048 1000 2.48 3.59 3.02 + 4096 1000 3.11 3.22 3.16 + 8192 1000 4.91 5.92 5.25 + 16384 1000 12.64 13.42 13.07 + 32768 1000 18.77 21.10 19.92 + 65536 640 26.15 29.85 27.62 + 131072 320 52.86 58.66 55.66 + 262144 160 97.72 136.03 122.83 + 524288 80 555.40 572.11 560.98 + 1048576 40 1220.62 1285.43 1266.34 + 2097152 20 2615.10 2733.68 2673.44 + 4194304 10 5510.09 5615.03 5538.86 + +#---------------------------------------------------------------- +# Benchmarking Alltoallv +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.43 0.43 0.43 + 1 1000 0.44 0.45 0.44 + 2 1000 0.44 0.45 0.44 + 4 1000 0.75 0.77 0.76 + 8 1000 0.69 0.82 0.75 + 16 1000 0.55 0.59 0.57 + 32 1000 0.79 0.84 0.82 + 64 1000 0.78 0.79 0.78 + 128 1000 0.97 0.99 0.98 + 256 1000 0.69 0.70 0.70 + 512 1000 1.23 1.27 1.25 + 1024 1000 1.02 1.22 1.12 + 2048 1000 5.33 5.36 5.34 + 4096 1000 2.10 2.15 2.12 + 8192 1000 2.19 2.20 2.20 + 16384 1000 3.58 3.65 3.62 + 32768 1000 5.39 5.64 5.52 + 65536 640 12.01 13.02 12.52 + 131072 320 22.40 23.28 22.84 + 262144 160 33.12 33.15 33.13 + 524288 80 65.32 
65.33 65.32 + 1048576 40 173.68 175.42 174.55 + 2097152 20 648.94 649.34 649.14 + 4194304 10 1441.24 1443.22 1442.23 + +#---------------------------------------------------------------- +# Benchmarking Alltoallv +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 1.29 1.89 1.68 + 1 1000 1.24 1.37 1.28 + 2 1000 1.47 1.82 1.64 + 4 1000 1.89 1.98 1.93 + 8 1000 1.21 1.31 1.25 + 16 1000 1.25 1.48 1.32 + 32 1000 2.08 2.26 2.15 + 64 1000 1.41 1.44 1.42 + 128 1000 1.72 1.85 1.78 + 256 1000 1.75 1.85 1.79 + 512 1000 1.87 3.03 2.39 + 1024 1000 2.34 3.25 2.63 + 2048 1000 2.89 3.63 3.25 + 4096 1000 3.72 4.19 3.96 + 8192 1000 4.71 5.82 5.24 + 16384 1000 12.19 13.92 13.34 + 32768 1000 17.68 20.60 19.23 + 65536 640 26.84 31.57 29.52 + 131072 320 51.62 62.90 57.51 + 262144 160 110.11 137.93 124.46 + 524288 80 496.80 511.04 502.08 + 1048576 40 1298.93 1326.61 1311.90 + 2097152 20 2678.97 2832.66 2761.56 + 4194304 10 5580.63 5710.08 5639.89 + +#---------------------------------------------------------------- +# Benchmarking Bcast +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.03 0.03 0.03 + 1 1000 0.15 0.29 0.22 + 2 1000 0.11 0.16 0.14 + 4 1000 0.13 0.26 0.20 + 8 1000 0.13 0.18 0.15 + 16 1000 0.12 0.18 0.15 + 32 1000 0.12 0.17 0.14 + 64 1000 0.24 0.34 0.29 + 128 1000 0.28 0.39 0.34 + 256 1000 0.25 0.36 0.30 + 512 1000 0.21 0.36 0.29 + 1024 1000 0.28 0.50 0.39 + 2048 1000 0.50 0.75 0.63 + 4096 1000 0.42 1.01 0.72 + 8192 1000 0.71 1.70 1.21 + 16384 1000 1.27 2.38 1.82 + 32768 1000 2.23 3.87 3.05 + 65536 640 4.20 5.80 5.00 + 131072 320 9.83 11.88 10.86 + 262144 160 18.65 20.22 19.44 + 524288 80 35.86 37.54 36.70 + 1048576 40 84.26 87.95 86.10 + 2097152 20 219.88 223.64 221.76 + 4194304 10 364.98 390.76 377.87 + 
+#---------------------------------------------------------------- +# Benchmarking Bcast +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.04 0.04 + 1 1000 0.17 0.28 0.23 + 2 1000 0.21 0.32 0.27 + 4 1000 0.15 0.21 0.18 + 8 1000 0.16 0.26 0.21 + 16 1000 0.16 0.22 0.19 + 32 1000 0.30 0.64 0.47 + 64 1000 0.17 0.33 0.24 + 128 1000 0.24 0.39 0.33 + 256 1000 0.24 0.44 0.36 + 512 1000 0.24 0.46 0.35 + 1024 1000 0.31 0.85 0.59 + 2048 1000 0.42 1.03 0.76 + 4096 1000 0.75 1.76 1.25 + 8192 1000 1.08 3.35 2.14 + 16384 1000 2.33 5.27 3.79 + 32768 1000 4.17 7.79 6.13 + 65536 640 9.76 13.78 11.69 + 131072 320 16.18 20.77 18.29 + 262144 160 36.27 40.89 38.37 + 524288 80 66.47 73.34 70.02 + 1048576 40 190.20 202.67 194.90 + 2097152 20 407.06 437.23 422.50 + 4194304 10 1098.57 1151.94 1124.53 + +#--------------------------------------------------- +# Benchmarking Barrier +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#--------------------------------------------------- + #repetitions t_min[usec] t_max[usec] t_avg[usec] + 1000 0.32 0.32 0.32 + +#--------------------------------------------------- +# Benchmarking Barrier +# #processes = 4 +#--------------------------------------------------- + #repetitions t_min[usec] t_max[usec] t_avg[usec] + 1000 0.61 0.61 0.61 + + +# All processes entering MPI_Finalize +``` + +#### OpenMPI + +```shell +#---------------------------------------------------------------- +# Intel(R) MPI Benchmarks 2021.3, MPI-1 part +#---------------------------------------------------------------- +# Date : Fri Dec 16 00:05:35 2022 +# Machine : x86_64 +# System : Linux +# Release : 5.15.79.1-microsoft-standard-WSL2 +# Version : #1 SMP Wed Nov 23 01:01:46 UTC 2022 +# MPI Version : 3.1 +# MPI Thread Environment: + + +# Calling sequence was: + +# IMB-MPI1 + +# Minimum message length in bytes: 0 +# Maximum message length in bytes: 4194304 +# 
+# MPI_Datatype : MPI_BYTE +# MPI_Datatype for reductions : MPI_FLOAT +# MPI_Op : MPI_SUM +# +# + +# List of Benchmarks to run: + +# PingPong +# PingPing +# Sendrecv +# Exchange +# Allreduce +# Reduce +# Reduce_local +# Reduce_scatter +# Reduce_scatter_block +# Allgather +# Allgatherv +# Gather +# Gatherv +# Scatter +# Scatterv +# Alltoall +# Alltoallv +# Bcast +# Barrier + +#--------------------------------------------------- +# Benchmarking PingPong +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#--------------------------------------------------- + #bytes #repetitions t[usec] Mbytes/sec + 0 1000 0.26 0.00 + 1 1000 0.18 5.68 + 2 1000 0.20 10.25 + 4 1000 0.18 22.10 + 8 1000 0.20 39.67 + 16 1000 0.22 74.38 + 32 1000 0.22 147.30 + 64 1000 0.22 288.42 + 128 1000 0.29 434.41 + 256 1000 0.25 1014.67 + 512 1000 0.43 1186.83 + 1024 1000 0.50 2045.14 + 2048 1000 0.59 3467.37 + 4096 1000 1.32 3109.63 + 8192 1000 1.81 4535.87 + 16384 1000 2.53 6466.56 + 32768 1000 3.48 9411.90 + 65536 640 6.36 10307.19 + 131072 320 9.75 13437.04 + 262144 160 17.26 15190.15 + 524288 80 45.22 11594.96 + 1048576 40 79.86 13130.38 + 2097152 20 181.69 11542.79 + 4194304 10 595.13 7047.71 + +#--------------------------------------------------- +# Benchmarking PingPing +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#--------------------------------------------------- + #bytes #repetitions t[usec] Mbytes/sec + 0 1000 0.29 0.00 + 1 1000 0.32 3.08 + 2 1000 0.31 6.43 + 4 1000 0.33 12.25 + 8 1000 0.31 26.11 + 16 1000 0.46 34.49 + 32 1000 0.43 74.31 + 64 1000 0.32 199.50 + 128 1000 0.26 500.00 + 256 1000 0.32 798.50 + 512 1000 0.66 779.77 + 1024 1000 0.66 1547.06 + 2048 1000 0.52 3947.57 + 4096 1000 1.47 2778.08 + 8192 1000 2.06 3971.11 + 16384 1000 2.59 6328.56 + 32768 1000 4.26 7700.15 + 65536 640 7.84 8356.19 + 131072 320 13.81 9491.52 + 262144 160 20.60 12724.66 + 524288 80 43.86 11955.03 + 1048576 40 75.60 13870.51 + 2097152 20 206.35 10163.08 + 
4194304 10 1023.95 4096.20 + +#----------------------------------------------------------------------------- +# Benchmarking Sendrecv +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec + 0 1000 0.23 0.23 0.23 0.00 + 1 1000 0.25 0.25 0.25 7.87 + 2 1000 0.26 0.26 0.26 15.38 + 4 1000 0.26 0.26 0.26 30.38 + 8 1000 0.31 0.31 0.31 50.87 + 16 1000 0.33 0.33 0.33 95.92 + 32 1000 0.33 0.33 0.33 196.32 + 64 1000 0.33 0.33 0.33 388.23 + 128 1000 0.34 0.34 0.34 744.62 + 256 1000 0.43 0.43 0.43 1203.01 + 512 1000 0.71 0.71 0.71 1441.44 + 1024 1000 0.71 0.71 0.71 2890.61 + 2048 1000 0.53 0.53 0.53 7709.39 + 4096 1000 1.61 1.61 1.61 5098.33 + 8192 1000 1.71 1.71 1.71 9557.81 + 16384 1000 2.35 2.35 2.35 13952.14 + 32768 1000 3.76 3.76 3.76 17441.85 + 65536 640 7.47 7.47 7.47 17555.63 + 131072 320 13.48 13.49 13.48 19439.67 + 262144 160 18.40 18.41 18.40 28483.27 + 524288 80 44.39 44.40 44.40 23617.91 + 1048576 40 69.35 69.36 69.36 30234.67 + 2097152 20 157.68 157.76 157.72 26586.61 + 4194304 10 836.31 836.49 836.40 10028.34 + +#----------------------------------------------------------------------------- +# Benchmarking Sendrecv +# #processes = 4 +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec + 0 1000 0.28 0.28 0.28 0.00 + 1 1000 0.40 0.41 0.40 4.94 + 2 1000 0.33 0.33 0.33 12.06 + 4 1000 0.31 0.31 0.31 25.96 + 8 1000 0.51 0.51 0.51 31.55 + 16 1000 0.40 0.40 0.40 80.79 + 32 1000 0.33 0.33 0.33 194.65 + 64 1000 0.36 0.36 0.36 359.85 + 128 1000 0.77 0.77 0.77 330.54 + 256 1000 0.56 0.56 0.56 919.71 + 512 1000 0.86 0.86 0.86 1191.53 + 1024 1000 0.98 0.99 0.98 2078.77 + 2048 1000 1.36 1.36 1.36 3020.43 + 4096 1000 3.00 3.00 3.00 2732.58 + 8192 1000 4.25 4.25 4.25 3853.06 + 16384 1000 5.75 5.76 5.76 5689.19 + 32768 1000 
7.37 7.37 7.37 8889.13 + 65536 640 9.51 9.52 9.51 13770.33 + 131072 320 20.82 20.86 20.84 12568.14 + 262144 160 32.65 32.73 32.69 16016.13 + 524288 80 70.10 70.46 70.29 14882.12 + 1048576 40 227.93 237.94 234.45 8813.97 + 2097152 20 661.45 679.76 671.28 6170.23 + 4194304 10 1801.55 1886.96 1851.68 4445.57 + +#----------------------------------------------------------------------------- +# Benchmarking Exchange +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec + 0 1000 0.51 0.51 0.51 0.00 + 1 1000 0.58 0.58 0.58 6.93 + 2 1000 0.61 0.61 0.61 13.17 + 4 1000 0.63 0.63 0.63 25.37 + 8 1000 0.67 0.67 0.67 47.41 + 16 1000 0.69 0.69 0.69 92.38 + 32 1000 0.68 0.68 0.68 187.24 + 64 1000 0.67 0.67 0.67 381.69 + 128 1000 0.58 0.58 0.58 881.84 + 256 1000 0.67 0.67 0.67 1533.85 + 512 1000 0.90 0.90 0.90 2283.17 + 1024 1000 1.46 1.46 1.46 2806.25 + 2048 1000 1.83 1.83 1.83 4481.16 + 4096 1000 2.99 2.99 2.99 5482.72 + 8192 1000 3.71 3.71 3.71 8821.64 + 16384 1000 6.40 6.40 6.40 10243.20 + 32768 1000 7.24 7.24 7.24 18110.37 + 65536 640 13.15 13.15 13.15 19929.93 + 131072 320 24.39 24.40 24.39 21491.61 + 262144 160 35.77 35.78 35.78 29307.74 + 524288 80 77.41 77.42 77.41 27088.86 + 1048576 40 166.86 166.89 166.87 25132.52 + 2097152 20 1179.08 1179.24 1179.16 7113.57 + 4194304 10 1234.15 1234.61 1234.38 13589.08 + +#----------------------------------------------------------------------------- +# Benchmarking Exchange +# #processes = 4 +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec + 0 1000 0.56 0.56 0.56 0.00 + 1 1000 0.67 0.67 0.67 5.96 + 2 1000 0.66 0.66 0.66 12.12 + 4 1000 0.42 0.42 0.42 38.17 + 8 1000 0.69 0.69 0.69 46.68 + 16 1000 0.74 0.75 0.74 85.88 + 32 1000 0.45 0.45 0.45 285.84 + 64 1000 0.77 0.77 0.77 331.05 
+ 128 1000 0.81 0.81 0.81 628.84 + 256 1000 0.93 0.94 0.94 1083.83 + 512 1000 1.63 1.63 1.63 1253.44 + 1024 1000 1.34 1.34 1.34 3051.48 + 2048 1000 1.76 1.76 1.76 4657.46 + 4096 1000 3.52 3.52 3.52 4659.84 + 8192 1000 4.96 4.96 4.96 6607.25 + 16384 1000 6.16 6.16 6.16 10632.40 + 32768 1000 8.89 8.89 8.89 14739.61 + 65536 640 14.63 14.63 14.63 17914.04 + 131072 320 24.67 24.68 24.67 21246.66 + 262144 160 47.37 47.59 47.48 22033.83 + 524288 80 114.83 114.90 114.86 18251.98 + 1048576 40 360.60 361.06 360.92 11616.56 + 2097152 20 1372.81 1373.62 1373.20 6106.96 + 4194304 10 2944.74 2984.20 2966.24 5622.01 + +#---------------------------------------------------------------- +# Benchmarking Allreduce +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.04 0.04 + 4 1000 0.36 0.36 0.36 + 8 1000 0.47 0.55 0.51 + 16 1000 0.43 0.47 0.45 + 32 1000 0.45 0.46 0.45 + 64 1000 0.37 0.37 0.37 + 128 1000 0.36 0.40 0.38 + 256 1000 0.43 0.44 0.44 + 512 1000 0.61 0.62 0.62 + 1024 1000 0.94 1.01 0.97 + 2048 1000 1.37 1.38 1.37 + 4096 1000 2.01 2.02 2.01 + 8192 1000 3.03 3.25 3.14 + 16384 1000 6.43 6.43 6.43 + 32768 1000 9.71 9.73 9.72 + 65536 640 17.10 17.21 17.15 + 131072 320 25.51 25.58 25.54 + 262144 160 48.72 48.99 48.86 + 524288 80 101.68 101.68 101.68 + 1048576 40 184.23 185.26 184.75 + 2097152 20 629.48 630.34 629.91 + 4194304 10 1889.18 1889.91 1889.55 + +#---------------------------------------------------------------- +# Benchmarking Allreduce +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.03 0.05 0.04 + 4 1000 0.66 0.83 0.75 + 8 1000 0.53 0.77 0.66 + 16 1000 0.75 1.04 0.88 + 32 1000 0.89 1.50 1.26 + 64 1000 0.75 0.91 0.83 + 128 1000 0.68 0.90 0.78 + 256 1000 0.69 0.93 0.81 + 512 1000 2.18 2.46 2.35 + 1024 1000 1.85 
1.98 1.90 + 2048 1000 2.39 2.57 2.46 + 4096 1000 5.80 6.14 6.00 + 8192 1000 9.87 10.10 9.98 + 16384 1000 14.01 14.15 14.09 + 32768 1000 19.26 19.98 19.63 + 65536 640 26.40 27.09 26.69 + 131072 320 59.71 62.56 61.12 + 262144 160 113.04 115.33 114.20 + 524288 80 199.78 213.30 206.49 + 1048576 40 563.75 596.66 580.26 + 2097152 20 1660.43 1741.05 1700.81 + 4194304 10 4375.76 4447.30 4411.43 + +#---------------------------------------------------------------- +# Benchmarking Reduce +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.03 0.04 0.04 + 4 1000 0.08 0.41 0.24 + 8 1000 0.13 0.50 0.32 + 16 1000 0.18 0.44 0.31 + 32 1000 0.15 0.38 0.27 + 64 1000 0.10 0.53 0.32 + 128 1000 0.09 0.54 0.31 + 256 1000 0.10 0.80 0.45 + 512 1000 0.47 0.52 0.50 + 1024 1000 0.74 0.80 0.77 + 2048 1000 0.92 0.96 0.94 + 4096 1000 1.63 1.66 1.64 + 8192 1000 2.24 2.45 2.35 + 16384 1000 2.23 2.70 2.46 + 32768 1000 3.08 4.33 3.70 + 65536 640 5.77 8.65 7.21 + 131072 320 9.70 16.26 12.98 + 262144 160 19.70 33.95 26.83 + 524288 80 47.85 75.05 61.45 + 1048576 40 61.01 114.01 87.51 + 2097152 20 164.94 307.52 236.23 + 4194304 10 596.37 1093.82 845.09 + +#---------------------------------------------------------------- +# Benchmarking Reduce +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.08 0.05 + 4 1000 0.14 0.95 0.46 + 8 1000 0.10 0.83 0.39 + 16 1000 0.16 0.89 0.44 + 32 1000 0.18 0.89 0.45 + 64 1000 0.13 1.05 0.48 + 128 1000 0.09 0.79 0.36 + 256 1000 0.11 1.08 0.50 + 512 1000 0.57 1.43 1.01 + 1024 1000 0.69 1.49 1.09 + 2048 1000 1.01 2.33 1.62 + 4096 1000 1.85 6.13 4.38 + 8192 1000 2.04 8.53 6.04 + 16384 1000 3.45 13.05 9.23 + 32768 1000 4.38 20.17 13.66 + 65536 640 5.91 22.24 13.58 + 131072 320 11.21 43.89 26.25 + 262144 160 22.85 103.74 
60.77 + 524288 80 40.60 150.88 90.21 + 1048576 40 110.48 555.21 320.48 + 2097152 20 306.59 1480.92 872.33 + 4194304 10 746.41 3832.83 2228.23 + +#---------------------------------------------------------------- +# Benchmarking Reduce_local +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.05 0.04 + 4 1000 0.05 0.07 0.06 + 8 1000 0.05 0.07 0.06 + 16 1000 0.07 0.15 0.11 + 32 1000 0.05 0.06 0.06 + 64 1000 0.05 0.07 0.06 + 128 1000 0.07 0.09 0.08 + 256 1000 0.06 0.08 0.07 + 512 1000 0.06 0.11 0.09 + 1024 1000 0.08 0.13 0.10 + 2048 1000 0.18 0.18 0.18 + 4096 1000 0.16 0.22 0.19 + 8192 1000 0.26 0.28 0.27 + 16384 1000 0.71 0.92 0.82 + 32768 1000 1.78 2.26 2.02 + 65536 640 3.90 4.17 4.04 + 131072 320 6.83 7.00 6.91 + 262144 160 14.85 18.74 16.79 + 524288 80 39.38 39.61 39.50 + 1048576 40 68.25 76.08 72.16 + 2097152 20 126.16 126.74 126.45 + 4194304 10 630.61 684.03 657.32 + +#---------------------------------------------------------------- +# Benchmarking Reduce_local +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.03 0.05 0.04 + 4 1000 0.04 0.09 0.06 + 8 1000 0.05 0.09 0.07 + 16 1000 0.05 0.13 0.08 + 32 1000 0.04 0.06 0.05 + 64 1000 0.04 0.07 0.06 + 128 1000 0.06 0.12 0.08 + 256 1000 0.05 0.09 0.07 + 512 1000 0.08 0.11 0.09 + 1024 1000 0.07 0.14 0.09 + 2048 1000 0.09 0.19 0.14 + 4096 1000 0.15 0.17 0.16 + 8192 1000 0.28 0.37 0.31 + 16384 1000 0.67 0.70 0.69 + 32768 1000 1.38 1.53 1.46 + 65536 640 3.47 4.72 4.14 + 131072 320 6.23 7.45 6.82 + 262144 160 12.44 14.11 13.30 + 524288 80 27.95 48.00 37.74 + 1048576 40 171.38 193.43 185.13 + 2097152 20 426.38 469.51 456.50 + 4194304 10 1412.00 1494.24 1438.41 + +#---------------------------------------------------------------- +# Benchmarking Reduce_scatter +# 
#processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.03 0.05 0.04 + 4 1000 0.42 0.46 0.44 + 8 1000 0.45 0.47 0.46 + 16 1000 0.48 0.57 0.53 + 32 1000 0.51 0.57 0.54 + 64 1000 0.51 0.57 0.54 + 128 1000 0.58 0.61 0.59 + 256 1000 0.64 0.86 0.75 + 512 1000 0.92 0.99 0.96 + 1024 1000 1.18 1.24 1.21 + 2048 1000 1.54 1.73 1.64 + 4096 1000 4.60 4.74 4.67 + 8192 1000 5.05 5.29 5.17 + 16384 1000 6.96 7.12 7.04 + 32768 1000 12.15 12.46 12.31 + 65536 640 24.86 24.89 24.87 + 131072 320 40.04 40.42 40.23 + 262144 160 78.85 81.54 80.19 + 524288 80 162.22 165.57 163.90 + 1048576 40 743.00 762.76 752.88 + 2097152 20 1820.41 1870.24 1845.32 + 4194304 10 3497.36 3514.24 3505.80 + +#---------------------------------------------------------------- +# Benchmarking Reduce_scatter +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.06 0.06 + 4 1000 0.95 1.25 1.10 + 8 1000 0.98 1.18 1.09 + 16 1000 0.99 1.20 1.11 + 32 1000 0.91 1.06 1.00 + 64 1000 1.00 1.13 1.05 + 128 1000 1.13 1.19 1.16 + 256 1000 1.21 1.49 1.35 + 512 1000 1.96 2.07 2.02 + 1024 1000 2.49 2.82 2.64 + 2048 1000 4.00 4.27 4.13 + 4096 1000 8.23 9.25 8.73 + 8192 1000 11.25 13.15 12.12 + 16384 1000 16.53 17.21 16.84 + 32768 1000 33.09 35.00 34.00 + 65536 640 64.72 68.11 66.52 + 131072 320 144.47 150.16 147.28 + 262144 160 446.53 451.95 449.65 + 524288 80 1412.09 1434.95 1424.26 + 1048576 40 3451.50 3535.92 3491.86 + 2097152 20 6824.30 6988.88 6924.68 + 4194304 10 16108.30 16311.70 16206.27 + +#---------------------------------------------------------------- +# Benchmarking Reduce_scatter_block +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] 
t_avg[usec] + 0 1000 0.04 0.05 0.04 + 4 1000 0.40 0.42 0.41 + 8 1000 0.38 0.38 0.38 + 16 1000 0.40 0.42 0.41 + 32 1000 0.53 0.64 0.58 + 64 1000 0.58 0.64 0.61 + 128 1000 0.50 0.54 0.52 + 256 1000 0.70 0.73 0.72 + 512 1000 0.81 0.81 0.81 + 1024 1000 1.10 1.12 1.11 + 2048 1000 1.24 1.28 1.26 + 4096 1000 3.35 3.45 3.40 + 8192 1000 5.76 5.81 5.79 + 16384 1000 7.56 7.70 7.63 + 32768 1000 12.89 13.37 13.13 + 65536 640 27.23 27.47 27.35 + 131072 320 69.59 70.03 69.81 + 262144 160 138.31 139.02 138.67 + 524288 80 354.46 355.07 354.77 + 1048576 40 556.10 556.43 556.26 + 2097152 20 1509.76 1509.80 1509.78 + 4194304 10 4102.37 4102.69 4102.53 + +#---------------------------------------------------------------- +# Benchmarking Reduce_scatter_block +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.05 0.05 0.05 + 4 1000 0.67 1.08 0.88 + 8 1000 2.17 2.44 2.35 + 16 1000 2.26 2.63 2.46 + 32 1000 0.85 1.18 1.01 + 64 1000 0.84 1.29 1.10 + 128 1000 1.10 1.61 1.33 + 256 1000 1.75 1.92 1.83 + 512 1000 2.11 2.42 2.31 + 1024 1000 2.38 2.58 2.48 + 2048 1000 4.30 4.49 4.40 + 4096 1000 6.05 6.28 6.17 + 8192 1000 11.24 11.81 11.53 + 16384 1000 14.69 15.34 15.01 + 32768 1000 35.71 36.34 36.07 + 65536 640 61.05 63.58 62.71 + 131072 320 118.62 124.11 121.72 + 262144 160 368.33 374.39 371.93 + 524288 80 1125.28 1139.82 1130.23 + 1048576 40 3498.83 3853.47 3723.06 + 2097152 20 7675.02 8262.08 8042.12 + 4194304 10 16271.41 17634.28 17024.18 + +#---------------------------------------------------------------- +# Benchmarking Allgather +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.03 0.05 0.04 + 1 1000 0.33 0.35 0.34 + 2 1000 0.37 0.38 0.37 + 4 1000 0.36 0.38 0.37 + 8 1000 0.37 0.38 0.37 + 16 1000 0.41 0.42 0.41 + 32 1000 0.40 0.44 0.42 + 64 
1000 0.39 0.41 0.40 + 128 1000 0.45 0.48 0.47 + 256 1000 0.45 0.48 0.47 + 512 1000 0.70 0.75 0.72 + 1024 1000 1.54 1.55 1.54 + 2048 1000 1.02 1.04 1.03 + 4096 1000 1.67 1.69 1.68 + 8192 1000 2.63 2.80 2.71 + 16384 1000 3.68 3.74 3.71 + 32768 1000 6.03 6.76 6.40 + 65536 640 9.61 10.13 9.87 + 131072 320 17.46 18.41 17.93 + 262144 160 37.40 38.49 37.95 + 524288 80 75.37 75.44 75.40 + 1048576 40 154.49 239.57 197.03 + 2097152 20 494.53 504.67 499.60 + 4194304 10 1969.52 2182.18 2075.85 + +#---------------------------------------------------------------- +# Benchmarking Allgather +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.03 0.06 0.05 + 1 1000 0.78 0.91 0.84 + 2 1000 0.47 0.75 0.60 + 4 1000 0.71 0.96 0.83 + 8 1000 0.67 0.97 0.80 + 16 1000 0.87 1.02 0.94 + 32 1000 0.51 0.78 0.63 + 64 1000 0.76 0.97 0.84 + 128 1000 0.64 0.95 0.78 + 256 1000 0.89 1.07 0.98 + 512 1000 1.40 1.58 1.47 + 1024 1000 1.54 1.69 1.61 + 2048 1000 3.50 3.78 3.65 + 4096 1000 5.48 5.65 5.59 + 8192 1000 7.60 7.83 7.68 + 16384 1000 12.86 13.03 12.94 + 32768 1000 20.38 21.55 20.98 + 65536 640 31.22 32.91 32.06 + 131072 320 72.71 78.52 75.60 + 262144 160 151.10 158.53 155.43 + 524288 80 346.13 368.60 356.80 + 1048576 40 1021.08 1192.76 1106.77 + 2097152 20 2725.36 3304.12 3015.00 + 4194304 10 6026.64 6358.86 6190.94 + +#---------------------------------------------------------------- +# Benchmarking Allgatherv +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.05 0.04 + 1 1000 0.35 0.35 0.35 + 2 1000 0.32 0.32 0.32 + 4 1000 0.31 0.32 0.31 + 8 1000 0.42 0.45 0.43 + 16 1000 0.32 0.39 0.36 + 32 1000 0.42 0.45 0.44 + 64 1000 0.47 0.47 0.47 + 128 1000 0.46 0.46 0.46 + 256 1000 0.47 0.47 0.47 + 512 1000 0.91 0.95 0.93 + 1024 1000 0.81 0.83 0.82 + 
2048 1000 1.11 1.19 1.15 + 4096 1000 3.17 3.48 3.33 + 8192 1000 3.39 3.76 3.58 + 16384 1000 4.84 4.93 4.88 + 32768 1000 5.89 6.10 6.00 + 65536 640 9.42 9.64 9.53 + 131072 320 17.51 18.09 17.80 + 262144 160 35.09 35.38 35.23 + 524288 80 63.51 66.63 65.07 + 1048576 40 135.39 224.13 179.76 + 2097152 20 448.60 454.47 451.53 + 4194304 10 1625.34 1669.00 1647.17 + +#---------------------------------------------------------------- +# Benchmarking Allgatherv +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.06 0.05 + 1 1000 1.29 1.55 1.40 + 2 1000 1.36 1.45 1.41 + 4 1000 1.38 1.60 1.48 + 8 1000 1.59 1.88 1.72 + 16 1000 1.15 1.32 1.23 + 32 1000 1.50 1.69 1.59 + 64 1000 1.52 1.75 1.62 + 128 1000 3.25 4.99 4.50 + 256 1000 1.66 2.43 2.06 + 512 1000 2.33 2.63 2.46 + 1024 1000 2.45 2.71 2.62 + 2048 1000 6.06 7.10 6.63 + 4096 1000 10.02 10.47 10.19 + 8192 1000 9.87 9.98 9.92 + 16384 1000 17.67 20.85 19.27 + 32768 1000 22.59 25.84 24.20 + 65536 640 45.40 51.51 48.48 + 131072 320 94.94 105.70 100.41 + 262144 160 184.51 189.98 187.35 + 524288 80 471.08 487.16 479.48 + 1048576 40 1247.41 1341.55 1295.87 + 2097152 20 2871.42 2934.03 2902.35 + 4194304 10 6353.78 6441.01 6397.11 + +#---------------------------------------------------------------- +# Benchmarking Gather +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.04 0.04 + 1 1000 0.45 0.87 0.66 + 2 1000 0.13 0.44 0.28 + 4 1000 0.15 0.38 0.26 + 8 1000 0.16 0.41 0.29 + 16 1000 0.14 0.41 0.27 + 32 1000 0.14 0.43 0.29 + 64 1000 0.23 0.54 0.39 + 128 1000 0.09 0.50 0.30 + 256 1000 0.10 0.60 0.35 + 512 1000 0.50 0.69 0.59 + 1024 1000 0.60 0.77 0.68 + 2048 1000 0.77 0.88 0.83 + 4096 1000 1.25 1.41 1.33 + 8192 1000 1.87 2.24 2.06 + 16384 1000 2.98 3.32 3.15 + 32768 1000 3.71 
3.99 3.85 + 65536 640 6.56 6.74 6.65 + 131072 320 10.45 15.43 12.94 + 262144 160 16.48 25.95 21.22 + 524288 80 44.54 67.84 56.19 + 1048576 40 93.77 240.52 167.14 + 2097152 20 233.52 369.21 301.37 + 4194304 10 740.35 1255.77 998.06 + +#---------------------------------------------------------------- +# Benchmarking Gather +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.03 0.05 0.04 + 1 1000 0.08 0.78 0.35 + 2 1000 0.10 0.73 0.33 + 4 1000 0.09 0.88 0.41 + 8 1000 0.08 0.95 0.45 + 16 1000 0.09 0.90 0.42 + 32 1000 0.09 1.08 0.46 + 64 1000 0.10 0.95 0.42 + 128 1000 0.10 0.93 0.45 + 256 1000 0.15 1.08 0.62 + 512 1000 0.63 1.30 0.96 + 1024 1000 0.76 1.38 1.05 + 2048 1000 1.29 2.06 1.52 + 4096 1000 1.49 3.43 2.75 + 8192 1000 2.43 6.37 4.38 + 16384 1000 2.90 8.49 5.80 + 32768 1000 4.48 10.80 8.49 + 65536 640 9.28 22.18 17.43 + 131072 320 16.13 37.76 29.36 + 262144 160 21.39 78.12 52.32 + 524288 80 48.26 157.60 107.06 + 1048576 40 140.18 570.12 353.28 + 2097152 20 327.59 1231.90 800.72 + 4194304 10 730.96 2813.06 1817.03 + +#---------------------------------------------------------------- +# Benchmarking Gatherv +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.05 0.04 + 1 1000 0.07 0.38 0.23 + 2 1000 0.07 0.32 0.20 + 4 1000 0.07 0.33 0.20 + 8 1000 0.07 0.31 0.19 + 16 1000 0.08 0.35 0.21 + 32 1000 0.08 0.35 0.21 + 64 1000 0.08 0.42 0.25 + 128 1000 0.08 0.49 0.29 + 256 1000 0.14 0.62 0.38 + 512 1000 0.40 0.53 0.46 + 1024 1000 0.59 0.71 0.65 + 2048 1000 0.70 0.82 0.76 + 4096 1000 1.58 1.80 1.69 + 8192 1000 1.88 2.31 2.10 + 16384 1000 2.38 2.68 2.53 + 32768 1000 4.06 4.50 4.28 + 65536 640 8.93 9.08 9.01 + 131072 320 13.99 14.14 14.06 + 262144 160 25.63 26.67 26.15 + 524288 80 59.31 59.37 59.34 + 1048576 40 203.69 
204.05 203.87 + 2097152 20 345.34 346.53 345.93 + 4194304 10 1136.14 1137.23 1136.69 + +#---------------------------------------------------------------- +# Benchmarking Gatherv +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.07 0.05 + 1 1000 0.08 0.78 0.26 + 2 1000 0.08 1.01 0.35 + 4 1000 0.07 0.65 0.24 + 8 1000 0.07 0.73 0.26 + 16 1000 0.08 0.88 0.29 + 32 1000 0.08 0.75 0.25 + 64 1000 0.08 0.78 0.26 + 128 1000 0.09 0.88 0.29 + 256 1000 0.14 1.93 0.59 + 512 1000 0.66 1.26 0.93 + 1024 1000 0.94 1.66 1.24 + 2048 1000 0.87 1.82 1.36 + 4096 1000 1.57 3.69 2.94 + 8192 1000 2.41 5.90 4.65 + 16384 1000 2.96 7.35 5.79 + 32768 1000 4.48 11.19 8.67 + 65536 640 8.65 20.36 16.04 + 131072 320 15.29 35.65 28.15 + 262144 160 39.85 106.77 76.72 + 524288 80 77.21 167.39 134.80 + 1048576 40 281.45 520.31 431.04 + 2097152 20 546.98 1212.79 963.89 + 4194304 10 1257.33 2689.45 2158.32 + +#---------------------------------------------------------------- +# Benchmarking Scatter +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.04 0.04 + 1 1000 0.10 0.30 0.20 + 2 1000 0.09 0.42 0.25 + 4 1000 0.11 0.45 0.28 + 8 1000 0.15 0.39 0.27 + 16 1000 0.15 0.38 0.26 + 32 1000 0.09 0.46 0.28 + 64 1000 0.11 0.45 0.28 + 128 1000 0.10 0.52 0.31 + 256 1000 0.16 0.52 0.34 + 512 1000 0.75 0.99 0.87 + 1024 1000 0.48 0.56 0.52 + 2048 1000 0.52 0.56 0.54 + 4096 1000 1.04 1.24 1.14 + 8192 1000 1.70 1.94 1.82 + 16384 1000 3.04 3.35 3.19 + 32768 1000 3.78 3.97 3.88 + 65536 640 6.65 7.34 6.99 + 131072 320 12.80 13.09 12.94 + 262144 160 24.01 24.35 24.18 + 524288 80 55.83 55.93 55.88 + 1048576 40 101.15 101.35 101.25 + 2097152 20 319.96 320.17 320.07 + 4194304 10 1242.24 1243.26 1242.75 + 
+#---------------------------------------------------------------- +# Benchmarking Scatter +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.07 0.05 + 1 1000 0.20 0.47 0.36 + 2 1000 0.19 0.45 0.31 + 4 1000 0.18 0.41 0.29 + 8 1000 0.18 0.63 0.41 + 16 1000 0.21 0.74 0.45 + 32 1000 0.27 0.58 0.43 + 64 1000 0.20 0.58 0.41 + 128 1000 0.20 0.63 0.41 + 256 1000 0.61 1.23 0.88 + 512 1000 1.03 1.30 1.16 + 1024 1000 1.33 1.63 1.50 + 2048 1000 0.90 3.55 2.44 + 4096 1000 3.42 3.94 3.65 + 8192 1000 2.22 6.00 4.42 + 16384 1000 2.70 8.39 6.11 + 32768 1000 6.42 9.99 8.26 + 65536 640 15.80 19.24 17.27 + 131072 320 35.48 43.18 38.50 + 262144 160 76.39 98.59 85.57 + 524288 80 161.41 179.57 169.98 + 1048576 40 190.88 462.15 359.05 + 2097152 20 649.38 1421.91 1126.04 + 4194304 10 1188.08 2483.04 2013.01 + +#---------------------------------------------------------------- +# Benchmarking Scatterv +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.05 0.04 + 1 1000 0.09 0.40 0.25 + 2 1000 0.09 0.45 0.27 + 4 1000 0.09 0.49 0.29 + 8 1000 0.11 0.54 0.33 + 16 1000 0.09 0.48 0.29 + 32 1000 0.15 0.41 0.28 + 64 1000 0.16 0.48 0.32 + 128 1000 0.16 0.60 0.38 + 256 1000 0.16 0.55 0.36 + 512 1000 0.54 0.73 0.64 + 1024 1000 0.52 0.67 0.59 + 2048 1000 0.84 0.98 0.91 + 4096 1000 1.28 1.54 1.41 + 8192 1000 1.67 1.91 1.79 + 16384 1000 2.40 2.61 2.50 + 32768 1000 3.49 3.74 3.61 + 65536 640 6.78 7.16 6.97 + 131072 320 16.21 16.60 16.40 + 262144 160 29.46 30.44 29.95 + 524288 80 77.12 78.97 78.04 + 1048576 40 144.35 144.49 144.42 + 2097152 20 410.91 411.52 411.21 + 4194304 10 1360.36 1379.60 1369.98 + +#---------------------------------------------------------------- +# Benchmarking Scatterv +# #processes = 4 
+#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.06 0.05 + 1 1000 0.22 0.26 0.24 + 2 1000 0.27 0.30 0.29 + 4 1000 0.24 0.27 0.25 + 8 1000 0.23 0.28 0.26 + 16 1000 0.24 0.32 0.28 + 32 1000 0.27 0.29 0.27 + 64 1000 0.26 0.31 0.29 + 128 1000 0.32 0.41 0.35 + 256 1000 0.37 0.47 0.41 + 512 1000 0.50 1.94 1.27 + 1024 1000 0.62 2.64 1.73 + 2048 1000 0.86 3.38 2.36 + 4096 1000 1.89 5.99 4.31 + 8192 1000 2.29 6.40 4.71 + 16384 1000 2.77 8.16 6.09 + 32768 1000 4.49 13.05 9.77 + 65536 640 7.80 18.20 14.23 + 131072 320 15.38 38.14 29.31 + 262144 160 73.86 214.10 162.53 + 524288 80 81.73 189.55 147.96 + 1048576 40 180.12 422.30 330.73 + 2097152 20 527.02 1087.92 874.09 + 4194304 10 1333.51 2718.23 2212.36 + +#---------------------------------------------------------------- +# Benchmarking Alltoall +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.04 0.05 0.04 + 1 1000 0.40 0.43 0.42 + 2 1000 0.32 0.33 0.33 + 4 1000 0.33 0.37 0.35 + 8 1000 0.33 0.38 0.35 + 16 1000 0.32 0.33 0.33 + 32 1000 0.45 0.60 0.52 + 64 1000 0.44 0.44 0.44 + 128 1000 0.44 0.48 0.46 + 256 1000 0.43 0.45 0.44 + 512 1000 0.67 0.68 0.68 + 1024 1000 0.64 0.64 0.64 + 2048 1000 0.74 0.77 0.75 + 4096 1000 2.19 2.19 2.19 + 8192 1000 2.51 2.61 2.56 + 16384 1000 3.10 3.30 3.20 + 32768 1000 5.56 5.77 5.67 + 65536 640 6.82 6.85 6.84 + 131072 320 15.42 15.48 15.45 + 262144 160 36.60 36.96 36.78 + 524288 80 107.55 133.62 120.58 + 1048576 40 246.94 252.61 249.77 + 2097152 20 607.25 621.75 614.50 + 4194304 10 1516.01 1526.55 1521.28 + +#---------------------------------------------------------------- +# Benchmarking Alltoall +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.03 
0.06 0.04 + 1 1000 0.76 0.99 0.87 + 2 1000 1.14 1.49 1.31 + 4 1000 1.02 1.04 1.03 + 8 1000 0.70 0.81 0.78 + 16 1000 0.80 1.35 1.07 + 32 1000 0.66 1.05 0.87 + 64 1000 0.78 1.06 0.93 + 128 1000 0.84 1.08 1.00 + 256 1000 0.91 1.44 1.17 + 512 1000 1.78 2.03 1.90 + 1024 1000 2.10 2.40 2.25 + 2048 1000 2.75 2.84 2.80 + 4096 1000 5.84 6.29 6.07 + 8192 1000 7.68 8.91 8.26 + 16384 1000 9.52 10.38 9.91 + 32768 1000 18.18 18.37 18.29 + 65536 640 30.13 31.65 31.01 + 131072 320 55.37 59.75 56.64 + 262144 160 120.01 131.47 126.16 + 524288 80 734.28 855.06 793.95 + 1048576 40 1427.72 1464.75 1443.44 + 2097152 20 2849.51 2942.02 2902.55 + 4194304 10 5531.94 5691.21 5615.79 + +#---------------------------------------------------------------- +# Benchmarking Alltoallv +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.42 0.42 0.42 + 1 1000 0.39 0.41 0.40 + 2 1000 0.42 0.43 0.43 + 4 1000 0.33 0.42 0.37 + 8 1000 0.33 0.36 0.34 + 16 1000 0.42 0.46 0.44 + 32 1000 0.47 0.50 0.48 + 64 1000 0.38 0.41 0.40 + 128 1000 0.49 0.49 0.49 + 256 1000 0.53 0.63 0.58 + 512 1000 0.72 0.73 0.73 + 1024 1000 0.80 0.80 0.80 + 2048 1000 0.93 0.95 0.94 + 4096 1000 2.05 2.07 2.06 + 8192 1000 3.00 3.01 3.01 + 16384 1000 3.61 3.62 3.61 + 32768 1000 4.90 4.91 4.91 + 65536 640 9.09 9.20 9.14 + 131072 320 19.90 19.95 19.93 + 262144 160 38.56 39.48 39.02 + 524288 80 63.34 63.84 63.59 + 1048576 40 204.51 204.70 204.60 + 2097152 20 781.03 781.39 781.21 + 4194304 10 1928.13 1929.03 1928.58 + +#---------------------------------------------------------------- +# Benchmarking Alltoallv +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.70 0.91 0.80 + 1 1000 1.25 1.35 1.29 + 2 1000 1.48 1.54 1.51 + 4 1000 1.23 1.29 1.26 + 8 1000 1.25 1.32 1.29 + 16 1000 1.60 1.81 1.69 
+ 32 1000 1.15 1.21 1.18 + 64 1000 1.19 1.24 1.21 + 128 1000 1.66 1.76 1.72 + 256 1000 1.68 1.95 1.82 + 512 1000 1.58 1.83 1.72 + 1024 1000 2.18 2.31 2.25 + 2048 1000 2.79 2.93 2.86 + 4096 1000 6.25 7.32 6.93 + 8192 1000 7.01 7.43 7.18 + 16384 1000 10.88 11.89 11.38 + 32768 1000 15.87 18.40 17.24 + 65536 640 26.12 31.23 29.05 + 131072 320 56.81 58.07 57.60 + 262144 160 132.07 141.73 137.63 + 524288 80 660.26 683.23 670.93 + 1048576 40 1489.76 1603.81 1559.38 + 2097152 20 3029.63 3169.46 3116.94 + 4194304 10 5748.52 5944.13 5852.51 + +#---------------------------------------------------------------- +# Benchmarking Bcast +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.03 0.04 0.03 + 1 1000 0.15 0.35 0.25 + 2 1000 0.13 0.33 0.23 + 4 1000 0.14 0.36 0.25 + 8 1000 0.16 0.38 0.27 + 16 1000 0.12 0.41 0.26 + 32 1000 0.10 0.43 0.27 + 64 1000 0.14 0.48 0.31 + 128 1000 0.13 0.55 0.34 + 256 1000 0.12 0.55 0.34 + 512 1000 0.52 0.73 0.62 + 1024 1000 0.69 0.87 0.78 + 2048 1000 0.83 1.00 0.91 + 4096 1000 1.61 1.86 1.74 + 8192 1000 1.67 1.93 1.80 + 16384 1000 2.94 3.39 3.17 + 32768 1000 3.56 4.21 3.89 + 65536 640 8.01 8.27 8.14 + 131072 320 9.05 9.79 9.42 + 262144 160 22.11 22.34 22.23 + 524288 80 43.34 55.66 49.50 + 1048576 40 79.26 84.75 82.00 + 2097152 20 169.04 169.42 169.23 + 4194304 10 627.77 629.03 628.40 + +#---------------------------------------------------------------- +# Benchmarking Bcast +# #processes = 4 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 0 1000 0.03 0.05 0.04 + 1 1000 0.18 0.37 0.28 + 2 1000 0.18 0.36 0.28 + 4 1000 0.31 0.54 0.43 + 8 1000 0.32 0.53 0.42 + 16 1000 0.21 0.42 0.31 + 32 1000 0.24 0.48 0.37 + 64 1000 0.20 0.45 0.34 + 128 1000 0.20 0.61 0.40 + 256 1000 0.22 0.69 0.46 + 512 1000 0.90 1.49 1.19 + 1024 1000 0.79 
1.82 1.34 + 2048 1000 1.25 2.56 1.80 + 4096 1000 2.27 4.60 3.65 + 8192 1000 3.12 4.90 3.71 + 16384 1000 4.72 6.42 5.31 + 32768 1000 8.43 11.44 9.61 + 65536 640 19.96 24.58 21.57 + 131072 320 39.55 83.76 51.44 + 262144 160 71.67 83.03 75.25 + 524288 80 129.91 144.61 137.79 + 1048576 40 139.53 373.28 285.45 + 2097152 20 413.37 1166.02 899.89 + 4194304 10 876.21 2297.16 1775.61 + +#--------------------------------------------------- +# Benchmarking Barrier +# #processes = 2 +# ( 2 additional processes waiting in MPI_Barrier) +#--------------------------------------------------- + #repetitions t_min[usec] t_max[usec] t_avg[usec] + 1000 0.25 0.25 0.25 + +#--------------------------------------------------- +# Benchmarking Barrier +# #processes = 4 +#--------------------------------------------------- + #repetitions t_min[usec] t_max[usec] t_avg[usec] + 1000 0.72 0.72 0.72 + + +# All processes entering MPI_Finalize +``` + +### 3.8.测试总结 + +性能测试arm平台均在x86平台50%以上,且随着线程数的增加,两个平台运行同一应用所耗费的时间差距逐渐缩小。 + +且线程增加并不会无限制减少应用的实际耗费时间,在合理的范围内分配线程数才能更好地利用算力资源。 diff --git "a/doc/imb/2021.3/\343\200\212\345\237\272\344\272\216openEuler\347\232\204imb\350\275\257\344\273\266\347\247\273\346\244\215\346\214\207\345\215\227\343\200\213.md" "b/doc/imb/2021.3/\343\200\212\345\237\272\344\272\216openEuler\347\232\204imb\350\275\257\344\273\266\347\247\273\346\244\215\346\214\207\345\215\227\343\200\213.md" new file mode 100644 index 0000000..4111e31 --- /dev/null +++ "b/doc/imb/2021.3/\343\200\212\345\237\272\344\272\216openEuler\347\232\204imb\350\275\257\344\273\266\347\247\273\346\244\215\346\214\207\345\215\227\343\200\213.md" @@ -0,0 +1,287 @@ +# 《基于openEuler的imb软件移植指南》 + +## 1.介绍 + +- IMB(Intel MPI Benchmarks)用于评估HPC集群在不同消息粒度下节点间点对点、全局通信的效率。 + +- 英特尔(R)MPI基准测试提供了一套符合MPI-1、MPI-2和MPI-3标准的基本基准测试。可以使用一个可执行文件运行所有支持的基准,或只运行在命令行中指定的子集。使用命令行参数来指定各种设置,如时间测量、消息长度和通信器的选择。 + +- 官网地址:https://www.intel.com/content/www/us/en/developer/articles/technical/intel-mpi-benchmarks.html + +- GITHUB托管地址:https://github.com/intel/mpi-benchmarks + +## 2.环境要求 + +- 操作系统:OpenEuler arm/x86 (本文档以 x86 架构为例) + +## 3.配置编译环境 + +配置环境指导,手动进行配置依赖环境。 + +### 
3.1.环境总览 + +- 编译器: gcc + +- MPI:hmpi + + 具体版本和下载地址如下 + +| 名称 | 版本 | 软件下载地址 | +| -------- | ------ | ---------------------------------------------------------------------------------------------- | +| hmpi | 1.1.1 | https://github.com/kunpengcompute/hmpi/archive/refs/tags/v1.1.1-huawei.zip | +| gcc | 9.3.0 | https://ftp.gnu.org/gnu/gcc/gcc-9.3.0/gcc-9.3.0.tar.gz | + +### 3.2.创建文件夹 + +```bash +mkdir -p $HOME/build +mkdir -p $HOME/install +mkdir -p $HOME/tmp +``` + +### 3.3.安装预设 + +设置环境变量,方便修改自定义安装目录 + +- 编译目录为 $HOME/build , 根据实际情况进行修改 +- 软件安装目录为 $HOME/install , 根据实际情况进行修改 +- 下载目录为 $HOME/tmp , 根据实际情况进行修改 + +```bash +#为了方便自定义软件安装目录 +#环境变量DEP_INSTALL_DIR将在后文中作为软件安装目录的根目录 +export DEP_INSTALL_DIR=$HOME/install +#环境变量DEP_BUILD_DIR将在后文中作为编译的根目录 +export DEP_BUILD_DIR=$HOME/build +#环境变量DEP_DOWNLOAD_DIR将在后文中作为下载文件的保存目录 +export DEP_DOWNLOAD_DIR=$HOME/tmp + +#注: 以上变量只在一次会话中有效。如果中途断开ssh会话,则在后续的安装过程中不会生效,需要重新运行 +``` + +### 3.4.安装环境依赖和gcc编译器 + +```bash +#环境依赖 +yum -y install wget tar libatomic + +#安装gcc编译器 +wget -P $DEP_DOWNLOAD_DIR https://ftp.gnu.org/gnu/gcc/gcc-9.3.0/gcc-9.3.0.tar.gz +tar -xf $DEP_DOWNLOAD_DIR/gcc-9.3.0.tar.gz -C $DEP_BUILD_DIR && cd $DEP_BUILD_DIR/gcc-9.3.0 +sed -i "35s/ftp/http/g" ./contrib/download_prerequisites +./contrib/download_prerequisites +./configure --disable-multilib --enable-languages="c,c++,fortran" --prefix=$DEP_INSTALL_DIR/gcc-9.3.0 --disable-static --enable-shared +make -j && make install +#设置环境变量 +echo "export PATH=$DEP_INSTALL_DIR/gcc-9.3.0/bin:$PATH" >> ~/.bashrc && source ~/.bashrc +export CC=`which gcc` +export CXX=`which g++` +``` + +### 3.5.下载并编译hmpi + +```bash +yum -y install libstdc++ libstdc++-devel +yum -y install unzip make autoconf automake git libtool +#下载解压源码文件 +wget https://github.com/kunpengcompute/hucx/archive/refs/tags/v1.1.1-huawei.zip -O $DEP_DOWNLOAD_DIR/hucx-1.1.1-huawei.zip +wget https://github.com/kunpengcompute/xucg/archive/refs/tags/v1.1.1-huawei.zip -O $DEP_DOWNLOAD_DIR/xucg-1.1.1-huawei.zip +wget https://github.com/kunpengcompute/hmpi/archive/refs/tags/v1.1.1-huawei.zip -O $DEP_DOWNLOAD_DIR/hmpi-1.1.1-huawei.zip + +cd $DEP_BUILD_DIR +unzip -q $DEP_DOWNLOAD_DIR/hucx-1.1.1-huawei.zip 
+unzip -q $DEP_DOWNLOAD_DIR/xucg-1.1.1-huawei.zip +unzip -q $DEP_DOWNLOAD_DIR/hmpi-1.1.1-huawei.zip +cp -rf xucg-1.1.1-huawei/* hucx-1.1.1-huawei/src/ucg/ + +#编译hucx +cd $DEP_BUILD_DIR/hucx-1.1.1-huawei +./autogen.sh +./contrib/configure-opt --prefix=$DEP_INSTALL_DIR/hmpi/hucx CFLAGS="-DHAVE___CLEAR_CACHE=1" --disable-numa --without-java +for file in `find . -name Makefile`;do sed -i "s/-Werror//g" $file;done +for file in `find . -name Makefile`;do sed -i "s/-implicit-function-declaration//g" $file;done +make -j +make install + +yum -y install flex +#编译hmpi +cd $DEP_BUILD_DIR/hmpi-1.1.1-huawei +./autogen.pl +./configure --prefix=$DEP_INSTALL_DIR/hmpi --with-platform=contrib/platform/mellanox/optimized --enable-mpi1-compatibility --with-ucx=$DEP_INSTALL_DIR/hmpi/hucx +make -j +make install + +echo "export PATH=$DEP_INSTALL_DIR/hmpi/bin:$PATH" >> ~/.bashrc && source ~/.bashrc +export CC=mpicc CXX=mpicxx FC=mpifort F77=mpifort +``` + +## 4.编译imb + +### 4.1.下载并编译imb + +获取imb软件源码并解压文件 + +```bash + +# 下载源码文件 +wget https://github.com/intel/mpi-benchmarks/archive/refs/tags/IMB-v2021.3.tar.gz -O $DEP_DOWNLOAD_DIR/IMB-v2021.3.tar.gz +# 解压源码文件 +tar -xvf $DEP_DOWNLOAD_DIR/IMB-v2021.3.tar.gz -C $DEP_BUILD_DIR +cd $DEP_BUILD_DIR/mpi-benchmarks-IMB-v2021.3 +#编译源码 +export CC=mpicc CXX=mpicxx FC=mpifort F77=mpifort +make all -j +``` + +### 4.2. 
运行测试文件 + +运行imb项目文件 + +```bash +# mpirun -n <P> IMB-<component> [arguments] +mpirun -n $(nproc) IMB-EXT +mpirun -n $(nproc) IMB-IO +``` + +## 附A:使用hpcrunner进行一键安装imb + +推荐使用hpcrunner进行安装imb + +### 1.克隆仓库 + +```bash +git clone https://gitee.com/openeuler/hpcrunner.git +``` + +### 2.初始化hpcrunner 和 安装必要软件包 + +初始化项目助手 + +```bash +cd hpcrunner +source init.sh +``` + +安装必要软件包 + +**arm / x86 需要的软件包不同,根据实际环境进行选择** + +```bash +# arm +yum install -y environment-modules git wget unzip make flex tar +# x86 +yum install -y environment-modules git wget unzip make flex tar +yum install -y gcc gcc-c++ gcc-gfortran glibc-devel libgfortran +yum install -y tcsh tcl lsof tk bc +``` + +### 3.选择平台对应配置文件 + +- arm平台的配置文件为 `templates/imb/2021.3/data.imb.arm.cpu.config` + + ```bash + ./jarvis -use templates/imb/2021.3/data.imb.arm.cpu.config + ``` + +- x86 平台的配置文件为 `templates/imb/2021.3/data.imb.amd.cpu.config` + + ```bash + ./jarvis -use templates/imb/2021.3/data.imb.amd.cpu.config + ``` + +### 4.下载imb源码 + +```bash +./jarvis -d +``` + +### 5.一键配置依赖环境 + +```bash +./jarvis -dp +``` + +### 6.一键进行编译 + +```bash +./jarvis -b +``` + +### 7.一键进行运行测试 + +```bash +./jarvis -r +``` + +## 附B:使用singularity运行容器 + +### 使用教程 + +### 下载容器镜像 + +通过链接下载: + +[百度云盘](https://pan.baidu.com/s/1UjHiv6DN_oOVXcuohP5Uqg?pwd=vxit) + +或者扫码下载: + 
+![百度云](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAZAAAAGQCAIAAAAP3aGbAAAJ7ElEQVR4nO3dQY4kNxIAwe6F/v/l0UXY0xYx4FKh8JbZdTCV2VlZDh4C5PevX7++AAr+80/fAMDvEiwgQ7CADMECMgQLyBAsIEOwgAzBAjIEC8gQLCBDsIAMwQIyBAvIECwgQ7CADMECMv64+D/f39/P72PMpw0LD3/UYY/Du0fx/APvrvXWkgd4YcmXePdHdX+Md3+vFRaQIVhAhmABGYIFZAgWkCFYQIZgARmCBWTcDI4eLDlH+mKa7u7O76YHu9Ohd57PlG74fpdbcufPX3UrLCBDsIAMwQIyBAvIECwgQ7CADMECMgQLyHg8OHrQnZa8m3u8s3yQcsMI6NnF33V355OTxht2WD2YHFK1wgIyBAvIECwgQ7CADMECMgQLyBAsIEOwgIy5wdHlJqfp7gYpNwzKTnr7ADdMbJ4t2SN0OSssIEOwgAzBAjIEC8gQLCBDsIAMwQIyBAvIMDj6l+c7cD6/1t1tfLrW8zs3fvk797DkhemywgIyBAvIECwgQ7CADMECMgQLyBAsIEOwgIy5wdEfOfzWPZ/9+adNTkteXGvDsO4Sy2/vzAoLyBAsIEOwgAzBAjIEC8gQLCBDsIAMwQIyHg+OLjnl/MLdHOCS/3UwdnT7hj/qcK0lf9TzRzHzaXtYYQEZggVkCBaQIVhAhmABGYIFZAgWkCFYQMZ3evvBGfuPF387JbhkkPLO8kfB/8kKC8gQLCBDsIAMwQIyBAvIECwgQ7CAjLmTnzfsana41vNhq+XHOx8s2fttbPxt8ozuO2MzX88v9PwDrbCADMECMgQLyBAsIEOwgAzBAjIEC8gQLCBjxQZ+kzOWYxu8HUyOI/7bvt+Le1iyS1/3zZx8gFZYQIZgARmCBWQIFpAhWECGYAEZggVkCBaQsWJw9LmLcbX9O46O3caPfCXu/MhDv5dMOxscBX44wQIyBAvIECwgQ7CADMECMgQLyBAsIOPx4OiSo703nN+9ZNDukyXf1HMXg8H7N+e8+8CLC+2fGbbCAjIEC8gQLCBDsIAMwQIyBAvIECwgQ7CAjLkdRyfPCn87Gjc5ArphvnHDcOP1tS7YNvZ3LrTkrbDCAjIEC8gQLCBDsIAMwQIyBAvIECwgQ7CAjD/eftyP3Jxzcs7zubFB2SVP6WLH0f3ToWO/nbuvY/KnbYUFZAgWkCFYQIZgARmCBWQIFpAhWECGYAEZN4Ojz+cAnw+eje3QeGfDiODBkg1C7x7Fp3/a8GDP3r6Bk1vXTv52rLCADMECMgQLyBAsIEOwgAzBAjIEC8gQLCBjxY6jk9camxJcvo3qwZIR0LfXWvK2TJ7q/taG39SXFRYQIlhAhmABGYIFZAgWkCFYQIZgARmCBWTcDI5umAM8G5tf3bBX6vk2/vELDbv4u57PlD7/X2OW396XFRYQIlhAhmABGYIFZAgWkCFYQIZgARmPN/CbHJsamxm5u9CPfBQHk4NdG3ZhnBzBu3hKz89gX8IKC8gQLCBDsIAMwQIyBAvIECwgQ7CADMECMr43jIpNnhz7yZIJxrHNESe/9yXbOl5Ysrffwdu9Gyd/BXfXssICMgQLyBAsIEOwgAzBAjIEC8gQLCBDsICMxzuOPh8hW/KBYzbc3uQc4Ni+rJMv0vO51rfv8+TtPWeFBWQIFpAhWECGYAEZggVkCBaQIVhAhmABGY93HF2y1+KG8ctJywcL77x9lybflucPcMNms5ODsgdWWECGYAEZggVkCBaQIVhAhmABGYIFZAgWkHGz4+iS87vHBgsPlpz6/XY6dHLsdmz3y/2zxN3pUEfVA/wPggVkCBaQIVhAhmABGYIFZAgWkCFYQMbjHUcvb2JwXO3iQs8t2bzxEztw/s6FJk+xv/jADb+pM4OjwA8nWECGYAEZggVkCBaQIVhAhmABGYIFZNzsOPrc5AzexY
UOlo9EPvd85vD5Y98wU3pnbAfdDXOt16ywgAzBAjIEC8gQLCBDsIAMwQIyBAvIECwg42bH0clhvyW7I/I3GTsAfclemstf2g0b3p5ZYQEZggVkCBaQIVhAhmABGYIFZAgWkCFYQMbNjqP7p8veTu5NbsM49r+WTzCejX2/z5/S/i09l7PCAjIEC8gQLCBDsIAMwQIyBAvIECwg42YOa/mE0fX/ujA5zTR2MvDze7izfMJow+t3uNb+M8nv7tAKC8gQLCBDsIAMwQIyBAvIECwgQ7CADMECMm4GR5fYcF70kmnJsQ38uuOIzy80+YFv3+e7Cy2Z47XCAjIEC8gQLCBDsIAMwQIyBAvIECwgQ7CAjO+LebAl59C+HX3cMIZ69namdP83deft+/z2Qudr/cjjyg/sOAr8cIIFZAgWkCFYQIZgARmCBWQIFpAhWEDGzeDo6eN2jwh+Dc7gvb2H58aeXuI2PlkyLbnhpb3zfMjcCgvIECwgQ7CADMECMgQLyBAsIEOwgAzBAjJujqp/vi3h3f9actj6J89v7+0M3uTt3Xn7gZM7cN7ZsJHvhpHmMyssIEOwgAzBAjIEC8gQLCBDsIAMwQIyBAvImNtxdHLm8OJay2/v+gMvbLiH820s/343WP6OXbPCAjIEC8gQLCBDsIAMwQIyBAvIECwgQ7CAjJvB0e506Pl/Xeg+ig1Pb/Jak9/U2Hn0zy2ZDj2wwgIyBAvIECwgQ7CADMECMgQLyBAsIEOwgIybo+rvpssmZ9LGpmHvbBgsXH6y/PUHfnq2S77EDdda/k2dWWEBGYIFZAgWkCFYQIZgARmCBWQIFpAhWEDGzeDohq0Rr40NFh7cDam+3Rhzw/Dq2du/9/kzf36tOxc7yr690DArLCBDsIAMwQIyBAvIECwgQ7CADMECMm7msA6WnBz7dk5n0uSh0G/v4flR0m+nmfbPnd0Z+8Ut+WlbYQEZggVkCBaQIVhAhmABGYIFZAgWkCFYQMbjwdGDDSORkxfaP3M4Zmw69GDJ1oN3NrxLG6Z/v6ywgBDBAjIEC8gQLCBDsIAMwQIyBAvIECwgY25wdLnnw29vDy6+/sC3JwMvmaEd24x0ybTkwYYdZSdZYQEZggVkCBaQIVhAhmABGYIFZAgWkCFYQIbB0b/cTdN1Z0qfDzc+n7HcsM1md6Z08n2eZIUFZAgWkCFYQIZgARmCBWQIFpAhWECGYAEZc4OjSwbPPlkyIjg2tHk3sblkx9G3ltze89ds5tPO/+v5r94KC8gQLCBDsIAMwQIyBAvIECwgQ7CADMECMh4Pji6ZwRuz/BD2O5MbZk4+wE+WDFKObb46+SN9/sJYYQEZggVkCBaQIVhAhmABGYIFZAgWkCFYQMb38o1AAf7LCgvIECwgQ7CADMECMgQLyBAsIEOwgAzBAjIEC8gQLCBDsIAMwQIyBAvIECwgQ7CADMECMgQLyPgTEwanYgjj78sAAAAASUVORK5CYII=) + +#### 1.安装singularity + +安装singularity, + +具体步骤如下 + +```bash +mkdir -p ~/install +mkdir -p ~/build + +#安装编译所需依赖 +yum -y install libatomic libstdc++ libstdc++-devel libseccomp-devel glib2-devel gcc squashfs-tools tar + +#安装bisheng编译器 +cd ~/build +wget https://mirrors.huaweicloud.com/kunpeng/archive/compiler/bisheng_compiler/bisheng-compiler-2.1.0-aarch64-linux.tar.gz +tar -C ~/install -xf bisheng-compiler-2.1.0-aarch64-linux.tar.gz +echo "export 
PATH=$HOME/install/bisheng-compiler-2.1.0-aarch64-linux/bin:$PATH" >> ~/.bashrc && source ~/.bashrc +export CC=`which clang` +export CXX=`which clang++` + +#安装go编译器 +cd ~/build +wget https://go.dev/dl/go1.19.linux-arm64.tar.gz +tar -C ~/install -xf go1.19.linux-arm64.tar.gz +echo "export PATH=$HOME/install/go/bin:$PATH" >> ~/.bashrc && source ~/.bashrc + +#安装singularity +cd ~/build +wget https://github.com/sylabs/singularity/releases/download/v3.10.2/singularity-ce-3.10.2.tar.gz +tar -xf singularity-ce-3.10.2.tar.gz +cd singularity-ce-3.10.2 +./mconfig --prefix=$HOME/install/singularity +make -C ./builddir +make -C ./builddir install +echo "export PATH=$HOME/install/singularity/bin:$PATH" >> ~/.bashrc && source ~/.bashrc +``` + +#### 2.构建镜像 + +```shell +# x86 +singularity build ./name-of-image.sif openeuler-gcc-9.3.0-hmpi1-imb-2021.3.def +# arm +singularity build ./name-of-image.sif openeuler-bisheng2-hmpi1-imb-2021.3.def +# 转换为沙盒 +singularity build --sandbox image-sandbox name-of-image.sif +``` + +#### 3.在沙盒中运行 + +```shell +#进入沙盒 +singularity shell -w image-sandbox +#在沙盒中运行内置的测试案例 +cd /hpcrunner +./jarvis -r +``` diff --git a/templates/imb/2021.3/data.imb.amd.cpu.config b/templates/imb/2021.3/data.imb.amd.cpu.config new file mode 100644 index 0000000..ad06a1a --- /dev/null +++ b/templates/imb/2021.3/data.imb.amd.cpu.config @@ -0,0 +1,44 @@ +[SERVER] +11.11.11.11 + +[DOWNLOAD] +imb/2021.3 https://github.com/intel/mpi-benchmarks/archive/refs/tags/IMB-v2021.3.tar.gz + +[DEPENDENCY] +set -e +set -x +module purge +./jarvis -install gcc/9.3.0 com +module use ./software/modulefiles +module load gcc/9.3.0 +export CC=`which gcc` +export CXX=`which g++` +export FC=`which gfortran` +./jarvis -install hmpi/1.1.1 gcc +module load hmpi/1.1.1 +cd ${JARVIS_TMP} +tar -xvf ${JARVIS_DOWNLOAD}/IMB-v2021.3.tar.gz -C ${JARVIS_TMP} + +[ENV] +module purge +module use ${JARVIS_ROOT}/software/modulefiles +module load gcc/9.3.0 +module load hmpi/1.1.1 +export CC=mpicc CXX=mpicxx FC=mpifort 
F77=mpifort + +[APP] +app_name = imb +build_dir = ${JARVIS_TMP}/mpi-benchmarks-IMB-v2021.3 +binary_dir = ${JARVIS_LIBS}/IMB-v2021.3 +case_dir = + +[BUILD] +make clean +make all -j +mkdir -p ${JARVIS_LIBS}/IMB-v2021.3 +cp -r IMB-* ${JARVIS_LIBS}/IMB-v2021.3 + +[RUN] +run = export OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 && mpirun -n 4 +binary = IMB-MPI1 +nodes = 1 diff --git a/templates/imb/2021.3/data.imb.arm.cpu.config b/templates/imb/2021.3/data.imb.arm.cpu.config new file mode 100644 index 0000000..e9cf0c5 --- /dev/null +++ b/templates/imb/2021.3/data.imb.arm.cpu.config @@ -0,0 +1,42 @@ +[SERVER] +11.11.11.11 + +[DOWNLOAD] +imb/2021.3 https://github.com/intel/mpi-benchmarks/archive/refs/tags/IMB-v2021.3.tar.gz + +[DEPENDENCY] +set -e +set -x +module purge +module use ./software/modulefiles +./jarvis -install bisheng/2.1.0 com +module load bisheng/2.1.0 +export CC=clang CXX=clang++ FC=flang +./jarvis -install hmpi/1.1.1 clang +module load hmpi/1.1.1 +cd ${JARVIS_TMP} +tar -xvf ${JARVIS_DOWNLOAD}/IMB-v2021.3.tar.gz -C ${JARVIS_TMP} + +[ENV] +module purge +module use ${JARVIS_ROOT}/software/modulefiles +module load bisheng/2.1.0 +module load hmpi/1.1.1 +export CC=mpicc CXX=mpicxx FC=mpifort F77=mpifort + +[APP] +app_name = imb +build_dir = ${JARVIS_TMP}/mpi-benchmarks-IMB-v2021.3 +binary_dir = ${JARVIS_LIBS}/IMB-v2021.3 +case_dir = + +[BUILD] +make clean +make all -j +mkdir -p ${JARVIS_LIBS}/IMB-v2021.3 +cp -r IMB-* ${JARVIS_LIBS}/IMB-v2021.3 + +[RUN] +run = export OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 && mpirun -n 4 +binary = IMB-MPI1 +nodes = 1 diff --git a/test/test-imb.sh b/test/test-imb.sh new file mode 100755 index 0000000..fe1b798 --- /dev/null +++ b/test/test-imb.sh @@ -0,0 +1,16 @@ +#!/bin/bash +cd .. 
+# remove previously extracted imb src code +rm -rf tmp/mpi-benchmarks-IMB-v2021.3 +# copy templates +cp -rf templates/imb/2021.3/data.imb.amd.cpu.config ./ +# switch to config +./jarvis -use data.imb.amd.cpu.config +# download imb src code +./jarvis -d +# install dependency +./jarvis -dp +# build +./jarvis -b +# run +./jarvis -r -- Gitee From fd1a3241941fcfb2379de40404e5ce915a21813b Mon Sep 17 00:00:00 2001 From: binshuo Date: Wed, 21 Dec 2022 01:27:18 +0000 Subject: [PATCH 2/2] chore : container imb configuration Signed-off-by: binshuo --- container/imb/openeuler-gcc-9.3.0-hmpi1-imb-2021.3.def | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/container/imb/openeuler-gcc-9.3.0-hmpi1-imb-2021.3.def b/container/imb/openeuler-gcc-9.3.0-hmpi1-imb-2021.3.def index 13c917f..1fc94ec 100644 --- a/container/imb/openeuler-gcc-9.3.0-hmpi1-imb-2021.3.def +++ b/container/imb/openeuler-gcc-9.3.0-hmpi1-imb-2021.3.def @@ -24,9 +24,9 @@ From: openeuler/openeuler ./jarvis -d # install dependency ./jarvis -dp - # build qmcpack + # build imb ./jarvis -b - # run qmcpack + # run imb ./jarvis -r # clean downloads directory rm -rf downloads -- Gitee