From 4b8017daa6d8231a56f9d3b1f429c64127f85743 Mon Sep 17 00:00:00 2001
From: Qiyu8
Date: Tue, 18 Apr 2023 17:04:11 +0800
Subject: [PATCH] Add all-reduce precise example

---
 examples/precise/allreduce-exp.F90 | 160 +++++++++++++++++++++++++++++
 examples/precise/run.sh            |  22 +++++
 2 files changed, 182 insertions(+)
 create mode 100644 examples/precise/allreduce-exp.F90
 create mode 100644 examples/precise/run.sh

diff --git a/examples/precise/allreduce-exp.F90 b/examples/precise/allreduce-exp.F90
new file mode 100644
index 0000000..fdfd41e
--- /dev/null
+++ b/examples/precise/allreduce-exp.F90
@@ -0,0 +1,160 @@
+! Precision demo: floating-point addition is not associative, so the result of
+! an MPI_SUM reduction can depend on the order in which operands are combined.
+!
+! Author's notes (translated): with 16 nodes and 16 ranks per node, the shuffled
+! data and the random data differ only in order, yet the results differ
+! considerably, so with the same MPI a different reduction order gives a
+! different result. Running on both Arm and x86 gave the same result for the
+! random data, which the author takes to show that across different MPIs, given
+! the same reduction order, only the recursive algorithm stays consistent.
+!
+! NOTE(review): only rank 0 shuffles its copy of the input; every other rank
+! passes the unshuffled values to both reductions. Element 1 of the two results
+! therefore sums a different rank-0 operand, not merely a different order.
+! Confirm that this is the intended experiment.
+program all_reduce_example
+  implicit none
+  include 'mpif.h'
+  ! Kind of DOUBLE PRECISION, the Fortran type MPI_DOUBLE_PRECISION describes.
+  integer, parameter :: dp = kind(1.0d0)
+  ! Number of input values; must match the array constructor below.
+  integer, parameter :: nvals = 256
+  ! Relative error threshold for comparing the two reduced values.
+  real(dp), parameter :: eps = 1.0e-12_dp
+  integer :: ierr, myrank, numprocs, i, j
+  real(dp) :: shuffled(nvals), original(nvals)
+  real(dp) :: shuffled_sum(nvals), original_sum(nvals)
+  real(dp) :: swap_tmp, u
+
+  call MPI_INIT(ierr)
+  call MPI_COMM_RANK(MPI_COMM_WORLD, myrank, ierr)
+  call MPI_COMM_SIZE(MPI_COMM_WORLD, numprocs, ierr)
+  call random_seed()
+
+  ! Hard-coded input (the original left a disabled loop that filled the array
+  ! with random_number instead). Every literal carries the _dp kind suffix;
+  ! without it they would be default-real constants and lose the digits shown.
+  original = [0.7038086369959871_dp, 2.8251120218371284E-002_dp, &
+    5.0046173844393138E-002_dp, 0.9789957831859226_dp, 0.3899646098248724_dp, &
+    0.5580048927465384_dp, 0.2869520527156482_dp, 0.2856470017368906_dp, &
+    0.5375782503396351_dp, 0.7270150266098625_dp, 0.7999334394302196_dp, &
+    5.8484019322378344E-002_dp, 0.1530875603643835_dp, 0.2379960762633573_dp, &
+    0.9308569877732396_dp, 0.3438403549637741_dp, 0.6866513745295322_dp, &
+    0.8568961973603706_dp, 0.2662471964817286_dp, 0.9809031616176327_dp, &
+    0.3228361381496967_dp, 7.6615984354404532E-002_dp, 0.4149010901069090_dp, &
+    0.5531992491973767_dp, 0.2665501633545233_dp, 0.8604143884893318_dp, &
+    0.8036310109642670_dp, 0.2148345295371286_dp, 0.6116832685197551_dp, &
+    0.4196377237189068_dp, 9.8410464752689109E-002_dp, 0.7344879987375066_dp, &
+    0.5586748845009026_dp, 0.2983346430492873_dp, 0.2765339210792774_dp, &
+    0.3646576612344177_dp, 0.7153911603551393_dp, 0.8815110226505993_dp, &
+    0.3749506274036918_dp, 0.6914350111861864_dp, 0.9178569104317944_dp, &
+    0.9819413237096626_dp, 0.7419254111399312_dp, 0.1785816383679588_dp, &
+    0.9062695407233150_dp, 0.5295401789515495_dp, 0.4015790474285694_dp, &
+    0.8403358758926203_dp, 0.9130696371054654_dp, 0.4649444252242176_dp, &
+    0.8278748220008367_dp, 0.6781129685078469_dp, 0.2049935371270379_dp, &
+    0.6284607974606047_dp, 0.3464554478748170_dp, 0.2028254494045285_dp, &
+    0.3695479796940333_dp, 0.1228504475588323_dp, 0.6104021211702673_dp, &
+    8.8380859014748125E-002_dp, 0.3814070877724873_dp, 0.2758175204173483_dp, &
+    0.6523906265103818_dp, 1.1981168598836689E-002_dp, 0.9287167349073684_dp, &
+    0.2944767248779527_dp, 0.7407619456415659_dp, 0.4802654485112186_dp, &
+    0.6900941371066835_dp, 0.1337102720344063_dp, 0.9229375223385574_dp, &
+    8.7217393516382913E-002_dp, 0.6830908979157471_dp, 5.9642116800716849E-002_dp, &
+    0.2565607195932387_dp, 0.5333396435088247_dp, 0.1755982525311310_dp, &
+    6.4497985688234394E-002_dp, 0.3354596372180652_dp, 0.9089513461036205_dp, &
+    0.5453208121076614_dp, 0.1043149874384994_dp, 0.3589747105661871_dp, &
+    7.6221582859631098E-002_dp, 0.3892167946148390_dp, 0.2354149492143449_dp, &
+    0.2380252594729058_dp, 0.2819122329047445_dp, 0.1634389763760140_dp, &
+    7.2307692530586110E-002_dp, 0.2950570660150618_dp, 0.4945859790661444_dp, &
+    0.8152518764135692_dp, 0.3390372289071450_dp, 0.1368056782188205_dp, &
+    0.6305167032331269_dp, 0.4035373251697649_dp, 0.3605726885212306_dp, &
+    0.4433522163456445_dp, 0.4957803887850076_dp, 0.7067382860927580_dp, &
+    0.7927541197846040_dp, 0.5959876377355755_dp, 0.6813774758185502_dp, &
+    0.7776926216897522_dp, 0.8701772624687720_dp, 0.8650618123151901_dp, &
+    0.8910447037506373_dp, 0.1759634548846947_dp, 0.5929444981033214_dp, &
+    0.2092144913759171_dp, 1.8674905340105852E-003_dp, 0.5215614069837642_dp, &
+    0.5795007800544596_dp, 0.9535171866245520_dp, 0.6525667077215616_dp, &
+    0.4976478793190182_dp, 0.2282996930765222_dp, 0.3722548998390636_dp, &
+    0.5495048243601275_dp, 0.3339441835401118_dp, 0.2753405010087704_dp, &
+    9.8476955545294231E-002_dp, 0.2373167121542537_dp, 0.4405495281107648_dp, &
+    0.5099076384248065_dp, 0.8682849991120918_dp, 0.3076914469212113_dp, &
+    0.2391842026882642_dp, 0.9621109350945289_dp, 8.9408418479266061E-002_dp, &
+    0.8218021857366438_dp, 0.9602581546427729_dp, 0.7368320820072825_dp, &
+    0.1904106281710511_dp, 0.4616633183183296_dp, 0.3713070100967713_dp, &
+    0.2942023381828847_dp, 1.2172583016052840E-002_dp, 0.2888875837163454_dp, &
+    0.6989800304725833_dp, 0.8118565382075360_dp, 0.8041099766076911_dp, &
+    0.8804575821281446_dp, 0.5965790306375567_dp, 0.9381642331608475_dp, &
+    0.7739674733020649_dp, 0.8935183950869572_dp, 0.7022597678647884_dp, &
+    0.5568371852803295_dp, 0.6749963151681300_dp, 0.9643781014731161_dp, &
+    0.3551817134052868_dp, 7.3566777961559637E-002_dp, 0.8510395234632142_dp, &
+    0.6871688981841828_dp, 0.2532656851894615_dp, 5.4161743877870094E-002_dp, &
+    0.8854233161690956_dp, 0.6551495000709053_dp, 0.5676264803123274_dp, &
+    0.8498447158270181_dp, 0.9923259770387176_dp, 0.6593907894711606_dp, &
+    0.5486678951578625_dp, 0.2698862481771158_dp, 0.4066819011073477_dp, &
+    0.6673222922068476_dp, 0.6237688909442767_dp, 0.9038496085631493_dp, &
+    0.3434530261386755_dp, 0.2577214245705619_dp, 0.3544911903910304_dp, &
+    0.8770345761337381_dp, 0.9580113524410194_dp, 0.2288763423077711_dp, &
+    0.9128709246414672_dp, 0.9221176707033578_dp, 0.7268792919607563_dp, &
+    0.9503373294797370_dp, 0.8882671317789317_dp, 0.4615388197993298_dp, &
+    0.1920039188804736_dp, 0.1335611930681040_dp, 0.6176596216865846_dp, &
+    0.5120360227232084_dp, 0.3653884283624791_dp, 0.5354569450191491_dp, &
+    0.3912826176386659_dp, 0.9721508120776150_dp, 0.3890705988569465_dp, &
+    0.3233997808034985_dp, 0.7643332873269202_dp, 0.3041535422801331_dp, &
+    0.8942684827809728_dp, 0.1159498908177028_dp, 0.2737371102832356_dp, &
+    0.6526004191058519_dp, 0.7656923620794629_dp, 8.6272401661446452E-002_dp, &
+    0.2495110838858068_dp, 0.8913967319698202_dp, 0.1646364418290602_dp, &
+    0.1310807904419420_dp, 0.6217293466805955_dp, 0.6407937015244727_dp, &
+    0.8635475440474352_dp, 0.5537070406860067_dp, 0.4544805712454405_dp, &
+    0.3860626340075157_dp, 0.9449472438046058_dp, 0.7578160268284080_dp, &
+    0.6696569315037095_dp, 0.7282176815286761_dp, 3.8663053113367596E-002_dp, &
+    0.7106396058840687_dp, 0.8440884284898544_dp, 0.9191680153895163_dp, &
+    0.6196144134984962_dp, 0.2032994949424278_dp, 0.8417203963260107_dp, &
+    0.4658177751704500_dp, 0.5599617169139890_dp, 0.4831619575459314_dp, &
+    0.7570065356284346_dp, 0.2962009675714512_dp, 0.8518804091779657_dp, &
+    0.5049089607185948_dp, 0.2409779843743394_dp, 0.4266634671321441_dp, &
+    2.4418649100127254E-002_dp, 0.8905434622913333_dp, 0.2155485666026635_dp, &
+    8.5066412864193808E-002_dp, 0.3458314825216604_dp, 0.6440330625986235_dp, &
+    9.3842957233761126E-002_dp, 5.7268962928674227E-002_dp, 0.5508841880346438_dp, &
+    0.9057931994356494_dp, 0.1271950201445549_dp, 0.8508494928621957_dp, &
+    0.3534699305001254_dp, 0.4027645972126095_dp, 0.4107021601542442_dp, &
+    0.3681730045188942_dp, 0.2775129599943398_dp, 0.3778885796002527_dp, &
+    0.2933080595039428_dp, 0.6262507267569077_dp, 0.4532394173830880_dp, &
+    0.6233444425160002_dp, 2.1921642198876157E-002_dp, 0.3871510167377039_dp, &
+    0.6835196896855820_dp, 4.1236054177318238E-003_dp]
+
+  shuffled = original
+  if (myrank == 0) then
+    write(*,*) 'random array: ', original
+    ! Shuffle rank 0's copy in place: element i is swapped with a position j
+    ! drawn from 1..i-1 (random_number returns a value in [0, 1)).
+    do i = nvals, 2, -1
+      call random_number(u)
+      j = 1 + int((i - 1) * u)
+      swap_tmp = shuffled(i)
+      shuffled(i) = shuffled(j)
+      shuffled(j) = swap_tmp
+    end do
+    write(*,*) 'Shuffled array: ', shuffled
+  end if
+
+  call MPI_Allreduce(shuffled, shuffled_sum, nvals, MPI_DOUBLE_PRECISION, &
+                     MPI_SUM, MPI_COMM_WORLD, ierr)
+  call MPI_Allreduce(original, original_sum, nvals, MPI_DOUBLE_PRECISION, &
+                     MPI_SUM, MPI_COMM_WORLD, ierr)
+
+  if (myrank == 0) then
+    ! "a" consumes the label; b64.64 writes the value as 64 binary digits so
+    ! that differences in the low-order bits are visible.
+    write(*,'(a, b64.64)') 'The sum of all shuffed data is ', shuffled_sum(1)
+    write(*,'(a, b64.64)') 'The sum of all random data is ', original_sum(1)
+    if (abs(shuffled_sum(1) - original_sum(1)) < &
+        eps * max(abs(shuffled_sum(1)), abs(original_sum(1)))) then
+      write(*,*) '相等'    ! prints "equal"
+    else
+      write(*,*) '不相等'  ! prints "not equal"
+    end if
+  end if
+
+  call MPI_FINALIZE(ierr)
+end program all_reduce_example
+!Intel MPI: mpif90 -o all_reduce_example allreduce-exp.F90 && mpiexec -n 10 -genv I_MPI_ADJUST_ALLREDUCE=1 ./all_reduce_example
+!HMPI: mpif90 -o all_reduce_example allreduce-exp.F90 && mpirun -n 10 -x UCX_BUILTIN_ALLREDUCE_ALGORITHM=1 ./all_reduce_example
\ No newline at end of file
diff --git a/examples/precise/run.sh b/examples/precise/run.sh
new file mode 100644
index 0000000..6d93ff6
--- /dev/null
+++ b/examples/precise/run.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+#DSUB -n precise-exp
+#DSUB --job_type cosched:hmpi
+#DSUB -N 16
+#DSUB -R "cpu=128"
+#DSUB -A root.default
+#DSUB -q root.default
+#DSUB -o precise_%J.log
+#DSUB -e precise_err_%J.log
+
+echo " HOSTFILE generated:"
+echo "-----------------------"
+cat "$CCS_HOST_FILE"
+echo "-----------------------"
+mpif90 -o all_reduce_example allreduce-exp.F90 || exit 1
+EXEC_CMD="mpirun $CCS_MPI_OPTIONS -x OMP_NUM_THREADS=1 --map-by ppr:16:node:pe=1 -x UCX_BUILTIN_ALLREDUCE_ALGORITHM=1 ./all_reduce_example"
+date
+echo "$EXEC_CMD"
+$EXEC_CMD
+ret=$?
+date
+exit $ret
-- 
Gitee