diff --git a/4-add-test-cases-for-openmp-optimization.patch b/4-add-test-cases-for-openmp-optimization.patch index ca652ac33bba57850560b9fce7d67ab20270cb0d..0b0c064da5604e7ed90003c5bf2285c22252e54a 100644 --- a/4-add-test-cases-for-openmp-optimization.patch +++ b/4-add-test-cases-for-openmp-optimization.patch @@ -1,6 +1,6 @@ -From 7d436b81de679f3e13810a8e91395d3e15b3878b Mon Sep 17 00:00:00 2001 +From 1c04d8e4b34e8a9c8415cca22ef6c710662c13a3 Mon Sep 17 00:00:00 2001 From: xieyihui -Date: Thu, 8 Dec 2022 10:52:07 +0800 +Date: Fri, 13 Jan 2023 21:12:04 +0800 Subject: [PATCH] Add test cases for OpenMP optimizations diff --git a/test/openmp_optimization/fortran_func001.f90 b/test/openmp_optimization/fortran_func001.f90 @@ -86,31 +86,33 @@ index 0000000..81fc5d8 \ No newline at end of file diff --git a/test/openmp_optimization/fortran_func007.f90 b/test/openmp_optimization/fortran_func007.f90 new file mode 100644 -index 0000000..e638b63 +index 0000000..c7eda6f --- /dev/null +++ b/test/openmp_optimization/fortran_func007.f90 -@@ -0,0 +1,7 @@ +@@ -0,0 +1,9 @@ +subroutine add1(s) + use omp_lib -+ real :: s -+ !$omp critical -+ s = s + 1 -+ !$omp end critical ++ real :: s(1000) ++ !$omp do ++ do i = 1, 1000 ++ s(i) = i ++ end do ++ !$omp end do +end subroutine add1 \ No newline at end of file diff --git a/test/openmp_optimization/fortran_func008.f90 b/test/openmp_optimization/fortran_func008.f90 new file mode 100644 -index 0000000..04dd669 +index 0000000..0121681 --- /dev/null +++ b/test/openmp_optimization/fortran_func008.f90 @@ -0,0 +1,10 @@ +subroutine add1(s, n) + use omp_lib -+ real :: s ++ real :: s(100000) + integer :: n + !$omp parallel do + do i = 1, n -+ s = s + 1 ++ s(i) = i + end do + !$omp end parallel do +end subroutine add1 @@ -510,7 +512,7 @@ index 0000000..653a0c9 \ No newline at end of file diff --git a/test/openmp_optimization/fortran_main007.f90 b/test/openmp_optimization/fortran_main007.f90 new file mode 100644 -index 0000000..7aac1b3 +index 0000000..fdcf30d --- /dev/null +++ b/test/openmp_optimization/fortran_main007.f90 @@ -0,0 +1,18 @@ @@ -519,9 +521,9 @@ index 0000000..7aac1b3 +program main + use omp_lib + real(kind = 8) :: starttime, endtime, time -+ real :: a ++ real :: a(1000) + starttime = omp_get_wtime() -+ do n = 1, 50000000 ++ do n = 1, 500000 + call add1(a) + end do + endtime = omp_get_wtime() @@ -535,10 +537,10 @@ index 0000000..7aac1b3 \ No newline at end of file diff --git a/test/openmp_optimization/fortran_main008.f90 b/test/openmp_optimization/fortran_main008.f90 new file mode 100644 -index 0000000..6f9af95 +index 0000000..5502775 --- /dev/null +++ b/test/openmp_optimization/fortran_main008.f90 -@@ -0,0 +1,18 @@ +@@ -0,0 +1,20 @@ +! Test optimizaton for fortran use openmp about alternative code. The +! goal is to have the performance of the serial code if it is faster +! than the parallel @@ -546,9 +548,11 @@ index 0000000..6f9af95 +program main + use omp_lib + real(kind = 8) :: starttime, endtime, time -+ real :: a ++ real :: a(100000) + starttime = omp_get_wtime() -+ call add1(a, 8000) ++ do i = 1, 10 ++ call add1(a, 8000) ++ end do + endtime = omp_get_wtime() + time = endtime - starttime + print *, time @@ -885,10 +889,10 @@ index 0000000..ec85db7 \ No newline at end of file diff --git a/test/openmp_optimization/fortran_main020.f90 b/test/openmp_optimization/fortran_main020.f90 new file mode 100644 -index 0000000..51da303 +index 0000000..7c5c7a9 --- /dev/null +++ b/test/openmp_optimization/fortran_main020.f90 -@@ -0,0 +1,21 @@ +@@ -0,0 +1,24 @@ +! Test optimizaton for fortran use openmp about Change the way +! variables are passed +! @@ -897,7 +901,7 @@ index 0000000..51da303 + real(kind = 8) :: starttime, endtime, time + integer :: a(32), b(32) + starttime = omp_get_wtime() -+ do i = 1, 100000 ++ do i = 1, 1000 + call f(a, b) + end do + endtime = omp_get_wtime() @@ -905,8 +909,11 @@ index 0000000..51da303 + print *, time +end +subroutine work(a, b) -+ a = a + 1 -+ b = b + 1 ++ integer :: a(32), b(32) ++ do i = 1, 32 ++ a(i) = a(i) + 1 ++ b(i) = b(i) + 1 ++ end do +end +! Reference +! 孙志刚. 基于键涨落模型数值模拟的并行优化[D].山东大学,2013. @@ -990,41 +997,45 @@ index 0000000..69e4b7b \ No newline at end of file diff --git a/test/openmp_optimization/fortran_optimized_func007.f90 b/test/openmp_optimization/fortran_optimized_func007.f90 new file mode 100644 -index 0000000..826dcbc +index 0000000..0d6eac3 --- /dev/null +++ b/test/openmp_optimization/fortran_optimized_func007.f90 -@@ -0,0 +1,11 @@ +@@ -0,0 +1,15 @@ +subroutine add1(s) + use omp_lib -+ real :: s ++ real :: s(1000) + if(omp_in_parallel()) then -+ !$omp critical -+ s = s + 1 -+ !$omp end critical ++ !$omp do ++ do i = 1, 1000 ++ s(i) = i ++ end do ++ !$omp end do + else -+ s = s + 1 ++ do i = 1, 1000 ++ s(i) = i ++ end do + end if +end subroutine add1 \ No newline at end of file diff --git a/test/openmp_optimization/fortran_optimized_func008.f90 b/test/openmp_optimization/fortran_optimized_func008.f90 new file mode 100644 -index 0000000..d43d572 +index 0000000..e11d2d6 --- /dev/null +++ b/test/openmp_optimization/fortran_optimized_func008.f90 @@ -0,0 +1,16 @@ +subroutine add1(s, n) + use omp_lib -+ real :: s ++ real :: s(100000) + integer :: n + if(n > 10000) then + !$omp parallel do + do i = 1, n -+ s = s + 1 ++ s(i) = i + end do + !$omp end parallel do + else + do i = 1, n -+ s = s + 1 ++ s(i) = i + end do + end if +end subroutine add1 @@ -1243,22 +1254,22 @@ index 0000000..a7d30da \ No newline at end of file diff --git a/test/openmp_optimization/fortran_optimized_func020.f90 b/test/openmp_optimization/fortran_optimized_func020.f90 new file mode 100644 -index 0000000..bbc7ac3 +index 0000000..b5101cd --- /dev/null +++ b/test/openmp_optimization/fortran_optimized_func020.f90 @@ -0,0 +1,15 @@ +subroutine f(a, b) + use omp_lib -+ integer :: a(32), b(32), tid, ia, ib ++ integer :: a(32), b(32), tid, ia(32), ib(32) + !$omp parallel private(tid, ia, ib) shared(a, b) + tid = omp_get_thread_num() -+ ia = a(tid) -+ ib = b(tid) ++ ia = a ++ ib = b + do i = 1, 100 + call work(ia, ib) + end do -+ a(tid) = ia -+ b(tid) = ib ++ a = ia ++ b = ib + !$omp end parallel +end + @@ -1373,7 +1384,7 @@ index 0000000..af058f6 \ No newline at end of file diff --git a/test/openmp_optimization/optimized_main007.f90 b/test/openmp_optimization/optimized_main007.f90 new file mode 100644 -index 0000000..7aac1b3 +index 0000000..fdcf30d --- /dev/null +++ b/test/openmp_optimization/optimized_main007.f90 @@ -0,0 +1,18 @@ @@ -1382,9 +1393,9 @@ index 0000000..7aac1b3 +program main + use omp_lib + real(kind = 8) :: starttime, endtime, time -+ real :: a ++ real :: a(1000) + starttime = omp_get_wtime() -+ do n = 1, 50000000 ++ do n = 1, 500000 + call add1(a) + end do + endtime = omp_get_wtime() @@ -1398,10 +1409,10 @@ index 0000000..7aac1b3 \ No newline at end of file diff --git a/test/openmp_optimization/optimized_main008.f90 b/test/openmp_optimization/optimized_main008.f90 new file mode 100644 -index 0000000..6f9af95 +index 0000000..5502775 --- /dev/null +++ b/test/openmp_optimization/optimized_main008.f90 -@@ -0,0 +1,18 @@ +@@ -0,0 +1,20 @@ +! Test optimizaton for fortran use openmp about alternative code. The +! goal is to have the performance of the serial code if it is faster +! than the parallel @@ -1409,9 +1420,11 @@ index 0000000..6f9af95 +program main + use omp_lib + real(kind = 8) :: starttime, endtime, time -+ real :: a ++ real :: a(100000) + starttime = omp_get_wtime() -+ call add1(a, 8000) ++ do i = 1, 10 ++ call add1(a, 8000) ++ end do + endtime = omp_get_wtime() + time = endtime - starttime + print *, time @@ -1752,10 +1765,10 @@ index 0000000..ec85db7 \ No newline at end of file diff --git a/test/openmp_optimization/optimized_main020.f90 b/test/openmp_optimization/optimized_main020.f90 new file mode 100644 -index 0000000..51da303 +index 0000000..7c5c7a9 --- /dev/null +++ b/test/openmp_optimization/optimized_main020.f90 -@@ -0,0 +1,21 @@ +@@ -0,0 +1,24 @@ +! Test optimizaton for fortran use openmp about Change the way +! variables are passed +! @@ -1764,7 +1777,7 @@ index 0000000..51da303 + real(kind = 8) :: starttime, endtime, time + integer :: a(32), b(32) + starttime = omp_get_wtime() -+ do i = 1, 100000 ++ do i = 1, 1000 + call f(a, b) + end do + endtime = omp_get_wtime() @@ -1772,15 +1785,18 @@ index 0000000..51da303 + print *, time +end +subroutine work(a, b) -+ a = a + 1 -+ b = b + 1 ++ integer :: a(32), b(32) ++ do i = 1, 32 ++ a(i) = a(i) + 1 ++ b(i) = b(i) + 1 ++ end do +end +! Reference +! 孙志刚. 基于键涨落模型数值模拟的并行优化[D].山东大学,2013. \ No newline at end of file diff --git a/test/openmp_optimization/readme.md b/test/openmp_optimization/readme.md new file mode 100644 -index 0000000..d999a41 +index 0000000..d1131ce --- /dev/null +++ b/test/openmp_optimization/readme.md @@ -0,0 +1,83 @@ @@ -1800,8 +1816,8 @@ index 0000000..d999a41 +test004: 0.961 0.574 +test005: 0.983 0.582 +test006: Unable to optimize with hand-written code -+test007: 3.681 0.961 -+test008: 0.0218 0.000026 ++test007: 3.255 0.303 ++test008: 0.00555 0.000025 +test009: 1.480 0.818 +test010: 2.222 1.398 +test011: 3.271 2.957 @@ -1809,11 +1825,11 @@ index 0000000..d999a41 +test013: 0.388 0.393 +test014: 0.287 0.000411 +test015: 0.127 0.249 -+test016: 0.759 0.301 ++test016: 0.759 0.0160 +test017: ERROR +test018: ERROR +test019: 1.044 0.0272 -+test020: 5.573 0.343 ++test020: 1.316 0.343 + +test every case 20 times with flang -O3 + unoptimized optimized @@ -1823,8 +1839,8 @@ index 0000000..d999a41 +test004: 0.969 0.568 +test005: 0.977 0.575 +test006: Unable to optimize with hand-written code -+test007: 3.714 1.577 -+test008: 0.0203 0.0029 ++test007: 2.485 0.054 ++test008: 0.00490 0.00154 +test009: 1.003 0.775 +test010: 2.216 1.334 +test011: 3.842 2.610 @@ -1836,7 +1852,7 @@ index 0000000..d999a41 +test017: 0.0355 0.0323 +test018: 0.609 1.3886 +test019: 1.028 0.0262 -+test020: 6.012 0.348 ++test020: 0.290 0.0164 + +Reference +[1] Müller, Matthias S.. "Some Simple OpenMP Optimization Techniques." @@ -1945,16 +1961,16 @@ index 0000000..23de4fe \ No newline at end of file diff --git a/test/openmp_optimization/run_flang.sh b/test/openmp_optimization/run_flang.sh new file mode 100644 -index 0000000..49ec972 +index 0000000..095efad --- /dev/null +++ b/test/openmp_optimization/run_flang.sh @@ -0,0 +1,68 @@ +for i in {001..002} +do +echo "------- test $i ------." -+flang fortran_main$i.f90 -c -o fortran-test.o -fopenmp -O3 -+flang fortran_func$i.f90 -c -o fortran-test2.o -fopenmp -O3 -+flang fortran-test2.o fortran-test.o -fopenmp -O3 ++flang fortran_main$i.f90 -c -o fortran-test.o -fopenmp -O3 -Rpass=openmp-opt ++flang fortran_func$i.f90 -c -o fortran-test2.o -fopenmp -O3 -Rpass=openmp-opt ++flang fortran-test2.o fortran-test.o -fopenmp -O3 -Rpass=openmp-opt +export OMP_NUM_THREADS=8 +./a.out +rm *.o @@ -1964,9 +1980,9 @@ index 0000000..49ec972 +for i in {004..005} +do +echo "------- test $i ------." -+flang fortran_main$i.f90 -c -o fortran-test.o -fopenmp -O3 -+flang fortran_func$i.f90 -c -o fortran-test2.o -fopenmp -O3 -+flang fortran-test2.o fortran-test.o -fopenmp -O3 ++flang fortran_main$i.f90 -c -o fortran-test.o -fopenmp -O3 -Rpass=openmp-opt ++flang fortran_func$i.f90 -c -o fortran-test2.o -fopenmp -O3 -Rpass=openmp-opt ++flang fortran-test2.o fortran-test.o -fopenmp -O3 -Rpass=openmp-opt +export OMP_NUM_THREADS=8 +./a.out +rm *.o @@ -1976,9 +1992,9 @@ index 0000000..49ec972 +for i in {007..020} +do +echo "------- test $i ------." -+flang fortran_main$i.f90 -c -o fortran-test.o -fopenmp -O3 -+flang fortran_func$i.f90 -c -o fortran-test2.o -fopenmp -O3 -+flang fortran-test2.o fortran-test.o -fopenmp -O3 ++flang fortran_main$i.f90 -c -o fortran-test.o -fopenmp -O3 -Rpass=openmp-opt ++flang fortran_func$i.f90 -c -o fortran-test2.o -fopenmp -O3 -Rpass=openmp-opt ++flang fortran-test2.o fortran-test.o -fopenmp -O3 -Rpass=openmp-opt +export OMP_NUM_THREADS=8 +./a.out +rm *.o @@ -1987,9 +2003,9 @@ index 0000000..49ec972 +for i in {001..002} +do +echo "------- test $i ------." -+flang optimized_main$i.f90 -c -o fortran-test.o -fopenmp -O3 -+flang fortran_optimized_func$i.f90 -c -o fortran-test2.o -fopenmp -O3 -+flang fortran-test2.o fortran-test.o -fopenmp -O3 ++flang optimized_main$i.f90 -c -o fortran-test.o -fopenmp -O3 -Rpass=openmp-opt ++flang fortran_optimized_func$i.f90 -c -o fortran-test2.o -fopenmp -O3 -Rpass=openmp-opt ++flang fortran-test2.o fortran-test.o -fopenmp -O3 -Rpass=openmp-opt +export OMP_NUM_THREADS=8 +./a.out +rm *.o @@ -1998,9 +2014,9 @@ index 0000000..49ec972 +for i in {004..005} +do +echo "------- test $i ------." -+flang optimized_main$i.f90 -c -o fortran-test.o -fopenmp -O3 -+flang fortran_optimized_func$i.f90 -c -o fortran-test2.o -fopenmp -O3 -+flang fortran-test2.o fortran-test.o -fopenmp -O3 ++flang optimized_main$i.f90 -c -o fortran-test.o -fopenmp -O3 -Rpass=openmp-opt ++flang fortran_optimized_func$i.f90 -c -o fortran-test2.o -fopenmp -O3 -Rpass=openmp-opt ++flang fortran-test2.o fortran-test.o -fopenmp -O3 -Rpass=openmp-opt +export OMP_NUM_THREADS=8 +./a.out +rm *.o @@ -2009,15 +2025,44 @@ index 0000000..49ec972 +for i in {007..020} +do +echo "------- test $i ------." -+flang optimized_main$i.f90 -c -o fortran-test.o -fopenmp -O3 -+flang fortran_optimized_func$i.f90 -c -o fortran-test2.o -fopenmp -O3 -+flang fortran-test2.o fortran-test.o -fopenmp -O3 ++flang optimized_main$i.f90 -c -o fortran-test.o -fopenmp -O3 -Rpass=openmp-opt ++flang fortran_optimized_func$i.f90 -c -o fortran-test2.o -fopenmp -O3 -Rpass=openmp-opt ++flang fortran-test2.o fortran-test.o -fopenmp -O3 -Rpass=openmp-opt +export OMP_NUM_THREADS=8 +./a.out +rm *.o +rm a.out +done \ No newline at end of file +diff --git a/test/openmp_optimization/t.sh b/test/openmp_optimization/t.sh +new file mode 100644 +index 0000000..985983f +--- /dev/null ++++ b/test/openmp_optimization/t.sh +@@ -0,0 +1,23 @@ ++for i in {021..021} ++do ++echo "------- test $i ------." ++flang fortran_main$i.f90 -c -o fortran-test.o -fopenmp -O3 -Rpass=openmp-opt ++flang fortran_func$i.f90 -c -o fortran-test2.o -fopenmp -O3 -Rpass=openmp-opt ++flang fortran-test2.o fortran-test.o -fopenmp -O3 -Rpass=openmp-opt ++export OMP_NUM_THREADS=8 ++./a.out ++rm *.o ++rm a.out ++done ++ ++for i in {021..021} ++do ++echo "------- test $i ------." ++flang optimized_main$i.f90 -c -o fortran-test.o -fopenmp -O3 -Rpass=openmp-opt ++flang fortran_optimized_func$i.f90 -c -o fortran-test2.o -fopenmp -O3 -Rpass=openmp-opt ++flang fortran-test2.o fortran-test.o -fopenmp -O3 -Rpass=openmp-opt ++export OMP_NUM_THREADS=8 ++./a.out ++rm *.o ++rm a.out ++done -- 2.25.1