diff --git a/MindFlow/applications/cfd/acoustic/README.md b/MindFlow/applications/cfd/acoustic/README.md
index 181434fb706d6673c00ec54d97254afe0d917338..beebfcfe809ef9c1bb0b0af2eaaf95034f75baf4 100644
--- a/MindFlow/applications/cfd/acoustic/README.md
+++ b/MindFlow/applications/cfd/acoustic/README.md
@@ -100,7 +100,7 @@ To facilitate direct verification by users, preset inputs are provided [here](ht
 ### Method 1: Running the `solve_acoustic.py` script
 
 ```shell
-python solve_acoustic.py --config_file_path ./configs.yaml --device_id 0 --mode GRAPH
+python solve_acoustic.py --config_file_path ./config.yaml --device_id 0 --mode GRAPH
 ```
 
 Where
diff --git a/MindFlow/applications/cfd/acoustic/README_CN.md b/MindFlow/applications/cfd/acoustic/README_CN.md
index 0dce5c291805563f772b230634c8b0e2eb5349a5..7512de08d5b83c1f75b1887e9cb1397f1f10e015 100644
--- a/MindFlow/applications/cfd/acoustic/README_CN.md
+++ b/MindFlow/applications/cfd/acoustic/README_CN.md
@@ -102,7 +102,7 @@ $$
 ### 运行方式一：`solve_acoustic.py` 脚本
 
 ```shell
-python solve_acoustic.py --config_file_path ./configs.yaml --device_id 0 --mode GRAPH
+python solve_acoustic.py --config_file_path ./config.yaml --device_id 0 --mode GRAPH
 ```
 
 其中，
diff --git a/MindFlow/applications/cfd/acoustic/cbs/cbs.py b/MindFlow/applications/cfd/acoustic/cbs/cbs.py
index 5706ae37a731318da617af0da922065f1ae0c95a..0aba76a0a21787002931c38477508bea5a0d340c 100644
--- a/MindFlow/applications/cfd/acoustic/cbs/cbs.py
+++ b/MindFlow/applications/cfd/acoustic/cbs/cbs.py
@@ -14,6 +14,7 @@
 # ==============================================================================
 """The CBS (convergen Born series) API"""
 from math import factorial
+from time import time as toc
 import numpy as np
 import mindspore as ms
 from mindspore import Tensor, nn, ops, numpy as mnp, lazy_inline
@@ -199,8 +200,9 @@ class CBS(nn.Cell):
             ui_init = ops.zeros_like(c_star, dtype=ms.float32) # (batch, 1, nz, nx)
 
         # pad initial field
+        # note: ui_init is negated here (i.e. the initial field is conjugated), because the output is also conjugated
         ur = ops.pad(ur_init, padding=[n1] * 4, value=0) # note: better padding (with gradual damping) can be applied
-        ui = ops.pad(ui_init, padding=[n2] * 4, value=0) # (batch, 1, nz_padded, nx_padded)
+        ui = ops.pad(-1. * ui_init, padding=[n2] * 4, value=0) # (batch, 1, nz_padded, nx_padded)
 
         # start iteration
         errs_list = []
@@ -242,6 +244,8 @@ class CBS(nn.Cell):
         msg = 'PML layers cannot be removed during iteration, but can be removed for the final result'
         assert not self.remove_pml, msg
 
+        tic = toc()
+
         ur, ui, errs_list = self(c_star, f_star, ur_init, ui_init)
 
         for ep in range(max_iter // self.n_iter):
@@ -251,12 +255,14 @@ class CBS(nn.Cell):
 
             if print_info:
                 print(f'step {(ep + 1) * self.n_iter}, max error {err_max:.6f}', end=', ')
-                print(f'min error {err_min:.6f}, mean error {err_ave:.6f}')
+                print(f'min error {err_min:.6f}, mean error {err_ave:.6f}', end=', ')
+                print(f'mean step time {(toc() - tic) / self.n_iter:.4f}s')
+                tic = toc()
 
             if err_max < tol:
                 break
 
-            ur, ui, errs = self(c_star, f_star, ur, -ui)
+            ur, ui, errs = self(c_star, f_star, ur, ui)
             errs_list += errs
 
         if remove_pml and self.pml_size:
diff --git a/MindFlow/applications/cfd/acoustic/cbs/dft.py b/MindFlow/applications/cfd/acoustic/cbs/dft.py
index a0f5109a4558c9ab748f36878b763d94950ba101..d93d9a8b16e1b970ee1f7c6f08e9ee831bdffbf7 100644
--- a/MindFlow/applications/cfd/acoustic/cbs/dft.py
+++ b/MindFlow/applications/cfd/acoustic/cbs/dft.py
@@ -15,7 +15,7 @@
 ''' provide complex dft based on the real dft API in mindflow.dft '''
 import numpy as np
 import mindspore as ms
-from mindspore import nn, ops, numpy as mnp
+from mindspore import nn, ops, numpy as mnp, mint
 from mindflow.cell.neural_operators.dft import dft1, dft2, dft3
 
 
@@ -53,6 +53,24 @@ class MyDFTn(nn.Cell):
         self.mask_y0 = ms.Tensor(mask_y0, dtype=ms.float32, const_arg=True)
         self.mask_z0 = ms.Tensor(mask_z0, dtype=ms.float32, const_arg=True)
 
+        # bug note: ops.flip, mint.flip and mint.roll are buggy in MS2.4.0 under PYNATIVE_MODE;
+        # mnp.flip is buggy after MS2.4.0 under GRAPH_MODE;
+        # ops.roll only supports GPU, and mnp.roll works but is slow
+        msver = tuple([int(s) for s in ms.__version__.split('.')])
+        kwargs1 = (dict(axis=-1), dict(axis=-2), dict(axis=-3))
+        kwargs2 = (dict(dims=(-1,)), dict(dims=(-2,)), dict(dims=(-3,)))
+
+        if msver <= (2, 4, 0) and ms.get_context('mode') == ms.PYNATIVE_MODE:
+            self.fliper = mnp.flip
+            self.roller = mnp.roll
+            self.flipkw = kwargs1
+            self.rollkw = kwargs1
+        else:
+            self.fliper = mint.flip
+            self.roller = mint.roll
+            self.flipkw = kwargs2
+            self.rollkw = kwargs2
+
     def construct(self, ar, ai):
         shape = tuple(self.shape)
         n = shape[-1]
@@ -74,15 +92,21 @@ class MyDFTn(nn.Cell):
 
         br_half1 = ops.pad((brr - bii) * self.mask_xm, [0, n//2 - 1])
         bi_half1 = ops.pad((bri + bir) * self.mask_xm, [0, n//2 - 1])
-        # bug note: mnp.roll() & mnp.flip are ok, but ops.roll() only supports GPU, ops.flip() has bug in MS2.4.0
-        br_half2 = mnp.roll(mnp.flip(ops.pad((brr + bii) * self.mask_x0, [n//2 - 1, 0]), axis=-1), n//2, axis=-1)
-        bi_half2 = mnp.roll(mnp.flip(ops.pad((bir - bri) * self.mask_x0, [n//2 - 1, 0]), axis=-1), n//2, axis=-1)
+
+        br_half2 = self.roller(self.fliper(
+            ops.pad((brr + bii) * self.mask_x0, [n//2 - 1, 0]), **self.flipkw[0]), n//2, **self.rollkw[0])
+        bi_half2 = self.roller(self.fliper(
+            ops.pad((bir - bri) * self.mask_x0, [n//2 - 1, 0]), **self.flipkw[0]), n//2, **self.rollkw[0])
         if ndim > 1:
-            br_half2 = br_half2 * (1 - self.mask_y0) + mnp.roll(mnp.flip(br_half2 * self.mask_y0, axis=-2), 1, axis=-2)
-            bi_half2 = bi_half2 * (1 - self.mask_y0) + mnp.roll(mnp.flip(bi_half2 * self.mask_y0, axis=-2), 1, axis=-2)
+            br_half2 = br_half2 * (1 - self.mask_y0) + self.roller(self.fliper(
+                br_half2 * self.mask_y0, **self.flipkw[1]), 1, **self.rollkw[1])
+            bi_half2 = bi_half2 * (1 - self.mask_y0) + self.roller(self.fliper(
+                bi_half2 * self.mask_y0, **self.flipkw[1]), 1, **self.rollkw[1])
         if ndim > 2:
-            br_half2 = br_half2 * (1 - self.mask_z0) + mnp.roll(mnp.flip(br_half2 * self.mask_z0, axis=-3), 1, axis=-3)
-            bi_half2 = bi_half2 * (1 - self.mask_z0) + mnp.roll(mnp.flip(bi_half2 * self.mask_z0, axis=-3), 1, axis=-3)
+            br_half2 = br_half2 * (1 - self.mask_z0) + self.roller(self.fliper(
+                br_half2 * self.mask_z0, **self.flipkw[2]), 1, **self.rollkw[2])
+            bi_half2 = bi_half2 * (1 - self.mask_z0) + self.roller(self.fliper(
+                bi_half2 * self.mask_z0, **self.flipkw[2]), 1, **self.rollkw[2])
 
         br = br_half1 + br_half2
         bi = bi_half1 + bi_half2
diff --git a/tests/st/mindflow/cell/test_optimizers.py b/tests/st/mindflow/cell/test_optimizers.py
index 891695344f18dcfbfb785c4fc7d87b0da974479b..545f31da78a4620fe4fd37af4c1ab3fea8601b5c 100644
--- a/tests/st/mindflow/cell/test_optimizers.py
+++ b/tests/st/mindflow/cell/test_optimizers.py
@@ -174,7 +174,7 @@ def test_adahessian_st(mode, model_option):
 
     assert ops.isfinite(loss)
 
-@pytest.mark.level0
+@pytest.mark.level1
 @pytest.mark.platform_arm_ascend910b_training
 @pytest.mark.env_onecard
 def test_adahessian_compare():