diff --git a/ci/build.sh b/ci/build.sh
index d4ce6ce04a11151b04cf5c8b6541d8cdc155d86b..88c3dacb4656ab3d6d53e78980133cdc0c9c2e94 100644
--- a/ci/build.sh
+++ b/ci/build.sh
@@ -54,6 +54,11 @@ function parse_script_args() {
             args_num=$((args_num-1))
             shift
             ;;
+        --tocpu=*)
+            export 'NPU_TOCPU'=${1:8}
+            args_num=$((args_num-1))
+            shift
+            ;;
         -*)
             echo "ERROR Unsupported parameters: ${1}"
             return 1
diff --git a/scripts/codegen/dest/utils.py b/scripts/codegen/dest/utils.py
index 1289de681c169111206248841cbe1cb0b4fc5be4..c98abb49daadbfca8ea02551fcda70b967945141 100644
--- a/scripts/codegen/dest/utils.py
+++ b/scripts/codegen/dest/utils.py
@@ -21,9 +21,11 @@ from codegen.api.signature import DispatcherSignature, NativeSignature
 from codegen.model import SchemaKind, NativeFunction
 from codegen.api.native import native_arguments
 
+backend = None
+
 def transfer_args_of_wrapper_func_to_cpu(sig: DispatcherSignature, func: NativeFunction) -> Tuple[str, List[str]]:
     convert: str = f"// Convert args to cpu in order to use at::native kernel \n " \
-               f"TORCH_WARN_ONCE(\"Cur kernel: {sig.func.name} is running on cpu.\"); \n "
+               f"std::cout << \"Cur kernel: {sig.func.name} is running on cpu.\" << std::endl; \n "
     args_names: List[str] = []
     args = native_arguments(sig.func, func.use_c10_dispatcher)
     for arg in args:
@@ -53,7 +55,6 @@ def transfer_args_of_wrapper_func_to_cpu(sig: DispatcherSignature, func: NativeF
 
 def transfer_ret_of_wrapper_func_to_xla(sig: DispatcherSignature, func_call: str) -> str:
     ret_code = ''
-    backend = "XLA"
     if sig.func.kind() == SchemaKind.functional:
         if sig.returns_type().cpp_type() == 'at::Tensor':
             ret_code = f"return {func_call}.toBackend(Backend::{backend});"
diff --git a/scripts/codegen/gen_backend_stubs.py b/scripts/codegen/gen_backend_stubs.py
index 9f6c6bcccf80325e5aaa008982c0fcec19b2224c..e75d95c5e72168d90827f865cf916b64abae4302 100644
--- a/scripts/codegen/gen_backend_stubs.py
+++ b/scripts/codegen/gen_backend_stubs.py
@@ -29,6 +29,7 @@ from codegen.selective_build.selector import SelectiveBuilder
 from codegen.utils import Target, concat_map, context
 from codegen.context import native_function_manager
 import codegen.dest as dest
+import codegen.dest.utils as utils
 import codegen.api.dispatcher as dispatcher
 from codegen.api.signature import DispatcherSignature
 
@@ -265,14 +266,13 @@ but expected {expected_overload_count} kernel(s).
 The expected function schemas
         print(f"Unsupported Ops List:\n{unsupported_ops_list}")
 
 def error_on_cpu_kernels(
+        cur_backend_key: DispatchKey,
         native_functions: Sequence[NativeFunction],
         backend_indices: Dict[DispatchKey, BackendIndex],
-        backend_key: DispatchKey,
-        autograd_key: DispatchKey,
 ) -> None:
     expected_backend_op_names: List[OperatorName] = \
-        list(backend_indices[backend_key].index.keys()) + list(backend_indices[autograd_key].index.keys())
+        list(backend_indices[cur_backend_key].index.keys())
     expected_backend_native_funcs: List[NativeFunction] = \
         [f for f in native_functions if f.func.name in expected_backend_op_names]
     expected_backend_kernel_name_counts: Dict[str, List[NativeFunction]] = defaultdict(list)
@@ -285,6 +285,8 @@ def error_on_cpu_kernels(
 
 def main() -> None:
     parser = argparse.ArgumentParser(description='Generate backend stub files')
+    parser.add_argument(
+        '--to_cpu', type=str, default="TRUE", help='move op which npu does not support to cpu')
     parser.add_argument(
         '-s',
         '--source_yaml',
@@ -297,9 +299,9 @@ def main() -> None:
         '--impl_path', type=str, default=None, help='path to the source C++ file containing kernel definitions')
     options = parser.parse_args()
-    run(options.source_yaml, options.output_dir, options.dry_run, options.impl_path)
+    run(options.to_cpu, options.source_yaml, options.output_dir, options.dry_run, options.impl_path)
 
 
-def run(source_yaml: str, output_dir: str, dry_run: bool, impl_path: Optional[str]) -> None:
+def run(to_cpu: str, source_yaml: str, output_dir: str, dry_run: bool, impl_path: Optional[str]) -> None:
 
     template_dir = os.path.join(pathlib.Path(__file__).parent.absolute(), "templates")
 
@@ -314,6 +316,7 @@ def run(source_yaml: str, output_dir: str, dry_run: bool, impl_path: Optional[st
     grouped_native_functions = get_grouped_native_functions(native_functions)
     parsed_backend_yaml = parse_backend_yaml(source_yaml, grouped_native_functions, backend_indices)
     true_backend = parsed_backend_yaml.true_backend
+    utils.backend = true_backend
     backend_key = parsed_backend_yaml.backend_key
     autograd_key = parsed_backend_yaml.autograd_key
     cpp_namespace = parsed_backend_yaml.cpp_namespace
@@ -348,48 +351,6 @@ def run(source_yaml: str, output_dir: str, dry_run: bool, impl_path: Optional[st
             ))),
         })
 
-    error_on_cpu_kernels(native_functions, backend_indices, backend_key, autograd_key)
-
-    dispatch_key = 'XLA'
-    native_func_header = f'#include "torch_npu/csrc/aten/NPUNativeFunctions.h"\n'
-    fm.write_with_template(f'RegisterCPU.cpp', 'RegisterDispatchKey.cpp', lambda: {
-        'external_backend_headers': native_func_header,
-        'namespaced_headers': '',
-        'DispatchKey': dispatch_key,
-        'dispatch_namespace': dispatch_key.lower(),
-        'dispatch_helpers': dest.gen_registration_helpers(backend_indices[DispatchKey.CPU]),
-        'dispatch_namespaced_definitions': list(concat_map(
-            dest.RegisterDispatchKeyCPU(
-                backend_indices[DispatchKey.CPU],
-                Target.NAMESPACED_DEFINITION,
-                selector,
-                rocm=False,
-                cpp_namespace=cpp_namespace,
-                class_method_name=f'NPUNativeFunctions'),
-            grouped_native_functions
-        )),
-        'dispatch_anonymous_definitions': list(concat_map(
-            dest.RegisterDispatchKeyCPU(
-                backend_indices[DispatchKey.CPU],
-                Target.ANONYMOUS_DEFINITION,
-                selector,
-                rocm=False,
-                cpp_namespace=cpp_namespace,
-                class_method_name=f'NPUNativeFunctions'),
-            grouped_native_functions
-        )),
-        'dispatch_registrations': list(concat_map(
-            dest.RegisterDispatchKeyCPU(
-                backend_indices[DispatchKey.CPU],
-                Target.REGISTRATION,
-                selector,
-                rocm=False,
-                cpp_namespace=cpp_namespace,
-                class_method_name=f'NPUNativeFunctions'),
-            grouped_native_functions
-        )),
-    })
-
     for dispatch_key in [backend_dispatch_key, autograd_dispatch_key]:
         if not dispatch_key:
             continue
@@ -433,5 +394,52 @@ def run(source_yaml: str, output_dir: str, dry_run: bool, impl_path: Optional[st
             )),
         })
 
+    if to_cpu.upper() in ['OFF', '0', 'NO', 'FALSE', 'F', 'N']:
+        return
+
+    backend_list = [backend_key, autograd_key, DispatchKey.Math, DispatchKey.CompositeExplicitAutograd]
+    for key in backend_list:
+        error_on_cpu_kernels(key, native_functions, backend_indices)
+
+    dispatch_key = true_backend
+    native_func_header = f'#include "torch_npu/csrc/aten/NPUNativeFunctions.h"\n'
+    fm.write_with_template(f'RegisterCPU.cpp', 'RegisterDispatchKey.cpp', lambda: {
+        'external_backend_headers': native_func_header,
+        'namespaced_headers': '',
+        'DispatchKey': dispatch_key,
+        'dispatch_namespace': dispatch_key.lower(),
+        'dispatch_helpers': dest.gen_registration_helpers(backend_indices[DispatchKey.CPU]),
+        'dispatch_namespaced_definitions': list(concat_map(
+            dest.RegisterDispatchKeyCPU(
+                backend_indices[DispatchKey.CPU],
+                Target.NAMESPACED_DEFINITION,
+                selector,
+                rocm=False,
+                cpp_namespace=cpp_namespace,
+                class_method_name=f'NPUNativeFunctions'),
+            grouped_native_functions
+        )),
+        'dispatch_anonymous_definitions': list(concat_map(
+            dest.RegisterDispatchKeyCPU(
+                backend_indices[DispatchKey.CPU],
+                Target.ANONYMOUS_DEFINITION,
+                selector,
+                rocm=False,
+                cpp_namespace=cpp_namespace,
+                class_method_name=f'NPUNativeFunctions'),
+            grouped_native_functions
+        )),
+        'dispatch_registrations': list(concat_map(
+            dest.RegisterDispatchKeyCPU(
+                backend_indices[DispatchKey.CPU],
+                Target.REGISTRATION,
+                selector,
+                rocm=False,
+                cpp_namespace=cpp_namespace,
+                class_method_name=f'NPUNativeFunctions'),
+            grouped_native_functions
+        )),
+    })
+
 if __name__ == '__main__':
     main()
diff --git a/scripts/generate_code.sh b/scripts/generate_code.sh
index 9cd97012b2c290d974866f787c6a628f79efd0f8..82413604e9e916622805ad5c674f38d2b9b260ad 100644
--- a/scripts/generate_code.sh
+++ b/scripts/generate_code.sh
@@ -17,6 +17,7 @@
 
 CDIR="$(cd "$(dirname "$0")" ; pwd -P)"
 NDIR="$CDIR/.."
+TOCPU=$1
 
 cd $NDIR/scripts
 
@@ -26,6 +27,7 @@
 cp -f codegen/native_functions.yaml codegen/native_functions.yaml_bk
 sed -i '/ _foreach/a\ device_check: NoCheck' codegen/native_functions.yaml  # Only for pytorch 1.8.1
 python3 -m codegen.gen_backend_stubs \
+  --to_cpu=${TOCPU} \
   --output_dir="$NDIR/torch_npu/csrc/aten/" \
   --source_yaml="$NDIR/torch_npu/csrc/aten/npu_native_functions.yaml" \
   --impl_path="$NDIR/torch_npu/csrc/aten"  # Used to double-check the yaml file definitions.
diff --git a/setup.py b/setup.py
index 5f2d24d121ad8147ad47314c7e4d7dc0f60be5bb..30c36a3509b0bfdbf2d9b7bca3b7cc04a0342fe9 100644
--- a/setup.py
+++ b/setup.py
@@ -100,8 +100,8 @@ def get_package_dir():
     return package_dir
 
 
-def generate_bindings_code(base_dir):
-    generate_code_cmd = ["sh", os.path.join(base_dir, 'scripts', 'generate_code.sh')]
+def generate_bindings_code(base_dir, verbose):
+    generate_code_cmd = ["sh", os.path.join(base_dir, 'scripts', 'generate_code.sh'), verbose]
     if subprocess.call(generate_code_cmd) != 0:
         print(
             'Failed to generate ATEN bindings: {}'.format(generate_code_cmd),
@@ -267,11 +267,11 @@ class PythonPackageBuild(build_py, object):
             self.copy_file(src, dst)
         super(PythonPackageBuild, self).finalize_options()
 
-
+to_cpu = os.getenv('NPU_TOCPU', default='TRUE')
 build_mode = _get_build_mode()
 if build_mode not in ['clean']:
     # Generate bindings code, including RegisterNPU.cpp & NPUNativeFunctions.h.
-    generate_bindings_code(BASE_DIR)
+    generate_bindings_code(BASE_DIR, to_cpu)
     build_stub(BASE_DIR)
 
     # Setup include directories folders.
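
A minimal usage sketch of how the new switch is expected to travel from the build entry point to the code generator, based only on the hunks above; it is an illustration, not part of the patch, and the value FALSE is just an example.

# Build with the CPU fallback for unsupported NPU ops turned off.
# ci/build.sh strips the leading "--tocpu=" with ${1:8} and exports NPU_TOCPU.
bash ci/build.sh --tocpu=FALSE

# setup.py reads the switch and hands it down the chain:
#   to_cpu = os.getenv('NPU_TOCPU', default='TRUE')
#   generate_bindings_code(BASE_DIR, to_cpu)   ->  sh scripts/generate_code.sh FALSE
#   generate_code.sh: TOCPU=$1                 ->  --to_cpu=${TOCPU}
# In gen_backend_stubs.run(), any of OFF/0/NO/FALSE/F/N returns early, so
# error_on_cpu_kernels() is skipped and RegisterCPU.cpp is not generated;
# any other value keeps the CPU-fallback registration, now keyed on
# true_backend instead of the hard-coded 'XLA'.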