diff --git a/ci/build.sh b/ci/build.sh
index d4ce6ce04a11151b04cf5c8b6541d8cdc155d86b..88c3dacb4656ab3d6d53e78980133cdc0c9c2e94 100644
--- a/ci/build.sh
+++ b/ci/build.sh
@@ -54,6 +54,11 @@ function parse_script_args() {
             args_num=$((args_num-1))
             shift
             ;;
+        --tocpu=*)
+            export 'NPU_TOCPU'=${1:8}
+            args_num=$((args_num-1))
+            shift
+            ;;
         -*)
             echo "ERROR Unsupported parameters: ${1}"
             return 1
diff --git a/scripts/codegen/dest/utils.py b/scripts/codegen/dest/utils.py
index 1289de681c169111206248841cbe1cb0b4fc5be4..c98abb49daadbfca8ea02551fcda70b967945141 100644
--- a/scripts/codegen/dest/utils.py
+++ b/scripts/codegen/dest/utils.py
@@ -21,9 +21,11 @@ from codegen.api.signature import DispatcherSignature, NativeSignature
 from codegen.model import SchemaKind, NativeFunction
 from codegen.api.native import native_arguments
 
+backend = None
+
 def transfer_args_of_wrapper_func_to_cpu(sig: DispatcherSignature, func: NativeFunction) -> Tuple[str, List[str]]:
     convert: str = f"// Convert args to cpu in order to use at::native kernel \n " \
-               f"TORCH_WARN_ONCE(\"Cur kernel: {sig.func.name} is running on cpu.\"); \n "
+               f"std::cout << \"Cur kernel: {sig.func.name} is running on cpu.\" << std::endl; \n "
     args_names: List[str] = []
     args = native_arguments(sig.func, func.use_c10_dispatcher)
     for arg in args:
@@ -53,7 +55,6 @@ def transfer_args_of_wrapper_func_to_cpu(sig: DispatcherSignature, func: NativeF
 
 def transfer_ret_of_wrapper_func_to_xla(sig: DispatcherSignature, func_call: str) -> str:
     ret_code = ''
-    backend = "XLA"
     if sig.func.kind() == SchemaKind.functional:
         if sig.returns_type().cpp_type() == 'at::Tensor':
             ret_code = f"return {func_call}.toBackend(Backend::{backend});"
diff --git a/scripts/codegen/gen_backend_stubs.py b/scripts/codegen/gen_backend_stubs.py
index 9f6c6bcccf80325e5aaa008982c0fcec19b2224c..e75d95c5e72168d90827f865cf916b64abae4302 100644
--- a/scripts/codegen/gen_backend_stubs.py
+++ b/scripts/codegen/gen_backend_stubs.py
@@ -29,6 +29,7 @@ from codegen.selective_build.selector import SelectiveBuilder
 from codegen.utils import Target, concat_map, context
 from codegen.context import native_function_manager
 import codegen.dest as dest
+import codegen.dest.utils as utils
 import codegen.api.dispatcher as dispatcher
 from codegen.api.signature import DispatcherSignature
 
@@ -265,14 +266,13 @@ but expected {expected_overload_count} kernel(s).
 The expected function schemas
         print(f"Unsupported Ops List:\n{unsupported_ops_list}")
 
 def error_on_cpu_kernels(
+        cur_backend_key: DispatchKey,
         native_functions: Sequence[NativeFunction],
         backend_indices: Dict[DispatchKey, BackendIndex],
-        backend_key: DispatchKey,
-        autograd_key: DispatchKey,
 ) -> None:
     expected_backend_op_names: List[OperatorName] = \
-        list(backend_indices[backend_key].index.keys()) + list(backend_indices[autograd_key].index.keys())
+        list(backend_indices[cur_backend_key].index.keys())
     expected_backend_native_funcs: List[NativeFunction] = \
         [f for f in native_functions if f.func.name in expected_backend_op_names]
     expected_backend_kernel_name_counts: Dict[str, List[NativeFunction]] = defaultdict(list)
@@ -285,6 +285,8 @@ def error_on_cpu_kernels(
 
 def main() -> None:
     parser = argparse.ArgumentParser(description='Generate backend stub files')
+    parser.add_argument(
+        '--to_cpu', type=str, default="TRUE", help='move op which npu does not support to cpu')
     parser.add_argument(
         '-s',
         '--source_yaml',
@@ -297,9 +299,9 @@ def main() -> None:
         '--impl_path', type=str, default=None, help='path to the source C++ file containing kernel definitions')
     options = parser.parse_args()
-    run(options.source_yaml, options.output_dir, options.dry_run, options.impl_path)
+    run(options.to_cpu, options.source_yaml, options.output_dir, options.dry_run, options.impl_path)
 
 
-def run(source_yaml: str, output_dir: str, dry_run: bool, impl_path: Optional[str]) -> None:
+def run(to_cpu: str, source_yaml: str, output_dir: str, dry_run: bool, impl_path: Optional[str]) -> None:
 
     template_dir = os.path.join(pathlib.Path(__file__).parent.absolute(), "templates")
 
@@ -314,6 +316,7 @@ def run(source_yaml: str, output_dir: str, dry_run: bool, impl_path: Optional[st
     grouped_native_functions = get_grouped_native_functions(native_functions)
     parsed_backend_yaml = parse_backend_yaml(source_yaml, grouped_native_functions, backend_indices)
     true_backend = parsed_backend_yaml.true_backend
+    utils.backend = true_backend
     backend_key = parsed_backend_yaml.backend_key
     autograd_key = parsed_backend_yaml.autograd_key
     cpp_namespace = parsed_backend_yaml.cpp_namespace
@@ -348,48 +351,6 @@ def run(source_yaml: str, output_dir: str, dry_run: bool, impl_path: Optional[st
             ))),
         })
 
-    error_on_cpu_kernels(native_functions, backend_indices, backend_key, autograd_key)
-
-    dispatch_key = 'XLA'
-    native_func_header = f'#include "torch_npu/csrc/aten/NPUNativeFunctions.h"\n'
-    fm.write_with_template(f'RegisterCPU.cpp', 'RegisterDispatchKey.cpp', lambda: {
-        'external_backend_headers': native_func_header,
-        'namespaced_headers': '',
-        'DispatchKey': dispatch_key,
-        'dispatch_namespace': dispatch_key.lower(),
-        'dispatch_helpers': dest.gen_registration_helpers(backend_indices[DispatchKey.CPU]),
-        'dispatch_namespaced_definitions': list(concat_map(
-            dest.RegisterDispatchKeyCPU(
-                backend_indices[DispatchKey.CPU],
-                Target.NAMESPACED_DEFINITION,
-                selector,
-                rocm=False,
-                cpp_namespace=cpp_namespace,
-                class_method_name=f'NPUNativeFunctions'),
-            grouped_native_functions
-        )),
-        'dispatch_anonymous_definitions': list(concat_map(
-            dest.RegisterDispatchKeyCPU(
-                backend_indices[DispatchKey.CPU],
-                Target.ANONYMOUS_DEFINITION,
-                selector,
-                rocm=False,
-                cpp_namespace=cpp_namespace,
-                class_method_name=f'NPUNativeFunctions'),
-            grouped_native_functions
-        )),
-        'dispatch_registrations': list(concat_map(
-            dest.RegisterDispatchKeyCPU(
-                backend_indices[DispatchKey.CPU],
-                Target.REGISTRATION,
-                selector,
-                rocm=False,
-                cpp_namespace=cpp_namespace,
-                class_method_name=f'NPUNativeFunctions'),
-            grouped_native_functions
-        )),
-    })
-
     for dispatch_key in [backend_dispatch_key, autograd_dispatch_key]:
         if not dispatch_key:
             continue
@@ -433,5 +394,52 @@ def run(source_yaml: str, output_dir: str, dry_run: bool, impl_path: Optional[st
             )),
         })
 
+    if to_cpu.upper() in ['OFF', '0', 'NO', 'FALSE', 'F', 'N']:
+        return
+
+    backend_list = [backend_key, autograd_key, DispatchKey.Math, DispatchKey.CompositeExplicitAutograd]
+    for key in backend_list:
+        error_on_cpu_kernels(key, native_functions, backend_indices)
+
+    dispatch_key = true_backend
+    native_func_header = f'#include "torch_npu/csrc/aten/NPUNativeFunctions.h"\n'
+    fm.write_with_template(f'RegisterCPU.cpp', 'RegisterDispatchKey.cpp', lambda: {
+        'external_backend_headers': native_func_header,
+        'namespaced_headers': '',
+        'DispatchKey': dispatch_key,
+        'dispatch_namespace': dispatch_key.lower(),
+        'dispatch_helpers': dest.gen_registration_helpers(backend_indices[DispatchKey.CPU]),
+        'dispatch_namespaced_definitions': list(concat_map(
+            dest.RegisterDispatchKeyCPU(
+                backend_indices[DispatchKey.CPU],
+                Target.NAMESPACED_DEFINITION,
+                selector,
+                rocm=False,
+                cpp_namespace=cpp_namespace,
+                class_method_name=f'NPUNativeFunctions'),
+            grouped_native_functions
+        )),
+        'dispatch_anonymous_definitions': list(concat_map(
+            dest.RegisterDispatchKeyCPU(
+                backend_indices[DispatchKey.CPU],
+                Target.ANONYMOUS_DEFINITION,
+                selector,
+                rocm=False,
+                cpp_namespace=cpp_namespace,
+                class_method_name=f'NPUNativeFunctions'),
+            grouped_native_functions
+        )),
+        'dispatch_registrations': list(concat_map(
+            dest.RegisterDispatchKeyCPU(
+                backend_indices[DispatchKey.CPU],
+                Target.REGISTRATION,
+                selector,
+                rocm=False,
+                cpp_namespace=cpp_namespace,
+                class_method_name=f'NPUNativeFunctions'),
+            grouped_native_functions
+        )),
+    })
+
 if __name__ == '__main__':
     main()
diff --git a/scripts/generate_code.sh b/scripts/generate_code.sh
index 9cd97012b2c290d974866f787c6a628f79efd0f8..82413604e9e916622805ad5c674f38d2b9b260ad 100644
--- a/scripts/generate_code.sh
+++ b/scripts/generate_code.sh
@@ -17,6 +17,7 @@
 
 CDIR="$(cd "$(dirname "$0")" ; pwd -P)"
 NDIR="$CDIR/.."
+TOCPU=$1
 
 cd $NDIR/scripts
 
@@ -26,6 +27,7 @@
 cp -f codegen/native_functions.yaml codegen/native_functions.yaml_bk
 sed -i '/ _foreach/a\ device_check: NoCheck' codegen/native_functions.yaml  # Only for pytorch 1.8.1
 python3 -m codegen.gen_backend_stubs \
+  --to_cpu=${TOCPU} \
   --output_dir="$NDIR/torch_npu/csrc/aten/" \
   --source_yaml="$NDIR/torch_npu/csrc/aten/npu_native_functions.yaml" \
   --impl_path="$NDIR/torch_npu/csrc/aten"  # Used to double-check the yaml file definitions.
diff --git a/setup.py b/setup.py
index 5f2d24d121ad8147ad47314c7e4d7dc0f60be5bb..30c36a3509b0bfdbf2d9b7bca3b7cc04a0342fe9 100644
--- a/setup.py
+++ b/setup.py
@@ -100,8 +100,8 @@ def get_package_dir():
     return package_dir
 
 
-def generate_bindings_code(base_dir):
-    generate_code_cmd = ["sh", os.path.join(base_dir, 'scripts', 'generate_code.sh')]
+def generate_bindings_code(base_dir, verbose):
+    generate_code_cmd = ["sh", os.path.join(base_dir, 'scripts', 'generate_code.sh'), verbose]
     if subprocess.call(generate_code_cmd) != 0:
         print(
             'Failed to generate ATEN bindings: {}'.format(generate_code_cmd),
@@ -267,11 +267,11 @@ class PythonPackageBuild(build_py, object):
             self.copy_file(src, dst)
         super(PythonPackageBuild, self).finalize_options()
 
-
+to_cpu = os.getenv('NPU_TOCPU', default='TRUE')
 build_mode = _get_build_mode()
 if build_mode not in ['clean']:
     # Generate bindings code, including RegisterNPU.cpp & NPUNativeFunctions.h.
-    generate_bindings_code(BASE_DIR)
+    generate_bindings_code(BASE_DIR, to_cpu)
     build_stub(BASE_DIR)
 
     # Setup include directories folders.
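
A minimal usage sketch of how the new switch is expected to travel from the build entry point to the code generator, based only on the hunks above; it is an illustration, not part of the patch, and the value FALSE is just an example.

# Build with the CPU fallback for unsupported NPU ops turned off.
# ci/build.sh strips the leading "--tocpu=" with ${1:8} and exports NPU_TOCPU.
bash ci/build.sh --tocpu=FALSE

# setup.py reads the switch and hands it down the chain:
#   to_cpu = os.getenv('NPU_TOCPU', default='TRUE')
#   generate_bindings_code(BASE_DIR, to_cpu)   ->  sh scripts/generate_code.sh FALSE
#   generate_code.sh: TOCPU=$1                 ->  --to_cpu=${TOCPU}
# In gen_backend_stubs.run(), any of OFF/0/NO/FALSE/F/N returns early, so
# error_on_cpu_kernels() is skipped and RegisterCPU.cpp is not generated;
# any other value keeps the CPU-fallback registration, now keyed on
# true_backend instead of the hard-coded 'XLA'.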