diff --git a/codegen/autograd/aclnn_derivatives.yaml b/codegen/autograd/aclnn_derivatives.yaml
deleted file mode 100644
index 2a2240682b98ec9c594e20b75a402e3a09dc0a37..0000000000000000000000000000000000000000
--- a/codegen/autograd/aclnn_derivatives.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-# Defines derivative formulas and Python signatures of methods on Variable
-#
-#If you need any guidance, please refer to the comments in derivatives.yaml in PyTorch.
-
-- name: matmul(Tensor self, Tensor other) -> Tensor
-  self, other: matmul_backward(grad, self, other, grad_input_mask)
-
diff --git a/codegen/autograd/gen_autograd.py b/codegen/autograd/gen_autograd.py
index ebcc24e0ea1bbaf582ebef0d1d29c07692f5718d..3a95edcb3a6ceadaa4897de37f0654a048d7ecc1 100644
--- a/codegen/autograd/gen_autograd.py
+++ b/codegen/autograd/gen_autograd.py
@@ -41,7 +41,6 @@ torch_npu/csrc/aten/
 import argparse
 import os
 from typing import List, Sequence
-
 from codegen.api.autograd import (
     match_differentiability_info, NativeFunctionWithDifferentiabilityInfo,
     DifferentiabilityInfo
@@ -57,7 +56,6 @@ from .gen_inplace_or_view_type import gen_inplace_or_view_type
 from .gen_variable_factories import gen_variable_factories
 from .load_derivatives import load_derivatives
 
-
 def gen_autograd(
     native_functions_path: str,
     out: str,
@@ -66,24 +64,11 @@
 ) -> None:
     differentiability_infos = load_derivatives(
         os.path.join(autograd_dir, 'derivatives.yaml'), native_functions_path, npu_native_functions_path)
-
-    aclnn_differentiability_infos = load_derivatives(
-        os.path.join(autograd_dir, 'aclnn_derivatives.yaml'), native_functions_path, npu_native_functions_path)
-
     template_path = os.path.join(autograd_dir, 'templates')
-
     native_funcs = parse_native_and_custom_yaml(native_functions_path, npu_native_functions_path).native_functions
     funcs = filte_out_native_autograd_function(native_funcs, differentiability_infos)
-    aclnn_funcs = filte_out_aclnn_function(native_funcs, aclnn_differentiability_infos)
-
     funcs_with_diff_infos: List[NativeFunctionWithDifferentiabilityInfo] = []
-    aclnn_funcs_with_diff_infos: List[NativeFunctionWithDifferentiabilityInfo] = []
-
     funcs_with_diff_infos = match_differentiability_info(funcs, differentiability_infos)
-    aclnn_funcs_with_diff_infos = match_differentiability_info(aclnn_funcs, aclnn_differentiability_infos)
-
-    differentiability_infos = differentiability_infos + aclnn_differentiability_infos
-    funcs_with_diff_infos.extend(aclnn_funcs_with_diff_infos)
-
     torch_funcs_with_diff_infos: List[NativeFunctionWithDifferentiabilityInfo] = []
     npu_funcs_with_diff_infos: List[NativeFunctionWithDifferentiabilityInfo] = []
     for func in funcs_with_diff_infos:
@@ -109,41 +94,21 @@
     # Generate variable_factories.h
     gen_variable_factories(out, native_functions_path, npu_native_functions_path, template_path)
 
-
 def filte_out_native_autograd_function(
     native_funcs: List[NativeFunction],
     differentiability_infos: Sequence[DifferentiabilityInfo],
 ):
     result: List[NativeFunction] = []
     derivatives_name_list: List[str] = []
-
     for info in differentiability_infos:
         derivatives_name_list.append(str(info.func.func.name))
     for funcs in native_funcs:
         func_name = str(funcs.func.name)
        func_base_name = str(funcs.func.name.name.base)
         if (func_name in derivatives_name_list) or (func_base_name in derivatives_name_list):
-           result.append(funcs)
+            result.append(funcs)
     return result
 
-def filte_out_aclnn_function(
-    native_funcs: List[NativeFunction],
-    aclnn_differentiability_infos: Sequence[DifferentiabilityInfo],
-):
-    result: List[NativeFunction] = []
-    derivatives_name_list: List[str] = []
-
-    for info in aclnn_differentiability_infos:
-        derivatives_name_list.append(str(info.func.func.name))
-    for funcs in native_funcs:
-        func_name = str(funcs.func.name)
-        func_base_name = str(funcs.func.name.name.base)
-        if (func_name in derivatives_name_list) or (func_base_name in derivatives_name_list):
-            if funcs.op_api is True:
-                result.append(funcs)
-    return result
-
-
 def main() -> None:
     parser = argparse.ArgumentParser(
         description='Generate autograd C++ files')
diff --git a/torch_npu/csrc/aten/npu_native_functions.yaml b/torch_npu/csrc/aten/npu_native_functions.yaml
index 4361f40953de7511633558042ac0ebedd18d98b7..524c2d9e5cca9e2fe9e9acc508d493cf08b84341 100644
--- a/torch_npu/csrc/aten/npu_native_functions.yaml
+++ b/torch_npu/csrc/aten/npu_native_functions.yaml
@@ -750,10 +750,6 @@ supported:
     op_api: True
   - func: masked_fill_.Tensor
     op_api: True
-  - func: matmul
-    op_api: False
-  - func: matmul.out
-    op_api: False
   - func: masked_scatter_
     op_api: False
   - func: masked_select
@@ -1359,6 +1355,10 @@ supported:
   - zeros_like
 
 autograd:
+  - func: matmul
+    op_api: True
+  - func: matmul.out
+    op_api: True
   - celu
   - celu_
   - func: elu.out
diff --git a/torch_npu/csrc/aten/ops/op_api/MatmulKernelNpuOpApi.cpp b/torch_npu/csrc/aten/ops/op_api/MatmulKernelNpuOpApi.cpp
index c1263dee14a3eb35a9a51878a223833cd688406f..3d7fd0adb8b0d1c2c634b3b435234395b51e8fbb 100644
--- a/torch_npu/csrc/aten/ops/op_api/MatmulKernelNpuOpApi.cpp
+++ b/torch_npu/csrc/aten/ops/op_api/MatmulKernelNpuOpApi.cpp
@@ -14,13 +14,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include
+
 #include "torch_npu/csrc/aten/NPUNativeOpApiFunctions.h"
-#include "torch_npu/csrc/aten/common/InnerNpuNativeFunction.h"
 #include "torch_npu/csrc/aten/ops/op_api/op_api_common.h"
-#include "torch_npu/csrc/core/npu/register/OptionsManager.h"
-#include "torch_npu/csrc/framework/interface/EnvVariables.h"
 #include "torch_npu/csrc/framework/utils/OpAdapter.h"
-#include
 
 namespace at_npu {
 namespace native {
@@ -28,14 +26,19 @@
 const int8_t ALLOW_FP32_DOWN_PRECISION = 1;
 const int8_t KEEP_DTYPE = 0;
 
+using torch::autograd::AutogradContext;
+using torch::autograd::Function;
+using tensor_list = std::vector<at::Tensor>;
+
 static c10::SmallVector<int64_t, SIZE> get_npu_output_size(const at::Tensor &tensor1,
                                                            const at::Tensor &tensor2) {
   c10::SmallVector<int64_t, SIZE> output_size;
   auto dim_tensor1 = tensor1.dim();
   auto dim_tensor2 = tensor2.dim();
 
-  TORCH_CHECK(dim_tensor1 > 0 && dim_tensor2 > 0, "matmul got error dimentions: ", "(", dim_tensor1,
-              ", ", dim_tensor2, ")");
+  TORCH_CHECK(dim_tensor1 > 0 && dim_tensor2 > 0,
+              "matmul got error dimentions: ", "(", dim_tensor1, ", ",
+              dim_tensor2, ")");
 
   if (dim_tensor1 == 1 && dim_tensor2 == 1) {
     output_size = {};
@@ -84,15 +87,16 @@
 
     output_size = output_expand_size;
   } else {
-    TORCH_CHECK(false, "matmul got error sizes: ", "(", dim_tensor1, ", ", dim_tensor2, ")");
+    TORCH_CHECK(false, "matmul got error sizes: ", "(", dim_tensor1, ", ",
+                dim_tensor2, ")");
   }
 
   return output_size;
 }
 
-static at::Tensor matmul_opt_npu(c10::optional<at::Tensor> out_opt,
-                                 const at::Tensor &tensor1,
-                                 const at::Tensor &tensor2) {
+static at::Tensor matmul_forward_npu(c10::optional<at::Tensor> out_opt,
+                                     const at::Tensor &tensor1,
+                                     const at::Tensor &tensor2) {
   at::NoNamesGuard guard;
   at::Tensor out;
 
@@ -101,7 +105,8 @@
     out = out_opt.value();
     OpPreparation::CheckOut({tensor1, tensor2}, out, tensor1, output_size);
   } else {
-    out = OpPreparation::ApplyTensorWithoutFormat(output_size, tensor1.options());
+    out =
+        OpPreparation::ApplyTensorWithoutFormat(output_size, tensor1.options());
   }
 
   // allow dicrease precision
@@ -110,11 +115,77 @@
   return out;
 }
 
+std::tuple<at::Tensor, at::Tensor> matmul_backward_npu(const at::Tensor &grad,
+                                                       const at::Tensor &self,
+                                                       const at::Tensor &other) {
+  if (!grad.defined()) {
+    return std::make_tuple(at::Tensor(), at::Tensor());
+  }
+
+  auto mat1 = self;
+  auto mat2 = other;
+  auto dim_tensor1 = self.dim();
+  auto dim_tensor2 = other.dim();
+  std::vector<int64_t> new_shape(grad.sizes().begin(), grad.sizes().end());
+  // unsqueeze mat1 & mat2
+  if (dim_tensor1 == 1 && dim_tensor2 == 1) {
+    mat1 = self.view({1, self.size(0)});
+    mat2 = other.view({other.size(0), 1});
+    new_shape = {1, 1};
+  }
+  // unsqueeze mat1
+  if (dim_tensor1 == 1 && dim_tensor2 > 1) {
+    mat1 = self.view({1, self.size(0)});
+    new_shape.insert(new_shape.begin() + new_shape.size() - 1, 1);
+  }
+  // unsqueeze mat2
+  if (dim_tensor1 > 1 && dim_tensor2 == 1) {
+    mat2 = other.view({other.size(0), 1});
+    new_shape.insert(new_shape.end(), 1);
+  }
+
+  // backward input 0
+  auto grad_self = matmul_forward_npu(c10::nullopt, grad.view(new_shape),
+                                      mat2.transpose(-2, -1));
+  // backward input 1
+  auto grad_other = matmul_forward_npu(c10::nullopt, mat1.transpose(-2, -1),
+                                       grad.view(new_shape));
+  if (grad_other.size(-1) == 1 && other.dim() == 1) {
+    grad_other = grad_other.squeeze(-1);
+  }
+
+  return std::make_tuple(grad_self, grad_other);
+}
+
+class NPUMatmulOpApiFunction
+    : public torch::autograd::Function<NPUMatmulOpApiFunction> {
+public:
+  static at::Tensor forward(AutogradContext *ctx, const at::Tensor &self,
+                            const at::Tensor &other) {
+    at::AutoNonVariableTypeMode g;
+    ctx->save_for_backward({self, other});
+    auto result = matmul_forward_npu(c10::nullopt, self, other);
+    return result;
+  }
+
+  static tensor_list backward(AutogradContext *ctx, tensor_list grad_outputs) {
+    auto saved = ctx->get_saved_variables();
+    auto self = saved[0];
+    auto other = saved[1];
+
+    auto grads = matmul_backward_npu(grad_outputs[0], self, other);
+    tensor_list output = {std::get<0>(grads), std::get<1>(grads)};
+
+    return output;
+  }
+};
+
 at::Tensor NPUNativeOpApiFunctions::matmul(const at::Tensor &tensor1,
                                            const at::Tensor &tensor2) {
   DO_COMPATIBILITY(aclnnMatmul, NPUNativeFunctions::matmul(tensor1, tensor2));
-  auto maybe_outnames = at::namedinference::compute_matmul_outnames(tensor1, tensor2);
-  auto result = matmul_opt_npu(c10::nullopt, tensor1, tensor2);
+  auto maybe_outnames =
+      at::namedinference::compute_matmul_outnames(tensor1, tensor2);
+  auto result = NPUMatmulOpApiFunction::apply(tensor1, tensor2);
   at::namedinference::propagate_names_if_nonempty(result, maybe_outnames);
   return result;
 }
@@ -122,9 +193,12 @@
 at::Tensor &NPUNativeOpApiFunctions::matmul_out(const at::Tensor &tensor1,
                                                 const at::Tensor &tensor2,
                                                 at::Tensor &result) {
-  DO_COMPATIBILITY(aclnnMatmul, NPUNativeFunctions::matmul_out(tensor1, tensor2, result));
-  auto maybe_outnames = at::namedinference::compute_matmul_outnames(tensor1, tensor2);
-  matmul_opt_npu(c10::optional<at::Tensor>(result), tensor1, tensor2);
+  DO_COMPATIBILITY(aclnnMatmul,
+                   NPUNativeFunctions::matmul_out(tensor1, tensor2, result));
+  auto maybe_outnames =
+      at::namedinference::compute_matmul_outnames(tensor1, tensor2);
+  // matmul_out does not support autograd
+  matmul_forward_npu(c10::optional<at::Tensor>(result), tensor1, tensor2);
   at::namedinference::propagate_names_if_nonempty(result, maybe_outnames);
   return result;
 }
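Note: `NPUMatmulOpApiFunction` above follows the standard `torch::autograd::Function` custom-function pattern (forward saves the operands and runs the kernel, backward recomputes matmul against the transposed operands). The sketch below is only a minimal, CPU-only illustration of that pattern against stock libtorch of the same era as this patch: `SketchMatmulFunction` and the use of plain `at::matmul` are illustrative stand-ins for `NPUMatmulOpApiFunction` and `matmul_forward_npu`, and only inputs with at least two dimensions are handled.

```cpp
// Minimal sketch of the custom autograd-function pattern used above,
// written against stock ATen so it can be built and run on CPU.
// SketchMatmulFunction / at::matmul are stand-ins for the NPU pieces;
// only >= 2-D inputs are handled (no 1-D reshaping).
#include <torch/torch.h>

#include <iostream>
#include <vector>

using torch::autograd::AutogradContext;
using tensor_list = std::vector<at::Tensor>;

struct SketchMatmulFunction : public torch::autograd::Function<SketchMatmulFunction> {
  static at::Tensor forward(AutogradContext *ctx, const at::Tensor &self,
                            const at::Tensor &other) {
    // Same guard as the patch: run the underlying kernel outside autograd
    // tracking and stash the inputs that backward will need.
    at::AutoNonVariableTypeMode g;
    ctx->save_for_backward({self, other});
    return at::matmul(self, other);
  }

  static tensor_list backward(AutogradContext *ctx, tensor_list grad_outputs) {
    auto saved = ctx->get_saved_variables();
    const auto &self = saved[0];
    const auto &other = saved[1];
    const auto &grad = grad_outputs[0];
    // d(self @ other)/dself = grad @ other^T,  d(self @ other)/dother = self^T @ grad
    auto grad_self = at::matmul(grad, other.transpose(-2, -1));
    auto grad_other = at::matmul(self.transpose(-2, -1), grad);
    return {grad_self, grad_other};
  }
};

int main() {
  auto a = torch::randn({4, 3}, torch::requires_grad());
  auto b = torch::randn({3, 5}, torch::requires_grad());
  auto out = SketchMatmulFunction::apply(a, b);  // participates in autograd like any op
  out.sum().backward();
  std::cout << a.grad().sizes() << " " << b.grad().sizes() << std::endl;
  return 0;
}
```

Registering the op under `autograd:` in npu_native_functions.yaml (previous file) is what routes `matmul` through this apply() path instead of the plain forward kernel.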
diff --git a/torch_npu/csrc/framework/autograd/FunctionsManual.cpp b/torch_npu/csrc/framework/autograd/FunctionsManual.cpp
index 710b4c88cb667c1a61ead4e94ad7386a228bdb00..579056b2487d4b45e1707988f67ff682872aaa2d 100644
--- a/torch_npu/csrc/framework/autograd/FunctionsManual.cpp
+++ b/torch_npu/csrc/framework/autograd/FunctionsManual.cpp
@@ -44,7 +44,6 @@
 #include
 
 #include "torch_npu/csrc/aten/NPUNativeFunctions.h"
-#include "torch_npu/csrc/aten/NPUNativeOpApiFunctions.h"
 #include "FunctionsManual.h"
 
 // Helper functions for autogenerated code
@@ -128,57 +127,6 @@ Tensor fast_gelu_backward(const Tensor& grad, const Tensor& self) {
   return at_npu::native::NPUNativeFunctions::npu_fast_gelu_backward(grad, self);
 }
 
-std::tuple<Tensor, Tensor> matmul_backward(const Tensor &grad,
-    const Tensor &self, const Tensor &other,
-    std::array<bool, 2> grad_input_mask) {
-  if (!grad.defined()) {
-    return std::make_tuple(at::Tensor(), at::Tensor());
-  }
-
-  at::Tensor grad_self, grad_other;
-  if (!grad_input_mask[0] && !grad_input_mask[1]) {
-    return std::make_tuple(grad_self, grad_other);
-  }
-
-  auto mat1 = self;
-  auto mat2 = other;
-  auto dim_tensor1 = self.dim();
-  auto dim_tensor2 = other.dim();
-  std::vector<int64_t> new_shape(grad.sizes().begin(), grad.sizes().end());
-  // unsqueese mat1 & mat2
-  if (dim_tensor1 == 1 && dim_tensor2 == 1) {
-    mat1 = self.view({1, self.size(0)});
-    mat2 = other.view({other.size(0), 1});
-    new_shape = {1, 1};
-  }
-  // unsqueese mat1
-  if (dim_tensor1 == 1 && dim_tensor2 > 1) {
-    mat1 = self.view({1, self.size(0)});
-    new_shape.insert(new_shape.begin() + new_shape.size() - 1, 1);
-  }
-  // unsqueese mat2
-  if (dim_tensor1 > 1 && dim_tensor2 == 1) {
-    mat2 = other.view({other.size(0), 1});
-    new_shape.insert(new_shape.end(), 1);
-  }
-
-  // backward input 0
-  if (grad_input_mask[0]) {
-    grad_self = at_npu::native::NPUNativeOpApiFunctions::matmul(
-        grad.view(new_shape), mat2.transpose(-2, -1));
-  }
-  // backward input 1
-  if (grad_input_mask[1]) {
-    grad_other = at_npu::native::NPUNativeOpApiFunctions::matmul(
-        mat1.transpose(-2, -1), grad.view(new_shape));
-    if (grad_other.size(-1) == 1 && other.dim() == 1) {
-      grad_other = grad_other.squeeze(-1);
-    }
-  }
-
-  return std::make_tuple(grad_self, grad_other);
-}
-
 } // namespace details
 } // namespace generated
 } // namespace autograd
diff --git a/torch_npu/csrc/framework/autograd/FunctionsManual.h b/torch_npu/csrc/framework/autograd/FunctionsManual.h
index ff3b88f1216553240876194d1ebe2d25fd9125fc..53aa52b22bac8bd964881222143e27cee18d9de9 100644
--- a/torch_npu/csrc/framework/autograd/FunctionsManual.h
+++ b/torch_npu/csrc/framework/autograd/FunctionsManual.h
@@ -59,11 +59,6 @@ std::vector<Tensor> not_implemented_list(const char* name, const char* reason=""
 
 Tensor fast_gelu_backward(const Tensor& grad, const Tensor& self);
 
-std::tuple<Tensor, Tensor> matmul_backward(const Tensor &grad,
-                                           const Tensor &self,
-                                           const Tensor &other,
-                                           std::array<bool, 2> grad_input_mask);
-
 } // namespace details
 } // namespace generated
 } // namespace autograd
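Note: the reshape handling in `matmul_backward_npu` (1-D operands are viewed as matrices, the incoming gradient is viewed with a matching unit dimension inserted or appended, and a trailing unit dimension is squeezed off `grad_other` afterwards) can be cross-checked against stock autograd on CPU. The program below is only a sketch of such a check, assuming plain libtorch: `matmul_backward_ref` is an illustrative name, it uses `at::matmul` rather than the NPU kernels, and it covers just the matrix-vector case.

```cpp
// Cross-checks the backward formulas used by matmul_backward_npu
// (grad_self = grad @ other^T, grad_other = self^T @ grad, with 1-D
// operands temporarily viewed as matrices) against PyTorch's own autograd.
// CPU-only sketch; matmul_backward_ref is an illustrative name.
#include <torch/torch.h>

#include <iostream>
#include <tuple>
#include <vector>

// Reference for the matrix-vector case (self.dim() > 1, other.dim() == 1).
std::tuple<at::Tensor, at::Tensor> matmul_backward_ref(const at::Tensor &grad,
                                                       const at::Tensor &self,
                                                       const at::Tensor &other) {
  // other: (k) -> (k, 1); grad: (..., n) -> (..., n, 1), as in the patch.
  auto mat2 = other.view({other.size(0), 1});
  std::vector<int64_t> new_shape(grad.sizes().begin(), grad.sizes().end());
  new_shape.push_back(1);
  auto grad_mat = grad.view(new_shape);
  auto grad_self = at::matmul(grad_mat, mat2.transpose(-2, -1));
  auto grad_other = at::matmul(self.transpose(-2, -1), grad_mat).squeeze(-1);
  return std::make_tuple(grad_self, grad_other);
}

int main() {
  auto self = torch::randn({4, 3}, torch::requires_grad());
  auto other = torch::randn({3}, torch::requires_grad());

  // Gradients from stock autograd.
  auto out = at::matmul(self, other);  // shape (4)
  out.sum().backward();

  // Gradients from the patch's formulas.
  auto ref = matmul_backward_ref(torch::ones_like(out), self.detach(), other.detach());

  std::cout << "grad_self matches:  "
            << at::allclose(self.grad(), std::get<0>(ref)) << std::endl;
  std::cout << "grad_other matches: "
            << at::allclose(other.grad(), std::get<1>(ref)) << std::endl;
  return 0;
}
```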