diff --git a/mindscience/sciops/__init__.py b/mindscience/sciops/__init__.py index 69a14b29e1ced3fa627e5dada3f5f6ba239fdc1c..e9b5a68f1b1793d07b2045533504ff075d8d7d3f 100644 --- a/mindscience/sciops/__init__.py +++ b/mindscience/sciops/__init__.py @@ -16,4 +16,37 @@ init """ -__all__ = [] \ No newline at end of file +__all__ = ["Einsum", "evo_attention"] + +import os +from mindspore.ops import CustomOpBuilder +from .python import * + + +class CppOpBuilder: + """ + This class is used to load the self-defined operation. + """ + + cpp_ops = {} + sciops_path = os.path.dirname(__file__) + + @classmethod + def load(cls, ops_name, **kwargs): + """ + Loading the ops based on ops_name. + """ + + if ops_name not in cls.cpp_ops: + os.environ["ASCEND_CUSTOM_OPP_PATH"] = f"{cls.sciops_path}/binary/{ops_name}:" + \ + os.environ["ASCEND_CUSTOM_OPP_PATH"] + build_dir = f"{cls.sciops_path}/build/{ops_name}" + ccsrc_file = f"{cls.sciops_path}/ccsrc/{ops_name}.cpp" + op_builder = CustomOpBuilder(ops_name, ccsrc_file, "Ascend", build_dir=build_dir, **kwargs) + cls.cpp_ops[ops_name] = op_builder.load() + return cls.cpp_ops[ops_name] + +def evo_attention(query, key, value, head_num, bias, attn_mask, scale_value, input_layout): + return CppOpBuilder.load("evoformer_attention").npu_evoformer_attention(query, key, value, bias, None, None, + attn_mask, None, scale_value, None, None, + None, head_num, input_layout, None, None) diff --git a/mindscience/sciops/binary/evoformer_attention/op_api/include/aclnn_evoformer_attention.h b/mindscience/sciops/binary/evoformer_attention/op_api/include/aclnn_evoformer_attention.h new file mode 100644 index 0000000000000000000000000000000000000000..983f4edee907b0dbf18332665c8f78f3f2d27936 --- /dev/null +++ b/mindscience/sciops/binary/evoformer_attention/op_api/include/aclnn_evoformer_attention.h @@ -0,0 +1,150 @@ +/** + * Copyright (c) 2023-2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. 
+ * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +#ifndef OP_API_INC_LEVEL2_ACLNN_FLASH_ATTENTION_SCORE_H_ +#define OP_API_INC_LEVEL2_ACLNN_FLASH_ATTENTION_SCORE_H_ + +#include "aclnn/aclnn_base.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @brief The first interface of aclnnEvoformerAttention, + * calculates the size of the workspace based on the specific calculation process. + * @domain aclnn_ops_infer + */ +aclnnStatus aclnnEvoformerAttentionGetWorkspaceSize( + const aclTensor *query, const aclTensor *key, const aclTensor *value, const aclTensor *realShiftOptional, + const aclTensor *dropMaskOptional, const aclTensor *paddingMaskOptional, const aclTensor *attenMaskOptional, + const aclIntArray *prefixOptional, double scaleValueOptional, double keepProbOptional, int64_t preTokensOptional, + int64_t nextTokensOptional, int64_t headNum, char *inputLayout, int64_t innerPreciseOptional, + int64_t sparseModeOptional, const aclTensor *softmaxMaxOut, const aclTensor *softmaxSumOut, + const aclTensor *softmaxOutOut, const aclTensor *attentionOutOut, uint64_t *workspaceSize, + aclOpExecutor **executor); + +/** + * @brief The second interface of aclnnEvoformerAttention, used to perform calculation. + */ +aclnnStatus aclnnEvoformerAttention(void *workspace, uint64_t workspaceSize, aclOpExecutor *executor, + const aclrtStream stream); + +/** + * @brief The first interface of aclnnFlashAttentionVarLenScore, + * calculates the size of the workspace based on the specific calculation process. 
+ * @domain aclnn_ops_infer + */ +aclnnStatus aclnnFlashAttentionVarLenScoreGetWorkspaceSize( + const aclTensor *query, const aclTensor *key, const aclTensor *value, const aclTensor *realShiftOptional, + const aclTensor *dropMaskOptional, const aclTensor *paddingMaskOptional, const aclTensor *attenMaskOptional, + const aclIntArray *prefixOptional, const aclIntArray *actualSeqQLenOptional, + const aclIntArray *actualSeqKvLenOptional, double scaleValueOptional, double keepProbOptional, + int64_t preTokensOptional, int64_t nextTokensOptional, int64_t headNum, char *inputLayout, + int64_t innerPreciseOptional, int64_t sparseModeOptional, const aclTensor *softmaxMaxOut, + const aclTensor *softmaxSumOut, const aclTensor *softmaxOutOut, const aclTensor *attentionOutOut, + uint64_t *workspaceSize, aclOpExecutor **executor); + +/** + * @brief The second interface of aclnnFlashAttentionVarLenScore, used to perform calculation. + */ +aclnnStatus aclnnFlashAttentionVarLenScore(void *workspace, uint64_t workspaceSize, aclOpExecutor *executor, + const aclrtStream stream); + + +/** + * @brief The first interface of aclnnEvoformerAttentionV2, + * calculates the size of the workspace based on the specific calculation process. 
+ * @domain aclnn_ops_infer +*/ +aclnnStatus aclnnEvoformerAttentionV2GetWorkspaceSize( + const aclTensor *query, + const aclTensor *key, + const aclTensor *value, + const aclTensor *realShiftOptional, + const aclTensor *dropMaskOptional, + const aclTensor *paddingMaskOptional, + const aclTensor *attenMaskOptional, + const aclIntArray *prefixOptional, + const aclIntArray *qStartIdxOptional, + const aclIntArray *kvStartIdxOptional, + double scaleValueOptional, + double keepProbOptional, + int64_t preTokensOptional, + int64_t nextTokensOptional, + int64_t headNum, + char *inputLayout, + int64_t innerPreciseOptional, + int64_t sparseModeOptional, + int64_t pseTypeOptional, + const aclTensor *softmaxMaxOut, + const aclTensor *softmaxSumOut, + const aclTensor *softmaxOutOut, + const aclTensor *attentionOutOut, + uint64_t *workspaceSize, + aclOpExecutor **executor); + +/** + * @brief The second interface of aclnnEvoformerAttentionV2, used to perform calculation. +*/ +aclnnStatus aclnnEvoformerAttentionV2( + void *workspace, + uint64_t workspaceSize, + aclOpExecutor *executor, + const aclrtStream stream); + +/** + * @brief The first interface of aclnnFlashAttentionVarLenScoreV2, + * calculates the size of the workspace based on the specific calculation process. 
+ * @domain aclnn_ops_infer +*/ +aclnnStatus aclnnFlashAttentionVarLenScoreV2GetWorkspaceSize( + const aclTensor *query, + const aclTensor *key, + const aclTensor *value, + const aclTensor *realShiftOptional, + const aclTensor *dropMaskOptional, + const aclTensor *paddingMaskOptional, + const aclTensor *attenMaskOptional, + const aclIntArray *prefixOptional, + const aclIntArray *actualSeqQLenOptional, + const aclIntArray *actualSeqKvLenOptional, + const aclIntArray *qStartIdxOptional, + const aclIntArray *kvStartIdxOptional, + double scaleValueOptional, + double keepProbOptional, + int64_t preTokensOptional, + int64_t nextTokensOptional, + int64_t headNum, + char *inputLayout, + int64_t innerPreciseOptional, + int64_t sparseModeOptional, + int64_t pseTypeOptional, + const aclTensor *softmaxMaxOut, + const aclTensor *softmaxSumOut, + const aclTensor *softmaxOutOut, + const aclTensor *attentionOutOut, + uint64_t *workspaceSize, + aclOpExecutor **executor); + +/** + * @brief The second interface of aclnnFlashAttentionVarLenScoreV2, used to perform calculation. 
+*/ +aclnnStatus aclnnFlashAttentionVarLenScoreV2( + void *workspace, + uint64_t workspaceSize, + aclOpExecutor *executor, + const aclrtStream stream); + +#ifdef __cplusplus +} +#endif + +#endif // OP_API_INC_LEVEL2_ACLNN_FLASH_ATTENTION_SCORE_H_ diff --git a/mindscience/sciops/binary/evoformer_attention/op_api/lib/libcust_opapi.so b/mindscience/sciops/binary/evoformer_attention/op_api/lib/libcust_opapi.so new file mode 100644 index 0000000000000000000000000000000000000000..d8b2d039b3882fcd162ed626b845b0031142c8a6 Binary files /dev/null and b/mindscience/sciops/binary/evoformer_attention/op_api/lib/libcust_opapi.so differ diff --git a/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json b/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json new file mode 100644 index 0000000000000000000000000000000000000000..59b25b0e89d1b3e4c270c976b795964e8832bd0c --- /dev/null +++ b/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json @@ -0,0 +1,225 @@ +{ + "EvoformerAttention":{ + "attr":{ + "list":"scale_value,keep_prob,pre_tockens,next_tockens,head_num,input_layout,inner_precise,sparse_mode,pse_type" + }, + "attr_head_num":{ + "paramType":"required", + "type":"int", + "value":"all" + }, + "attr_inner_precise":{ + "defaultValue":"0", + "paramType":"optional", + "type":"int", + "value":"all" + }, + "attr_input_layout":{ + "paramType":"required", + "type":"str", + "value":"all" + }, + "attr_keep_prob":{ + "defaultValue":"1", + "paramType":"optional", + "type":"float", + "value":"all" + }, + "attr_next_tockens":{ + "defaultValue":"2147483647", + "paramType":"optional", + "type":"int", + "value":"all" + }, + "attr_pre_tockens":{ + "defaultValue":"2147483647", + "paramType":"optional", + "type":"int", + "value":"all" + }, + "attr_pse_type":{ + "defaultValue":"1", + "paramType":"optional", + "type":"int", + 
"value":"all" + }, + "attr_scale_value":{ + "defaultValue":"1", + "paramType":"optional", + "type":"float", + "value":"all" + }, + "attr_sparse_mode":{ + "defaultValue":"0", + "paramType":"optional", + "type":"int", + "value":"all" + }, + "coreType":{ + "value":"AiCore" + }, + "dynamicCompileStatic":{ + "flag":"true" + }, + "dynamicFormat":{ + "flag":"true" + }, + "dynamicRankSupport":{ + "flag":"true" + }, + "dynamicShapeSupport":{ + "flag":"true" + }, + "input0":{ + "dtype":"float16,bfloat16,float16,bfloat16,float16,bfloat16,float16,bfloat16,float32,float32", + "format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND", + "name":"query", + "paramType":"required", + "shape":"all", + "unknownshape_format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND" + }, + "input1":{ + "dtype":"float16,bfloat16,float16,bfloat16,float16,bfloat16,float16,bfloat16,float32,float32", + "format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND", + "name":"key", + "paramType":"required", + "shape":"all", + "unknownshape_format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND" + }, + "input10":{ + "dtype":"int64,int64,int64,int64,int64,int64,int64,int64,int64,int64", + "format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND", + "name":"q_start_idx", + "paramType":"optional", + "shape":"all", + "unknownshape_format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND", + "valueDepend":"optional" + }, + "input11":{ + "dtype":"int64,int64,int64,int64,int64,int64,int64,int64,int64,int64", + "format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND", + "name":"kv_start_idx", + "paramType":"optional", + "shape":"all", + "unknownshape_format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND", + "valueDepend":"optional" + }, + "input2":{ + "dtype":"float16,bfloat16,float16,bfloat16,float16,bfloat16,float16,bfloat16,float32,float32", + "format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND", + "name":"value", + "paramType":"required", + "shape":"all", + "unknownshape_format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND" + }, + "input3":{ + "dtype":"float16,bfloat16,float16,bfloat16,float32,float32,float32,float32,float32,float32", + 
"format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND", + "name":"real_shift", + "paramType":"optional", + "shape":"all", + "unknownshape_format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND" + }, + "input4":{ + "dtype":"uint8,uint8,uint8,uint8,uint8,uint8,uint8,uint8,uint8,uint8", + "format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND", + "name":"drop_mask", + "paramType":"optional", + "shape":"all", + "unknownshape_format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND" + }, + "input5":{ + "dtype":"float16,bfloat16,float16,bfloat16,float16,bfloat16,float16,bfloat16,float32,float32", + "format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND", + "name":"padding_mask", + "paramType":"optional", + "shape":"all", + "unknownshape_format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND" + }, + "input6":{ + "dtype":"uint8,uint8,bool,bool,uint8,uint8,bool,bool,uint8,bool", + "format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND", + "name":"atten_mask", + "paramType":"optional", + "shape":"all", + "unknownshape_format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND" + }, + "input7":{ + "dtype":"int64,int64,int64,int64,int64,int64,int64,int64,int64,int64", + "format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND", + "name":"prefix", + "paramType":"optional", + "shape":"all", + "unknownshape_format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND", + "valueDepend":"optional" + }, + "input8":{ + "dtype":"int64,int64,int64,int64,int64,int64,int64,int64,int64,int64", + "format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND", + "name":"actual_seq_qlen", + "paramType":"optional", + "shape":"all", + "unknownshape_format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND", + "valueDepend":"optional" + }, + "input9":{ + "dtype":"int64,int64,int64,int64,int64,int64,int64,int64,int64,int64", + "format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND", + "name":"actual_seq_kvlen", + "paramType":"optional", + "shape":"all", + "unknownshape_format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND", + "valueDepend":"optional" + }, + "jitCompile":{ + "flag":"static_false,dynamic_false" + }, + "needCheckSupport":{ + "flag":"false" + }, + "opFile":{ + "value":"evoformer_attention" + }, + "opInterface":{ + 
"value":"evoformer_attention" + }, + "output0":{ + "dtype":"float32,float32,float32,float32,float32,float32,float32,float32,float32,float32", + "format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND", + "name":"softmax_max", + "paramType":"required", + "shape":"all", + "unknownshape_format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND" + }, + "output1":{ + "dtype":"float32,float32,float32,float32,float32,float32,float32,float32,float32,float32", + "format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND", + "name":"softmax_sum", + "paramType":"required", + "shape":"all", + "unknownshape_format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND" + }, + "output2":{ + "dtype":"float16,bfloat16,float16,bfloat16,float16,bfloat16,float16,bfloat16,float32,float32", + "format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND", + "name":"softmax_out", + "paramType":"required", + "shape":"all", + "unknownshape_format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND" + }, + "output3":{ + "dtype":"float16,bfloat16,float16,bfloat16,float16,bfloat16,float16,bfloat16,float32,float32", + "format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND", + "name":"attention_out", + "paramType":"required", + "shape":"all", + "unknownshape_format":"ND,ND,ND,ND,ND,ND,ND,ND,ND,ND" + }, + "prebuildPattern":{ + "value":"Opaque" + }, + "precision_reduce":{ + "flag":"true" + } + } +} \ No newline at end of file diff --git a/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/kernel/ascend910b/evoformer_attention/EvoformerAttention_0260521b8ef16b74064256757404d984.json b/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/kernel/ascend910b/evoformer_attention/EvoformerAttention_0260521b8ef16b74064256757404d984.json new file mode 100644 index 0000000000000000000000000000000000000000..666d1f47e750587c058273de5dfb980598bf2c07 --- /dev/null +++ b/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/kernel/ascend910b/evoformer_attention/EvoformerAttention_0260521b8ef16b74064256757404d984.json @@ -0,0 +1,299 @@ +{ + "binFileName": 
"EvoformerAttention_0260521b8ef16b74064256757404d984", + "binFileSuffix": ".o", + "blockDim": -1, + "coreType": "VectorCore", + "core_type": "AIV", + "intercoreSync": 0, + "kernelName": "EvoformerAttention_0260521b8ef16b74064256757404d984", + "magic": "RT_DEV_BINARY_MAGIC_ELF_AIVEC", + "memoryStamping": [], + "opParaSize": 2464, + "parameters": [ + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null + ], + "sha256": "3cd4c5125f11af12e4ef47393052c1fefc25a4de274ebd0ed327815a58be035d", + "workspace": { + "num": 1, + "size": [ + -1 + ], + "type": [ + 0 + ] + }, + "kernelList": [ + { + "kernelName": "EvoformerAttention_0260521b8ef16b74064256757404d984_90" + }, + { + "kernelName": "EvoformerAttention_0260521b8ef16b74064256757404d984_92" + }, + { + "kernelName": "EvoformerAttention_0260521b8ef16b74064256757404d984_94" + } + ], + "optionalInputMode": "gen_placeholder", + "optionalOutputMode": "gen_placeholder", + "compileInfo": {}, + "supportInfo": { + "implMode": "high_performance", + "int64Mode": false, + "simplifiedKeyMode": 0, + "simplifiedKey": [ + "EvoformerAttention/d=0,p=1/0,2/0,2/0,2/0,2/0,2/0,2/0,2", + "EvoformerAttention/d=0,p=0/0,2/0,2/0,2/0,2/0,2/0,2/0,2", + "EvoformerAttention/d=1,p=1/0,2/0,2/0,2/0,2/0,2/0,2/0,2", + "EvoformerAttention/d=1,p=0/0,2/0,2/0,2/0,2/0,2/0,2/0,2" + ], + "optionalInputMode": "gen_placeholder", + "optionalOutputMode": "gen_placeholder", + "staticKey": "b4266c11dfa0f48b25d7c6c0e2cc048e3a248135967ab948e1d36aa28845cfa7,28e0bba14398f07d26e9652d3596a50393290dd8aa82ba8afe4230b2909486d0", + "inputs": [ + { + "name": "query", + "index": 0, + "dtype": "float32", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "key", + "index": 1, + "dtype": "float32", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "value", + 
"index": 2, + "dtype": "float32", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "real_shift", + "index": 3, + "dtype": "float32", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "drop_mask", + "index": 4, + "dtype": "uint8", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "padding_mask", + "index": 5, + "dtype": "float32", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "atten_mask", + "index": 6, + "dtype": "uint8", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "prefix", + "index": 7, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "actual_seq_qlen", + "index": 8, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "actual_seq_kvlen", + "index": 9, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "q_start_idx", + "index": 10, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "kv_start_idx", + "index": 11, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + } + ], + "outputs": [ + { + "name": "softmax_max", + "index": 0, + "dtype": "float32", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "softmax_sum", + "index": 1, + "dtype": 
"float32", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "softmax_out", + "index": 2, + "dtype": "float32", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "attention_out", + "index": 3, + "dtype": "float32", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + } + ], + "attrs": [ + { + "name": "scale_value", + "dtype": "float", + "value": 0.0 + }, + { + "name": "keep_prob", + "dtype": "float", + "value": 0.0 + }, + { + "name": "pre_tockens", + "dtype": "int", + "value": 0 + }, + { + "name": "next_tockens", + "dtype": "int", + "value": 0 + }, + { + "name": "head_num", + "dtype": "int", + "value": 0 + }, + { + "name": "input_layout", + "dtype": "str", + "value": "" + }, + { + "name": "inner_precise", + "dtype": "int", + "value": 0 + }, + { + "name": "sparse_mode", + "dtype": "int", + "value": 0 + }, + { + "name": "pse_type", + "dtype": "int", + "value": 0 + } + ], + "opMode": "dynamic", + "deterministic": "ignore" + }, + "filePath": "ascend910b/bin/evoformer_attention/EvoformerAttention_0260521b8ef16b74064256757404d984.json" +} \ No newline at end of file diff --git a/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/kernel/ascend910b/evoformer_attention/EvoformerAttention_0260521b8ef16b74064256757404d984.o b/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/kernel/ascend910b/evoformer_attention/EvoformerAttention_0260521b8ef16b74064256757404d984.o new file mode 100644 index 0000000000000000000000000000000000000000..7e7571e8bc190aef1ccbb0c29162abaa448c6f5c Binary files /dev/null and b/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/kernel/ascend910b/evoformer_attention/EvoformerAttention_0260521b8ef16b74064256757404d984.o differ diff --git 
a/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/kernel/ascend910b/evoformer_attention/EvoformerAttention_604ef81cb78b4517a18212e778028958.json b/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/kernel/ascend910b/evoformer_attention/EvoformerAttention_604ef81cb78b4517a18212e778028958.json new file mode 100644 index 0000000000000000000000000000000000000000..15fc0f1a457e32f6089cd5221c91f2df97e3df0b --- /dev/null +++ b/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/kernel/ascend910b/evoformer_attention/EvoformerAttention_604ef81cb78b4517a18212e778028958.json @@ -0,0 +1,317 @@ +{ + "binFileName": "EvoformerAttention_604ef81cb78b4517a18212e778028958", + "binFileSuffix": ".o", + "blockDim": -1, + "coreType": "MIX", + "intercoreSync": 1, + "kernelName": "EvoformerAttention_604ef81cb78b4517a18212e778028958", + "magic": "RT_DEV_BINARY_MAGIC_ELF", + "memoryStamping": [], + "opParaSize": 2464, + "parameters": [ + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null + ], + "sha256": "b27edcbcd00294a3e0ea3546ecc98e0894b1630dfe803984ee39c1a9b116be74", + "workspace": { + "num": 1, + "size": [ + -1 + ], + "type": [ + 0 + ] + }, + "taskRation": "1:2", + "kernelList": [ + { + "kernelName": "EvoformerAttention_604ef81cb78b4517a18212e778028958_90" + }, + { + "kernelName": "EvoformerAttention_604ef81cb78b4517a18212e778028958_92" + }, + { + "kernelName": "EvoformerAttention_604ef81cb78b4517a18212e778028958_94" + }, + { + "kernelName": "EvoformerAttention_604ef81cb78b4517a18212e778028958_10000001110220120943" + }, + { + "kernelName": "EvoformerAttention_604ef81cb78b4517a18212e778028958_10000001110221120943" + }, + { + "kernelName": "EvoformerAttention_604ef81cb78b4517a18212e778028958_10000011102200120953" + }, + { + "kernelName": "EvoformerAttention_604ef81cb78b4517a18212e778028958_10000011102201120953" + }, + { + "kernelName": 
"EvoformerAttention_604ef81cb78b4517a18212e778028958_10000001102200120953" + }, + { + "kernelName": "EvoformerAttention_604ef81cb78b4517a18212e778028958_10000001102201120953" + } + ], + "optionalInputMode": "gen_placeholder", + "optionalOutputMode": "gen_placeholder", + "compileInfo": {}, + "supportInfo": { + "implMode": "high_performance", + "int64Mode": false, + "simplifiedKeyMode": 0, + "simplifiedKey": [ + "EvoformerAttention/d=0,p=1/27,2/27,2/27,2/0,2/0,2/27,2/27,2", + "EvoformerAttention/d=0,p=0/27,2/27,2/27,2/0,2/0,2/27,2/27,2", + "EvoformerAttention/d=1,p=1/27,2/27,2/27,2/0,2/0,2/27,2/27,2", + "EvoformerAttention/d=1,p=0/27,2/27,2/27,2/0,2/0,2/27,2/27,2" + ], + "optionalInputMode": "gen_placeholder", + "optionalOutputMode": "gen_placeholder", + "staticKey": "39db535179a9c6b1f1375e103ffdbd3489dcf222f4c99d281f9a21286bdeb320,aec08fcdf1213a0618b2ac29719f1314f00c26c850a63c3ea2ea25e79f9bef54", + "inputs": [ + { + "name": "query", + "index": 0, + "dtype": "bfloat16", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "key", + "index": 1, + "dtype": "bfloat16", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "value", + "index": 2, + "dtype": "bfloat16", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "real_shift", + "index": 3, + "dtype": "bfloat16", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "drop_mask", + "index": 4, + "dtype": "uint8", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "padding_mask", + "index": 5, + "dtype": "bfloat16", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "atten_mask", 
+ "index": 6, + "dtype": "uint8", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "prefix", + "index": 7, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "actual_seq_qlen", + "index": 8, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "actual_seq_kvlen", + "index": 9, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "q_start_idx", + "index": 10, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "kv_start_idx", + "index": 11, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + } + ], + "outputs": [ + { + "name": "softmax_max", + "index": 0, + "dtype": "float32", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "softmax_sum", + "index": 1, + "dtype": "float32", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "softmax_out", + "index": 2, + "dtype": "bfloat16", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "attention_out", + "index": 3, + "dtype": "bfloat16", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + } + ], + "attrs": [ + { + "name": "scale_value", + "dtype": "float", + "value": 0.0 + }, + { + "name": "keep_prob", + "dtype": "float", + "value": 0.0 + }, + { + "name": "pre_tockens", + "dtype": "int", + "value": 0 + }, + { + 
"name": "next_tockens", + "dtype": "int", + "value": 0 + }, + { + "name": "head_num", + "dtype": "int", + "value": 0 + }, + { + "name": "input_layout", + "dtype": "str", + "value": "" + }, + { + "name": "inner_precise", + "dtype": "int", + "value": 0 + }, + { + "name": "sparse_mode", + "dtype": "int", + "value": 0 + }, + { + "name": "pse_type", + "dtype": "int", + "value": 0 + } + ], + "opMode": "dynamic", + "deterministic": "ignore" + }, + "filePath": "ascend910b/bin/evoformer_attention/EvoformerAttention_604ef81cb78b4517a18212e778028958.json" +} \ No newline at end of file diff --git a/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/kernel/ascend910b/evoformer_attention/EvoformerAttention_604ef81cb78b4517a18212e778028958.o b/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/kernel/ascend910b/evoformer_attention/EvoformerAttention_604ef81cb78b4517a18212e778028958.o new file mode 100644 index 0000000000000000000000000000000000000000..ed36d969e490bbe08978fb76227a0b026437f13f Binary files /dev/null and b/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/kernel/ascend910b/evoformer_attention/EvoformerAttention_604ef81cb78b4517a18212e778028958.o differ diff --git a/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/kernel/ascend910b/evoformer_attention/EvoformerAttention_99cb40af1ded45fe1870ef31fe8ebc98.json b/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/kernel/ascend910b/evoformer_attention/EvoformerAttention_99cb40af1ded45fe1870ef31fe8ebc98.json new file mode 100644 index 0000000000000000000000000000000000000000..e71521103de6b33ff9cd9ae455e61c53706a7c01 --- /dev/null +++ b/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/kernel/ascend910b/evoformer_attention/EvoformerAttention_99cb40af1ded45fe1870ef31fe8ebc98.json @@ -0,0 +1,311 @@ +{ + "binFileName": "EvoformerAttention_99cb40af1ded45fe1870ef31fe8ebc98", + "binFileSuffix": ".o", + "blockDim": -1, + "coreType": 
"MIX", + "intercoreSync": 1, + "kernelName": "EvoformerAttention_99cb40af1ded45fe1870ef31fe8ebc98", + "magic": "RT_DEV_BINARY_MAGIC_ELF", + "memoryStamping": [], + "opParaSize": 2464, + "parameters": [ + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null + ], + "sha256": "ee25e9696f3b4ab88d892664696193178d7f07ef7f20bb9dbcf06b1a089653a6", + "workspace": { + "num": 1, + "size": [ + -1 + ], + "type": [ + 0 + ] + }, + "taskRation": "1:2", + "kernelList": [ + { + "kernelName": "EvoformerAttention_99cb40af1ded45fe1870ef31fe8ebc98_90" + }, + { + "kernelName": "EvoformerAttention_99cb40af1ded45fe1870ef31fe8ebc98_92" + }, + { + "kernelName": "EvoformerAttention_99cb40af1ded45fe1870ef31fe8ebc98_94" + }, + { + "kernelName": "EvoformerAttention_99cb40af1ded45fe1870ef31fe8ebc98_10000001110220330943" + }, + { + "kernelName": "EvoformerAttention_99cb40af1ded45fe1870ef31fe8ebc98_10000000011021330099" + }, + { + "kernelName": "EvoformerAttention_99cb40af1ded45fe1870ef31fe8ebc98_10000001110221330943" + }, + { + "kernelName": "EvoformerAttention_99cb40af1ded45fe1870ef31fe8ebc98_10000000011021130099" + } + ], + "optionalInputMode": "gen_placeholder", + "optionalOutputMode": "gen_placeholder", + "compileInfo": {}, + "supportInfo": { + "implMode": "high_performance", + "int64Mode": false, + "simplifiedKeyMode": 0, + "simplifiedKey": [ + "EvoformerAttention/d=0,p=1/1,2/1,2/1,2/0,2/0,2/1,2/1,2", + "EvoformerAttention/d=0,p=0/1,2/1,2/1,2/0,2/0,2/1,2/1,2", + "EvoformerAttention/d=1,p=1/1,2/1,2/1,2/0,2/0,2/1,2/1,2", + "EvoformerAttention/d=1,p=0/1,2/1,2/1,2/0,2/0,2/1,2/1,2" + ], + "optionalInputMode": "gen_placeholder", + "optionalOutputMode": "gen_placeholder", + "staticKey": "023d283515261f89e59fbbbc483e3c4e48359cdbe5c2894ccbe3af65a5477624,40bf5208a5c8b4b9443bade7f87bd2f835b9321e1d005d1561e8f8dcd00f7eed", + "inputs": [ + { + "name": "query", + "index": 0, + "dtype": "float16", + "format": "ND", + "paramType": 
"required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "key", + "index": 1, + "dtype": "float16", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "value", + "index": 2, + "dtype": "float16", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "real_shift", + "index": 3, + "dtype": "float16", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "drop_mask", + "index": 4, + "dtype": "uint8", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "padding_mask", + "index": 5, + "dtype": "float16", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "atten_mask", + "index": 6, + "dtype": "uint8", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "prefix", + "index": 7, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "actual_seq_qlen", + "index": 8, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "actual_seq_kvlen", + "index": 9, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "q_start_idx", + "index": 10, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "kv_start_idx", + "index": 11, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": 
"FormatAgnostic" + } + ], + "outputs": [ + { + "name": "softmax_max", + "index": 0, + "dtype": "float32", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "softmax_sum", + "index": 1, + "dtype": "float32", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "softmax_out", + "index": 2, + "dtype": "float16", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "attention_out", + "index": 3, + "dtype": "float16", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + } + ], + "attrs": [ + { + "name": "scale_value", + "dtype": "float", + "value": 0.0 + }, + { + "name": "keep_prob", + "dtype": "float", + "value": 0.0 + }, + { + "name": "pre_tockens", + "dtype": "int", + "value": 0 + }, + { + "name": "next_tockens", + "dtype": "int", + "value": 0 + }, + { + "name": "head_num", + "dtype": "int", + "value": 0 + }, + { + "name": "input_layout", + "dtype": "str", + "value": "" + }, + { + "name": "inner_precise", + "dtype": "int", + "value": 0 + }, + { + "name": "sparse_mode", + "dtype": "int", + "value": 0 + }, + { + "name": "pse_type", + "dtype": "int", + "value": 0 + } + ], + "opMode": "dynamic", + "deterministic": "ignore" + }, + "filePath": "ascend910b/bin/evoformer_attention/EvoformerAttention_99cb40af1ded45fe1870ef31fe8ebc98.json" +} \ No newline at end of file diff --git a/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/kernel/ascend910b/evoformer_attention/EvoformerAttention_99cb40af1ded45fe1870ef31fe8ebc98.o b/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/kernel/ascend910b/evoformer_attention/EvoformerAttention_99cb40af1ded45fe1870ef31fe8ebc98.o new file mode 100644 index 
0000000000000000000000000000000000000000..c8f76991a0cfa2cfc583d5e050747bc98cb0e75c Binary files /dev/null and b/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/kernel/ascend910b/evoformer_attention/EvoformerAttention_99cb40af1ded45fe1870ef31fe8ebc98.o differ diff --git a/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json b/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1ed8b733fbeb75d20fc43371297e5eade5ed8c68 --- /dev/null +++ b/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json @@ -0,0 +1,174 @@ +{ + "EvoformerAttention": { + "dynamicRankSupport": true, + "simplifiedKeyMode": 0, + "optionalInputMode": "gen_placeholder", + "optionalOutputMode": "gen_placeholder", + "params": { + "inputs": [ + { + "name": "query", + "index": 0, + "paramType": "required", + "formatMode": "static_nd_agnostic" + }, + { + "name": "key", + "index": 1, + "paramType": "required", + "formatMode": "static_nd_agnostic" + }, + { + "name": "value", + "index": 2, + "paramType": "required", + "formatMode": "static_nd_agnostic" + }, + { + "name": "real_shift", + "index": 3, + "paramType": "optional", + "formatMode": "static_nd_agnostic" + }, + { + "name": "drop_mask", + "index": 4, + "paramType": "optional", + "formatMode": "static_nd_agnostic" + }, + { + "name": "padding_mask", + "index": 5, + "paramType": "optional", + "formatMode": "static_nd_agnostic" + }, + { + "name": "atten_mask", + "index": 6, + "paramType": "optional", + "formatMode": "static_nd_agnostic" + }, + { + "name": "prefix", + "index": 7, + "paramType": "optional", + "formatMode": "static_nd_agnostic" + }, + { + "name": "actual_seq_qlen", + "index": 8, + "paramType": "optional", + "formatMode": "static_nd_agnostic" + }, + { + "name": 
"actual_seq_kvlen", + "index": 9, + "paramType": "optional", + "formatMode": "static_nd_agnostic" + }, + { + "name": "q_start_idx", + "index": 10, + "paramType": "optional", + "formatMode": "static_nd_agnostic" + }, + { + "name": "kv_start_idx", + "index": 11, + "paramType": "optional", + "formatMode": "static_nd_agnostic" + } + ], + "outputs": [ + { + "name": "softmax_max", + "index": 0, + "paramType": "required", + "formatMode": "static_nd_agnostic" + }, + { + "name": "softmax_sum", + "index": 1, + "paramType": "required", + "formatMode": "static_nd_agnostic" + }, + { + "name": "softmax_out", + "index": 2, + "paramType": "required", + "formatMode": "static_nd_agnostic" + }, + { + "name": "attention_out", + "index": 3, + "paramType": "required", + "formatMode": "static_nd_agnostic" + } + ], + "attrs": [ + { + "name": "scale_value" + }, + { + "name": "keep_prob" + }, + { + "name": "pre_tockens" + }, + { + "name": "next_tockens" + }, + { + "name": "head_num" + }, + { + "name": "input_layout" + }, + { + "name": "inner_precise" + }, + { + "name": "sparse_mode" + }, + { + "name": "pse_type" + } + ] + }, + "binaryList": [ + { + "coreType": 2, + "simplifiedKey": [ + "EvoformerAttention/d=0,p=1/0,2/0,2/0,2/0,2/0,2/0,2/0,2", + "EvoformerAttention/d=0,p=0/0,2/0,2/0,2/0,2/0,2/0,2/0,2", + "EvoformerAttention/d=1,p=1/0,2/0,2/0,2/0,2/0,2/0,2/0,2", + "EvoformerAttention/d=1,p=0/0,2/0,2/0,2/0,2/0,2/0,2/0,2" + ], + "binPath": "ascend910b/evoformer_attention/EvoformerAttention_0260521b8ef16b74064256757404d984.o", + "jsonPath": "ascend910b/evoformer_attention/EvoformerAttention_0260521b8ef16b74064256757404d984.json" + }, + { + "coreType": 0, + "simplifiedKey": [ + "EvoformerAttention/d=0,p=1/27,2/27,2/27,2/0,2/0,2/27,2/27,2", + "EvoformerAttention/d=0,p=0/27,2/27,2/27,2/0,2/0,2/27,2/27,2", + "EvoformerAttention/d=1,p=1/27,2/27,2/27,2/0,2/0,2/27,2/27,2", + "EvoformerAttention/d=1,p=0/27,2/27,2/27,2/0,2/0,2/27,2/27,2" + ], + "binPath": 
"ascend910b/evoformer_attention/EvoformerAttention_604ef81cb78b4517a18212e778028958.o", + "jsonPath": "ascend910b/evoformer_attention/EvoformerAttention_604ef81cb78b4517a18212e778028958.json" + }, + { + "coreType": 0, + "simplifiedKey": [ + "EvoformerAttention/d=0,p=1/1,2/1,2/1,2/0,2/0,2/1,2/1,2", + "EvoformerAttention/d=0,p=0/1,2/1,2/1,2/0,2/0,2/1,2/1,2", + "EvoformerAttention/d=1,p=1/1,2/1,2/1,2/0,2/0,2/1,2/1,2", + "EvoformerAttention/d=1,p=0/1,2/1,2/1,2/0,2/0,2/1,2/1,2" + ], + "binPath": "ascend910b/evoformer_attention/EvoformerAttention_99cb40af1ded45fe1870ef31fe8ebc98.o", + "jsonPath": "ascend910b/evoformer_attention/EvoformerAttention_99cb40af1ded45fe1870ef31fe8ebc98.json" + } + ] + } +} \ No newline at end of file diff --git a/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/kernel/config/ascend910b/evoformer_attention.json b/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/kernel/config/ascend910b/evoformer_attention.json new file mode 100644 index 0000000000000000000000000000000000000000..0a945759e9eb9f200d9ce79edcd5184628ffed7d --- /dev/null +++ b/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/kernel/config/ascend910b/evoformer_attention.json @@ -0,0 +1,742 @@ +{ + "binList": [ + { + "implMode": "high_performance", + "int64Mode": false, + "simplifiedKeyMode": 0, + "simplifiedKey": [ + "EvoformerAttention/d=0,p=1/0,2/0,2/0,2/0,2/0,2/0,2/0,2", + "EvoformerAttention/d=0,p=0/0,2/0,2/0,2/0,2/0,2/0,2/0,2", + "EvoformerAttention/d=1,p=1/0,2/0,2/0,2/0,2/0,2/0,2/0,2", + "EvoformerAttention/d=1,p=0/0,2/0,2/0,2/0,2/0,2/0,2/0,2" + ], + "staticKey": "b4266c11dfa0f48b25d7c6c0e2cc048e3a248135967ab948e1d36aa28845cfa7,28e0bba14398f07d26e9652d3596a50393290dd8aa82ba8afe4230b2909486d0", + "inputs": [ + { + "name": "query", + "index": 0, + "dtype": "float32", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "key", + "index": 1, + "dtype": 
"float32", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "value", + "index": 2, + "dtype": "float32", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "real_shift", + "index": 3, + "dtype": "float32", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "drop_mask", + "index": 4, + "dtype": "uint8", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "padding_mask", + "index": 5, + "dtype": "float32", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "atten_mask", + "index": 6, + "dtype": "uint8", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "prefix", + "index": 7, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "actual_seq_qlen", + "index": 8, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "actual_seq_kvlen", + "index": 9, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "q_start_idx", + "index": 10, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "kv_start_idx", + "index": 11, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + } + ], + "outputs": [ + { + "name": "softmax_max", + "index": 0, + "dtype": "float32", + "format": "ND", + 
"paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "softmax_sum", + "index": 1, + "dtype": "float32", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "softmax_out", + "index": 2, + "dtype": "float32", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "attention_out", + "index": 3, + "dtype": "float32", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + } + ], + "attrs": [ + { + "name": "scale_value", + "dtype": "float", + "value": 0.0 + }, + { + "name": "keep_prob", + "dtype": "float", + "value": 0.0 + }, + { + "name": "pre_tockens", + "dtype": "int", + "value": 0 + }, + { + "name": "next_tockens", + "dtype": "int", + "value": 0 + }, + { + "name": "head_num", + "dtype": "int", + "value": 0 + }, + { + "name": "input_layout", + "dtype": "str", + "value": "" + }, + { + "name": "inner_precise", + "dtype": "int", + "value": 0 + }, + { + "name": "sparse_mode", + "dtype": "int", + "value": 0 + }, + { + "name": "pse_type", + "dtype": "int", + "value": 0 + } + ], + "opMode": "dynamic", + "optionalInputMode": "gen_placeholder", + "deterministic": "ignore", + "optionalOutputMode": "gen_placeholder", + "binInfo": { + "jsonFilePath": "ascend910b/evoformer_attention/EvoformerAttention_0260521b8ef16b74064256757404d984.json" + } + }, + { + "implMode": "high_performance", + "int64Mode": false, + "simplifiedKeyMode": 0, + "simplifiedKey": [ + "EvoformerAttention/d=0,p=1/27,2/27,2/27,2/0,2/0,2/27,2/27,2", + "EvoformerAttention/d=0,p=0/27,2/27,2/27,2/0,2/0,2/27,2/27,2", + "EvoformerAttention/d=1,p=1/27,2/27,2/27,2/0,2/0,2/27,2/27,2", + "EvoformerAttention/d=1,p=0/27,2/27,2/27,2/0,2/0,2/27,2/27,2" + ], + "staticKey": 
"39db535179a9c6b1f1375e103ffdbd3489dcf222f4c99d281f9a21286bdeb320,aec08fcdf1213a0618b2ac29719f1314f00c26c850a63c3ea2ea25e79f9bef54", + "inputs": [ + { + "name": "query", + "index": 0, + "dtype": "bfloat16", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "key", + "index": 1, + "dtype": "bfloat16", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "value", + "index": 2, + "dtype": "bfloat16", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "real_shift", + "index": 3, + "dtype": "bfloat16", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "drop_mask", + "index": 4, + "dtype": "uint8", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "padding_mask", + "index": 5, + "dtype": "bfloat16", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "atten_mask", + "index": 6, + "dtype": "uint8", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "prefix", + "index": 7, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "actual_seq_qlen", + "index": 8, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "actual_seq_kvlen", + "index": 9, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "q_start_idx", + "index": 10, + "dtype": "int64", + "format": "ND", + "paramType": 
"optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "kv_start_idx", + "index": 11, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + } + ], + "outputs": [ + { + "name": "softmax_max", + "index": 0, + "dtype": "float32", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "softmax_sum", + "index": 1, + "dtype": "float32", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "softmax_out", + "index": 2, + "dtype": "bfloat16", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "attention_out", + "index": 3, + "dtype": "bfloat16", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + } + ], + "attrs": [ + { + "name": "scale_value", + "dtype": "float", + "value": 0.0 + }, + { + "name": "keep_prob", + "dtype": "float", + "value": 0.0 + }, + { + "name": "pre_tockens", + "dtype": "int", + "value": 0 + }, + { + "name": "next_tockens", + "dtype": "int", + "value": 0 + }, + { + "name": "head_num", + "dtype": "int", + "value": 0 + }, + { + "name": "input_layout", + "dtype": "str", + "value": "" + }, + { + "name": "inner_precise", + "dtype": "int", + "value": 0 + }, + { + "name": "sparse_mode", + "dtype": "int", + "value": 0 + }, + { + "name": "pse_type", + "dtype": "int", + "value": 0 + } + ], + "opMode": "dynamic", + "optionalInputMode": "gen_placeholder", + "deterministic": "ignore", + "optionalOutputMode": "gen_placeholder", + "binInfo": { + "jsonFilePath": "ascend910b/evoformer_attention/EvoformerAttention_604ef81cb78b4517a18212e778028958.json" + } + }, + { + "implMode": "high_performance", + "int64Mode": false, + "simplifiedKeyMode": 0, + "simplifiedKey": [ + 
"EvoformerAttention/d=0,p=1/1,2/1,2/1,2/0,2/0,2/1,2/1,2", + "EvoformerAttention/d=0,p=0/1,2/1,2/1,2/0,2/0,2/1,2/1,2", + "EvoformerAttention/d=1,p=1/1,2/1,2/1,2/0,2/0,2/1,2/1,2", + "EvoformerAttention/d=1,p=0/1,2/1,2/1,2/0,2/0,2/1,2/1,2" + ], + "staticKey": "023d283515261f89e59fbbbc483e3c4e48359cdbe5c2894ccbe3af65a5477624,40bf5208a5c8b4b9443bade7f87bd2f835b9321e1d005d1561e8f8dcd00f7eed", + "inputs": [ + { + "name": "query", + "index": 0, + "dtype": "float16", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "key", + "index": 1, + "dtype": "float16", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "value", + "index": 2, + "dtype": "float16", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "real_shift", + "index": 3, + "dtype": "float16", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "drop_mask", + "index": 4, + "dtype": "uint8", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "padding_mask", + "index": 5, + "dtype": "float16", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "atten_mask", + "index": 6, + "dtype": "uint8", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "prefix", + "index": 7, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "actual_seq_qlen", + "index": 8, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": 
"actual_seq_kvlen", + "index": 9, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "q_start_idx", + "index": 10, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "kv_start_idx", + "index": 11, + "dtype": "int64", + "format": "ND", + "paramType": "optional", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + } + ], + "outputs": [ + { + "name": "softmax_max", + "index": 0, + "dtype": "float32", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "softmax_sum", + "index": 1, + "dtype": "float32", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "softmax_out", + "index": 2, + "dtype": "float16", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + }, + { + "name": "attention_out", + "index": 3, + "dtype": "float16", + "format": "ND", + "paramType": "required", + "shape": [ + -2 + ], + "format_match_mode": "FormatAgnostic" + } + ], + "attrs": [ + { + "name": "scale_value", + "dtype": "float", + "value": 0.0 + }, + { + "name": "keep_prob", + "dtype": "float", + "value": 0.0 + }, + { + "name": "pre_tockens", + "dtype": "int", + "value": 0 + }, + { + "name": "next_tockens", + "dtype": "int", + "value": 0 + }, + { + "name": "head_num", + "dtype": "int", + "value": 0 + }, + { + "name": "input_layout", + "dtype": "str", + "value": "" + }, + { + "name": "inner_precise", + "dtype": "int", + "value": 0 + }, + { + "name": "sparse_mode", + "dtype": "int", + "value": 0 + }, + { + "name": "pse_type", + "dtype": "int", + "value": 0 + } + ], + "opMode": "dynamic", + "optionalInputMode": "gen_placeholder", + "deterministic": "ignore", + "optionalOutputMode": 
"gen_placeholder", + "binInfo": { + "jsonFilePath": "ascend910b/evoformer_attention/EvoformerAttention_99cb40af1ded45fe1870ef31fe8ebc98.json" + } + } + ] +} \ No newline at end of file diff --git a/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so b/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so new file mode 100644 index 0000000000000000000000000000000000000000..48245194f7d436ce72a4e06c64fb5b0c26632a73 Binary files /dev/null and b/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so differ diff --git a/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/op_tiling/liboptiling.so b/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/op_tiling/liboptiling.so new file mode 100644 index 0000000000000000000000000000000000000000..a35ff4dad4a5a92dae0781f18e3439b7caff887b --- /dev/null +++ b/mindscience/sciops/binary/evoformer_attention/op_impl/ai_core/tbe/op_tiling/liboptiling.so @@ -0,0 +1 @@ +lib/linux/aarch64/libcust_opmaster_rt2.0.so \ No newline at end of file diff --git a/mindscience/sciops/binary/evoformer_attention/op_proto/inc/evoformer_attention_proto.h b/mindscience/sciops/binary/evoformer_attention/op_proto/inc/evoformer_attention_proto.h new file mode 100644 index 0000000000000000000000000000000000000000..89a30f633e1fcdcb3e0c00669ed9c5c068f3ffce --- /dev/null +++ b/mindscience/sciops/binary/evoformer_attention/op_proto/inc/evoformer_attention_proto.h @@ -0,0 +1,49 @@ +/** + * Copyright (c) 2023-2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. 
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +#ifndef EVOFORMER_ATTENTION_PROTO_H_ +#define EVOFORMER_ATTENTION_PROTO_H_ + +#include "graph/operator_reg.h" +#include "register/op_impl_registry.h" + +namespace ge { + +REG_OP(EvoformerAttention) + .INPUT(query, ge::TensorType::ALL()) + .INPUT(key, ge::TensorType::ALL()) + .INPUT(value, ge::TensorType::ALL()) + .OPTIONAL_INPUT(real_shift, ge::TensorType::ALL()) + .OPTIONAL_INPUT(drop_mask, ge::TensorType::ALL()) + .OPTIONAL_INPUT(padding_mask, ge::TensorType::ALL()) + .OPTIONAL_INPUT(atten_mask, ge::TensorType::ALL()) + .OPTIONAL_INPUT(prefix, ge::TensorType::ALL()) + .OPTIONAL_INPUT(actual_seq_qlen, ge::TensorType::ALL()) + .OPTIONAL_INPUT(actual_seq_kvlen, ge::TensorType::ALL()) + .OPTIONAL_INPUT(q_start_idx, ge::TensorType::ALL()) + .OPTIONAL_INPUT(kv_start_idx, ge::TensorType::ALL()) + .OUTPUT(softmax_max, ge::TensorType::ALL()) + .OUTPUT(softmax_sum, ge::TensorType::ALL()) + .OUTPUT(softmax_out, ge::TensorType::ALL()) + .OUTPUT(attention_out, ge::TensorType::ALL()) + .ATTR(scale_value, Float, 1) + .ATTR(keep_prob, Float, 1) + .ATTR(pre_tockens, Int, 2147483647) + .ATTR(next_tockens, Int, 2147483647) + .REQUIRED_ATTR(head_num, Int) + .REQUIRED_ATTR(input_layout, String) + .ATTR(inner_precise, Int, 0) + .ATTR(sparse_mode, Int, 0) + .ATTR(pse_type, Int, 1) + .OP_END_FACTORY_REG(EvoformerAttention); + +} // namespace ge + +#endif diff --git a/mindscience/sciops/binary/evoformer_attention/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so b/mindscience/sciops/binary/evoformer_attention/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so new file mode 100644 index 0000000000000000000000000000000000000000..9bf27d64dd52b1892c6323d6f946de4f38c25601 Binary 
files /dev/null and b/mindscience/sciops/binary/evoformer_attention/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so differ diff --git a/mindscience/sciops/binary/evoformer_attention/version.info b/mindscience/sciops/binary/evoformer_attention/version.info new file mode 100644 index 0000000000000000000000000000000000000000..9a728346a310b16c7736c3fdb66e363ead253edc --- /dev/null +++ b/mindscience/sciops/binary/evoformer_attention/version.info @@ -0,0 +1 @@ +custom_opp_compiler_version=7.7.0.1.225 diff --git a/mindscience/sciops/ccsrc/evoformer_attention.cpp b/mindscience/sciops/ccsrc/evoformer_attention.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d7286c463ca1af1db81754d85e5c086894f81493 --- /dev/null +++ b/mindscience/sciops/ccsrc/evoformer_attention.cpp @@ -0,0 +1,132 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "./ms_extension.h" +#include "mindspore/ccsrc/pyboost/pyboost_utils.h" +#include "mindspore/ccsrc/pyboost/op_runner.h" +#include "mindspore/ccsrc/debug/profiler/profiler.h" +#include "runtime/pynative/op_runner.h" +#include "runtime/pynative/op_executor.h" +#include "mindspore/ccsrc/include/common/runtime_conf/runtime_conf.h" +#include "mindspore/ccsrc/runtime/pynative/task/device_task.h" +#include "mindspore/ccsrc/plugin/res_manager/ascend/stream_manager/ascend_stream_manager.h" +#include "mindspore/ops/kernel/ascend/pyboost/aclnn_utils.h" + +using BaseTensorPtr = mindspore::tensor::TensorPtr; +using BaseTensorPtrList = mindspore::tensor::TensorPtrList; + +namespace mindspore::kernel::pyboost { +void CustomPyboostExecutor(const std::string &opname, const BaseTensorPtrList &inputs, const BaseTensorPtrList &outputs, + const std::function &exec_func) { + mindspore::runtime::ProfilerRecorder profiler(mindspore::runtime::ProfilerModule::kPynative, + mindspore::runtime::ProfilerEvent::kRunOp, opname); + auto stream_id = PyBoostUtils::cur_stream_id(); + auto device_context = runtime::OpRunner::GetDeviceContext("Ascend"); + PyBoostUtils::PrepareOpInputs(device_context, stream_id, inputs); + PyBoostUtils::PrepareOpOutputs(device_context, stream_id, outputs); + PyBoostUtils::DispatchRun( + std::make_shared([device_context, inputs, outputs, exec_func]() { + MS_LOG(DEBUG) << "Run device task Add start"; + // Malloc for input tensors + PyBoostUtils::MallocOpInputs(device_context, inputs); + // Malloc for output tensors + PyBoostUtils::MallocOpOutputs(device_context, outputs); + // LAUNCH_ACLNN(aclnnMul, device_context, op->stream_id(), x, y, outputs[0]); + exec_func(); + MS_LOG(DEBUG) << "Run device task end"; + })); +} + +BaseTensorPtr npu_evoformer_attention( + const BaseTensorPtr &query, const BaseTensorPtr &key, const BaseTensorPtr &value, + const std::optional &realShiftOptional, const std::optional &dropMaskOptional, + const std::optional 
&paddingMaskOptional, const std::optional &attenMaskOptional, + const std::optional &prefixOptional, const std::optional &scaleValueOptional, + const std::optional &keepProbOptional, const std::optional preTokensOptional, + const std::optional nextTokensOptional, const std::optional &headNum, + const std::optional &inputLayout, const std::optional &innerPreciseOptional, + const std::optional &sparseModeOptional) { + auto stream_id = PyBoostUtils::cur_stream_id(); + auto device_context = runtime::OpRunner::GetDeviceContext("Ascend"); + BaseTensorPtrList inputs = {query, key, value}; + if (realShiftOptional.has_value()) { + inputs.emplace_back(realShiftOptional.value()); + } + if (dropMaskOptional.has_value()) { + inputs.emplace_back(dropMaskOptional.value()); + } + if (paddingMaskOptional.has_value()) { + inputs.emplace_back(paddingMaskOptional.value()); + } + if (attenMaskOptional.has_value()) { + inputs.emplace_back(attenMaskOptional.value()); + } + if (prefixOptional.has_value()) { + inputs.emplace_back(prefixOptional.value()); + } + + double scaleValue = scaleValueOptional.value_or(1.0); + double keepProb = keepProbOptional.value_or(1.0); + int64_t preToken = preTokensOptional.value_or(2147483647); + int64_t nextToken = nextTokensOptional.value_or(2147483647); + int64_t head_num = headNum.value(); + string input_layout = inputLayout.value(); + int64_t inner_precise = innerPreciseOptional.value_or(0); + int64_t sparse_mode = sparseModeOptional.value_or(0); + BaseTensorPtr softmax_max; + BaseTensorPtr softmax_sum; + BaseTensorPtr softmax_out; + BaseTensorPtr attention_out; + ShapeVector softmax_max_sum_shape; + ShapeVector softmax_out_shape; + + if (input_layout == "SBH") { + softmax_max_sum_shape = {query->shape()[1], head_num, query->shape()[0], 8}; + softmax_out_shape = {query->shape()[1], head_num, query->shape()[0], query->shape()[0]}; + } else if (input_layout == "BSH" || input_layout == "BSND") { + softmax_max_sum_shape = {query->shape()[0], head_num, 
query->shape()[1], 8}; + softmax_out_shape = {query->shape()[0], head_num, query->shape()[1], query->shape()[0]}; + } else if (input_layout == "BNSD") { + softmax_max_sum_shape = {query->shape()[0], head_num, query->shape()[2], 8}; + softmax_out_shape = {query->shape()[0], head_num, query->shape()[2], query->shape()[0]}; + } + + attention_out = std::make_shared(query->data_type(), query->shape()); + softmax_max = std::make_shared(kNumberTypeFloat32, softmax_max_sum_shape); + softmax_sum = std::make_shared(kNumberTypeFloat32, softmax_max_sum_shape); + softmax_out = std::make_shared(query->data_type(), softmax_out_shape); + BaseTensorPtrList result = {softmax_max, softmax_sum, softmax_out, attention_out}; + CustomPyboostExecutor("aclnnEvoformerAttention", inputs, result, [=]() { + LAUNCH_ACLNN(aclnnEvoformerAttention, device_context, stream_id, + query, key, value, + realShiftOptional, dropMaskOptional, paddingMaskOptional, + attenMaskOptional, prefixOptional, scaleValue, keepProb, + preToken, nextToken, head_num, input_layout, inner_precise, + sparse_mode, softmax_max, softmax_sum, softmax_out, attention_out); + }); + return attention_out; +} +} // namespace mindspore::kernel::pyboost + +PYBIND11_MODULE(MS_EXTENSION_NAME, m) { + m.def("npu_evoformer_attention", &mindspore::kernel::pyboost::npu_evoformer_attention, "npu_evoformer_attention", + pybind11::arg("query"), pybind11::arg("key"), pybind11::arg("value"), + pybind11::arg("realShiftOptional"), pybind11::arg("dropMaskOptional"), pybind11::arg("paddingMaskOptional"), + pybind11::arg("attenMaskOptional"), pybind11::arg("prefixOptional"), pybind11::arg("scaleValueOptional"), + pybind11::arg("keepProbOptional"), pybind11::arg("preTokensOptional"), pybind11::arg("nextTokensOptional"), + pybind11::arg("headNum"), pybind11::arg("inputLayout"), pybind11::arg("innerPreciseOptional"), + pybind11::arg("sparseModeOptional")); +} diff --git a/mindscience/sciops/python/__init__.py b/mindscience/sciops/python/__init__.py new 
file mode 100644 index 0000000000000000000000000000000000000000..d4ee820ed7fadb15a717d9b1b4121c9d73382b02 --- /dev/null +++ b/mindscience/sciops/python/__init__.py @@ -0,0 +1,21 @@ +# Copyright 2023-2025 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +init +""" + +__all__ = ["Einsum"] + +from .einsum import Einsum diff --git a/mindscience/sciops/python/einsum/__init__.py b/mindscience/sciops/python/einsum/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d4ee820ed7fadb15a717d9b1b4121c9d73382b02 --- /dev/null +++ b/mindscience/sciops/python/einsum/__init__.py @@ -0,0 +1,21 @@ +# Copyright 2023-2025 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +""" +init +""" + +__all__ = ["Einsum"] + +from .einsum import Einsum diff --git a/mindscience/sciops/python/einsum/constants.py b/mindscience/sciops/python/einsum/constants.py new file mode 100644 index 0000000000000000000000000000000000000000..833c35ab1c92990307496f9ccc27806916ee2310 --- /dev/null +++ b/mindscience/sciops/python/einsum/constants.py @@ -0,0 +1,57 @@ +# Copyright 2023-2025 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +constants.py +This module contains various constants used throughout the application. + +Constants: +`T_INPUT, T_MUL, T_MATMUL, T_OUT` are the flags of tensor type. +T_MATMUL represents the matrix multiplication generated the tensor. + +`INPUT, OUT, MID_MUL, MID_MATMUL, MUL, MATMUL` are the flags of +constraint types. + +`MUST_MK, MUST_KM, MUST_ALL` are the flags for the order of the K-axis +in batch matrix multiplication. ALL means that both KM and MK are allowed. + +`MIN_WEIGHT_PROD, SEARCH_K_THRE` are the constants used in +label order optimization. 
+""" + +# Tensor type +T_INPUT = 0 +T_MUL = 1 +T_MATMUL = 2 +T_OUT = 3 + + +# Constraint Types +INPUT = "INPUT" +OUT = "OUT" +MID_MUL = "MID_MUL" +MID_MATMUL = "MID_MATMUL" +MUL = "MUL" +MATMUL = "MATMUL" + + +# bmm +MUST_MK = 1 +MUST_KM = 2 +MUST_ALL = 3 + + +# optimization +MIN_WEIGHT_PROD = 1048576 +SEARCH_K_THRE = 20 diff --git a/mindscience/sciops/python/einsum/einsum.py b/mindscience/sciops/python/einsum/einsum.py new file mode 100644 index 0000000000000000000000000000000000000000..afd2040ed621f72c358caa73739c78f2cf931076 --- /dev/null +++ b/mindscience/sciops/python/einsum/einsum.py @@ -0,0 +1,703 @@ +# Copyright 2023-2025 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"einsum main file" + +import math +from collections import defaultdict + +from mindspore import mint, nn, mutable +from mindspore import ops as P +from mindspore.common.tensor import Tensor +from mindspore.ops._primitive_cache import _get_cache_prim + +from . import constants as C +from .label_order import LabelOrder +from .opt_einusm_path import parse_opt_trace +from .sumproduct_pair import (sumproduct_pair_info, out_cacl_info, rearrange_tensor_to_mul, + rearrange_tensor_to_bmm, rearrange_tensor_to_out, prod_lst) + + +def _parse_equation(equation: str): + """ + Parse the einsum equation into left-hand side (LHS), right-hand side (RHS), and number of operands. 
+ """ + arrow_pos = equation.find("->") + if arrow_pos == -1: + raise ValueError(f"invalid equation {equation}: require '->'") + + equation = equation.replace('...', '.') + arrow_pos = equation.find("->") + lhs = equation[:arrow_pos] + rhs = equation[arrow_pos + 2:] + num_ops = lhs.count(",") + 1 + + return lhs, rhs, num_ops + + +def _parse_ellipsis(lhs: str, rhs: str): + """ + Parse the ellipsis dims of equation + """ + op_labels = lhs.split(",") + [rhs] + ellipsis_idxes = [] + has_ellipsis = False + for s in op_labels: + ecnt = s.count(".") + if ecnt > 1: + raise ValueError(f"invalid equation {lhs} with multiple '...'") + if ecnt == 1: + pre, post = s.split(".") + ellipsis_idxes.append((len(pre), len(post))) + has_ellipsis = True + else: + ellipsis_idxes.append(None) + + if not has_ellipsis: + return None + + return ellipsis_idxes + + +def _sum_dims_helper(a_shape: list, a_sums: tuple[str, ...]): + """ + Helper function to filter out dimensions to be summed and return + the remaining dimensions and their indices. + a_shape: list[tuple[str, int], ...]; like this: [('i', 0), ('j', 1)] + a_sums: tuple[str, ...]): + """ + res = [] + sum_dims = [] + for i, (k, v) in enumerate(a_shape): + if k not in a_sums: + res.append((k, v)) + else: + sum_dims.append(i) + + return res, tuple(sum_dims) + + +def _cacl_mul_reshape(tensor: Tensor, add_dim_info: tuple[int, tuple[int, ...]]): + """ + Calculate the new shape and permutation indices for multiplication operations. 
+ """ + if add_dim_info[0] == 0: + return tensor + + add_dims, perm_ids = add_dim_info + added_shape = tensor.shape + (tuple([1]) * add_dims) + new_shape = tuple(added_shape[i] for i in perm_ids) + return tensor.reshape(new_shape) + + +def _reshape_of_bmm(ta: Tensor, gb: tuple, m: int, k: int, is_trans: bool): + """ + reshape tensor for bmm with BMK or BKM format + """ + new_shape = gb + (k, m) if is_trans else gb + (m, k) + if new_shape != ta.shape: + return ta.reshape(new_shape) + return ta + + +def _cacl_matmul_reshape(ta, tb, bmm_info): + """Reshape the tensor for matrix multiplication operations. + Types: + ta: Tensor + tb: Tensor + bmm_info: tuple[bool, bool, bool, tuple[int, ...], tuple[int, ...], + tuple[int, ...], tuple[int, ...]] + """ + a_shape, b_shape = ta.shape, tb.shape + is_batch, transpose_a, transpose_b, a_b, a_m, b_n, a_k = bmm_info + + m_dims = tuple(a_shape[d] for d in a_m) + m = prod_lst(m_dims) + n_dims = tuple(b_shape[d] for d in b_n) + n = prod_lst(n_dims) + k = prod_lst(tuple(a_shape[d] for d in a_k)) + + gb, b_dims = (), () + if is_batch: + b_dims = tuple(a_shape[d] for d in a_b) + b = prod_lst(b_dims) + gb = (b,) + + out_shape = b_dims + m_dims + n_dims + if out_shape == gb + (m, n): + out_shape = None + + # transpose_a and left or right in bmm indicate BMK or BKM + ta = _reshape_of_bmm(ta, gb, m, k, transpose_a) + tb = _reshape_of_bmm(tb, gb, n, k, not transpose_b) + return ta, tb, out_shape + + +def _remove_a_diagonal(labels: str, shape: tuple[int, ...]): + """ + Removes a diagonal element from the labels and shape, ensuring no duplicate labels. 
+ """ + if len(labels) != len(shape): + raise ValueError(f"labels: {labels} and tensor shape: {shape} are different size") + + for i in range(len(labels) - 1, 0, -1): + c = labels[i] + idx = labels.find(c, 0, i) + if idx >= 0: + if shape[i] != shape[idx]: + raise ValueError(f"tensor diagonal requires same size, \ + while with {shape[i]} and {shape[idx]}") + + pairs = [(labels[j], shape[j]) for j in range(len(labels)) if j not in (i, idx)] + new_labels = [a for a, _ in pairs] + [c] + new_shape = tuple(b for _, b in pairs) + (shape[i],) + + return (idx, i), "".join(new_labels), new_shape + + return None, labels, shape + + +def _flat_empty_struct(st: list): + """ + Flattens an empty structure to None if it contains no non-empty elements. + """ + for e in st: + if e: + return tuple(st) + + return None + + +def _convert_1_to_2(s: int): + if s == 1: + return 2 + return s + + +def _replace_e1_shape(shapes): + """Shape equal to 1 will affect preprocessing, use 2 instead. + Replaces all shape elements equal to 1 with 2. + + Args: + shapes: list[tuple[int, ...], ...] + """ + res = [] + for shape in shapes: + new_shape = tuple(_convert_1_to_2(s) for s in shape) + res.append(new_shape) + + return tuple(res) + + +def _get_ellipsis_shape(shape, label_part: tuple[int, int], elli_shapes: tuple[int, ...]): + """ + replace shape of ellipsis dims + """ + pre_ellipsis, post_ellipsis = label_part + num_dims = len(shape) + + total_labels = pre_ellipsis + post_ellipsis + if num_dims < total_labels: + raise ValueError(f"({shape}) is invalid for given equtation, require not less than {total_labels}.") + + # The shape of the dimension before '...' + pre_ellipsis_shape = shape[: pre_ellipsis] + # The shape of the dimension after '...' 
+ post_ellipsis_shape = shape[num_dims - post_ellipsis :] + + if elli_shapes is not None: + # note: elli_shapes may be tuple([]) + new_shape = pre_ellipsis_shape + elli_shapes + post_ellipsis_shape + else: + elli_shapes = tuple(shape[pre_ellipsis: num_dims - post_ellipsis]) + new_shape = pre_ellipsis_shape + tuple([prod_lst(elli_shapes)]) + post_ellipsis_shape + + return new_shape, elli_shapes + + +def _update_weight(cacl_info): + """ + update weight by tensor's data volume + """ + for info in cacl_info: + w = max(info["WEIGHT"], C.MIN_WEIGHT_PROD) + info["WEIGHT"] = math.log(w / C.MIN_WEIGHT_PROD, 64) + 1.0 + + +class Einsum(nn.Cell): + """ + Einsum operation + """ + + def __init__(self, equation, use_opt=True): + """ + This operator performs tensor computations using Einstein summation convention (Einsum). + Supports diagonalization, reduction, transposition, matrix multiplication, product operations, + inner products, etc. + + Args: + - equation (str) + Specifies the computation to be performed. Only accepts: + Letters ([a-z][A-Z]): Represent dimensions of input tensors + ...: anonymous dimensions + Commas (','): Separate tensor dimensions + Arrow ('->'): Left side specifies input tensors, right side specifies desired output dimensions + + - use_opt (bool), optional + Defaults to `True`. When set to `False`, the contraction path optimization is skipped. + + Inputs: + - *tensors, list of tensor inputs of variable length + + Outputs: + - output (Tensor) + + Supported Platforms: + ``Ascend`` ``CPU`` + + Examples: + >>> import mindspore as ms + >>> from mindspore import nn, Tensor, ops + >>> import numpy as np + >>> from mindscience.sciops import Einsum + + >>> x = Tensor(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]), ms.float32) + >>> y = Tensor(np.array([[2.0, 3.0], [1.0, 2.0], [4.0, 5.0]]), ms.float32) + >>> equation = "ij,jk->ik" + >>> einsum = Einsum(equation, use_opt=False) + >>> output = einsum(x, y) + >>> print(output.shape) + (2, 2) + + >>> shapes = [(156, 16, 16), (660, 128, 16), (660, 128, 16)] + >>> x, y, z = [ops.randn(tp) for tp in shapes] + >>> equation = "ijk,zui,zuj->zuk" + >>> einsum = Einsum(equation, use_opt=True) + >>> output = einsum(x, y, z) + + # example: Linear layer implemented using einsum + class EinsumLinear(nn.Cell): + def __init__(self, in_features, out_features): + super().__init__() + self.in_features = in_features + self.out_features = out_features + + self.weight = ms.Parameter( + Tensor(np.random.randn(out_features, in_features).astype(np.float32)), + name='weight' + ) + self.bias = ms.Parameter( + Tensor(np.random.randn(out_features).astype(np.float32)), + name='bias' + ) + + # Define einsum operation + self.einsum = Einsum("ij,bj->bi") # Define matrix multiplication pattern + + def construct(self, x): + # Perform matrix multiplication using einsum: output = x @ weight.T + bias + output = self.einsum(self.weight, x) + self.bias + return output + """ + super().__init__() + if not isinstance(equation, str): + raise TypeError(f"For einsum, 'equation' must be a str, but got {type(equation)}.") + self.equation = equation.replace(" ", "") + self.lhs, self.rhs, self.num_ops = _parse_equation(self.equation) + self.num_tensors = 2 * self.num_ops - 1 + self.contract_dims = self._get_contract_dims() + self.ellipsis_idxes = _parse_ellipsis(self.lhs, self.rhs) + self.use_opt = use_opt + + # uninited + self.has_inited =
False + self.trace = None + self.order_labels = None + self.diag_ops = None + self.sums_ops, self.perm_ops, self.step_ops = None, None, None + + if not use_opt or self.num_ops < 2: + shapes = self._generate_a_random_shape() + self._post_init(shapes) + + + @staticmethod + def _count_labels(op_labels): + """ + Counts the occurrences of each unique label in the operation labels. + + Args: + op_labels: list[str, ...] + Returns: + dict: A dictionary mapping each label to its count. + """ + letter_count = defaultdict(int) + + for s in op_labels: + unique_letters = set(s) + for letter in unique_letters: + letter_count[letter] += 1 + + return dict(letter_count) + + + @staticmethod + def _bind_shape_with_label(in_shapes, op_labels, rt_list=True): + """bind shape with label + Args: + in_shapes: tuple[tuple[int, ...], ...] + op_labels: list[str, ...] + rt_list: bool + return example [{'i':2, 'j':3}, {'j':3, 'k':4}, {'k':4, 'i':2}] + """ + bound_shapes = [] + for indices, shape in zip(op_labels, in_shapes): + if rt_list: + bound_shape = [(idx, dim) for idx, dim in zip(indices, shape)] + else: + bound_shape = {idx: dim for idx, dim in zip(indices, shape)} + + bound_shapes.append(bound_shape) + + return bound_shapes + + + def _post_init(self, shapes): + """ + Determine whether it has been initialized. If not, it will be called the first time it runs. + 1. Apply path contraction by opt_einsum + 2. Apply label order optimization + 3. 
Build calculation steps + """ + base_trace = parse_opt_trace(self.equation, shapes, self.use_opt) + + op_labels, self.diag_ops, rm_diag_shapes = self._process_diagonal(shapes) + rm_diag_shapes = _replace_e1_shape(rm_diag_shapes) + + tensor_infos = self._build_cacl_steps(rm_diag_shapes, op_labels, base_trace) + base_order = self._get_base_order() + order = LabelOrder(tensor_infos, base_trace, base_order) + self.order_labels, self.trace = order.get_order() + + self.sums_ops, self.perm_ops, self.step_ops = self._build(rm_diag_shapes, op_labels, tensor_infos) + self.has_inited = True + + + def _get_base_order(self): + """ + Generates a base order string by appending characters from lhs to rhs, + excluding commas and duplicates. + + Returns: + str: The base order string. + """ + res = self.rhs + for c in self.lhs: + if c != ',' and c not in res: + res += c + + return res + + + def _process_diagonal(self, shapes): + """ + Processes the diagonal elements of the tensors specified by the operation labels. + + Args: + shapes: The shapes of the tensors. + list[tuple[int, ...], ...] + + Returns: + tuple: A tuple containing the new operation labels, diagonal operations, and new shapes. + """ + op_labels = self.lhs.split(",") + new_op_labels, new_shapes = [], [] + diag_ops = [] + for op, shape in zip(op_labels, shapes): + diag_pairs = [] + while True: + diag_pair, op, shape = _remove_a_diagonal(op, shape) + if not diag_pair: + break + diag_pairs.append(tuple(diag_pair)) + + diag_ops.append(tuple(diag_pairs)) + new_op_labels.append(op) + new_shapes.append(shape) + + for _ in range(self.num_ops - 1): + diag_ops.append(None) + new_diag_ops = _flat_empty_struct(diag_ops) + return new_op_labels, new_diag_ops, new_shapes + + + def _generate_a_random_shape(self): + """ + Generates a random shape for the tensors based on the operation labels. + + Returns: + list of tuples: The generated shapes for the tensors. 
+ """ + all_indices = set(self.lhs) + + # a random size + dims = {idx: 8 for idx in all_indices} + + op_labels = self.lhs.split(",") + input_shapes = [] + for labels in op_labels: + shape = tuple(dims[label] for label in labels) + input_shapes.append(shape) + + return input_shapes + + + def _get_contract_dims(self): + """ + Determines the dimensions to be contracted by comparing the sets of lhs and rhs. + + Returns: + tuple: The dimensions to be contracted. + """ + set1 = set(self.lhs) + set2 = set(self.rhs + ",") + diff_set = set1 - set2 + return tuple(diff_set) + + + def _build_cacl_steps(self, in_shapes, op_labels, base_trace): + """ + Builds the calculation steps for tensor contractions based on the input shapes and operation labels. + + Args: + in_shapes (list of tuples): The shapes of the input tensors. + op_labels: The List of labels; example: ["ijk, "zui", "zuj"] + base_trace: list of Int pair; like [(1, 0), (2, 3)] + + Types: + in_shapes: tuple[tuple[int, ...], ...] + op_labels: list[str, ...]): + + Returns: + tuple: A tuple of tuples, each containing input and calculation information for each step. 
+ """ + label_counts = Einsum._count_labels(op_labels) + ops = Einsum._bind_shape_with_label(in_shapes, op_labels, rt_list=False) + + cacl_info = [None] * self.num_tensors + + input_info = [] + for labels in op_labels: + input_info.append({"IN": labels, "FROM": C.T_INPUT}) + + for i, j in base_trace: + a_shape = ops[i] + b_shape = ops[j] + + sum_labels = [] + a_labels_to_sum, b_labels_to_sum = [], [] + for d in self.contract_dims: + if d in a_shape and d in b_shape: + label_counts[d] -= 1 + if label_counts[d] == 1: + sum_labels.append(d) + label_counts[d] = 0 + elif label_counts[d] == 1: + if d in a_shape: + a_labels_to_sum.append(d) + label_counts[d] = 0 + elif d in b_shape: + b_labels_to_sum.append(d) + label_counts[d] = 0 + + new_shape, a_info, b_info, out_info = sumproduct_pair_info(a_shape, b_shape, a_labels_to_sum, + b_labels_to_sum, sum_labels) + ops.append(new_shape) + input_info.append(out_info) + + # dict of calculate info about: matmul or mul + cacl_info[i] = a_info + cacl_info[j] = b_info + + cacl_info[-1] = out_cacl_info(ops[self.num_tensors - 1], self.rhs) + _update_weight(cacl_info) + + res = tuple(zip(input_info, cacl_info)) + return res + + + def _build(self, in_shapes, op_labels, ops): + """ + Builds the tensor operations and permutations for the given input shapes. + + Args: + in_shapes (list of tuples): The shapes of the input tensors. + op_labels (list): The List of labels; example: ["ijk, "zui", "zuj"]. + ops (list): result of function _build_cacl_steps. + + Types: + in_shapes: tuple[tuple[int, ...], ...] + op_labels: list[str, ...] + ops: tuple[tuple[dict[str, str], ...], ...]) + + Returns: + tuple: A tuple containing the sum dimensions, permutations, and step operations. 
+ """ + shape_infos = Einsum._bind_shape_with_label(in_shapes, op_labels, rt_list=True) + + perm_ops = [None] * self.num_tensors + sums_ops = [None] * self.num_tensors + step_ops = [] + + for i, j in self.trace: + a_mul_sums, b_mul_sums = ops[i][0].get("SUMS", []), ops[j][0].get("SUMS", []) + a_info, b_info = ops[i][1], ops[j][1] + t_type = a_info["CACL"] + a_shape, a_sum_dims = _sum_dims_helper(shape_infos[i], a_info["SUMS"] + a_mul_sums) + b_shape, b_sum_dims = _sum_dims_helper(shape_infos[j], b_info["SUMS"] + b_mul_sums) + + if t_type == C.T_MUL: + a_perm, b_perm, cacl_info, new_shape = rearrange_tensor_to_mul(self.order_labels, a_shape, b_shape) + else: + a_perm, b_perm, cacl_info, new_shape = rearrange_tensor_to_bmm(self.order_labels, a_shape, + a_info, b_shape, b_info) + + shape_infos.append(new_shape) + perm_ops[i], perm_ops[j] = a_perm, b_perm + sums_ops[i], sums_ops[j] = a_sum_dims, b_sum_dims + step_ops.append((t_type, cacl_info)) + + # out + out_shape, out_sum_dims = _sum_dims_helper(shape_infos[self.num_tensors-1], self.contract_dims) + sums_ops[-1] = out_sum_dims + perm_ops[-1] = rearrange_tensor_to_out(out_shape, self.rhs) + + sums_ops = _flat_empty_struct(sums_ops) + return sums_ops, tuple(perm_ops), tuple(step_ops) + + + def _reshape_ellipsis(self, operands): + """ + reshape the dims indicated by ellipses. 
+ """ + if not self.ellipsis_idxes: + return operands, None + + new_operands = mutable(list()) + elli_shapes = None + for i in range(len(operands)): + if self.ellipsis_idxes[i]: + new_shape, elli_shapes = _get_ellipsis_shape(operands[i].shape, + self.ellipsis_idxes[i], None) + new_operands.append(operands[i].reshape(new_shape)) + else: + new_operands.append(operands[i]) + + return new_operands, elli_shapes + + + def _reshape_ellipsis_out(self, out: Tensor, elli_shapes: tuple[int, ...]): + # note: elli_shapes may be tuple([]) + if elli_shapes is not None and self.ellipsis_idxes[-1]: + new_shape, _ = _get_ellipsis_shape(out.shape, self.ellipsis_idxes[-1], elli_shapes) + return out.reshape(new_shape) + return out + + + def _apply_preprocess(self, t, i): + """ + Applies a series of preprocessing operations on the tensor `t` based on the operations + defined in `diag_ops`, `sums_ops`, and `perm_ops`. + + Args: + - t (Tensor): The input tensor to be preprocessed. + - i (int): The index used to access the specific operations for this tensor. + + Returns: + - Tensor: The preprocessed tensor. + """ + # diagonal + if self.diag_ops and self.diag_ops[i]: + for prev_dim, dim in self.diag_ops[i]: + t = t.diagonal(0, prev_dim, dim) + + # sums + if self.sums_ops and self.sums_ops[i]: + t = mint.sum(t, dim=self.sums_ops[i], keepdim=False) + + # permute + if self.perm_ops[i]: + t = mint.permute(t, self.perm_ops[i]) + + return t + + + def _check_inputargs(self, operands): + """Check operands.""" + if len(operands) != self.num_ops: + raise ValueError("The number of input tensors is inconsistent with the expression.") + for operand in operands: + if not isinstance(operand, Tensor): + raise TypeError(f"For einsum, members of 'operands' must be Tensor, but got {type(operand)}.") + + + def construct(self, *operands): + """ + Constructs the final output tensor by applying a series of operations defined in `trace` and `step_ops`. + + Args: + - *operands: Variable number of input tensors. 
+ + Returns: + - Tensor: The final output tensor after applying all the operations. + """ + self._check_inputargs(operands) + operands, elli_shapes = self._reshape_ellipsis(operands) + + if not self.has_inited: + shapes = [t.shape for t in operands] + self._post_init(shapes) + + data = mutable(list(operands)) + + for k in range(len(self.trace)): + i, j = self.trace[k] + t_type, bmm_info = self.step_ops[k] + + # Apply preprocessing to the selected tensors + ta = self._apply_preprocess(data[i], i) + tb = self._apply_preprocess(data[j], j) + + # Perform the specified operation (mul or matmul) + if t_type == C.T_MUL: + ta = _cacl_mul_reshape(ta, bmm_info[0]) + tb = _cacl_mul_reshape(tb, bmm_info[1]) + t_out = ta * tb + else: + mm_class = P.BatchMatMul if bmm_info[0] else P.MatMul + matmul = _get_cache_prim(mm_class)(transpose_a=bmm_info[1], transpose_b=bmm_info[2]) + ta, tb, out_shape = _cacl_matmul_reshape(ta, tb, bmm_info) + t_out = matmul(ta, tb) + if out_shape: + t_out = t_out.reshape(out_shape) + + # append new tensor + data.append(t_out) + + # Apply final preprocessing to the last tensor + n = self.num_tensors - 1 + out_tensor = self._apply_preprocess(data[n], n) + out_tensor = self._reshape_ellipsis_out(out_tensor, elli_shapes) + + return out_tensor diff --git a/mindscience/sciops/python/einsum/graph_utils.py b/mindscience/sciops/python/einsum/graph_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7aad5695e8ea48cec124d7e7735a250d93fcdafe --- /dev/null +++ b/mindscience/sciops/python/einsum/graph_utils.py @@ -0,0 +1,195 @@ +# Copyright 2023-2025 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"graph utils" + +from collections import defaultdict + + +def build_adja_mul(in_labels, sum_labels, expand=False): + """ + Build an adjacency list from the input labels, excluding the sum labels. + If expand is True, each character is connected to all subsequent characters. + Otherwise, each character is connected to the next character only. + """ + adja = {} + + s = [c for c in in_labels if c not in sum_labels] + + for i in range(len(s) - 1): + current_char = s[i] + if expand: + next_chars = list(s[i+1:]) + else: + next_chars = [s[i + 1]] + + adja[current_char] = next_chars + + return adja + + +def build_adja_bmm(bb: str, mm: str, kk: str, sum_labels: list[str]): + """ + Build an adjacency list from the given lists bb, mm, and kk, excluding the sum labels. + Each element in bb is connected to all elements in mm and kk. + Each element in mm is connected to all elements in kk. + """ + bb = [c for c in bb if c not in sum_labels] + mm = [c for c in mm if c not in sum_labels] + kk = [c for c in kk if c not in sum_labels] + adja = {} + for b in bb: + adja[b] = list(mm + kk) + + for m in mm: + adja[m] = list(kk) + + return adja + + +def union_adjacency_lists(adj_list1, adj_list2): + """ + Compute the union of two adjacency lists. + The resulting adjacency list contains all nodes and their neighbors from both input lists. 
+ Types: + adj_list1: dict[str, list[str, ...]] + adj_list2: dict[str, list[str, ...]]): + """ + union = {} + # Traverse the first adjacency list + for node, neighbors in adj_list1.items(): + union[node] = set(neighbors) + # Traverse the second adjacency list + for node, neighbors in adj_list2.items(): + if node in union: + union[node].update(neighbors) + else: + union[node] = set(neighbors) + # Convert sets back to lists + for node in union: + union[node] = list(union[node]) + return union + + +def difference_adjacency_lists(adj_list1, adj_list2): + """ + Compute the difference between two adjacency lists. + The resulting adjacency list contains nodes and their neighbors from the first list that are not in the second list. + Types: + adj_list1: dict[str, list[str, ...]] + adj_list2: dict[str, list[str, ...]]): + """ + difference = {} + for node, neighbors in adj_list1.items(): + if node in adj_list2: + # Compute the difference set + diff_neighbors = set(neighbors) - set(adj_list2[node]) + if diff_neighbors: + difference[node] = list(diff_neighbors) + else: + difference[node] = neighbors + return difference + + +def intersection_adjacency_lists(adj_list1, adj_list2): + """ + Compute the intersection of two adjacency lists. + The resulting adjacency list contains nodes and their neighbors that are common to both input lists. + Types: + adj_list1: dict[str, list[str, ...]] + adj_list2: dict[str, list[str, ...]]): + """ + intersection = {} + for node, neighbors in adj_list1.items(): + if node in adj_list2: + # Compute the intersection set + inter_neighbors = set(neighbors) & set(adj_list2[node]) + if inter_neighbors: + intersection[node] = list(inter_neighbors) + return intersection + + +def symmetric_difference_adjacency_lists(adj_list1, adj_list2): + """ + Compute the symmetric difference of two adjacency lists. + The resulting adjacency list contains nodes and their neighbors that are in one list but not the other. 
+ Types: + adj_list1: dict[str, list[str, ...]] + adj_list2: dict[str, list[str, ...]]): + """ + # Compute the difference A - B + diff1 = difference_adjacency_lists(adj_list1, adj_list2) + # Compute the difference B - A + diff2 = difference_adjacency_lists(adj_list2, adj_list1) + # Combine the two differences + symmetric_diff = union_adjacency_lists(diff1, diff2) + return symmetric_diff + + +def topological_sort(graph): + """ + Calculate the in-degree of each node + graph type: dict[str, list[str, ...]] + """ + in_degree = defaultdict(int) + + for u in graph: + in_degree[u] = 0 + + # Initialize in-degrees + for u in graph: + for v in graph[u]: + in_degree[v] += 1 + + # Add all nodes with in-degree 0 to the queue + queue = [u for u in in_degree if in_degree[u] == 0] + + num_nodes = len(in_degree) + + # Store the result of the topological sort + topo_order = [] + + while queue: + u = queue.pop(0) + topo_order.append(u) + + # Decrease the in-degree of adjacent nodes + for v in graph.get(u, []): + in_degree[v] -= 1 + # If in-degree becomes 0, add to the queue + if in_degree[v] == 0: + queue.append(v) + + # If the topological sort result contains all nodes, return the result + if len(topo_order) == num_nodes: + return topo_order + return [] + + +def is_not_conflict(adja_l, adja_r): + """ + Check if there is no conflict between two adjacency lists + """ + if not adja_l or not adja_r: + return True + adja = union_adjacency_lists(adja_l, adja_r) + order = topological_sort(adja) + return len(order) > 0 + + +def reorder_subseq(subseq: str, seq: str): + """ + Reorder a subsequence to match the order in the given sequence + """ + return tuple(c for c in seq if c in subseq) diff --git a/mindscience/sciops/python/einsum/label_order.py b/mindscience/sciops/python/einsum/label_order.py new file mode 100644 index 0000000000000000000000000000000000000000..76949ae14a915099b86176f7b628962227f7c636 --- /dev/null +++ b/mindscience/sciops/python/einsum/label_order.py @@ -0,0 +1,412 @@ +# 
Copyright 2023-2025 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"label order" + +from . import constants as C +from .graph_utils import (build_adja_mul, build_adja_bmm, union_adjacency_lists, difference_adjacency_lists, + symmetric_difference_adjacency_lists, topological_sort, is_not_conflict, reorder_subseq) + + +def _solve_max_weight_independent_set(graph, weights): + """ + Find the maximum weight independent set in a given graph. + An independent set is a set of nodes in which no two nodes are adjacent. + The function uses a depth-first search (DFS) approach to explore all possible combinations of nodes, + keeping track of the total weight and the selected nodes. + It returns the maximum weight and the corresponding independent set. 
    Args:
        graph (list[list[int, ...], ...]): adjacency table
        weights (list[int, ...]): list of weights

    Returns:
        max weight and selected indices
    """
    n = len(graph)
    # Preprocess the neighbor mask for each node: bit j of neighbor_masks[i]
    # is set iff node j is adjacent to node i.
    neighbor_masks = [0] * n
    for i in range(n):
        for j in graph[i]:
            neighbor_masks[i] |= 1 << j

    max_weight, max_ind_set = 0, tuple([])

    def dfs(i, mask, total_weight, selected):
        # Exhaustive DFS over "skip node i" / "take node i" decisions.
        # `mask` has a bit set for every node that is already excluded
        # because a selected node is adjacent to it.
        nonlocal max_weight, max_ind_set
        if i >= n:
            if total_weight > max_weight:
                max_weight = total_weight
                max_ind_set = tuple(selected)
            return

        # Case where the current node is not selected.
        # NOTE(review): an isolated node (neighbor_masks[i] == 0) is never
        # skipped here; that pruning is only valid when all weights are
        # positive — TODO confirm weights are always > 0 at call sites.
        if neighbor_masks[i] != 0:
            dfs(i + 1, mask, total_weight, selected)

        # Case where the current node is selected, update the mask to exclude neighbors
        if not mask & (1 << i):
            new_mask = mask | neighbor_masks[i]
            selected.append(i)
            dfs(i + 1, new_mask, total_weight + weights[i], selected)
            selected.pop(-1)

    selected_nodes = []
    dfs(0, 0, 0, selected_nodes)
    return max_ind_set, max_weight


class Constraint:
    """Represents a constraint with a node ID, adjacency list, weight, and swap flag.

    Args:
        node_id (int): The ID of the node.
        adja (list): The adjacency list of the node.
        weight (int): The weight of the node.
        swap_mn (bool): A flag indicating whether to swap the M, N of matrix multiplication.
        is_transpose (bool): A flag indicating whether the operand is transposed in matrix multiplication.
        is_swapped (bool): A flag indicating whether the operands of "a@b" were swapped in matrix multiplication.
    """
    def __init__(self, id_, adja_, w_, sw_, t_):
        self.node_id: int = id_
        self.adja: list = adja_
        self.weight: float = w_
        self.swap_mn: bool = sw_
        self.is_transpose: bool = t_
        # is_swapped is never passed to the constructor; only do_swap() sets it.
        self.is_swapped: bool = False

    def __str__(self):
        return (f"{self.node_id}, "
                f"{self.adja}, "
                f"{self.weight}, "
                f"{self.swap_mn}, "
                f"{self.is_transpose}, "
                f"{self.is_swapped}")

    def add_extra_weight(self):
        # A non-transposed constraint gets a small weight bonus so the MWIS
        # solver prefers it, minimizing the number of transposes selected.
        if not self.is_transpose:
            self.weight += 0.2

    def do_swap(self):
        # Create a new Constraint representing the swapped-operand variant:
        # is_transpose flipped, is_swapped marked, weight bonus re-applied.
        res = Constraint(self.node_id, self.adja, self.weight, self.swap_mn,
                         not self.is_transpose)
        res.is_swapped = True
        res.add_extra_weight()
        return res


class LabelOrder:
    """Manages the order of labels for tensor operations and builds constraints
    for the maximum weighted independent set (MWIS).
    """

    def __init__(self, tensors_info, trace, base_order):
        """Initializes the LabelOrder with tensor information, trace, and base order.

        Args:
            tensors_info (dict): Information about the tensors involved in the operations.
            trace (list): A trace of the operations.
            base_order (list): The base order of labels.
        """
        self.trace = trace
        self.ops = tensors_info
        self.base_order = base_order
        self.constraints = []
        # _build_mwis() also populates self.constraints as a side effect.
        self.graph = self._build_mwis()


    @staticmethod
    def _get_adja_tp(in_info, out_info):
        """Determines the type of adjacency list to build based on input and output tensor information.

        Args:
            in_info (dict): Information about the input tensor.
            out_info (dict): Information about the output tensor.

        Returns:
            tuple: A tuple containing the input and output types.
        """
        t1, t2 = "", ""
        if in_info["FROM"] == C.T_INPUT:
            t1 = C.INPUT
        elif in_info["FROM"] == C.T_MUL:
            t1 = C.MID_MUL
        elif in_info["FROM"] == C.T_MATMUL:
            t1 = C.MID_MATMUL

        if out_info["CACL"] == C.T_MUL:
            t2 = C.MUL
        elif out_info["CACL"] == C.T_MATMUL:
            t2 = C.MATMUL
        elif out_info["CACL"] == C.T_OUT:
            t2 = C.OUT

        return (t1, t2)


    @staticmethod
    def _build_adja_bmm_helper(out_info, adja_in):
        """Helper function to build adjacency lists for batch matrix multiplication (BMM).

        Args:
            out_info (dict[str, int]): Information about the output tensor.
            adja_in (adja_in: dict[str, list[str, ...]]): The input adjacency list.

        Returns:
            tuple: A tuple containing conflict flags and adjacency lists for BMM.
        """
        b, m, k = out_info["B"], out_info["M"], out_info["K"]
        bmm_must_seq = out_info["BMM_MUST_SEQ"]

        # BMM_MUST_SEQ acts as a layout flag: odd -> the (B, M, K) layout is
        # allowed, >= 2 -> the (B, K, M) layout is allowed.
        no_conf_bmk, no_conf_bkm = False, False
        adja_bmk, adja_bkm = {}, {}
        if bmm_must_seq % 2 == 1:
            adja_bmk = build_adja_bmm(b, m, k, [])
            no_conf_bmk = is_not_conflict(adja_bmk, adja_in)

        if bmm_must_seq >= 2:
            adja_bkm = build_adja_bmm(b, k, m, [])
            no_conf_bkm = is_not_conflict(adja_bkm, adja_in)

        return no_conf_bmk, no_conf_bkm, adja_bmk, adja_bkm

    @staticmethod
    def is_swap_conf(ci: Constraint, cj: Constraint, swap_dict: dict[int, int]):
        """Return True when ``ci``/``cj`` form a swap pair whose swap states are inconsistent.

        Args:
            ci (Constraint): First constraint.
            cj (Constraint): Second constraint.
            swap_dict (dict[int, int]): Maps a node id to the node id of its swap partner.

        Returns:
            bool: True if the two constraints conflict and must be linked in the conflict graph.
        """
        is_swap_pair = swap_dict.get(ci.node_id, -1) == cj.node_id
        if is_swap_pair:
            return ci.is_swapped ^ cj.swap_mn

        return False

    def _build_mwis(self):
        """Builds the conflict graph for the maximum weighted independent set (MWIS).

        Side effect: populates ``self.constraints`` — every base constraint gets
        the non-transpose weight bonus, and constraints belonging to a swap pair
        additionally get a swapped twin from ``Constraint.do_swap``.

        Returns:
            list[list[int]]: adjacency lists of the conflict graph over ``self.constraints``.
        """
        swap_mn_pos = {}
        constraints_ = []
        num_ops = len(self.trace) + 1
        for id_, (in_info, out_info) in enumerate(self.ops):
            weight = out_info["WEIGHT"]
            adjas = self._build_adja(in_info, out_info)
            for adja in adjas:
                if isinstance(adja, tuple):
                    adja_, sw, is_transpose = adja
                    cs = Constraint(id_, adja_, weight, sw, is_transpose)
                    if sw:
                        # ids >= num_ops denote intermediate results; record the
                        # producing trace entry's operand positions for this id.
                        idx = id_ - num_ops
                        pi_, pj_ = self.trace[idx]
                        swap_mn_pos[pi_] = id_
                        swap_mn_pos[pj_] = id_
                    constraints_.append(cs)
                elif adja:
                    cs = Constraint(id_, adja, weight, False, False)
                    constraints_.append(cs)

        for cs in constraints_:
            if cs.node_id in swap_mn_pos:
                new_cs = cs.do_swap()
                self.constraints.append(new_cs)

            cs.add_extra_weight()
            self.constraints.append(cs)

        n = len(self.constraints)
        graph = [list() for _ in range(n)]
        for i in range(n):
            for j in range(i+1, n):
                ci, cj = self.constraints[i], self.constraints[j]
                # Two constraints conflict when they belong to the same op, their
                # adjacency lists clash, or their swap states disagree.
                if ci.node_id == cj.node_id or not is_not_conflict(ci.adja, cj.adja) or \
                        LabelOrder.is_swap_conf(ci, cj, swap_mn_pos):
                    graph[i].append(j)
                    graph[j].append(i)

        return graph


    def _build_adja_mid_matmul_helper(self, tp, in_info, out_info, swap_mn):
        """Helper function to build adjacency lists for intermediate matrix multiplication.

        Args:
            tp (str): The type of operation (MUL, MATMUL, OUT).
            in_info (dict[str, str]): Information about the input tensor.
            out_info (dict[str, int]): Information about the output tensor.
            swap_mn (bool): A flag indicating whether to swap the M, N.

        Returns:
            list: A list of adjacency lists and swap flags.
        """
        res = []
        if swap_mn:
            b, m, n = in_info["B"], in_info["N"], in_info["M"]
        else:
            b, m, n = in_info["B"], in_info["M"], in_info["N"]
        adja_bmn = build_adja_bmm(b, m, n, out_info["SUMS"])

        if tp == C.MUL:
            res.append((adja_bmn, swap_mn, False))

        elif tp == C.MATMUL:
            no_conf_bmk, no_conf_bkm, adja_bmk, adja_bkm = LabelOrder._build_adja_bmm_helper(out_info, adja_bmn)
            # swap operands will change position in matmul a@b
            is_left = out_info["LEFT"] ^ swap_mn
            if no_conf_bmk:
                adja = symmetric_difference_adjacency_lists(adja_bmk, adja_bmn)
                res.append((adja, swap_mn, not is_left))
            if no_conf_bkm:
                adja = symmetric_difference_adjacency_lists(adja_bkm, adja_bmn)
                # is_transpose if is_left
                res.append((adja, swap_mn, is_left))

        elif tp == C.OUT:
            adja_out = build_adja_mul(out_info["OUT"], [], expand=True)
            if is_not_conflict(adja_bmn, adja_out):
                adja = difference_adjacency_lists(adja_out, adja_bmn)
                res.append((adja, swap_mn, False))
        else:
            raise ValueError("Error calculate type:", tp)

        return res


    def _build_adja(self, in_info, out_info):
        """Builds the adjacency list for the given input and output tensor information.

        Args:
            in_info (dict): Information about the input tensor.
            out_info (dict): Information about the output tensor.

        Returns:
            list: A list of adjacency lists.
        """
        res = []
        tp = LabelOrder._get_adja_tp(in_info, out_info)
        if tp == (C.INPUT, C.MUL):
            adja = build_adja_mul(in_info["IN"], out_info["SUMS"])
            res.append(adja)
        elif tp == (C.MID_MUL, C.OUT):
            adja = build_adja_mul(out_info["OUT"], [])
            res.append(adja)
        elif tp == (C.INPUT, C.MATMUL):
            adja_in = build_adja_mul(in_info["IN"], out_info["SUMS"])
            no_conf_bmk, no_conf_bkm, _, _ = LabelOrder._build_adja_bmm_helper(out_info, adja_in)

            if no_conf_bmk or no_conf_bkm:
                all_adja = {}
                for k in "BMK":
                    reorder_seq = reorder_subseq(out_info[k], in_info["IN"])
                    adja = build_adja_mul(reorder_seq, [])
                    all_adja = union_adjacency_lists(all_adja, adja)

                # is_transpose is determined
                is_transpose = bool(no_conf_bmk) ^ out_info["LEFT"]
                res.append((all_adja, False, is_transpose))

        elif tp == (C.MID_MUL, C.MATMUL):
            _, _, adja_bmk, adja_bkm = LabelOrder._build_adja_bmm_helper(out_info, {})
            is_left = out_info["LEFT"]
            if adja_bmk:
                res.append((adja_bmk, False, not is_left))
            if adja_bkm:
                res.append((adja_bkm, False, is_left))
        elif tp[0] == C.MID_MATMUL:
            # intermediate matrix multiplication with and without swapping M and N dimensions.
            not_swap = self._build_adja_mid_matmul_helper(tp[1], in_info, out_info, False)
            swapped = self._build_adja_mid_matmul_helper(tp[1], in_info, out_info, True)
            res = not_swap + swapped
            # if empty indicating no constraints; but need the potential possibility of swap
            if not res:
                res = [({}, False, False), ({}, True, False)]
        else:
            # (C.INPUT, C.OUT), (C.MID_MUL, C.MUL) these cases don't need process
            pass

        return res

    def _greedy_add(self):
        """Finds the maximum weighted independent set (MWIS) using a greedy algorithm.

        Returns:
            tuple: A tuple containing the independent set and the total weight.
        """
        all_adja = {}
        ary = list(enumerate(self.constraints))
        # Greedy heuristic: consider constraints in descending weight order.
        ary.sort(key=lambda x: x[1].weight, reverse=True)
        has_seen = set()
        independent_set = []
        total_weight = 0
        for idx, c in ary:
            if idx not in has_seen and is_not_conflict(c.adja, all_adja):
                has_seen.update(self.graph[idx])
                all_adja = union_adjacency_lists(all_adja, c.adja)
                independent_set.append(idx)
                total_weight += c.weight

        return independent_set, total_weight

    def _comp_order(self, sel_order):
        """Computes the order of labels based on the selected order.

        Args:
            sel_order (list[str, ...]): The selected order of labels.

        Returns:
            str: The computed label order — selected labels first, followed by
            the remaining labels of ``self.base_order``.
        """
        add_labels = [c for c in self.base_order if c not in sel_order]
        return "".join(sel_order + add_labels)

    def _swap_mn_to_trace(self, swapped_indices: tuple[int, ...]):
        """
        Purpose: Swaps the indices in the trace for the specified operations to adjust the contraction order.
        Steps:
            Adjusts the indices in the trace based on the swap_mn list.
            Converts the trace to a tuple to ensure immutability.
        """
        swapped_trace = []
        for pi, pj in self.trace:
            if pi in swapped_indices or pj in swapped_indices:
                swapped_trace.append((pj, pi))
            else:
                swapped_trace.append((pi, pj))

        return tuple(swapped_trace)

    def get_order(self):
        """Return best order and swapped operations of BMM."""
        # Exact MWIS search is exponential; fall back to greedy above the threshold.
        if len(self.constraints) > C.SEARCH_K_THRE:
            independent_set, _ = self._greedy_add()
        else:
            weights = tuple(c.weight for c in self.constraints)
            independent_set, _ = _solve_max_weight_independent_set(self.graph, weights)

        all_adja = {}

        swapped_indices = []
        for idx in independent_set:
            cs = self.constraints[idx]
            all_adja = union_adjacency_lists(all_adja, cs.adja)
            if cs.is_swapped:
                swapped_indices.append(cs.node_id)

        swapped_trace = self._swap_mn_to_trace(swapped_indices)

        sel_order = []
        if all_adja:
            sel_order = topological_sort(all_adja)

        return self._comp_order(sel_order), swapped_trace
diff --git a/mindscience/sciops/python/einsum/opt_einusm_path.py b/mindscience/sciops/python/einsum/opt_einusm_path.py
new file mode 100644
index 0000000000000000000000000000000000000000..541d371dcd2af80e812a56cfeac1687b8b3f0da9
--- /dev/null
+++ b/mindscience/sciops/python/einsum/opt_einusm_path.py
@@ -0,0 +1,89 @@
# Copyright 2023-2025 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"opt einsum"

# opt_einsum is an optional dependency; fall back to the left-to-right path
# when it is not installed.
try:
    import opt_einsum
except ImportError:
    opt_einsum = None


def get_l2r_path(num_ops):
    """
    Generates a left-to-right contraction path for a given number of operations.

    Parameters:
    - num_ops (int): The number of operations (tensors) to contract.

    Returns:
    - trace (list of tuples): A list of tuples representing the contraction path.
    """
    trace = []
    # Intermediate results are numbered starting at num_ops: step 1 contracts
    # (0, 1) producing tensor num_ops, step 2 contracts (num_ops, 2), etc.
    init_idx = num_ops
    for i in range(1, num_ops):
        if i == 1:
            trace.append((0, i))
        else:
            trace.append((init_idx, i))
            init_idx += 1

    return trace


def parse_opt_trace(equation, shapes, use_opt):
    """
    Parses the optimal contraction path for a given equation and shapes of tensors.
    If opt_einsum is not available, or not requested, or there are fewer than 2 operations,
    use the default left-to-right path.

    Parameters:
    - equation (str): The Einstein summation equation.
    - shapes (list of tuples): The shapes of the tensors to be contracted.
    - use_opt (bool): A flag indicating whether to use the optimal contraction path.

    Returns:
    - trace (list of tuples): A list of tuples representing the contraction path.
    """
    num_ops = len(shapes)
    # use left to right case
    if not opt_einsum or not use_opt or num_ops <= 2:
        trace = get_l2r_path(num_ops)
        return trace

    # `que` tracks which tensor id currently sits at each positional slot of
    # opt_einsum's remaining-operand list.
    que = list(range(num_ops))
    tupled_path = []

    # Get the optimal contraction path using opt_einsum.
    path_info = opt_einsum.contract_path(equation, *shapes, shapes=True)
    for contraction in path_info[1].contraction_list:
        # NOTE(review): assumes every contraction step is pairwise
        # (len(inds) == 2); a single-operand step would raise IndexError —
        # TODO confirm opt_einsum always returns pairwise steps here.
        inds, idx_rm, _, _, _ = contraction
        tupled_path.append((inds[0], inds[1], idx_rm))

    trace = []
    idx = num_ops
    for i, j, _ in tupled_path:
        ind1, ind2 = que[i], que[j]

        # Pop the larger positional index first so the smaller stays valid.
        if i > j:
            i, j = j, i
        que.pop(j)
        que.pop(i)

        # The contraction result is appended as a new tensor id.
        que.append(idx)
        idx += 1

        trace.append((ind1, ind2))

    return trace
diff --git a/mindscience/sciops/python/einsum/sumproduct_pair.py b/mindscience/sciops/python/einsum/sumproduct_pair.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a79db45d93162b15fa4e3736658fc1440c42b1a
--- /dev/null
+++ b/mindscience/sciops/python/einsum/sumproduct_pair.py
@@ -0,0 +1,467 @@
# Copyright 2023-2025 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"sumproduct pair"

from .constants import T_MATMUL, T_MUL, T_OUT, MUST_KM, MUST_MK, MUST_ALL


def _apply_permute(perm_ids, ary):
    """
    Apply a permutation to an array; an empty/None permutation returns the array unchanged.
    """
    if perm_ids:
        return [ary[i] for i in perm_ids]
    return ary


def prod_lst(lst):
    """
    Calculate the product of all elements in the list.
    """
    p = 1
    for k in lst:
        p *= k

    return p


def _prod_shape(a_shape, raw=True):
    """
    Calculate the product of the elements in a_shape.
    If raw is True, a_shape is treated as a list; otherwise, it is treated as a dictionary.
    """
    if raw:
        p = prod_lst(a_shape)
    else:
        p = prod_lst(a_shape.values())

    return p


def _pop_s1_dims(a_shape, a_dims_to_sum):
    """
    Set the dimensions in a_dims_to_sum to 1 in a_shape, then remove all dimensions with value 1.
    Calculate the volume of the modified shape.

    Note: mutates ``a_shape`` in place (entries are set to 1 and then popped).
    """
    for idx in a_dims_to_sum:
        a_shape[idx] = 1

    vol = _prod_shape(a_shape, raw=False)

    for k in list(a_shape.keys()):
        if a_shape[k] == 1:
            a_shape.pop(k)

    return vol


def _is_ab_pat(a, b):
    """
    Check if the pattern (a, b) matches one of the predefined conditions.
    """
    # NOTE(review): 64000 / 128 / 16 / 1024 are empirical shape thresholds;
    # their provenance is not documented here — TODO confirm against
    # hardware-specific matmul tuning data.
    flag = False
    if a > 64000 and a > b > 128:
        flag = True

    if a < 16 and b >= 1024:
        flag = True

    return flag


def _judge_mk_helper(m, k):
    """
    judge if transpose_a can be used; False for some special shape
    """
    if _is_ab_pat(k, m):
        return MUST_KM

    if _is_ab_pat(m, k):
        return MUST_MK

    return MUST_ALL


def _judge_cacl_by_shape(k, m, n, a_shape, b_shape):
    """judge calculate type and mk type

    Args:
        k (set): labels of the contracted K-axis of bmm
        m (set): labels of M-axis of bmm's first tensor
        n (set): labels of N-axis of bmm's second tensor
        a_shape (dict[str, int]): label, size pairs of first tensor
        b_shape (dict[str, int]): label, size pairs of second tensor

    Returns:
        tuple: (a_bms, b_bms, use_mul) — MUST_* layout flags for each operand,
        and use_mul is True when an elementwise multiply should replace matmul
        (K collapses to size 1, or both M and N collapse to size 1).
    """
    k_size = _prod_shape([a_shape[d] for d in k])
    m_size = _prod_shape([a_shape[d] for d in m])
    n_size = _prod_shape([b_shape[d] for d in n])

    r1 = _judge_mk_helper(m_size, k_size)
    r2 = _judge_mk_helper(n_size, k_size)

    use_mul = False
    if k_size == 1:
        use_mul = True

    if m_size == 1 and n_size == 1:
        use_mul = True

    return r1, r2, use_mul


def sumproduct_pair_info(a_shape, b_shape, a_labels_to_sum, b_labels_to_sum, sum_labels):
    """This function calculates the necessary information for performing a sum-product operation on two tensors.
    It determines the batch dimensions, the dimensions to be summed, and the dimensions for matrix multiplication.
    It also updates the information for each tensor and returns the new shape and operation details.

    Note: ``a_shape``/``b_shape`` are mutated in place via ``_pop_s1_dims``.

    Args:
        a_shape (dict[str, int]): Shape of the first tensor.
        b_shape (dict[str, int]): Shape of the second tensor.
        a_labels_to_sum (list[str, ...]): Labels to sum for the first tensor.
        b_labels_to_sum (list[str, ...]): Labels to sum for the second tensor.
        sum_labels (list[str, ...]): Labels to be summed in the operation.

    Returns:
        new_shape (dict): The new shape after the operation.
        a_info (dict): Information for the first tensor.
        b_info (dict): Information for the second tensor.
        out_info (dict): Output information.
    """
    a_weight = _pop_s1_dims(a_shape, a_labels_to_sum)
    b_weight = _pop_s1_dims(b_shape, b_labels_to_sum)

    # Determine batch dimensions
    a_keys, b_keys = set(a_shape.keys()), set(b_shape.keys())
    sum_keys = set(sum_labels)
    batch_dims = (a_keys & b_keys) - sum_keys

    # Determine M and N dimensions
    a_dims = a_keys - batch_dims - sum_keys
    b_dims = b_keys - batch_dims - sum_keys

    a_bms, b_bms, use_mul = _judge_cacl_by_shape(sum_keys, a_dims, b_dims, a_shape, b_shape)

    t_type = T_MUL if use_mul else T_MATMUL
    a_info = {"CACL": t_type, "SUMS": a_labels_to_sum, "WEIGHT": a_weight}
    b_info = {"CACL": t_type, "SUMS": b_labels_to_sum, "WEIGHT": b_weight}

    all_shape = a_shape | b_shape
    new_shape = {k: all_shape[k] for k in all_shape if k not in sum_labels}

    if use_mul:
        return new_shape, a_info, b_info, {"FROM": T_MUL, "SUMS": sum_labels}

    # NOTE(review): joining sets yields an arbitrary (hash-dependent) label
    # order for B/M/N; downstream code re-sorts labels via order_labels, so
    # this is presumably benign — TODO confirm.
    b, k = "".join(batch_dims), "".join(sum_labels)
    m, n = "".join(a_dims), "".join(b_dims)

    a_info.update({"B": b, "M": m, "K": k, "LEFT": True, "BMM_MUST_SEQ": a_bms})
    b_info.update({"B": b, "M": n, "K": k, "LEFT": False, "BMM_MUST_SEQ": b_bms})
    out_info = {"B": b, "M": m, "N": n, "FROM": T_MATMUL}

    return new_shape, a_info, b_info, out_info


def out_cacl_info(a_shape, rhs):
    """This function calculates the output information for a tensor after applying a reduction operation.
    It updates the shape of the tensor and calculates the volume of the reduced dimensions.

    Args:
        a_shape (dict[str, int]): Shape of the tensor.
        rhs (str): Labels to keep in the output.

    Returns:
        info (dict): Output information including the operation type, output labels, weight, and labels to sum.
    """
    new_a_shape = {k: v for k, v in a_shape.items() if k in rhs}
    vol = _prod_shape(new_a_shape, raw=False)
    sum_labels = [k for k in a_shape if k not in rhs]
    info = {"CACL": T_OUT, "OUT": rhs, "WEIGHT": vol, "SUMS": sum_labels}
    return info


def _identity_perm(perm):
    """This function checks if a permutation is the identity permutation (i.e., no permutation).
    If it is the identity permutation, it returns None; otherwise, it returns the permutation as a tuple.

    Note: an empty or single-element permutation is always the identity, so it
    returns None as well.

    Args:
        perm (list): Permutation to check.

    Returns:
        tuple or None: The permutation as a tuple or None if it is the identity permutation.
    """
    flag = True
    for i in range(len(perm) - 1):
        if perm[i] >= perm[i+1]:
            flag = False

    if flag:
        return None
    return tuple(perm)


def _rearrange_tensor_to_mul_helper(order_labels, a_shape):
    """This function rearranges the labels of a tensor to match a specified order.
    It also returns the permutation and the new shape dictionary.

    Args:
        order_labels (list): Desired order of labels.
        a_shape (list of tuples): Shape of the tensor as a list of (label, size) pairs.

    Returns:
        a_permute (tuple or None): Permutation to apply or None if no permutation is needed.
        permed_labels (list): Labels after permutation.
        a_shape_dict (dict): Shape dictionary of the tensor.
    """
    a_labels = [label for label, _ in a_shape]
    a_shape_dict = {label: sp for label, sp in a_shape}
    a_permute = []

    for label in order_labels:
        if label in a_labels:
            a_permute.append(a_labels.index(label))

    permed_labels = _apply_permute(a_permute, a_labels)
    a_permute = _identity_perm(a_permute)
    return a_permute, permed_labels, a_shape_dict


def _process_labels(out_labels, a_labels):
    """This function processes the labels to find missing labels and generates permutation indices.
    It returns the number of missing labels and the permutation indices.

    Args:
        out_labels (list): Desired output labels.
        a_labels (list): Current labels of the tensor.

    Returns:
        tuple: Number of missing labels and permutation indices.
    """
    missing = []
    for label in out_labels:
        if label not in a_labels:
            missing.append(label)

    perm_ids = _get_s2t_perm_indices(a_labels + missing, out_labels)
    return (len(missing), tuple(perm_ids))


def rearrange_tensor_to_mul(order_labels, a_shape, b_shape):
    """
    Generate permute information for tensors a and b based on the order of labels.

    Args:
        order_labels (str): Ordered labels (e.g., "ijk").
        a_shape, b_shape: list[tuple[str, int], ...]
            a_shape: List of tuples (label, size) for tensor a.
            b_shape: List of tuples (label, size) for tensor b.
            e.g., [('i', 128), ('j', 64), ('k', 256)]

    Returns:
        tuple: (a_permute, b_permute) where each is a list of indices for permute.
    """
    a_perm, a_labels, a_shape_dict = _rearrange_tensor_to_mul_helper(order_labels, a_shape)
    b_perm, b_labels, b_shape_dict = _rearrange_tensor_to_mul_helper(order_labels, b_shape)

    out_labels = [label for label in order_labels if label in set(a_labels + b_labels)]

    a_sp1_info = _process_labels(out_labels, a_labels)
    b_sp1_info = _process_labels(out_labels, b_labels)

    s_dict = a_shape_dict | b_shape_dict
    new_shape = [(label, s_dict[label]) for label in out_labels]

    return a_perm, b_perm, (a_sp1_info, b_sp1_info), new_shape


def _sort_bmm_labels(order_labels, a_info, left, labels_g1):
    """Sorts labels into B, M, K groups based on a_info and order_labels.

    Args:
        order_labels (str): labels to be sorted.
        a_info (dict[str, int]): Dictionary containing B, M, K labels.
        left (bool): Determines the order of the output.
        labels_g1 (str): String of labels to be sorted.

    Returns:
        tuple: Two strings representing the sorted labels in BMK and BKM order
        (preferred layout first, depending on ``left``), and a tuple of
        individual B, M, K labels.
    """
    rb, rm, rk = "", "", ""

    for label in order_labels:
        if label in labels_g1:
            if label in a_info["B"]:
                rb += label
            elif label in a_info["M"]:
                rm += label
            elif label in a_info["K"]:
                rk += label

    bmk = rb + rm + rk
    bkm = rb + rk + rm
    if left:
        return bmk, bkm, (rb, rm, rk)
    return bkm, bmk, (rb, rm, rk)


def _group_bmm_indices(cur_labels: str, group_labels: tuple[str, ...]):
    """
    Groups indices of labels into B, M, K categories based on group_labels.

    Args:
        cur_labels (str): Current labels to be grouped.
        group_labels (tuple): Tuple of B, M, K labels.

    Returns:
        tuple: Tuples of indices for B, M, K labels.

    Raises:
        ValueError: If a label in ``cur_labels`` belongs to none of the groups.
    """
    b_idx, m_idx, k_idx = [], [], []

    rb, rm, rk = group_labels
    for i, label in enumerate(cur_labels):
        if label in rb:
            b_idx.append(i)
        elif label in rm:
            m_idx.append(i)
        elif label in rk:
            k_idx.append(i)
        else:
            raise ValueError("Error")

    return tuple(b_idx), tuple(m_idx), tuple(k_idx)


def _get_s2t_perm_indices(s1, s2):
    """
    Gets the permutation indices to transform s1 to s2.

    Args:
        s1 (str): Source string.
        s2 (str): Target string.

    Returns:
        list: List of indices to permute s1 to match s2.
    """
    indices = []
    for char in s2:
        index = s1.index(char)
        indices.append(index)

    return indices


def _judge_transpose_condition(bmm_must_seq, is_left):
    """
    Determines if a transpose is needed based on bmm_must_seq and if it is left operand.

    Args:
        bmm_must_seq: MUST_* layout flag (see ``constants``).
        is_left (bool): Indicates if the operation is on the left side.

    Returns:
        bool or None: Whether a transpose is needed; None when either layout is allowed.
    """
    if bmm_must_seq == MUST_MK:
        transpose_a = not is_left
    elif bmm_must_seq == MUST_KM:
        transpose_a = is_left
    else:
        transpose_a = None

    return transpose_a


def _rearrange_tensor_to_bmm_helper(order_labels, a_shape, a_info, left):
    """
    Helper function to rearrange tensor labels and determine if a transpose is needed.

    Args:
        order_labels (list): List of labels to be sorted.
        a_shape (list): List of tuples (label, size) for tensor; example: [('i', 128), ('j', 64), ('k', 256)].
        a_info (dict): Dictionary containing B, M, K labels; B:["z"], M:["ij"] K:["k"]
        left (bool): Determines the order of the output.

    Returns:
        tuple: Permutation indices, transpose information, and new shapes for B and M.
    """
    a_labels = "".join([label for label, _ in a_shape])
    labels1, labels2, group_labels = _sort_bmm_labels(order_labels, a_info, left, a_labels)

    transpose_a = _judge_transpose_condition(a_info["BMM_MUST_SEQ"], left)
    if transpose_a is None:
        # Either layout is allowed: pick the one matching the current label
        # order so no permute is needed; default to non-transposed otherwise.
        if a_labels == labels1:
            transpose_a = False
        elif a_labels == labels2:
            transpose_a = True
        else:
            transpose_a = False

    target_labels = labels2 if transpose_a else labels1
    perm_ids = _get_s2t_perm_indices(a_labels, target_labels)

    a_shape_dict = {label: sp for label, sp in a_shape}
    new_shape_b = tuple((label, a_shape_dict[label]) for label in group_labels[0])
    new_shape_m = tuple((label, a_shape_dict[label]) for label in group_labels[1])

    group_shape_idxs = _group_bmm_indices(target_labels, group_labels)
    new_perm_ids = _identity_perm(perm_ids)

    return new_perm_ids, (transpose_a, group_shape_idxs), (new_shape_b, new_shape_m)


def rearrange_tensor_to_bmm(order_labels, a_shape, a_info, b_shape, b_info):
    """
    Rearranges tensors to prepare for batch matrix multiplication (BMM).

    Args:
        order_labels (str): Labels to be sorted.
        a_shape (list[tuple[str, int], ...]): List of tuples (label, size) for tensor a.
        a_info (dict[str, int]): Dictionary containing B, M, K labels for tensor a.
        b_shape (list[tuple[str, int], ...]): List of tuples (label, size) for tensor b.
        b_info (dict[str, int]): Dictionary containing B, M, K labels for tensor b.

    Returns:
        tuple: Permutation indices for a and b, BMM information, and the new shape.
    """
    a_perm, a_bmm_info, a_group_shapes = _rearrange_tensor_to_bmm_helper(order_labels, a_shape, a_info, True)
    b_perm, b_bmm_info, b_group_shapes = _rearrange_tensor_to_bmm_helper(order_labels, b_shape, b_info, False)
    # Result shape: batch labels + a's M labels + b's M (i.e. N) labels.
    new_shape = a_group_shapes[0] + a_group_shapes[1] + b_group_shapes[1]
    transpose_a, a_shape_idxs = a_bmm_info
    transpose_b, b_shape_idxs = b_bmm_info

    a_b, a_m, a_k = a_shape_idxs
    b_m = b_shape_idxs[1]

    is_batch = len(a_b) > 0
    bmm_info = (is_batch, transpose_a, transpose_b, a_b, a_m, b_m, a_k)

    return a_perm, b_perm, bmm_info, new_shape


def rearrange_tensor_to_out(a_shape, out_labels):
    """
    Rearranges tensor labels to match the output labels.

    Args:
        a_shape (list[tuple[str, int], ...]): List of tuples (label, size) for tensor a.
        out_labels (str): Desired output labels.

    Returns:
        tuple or None: Permutation indices to rearrange the tensor to match the
        output labels, or None when no permutation is needed.
    """
    in_labels = "".join([k for k, _ in a_shape])
    perm = _get_s2t_perm_indices(in_labels, out_labels)
    return _identity_perm(perm)
diff --git a/tests/sciops/test_einsum.py b/tests/sciops/test_einsum.py
new file mode 100644
index 0000000000000000000000000000000000000000..48c20d7010f9d14ac78a129804dad29c28478ea0
--- /dev/null
+++ b/tests/sciops/test_einsum.py
@@ -0,0 +1,334 @@
# Copyright 2023-2025 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""test sciops einsum"""

import numpy as np
import pytest
from mindspore import ops

from mindscience.sciops import Einsum

# Mean-absolute-error tolerance against np.einsum.
# NOTE(review): the name looks like a typo of RTOL; renaming would touch all uses.
ROTL = 1e-5

def calculate(equation, shapes, use_opt=True):
    # Build an Einsum op and evaluate it on random tensors of the given shapes.
    es = Einsum(equation, use_opt=use_opt)
    tensors = [ops.randn(tp) for tp in shapes]
    return es(*tensors)

def cmp_accuracy(equation, shapes, use_opt=True):
    # Compare Einsum against numpy's einsum; return the mean absolute error.
    # NOTE(review): MindSpore tensors are passed straight into np.einsum —
    # assumes they support the numpy array protocol; TODO confirm, otherwise
    # convert with .asnumpy().
    es = Einsum(equation, use_opt=use_opt)
    tensors = [ops.randn(tp) for tp in shapes]
    ms_res = es(*tensors)
    np_res = np.einsum(equation, *tensors)
    diff = np.abs(np_res - ms_res)
    return np.mean(diff)


@pytest.mark.level0
@pytest.mark.platform_arm_ascend910b_training
@pytest.mark.env_onecard
def test_base_equation():
    """
    Feature: Einsum
    Description: test Einsum with different equations
    Expectation: success
    """
    equation = "ij->ji"
    shapes = [(15, 38)]
    res = calculate(equation, shapes)
    assert res.shape == (38, 15)

    equation = "ijkn->knji"
    shapes = [(15, 38, 123, 251)]
    res = calculate(equation, shapes)
    assert res.shape == (123, 251, 38, 15)

    equation = "ij,j->i"
    shapes = [(15, 38), (38,)]
    res = calculate(equation, shapes)
    assert res.shape == (15,)

    equation = "abcd,d->abc"
    shapes = [(15, 38, 123, 251), (251,)]
    res = calculate(equation, shapes)
    assert res.shape == (15, 38, 123)

    equation = "ij,jk->ik"
    shapes = [(512, 1024), (1024, 512)]
    res = calculate(equation, shapes)
    assert res.shape == (512, 512)

    equation = "ij,kj->ik"
    shapes = [(15, 38), (123, 38)]
    res = calculate(equation, shapes)
    assert res.shape == (15, 123)

    equation = "abCd,dFg->abCFg"
    shapes = [(15, 38, 123, 251), (251, 123, 38)]
    res = calculate(equation, shapes)
    assert res.shape == (15, 38, 123, 123, 38)

    equation = "i,i->"
    shapes = [(1024,), (1024,)]
    res = calculate(equation, shapes)
    assert res.shape == ()

    equation = "ij,ij->ij"
    shapes = [(15, 38), (15, 38)]
    res = calculate(equation, shapes)
    assert res.shape == (15, 38)

    equation = "ijkn,ijkn->ijkn"
    shapes = [(15, 38, 123, 251), (15, 38, 123, 251)]
    res = calculate(equation, shapes)
    assert res.shape == (15, 38, 123, 251)

    equation = "ii->"
    shapes = [(256, 256)]
    res = calculate(equation, shapes)
    assert res.shape == ()

    equation = "iji->j"
    shapes = [(15, 38, 15)]
    res = calculate(equation, shapes)
    assert res.shape == (38,)

    equation = "nij,njk->nik"
    shapes = [(15, 38, 123), (15, 123, 251)]
    res = calculate(equation, shapes)
    assert res.shape == (15, 38, 251)

    equation = "bij,jk->bik"
    shapes = [(15, 38, 123), (123, 251)]
    res = calculate(equation, shapes)
    assert res.shape == (15, 38, 251)

    equation = "ij->i"
    shapes = [(15, 38)]
    res = calculate(equation, shapes)
    assert res.shape == (15,)

    equation = "ijkl->ik"
    shapes = [(15, 38, 123, 251)]
    res = calculate(equation, shapes)
    assert res.shape == (15, 123)

    equation = "ijk,jk->i"
    shapes = [(15, 38, 123), (38, 123)]
    res = calculate(equation, shapes)
    assert res.shape == (15,)

    equation = "i,j->ij"
    shapes = [(15,), (38,)]
    res = calculate(equation, shapes)
    assert res.shape == (15, 38)

    equation = "ij,ab->ijab"
    shapes = [(256, 16), (32, 16)]
    res = calculate(equation, shapes)
    assert res.shape == (256, 16, 32, 16)

    equation = "ij,jk,kl->il"
    shapes = [(256, 16), (16, 16), (16, 256)]
    res = calculate(equation, shapes)
    assert res.shape == (256, 256)

    equation = "bn,anm,bm->ba"
    shapes = [(15, 38), (15, 38, 123), (15, 123)]
    res = calculate(equation, shapes)
    assert res.shape == (15, 15)

    equation = "ij,ij->"
    shapes = [(15, 38), (15, 38)]
    res = calculate(equation, shapes)
    assert res.shape == ()

    equation = "ijkn,ijkn->"
    shapes = [(15, 38, 123, 251), (15, 38, 123, 251)]
    res = calculate(equation, shapes)
    assert res.shape == ()


@pytest.mark.level0
@pytest.mark.platform_arm_ascend910b_training
@pytest.mark.env_onecard
def test_complex_equation():
    """
    Feature: Einsum
    Description: test Einsum with different equations
    Expectation: success
    """
    equation = 'ijk,zui,zuj,zuw->zwk'
    shapes = [(9, 9, 33), (156, 32, 9), (156, 32, 9), (156, 32, 20)]
    res = calculate(equation, shapes)
    assert res.shape == (156, 20, 33)

    equation = 'ijk,zui,zuj,uw->zwk'
    shapes = [(9, 9, 33), (156, 32, 9), (156, 32, 9), (32, 9)]
    res = calculate(equation, shapes)
    assert res.shape == (156, 9, 33)

    equation = 'ijk,zui,zuj,zu->zuk'
    shapes = [(9, 9, 33), (156, 32, 9), (156, 32, 9), (156, 32)]
    res = calculate(equation, shapes)
    assert res.shape == (156, 32, 33)

    equation = 'ijk,zui,zuj,u->zuk'
    shapes = [(9, 9, 33), (156, 32, 9), (156, 32, 9), (32,)]
    res = calculate(equation, shapes)
    assert res.shape == (156, 32, 33)

    equation = 'ijk,zui,zvj,zuvw->zwk'
    shapes = [(9, 9, 33), (156, 32, 9), (156, 32, 9), (156, 32, 32, 9)]
    res = calculate(equation, shapes)
    assert res.shape == (156, 9, 33)

    equation = 'ijk,zui,zvj,uvw->zwk'
    shapes = [(9, 9, 33), (156, 32, 9), (156, 32, 9), (32, 32, 9)]
    res = calculate(equation, shapes)
    assert res.shape == (156, 9, 33)

    equation = 'ijk,zui,zvj,uv->zuk'
    shapes = [(9, 9, 33), (156, 32, 9), (156, 32, 9), (32, 32)]
    res = calculate(equation, shapes)
    assert res.shape == (156, 32, 33)

    equation = 'ijk,zui,zvj,uv->zuvk'
    shapes = [(9, 9, 33), (156, 32, 9), (156, 32, 9), (32, 32)]
    res = calculate(equation, shapes)
    assert res.shape == (156, 32, 32, 33)

    equation = 'ijk,zui,zvj,zuv->zuk'
    shapes = [(9, 9, 33), (156, 32, 9), (156, 32, 9), (156, 32, 32)]
    res = calculate(equation, shapes)
    assert res.shape == (156, 32, 33)

    equation = 'zui,zuj,kij->zuk'
    shapes = [(660, 128, 16), (660, 128, 16), (156, 16, 16)]
    res = calculate(equation, shapes)
    assert res.shape == (660, 128, 156)

    equation = 'vun,zuni->zvi'
    shapes = [(128, 128, 4), (660, 128, 4, 1)]
    res = calculate(equation, shapes)
    assert res.shape == (660, 128, 1)


@pytest.mark.level0
@pytest.mark.platform_arm_ascend910b_training
@pytest.mark.env_onecard
def test_ellipsis_equation():
    """
    Feature: Einsum
    Description: test Einsum with different equations including ellipsis
    Expectation: success
    """
    equation = "...bij,...jk->...bik"
    shapes = [(256, 128, 16), (16, 32)]
    res = calculate(equation, shapes)
    assert res.shape == (256, 128, 32)

    equation = "...bij,...jk->bik..."
    shapes = [(16, 256, 128, 16), (16, 16, 32)]
    res = calculate(equation, shapes)
    assert res.shape == (256, 128, 32, 16)

    equation = "...bij,j...k->...bik"
    shapes = [(15, 8, 256, 128, 16), (16, 15, 8, 32)]
    res = calculate(equation, shapes)
    assert res.shape == (15, 8, 256, 128, 32)

    equation = 'zui...,zuj,...kij->zu...k'
    shapes = [(256, 128, 16), (256, 128, 16), (156, 16, 16)]
    res = calculate(equation, shapes)
    assert res.shape == (256, 128, 156)

    equation = 'zui...,zuj,...kij->zu...k'
    shapes = [(256, 128, 16, 8), (256, 128, 16), (8, 156, 16, 16)]
    res = calculate(equation, shapes)
    assert res.shape == (256, 128, 8, 156)

    equation = 'zui...,zuj,...kij->zu...k'
    shapes = [(256, 128, 16, 8, 4), (256, 128, 16), (8, 4, 156, 16, 16)]
    res = calculate(equation, shapes)
    assert res.shape == (256, 128, 8, 4, 156)

    equation = 'zu...i,zuj,...kij->zuk'
    shapes = [(256, 128, 8, 4, 16), (256, 128, 16), (8, 4, 156, 16, 16)]
    res = calculate(equation, shapes)
    assert res.shape == (256, 128, 156)


@pytest.mark.level0
@pytest.mark.platform_arm_ascend910b_training
@pytest.mark.env_onecard
def test_accuracy():
    """
    Feature: Einsum
    Description: test Einsum accuracy with different equations
    Expectation: success
    """
    equation = 'zui,zuj,kij->zuk'
    shapes = [(256, 128, 16), (256, 128, 16), (156, 16, 16)]
    diff = cmp_accuracy(equation, shapes)
    assert diff < ROTL

    equation = "ijkl->ik"
    shapes = [(9, 20, 31, 133)]
    diff = cmp_accuracy(equation, shapes)
    assert diff < ROTL

    equation = "ij,jk->ik"
    shapes = [(9, 20), (20, 31)]
    diff = cmp_accuracy(equation, shapes)
    assert diff < ROTL

    equation = "iji->j"
    shapes = [(31, 20, 31)]
    diff = cmp_accuracy(equation, shapes)
    assert diff < ROTL

    equation = "nij,njk->nik"
    shapes = [(9, 20, 31), (9, 31, 133)]
    diff = cmp_accuracy(equation, shapes)
    assert diff < ROTL

    equation = "bij,jk->bik"
    shapes = [(256, 128, 16), (16, 32)]
    diff = cmp_accuracy(equation, shapes)
    assert diff < ROTL

    equation = "iiik->ik"
    shapes = [(32, 32, 32, 128)]
    diff = cmp_accuracy(equation, shapes)
    assert diff < ROTL


@pytest.mark.level0
@pytest.mark.platform_arm_ascend910b_training
@pytest.mark.env_onecard
def test_labelorder_equation():
    """
    Feature: Einsum
    Description: test Einsum labelorder with different equations
    Expectation: success
    """
    # With optimization disabled, the label-order pass may still swap the
    # operands of a contraction; these cases pin the expected swapped trace.
    equation = "ijk,zi->zjk"
    es = Einsum(equation, use_opt=False)
    assert es.trace == ((1, 0),)

    equation = "jik,zi->zjk"
    es = Einsum(equation, use_opt=False)
    assert es.trace == ((1, 0),)
diff --git a/tests/sciops/test_evoformer_attention.py b/tests/sciops/test_evoformer_attention.py
new file mode 100644
index 0000000000000000000000000000000000000000..ec3d2e7f76b060563f473faa04b37571e7d57767
--- /dev/null
+++ b/tests/sciops/test_evoformer_attention.py
@@ -0,0 +1,83 @@
# Copyright 2023 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""test sciops evoformer attention"""

import pytest
import numpy as np

import mindspore as ms
from mindspore import ops, Tensor

from mindscience.sciops import evo_attention


@pytest.mark.level0
@pytest.mark.platform_arm_ascend910b_training
@pytest.mark.env_onecard
def test_evoformer_attention_shape():
    """
    Feature: Test Evoformer in platform ascend 910B.
    Description: The forward output should have the expected shape.
    Expectation: Success or throw AssertionError.
    """
    ms.set_device(device_target="Ascend")
    b, n, s, d = 2048, 1, 2048, 8

    query = Tensor(np.random.uniform(-0.1, 0.1, (b, s, n, d)), ms.bfloat16)
    key = Tensor(np.random.uniform(-0.1, 0.1, (b, s, n, d)), ms.bfloat16)
    value = Tensor(np.random.uniform(-0.1, 0.1, (b, s, n, d)), ms.bfloat16)
    bias = Tensor(np.random.uniform(-0.1, 0.1, (1, n, s, s)), ms.bfloat16)

    # mask: 1 on valid key positions, 0 on the last 5 (padded) positions;
    # the op takes the inverted mask (1 = masked out).
    mask = np.concatenate((np.ones((b, 1, 1, s - 5)).astype(np.float32),
                           np.zeros((b, 1, 1, 5)).astype(np.float32)), axis=-1)
    evo_mask = Tensor(1 - mask.astype(np.uint8))

    output = evo_attention(query, key, value, n, bias, evo_mask, scale_value=1.0, input_layout="BSND")
    # BSND layout in -> BSND layout out: shape must match the query's.
    assert output.shape == (b, s, n, d), f"For `Evoformer_Attention`, the output should be {(b, s, n, d)}, \
but got {output.shape}."


@pytest.mark.level0
@pytest.mark.platform_arm_ascend910b_training
@pytest.mark.env_onecard
def test_evoformer_attention_precision():
    """
    Feature: Test Evoformer in platform ascend 910B.
    Description: The forward output should have the expected precision.
    Expectation: Success or throw AssertionError.
    """
    ms.set_device(device_target="Ascend")
    b, n, s, d = 128, 1, 128, 2
    scale_value = 2.0

    query = Tensor(np.random.uniform(-0.1, 0.1, (b, n, s, d)), ms.float16)
    key = Tensor(np.random.uniform(-0.1, 0.1, (b, n, s, d)), ms.float16)
    value = Tensor(np.random.uniform(-0.1, 0.1, (b, n, s, d)), ms.float16)
    bias = Tensor(np.random.uniform(-0.1, 0.1, (1, n, s, s)), ms.float16)

    # mask: 1 on valid key positions, 0 on the last 5 (padded) positions;
    # the op takes the inverted mask (1 = masked out).
    mask = np.concatenate((np.ones((b, 1, 1, s - 5)).astype(np.float32),
                           np.zeros((b, 1, 1, 5)).astype(np.float32)), axis=-1)
    evo_mask = Tensor(1 - mask.astype(np.uint8))

    # Result of the fused kernel under test, materialized as numpy so the
    # comparison below is ndarray-vs-ndarray rather than Tensor-vs-ndarray.
    actual_output = evo_attention(query, key, value, n, bias, evo_mask,
                                  scale_value=scale_value, input_layout="BNSD").asnumpy()

    # Reference: plain scaled-dot-product attention built from generic ops.
    # Masked positions get a large negative additive bias before softmax.
    attention_mask = 1e12 * (Tensor(mask) - 1)
    logits = ops.BatchMatMul(transpose_b=True)(query, key)
    logits = logits * scale_value
    logits = ops.add(logits, attention_mask.astype(ms.float16))
    logits = ops.add(logits, bias)
    weight = ops.Softmax()(logits)
    expected_output = ops.BatchMatMul()(weight, value).asnumpy()

    # assert_allclose convention: (actual, desired) — the kernel result is
    # the actual value, the generic-ops computation is the desired reference.
    np.testing.assert_allclose(actual_output, expected_output, atol=1e-4, rtol=1e-7)