# Reconstructed from a mangled diff against:
# PixelLink_ID3056_for_TensorFlow/pylib/src/util/test.py
assert_equal = np.testing.assert_equal
assert_array_equal = np.testing.assert_array_equal
assert_almost_equal = np.testing.assert_almost_equal

from libs.tools import *
import math
import torch.nn as nn
import re
import numpy as np


# The model contains 6 superkernel (sk) segments: some sk segments are identical
# and reuse the compile cache; others differ (extra optional bias input or a
# different option config) and therefore go through online compilation.
@set_timeout(500)
def tc_ge_torch2x_superkernel_cache_reuse_0002(kernel_path, case_path, case_name, soc_version, device_id):
    """Superkernel cache-reuse test case.

    Builds a network with six superkernel segments mixing GroupedMatmul,
    MoeGatingTopK and DequantSwigluQuant ops, to exercise superkernel
    compile-cache reuse vs. online compilation.

    Args:
        kernel_path: directory for kernel compilation artifacts.
        case_path: working directory for this test case.
        case_name: test-case name (used as a sub-directory and in logs).
        soc_version: target SoC version string.
        device_id: NPU device index (string or int) to run on.
    """
    print_log("Current case path: %s" % case_path)
    print_log("Current kernel compile path: %s" % kernel_path)
    print_log("Current soc version: %s" % soc_version)
    print_log("Current device id: %s" % device_id)
    print_log("Begin to execute test case: %s" % case_name)
    # BUG FIX: original used '&' (bitwise-and) where '%' (string formatting) is required.
    os.system("mkdir -p %s" % os.path.join(case_path, case_name))
    src_graph = '''
    |o>----------------------------------------------------------
    |o>test case: %s
    |o> split+matmul+concat+permute+reducemean
    |o> sk1:GroupedMatmul+MoeGatingTopK+GroupedMatmul+DequantSwigluQuant+cast+GroupedMatmul
    |o>
    |o> cast+matmul+permute+concat+narrow+reducemean
    |o>
    |o> sk2:GroupedMatmul+MoeGatingTopK+GroupedMatmul+DequantSwigluQuant+cast+GroupedMatmul
    |o>
    |o> concat+transpose+repeat+cast+reducemean
    |o>
    |o> sk3:DequantSwigluQuant+DequantSwigluQuant+DequantSwigluQuant
    |o>
    |o> sk4:GroupedMatmul+MoeGatingTopK+GroupedMatmul+GroupedMatmul
    |o>
    |o> narrow+concat+transpose+repeat
    |o>
    |o> sk5:GroupedMatmul+MoeGatingTopK+GroupedMatmul+GroupedMatmul
    |o>
    |o> narrow+concat+transpose+repeat
    |o>
    |o> sk6:GroupedMatmul+MoeGatingTopK+GroupedMatmul+DequantSwigluQuant+GroupedMatmul
    |o>
    |o> netoutput
    |o>-----------------------------------------------------------
    |o>sk fusion results:
    |o> sk1: scope1[GroupedMatmul+MoeGatingTopK+GroupedMatmul+GroupedMatmul] scope2[DequantSwigluQuant]
    |o> sk2: scope1[GroupedMatmul+MoeGatingTopK+GroupedMatmul+GroupedMatmul] scope2[DequantSwigluQuant]
    |o> sk3: scope1[DequantSwigluQuant] scope2[DequantSwigluQuant+DequantSwigluQuant]
    |o> sk4: GroupedMatmul+MoeGatingTopK+GroupedMatmul+GroupedMatmul
    |o> sk5: GroupedMatmul+MoeGatingTopK+GroupedMatmul+GroupedMatmul
    |o> sk6: GroupedMatmul+MoeGatingTopK+GroupedMatmul+GroupedMatmul
    |o>-----------------------------------------------------------
    '''

    print_log(src_graph % case_name)
    pb_path = os.path.join(case_path, case_name)
    # BUG FIX: '.formate(' is not a str method; corrected to '.format('.
    os.system("rm -rf {}/".format(pb_path))

    import torch
    import torch_npu
    import torchair as tng
    # BUG FIX: 'custom_torch-ops' is a syntax error ('-' is illegal in a module
    # path); the torchair contrib module is 'custom_torch_ops'.
    import torchair._contrib.custom_torch_ops
    from torchair.ge_concrete_graph import ge_apis as ge
    from torchair.configs.compiler_config import CompilerConfig

    # BUG FIX: 'torch.mpu' does not exist; the Ascend backend is 'torch.npu'.
    torch.npu.set_device(int(device_id))
    # BUG FIX (x2 below): 'np.random.unform' -> 'np.random.uniform'.
    data1 = torch.from_numpy(np.random.uniform(-1, 1, size=(128, 64))).to(torch.float32).npu()
    data2 = torch.from_numpy(np.random.uniform(-1, 1, size=(8, 320))).to(torch.float32).npu()

    # BUG FIX: '.npu' was missing the call parentheses, leaving a bound method
    # instead of an NPU tensor.
    gmm1_x1 = torch.from_numpy(np.random.uniform(-1, 1, size=(256, 8))).to(torch.float32).npu()
    gmm1_weight = torch.from_numpy(np.random.uniform(-1, 1, size=(320, 32))).to(torch.float32).npu()

    moe1_bias = torch.from_numpy(np.random.uniform(-2, 2, size=(256, ))).to(torch.float32).npu()

    arn1_x2 = torch.from_numpy(np.random.uniform(-3, 3, size=(256, 8))).to(torch.float32).npu()
    arn1_gamma = torch.from_numpy(np.random.uniform(-3, 3, size=(256, 8))).to(torch.float32).npu()

    gmm2_x1 = torch.from_numpy(np.random.uniform(-3, 3, size=(128, 256))).to(torch.float32).npu()
    gmm2_x2 = torch.from_numpy(np.random.uniform(-3, 3, size=(256, 256))).to(torch.float32).npu()
    gmm2_x3 = torch.from_numpy(np.random.uniform(-3, 3, size=(1, 256))).to(torch.float32).npu()
    gmm2_x = [gmm2_x1, gmm2_x2, gmm2_x3]

    # NOTE(review): uniform(1, 1, ...) yields constant 1.0 tensors — looks
    # intentional (deterministic scales) but confirm the bounds were not meant
    # to be (-1, 1).
    data3 = torch.from_numpy(np.random.uniform(1, 1, size=(16, 256))).to(torch.float32).npu()
    dsq1_activate_scale = torch.from_numpy(np.random.uniform(1, 1, size=(16, 1))).to(torch.float32).npu()
    dsq1_group_index = None

    gmm3_weight1 = torch.from_numpy(np.random.uniform(-3, 3, size=(32, 8))).to(torch.float32).npu()
    gmm3_weight2 = torch.from_numpy(np.random.uniform(-3, 3, size=(128, 320))).to(torch.float32).npu()
    gmm3_weight3 = torch.from_numpy(np.random.uniform(-3, 3, size=(8, 256))).to(torch.float32).npu()
    gmm3_weight = [gmm3_weight1, gmm3_weight2, gmm3_weight3]

    class NoSK_Network(nn.Module):
        """Reference network built without superkernel scopes."""

        def __init__(self):
            super().__init__()

        def forward(self, data1, data2, gmm1_x1, gmm1_weight, moe1_bias, arn1_x2, arn1_gamma, gmm2_x, data3, dsq1_activate_scale, dsq1_group_index, gmm3_weight):
            # BUG FIX: missing comma between '8' and 'dim=1'.
            split = torch.split(data1, 8, dim=1)  # [128,64] -> 8 chunks of [128,8] along dim 1
            matmul_01 = torch.matmul(split[0], data2)
            concat_01 = torch.cat([split[1], split[2]], 0)
            permute_01 = concat_01.permute(1, 0)
            permute_02 = split[3].permute(1, 0)
            permute_03 = split[4].permute(1, 0)
            concat_dsq_01 = torch.cat([permute_02, permute_03], 1)
            mean_01 = torch.mean(concat_dsq_01, 0, False)
            mean_02 = torch.mean(permute_03, 0, True)

            # BUG FIX: 'torch_npu_grouped_matmul' (undefined name) ->
            # 'torch_npu.npu_grouped_matmul'; 'matul_01' -> 'matmul_01'.
            grouped_matmul_01 = torch_npu.npu_grouped_matmul(group_type=-1, x=[matmul_01, gmm1_x1], weight=[gmm1_weight, permute_01])

            moe_gating_top_k_01 = torch_npu.npu_moe_gating_top_k(x=grouped_matmul_01[1], bias=moe1_bias,
                                                                 k=8, k_group=4, group_count=8, group_select_mode=1, norm_type=1)
            grouped_matmul_02 = torch_npu.npu_grouped_matmul(group_type=-1, x=gmm2_x, weight=[moe_gating_top_k_01[0], moe_gating_top_k_01[0], moe_gating_top_k_01[2]])
            dequant_swiglu_quant_01 = torch_npu.npu_dequant_swiglu_quant(x=data3, weight_scale=mean_01, activation_scale=dsq1_activate_scale,
                                                                         bias=None, quant_scale=mean_02, quant_offset=None,
                                                                         group_index=None, activate_left=False, quant_mode=1)

            cast_dsq_1 = dequant_swiglu_quant_01[0].to(torch.float32)
            grouped_matmul_03 = torch_npu.npu_grouped_matmul(group_type=-1, x=[grouped_matmul_01[0], cast_dsq_1, grouped_matmul_02[0]], weight=gmm3_weight)

            cast_01 = grouped_matmul_03[0].to(torch.float32)
            matmul_02 = torch.matmul(cast_01, data2)
            permute_04 = grouped_matmul_03[1].permute(1, 0)
            concat_02 = torch.cat([permute_04, permute_04], 1)
            narrow_01 = grouped_matmul_03[2].narrow(0, 0, 8)
            permute_05 = split[5].permute(1, 0)
            permute_06 = split[6].permute(1, 0)
            concat_dsq_02 = torch.cat([permute_05, permute_06], 1)
            mean_03 = torch.mean(concat_dsq_02, 0, False)
            mean_04 = torch.mean(permute_06, 0, True)

            grouped_matmul_04 = torch_npu.npu_grouped_matmul(group_type=-1, x=[matmul_02, gmm1_x1], weight=[concat_02, narrow_01])

            # BUG FIX: 'moe_gating_top_l_02' typo -> 'moe_gating_top_k_02'.
            moe_gating_top_k_02 = torch_npu.npu_moe_gating_top_k(x=grouped_matmul_04[1], bias=moe1_bias,
                                                                 k=8, k_group=4, group_count=8, group_select_mode=1, norm_type=1)
            # NOTE(review): the source diff is truncated here — the remainder of
            # forward() (and the graph compile/execute steps of the test case)
            # is missing and must be restored from the original file.