diff --git a/operator_contrib/UnalignAddCustomSample/FrameworkLaunch/AddCustom/op_host/add_custom.cpp b/operator_contrib/UnalignAddCustomSample/FrameworkLaunch/AddCustom/op_host/add_custom.cpp index b61a67f119503aa924b79f4d6df351bddbad69d5..4e20e264c71e5142be2fdd5cc9f555942c8ad9b6 100644 --- a/operator_contrib/UnalignAddCustomSample/FrameworkLaunch/AddCustom/op_host/add_custom.cpp +++ b/operator_contrib/UnalignAddCustomSample/FrameworkLaunch/AddCustom/op_host/add_custom.cpp @@ -19,13 +19,12 @@ static ge::graphStatus TilingFunc(gert::TilingContext* context) uint32_t typeLength = 0; ge::TypeUtils::GetDataTypeLength(context->GetInputDesc(0)->GetDataType(), typeLength); uint32_t inputLength = inputNum * typeLength; - uint32_t inputBytes = inputLength / inputNum; // There are a total of 3 shared UB spaces in the input and output. If it's int8, there are 2 more TBUFs - uint32_t ubDataNumber = (inputBytes == 1) ? 5 : 3; + uint32_t ubDataNumber = (typeLength == 1) ? 5 : 3; // The number of 32B data blocks that can be used for each data. DOUBLE BUFFER is already counted here uint32_t tileBlockNum = (ubSize / BLOCK_SIZE / BUFFER_NUM) / ubDataNumber; - uint32_t tileDataNum = (tileBlockNum * BLOCK_SIZE) / inputBytes; + uint32_t tileDataNum = (tileBlockNum * BLOCK_SIZE) / typeLength; // Input data for 32B alignment uint32_t inputLengthAlgin32 = (((inputLength + BLOCK_SIZE - 1) / BLOCK_SIZE) * BLOCK_SIZE); @@ -36,7 +35,7 @@ static ge::graphStatus TilingFunc(gert::TilingContext* context) uint32_t tailBlockNum = (inputLengthAlgin32 / BLOCK_SIZE) % coreNum; // Small chunks are calculated and sliced several times using the number of data on each core - uint32_t smallCoreDataNum = everyCoreInputBlockNum * BLOCK_SIZE / inputBytes; + uint32_t smallCoreDataNum = everyCoreInputBlockNum * BLOCK_SIZE / typeLength; uint32_t smallTileNum = everyCoreInputBlockNum / tileBlockNum; uint32_t finalSmallTileNum = (everyCoreInputBlockNum % tileBlockNum) == 0 ? smallTileNum : smallTileNum + 1; // Tail block calculation for small chunks of data @@ -45,7 +44,7 @@ static ge::graphStatus TilingFunc(gert::TilingContext* context) // The total length of a large block of data is 32B larger than that of a small block of data everyCoreInputBlockNum += 1; - uint32_t bigCoreDataNum = everyCoreInputBlockNum * BLOCK_SIZE / inputBytes; + uint32_t bigCoreDataNum = everyCoreInputBlockNum * BLOCK_SIZE / typeLength; uint32_t bigTileNum = everyCoreInputBlockNum / tileBlockNum; uint32_t finalBigTileNum = (everyCoreInputBlockNum % tileBlockNum) == 0 ? bigTileNum : bigTileNum + 1; uint32_t bigTailDataNum = bigCoreDataNum - tileDataNum * bigTileNum;