diff --git a/tf_adapter/kernels/amct_ascend_anti_quant.cc b/tf_adapter/kernels/amct_ascend_anti_quant.cc
index a731b9d76597c49506172ccd55cff2cd21ea4202..7474d9b1d4f0a27d75d70e703a35306552ba8783 100644
--- a/tf_adapter/kernels/amct_ascend_anti_quant.cc
+++ b/tf_adapter/kernels/amct_ascend_anti_quant.cc
@@ -28,19 +28,59 @@ limitations under the License.
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/shape_inference.h"
 #include "tensorflow/core/framework/op_kernel.h"
+#include "tf_adapter/kernels/amct_common.h"
 
 using namespace tensorflow;
 
+template <typename T>
+int AscendAntiQuantInternelCpu(struct AntiQuantInputParam<T> input_param) {
+  for (int i = 0; i < input_param.size; i++) {
+    input_param.out[i] = input_param.in[i] * input_param.scale;
+  }
+  return 0;
+}
+
 template <typename T>
 class AscendAntiQuantOp : public OpKernel {
-public:
-  explicit AscendAntiQuantOp(OpKernelConstruction* context) : OpKernel(context){}
+ public:
+  explicit AscendAntiQuantOp(OpKernelConstruction* context) : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("scale", &(scale)));
+    OP_REQUIRES_OK(context, context->GetAttr("offset", &(offset)));
+    input_param.size = 0;
+    input_param.in = NULL;
+    input_param.out = NULL;
+    input_param.scale = scale;
+    input_param.offset = offset;
+  }
+
+  ~AscendAntiQuantOp(){}
+
+  void Compute(OpKernelContext* context) override {
+    // Grab the input tensor
+    const Tensor& input_tensor = context->input(0);
+
+    // Create an output tensor
+    Tensor* output_tensor = NULL;
+    OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(), &output_tensor));
+
+    // Do the computation.
+    OP_REQUIRES(context, input_tensor.NumElements() <= tensorflow::kint32max,
+                errors::InvalidArgument("Too many elements in tensor"));
+
+    input_param.size = static_cast<int>(input_tensor.NumElements());
+    input_param.in = input_tensor.flat<T>().data();
+    input_param.out = output_tensor->flat<T>().data();
 
-  ~AscendAntiQuantOp(){}
+    if (input_param.size == 0) {
+      OP_REQUIRES(context, false, errors::InvalidArgument("AscendAntiQuantOp: input_tensor is empty!"));
+    }
+    AscendAntiQuantInternelCpu<T>(input_param);
+  }
 
-  void Compute(OpKernelContext* context) override{}
+ private:
+  struct AntiQuantInputParam<T> input_param;
+  float scale;
+  float offset;
 };
 
-REGISTER_KERNEL_BUILDER(
-    Name("AscendAntiQuant").Device(tensorflow::DEVICE_CPU).TypeConstraint<float>("T"),
-    AscendAntiQuantOp<float>);
+REGISTER_KERNEL_BUILDER(Name("AscendAntiQuant").Device(tensorflow::DEVICE_CPU).TypeConstraint<float>("T"), AscendAntiQuantOp<float>);
diff --git a/tf_adapter/kernels/amct_ascend_dequant.cc b/tf_adapter/kernels/amct_ascend_dequant.cc
index f9713593181a467059faa8181fff90190c569707..0cc1c2b82ce904240c3cb34e9757eddaea4d14ab 100644
--- a/tf_adapter/kernels/amct_ascend_dequant.cc
+++ b/tf_adapter/kernels/amct_ascend_dequant.cc
@@ -28,19 +28,115 @@ limitations under the License.
#include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/shape_inference.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tf_adapter/kernels/amct_common.h" using namespace tensorflow; +template +int AscendDequantInternelCpu(struct DequantInputParam input_param) { + int bound = pow(BASE, SHIFT_POW); + int channel_index = 0; + for (int i = 0; i < input_param.size; i++) { + if (input_param.channel_wise) { + if (input_param.transpose) { + channel_index = i % (input_param.channel_num * input_param.hw_size) / input_param.hw_size; + } else { + channel_index = i % input_param.channel_num; + } + } + unsigned int deqscale_int = (input_param.deqscale[channel_index] << DEQ_SCALE_BINS) >> DEQ_SCALE_BINS; + unsigned int shift_n_int = (input_param.deqscale[channel_index] << N_LFET_BINS) >> N_RIGHT_BINS; + float deqscale = *reinterpret_cast(&(deqscale_int)); + input_param.out[i] = input_param.input[i] * input_param.area_factor; + if (shift_n_int > 0) { + input_param.out[i] = floor(input_param.out[i] / pow(BASE, shift_n_int)); + if (input_param.out[i] > bound - 1) { + input_param.out[i] = bound - 1; + } else if (input_param.out[i] < -bound) { + input_param.out[i] = -bound; + } + } + input_param.out[i] = input_param.out[i] * deqscale * pow(BASE, shift_n_int) / input_param.area_factor; + } + return 0; +} + template class AscendDequantOp : public OpKernel { -public: - explicit AscendDequantOp(OpKernelConstruction* context) : OpKernel(context){} + public: + explicit AscendDequantOp(OpKernelConstruction* context) : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("data_format", &(input_param.data_format))); + OP_REQUIRES_OK(context, context->GetAttr("ksize", &(ksize))); + input_param.area_factor = ksize[0] * ksize[1]; + inputParam.size = 0; + inputParam.input = NULL; + inputParam.out = NULL; + inputParam.deqscale = NULL; + inputParam.channel_num = 1; + inputParam.hw_size = 1; + inputParam.channel_wise = false; + inputParam.transpose = false; + } + + ~AscendDequantOp(){} + + void Compute(OpKernelContext* context) override { + // Grab the input tensor + const Tensor& input_tensor = context->input(0); + TensorShape input_tensor_shape = input_tensor.shape(); + const Tensor& deqscale_tensor = context->input(1); + TensorShape deqscale_tensor_shape = deqscale_tensor.shape(); + + // Create an output tensor + Tensor* output_tensor = NULL; + OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(), &output_tensor)); + + // Do the computation. 
+    OP_REQUIRES(context, input_tensor.NumElements() <= tensorflow::kint32max,
+                errors::InvalidArgument("Too many elements in tensor"));
+
+    input_param.size = static_cast<int>(input_tensor.NumElements());
+    input_param.input = input_tensor.flat<T>().data();
+    input_param.out = output_tensor->flat<T>().data();
+    input_param.deqscale = deqscale_tensor.flat<uint64>().data();
+
+    std::vector<int> input_shape, deqscale_shape;
+    input_shape.resize(input_tensor_shape.dim_sizes().size());
+    deqscale_shape.resize(deqscale_tensor_shape.dim_sizes().size());
+
+    for (unsigned int i = 0; i < input_shape.size(); i++) {
+      input_shape[i] = input_tensor_shape.dim_sizes()[i];
+    }
+    int deqscale_size = 1;
+    for (unsigned int i = 0; i < deqscale_shape.size(); i++) {
+      deqscale_shape[i] = deqscale_tensor_shape.dim_sizes()[i];
+      deqscale_size *= deqscale_shape[i];
+    }
+
+    if (deqscale_size <= 1) {
+      input_param.channel_wise = false;
+    } else {
+      input_param.channel_wise = true;
+      if (input_param.data_format == "NCHW") {
+        input_param.transpose = true;
+        input_param.channel_num = deqscale_size;
+        input_param.hw_size = input_shape[NCHW_H_DIM] * input_shape[NCHW_W_DIM];
+      } else {
+        input_param.transpose = false;
+        input_param.channel_num = deqscale_size;
+        input_param.hw_size = input_shape[NHWC_H_DIM] * input_shape[NHWC_W_DIM];
+      }
+    }
 
-  ~AscendDequantOp(){}
+    if (input_param.size == 0) {
+      OP_REQUIRES(context, false, errors::InvalidArgument("AscendDequantOp: input_tensor is empty!"));
+    }
+    AscendDequantInternelCpu<T>(input_param);
+  }
 
-  void Compute(OpKernelContext* context) override{}
+ private:
+  struct DequantInputParam<T> input_param;
+  std::vector<int> ksize;
 };
 
-REGISTER_KERNEL_BUILDER(
-    Name("AscendDequant").Device(tensorflow::DEVICE_CPU).TypeConstraint<float>("T"),
-    AscendDequantOp<float>);
+REGISTER_KERNEL_BUILDER(Name("AscendDequant").Device(tensorflow::DEVICE_CPU).TypeConstraint<float>("T"), AscendDequantOp<float>);
diff --git a/tf_adapter/kernels/amct_ascend_quant.cc b/tf_adapter/kernels/amct_ascend_quant.cc
index 95ea17b63b5eca97b3715961949dd93fbce35b5d..f08e712b45a51c5b875479effc7868caec52e59b 100644
--- a/tf_adapter/kernels/amct_ascend_quant.cc
+++ b/tf_adapter/kernels/amct_ascend_quant.cc
@@ -28,19 +28,69 @@ limitations under the License.
#include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/shape_inference.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tf_adapter/kernels/amct_common.h" using namespace tensorflow; +template +int AscendQuantInternelCpu(struct QuantInputParam input_param) { + int bound = pow(BASE, input_param.quant_bits - 1); + for (int i = 0; i < input_param.size; i++) { + float quant_input = round(input_param.in[i] * input_param.scale) + input_param.offset; + if (quant_input < -bound) { + quant_input = -bound; + } else if (quant_input > bound - 1) { + quant_input = bound - 1; + } + input_param.out[i] = quant_input - input_param.offset; + } + return 0; +} + template class AscendQuantOp : public OpKernel { -public: - explicit AscendQuantOp(OpKernelConstruction* context) : OpKernel(context){} + public: + explicit AscendQuantOp(OpKernelConstruction* context) : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("quant_bits", &(quant_bits))); + OP_REQUIRES_OK(context, context->GetAttr("scale", &(scale))); + OP_REQUIRES_OK(context, context->GetAttr("offset", &(offset))); + input_param.size = 0; + input_param.in = NULL; + input_param.out = NULL; + input_param.scale = scale; + input_param.offset = offset; + input_param.quant_bits = quant_bits; + } + + ~AscendQuantOp(){} + + void Compute(OpKernelContext* context) override { + // Grab the input tensor + const Tensor& input_tensor = context->input(0); + + // Create an output tensor + Tensor* output_tensor = NULL; + OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(), &output_tensor)); + + // Do the computation. + OP_REQUIRES(context, input_tensor.NumElements() <= tensorflow::kint32max, + errors::InvalidArgument("Too many elements in tensor")); + + input_param.size = static_cast(input_tensor.NumElements()); + input_param.in = input_tensor.flat().data(); + input_param.out = output_tensor->flat().data(); - ~AscendQuantOp(){} + if (input_param.size == 0) { + OP_REQUIRES(context, false, errors::InvalidArgument("AscendQuantOp: input_tensor is empty!")); + } + AscendQuantInternelCpu(input_param); + } - void Compute(OpKernelContext* context) override {} + private: + struct QuantInputParam input_param; + int quant_bits; + float scale; + float offset; }; -REGISTER_KERNEL_BUILDER( - Name("AscendQuant").Device(tensorflow::DEVICE_CPU).TypeConstraint("T"), - AscendQuantOp); +REGISTER_KERNEL_BUILDER(Name("AscendQuant").Device(tensorflow::DEVICE_CPU).TypeConstraint("T"), AscendQuantOp); diff --git a/tf_adapter/kernels/amct_ascend_weight_quant.cc b/tf_adapter/kernels/amct_ascend_weight_quant.cc index f81533e5091c05705bc8c891fcb40ea674c5595d..2f24f60d901515b55fa6ae0ee956d174dcfb3583 100644 --- a/tf_adapter/kernels/amct_ascend_weight_quant.cc +++ b/tf_adapter/kernels/amct_ascend_weight_quant.cc @@ -28,19 +28,107 @@ limitations under the License. 
#include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/shape_inference.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tf_adapter/kernels/amct_common.h" using namespace tensorflow; +template +int AscendWeightQuantInternelCpu(struct WeightQuantInputParam input_param) +{ + if (input_param.channel_wise) { + if (input_param.transpose) { + for (int i = 0; i < input_param.size; i++) { + int index = i % (input_param.channel_in_num * input_param.channel_out_num) / input_param.channel_in_num; + input_param.out[i] = static_cast(input_param.weight[i]) - static_cast(input_param.offset[index]); + } + } else { + for (int i = 0; i < input_param.size; i++) { + int index = i % input_param.channel_out_num; + input_param.out[i] = static_cast(input_param.weight[i]) - static_cast(input_param.offset[index]); + } + } + } else { + for (int i = 0; i < input_param.size; i++) { + input_param.out[i] = static_cast(input_param.weight[i]) - static_cast(input_param.offset[0]); + } + } + return 0; +} + template class AscendWeightQuantOp : public OpKernel { -public: - explicit AscendWeightQuantOp(OpKernelConstruction* context) : OpKernel(context){} + public: + explicit AscendWeightQuantOp(OpKernelConstruction* context) : OpKernel(context) { + input_param.size = 0; + input_param.weight = NULL; + input_param.offset = NULL; + input_param.channel_in_num = 1; + input_param.channel_out_num = 1; + input_param.channel_wise = false; + input_param.transpose = false; + } + + ~AscendWeightQuantOp(){} + + void Compute(OpKernelContext* context) override { + // Grab the input tensor + const Tensor& input_tensor = context->input(0); + TensorShape input_tensor_shape = input_tensor.shape(); + const Tensor& offset_tensor = context->input(1); + TensorShape offset_tensor_shape = offset_tensor.shape(); + + // Create an output tensor + Tensor* output_tensor = NULL; + OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(), &output_tensor)); + + // Do the computation. 
+    OP_REQUIRES(context, input_tensor.NumElements() <= tensorflow::kint32max,
+                errors::InvalidArgument("Too many elements in tensor"));
+
+    input_param.size = static_cast<int>(input_tensor.NumElements());
+    input_param.weight = input_tensor.flat<int8>().data();
+    input_param.offset = offset_tensor.flat<int8>().data();
+    input_param.out = output_tensor->flat<T>().data();
+
+    std::vector<int> weight_shape, offset_shape;
+    weight_shape.resize(input_tensor_shape.dim_sizes().size());
+    offset_shape.resize(offset_tensor_shape.dim_sizes().size());
+
+    for (unsigned int i = 0; i < weight_shape.size(); i++) {
+      weight_shape[i] = input_tensor_shape.dim_sizes()[i];
+    }
+    for (unsigned int i = 0; i < offset_shape.size(); i++) {
+      offset_shape[i] = offset_tensor_shape.dim_sizes()[i];
+    }
+
+    int offset_size = 1;
+    for (unsigned int i = 0; i < offset_shape.size(); i++) {
+      offset_shape[i] = offset_tensor_shape.dim_sizes()[i];
+      offset_size *= offset_shape[i];
+    }
+    if (offset_size <= 1) {
+      input_param.channel_wise = false;
+    } else {
+      input_param.channel_wise = true;
+      if (offset_shape[CIN_DIM] > 1) {
+        input_param.transpose = true;
+        input_param.channel_in_num = weight_shape[COUT_DIM];
+        input_param.channel_out_num = weight_shape[CIN_DIM];
+      } else {
+        input_param.transpose = false;
+        input_param.channel_in_num = weight_shape[CIN_DIM];
+        input_param.channel_out_num = weight_shape[COUT_DIM];
+      }
+    }
 
-  ~AscendWeightQuantOp(){}
+    if (input_param.size == 0) {
+      OP_REQUIRES(context, false, errors::InvalidArgument("AscendWeightQuantOp: input_tensor is empty!"));
+    }
+    AscendWeightQuantInternelCpu<T>(input_param);
+  }
 
-  void Compute(OpKernelContext* context) override{}
+ private:
+  struct WeightQuantInputParam<T> input_param;
 };
 
-REGISTER_KERNEL_BUILDER(
-    Name("AscendWeightQuant").Device(tensorflow::DEVICE_CPU).TypeConstraint<int8>("T"),
-    AscendWeightQuantOp<int8>);
+REGISTER_KERNEL_BUILDER(Name("AscendWeightQuant").Device(tensorflow::DEVICE_CPU).TypeConstraint<int8>("T"), AscendWeightQuantOp<int8>);
diff --git a/tf_adapter/kernels/amct_common.h b/tf_adapter/kernels/amct_common.h
new file mode 100644
index 0000000000000000000000000000000000000000..38e729d1e87ae3454285b3c30f5423b582c02c7c
--- /dev/null
+++ b/tf_adapter/kernels/amct_common.h
@@ -0,0 +1,108 @@
+
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef QUANTIZE_COMMON_H
+#define QUANTIZE_COMMON_H
+#include <cmath>
+#include <string>
+#include <vector>
+
+// Define common constants in quantization
+const int BASE = 2;
+const float EPSILON = 1e-6;
+const int SHIFT_POW = 15;
+const int DEQ_SCALE_BINS = 32;
+const int N_LFET_BINS = 24;
+const int N_RIGHT_BINS = 56;
+const int CIN_DIM = 2;
+const int COUT_DIM = 3;
+const int NCHW_H_DIM = 2;
+const int NCHW_W_DIM = 3;
+const int NHWC_H_DIM = 1;
+const int NHWC_W_DIM = 2;
+
+// Define the structure of data quantization
+template <typename T>
+struct QuantInputParam {
+  int size;
+  const T* in;
+  T* out;
+  float scale;
+  float offset;
+  int quant_bits;
+};
+
+// Define the structure of weight quantization
+template <typename T>
+struct WeightQuantInputParam {
+  int size;
+  const signed char* weight;
+  const signed char* offset;
+  T* out;
+  int channel_in_num;
+  int channel_out_num;
+  bool channel_wise;
+  bool transpose;
+};
+
+// Define the structure of data anti-quantization
+template <typename T>
+struct AntiQuantInputParam {
+  int size;
+  const T* in;
+  T* out;
+  float scale;
+  float offset;
+};
+
+// Define the structure of data dequantization
+template <typename T>
+struct DequantInputParam {
+  int area_factor;
+  int size;
+  const T* input;
+  T* out;
+  const long long unsigned int* deqscale;
+  int channel_num;
+  int hw_size;
+  bool channel_wise;
+  bool transpose;
+  std::string data_format;
+};
+
+#endif
\ No newline at end of file
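
Reviewer note (not part of the patch): AscendDequantInternelCpu appears to assume that each 64-bit deqscale word packs an IEEE-754 float scale in its low 32 bits and a shift amount N in bits 32..39. That layout is inferred from the DEQ_SCALE_BINS / N_LFET_BINS / N_RIGHT_BINS shifts in the kernel, not from any documented format. A minimal standalone C++ sketch of the unpacking, using hypothetical packed values:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // Pack a hypothetical deq_scale word: scale = 0.5f in the low 32 bits,
  // shift N = 3 in bits 32..39 (assumed layout, mirroring the kernel's shifts).
  float scale_in = 0.5f;
  uint32_t scale_bits;
  std::memcpy(&scale_bits, &scale_in, sizeof(scale_bits));
  uint64_t deq_scale = (static_cast<uint64_t>(3) << 32) | scale_bits;

  // Same extraction the kernel performs with DEQ_SCALE_BINS = 32,
  // N_LFET_BINS = 24 and N_RIGHT_BINS = 56.
  uint32_t deqscale_int = static_cast<uint32_t>((deq_scale << 32) >> 32);  // low 32 bits
  uint32_t shift_n = static_cast<uint32_t>((deq_scale << 24) >> 56);       // bits 32..39
  float scale_out;
  std::memcpy(&scale_out, &deqscale_int, sizeof(scale_out));

  std::printf("scale = %f, shift N = %u\n", scale_out, shift_n);  // prints scale = 0.5, N = 3
  return 0;
}

Under that reading, the kernel divides the accumulated value by 2^N, clamps it to the signed 16-bit range implied by SHIFT_POW = 15, and then multiplies back by scale * 2^N, which is why a zero shift skips the floor/clamp branch entirely.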