From b4bca1bbf2bde0b6e79d15000497b01adb56084b Mon Sep 17 00:00:00 2001
From: jianghui58
Date: Wed, 27 Jan 2021 21:22:33 +0800
Subject: [PATCH] add converter weight quant interface

---
 tutorials/lite/source_en/use/converter_tool.md             | 2 ++
 tutorials/lite/source_en/use/post_training_quantization.md | 6 ++++--
 tutorials/lite/source_zh_cn/use/converter_tool.md          | 2 ++
 .../lite/source_zh_cn/use/post_training_quantization.md    | 6 ++++--
 4 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/tutorials/lite/source_en/use/converter_tool.md b/tutorials/lite/source_en/use/converter_tool.md
index b9cf26bf70..366ca57bfc 100644
--- a/tutorials/lite/source_en/use/converter_tool.md
+++ b/tutorials/lite/source_en/use/converter_tool.md
@@ -53,6 +53,8 @@ The following describes the parameters in detail.
 | `--bitNum=` | No | Sets the quantization bitNum when quantType is set as WeightQuant, now supports 1 bit to 16 bit quantization. | \[1, 16] | 8 |
 | `--quantWeightSize=` | No | Sets a size threshold of convolution filter when quantType is set as WeightQuant. If the size is bigger than this value, it will trigger weight quantization. | \[0, +∞) | 0 |
 | `--quantWeightChannel=` | No | Sets a channel number threshold of convolution filter when quantType is set as WeightQuant. If the number is bigger than this, it will trigger weight quantization. | \[0, +∞) | 16 |
+| `--perChannel=` | No | Sets whether to use the perChannel method when quantType is set as WeightQuant; otherwise, the perTensor method is used. | true, false | true |
+| `--enableHuffmanCode=` | No | Sets whether to apply Huffman encoding when quantType is set as WeightQuant, which losslessly compresses the model size. It is suitable for large models. | true, false | false |
 | `--configFile=` | No | Profile path of calibration dataset when quantType is set as PostTraining. | - | - |
 
 > - The parameter name and parameter value are separated by an equal sign (=) and no space is allowed between them.
diff --git a/tutorials/lite/source_en/use/post_training_quantization.md b/tutorials/lite/source_en/use/post_training_quantization.md
index dfced500a6..fbfd7fc0e4 100644
--- a/tutorials/lite/source_en/use/post_training_quantization.md
+++ b/tutorials/lite/source_en/use/post_training_quantization.md
@@ -39,7 +39,7 @@ Quantization of 1 to 16 bits is supported. A smaller number of quantization bits
 Generally, the weight quantization conversion command is as follows:
 
 ```bash
-./converter_lite --fmk=ModelType --modelFile=ModelFilePath --outputFile=ConvertedModelPath --quantType=WeightQuant --bitNum=BitNumValue --quantWeightSize=ConvWeightQuantSizeThresholdValue --quantWeightChannel=ConvWeightQuantChannelThresholdValue
+./converter_lite --fmk=ModelType --modelFile=ModelFilePath --outputFile=ConvertedModelPath --quantType=WeightQuant --bitNum=BitNumValue --quantWeightSize=ConvWeightQuantSizeThresholdValue --quantWeightChannel=ConvWeightQuantChannelThresholdValue --perChannel=true --enableHuffmanCode=false
 ```
 
 Parameters of this command are described as follows:
@@ -50,6 +50,8 @@
 | `--bitNum=` | Optional | Number of bits for weight quantization. Currently, 1 to 16 bits are supported. | Integer | 8 | \[1, 16] |
 | `--quantWeightSize=` | Optional | Set the threshold of the convolution kernel size for weight quantization. If the size of the convolution kernel is greater than the threshold, the weight is quantized. Recommended value: 500 | Integer | 0 | \[0, +∞) |
 | `--quantWeightChannel=` | Optional | Set the threshold of the number of convolution channels for weight quantization. If the number of convolution channels is greater than the threshold, the weight is quantized. Recommended value: 16 | Integer | 16 | \[0, +∞) |
+| `--perChannel=` | Optional | Set whether to use the perChannel method for weight quantization; otherwise, the perTensor method is used. Recommended value: true | Boolean | true | true, false |
+| `--enableHuffmanCode=` | Optional | Set whether to perform Huffman encoding after weight quantization, which losslessly compresses the model size. It is suitable for large models. | Boolean | false | true, false |
 You can adjust the weight quantization parameters based on the model and your requirements.
 
 > To ensure the accuracy of weight quantization, you are advised to set the value range of the `--bitNum` parameter to 8 bits to 16 bits.
@@ -60,7 +62,7 @@ You can adjust the weight quantization parameters based on the model and your re
 2. Take the TensorFlow Lite model as an example. Run the following command to convert the weight quantization model:
 
 ```bash
- ./converter_lite --fmk=TFLITE --modelFile=Inception_v3.tflite --outputFile=Inception_v3.tflite --quantType=WeightQuant --bitNum=8 --quantWeightSize=0 --quantWeightChannel=0
+ ./converter_lite --fmk=TFLITE --modelFile=Inception_v3.tflite --outputFile=Inception_v3.tflite --quantType=WeightQuant --bitNum=8 --quantWeightSize=0 --quantWeightChannel=0 --perChannel=true --enableHuffmanCode=false
 ```
 
 3. After the preceding command is successfully executed, the quantization model `Inception_v3.tflite.ms` is obtained. The size of the quantization model usually decreases to one fourth of the FP32 model.
diff --git a/tutorials/lite/source_zh_cn/use/converter_tool.md b/tutorials/lite/source_zh_cn/use/converter_tool.md
index b25fe1ef90..0c30746e4b 100644
--- a/tutorials/lite/source_zh_cn/use/converter_tool.md
+++ b/tutorials/lite/source_zh_cn/use/converter_tool.md
@@ -52,6 +52,8 @@ MindSpore Lite模型转换工具提供了多种参数设置,用户可根据需
 | `--bitNum=` | 否 | 设定训练后量化(权重量化)的比特数,目前支持1bit~16bit量化 | \[1,16] | 8 |
 | `--quantWeightSize=` | 否 | 设定参与训练后量化(权重量化)的卷积核尺寸阈值,若卷积核尺寸大于该值,则对此权重进行量化 | \[0,+∞) | 0 |
 | `--quantWeightChannel=` | 否 | 设定参与训练后量化(权重量化)的卷积通道数阈值,若卷积通道数大于该值,则对此权重进行量化 | \[0,+∞) | 16 |
+| `--perChannel=` | 否 | 设定训练后量化(权重量化)是否采用perChannel的方式,反之为perTensor | true, false | true |
+| `--enableHuffmanCode=` | 否 | 设定训练后量化(权重量化)之后是否进行哈夫曼编码,无损压缩模型大小,适用于大模型 | true, false | false |
 | `--configFile=` | 否 | 训练后量化(全量化)校准数据集配置文件路径 | - | - |
 
 > - 参数名和参数值之间用等号连接,中间不能有空格。
diff --git a/tutorials/lite/source_zh_cn/use/post_training_quantization.md b/tutorials/lite/source_zh_cn/use/post_training_quantization.md
index 933058d5de..1bfbae9ce0 100644
--- a/tutorials/lite/source_zh_cn/use/post_training_quantization.md
+++ b/tutorials/lite/source_zh_cn/use/post_training_quantization.md
@@ -39,7 +39,7 @@ MindSpore Lite训练后量化分为两类:
 权重量化转换命令的一般形式为:
 
 ```bash
-./converter_lite --fmk=ModelType --modelFile=ModelFilePath --outputFile=ConvertedModelPath --quantType=WeightQuant --bitNum=BitNumValue --quantWeightSize=ConvWeightQuantSizeThresholdValue --quantWeightChannel=ConvWeightQuantChannelThresholdValue
+./converter_lite --fmk=ModelType --modelFile=ModelFilePath --outputFile=ConvertedModelPath --quantType=WeightQuant --bitNum=BitNumValue --quantWeightSize=ConvWeightQuantSizeThresholdValue --quantWeightChannel=ConvWeightQuantChannelThresholdValue --perChannel=true --enableHuffmanCode=false
 ```
 
 下面对此命令的量化相关参数进行说明:
@@ -50,6 +50,8 @@ MindSpore Lite训练后量化分为两类:
 | `--bitNum=` | 可选 | 设定权重量化的比特数,目前支持1bit~16bit量化 | Integer | 8 | \[1,16] |
 | `--quantWeightSize=` | 可选 | 设定参与权重量化的卷积核尺寸阈值,若卷积核尺寸大于该值,则对此权重进行量化;建议设置为500 | Integer | 0 | \[0,+∞) |
 | `--quantWeightChannel=` | 可选 | 设定参与权重量化的卷积通道数阈值,若卷积通道数大于该值,则对此权重进行量化;建议设置为16 | Integer | 16 | \[0,+∞) |
+| `--perChannel=` | 可选 | 设定权重量化是否采用perChannel的方式,反之为perTensor;建议设置为true | Boolean | true | true, false |
+| `--enableHuffmanCode=` | 可选 | 设定权重量化之后是否进行哈夫曼编码,无损压缩模型大小,适用于大模型 | Boolean | false | true, false |
 用户可根据模型及自身需要对权重量化的参数作出调整。
 
 > 为保证权重量化的精度,建议`--bitNum`参数设定范围为8bit~16bit。
@@ -60,7 +62,7 @@ MindSpore Lite训练后量化分为两类:
 2. 以TensorFlow Lite模型为例,执行权重量化模型转换命令:
 
 ```bash
- ./converter_lite --fmk=TFLITE --modelFile=Inception_v3.tflite --outputFile=Inception_v3.tflite --quantType=WeightQuant --bitNum=8 --quantWeightSize=0 --quantWeightChannel=0
+ ./converter_lite --fmk=TFLITE --modelFile=Inception_v3.tflite --outputFile=Inception_v3.tflite --quantType=WeightQuant --bitNum=8 --quantWeightSize=0 --quantWeightChannel=0 --perChannel=true --enableHuffmanCode=false
 ```
 
 3. 上述命令执行成功后,便可得到量化后的模型`Inception_v3.tflite.ms`,量化后的模型大小通常会下降到FP32模型的1/4。
--
Gitee
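
For readers who want to try the flags added by this patch, the following is a minimal shell sketch that converts the same `Inception_v3.tflite` model twice, once without quantization and once with 8-bit per-channel weight quantization using the recommended thresholds from the tables above, and then compares the resulting `.ms` file sizes. The output file names `inception_v3_fp32` and `inception_v3_wq` are placeholders, and the script assumes `converter_lite` and the input model are in the current directory.

```bash
#!/bin/bash
# Sketch: compare an unquantized conversion with a weight-quantized one.
# Assumes converter_lite and Inception_v3.tflite are in the current directory;
# adjust the paths and model name to your environment.
set -e

MODEL=Inception_v3.tflite

# Plain conversion (no quantization) as the FP32 reference.
./converter_lite --fmk=TFLITE --modelFile=${MODEL} --outputFile=inception_v3_fp32

# 8-bit per-channel weight quantization with the recommended thresholds.
./converter_lite --fmk=TFLITE --modelFile=${MODEL} --outputFile=inception_v3_wq \
  --quantType=WeightQuant --bitNum=8 --quantWeightSize=500 --quantWeightChannel=16 \
  --perChannel=true --enableHuffmanCode=false

# The weight-quantized model is usually about one fourth the size of the FP32 one.
ls -lh inception_v3_fp32.ms inception_v3_wq.ms
```

Huffman encoding is left disabled here; per the new `--enableHuffmanCode` description, enabling it losslessly compresses the model further and is mainly worthwhile for large models.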