From c9a6863d68825428ae6031b2f15f055dd1aca68e Mon Sep 17 00:00:00 2001 From: "xinchi.tian" Date: Tue, 23 Apr 2024 14:57:19 +0800 Subject: [PATCH 1/5] Add EfficientnetV2 in IXRT link #I9FP0V Add EfficientnetV2 in IXRT Signed-off-by: xinchi.tian --- .../efficientnet_v2/ixrt/README.md | 67 ++ .../efficientnet_v2/ixrt/build_engine.py | 106 ++ .../efficientnet_v2/ixrt/build_i8_engine.py | 113 +++ .../ixrt/calibration_dataset.py | 113 +++ .../efficientnet_v2/ixrt/common.py | 79 ++ .../ixrt/config/EFFICIENTNET_V2T_CONFIG | 34 + .../efficientnet_v2/ixrt/export_onnx.py | 950 ++++++++++++++++++ .../efficientnet_v2/ixrt/inference.py | 158 +++ .../efficientnet_v2/ixrt/modify_batchsize.py | 57 ++ .../efficientnet_v2/ixrt/quant.py | 167 +++ .../efficientnet_v2/ixrt/refine_model.py | 291 ++++++ .../infer_efficientnet_fp16_accuracy.sh | 145 +++ .../infer_efficientnet_fp16_performance.sh | 145 +++ .../infer_efficientnet_int8_accuracy.sh | 146 +++ .../infer_efficientnet_int8_performance.sh | 145 +++ .../efficientnet_v2/ixrt/simplify_model.py | 41 + 16 files changed, 2757 insertions(+) create mode 100755 models/cv/classification/efficientnet_v2/ixrt/README.md create mode 100755 models/cv/classification/efficientnet_v2/ixrt/build_engine.py create mode 100644 models/cv/classification/efficientnet_v2/ixrt/build_i8_engine.py create mode 100644 models/cv/classification/efficientnet_v2/ixrt/calibration_dataset.py create mode 100644 models/cv/classification/efficientnet_v2/ixrt/common.py create mode 100644 models/cv/classification/efficientnet_v2/ixrt/config/EFFICIENTNET_V2T_CONFIG create mode 100755 models/cv/classification/efficientnet_v2/ixrt/export_onnx.py create mode 100644 models/cv/classification/efficientnet_v2/ixrt/inference.py create mode 100644 models/cv/classification/efficientnet_v2/ixrt/modify_batchsize.py create mode 100644 models/cv/classification/efficientnet_v2/ixrt/quant.py create mode 100644 models/cv/classification/efficientnet_v2/ixrt/refine_model.py create mode 100755 models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_fp16_accuracy.sh create mode 100755 models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_fp16_performance.sh create mode 100755 models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_int8_accuracy.sh create mode 100755 models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_int8_performance.sh create mode 100644 models/cv/classification/efficientnet_v2/ixrt/simplify_model.py diff --git a/models/cv/classification/efficientnet_v2/ixrt/README.md b/models/cv/classification/efficientnet_v2/ixrt/README.md new file mode 100755 index 00000000..d06cff37 --- /dev/null +++ b/models/cv/classification/efficientnet_v2/ixrt/README.md @@ -0,0 +1,67 @@ +# EfficientnetV2 + +## Description +EfficientNetV2 is an improved version of the EfficientNet architecture proposed by Google, aiming to enhance model performance and efficiency. Unlike the original EfficientNet, EfficientNetV2 features a simplified design and incorporates a series of enhancement strategies to further boost performance. + +## Setup + +### Install +```bash +yum install mesa-libGL +pip3 install tqdm +pip3 install onnx +pip3 install onnxsim +pip3 install tabulate +pip3 install timm +pip3 install ppq +pip3 install protobuf==3.20.0 +``` + +### Download +Pretrained model: + +Dataset: to download the validation dataset. + +### Model Conversion +```bash +mkdir checkpoints +git clone https://github.com/huggingface/pytorch-image-models.git +cp /Path/to/ixrt/export_onnx.py pytorch-image-models/timm/models +cd pytorch-image-models/timm/models +python3 export_onnx.py --origin_model /path/to/efficientnetv2_t_agc-3620981a.pth --output_model checkpoints/efficientnet.onnx +``` + +## Inference +```bash +export PROJ_DIR=/Path/to/efficientnet_v2/ixrt +export DATASETS_DIR=/path/to/imagenet_val/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=/Path/to/efficientnet_v2/ixrt +export CONFIG_DIR=/Path/to/config/EFFICIENTNET_V2T_CONFIG +export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python +``` +### FP16 + +```bash +# Accuracy +bash script/infer_efficientnet_fp16_accuracy.sh +# Performance +bash script/infer_efficientnet_fp16_performance.sh +``` + +### INT8 +```bash +# Accuracy +bash script/infer_efficientnet_int8_accuracy.sh +# Performance +bash script/infer_efficientnet_int8_performance.sh +``` + + + +## Results + +Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) +---------------|-----------|-----------|----------|----------|-------- +EfficientnetV2 | 32 | FP16 | 1882.87 | 82.14 | 96.16 +EfficientnetV2 | 32 | INT8 | 2595.96 | 81.50 | 95.96 diff --git a/models/cv/classification/efficientnet_v2/ixrt/build_engine.py b/models/cv/classification/efficientnet_v2/ixrt/build_engine.py new file mode 100755 index 00000000..41e6af8d --- /dev/null +++ b/models/cv/classification/efficientnet_v2/ixrt/build_engine.py @@ -0,0 +1,106 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt +from calibration_dataset import getdataloader +import cuda.cudart as cudart + +def assertSuccess(err): + assert(err == cudart.cudaError_t.cudaSuccess) + +class EngineCalibrator(tensorrt.IInt8EntropyCalibrator2): + + def __init__(self, cache_file, datasets_dir, loop_count=10, bsz=1, img_sz=224): + super().__init__() + self.cache_file = cache_file + self.image_batcher = getdataloader(datasets_dir, loop_count, batch_size=bsz, img_sz=img_sz) + self.batch_generator = iter(self.image_batcher) + size = img_sz*img_sz*3*bsz + __import__('pdb').set_trace() + err, self.batch_allocation = cudart.cudaMalloc(size) + assertSuccess(err) + + def __del__(self): + err,= cudart.cudaFree(self.batch_allocation) + assertSuccess(err) + + def get_batch_size(self): + return self.image_batcher.batch_size + + def get_batch(self, names): + try: + batch, _ = next(self.batch_generator) + batch = batch.numpy() + __import__('pdb').set_trace() + cudart.cudaMemcpy(self.batch_allocation, + np.ascontiguousarray(batch), + batch.nbytes, + cudart.cudaMemcpyKind.cudaMemcpyHostToDevice) + return [int(self.batch_allocation)] + except StopIteration: + return None + + def read_calibration_cache(self): + if os.path.exists(self.cache_file): + with open(self.cache_file, "rb") as f: + return f.read() + + def write_calibration_cache(self, cache): + with open(self.cache_file, "wb") as f: + f.write(cache) + +def main(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.VERBOSE) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + print("precision : ", precision) + build_config.set_flag(precision) + if config.precision == "int8": + build_config.int8_calibrator = EngineCalibrator("int8_cache", config.datasets_dir) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--engine", type=str, default=None) + parser.add_argument( + "--datasets_dir", + type=str, + default="", + help="ImageNet dir", + ) + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/classification/efficientnet_v2/ixrt/build_i8_engine.py b/models/cv/classification/efficientnet_v2/ixrt/build_i8_engine.py new file mode 100644 index 00000000..6e356260 --- /dev/null +++ b/models/cv/classification/efficientnet_v2/ixrt/build_i8_engine.py @@ -0,0 +1,113 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import argparse +import json +import os + +import tensorrt +import tensorrt as trt + +TRT_LOGGER = trt.Logger(tensorrt.Logger.VERBOSE) + +EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + + +def GiB(val): + return val * 1 << 30 + + +def json_load(filename): + with open(filename) as json_file: + data = json.load(json_file) + return data + + +def setDynamicRange(network, json_file): + """Sets ranges for network layers.""" + quant_param_json = json_load(json_file) + act_quant = quant_param_json["act_quant_info"] + + for i in range(network.num_inputs): + input_tensor = network.get_input(i) + if act_quant.__contains__(input_tensor.name): + print(input_tensor.name) + value = act_quant[input_tensor.name] + tensor_max = abs(value) + tensor_min = -abs(value) + input_tensor.dynamic_range = (tensor_min, tensor_max) + + for i in range(network.num_layers): + layer = network.get_layer(i) + + for output_index in range(layer.num_outputs): + tensor = layer.get_output(output_index) + + if act_quant.__contains__(tensor.name): + value = act_quant[tensor.name] + tensor_max = abs(value) + tensor_min = -abs(value) + tensor.dynamic_range = (tensor_min, tensor_max) + else: + print("\033[1;32m%s\033[0m" % tensor.name) + + +def build_engine(onnx_file, json_file, engine_file): + builder = trt.Builder(TRT_LOGGER) + network = builder.create_network(EXPLICIT_BATCH) + + config = builder.create_builder_config() + + # If it is a dynamic onnx model , you need to add the following. + # profile = builder.create_optimization_profile() + # profile.set_shape("input_name", (batch, channels, min_h, min_w), (batch, channels, opt_h, opt_w), (batch, channels, max_h, max_w)) + # config.add_optimization_profile(profile) + + parser = trt.OnnxParser(network, TRT_LOGGER) + # config.max_workspace_size = GiB(1) + if not os.path.exists(onnx_file): + quit("ONNX file {} not found".format(onnx_file)) + + with open(onnx_file, "rb") as model: + if not parser.parse(model.read()): + print("ERROR: Failed to parse the ONNX file.") + for error in range(parser.num_errors): + print(parser.get_error(error)) + return None + + config.set_flag(trt.BuilderFlag.INT8) + + setDynamicRange(network, json_file) + + engine = builder.build_engine(network, config) + + with open(engine_file, "wb") as f: + f.write(engine.serialize()) + + +if __name__ == "__main__": + # Add plugins if needed + # import ctypes + # ctypes.CDLL("libmmdeploy_tensorrt_ops.so") + parser = argparse.ArgumentParser( + description="Writing qparams to onnx to convert tensorrt engine." + ) + parser.add_argument("--onnx", type=str, default=None) + parser.add_argument("--qparam_json", type=str, default=None) + parser.add_argument("--engine", type=str, default=None) + arg = parser.parse_args() + + build_engine(arg.onnx, arg.qparam_json, arg.engine) + print("\033[1;32mgenerate %s\033[0m" % arg.engine) \ No newline at end of file diff --git a/models/cv/classification/efficientnet_v2/ixrt/calibration_dataset.py b/models/cv/classification/efficientnet_v2/ixrt/calibration_dataset.py new file mode 100644 index 00000000..d7525d51 --- /dev/null +++ b/models/cv/classification/efficientnet_v2/ixrt/calibration_dataset.py @@ -0,0 +1,113 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os + +import torch +import torchvision.datasets +from torch.utils.data import DataLoader +from torchvision import models +from torchvision import transforms as T + + +class CalibrationImageNet(torchvision.datasets.ImageFolder): + def __init__(self, *args, **kwargs): + super(CalibrationImageNet, self).__init__(*args, **kwargs) + img2label_path = os.path.join(self.root, "val_map.txt") + if not os.path.exists(img2label_path): + raise FileNotFoundError(f"Not found label file `{img2label_path}`.") + + self.img2label_map = self.make_img2label_map(img2label_path) + + def make_img2label_map(self, path): + with open(path) as f: + lines = f.readlines() + + img2lable_map = dict() + for line in lines: + line = line.lstrip().rstrip().split("\t") + if len(line) != 2: + continue + img_name, label = line + img_name = img_name.strip() + if img_name in [None, ""]: + continue + label = int(label.strip()) + img2lable_map[img_name] = label + return img2lable_map + + def __getitem__(self, index): + path, target = self.samples[index] + sample = self.loader(path) + if self.transform is not None: + sample = self.transform(sample) + # if self.target_transform is not None: + # target = self.target_transform(target) + img_name = os.path.basename(path) + target = self.img2label_map[img_name] + + return sample, target + + +def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): + dataset = CalibrationImageNet( + data_path, + transform=T.Compose( + [ + T.Resize(256), + T.CenterCrop(img_sz), + T.ToTensor(), + T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ] + ), + ) + + calibration_dataset = dataset + if num_samples is not None: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + + verify_dataloader = DataLoader( + dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + + return calibration_dataloader, verify_dataloader + + +def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): + num_samples = min(total_sample, step * batch_size) + if step < 0: + num_samples = None + calibration_dataloader, _ = create_dataloaders( + dataset_dir, + img_sz=img_sz, + batch_size=batch_size, + workers=workers, + num_samples=num_samples, + ) + return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/efficientnet_v2/ixrt/common.py b/models/cv/classification/efficientnet_v2/ixrt/common.py new file mode 100644 index 00000000..abdc147c --- /dev/null +++ b/models/cv/classification/efficientnet_v2/ixrt/common.py @@ -0,0 +1,79 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import cv2 +import glob +import torch +import tensorrt +import numpy as np +import pycuda.driver as cuda + +def eval_batch(batch_score, batch_label): + batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) + values, indices = batch_score.topk(5) + top1, top5 = 0, 0 + for idx, label in enumerate(batch_label): + + if label == indices[idx][0]: + top1 += 1 + if label in indices[idx]: + top5 += 1 + return top1, top5 + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + allocation = cuda.mem_alloc(size) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations diff --git a/models/cv/classification/efficientnet_v2/ixrt/config/EFFICIENTNET_V2T_CONFIG b/models/cv/classification/efficientnet_v2/ixrt/config/EFFICIENTNET_V2T_CONFIG new file mode 100644 index 00000000..b9e40159 --- /dev/null +++ b/models/cv/classification/efficientnet_v2/ixrt/config/EFFICIENTNET_V2T_CONFIG @@ -0,0 +1,34 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +# IMGSIZE : 模型输入hw大小 +# MODEL_NAME : 生成onnx/engine的basename +# ORIGINE_MODEL : 原始onnx文件名称 +IMGSIZE=288 +MODEL_NAME=EfficientNetv2_t +ORIGINE_MODEL=efficientnet.onnx + +# QUANT CONFIG (仅PRECISION为int8时生效) + # QUANT_OBSERVER : 量化策略,可选 [hist_percentile, percentile, minmax, entropy, ema] + # QUANT_BATCHSIZE : 量化时组dataloader的batchsize, 最好和onnx中的batchsize保持一致,有些op可能推导shape错误(比如Reshape) + # QUANT_STEP : 量化步数 + # QUANT_SEED : 随机种子 保证量化结果可复现 + # QUANT_EXIST_ONNX : 如果有其他来源的量化模型则填写 +QUANT_OBSERVER=hist_percentile +QUANT_BATCHSIZE=32 +QUANT_STEP=32 +QUANT_SEED=42 +DISABLE_QUANT_LIST= +QUANT_EXIST_ONNX= diff --git a/models/cv/classification/efficientnet_v2/ixrt/export_onnx.py b/models/cv/classification/efficientnet_v2/ixrt/export_onnx.py new file mode 100755 index 00000000..2ada1df1 --- /dev/null +++ b/models/cv/classification/efficientnet_v2/ixrt/export_onnx.py @@ -0,0 +1,950 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from functools import partial +from typing import List + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD +from _efficientnet_blocks import SqueezeExcite +from _efficientnet_builder import EfficientNetBuilder, decode_arch_def, efficientnet_init_weights,\ + round_channels, resolve_bn_args, resolve_act_layer, BN_EPS_TF_DEFAULT +from features import FeatureInfo, FeatureHooks +from helpers import build_model_with_cfg, pretrained_cfg_for_features, checkpoint_seq +from layers import create_conv2d, create_classifier, get_norm_act_layer, EvoNorm2dS0, GroupNormAct +from registry import register_model +import argparse +import ssl + + + +ssl._create_default_https_context = ssl._create_unverified_context + +__all__ = ['EfficientNet', 'EfficientNetFeatures'] + + +def _cfg(url='', **kwargs): + return { + 'url': url, 'num_classes': 1000, 'input_size': (3, 288, 288), 'pool_size': (7, 7), + 'crop_pct': 0.875, 'interpolation': 'bicubic', + 'mean': IMAGENET_DEFAULT_MEAN, 'std': IMAGENET_DEFAULT_STD, + 'first_conv': 'conv_stem', 'classifier': 'classifier', + **kwargs + } + + +default_cfgs = { + 'efficientnetv2_rw_t': _cfg( + url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnetv2_t_agc-3620981a.pth', + input_size=(3, 224, 224), test_input_size=(3, 288, 288), pool_size=(7, 7), crop_pct=1.0), + 'gc_efficientnetv2_rw_t': _cfg( + url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/gc_efficientnetv2_rw_t_agc-927a0bde.pth', + input_size=(3, 224, 224), test_input_size=(3, 288, 288), pool_size=(7, 7), crop_pct=1.0), + 'efficientnetv2_rw_s': _cfg( + url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_v2s_ra2_288-a6477665.pth', + input_size=(3, 288, 288), test_input_size=(3, 384, 384), pool_size=(9, 9), crop_pct=1.0), + 'efficientnetv2_rw_m': _cfg( + url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnetv2_rw_m_agc-3d90cb1e.pth', + input_size=(3, 320, 320), test_input_size=(3, 416, 416), pool_size=(10, 10), crop_pct=1.0), + + 'efficientnetv2_s': _cfg( + url='', + input_size=(3, 288, 288), test_input_size=(3, 384, 384), pool_size=(9, 9), crop_pct=1.0), + 'efficientnetv2_m': _cfg( + url='', + input_size=(3, 320, 320), test_input_size=(3, 416, 416), pool_size=(10, 10), crop_pct=1.0), + 'efficientnetv2_l': _cfg( + url='', + input_size=(3, 384, 384), test_input_size=(3, 480, 480), pool_size=(12, 12), crop_pct=1.0), + 'efficientnetv2_xl': _cfg( + url='', + input_size=(3, 384, 384), test_input_size=(3, 512, 512), pool_size=(12, 12), crop_pct=1.0), +} + + +class EfficientNet(nn.Module): + """ EfficientNet + + A flexible and performant PyTorch implementation of efficient network architectures, including: + * EfficientNet-V2 Small, Medium, Large, XL & B0-B3 + * EfficientNet B0-B8, L2 + * EfficientNet-EdgeTPU + * EfficientNet-CondConv + * MixNet S, M, L, XL + * MnasNet A1, B1, and small + * MobileNet-V2 + * FBNet C + * Single-Path NAS Pixel1 + * TinyNet + """ + + def __init__( + self, block_args, num_classes=1000, num_features=1280, in_chans=3, stem_size=32, fix_stem=False, + output_stride=32, pad_type='', round_chs_fn=round_channels, act_layer=None, norm_layer=None, + se_layer=None, drop_rate=0., drop_path_rate=0., global_pool='avg'): + super(EfficientNet, self).__init__() + act_layer = act_layer or nn.ReLU + norm_layer = norm_layer or nn.BatchNorm2d + norm_act_layer = get_norm_act_layer(norm_layer, act_layer) + se_layer = se_layer or SqueezeExcite + self.num_classes = num_classes + self.num_features = num_features + self.drop_rate = drop_rate + self.grad_checkpointing = False + + # Stem + if not fix_stem: + stem_size = round_chs_fn(stem_size) + self.conv_stem = create_conv2d(in_chans, stem_size, 3, stride=2, padding=pad_type) + self.bn1 = norm_act_layer(stem_size, inplace=True) + + # Middle stages (IR/ER/DS Blocks) + builder = EfficientNetBuilder( + output_stride=output_stride, pad_type=pad_type, round_chs_fn=round_chs_fn, + act_layer=act_layer, norm_layer=norm_layer, se_layer=se_layer, drop_path_rate=drop_path_rate) + self.blocks = nn.Sequential(*builder(stem_size, block_args)) + self.feature_info = builder.features + head_chs = builder.in_chs + + # Head + Pooling + self.conv_head = create_conv2d(head_chs, self.num_features, 1, padding=pad_type) + self.bn2 = norm_act_layer(self.num_features, inplace=True) + self.global_pool, self.classifier = create_classifier( + self.num_features, self.num_classes, pool_type=global_pool) + + efficientnet_init_weights(self) + + def as_sequential(self): + layers = [self.conv_stem, self.bn1] + layers.extend(self.blocks) + layers.extend([self.conv_head, self.bn2, self.global_pool]) + layers.extend([nn.Dropout(self.drop_rate), self.classifier]) + return nn.Sequential(*layers) + + @torch.jit.ignore + def group_matcher(self, coarse=False): + return dict( + stem=r'^conv_stem|bn1', + blocks=[ + (r'^blocks\.(\d+)' if coarse else r'^blocks\.(\d+)\.(\d+)', None), + (r'conv_head|bn2', (99999,)) + ] + ) + + @torch.jit.ignore + def set_grad_checkpointing(self, enable=True): + self.grad_checkpointing = enable + + @torch.jit.ignore + def get_classifier(self): + return self.classifier + + def reset_classifier(self, num_classes, global_pool='avg'): + self.num_classes = num_classes + self.global_pool, self.classifier = create_classifier( + self.num_features, self.num_classes, pool_type=global_pool) + + def forward_features(self, x): + x = self.conv_stem(x) + x = self.bn1(x) + if self.grad_checkpointing and not torch.jit.is_scripting(): + x = checkpoint_seq(self.blocks, x, flatten=True) + else: + x = self.blocks(x) + x = self.conv_head(x) + x = self.bn2(x) + return x + + def forward_head(self, x, pre_logits: bool = False): + x = self.global_pool(x) + if self.drop_rate > 0.: + x = F.dropout(x, p=self.drop_rate, training=self.training) + return x if pre_logits else self.classifier(x) + + def forward(self, x): + x = self.forward_features(x) + x = self.forward_head(x) + return x + + +class EfficientNetFeatures(nn.Module): + """ EfficientNet Feature Extractor + + A work-in-progress feature extraction module for EfficientNet, to use as a backbone for segmentation + and object detection models. + """ + + def __init__( + self, block_args, out_indices=(0, 1, 2, 3, 4), feature_location='bottleneck', in_chans=3, + stem_size=32, fix_stem=False, output_stride=32, pad_type='', round_chs_fn=round_channels, + act_layer=None, norm_layer=None, se_layer=None, drop_rate=0., drop_path_rate=0.): + super(EfficientNetFeatures, self).__init__() + act_layer = act_layer or nn.ReLU + norm_layer = norm_layer or nn.BatchNorm2d + norm_act_layer = get_norm_act_layer(norm_layer, act_layer) + se_layer = se_layer or SqueezeExcite + self.drop_rate = drop_rate + + # Stem + if not fix_stem: + stem_size = round_chs_fn(stem_size) + self.conv_stem = create_conv2d(in_chans, stem_size, 3, stride=2, padding=pad_type) + self.bn1 = norm_act_layer(stem_size, inplace=True) + + # Middle stages (IR/ER/DS Blocks) + builder = EfficientNetBuilder( + output_stride=output_stride, pad_type=pad_type, round_chs_fn=round_chs_fn, + act_layer=act_layer, norm_layer=norm_layer, se_layer=se_layer, drop_path_rate=drop_path_rate, + feature_location=feature_location) + self.blocks = nn.Sequential(*builder(stem_size, block_args)) + self.feature_info = FeatureInfo(builder.features, out_indices) + self._stage_out_idx = {v['stage']: i for i, v in enumerate(self.feature_info) if i in out_indices} + + efficientnet_init_weights(self) + + # Register feature extraction hooks with FeatureHooks helper + self.feature_hooks = None + if feature_location != 'bottleneck': + hooks = self.feature_info.get_dicts(keys=('module', 'hook_type')) + self.feature_hooks = FeatureHooks(hooks, self.named_modules()) + + def forward(self, x) -> List[torch.Tensor]: + x = self.conv_stem(x) + x = self.bn1(x) + if self.feature_hooks is None: + features = [] + if 0 in self._stage_out_idx: + features.append(x) # add stem out + for i, b in enumerate(self.blocks): + x = b(x) + if i + 1 in self._stage_out_idx: + features.append(x) + return features + else: + self.blocks(x) + out = self.feature_hooks.get_output(x.device) + return list(out.values()) + + +def _create_effnet(variant, pretrained=False, **kwargs): + features_only = False + model_cls = EfficientNet + kwargs_filter = None + if kwargs.pop('features_only', False): + features_only = True + kwargs_filter = ('num_classes', 'num_features', 'head_conv', 'global_pool') + model_cls = EfficientNetFeatures + model = build_model_with_cfg( + model_cls, variant, pretrained, + pretrained_strict=not features_only, + kwargs_filter=kwargs_filter, + **kwargs) + if features_only: + model.default_cfg = pretrained_cfg_for_features(model.default_cfg) + return model + + +def _gen_mnasnet_a1(variant, channel_multiplier=1.0, pretrained=False, **kwargs): + """Creates a mnasnet-a1 model. + + Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet + Paper: https://arxiv.org/pdf/1807.11626.pdf. + + Args: + channel_multiplier: multiplier to number of channels per layer. + """ + arch_def = [ + # stage 0, 112x112 in + ['ds_r1_k3_s1_e1_c16_noskip'], + # stage 1, 112x112 in + ['ir_r2_k3_s2_e6_c24'], + # stage 2, 56x56 in + ['ir_r3_k5_s2_e3_c40_se0.25'], + # stage 3, 28x28 in + ['ir_r4_k3_s2_e6_c80'], + # stage 4, 14x14in + ['ir_r2_k3_s1_e6_c112_se0.25'], + # stage 5, 14x14in + ['ir_r3_k5_s2_e6_c160_se0.25'], + # stage 6, 7x7 in + ['ir_r1_k3_s1_e6_c320'], + ] + model_kwargs = dict( + block_args=decode_arch_def(arch_def), + stem_size=32, + round_chs_fn=partial(round_channels, multiplier=channel_multiplier), + norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)), + **kwargs + ) + model = _create_effnet(variant, pretrained, **model_kwargs) + return model + + +def _gen_mnasnet_b1(variant, channel_multiplier=1.0, pretrained=False, **kwargs): + """Creates a mnasnet-b1 model. + + Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet + Paper: https://arxiv.org/pdf/1807.11626.pdf. + + Args: + channel_multiplier: multiplier to number of channels per layer. + """ + arch_def = [ + # stage 0, 112x112 in + ['ds_r1_k3_s1_c16_noskip'], + # stage 1, 112x112 in + ['ir_r3_k3_s2_e3_c24'], + # stage 2, 56x56 in + ['ir_r3_k5_s2_e3_c40'], + # stage 3, 28x28 in + ['ir_r3_k5_s2_e6_c80'], + # stage 4, 14x14in + ['ir_r2_k3_s1_e6_c96'], + # stage 5, 14x14in + ['ir_r4_k5_s2_e6_c192'], + # stage 6, 7x7 in + ['ir_r1_k3_s1_e6_c320_noskip'] + ] + model_kwargs = dict( + block_args=decode_arch_def(arch_def), + stem_size=32, + round_chs_fn=partial(round_channels, multiplier=channel_multiplier), + norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)), + **kwargs + ) + model = _create_effnet(variant, pretrained, **model_kwargs) + return model + + +def _gen_mnasnet_small(variant, channel_multiplier=1.0, pretrained=False, **kwargs): + """Creates a mnasnet-b1 model. + + Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet + Paper: https://arxiv.org/pdf/1807.11626.pdf. + + Args: + channel_multiplier: multiplier to number of channels per layer. + """ + arch_def = [ + ['ds_r1_k3_s1_c8'], + ['ir_r1_k3_s2_e3_c16'], + ['ir_r2_k3_s2_e6_c16'], + ['ir_r4_k5_s2_e6_c32_se0.25'], + ['ir_r3_k3_s1_e6_c32_se0.25'], + ['ir_r3_k5_s2_e6_c88_se0.25'], + ['ir_r1_k3_s1_e6_c144'] + ] + model_kwargs = dict( + block_args=decode_arch_def(arch_def), + stem_size=8, + round_chs_fn=partial(round_channels, multiplier=channel_multiplier), + norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)), + **kwargs + ) + model = _create_effnet(variant, pretrained, **model_kwargs) + return model + + +def _gen_mobilenet_v2( + variant, channel_multiplier=1.0, depth_multiplier=1.0, fix_stem_head=False, pretrained=False, **kwargs): + """ Generate MobileNet-V2 network + Ref impl: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet_v2.py + Paper: https://arxiv.org/abs/1801.04381 + """ + arch_def = [ + ['ds_r1_k3_s1_c16'], + ['ir_r2_k3_s2_e6_c24'], + ['ir_r3_k3_s2_e6_c32'], + ['ir_r4_k3_s2_e6_c64'], + ['ir_r3_k3_s1_e6_c96'], + ['ir_r3_k3_s2_e6_c160'], + ['ir_r1_k3_s1_e6_c320'], + ] + round_chs_fn = partial(round_channels, multiplier=channel_multiplier) + model_kwargs = dict( + block_args=decode_arch_def(arch_def, depth_multiplier=depth_multiplier, fix_first_last=fix_stem_head), + num_features=1280 if fix_stem_head else max(1280, round_chs_fn(1280)), + stem_size=32, + fix_stem=fix_stem_head, + round_chs_fn=round_chs_fn, + norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)), + act_layer=resolve_act_layer(kwargs, 'relu6'), + **kwargs + ) + model = _create_effnet(variant, pretrained, **model_kwargs) + return model + + +def _gen_fbnetc(variant, channel_multiplier=1.0, pretrained=False, **kwargs): + """ FBNet-C + + Paper: https://arxiv.org/abs/1812.03443 + Ref Impl: https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/modeling/backbone/fbnet_modeldef.py + + NOTE: the impl above does not relate to the 'C' variant here, that was derived from paper, + it was used to confirm some building block details + """ + arch_def = [ + ['ir_r1_k3_s1_e1_c16'], + ['ir_r1_k3_s2_e6_c24', 'ir_r2_k3_s1_e1_c24'], + ['ir_r1_k5_s2_e6_c32', 'ir_r1_k5_s1_e3_c32', 'ir_r1_k5_s1_e6_c32', 'ir_r1_k3_s1_e6_c32'], + ['ir_r1_k5_s2_e6_c64', 'ir_r1_k5_s1_e3_c64', 'ir_r2_k5_s1_e6_c64'], + ['ir_r3_k5_s1_e6_c112', 'ir_r1_k5_s1_e3_c112'], + ['ir_r4_k5_s2_e6_c184'], + ['ir_r1_k3_s1_e6_c352'], + ] + model_kwargs = dict( + block_args=decode_arch_def(arch_def), + stem_size=16, + num_features=1984, # paper suggests this, but is not 100% clear + round_chs_fn=partial(round_channels, multiplier=channel_multiplier), + norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)), + **kwargs + ) + model = _create_effnet(variant, pretrained, **model_kwargs) + return model + + +def _gen_spnasnet(variant, channel_multiplier=1.0, pretrained=False, **kwargs): + """Creates the Single-Path NAS model from search targeted for Pixel1 phone. + + Paper: https://arxiv.org/abs/1904.02877 + + Args: + channel_multiplier: multiplier to number of channels per layer. + """ + arch_def = [ + # stage 0, 112x112 in + ['ds_r1_k3_s1_c16_noskip'], + # stage 1, 112x112 in + ['ir_r3_k3_s2_e3_c24'], + # stage 2, 56x56 in + ['ir_r1_k5_s2_e6_c40', 'ir_r3_k3_s1_e3_c40'], + # stage 3, 28x28 in + ['ir_r1_k5_s2_e6_c80', 'ir_r3_k3_s1_e3_c80'], + # stage 4, 14x14in + ['ir_r1_k5_s1_e6_c96', 'ir_r3_k5_s1_e3_c96'], + # stage 5, 14x14in + ['ir_r4_k5_s2_e6_c192'], + # stage 6, 7x7 in + ['ir_r1_k3_s1_e6_c320_noskip'] + ] + model_kwargs = dict( + block_args=decode_arch_def(arch_def), + stem_size=32, + round_chs_fn=partial(round_channels, multiplier=channel_multiplier), + norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)), + **kwargs + ) + model = _create_effnet(variant, pretrained, **model_kwargs) + return model + + +def _gen_efficientnet( + variant, channel_multiplier=1.0, depth_multiplier=1.0, channel_divisor=8, + group_size=None, pretrained=False, **kwargs): + """Creates an EfficientNet model. + + Ref impl: https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/efficientnet_model.py + Paper: https://arxiv.org/abs/1905.11946 + + EfficientNet params + name: (channel_multiplier, depth_multiplier, resolution, dropout_rate) + 'efficientnet-b0': (1.0, 1.0, 224, 0.2), + 'efficientnet-b1': (1.0, 1.1, 240, 0.2), + 'efficientnet-b2': (1.1, 1.2, 260, 0.3), + 'efficientnet-b3': (1.2, 1.4, 300, 0.3), + 'efficientnet-b4': (1.4, 1.8, 380, 0.4), + 'efficientnet-b5': (1.6, 2.2, 456, 0.4), + 'efficientnet-b6': (1.8, 2.6, 528, 0.5), + 'efficientnet-b7': (2.0, 3.1, 600, 0.5), + 'efficientnet-b8': (2.2, 3.6, 672, 0.5), + 'efficientnet-l2': (4.3, 5.3, 800, 0.5), + + Args: + channel_multiplier: multiplier to number of channels per layer + depth_multiplier: multiplier to number of repeats per stage + + """ + arch_def = [ + ['ds_r1_k3_s1_e1_c16_se0.25'], + ['ir_r2_k3_s2_e6_c24_se0.25'], + ['ir_r2_k5_s2_e6_c40_se0.25'], + ['ir_r3_k3_s2_e6_c80_se0.25'], + ['ir_r3_k5_s1_e6_c112_se0.25'], + ['ir_r4_k5_s2_e6_c192_se0.25'], + ['ir_r1_k3_s1_e6_c320_se0.25'], + ] + round_chs_fn = partial(round_channels, multiplier=channel_multiplier, divisor=channel_divisor) + model_kwargs = dict( + block_args=decode_arch_def(arch_def, depth_multiplier, group_size=group_size), + num_features=round_chs_fn(1280), + stem_size=32, + round_chs_fn=round_chs_fn, + act_layer=resolve_act_layer(kwargs, 'swish'), + norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)), + **kwargs, + ) + model = _create_effnet(variant, pretrained, **model_kwargs) + return model + + +def _gen_efficientnet_edge( + variant, channel_multiplier=1.0, depth_multiplier=1.0, group_size=None, pretrained=False, **kwargs): + """ Creates an EfficientNet-EdgeTPU model + + Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/edgetpu + """ + + arch_def = [ + # NOTE `fc` is present to override a mismatch between stem channels and in chs not + # present in other models + ['er_r1_k3_s1_e4_c24_fc24_noskip'], + ['er_r2_k3_s2_e8_c32'], + ['er_r4_k3_s2_e8_c48'], + ['ir_r5_k5_s2_e8_c96'], + ['ir_r4_k5_s1_e8_c144'], + ['ir_r2_k5_s2_e8_c192'], + ] + round_chs_fn = partial(round_channels, multiplier=channel_multiplier) + model_kwargs = dict( + block_args=decode_arch_def(arch_def, depth_multiplier, group_size=group_size), + num_features=round_chs_fn(1280), + stem_size=32, + round_chs_fn=round_chs_fn, + norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)), + act_layer=resolve_act_layer(kwargs, 'relu'), + **kwargs, + ) + model = _create_effnet(variant, pretrained, **model_kwargs) + return model + + +def _gen_efficientnet_condconv( + variant, channel_multiplier=1.0, depth_multiplier=1.0, experts_multiplier=1, pretrained=False, **kwargs): + """Creates an EfficientNet-CondConv model. + + Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/condconv + """ + arch_def = [ + ['ds_r1_k3_s1_e1_c16_se0.25'], + ['ir_r2_k3_s2_e6_c24_se0.25'], + ['ir_r2_k5_s2_e6_c40_se0.25'], + ['ir_r3_k3_s2_e6_c80_se0.25'], + ['ir_r3_k5_s1_e6_c112_se0.25_cc4'], + ['ir_r4_k5_s2_e6_c192_se0.25_cc4'], + ['ir_r1_k3_s1_e6_c320_se0.25_cc4'], + ] + # NOTE unlike official impl, this one uses `cc` option where x is the base number of experts for each stage and + # the expert_multiplier increases that on a per-model basis as with depth/channel multipliers + round_chs_fn = partial(round_channels, multiplier=channel_multiplier) + model_kwargs = dict( + block_args=decode_arch_def(arch_def, depth_multiplier, experts_multiplier=experts_multiplier), + num_features=round_chs_fn(1280), + stem_size=32, + round_chs_fn=round_chs_fn, + norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)), + act_layer=resolve_act_layer(kwargs, 'swish'), + **kwargs, + ) + model = _create_effnet(variant, pretrained, **model_kwargs) + return model + + +def _gen_efficientnet_lite(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs): + """Creates an EfficientNet-Lite model. + + Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite + Paper: https://arxiv.org/abs/1905.11946 + + EfficientNet params + name: (channel_multiplier, depth_multiplier, resolution, dropout_rate) + 'efficientnet-lite0': (1.0, 1.0, 224, 0.2), + 'efficientnet-lite1': (1.0, 1.1, 240, 0.2), + 'efficientnet-lite2': (1.1, 1.2, 260, 0.3), + 'efficientnet-lite3': (1.2, 1.4, 280, 0.3), + 'efficientnet-lite4': (1.4, 1.8, 300, 0.3), + + Args: + channel_multiplier: multiplier to number of channels per layer + depth_multiplier: multiplier to number of repeats per stage + """ + arch_def = [ + ['ds_r1_k3_s1_e1_c16'], + ['ir_r2_k3_s2_e6_c24'], + ['ir_r2_k5_s2_e6_c40'], + ['ir_r3_k3_s2_e6_c80'], + ['ir_r3_k5_s1_e6_c112'], + ['ir_r4_k5_s2_e6_c192'], + ['ir_r1_k3_s1_e6_c320'], + ] + model_kwargs = dict( + block_args=decode_arch_def(arch_def, depth_multiplier, fix_first_last=True), + num_features=1280, + stem_size=32, + fix_stem=True, + round_chs_fn=partial(round_channels, multiplier=channel_multiplier), + act_layer=resolve_act_layer(kwargs, 'relu6'), + norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)), + **kwargs, + ) + model = _create_effnet(variant, pretrained, **model_kwargs) + return model + + +def _gen_efficientnetv2_base( + variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs): + """ Creates an EfficientNet-V2 base model + + Ref impl: https://github.com/google/automl/tree/master/efficientnetv2 + Paper: `EfficientNetV2: Smaller Models and Faster Training` - https://arxiv.org/abs/2104.00298 + """ + arch_def = [ + ['cn_r1_k3_s1_e1_c16_skip'], + ['er_r2_k3_s2_e4_c32'], + ['er_r2_k3_s2_e4_c48'], + ['ir_r3_k3_s2_e4_c96_se0.25'], + ['ir_r5_k3_s1_e6_c112_se0.25'], + ['ir_r8_k3_s2_e6_c192_se0.25'], + ] + round_chs_fn = partial(round_channels, multiplier=channel_multiplier, round_limit=0.) + model_kwargs = dict( + block_args=decode_arch_def(arch_def, depth_multiplier), + num_features=round_chs_fn(1280), + stem_size=32, + round_chs_fn=round_chs_fn, + norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)), + act_layer=resolve_act_layer(kwargs, 'silu'), + **kwargs, + ) + model = _create_effnet(variant, pretrained, **model_kwargs) + return model + + +def _gen_efficientnetv2_s( + variant, channel_multiplier=1.0, depth_multiplier=1.0, group_size=None, rw=False, pretrained=False, **kwargs): + """ Creates an EfficientNet-V2 Small model + + Ref impl: https://github.com/google/automl/tree/master/efficientnetv2 + Paper: `EfficientNetV2: Smaller Models and Faster Training` - https://arxiv.org/abs/2104.00298 + + NOTE: `rw` flag sets up 'small' variant to behave like my initial v2 small model, + before ref the impl was released. + """ + arch_def = [ + ['cn_r2_k3_s1_e1_c24_skip'], + ['er_r4_k3_s2_e4_c48'], + ['er_r4_k3_s2_e4_c64'], + ['ir_r6_k3_s2_e4_c128_se0.25'], + ['ir_r9_k3_s1_e6_c160_se0.25'], + ['ir_r15_k3_s2_e6_c256_se0.25'], + ] + num_features = 1280 + if rw: + # my original variant, based on paper figure differs from the official release + arch_def[0] = ['er_r2_k3_s1_e1_c24'] + arch_def[-1] = ['ir_r15_k3_s2_e6_c272_se0.25'] + num_features = 1792 + + round_chs_fn = partial(round_channels, multiplier=channel_multiplier) + model_kwargs = dict( + block_args=decode_arch_def(arch_def, depth_multiplier, group_size=group_size), + num_features=round_chs_fn(num_features), + stem_size=24, + round_chs_fn=round_chs_fn, + norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)), + act_layer=resolve_act_layer(kwargs, 'silu'), + **kwargs, + ) + model = _create_effnet(variant, pretrained, **model_kwargs) + return model + + +def _gen_efficientnetv2_m(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs): + """ Creates an EfficientNet-V2 Medium model + + Ref impl: https://github.com/google/automl/tree/master/efficientnetv2 + Paper: `EfficientNetV2: Smaller Models and Faster Training` - https://arxiv.org/abs/2104.00298 + """ + + arch_def = [ + ['cn_r3_k3_s1_e1_c24_skip'], + ['er_r5_k3_s2_e4_c48'], + ['er_r5_k3_s2_e4_c80'], + ['ir_r7_k3_s2_e4_c160_se0.25'], + ['ir_r14_k3_s1_e6_c176_se0.25'], + ['ir_r18_k3_s2_e6_c304_se0.25'], + ['ir_r5_k3_s1_e6_c512_se0.25'], + ] + + model_kwargs = dict( + block_args=decode_arch_def(arch_def, depth_multiplier), + num_features=1280, + stem_size=24, + round_chs_fn=partial(round_channels, multiplier=channel_multiplier), + norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)), + act_layer=resolve_act_layer(kwargs, 'silu'), + **kwargs, + ) + model = _create_effnet(variant, pretrained, **model_kwargs) + return model + + +def _gen_efficientnetv2_l(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs): + """ Creates an EfficientNet-V2 Large model + + Ref impl: https://github.com/google/automl/tree/master/efficientnetv2 + Paper: `EfficientNetV2: Smaller Models and Faster Training` - https://arxiv.org/abs/2104.00298 + """ + + arch_def = [ + ['cn_r4_k3_s1_e1_c32_skip'], + ['er_r7_k3_s2_e4_c64'], + ['er_r7_k3_s2_e4_c96'], + ['ir_r10_k3_s2_e4_c192_se0.25'], + ['ir_r19_k3_s1_e6_c224_se0.25'], + ['ir_r25_k3_s2_e6_c384_se0.25'], + ['ir_r7_k3_s1_e6_c640_se0.25'], + ] + + model_kwargs = dict( + block_args=decode_arch_def(arch_def, depth_multiplier), + num_features=1280, + stem_size=32, + round_chs_fn=partial(round_channels, multiplier=channel_multiplier), + norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)), + act_layer=resolve_act_layer(kwargs, 'silu'), + **kwargs, + ) + model = _create_effnet(variant, pretrained, **model_kwargs) + return model + + +def _gen_efficientnetv2_xl(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs): + """ Creates an EfficientNet-V2 Xtra-Large model + + Ref impl: https://github.com/google/automl/tree/master/efficientnetv2 + Paper: `EfficientNetV2: Smaller Models and Faster Training` - https://arxiv.org/abs/2104.00298 + """ + + arch_def = [ + ['cn_r4_k3_s1_e1_c32_skip'], + ['er_r8_k3_s2_e4_c64'], + ['er_r8_k3_s2_e4_c96'], + ['ir_r16_k3_s2_e4_c192_se0.25'], + ['ir_r24_k3_s1_e6_c256_se0.25'], + ['ir_r32_k3_s2_e6_c512_se0.25'], + ['ir_r8_k3_s1_e6_c640_se0.25'], + ] + + model_kwargs = dict( + block_args=decode_arch_def(arch_def, depth_multiplier), + num_features=1280, + stem_size=32, + round_chs_fn=partial(round_channels, multiplier=channel_multiplier), + norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)), + act_layer=resolve_act_layer(kwargs, 'silu'), + **kwargs, + ) + model = _create_effnet(variant, pretrained, **model_kwargs) + return model + + +def _gen_mixnet_s(variant, channel_multiplier=1.0, pretrained=False, **kwargs): + """Creates a MixNet Small model. + + Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet/mixnet + Paper: https://arxiv.org/abs/1907.09595 + """ + arch_def = [ + # stage 0, 112x112 in + ['ds_r1_k3_s1_e1_c16'], # relu + # stage 1, 112x112 in + ['ir_r1_k3_a1.1_p1.1_s2_e6_c24', 'ir_r1_k3_a1.1_p1.1_s1_e3_c24'], # relu + # stage 2, 56x56 in + ['ir_r1_k3.5.7_s2_e6_c40_se0.5_nsw', 'ir_r3_k3.5_a1.1_p1.1_s1_e6_c40_se0.5_nsw'], # swish + # stage 3, 28x28 in + ['ir_r1_k3.5.7_p1.1_s2_e6_c80_se0.25_nsw', 'ir_r2_k3.5_p1.1_s1_e6_c80_se0.25_nsw'], # swish + # stage 4, 14x14in + ['ir_r1_k3.5.7_a1.1_p1.1_s1_e6_c120_se0.5_nsw', 'ir_r2_k3.5.7.9_a1.1_p1.1_s1_e3_c120_se0.5_nsw'], # swish + # stage 5, 14x14in + ['ir_r1_k3.5.7.9.11_s2_e6_c200_se0.5_nsw', 'ir_r2_k3.5.7.9_p1.1_s1_e6_c200_se0.5_nsw'], # swish + # 7x7 + ] + model_kwargs = dict( + block_args=decode_arch_def(arch_def), + num_features=1536, + stem_size=16, + round_chs_fn=partial(round_channels, multiplier=channel_multiplier), + norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)), + **kwargs + ) + model = _create_effnet(variant, pretrained, **model_kwargs) + return model + + +def _gen_mixnet_m(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs): + """Creates a MixNet Medium-Large model. + + Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet/mixnet + Paper: https://arxiv.org/abs/1907.09595 + """ + arch_def = [ + # stage 0, 112x112 in + ['ds_r1_k3_s1_e1_c24'], # relu + # stage 1, 112x112 in + ['ir_r1_k3.5.7_a1.1_p1.1_s2_e6_c32', 'ir_r1_k3_a1.1_p1.1_s1_e3_c32'], # relu + # stage 2, 56x56 in + ['ir_r1_k3.5.7.9_s2_e6_c40_se0.5_nsw', 'ir_r3_k3.5_a1.1_p1.1_s1_e6_c40_se0.5_nsw'], # swish + # stage 3, 28x28 in + ['ir_r1_k3.5.7_s2_e6_c80_se0.25_nsw', 'ir_r3_k3.5.7.9_a1.1_p1.1_s1_e6_c80_se0.25_nsw'], # swish + # stage 4, 14x14in + ['ir_r1_k3_s1_e6_c120_se0.5_nsw', 'ir_r3_k3.5.7.9_a1.1_p1.1_s1_e3_c120_se0.5_nsw'], # swish + # stage 5, 14x14in + ['ir_r1_k3.5.7.9_s2_e6_c200_se0.5_nsw', 'ir_r3_k3.5.7.9_p1.1_s1_e6_c200_se0.5_nsw'], # swish + # 7x7 + ] + model_kwargs = dict( + block_args=decode_arch_def(arch_def, depth_multiplier, depth_trunc='round'), + num_features=1536, + stem_size=24, + round_chs_fn=partial(round_channels, multiplier=channel_multiplier), + norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)), + **kwargs + ) + model = _create_effnet(variant, pretrained, **model_kwargs) + return model + + +def _gen_tinynet( + variant, model_width=1.0, depth_multiplier=1.0, pretrained=False, **kwargs +): + """Creates a TinyNet model. + """ + arch_def = [ + ['ds_r1_k3_s1_e1_c16_se0.25'], ['ir_r2_k3_s2_e6_c24_se0.25'], + ['ir_r2_k5_s2_e6_c40_se0.25'], ['ir_r3_k3_s2_e6_c80_se0.25'], + ['ir_r3_k5_s1_e6_c112_se0.25'], ['ir_r4_k5_s2_e6_c192_se0.25'], + ['ir_r1_k3_s1_e6_c320_se0.25'], + ] + model_kwargs = dict( + block_args=decode_arch_def(arch_def, depth_multiplier, depth_trunc='round'), + num_features=max(1280, round_channels(1280, model_width, 8, None)), + stem_size=32, + fix_stem=True, + round_chs_fn=partial(round_channels, multiplier=model_width), + act_layer=resolve_act_layer(kwargs, 'swish'), + norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)), + **kwargs, + ) + model = _create_effnet(variant, pretrained, **model_kwargs) + return model + + + +@register_model +def efficientnetv2_rw_t(pretrained=False, **kwargs): + """ EfficientNet-V2 Tiny (Custom variant, tiny not in paper). """ + model = _gen_efficientnetv2_s( + 'efficientnetv2_rw_t', channel_multiplier=0.8, depth_multiplier=0.9, rw=False, pretrained=pretrained, **kwargs) + return model + + +@register_model +def gc_efficientnetv2_rw_t(pretrained=False, **kwargs): + """ EfficientNet-V2 Tiny w/ Global Context Attn (Custom variant, tiny not in paper). """ + model = _gen_efficientnetv2_s( + 'gc_efficientnetv2_rw_t', channel_multiplier=0.8, depth_multiplier=0.9, + rw=False, se_layer='gc', pretrained=pretrained, **kwargs) + return model + + +@register_model +def efficientnetv2_rw_s(pretrained=False, **kwargs): + """ EfficientNet-V2 Small (RW variant). + NOTE: This is my initial (pre official code release) w/ some differences. + See efficientnetv2_s and tf_efficientnetv2_s for versions that match the official w/ PyTorch vs TF padding + """ + model = _gen_efficientnetv2_s('efficientnetv2_rw_s', rw=True, pretrained=pretrained, **kwargs) + return model + + +@register_model +def efficientnetv2_rw_m(pretrained=False, **kwargs): + """ EfficientNet-V2 Medium (RW variant). + """ + model = _gen_efficientnetv2_s( + 'efficientnetv2_rw_m', channel_multiplier=1.2, depth_multiplier=(1.2,) * 4 + (1.6,) * 2, rw=True, + pretrained=pretrained, **kwargs) + return model + + +@register_model +def efficientnetv2_s(pretrained=False, **kwargs): + """ EfficientNet-V2 Small. """ + model = _gen_efficientnetv2_s('efficientnetv2_s', pretrained=pretrained, **kwargs) + return model + + +@register_model +def efficientnetv2_m(pretrained=False, **kwargs): + """ EfficientNet-V2 Medium. """ + model = _gen_efficientnetv2_m('efficientnetv2_m', pretrained=pretrained, **kwargs) + return model + + +@register_model +def efficientnetv2_l(pretrained=False, **kwargs): + """ EfficientNet-V2 Large. """ + model = _gen_efficientnetv2_l('efficientnetv2_l', pretrained=pretrained, **kwargs) + return model + + +@register_model +def efficientnetv2_xl(pretrained=False, **kwargs): + """ EfficientNet-V2 Xtra-Large. """ + model = _gen_efficientnetv2_xl('efficientnetv2_xl', pretrained=pretrained, **kwargs) + return model + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +if __name__ == '__main__': + args = parse_args() + model = efficientnetv2_rw_t(num_classes=1000, pretrained='imagenet') + model.cuda() + model.eval() + input = torch.randn(32, 3, 288, 288, device='cuda') + export_onnx_file = args.output_model + + torch.onnx.export(model, + input, + export_onnx_file, + export_params=True, + opset_version=11, + do_constant_folding=True, + input_names = ['input'], + output_names = ['output'], ) + print(" ") + print('Model has been converted to ONNX') + print("exit") + exit() \ No newline at end of file diff --git a/models/cv/classification/efficientnet_v2/ixrt/inference.py b/models/cv/classification/efficientnet_v2/ixrt/inference.py new file mode 100644 index 00000000..62ec18b3 --- /dev/null +++ b/models/cv/classification/efficientnet_v2/ixrt/inference.py @@ -0,0 +1,158 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import json +import os +import re +import time +from tqdm import tqdm + +import cv2 +import numpy as np +import pycuda.autoinit +import pycuda.driver as cuda +import torch +import tensorrt + +from calibration_dataset import getdataloader +from common import eval_batch, create_engine_context, get_io_bindings + +def main(config): + dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + # Load Engine && I/O bindings + engine, context = create_engine_context(config.engine_file, logger) + inputs, outputs, allocations = get_io_bindings(engine) + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + # Inference + if config.test_mode == "FPS": + torch.cuda.synchronize() + start_time = time.time() + + for i in range(config.loop_count): + context.execute_v2(allocations) + + torch.cuda.synchronize() + end_time = time.time() + forward_time = end_time - start_time + + num_samples = 50000 + if config.loop_count * config.bsz < num_samples: + num_samples = config.loop_count * config.bsz + fps = num_samples / forward_time + + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + + elif config.test_mode == "ACC": + + ## Prepare the output data + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + print(f"output shape : {output.shape} output type : {output.dtype}") + + total_sample = 0 + acc_top1, acc_top5 = 0, 0 + + with tqdm(total= len(dataloader)) as _tqdm: + for idx, (batch_data, batch_label) in enumerate(dataloader): + batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) + batch_data = np.ascontiguousarray(batch_data) + total_sample += batch_data.shape[0] + + cuda.memcpy_htod(inputs[0]["allocation"], batch_data) + context.execute_v2(allocations) + cuda.memcpy_dtoh(output, outputs[0]["allocation"]) + + # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model + if len(output.shape) == 4: + output = output.squeeze(axis=(2,3)) + + batch_top1, batch_top5 = eval_batch(output, batch_label) + acc_top1 += batch_top1 + acc_top5 += batch_top5 + + _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), + acc_5='{:.4f}'.format(acc_top5/total_sample)) + _tqdm.update(1) + + print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") + print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") + acc1 = acc_top1/total_sample + print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") + if acc1 >= config.acc_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + parser.add_argument( + "--engine_file", + type=str, + help="engine file path" + ) + parser.add_argument( + "--datasets_dir", + type=str, + default="", + help="ImageNet dir", + ) + parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=224, + help="inference size h,w", + ) + parser.add_argument("--use_async", action="store_true") + parser.add_argument( + "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" + ) + parser.add_argument("--fps_target", type=float, default=-1.0) + parser.add_argument("--acc_target", type=float, default=-1.0) + parser.add_argument("--loop_count", type=int, default=-1) + + config = parser.parse_args() + return config + +if __name__ == "__main__": + config = parse_config() + main(config) diff --git a/models/cv/classification/efficientnet_v2/ixrt/modify_batchsize.py b/models/cv/classification/efficientnet_v2/ixrt/modify_batchsize.py new file mode 100644 index 00000000..4ac42a30 --- /dev/null +++ b/models/cv/classification/efficientnet_v2/ixrt/modify_batchsize.py @@ -0,0 +1,57 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import onnx +import argparse + +def change_input_dim(model, bsz): + batch_size = bsz + + # The following code changes the first dimension of every input to be batch_size + # Modify as appropriate ... note that this requires all inputs to + # have the same batch_size + inputs = model.graph.input + for input in inputs: + # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. + # Add checks as needed. + dim1 = input.type.tensor_type.shape.dim[0] + # update dim to be a symbolic value + if isinstance(batch_size, str): + # set dynamic batch size + dim1.dim_param = batch_size + elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): + # set given batch size + dim1.dim_value = int(batch_size) + else: + # set batch size of 1 + dim1.dim_value = 1 + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--batch_size", type=int) + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +model = onnx.load(args.origin_model) +change_input_dim(model, args.batch_size) +onnx.save(model, args.output_model) + + + + + diff --git a/models/cv/classification/efficientnet_v2/ixrt/quant.py b/models/cv/classification/efficientnet_v2/ixrt/quant.py new file mode 100644 index 00000000..6c06eba2 --- /dev/null +++ b/models/cv/classification/efficientnet_v2/ixrt/quant.py @@ -0,0 +1,167 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""这是一个高度自动化的 PPQ 量化的入口脚本,将你的模型和数据按要求进行打包: + +在自动化 API 中,我们使用 QuantizationSetting 对象传递量化参数。 + +This file will show you how to quantize your network with PPQ + You should prepare your model and calibration dataset as follow: + + ~/working/model.onnx <-- your model + ~/working/data/*.npy or ~/working/data/*.bin <-- your dataset + +if you are using caffe model: + ~/working/model.caffemdoel <-- your model + ~/working/model.prototext <-- your model + +### MAKE SURE YOUR INPUT LAYOUT IS [N, C, H, W] or [C, H, W] ### + +quantized model will be generated at: ~/working/quantized.onnx +""" +from ppq import * +from ppq.api import * +import os +from calibration_dataset import getdataloader +import argparse +import random +import numpy as np +import torch + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str) + parser.add_argument("--dataset_dir", type=str, default="imagenet_val") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], + default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_dir", type=str, help="save path", default=None) + parser.add_argument("--bsz", type=int, default=32) + parser.add_argument("--step", type=int, default=20) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=288) + args = parser.parse_args() + print("Quant config:", args) + print(args.disable_quant_names) + return args + + +config = parse_args() + +# modify configuration below: +WORKING_DIRECTORY = 'checkpoints' # choose your working directory +TARGET_PLATFORM = TargetPlatform.TRT_INT8 # choose your target platform +MODEL_TYPE = NetworkFramework.ONNX # or NetworkFramework.CAFFE +INPUT_LAYOUT = 'chw' # input data layout, chw or hwc +NETWORK_INPUTSHAPE = [32, 3, 288, 288] # input shape of your network +EXECUTING_DEVICE = 'cuda' # 'cuda' or 'cpu'. +REQUIRE_ANALYSE = False +TRAINING_YOUR_NETWORK = False # 是否需要 Finetuning 一下你的网络 +# ------------------------------------------------------------------- +# 加载你的模型文件,PPQ 将会把 onnx 或者 caffe 模型文件解析成自己的格式 +# 如果你正使用 pytorch, tensorflow 等框架,你可以先将模型导出成 onnx +# 使用 torch.onnx.export 即可,如果你在导出 torch 模型时发生错误,欢迎与我们联系。 +# ------------------------------------------------------------------- +graph = None +if MODEL_TYPE == NetworkFramework.ONNX: + graph = load_onnx_graph(onnx_import_file=config.model) +if MODEL_TYPE == NetworkFramework.CAFFE: + graph = load_caffe_graph( + caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'), + prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt')) +assert graph is not None, 'Graph Loading Error, Check your input again.' + +# ------------------------------------------------------------------- +# SETTING 对象用于控制 PPQ 的量化逻辑,主要描述了图融合逻辑、调度方案、量化细节策略等 +# 当你的网络量化误差过高时,你需要修改 SETTING 对象中的属性来进行特定的优化 +# ------------------------------------------------------------------- +QS = QuantizationSettingFactory.default_setting() + +# ------------------------------------------------------------------- +# 下面向你展示了如何使用 finetuning 过程提升量化精度 +# 在 PPQ 中我们提供了十余种算法用来帮助你恢复精度 +# 开启他们的方式都是 QS.xxxx = True +# 按需使用,不要全部打开,容易起飞 +# ------------------------------------------------------------------- +if TRAINING_YOUR_NETWORK: + QS.lsq_optimization = True # 启动网络再训练过程,降低量化误差 + QS.lsq_optimization_setting.steps = 500 # 再训练步数,影响训练时间,500 步大概几分钟 + QS.lsq_optimization_setting.collecting_device = 'cuda' # 缓存数据放在那,cuda 就是放在gpu,如果显存超了你就换成 'cpu' + + + +dataloader = getdataloader(config.dataset_dir, config.step, config.bsz, img_sz=config.imgsz) +# ENABLE CUDA KERNEL 会加速量化效率 3x ~ 10x,但是你如果没有装相应编译环境的话是编译不了的 +# 你可以尝试安装编译环境,或者在不启动 CUDA KERNEL 的情况下完成量化:移除 with ENABLE_CUDA_KERNEL(): 即可 +with ENABLE_CUDA_KERNEL(): + print('网络正量化中,根据你的量化配置,这将需要一段时间:') + quantized = quantize_native_model( + setting=QS, # setting 对象用来控制标准量化逻辑 + model=graph, + calib_dataloader=dataloader, + calib_steps=config.step, + input_shape=NETWORK_INPUTSHAPE, # 如果你的网络只有一个输入,使用这个参数传参 + inputs=None, + # 如果你的网络有多个输入,使用这个参数传参,就是 input_shape=None, inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)] + collate_fn=lambda x: x[0].to(EXECUTING_DEVICE), # collate_fn 跟 torch dataloader 的 collate fn 是一样的,用于数据预处理, + # 你当然也可以用 torch dataloader 的那个,然后设置这个为 None + platform=TARGET_PLATFORM, + device=EXECUTING_DEVICE, + do_quantize=True) + + # ------------------------------------------------------------------- + # 如果你需要执行量化后的神经网络并得到结果,则需要创建一个 executor + # 这个 executor 的行为和 torch.Module 是类似的,你可以利用这个东西来获取执行结果 + # 请注意,必须在 export 之前执行此操作。 + # ------------------------------------------------------------------- + executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE) + # output = executor.forward(input) + + # ------------------------------------------------------------------- + # PPQ 计算量化误差时,使用信噪比的倒数作为指标,即噪声能量 / 信号能量 + # 量化误差 0.1 表示在整体信号中,量化噪声的能量约为 10% + # 你应当注意,在 graphwise_error_analyse 分析中,我们衡量的是累计误差 + # 网络的最后一层往往都具有较大的累计误差,这些误差是其前面的所有层所共同造成的 + # 你需要使用 layerwise_error_analyse 逐层分析误差的来源 + # ------------------------------------------------------------------- + print('正计算网络量化误差(SNR),最后一层的误差应小于 0.1 以保证量化精度:') + reports = graphwise_error_analyse( + graph=quantized, running_device=EXECUTING_DEVICE, steps=32, + dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE)) + for op, snr in reports.items(): + if snr > 0.1: ppq_warning(f'层 {op} 的累计量化误差显著,请考虑进行优化') + + if REQUIRE_ANALYSE: + print('正计算逐层量化误差(SNR),每一层的独立量化误差应小于 0.1 以保证量化精度:') + layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE, + interested_outputs=None, + dataloader=dataloader, collate_fn=lambda x: x.to(EXECUTING_DEVICE)) + + # ------------------------------------------------------------------- + # 使用 export_ppq_graph 函数来导出量化后的模型 + # PPQ 会根据你所选择的导出平台来修改模型格式 + # ------------------------------------------------------------------- + print('网络量化结束,正在生成目标文件:') + export_ppq_graph( + graph=quantized, platform=TARGET_PLATFORM, + graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"), + config_save_to=os.path.join(config.save_dir, 'quant_cfg.json')) \ No newline at end of file diff --git a/models/cv/classification/efficientnet_v2/ixrt/refine_model.py b/models/cv/classification/efficientnet_v2/ixrt/refine_model.py new file mode 100644 index 00000000..000ee4dc --- /dev/null +++ b/models/cv/classification/efficientnet_v2/ixrt/refine_model.py @@ -0,0 +1,291 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import argparse +import dataclasses + +import torch +import onnx + +from refine_utils.matmul_to_gemm_pass import FusedGemmPass +from refine_utils.linear_pass import FusedLinearPass + +from refine_utils.common import * + +def get_constant_input_name_of_operator(graph: Graph, operator: Operator): + const = None + for input in operator.inputs: + if not graph.containe_var(input): + continue + + if not graph.is_leaf_variable(input): + continue + + input_var = graph.get_variable(input) + if input_var.value is not None: + const = input + return const + +class FuseLayerNormPass(BasePass): + + def process(self, graph: Graph) -> Graph: + self.transform = GraphTransform(graph) + find_sequence_subgraph( + graph, + [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD], + self.fuse_layer_norm, + strict=False + ) + return graph + + def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph): + # 检查 REDUCE_MEAN 的输入是否和 SUB 的输入是一致的 + if pattern.nodes[0].operator.inputs[0] != pattern.nodes[1].operator.inputs[0]: + return + + # 检查 POW 的输入是否和 DIV 的输入是一致的 + if pattern.nodes[2].operator.inputs[0] != pattern.nodes[6].operator.inputs[0]: + return + + # 检查部分算子的输出是否被多个算子使用 + nodes = pattern.nodes + for node in [nodes[0]] + nodes[2:-1]: + next_ops = graph.get_next_operators(node.operator) + if len(next_ops) > 1: + return + + eps = None + for input in nodes[4].operator.inputs: + input_var = graph.get_variable(input) + if input_var.value is not None and graph.is_leaf_variable(input): + eps = to_py_type(input_var.value) + + scale = get_constant_input_name_of_operator(graph, nodes[-2].operator) + bias = get_constant_input_name_of_operator(graph, nodes[-1].operator) + + self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) + + bias_var = graph.get_variable(bias) + print(bias_var) + + attributes = { + "axis": nodes[0].operator.attributes.axes, + "epsilon": eps, + } + + + layer_norm_op = self.transform.make_operator( + op_type="LayerNormalization", + inputs=[nodes[0].operator.inputs[0], scale, bias], + outputs=[nodes[-1].operator.outputs[0]], + **attributes + ) + + self.transform.add_operator(layer_norm_op) + +class FusedGeluPass(BasePass): + + def process(self, graph: Graph) -> Graph: + self.transform = GraphTransform(graph) + + find_sequence_subgraph( + graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True + ) + return graph + + def fuse_gelu(self, graph: Graph, pattern: PatternGraph): + nodes = pattern.nodes + prev_op = self.transform.get_previous_operators(nodes[0].operator)[0] + next_ops = self.transform.get_next_operators(prev_op) + if len(next_ops) != 2: + return + + if nodes[0].operator not in next_ops or nodes[3].operator not in next_ops: + return + + gelu_op_input = None + for input in nodes[3].operator.inputs: + if input in nodes[0].operator.inputs: + gelu_op_input = input + break + + self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) + + gelu_op = self.transform.make_operator( + op_type=OP.GELU, + inputs=[gelu_op_input], + outputs=[nodes[-1].operator.outputs[0]] + ) + self.transform.add_operator(gelu_op) + +@dataclasses.dataclass +class NormalizeAttr(BaseOperatorAttr): + p: float = 2.0 + epsilon: float = 1e-12 + axis: int = 1 + + +@registe_operator(OP.GELU) +class GeluOperator(BaseOperator): + + def call( + self, + executor, + operator: Operator, + inputs: List, + attr: NormalizeAttr, + ): + return F.gelu(inputs[0]) + + def convert_onnx_operator( + self, ir_graph: Graph, onnx_graph: onnx.GraphProto, node: onnx.NodeProto + ) -> Operator: + return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr) + + def quantize( + self, + graph: Graph, + op: Operator, + operator_observer_config: QuantOperatorObserverConfig, + quant_outputs: bool = False, + ): + return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs) + + + +class ClearUnsedVariables(BasePass): + + def process(self, graph: Graph) -> Graph: + vars = list(graph.variables) + + for var in vars: + if len(graph.get_dst_operators(var)) == 0 and graph.is_leaf_variable(var): + graph.delete_variable(var) + + quant_params = list(graph.quant_parameters.keys()) + for var in quant_params: + if not graph.containe_var(var): + graph.quant_parameters.pop(var) + + return graph + +class FormatLayerNorm(BasePass): + + def process(self, graph: Graph) -> Graph: + for op in graph.operators.values(): + if "LayerNorm" in op.op_type: + self.format_layer_norm(graph, op) + return graph + + def format_layer_norm(self, graph, operator): + if not hasattr(operator.attributes, "axis"): + return + if isinstance(operator.attributes.axis, (tuple, list)): + operator.attributes.axis = operator.attributes.axis[0] + +class FormatReshape(BasePass): + + def process(self, graph: Graph) -> Graph: + for op in graph.operators.values(): + if op.op_type == "Reshape": + self.format_reshape(graph, op) + + return graph + + def format_reshape(self, graph, operator): + shape = graph.get_variable(operator.inputs[1]) + shape.value = torch.tensor(shape.value, dtype=torch.int64) + +class FormatScalar(BasePass): + + def process(self, graph: Graph): + for var in graph.variables.values(): + var: Variable + use_ops = graph.get_dst_operators(var) + + if len(use_ops) == 0: + continue + + if use_ops[0].op_type not in [OP.MUL, OP.ADD, OP.GATHER]: + continue + + if var.value is not None and var.value.ndim == 0: + var.value = var.value.reshape(1) + print(f"Reshape scalar to tensor for {var.name}.") + + return graph + +class RenamePass(BasePass): + + def process(self, graph:Graph): + + names = [name for name in graph.operators.keys()] + for old_name in names: + new_name = old_name.replace("/", "#") + + graph.rename_operator(old_name, new_name) + + names = [name for name in graph.variables.keys()] + for name in names: + new_name = name.replace("/", ".").replace("Output", "out").replace("output", "out") + + graph.rename_vaiable(name, new_name, + with_variables=True, + with_operator_outputs=True) + + return graph + +def create_pipeline(example_inputs): + return PassSequence( + # FuseLayerNormPass(), + FusedGeluPass(), + + # ClearUnsedVariables(), + # FormatLayerNorm(), + # FormatReshape(), + # FormatScalar(), + # RenamePass() + ) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--onnx_path", type=str) + parser.add_argument("--dst_onnx_path", type=str) + + parser.add_argument("--bsz", type=int, default=8, + help="Batch size") + parser.add_argument("--imgsz", type=int, default=224, + help="Image size") + + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + + example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz) + + refine_pipline = Pipeline( + create_source(f"{args.onnx_path}", example_inputs=example_inputs), + create_pipeline(example_inputs), + create_target( + f"{args.dst_onnx_path}", + example_inputs=example_inputs, + ) + ) + refine_pipline.run() + + print(f"refine the model, input shape={example_inputs.shape}") diff --git a/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_fp16_accuracy.sh b/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_fp16_accuracy.sh new file mode 100755 index 00000000..e62cc5d7 --- /dev/null +++ b/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_fp16_accuracy.sh @@ -0,0 +1,145 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +TGT=-1 +WARM_UP=0 +LOOP_COUNT=-1 +RUN_MODE=ACC +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Model Input Name : ${MODEL_INPUT_NAME} +echo Model Output Name : ${MODEL_OUTPUT_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --engine_file=${ENGINE_FILE} \ + --datasets_dir=${DATASETS_DIR} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --acc_target ${TGT} \ + --bsz ${BSZ}; check_status + +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_fp16_performance.sh b/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_fp16_performance.sh new file mode 100755 index 00000000..05c9986f --- /dev/null +++ b/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_fp16_performance.sh @@ -0,0 +1,145 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +TGT=-1 +WARM_UP=3 +LOOP_COUNT=20 +RUN_MODE=FPS +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Model Input Name : ${MODEL_INPUT_NAME} +echo Model Output Name : ${MODEL_OUTPUT_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --engine_file=${ENGINE_FILE} \ + --datasets_dir=${DATASETS_DIR} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --fps_target ${TGT} \ + --bsz ${BSZ}; check_status + +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_int8_accuracy.sh b/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_int8_accuracy.sh new file mode 100755 index 00000000..a58f44b3 --- /dev/null +++ b/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_int8_accuracy.sh @@ -0,0 +1,146 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +set -x +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +TGT=-1 +WARM_UP=0 +LOOP_COUNT=-1 +RUN_MODE=ACC +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +echo ${QUANT_OBSERVER} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Model Input Name : ${MODEL_INPUT_NAME} +echo Model Output Name : ${MODEL_OUTPUT_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + +# Simplify Model +let step++ + echo [STEP ${step}] : Simplify Model + if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed + else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} + fi + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + + # Change Batchsize + let step++ + echo; + echo [STEP ${step}] : Change Batchsize + FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx + if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed + else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} + fi + + # Build Engine + let step++ + echo; + echo [STEP ${step}] : Build Engine + ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine + if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed + else + python3 ${RUN_DIR}/build_i8_engine.py \ + --onnx ${FINAL_MODEL} \ + --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} + fi + +# Inference +# let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --engine_file=${ENGINE_FILE} \ + --datasets_dir=${DATASETS_DIR} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --acc_target ${TGT} \ + --bsz ${BSZ}; check_status + +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_int8_performance.sh b/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_int8_performance.sh new file mode 100755 index 00000000..07872405 --- /dev/null +++ b/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_int8_performance.sh @@ -0,0 +1,145 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +TGT=-1 +WARM_UP=3 +LOOP_COUNT=20 +RUN_MODE=FPS +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +echo ${QUANT_OBSERVER} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Model Input Name : ${MODEL_INPUT_NAME} +echo Model Output Name : ${MODEL_OUTPUT_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + +# Simplify Model +let step++ + echo [STEP ${step}] : Simplify Model + if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed + else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} + fi + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + + # Change Batchsize + let step++ + echo; + echo [STEP ${step}] : Change Batchsize + FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx + if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed + else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} + fi + + # Build Engine + let step++ + echo; + echo [STEP ${step}] : Build Engine + ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine + if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed + else + python3 ${RUN_DIR}/build_i8_engine.py \ + --onnx ${FINAL_MODEL} \ + --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} + fi + +# Inference +# let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --engine_file=${ENGINE_FILE} \ + --datasets_dir=${DATASETS_DIR} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --acc_target ${TGT} \ + --bsz ${BSZ}; check_status + +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/efficientnet_v2/ixrt/simplify_model.py b/models/cv/classification/efficientnet_v2/ixrt/simplify_model.py new file mode 100644 index 00000000..4d53a474 --- /dev/null +++ b/models/cv/classification/efficientnet_v2/ixrt/simplify_model.py @@ -0,0 +1,41 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import onnx +import argparse +from onnxsim import simplify + +# Simplify +def simplify_model(args): + onnx_model = onnx.load(args.origin_model) + model_simp, check = simplify(onnx_model) + model_simp = onnx.shape_inference.infer_shapes(model_simp) + onnx.save(model_simp, args.output_model) + print(" Simplify onnx Done.") + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + parser.add_argument("--reshape", action="store_true") + args = parser.parse_args() + return args + +args = parse_args() +simplify_model(args) + + + + -- Gitee From 32fc2f09f39aa8ac1fa7fe35b2c09d1e3e4b412f Mon Sep 17 00:00:00 2001 From: tianxi-yi Date: Wed, 22 May 2024 13:33:35 +0800 Subject: [PATCH 2/5] fix README.md with Model Conversion --- .../cv/classification/efficientnet_v2/ixrt/README.md | 4 ++-- .../efficientnet_v2/ixrt/export_onnx.py | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/models/cv/classification/efficientnet_v2/ixrt/README.md b/models/cv/classification/efficientnet_v2/ixrt/README.md index d06cff37..2e5228f1 100755 --- a/models/cv/classification/efficientnet_v2/ixrt/README.md +++ b/models/cv/classification/efficientnet_v2/ixrt/README.md @@ -27,8 +27,8 @@ Dataset: to download the validation dat mkdir checkpoints git clone https://github.com/huggingface/pytorch-image-models.git cp /Path/to/ixrt/export_onnx.py pytorch-image-models/timm/models -cd pytorch-image-models/timm/models -python3 export_onnx.py --origin_model /path/to/efficientnetv2_t_agc-3620981a.pth --output_model checkpoints/efficientnet.onnx +cd pytorch-image-models/timm +python3 -m models.export_onnx --output_model checkpoints/efficientnet.onnx ``` ## Inference diff --git a/models/cv/classification/efficientnet_v2/ixrt/export_onnx.py b/models/cv/classification/efficientnet_v2/ixrt/export_onnx.py index 2ada1df1..4af35a04 100755 --- a/models/cv/classification/efficientnet_v2/ixrt/export_onnx.py +++ b/models/cv/classification/efficientnet_v2/ixrt/export_onnx.py @@ -21,13 +21,13 @@ import torch.nn as nn import torch.nn.functional as F from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD -from _efficientnet_blocks import SqueezeExcite -from _efficientnet_builder import EfficientNetBuilder, decode_arch_def, efficientnet_init_weights,\ +from ._efficientnet_blocks import SqueezeExcite +from ._efficientnet_builder import EfficientNetBuilder, decode_arch_def, efficientnet_init_weights,\ round_channels, resolve_bn_args, resolve_act_layer, BN_EPS_TF_DEFAULT -from features import FeatureInfo, FeatureHooks -from helpers import build_model_with_cfg, pretrained_cfg_for_features, checkpoint_seq -from layers import create_conv2d, create_classifier, get_norm_act_layer, EvoNorm2dS0, GroupNormAct -from registry import register_model +from .features import FeatureInfo, FeatureHooks +from .helpers import build_model_with_cfg, pretrained_cfg_for_features, checkpoint_seq +from .layers import create_conv2d, create_classifier, get_norm_act_layer, EvoNorm2dS0, GroupNormAct +from .registry import register_model import argparse import ssl -- Gitee From 8d7e1722e9f52480a1d39e0f6bab68b353266e00 Mon Sep 17 00:00:00 2001 From: tianxi-yi Date: Wed, 22 May 2024 15:36:18 +0800 Subject: [PATCH 3/5] Add _builder.py and fix README.md --- .../efficientnet_v2/ixrt/README.md | 4 + .../efficientnet_v2/ixrt/_builder.py | 480 ++++++++++++++++++ 2 files changed, 484 insertions(+) create mode 100755 models/cv/classification/efficientnet_v2/ixrt/_builder.py diff --git a/models/cv/classification/efficientnet_v2/ixrt/README.md b/models/cv/classification/efficientnet_v2/ixrt/README.md index 2e5228f1..5071a1ab 100755 --- a/models/cv/classification/efficientnet_v2/ixrt/README.md +++ b/models/cv/classification/efficientnet_v2/ixrt/README.md @@ -27,7 +27,11 @@ Dataset: to download the validation dat mkdir checkpoints git clone https://github.com/huggingface/pytorch-image-models.git cp /Path/to/ixrt/export_onnx.py pytorch-image-models/timm/models +cd pytorch-image-models/timm/models +rm _builder.py +mv /Path/ixrt/_builder.py pytorch-image-models/timm/models cd pytorch-image-models/timm +mv /Path/to/efficientnetv2_t_agc-3620981a.pth /root/.cache/torch/hub/checkpoints/ python3 -m models.export_onnx --output_model checkpoints/efficientnet.onnx ``` diff --git a/models/cv/classification/efficientnet_v2/ixrt/_builder.py b/models/cv/classification/efficientnet_v2/ixrt/_builder.py new file mode 100755 index 00000000..7246c0d5 --- /dev/null +++ b/models/cv/classification/efficientnet_v2/ixrt/_builder.py @@ -0,0 +1,480 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import dataclasses +import logging +import os +from copy import deepcopy +from typing import Any, Callable, Dict, List, Optional, Tuple + +from torch import nn as nn +from torch.hub import load_state_dict_from_url + +from timm.models._features import FeatureListNet, FeatureDictNet, FeatureHookNet, FeatureGetterNet +from timm.models._features_fx import FeatureGraphNet +from timm.models._helpers import load_state_dict +from timm.models._hub import has_hf_hub, download_cached_file, check_cached_file, load_state_dict_from_hf,\ + load_custom_from_hf +from timm.models._manipulate import adapt_input_conv +from timm.models._pretrained import PretrainedCfg +from timm.models._prune import adapt_model_from_file +from timm.models._registry import get_pretrained_cfg + +_logger = logging.getLogger(__name__) + +# Global variables for rarely used pretrained checkpoint download progress and hash check. +# Use set_pretrained_download_progress / set_pretrained_check_hash functions to toggle. +_DOWNLOAD_PROGRESS = False +_CHECK_HASH = False +_USE_OLD_CACHE = int(os.environ.get('TIMM_USE_OLD_CACHE', 0)) > 0 + +__all__ = ['set_pretrained_download_progress', 'set_pretrained_check_hash', 'load_custom_pretrained', 'load_pretrained', + 'pretrained_cfg_for_features', 'resolve_pretrained_cfg', 'build_model_with_cfg'] + + +def _resolve_pretrained_source(pretrained_cfg): + cfg_source = pretrained_cfg.get('source', '') + pretrained_url = pretrained_cfg.get('url', None) + pretrained_file = pretrained_cfg.get('file', None) + pretrained_sd = pretrained_cfg.get('state_dict', None) + hf_hub_id = pretrained_cfg.get('hf_hub_id', None) + + # resolve where to load pretrained weights from + load_from = '' + pretrained_loc = '' + if cfg_source == 'hf-hub' and has_hf_hub(necessary=True): + # hf-hub specified as source via model identifier + load_from = 'hf-hub' + assert hf_hub_id + pretrained_loc = hf_hub_id + else: + # default source == timm or unspecified + if pretrained_sd: + # direct state_dict pass through is the highest priority + load_from = 'state_dict' + pretrained_loc = pretrained_sd + assert isinstance(pretrained_loc, dict) + elif pretrained_file: + # file load override is the second-highest priority if set + load_from = 'file' + pretrained_loc = pretrained_file + else: + old_cache_valid = False + if _USE_OLD_CACHE: + # prioritized old cached weights if exists and env var enabled + old_cache_valid = check_cached_file(pretrained_url) if pretrained_url else False + if not old_cache_valid and hf_hub_id and has_hf_hub(necessary=True): + # hf-hub available as alternate weight source in default_cfg + load_from = 'hf-hub' + pretrained_loc = hf_hub_id + elif pretrained_url: + load_from = 'url' + pretrained_loc = pretrained_url + + if load_from == 'hf-hub' and pretrained_cfg.get('hf_hub_filename', None): + # if a filename override is set, return tuple for location w/ (hub_id, filename) + pretrained_loc = pretrained_loc, pretrained_cfg['hf_hub_filename'] + return load_from, pretrained_loc + + +def set_pretrained_download_progress(enable=True): + """ Set download progress for pretrained weights on/off (globally). """ + global _DOWNLOAD_PROGRESS + _DOWNLOAD_PROGRESS = enable + + +def set_pretrained_check_hash(enable=True): + """ Set hash checking for pretrained weights on/off (globally). """ + global _CHECK_HASH + _CHECK_HASH = enable + + +def load_custom_pretrained( + model: nn.Module, + pretrained_cfg: Optional[Dict] = None, + load_fn: Optional[Callable] = None, +): + r"""Loads a custom (read non .pth) weight file + + Downloads checkpoint file into cache-dir like torch.hub based loaders, but calls + a passed in custom load fun, or the `load_pretrained` model member fn. + + If the object is already present in `model_dir`, it's deserialized and returned. + The default value of `model_dir` is ``/checkpoints`` where + `hub_dir` is the directory returned by :func:`~torch.hub.get_dir`. + + Args: + model: The instantiated model to load weights into + pretrained_cfg (dict): Default pretrained model cfg + load_fn: An external standalone fn that loads weights into provided model, otherwise a fn named + 'laod_pretrained' on the model will be called if it exists + """ + pretrained_cfg = pretrained_cfg or getattr(model, 'pretrained_cfg', None) + if not pretrained_cfg: + _logger.warning("Invalid pretrained config, cannot load weights.") + return + + load_from, pretrained_loc = _resolve_pretrained_source(pretrained_cfg) + if not load_from: + _logger.warning("No pretrained weights exist for this model. Using random initialization.") + return + if load_from == 'hf-hub': + _logger.warning("Hugging Face hub not currently supported for custom load pretrained models.") + elif load_from == 'url': + pretrained_loc = download_cached_file( + pretrained_loc, + check_hash=_CHECK_HASH, + progress=_DOWNLOAD_PROGRESS, + ) + + if load_fn is not None: + load_fn(model, pretrained_loc) + elif hasattr(model, 'load_pretrained'): + model.load_pretrained(pretrained_loc) + else: + _logger.warning("Valid function to load pretrained weights is not available, using random initialization.") + + +def load_pretrained( + model: nn.Module, + pretrained_cfg: Optional[Dict] = None, + num_classes: int = 1000, + in_chans: int = 3, + filter_fn: Optional[Callable] = None, + strict: bool = True, +): + """ Load pretrained checkpoint + + Args: + model (nn.Module) : PyTorch model module + pretrained_cfg (Optional[Dict]): configuration for pretrained weights / target dataset + num_classes (int): num_classes for target model + in_chans (int): in_chans for target model + filter_fn (Optional[Callable]): state_dict filter fn for load (takes state_dict, model as args) + strict (bool): strict load of checkpoint + + """ + pretrained_cfg = pretrained_cfg or getattr(model, 'pretrained_cfg', None) + if not pretrained_cfg: + raise RuntimeError("Invalid pretrained config, cannot load weights. Use `pretrained=False` for random init.") + + load_from, pretrained_loc = _resolve_pretrained_source(pretrained_cfg) + load_from = 'url' + pretrained_loc = 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnetv2_t_agc-3620981a.pth' + if load_from == 'state_dict': + _logger.info(f'Loading pretrained weights from state dict') + state_dict = pretrained_loc # pretrained_loc is the actual state dict for this override + elif load_from == 'file': + _logger.info(f'Loading pretrained weights from file ({pretrained_loc})') + if pretrained_cfg.get('custom_load', False): + model.load_pretrained(pretrained_loc) + return + else: + state_dict = load_state_dict(pretrained_loc) + elif load_from == 'url': + _logger.info(f'Loading pretrained weights from url ({pretrained_loc})') + if pretrained_cfg.get('custom_load', False): + pretrained_loc = download_cached_file( + pretrained_loc, + progress=_DOWNLOAD_PROGRESS, + check_hash=_CHECK_HASH, + ) + model.load_pretrained("/home/xinchi.tian/pytorch-image-models/timm/models/efficientnetv2_t_agc-3620981a.pth") + return + else: + state_dict = load_state_dict_from_url( + pretrained_loc, + map_location='cpu', + progress=_DOWNLOAD_PROGRESS, + check_hash=_CHECK_HASH, + ) + elif load_from == 'hf-hub': + _logger.info(f'Loading pretrained weights from Hugging Face hub ({pretrained_loc})') + if isinstance(pretrained_loc, (list, tuple)): + custom_load = pretrained_cfg.get('custom_load', False) + if isinstance(custom_load, str) and custom_load == 'hf': + load_custom_from_hf(*pretrained_loc, model) + return + else: + state_dict = load_state_dict_from_hf(*pretrained_loc) + else: + state_dict = load_state_dict_from_hf(pretrained_loc) + else: + model_name = pretrained_cfg.get('architecture', 'this model') + raise RuntimeError(f"No pretrained weights exist for {model_name}. Use `pretrained=False` for random init.") + + if filter_fn is not None: + try: + state_dict = filter_fn(state_dict, model) + except TypeError as e: + # for backwards compat with filter fn that take one arg + state_dict = filter_fn(state_dict) + + input_convs = pretrained_cfg.get('first_conv', None) + if input_convs is not None and in_chans != 3: + if isinstance(input_convs, str): + input_convs = (input_convs,) + for input_conv_name in input_convs: + weight_name = input_conv_name + '.weight' + try: + state_dict[weight_name] = adapt_input_conv(in_chans, state_dict[weight_name]) + _logger.info( + f'Converted input conv {input_conv_name} pretrained weights from 3 to {in_chans} channel(s)') + except NotImplementedError as e: + del state_dict[weight_name] + strict = False + _logger.warning( + f'Unable to convert pretrained {input_conv_name} weights, using random init for this layer.') + + classifiers = pretrained_cfg.get('classifier', None) + label_offset = pretrained_cfg.get('label_offset', 0) + if classifiers is not None: + if isinstance(classifiers, str): + classifiers = (classifiers,) + if num_classes != pretrained_cfg['num_classes']: + for classifier_name in classifiers: + # completely discard fully connected if model num_classes doesn't match pretrained weights + state_dict.pop(classifier_name + '.weight', None) + state_dict.pop(classifier_name + '.bias', None) + strict = False + elif label_offset > 0: + for classifier_name in classifiers: + # special case for pretrained weights with an extra background class in pretrained weights + classifier_weight = state_dict[classifier_name + '.weight'] + state_dict[classifier_name + '.weight'] = classifier_weight[label_offset:] + classifier_bias = state_dict[classifier_name + '.bias'] + state_dict[classifier_name + '.bias'] = classifier_bias[label_offset:] + + load_result = model.load_state_dict(state_dict, strict=strict) + if load_result.missing_keys: + _logger.info( + f'Missing keys ({", ".join(load_result.missing_keys)}) discovered while loading pretrained weights.' + f' This is expected if model is being adapted.') + if load_result.unexpected_keys: + _logger.warning( + f'Unexpected keys ({", ".join(load_result.unexpected_keys)}) found while loading pretrained weights.' + f' This may be expected if model is being adapted.') + + +def pretrained_cfg_for_features(pretrained_cfg): + pretrained_cfg = deepcopy(pretrained_cfg) + # remove default pretrained cfg fields that don't have much relevance for feature backbone + to_remove = ('num_classes', 'classifier', 'global_pool') # add default final pool size? + for tr in to_remove: + pretrained_cfg.pop(tr, None) + return pretrained_cfg + + +def _filter_kwargs(kwargs, names): + if not kwargs or not names: + return + for n in names: + kwargs.pop(n, None) + + +def _update_default_model_kwargs(pretrained_cfg, kwargs, kwargs_filter): + """ Update the default_cfg and kwargs before passing to model + + Args: + pretrained_cfg: input pretrained cfg (updated in-place) + kwargs: keyword args passed to model build fn (updated in-place) + kwargs_filter: keyword arg keys that must be removed before model __init__ + """ + # Set model __init__ args that can be determined by default_cfg (if not already passed as kwargs) + default_kwarg_names = ('num_classes', 'global_pool', 'in_chans') + if pretrained_cfg.get('fixed_input_size', False): + # if fixed_input_size exists and is True, model takes an img_size arg that fixes its input size + default_kwarg_names += ('img_size',) + + for n in default_kwarg_names: + # for legacy reasons, model __init__args uses img_size + in_chans as separate args while + # pretrained_cfg has one input_size=(C, H ,W) entry + if n == 'img_size': + input_size = pretrained_cfg.get('input_size', None) + if input_size is not None: + assert len(input_size) == 3 + kwargs.setdefault(n, input_size[-2:]) + elif n == 'in_chans': + input_size = pretrained_cfg.get('input_size', None) + if input_size is not None: + assert len(input_size) == 3 + kwargs.setdefault(n, input_size[0]) + elif n == 'num_classes': + default_val = pretrained_cfg.get(n, None) + # if default is < 0, don't pass through to model + if default_val is not None and default_val >= 0: + kwargs.setdefault(n, pretrained_cfg[n]) + else: + default_val = pretrained_cfg.get(n, None) + if default_val is not None: + kwargs.setdefault(n, pretrained_cfg[n]) + + # Filter keyword args for task specific model variants (some 'features only' models, etc.) + _filter_kwargs(kwargs, names=kwargs_filter) + + +def resolve_pretrained_cfg( + variant: str, + pretrained_cfg=None, + pretrained_cfg_overlay=None, +) -> PretrainedCfg: + model_with_tag = variant + pretrained_tag = None + if pretrained_cfg: + if isinstance(pretrained_cfg, dict): + # pretrained_cfg dict passed as arg, validate by converting to PretrainedCfg + pretrained_cfg = PretrainedCfg(**pretrained_cfg) + elif isinstance(pretrained_cfg, str): + pretrained_tag = pretrained_cfg + pretrained_cfg = None + + # fallback to looking up pretrained cfg in model registry by variant identifier + if not pretrained_cfg: + if pretrained_tag: + model_with_tag = '.'.join([variant, pretrained_tag]) + pretrained_cfg = get_pretrained_cfg(model_with_tag) + + if not pretrained_cfg: + _logger.warning( + f"No pretrained configuration specified for {model_with_tag} model. Using a default." + f" Please add a config to the model pretrained_cfg registry or pass explicitly.") + pretrained_cfg = PretrainedCfg() # instance with defaults + + pretrained_cfg_overlay = pretrained_cfg_overlay or {} + if not pretrained_cfg.architecture: + pretrained_cfg_overlay.setdefault('architecture', variant) + pretrained_cfg = dataclasses.replace(pretrained_cfg, **pretrained_cfg_overlay) + + return pretrained_cfg + + +def build_model_with_cfg( + model_cls: Callable, + variant: str, + pretrained: bool, + pretrained_cfg: Optional[Dict] = None, + pretrained_cfg_overlay: Optional[Dict] = None, + model_cfg: Optional[Any] = None, + feature_cfg: Optional[Dict] = None, + pretrained_strict: bool = True, + pretrained_filter_fn: Optional[Callable] = None, + kwargs_filter: Optional[Tuple[str]] = None, + **kwargs, +): + """ Build model with specified default_cfg and optional model_cfg + + This helper fn aids in the construction of a model including: + * handling default_cfg and associated pretrained weight loading + * passing through optional model_cfg for models with config based arch spec + * features_only model adaptation + * pruning config / model adaptation + + Args: + model_cls: model class + variant: model variant name + pretrained: load pretrained weights + pretrained_cfg: model's pretrained weight/task config + model_cfg: model's architecture config + feature_cfg: feature extraction adapter config + pretrained_strict: load pretrained weights strictly + pretrained_filter_fn: filter callable for pretrained weights + kwargs_filter: kwargs to filter before passing to model + **kwargs: model args passed through to model __init__ + """ + pruned = kwargs.pop('pruned', False) + features = False + feature_cfg = feature_cfg or {} + + # resolve and update model pretrained config and model kwargs + pretrained_cfg = resolve_pretrained_cfg( + variant, + pretrained_cfg=pretrained_cfg, + pretrained_cfg_overlay=pretrained_cfg_overlay + ) + + # FIXME converting back to dict, PretrainedCfg use should be propagated further, but not into model + pretrained_cfg = pretrained_cfg.to_dict() + + _update_default_model_kwargs(pretrained_cfg, kwargs, kwargs_filter) + + # Setup for feature extraction wrapper done at end of this fn + if kwargs.pop('features_only', False): + features = True + feature_cfg.setdefault('out_indices', (0, 1, 2, 3, 4)) + if 'out_indices' in kwargs: + feature_cfg['out_indices'] = kwargs.pop('out_indices') + if 'feature_cls' in kwargs: + feature_cfg['feature_cls'] = kwargs.pop('feature_cls') + + # Instantiate the model + if model_cfg is None: + model = model_cls(**kwargs) + else: + model = model_cls(cfg=model_cfg, **kwargs) + model.pretrained_cfg = pretrained_cfg + model.default_cfg = model.pretrained_cfg # alias for backwards compat + + if pruned: + model = adapt_model_from_file(model, variant) + + # For classification models, check class attr, then kwargs, then default to 1k, otherwise 0 for feats + num_classes_pretrained = 0 if features else getattr(model, 'num_classes', kwargs.get('num_classes', 1000)) + if pretrained: + load_pretrained( + model, + pretrained_cfg=pretrained_cfg, + num_classes=num_classes_pretrained, + in_chans=kwargs.get('in_chans', 3), + filter_fn=pretrained_filter_fn, + strict=pretrained_strict, + ) + + # Wrap the model in a feature extraction module if enabled + if features: + use_getter = False + if 'feature_cls' in feature_cfg: + feature_cls = feature_cfg.pop('feature_cls') + if isinstance(feature_cls, str): + feature_cls = feature_cls.lower() + + # flatten_sequential only valid for some feature extractors + if feature_cls not in ('dict', 'list', 'hook'): + feature_cfg.pop('flatten_sequential', None) + + if 'hook' in feature_cls: + feature_cls = FeatureHookNet + elif feature_cls == 'list': + feature_cls = FeatureListNet + elif feature_cls == 'dict': + feature_cls = FeatureDictNet + elif feature_cls == 'fx': + feature_cls = FeatureGraphNet + elif feature_cls == 'getter': + use_getter = True + feature_cls = FeatureGetterNet + else: + assert False, f'Unknown feature class {feature_cls}' + else: + feature_cls = FeatureListNet + + output_fmt = getattr(model, 'output_fmt', None) + if output_fmt is not None and not use_getter: # don't set default for intermediate feat getter + feature_cfg.setdefault('output_fmt', output_fmt) + + model = feature_cls(model, **feature_cfg) + model.pretrained_cfg = pretrained_cfg_for_features(pretrained_cfg) # add back pretrained cfg + model.default_cfg = model.pretrained_cfg # alias for rename backwards compat (default_cfg -> pretrained_cfg) + + return model -- Gitee From e644d4ee832178860a12382fdfb19ce8a87e2887 Mon Sep 17 00:00:00 2001 From: tianxi-yi Date: Wed, 22 May 2024 17:17:41 +0800 Subject: [PATCH 4/5] Fix the file path for correction --- models/cv/classification/efficientnet_v2/ixrt/README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/models/cv/classification/efficientnet_v2/ixrt/README.md b/models/cv/classification/efficientnet_v2/ixrt/README.md index 5071a1ab..aa6aa4c0 100755 --- a/models/cv/classification/efficientnet_v2/ixrt/README.md +++ b/models/cv/classification/efficientnet_v2/ixrt/README.md @@ -31,7 +31,8 @@ cd pytorch-image-models/timm/models rm _builder.py mv /Path/ixrt/_builder.py pytorch-image-models/timm/models cd pytorch-image-models/timm -mv /Path/to/efficientnetv2_t_agc-3620981a.pth /root/.cache/torch/hub/checkpoints/ +mkdir -p /root/.cache/torch/hub/checkpoints/ +wget -P /root/.cache/torch/hub/checkpoints/ https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnetv2_t_agc-3620981a.pth python3 -m models.export_onnx --output_model checkpoints/efficientnet.onnx ``` @@ -56,9 +57,9 @@ bash script/infer_efficientnet_fp16_performance.sh ### INT8 ```bash # Accuracy -bash script/infer_efficientnet_int8_accuracy.sh +bash scripts/infer_efficientnet_int8_accuracy.sh # Performance -bash script/infer_efficientnet_int8_performance.sh +bash scripts/infer_efficientnet_int8_performance.sh ``` -- Gitee From 768a2f5b2e5fefa03f68acaa9a903f5710171729 Mon Sep 17 00:00:00 2001 From: tianxi-yi Date: Wed, 22 May 2024 17:30:20 +0800 Subject: [PATCH 5/5] Fix script to scripts --- models/cv/classification/efficientnet_v2/ixrt/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/cv/classification/efficientnet_v2/ixrt/README.md b/models/cv/classification/efficientnet_v2/ixrt/README.md index aa6aa4c0..fda6110a 100755 --- a/models/cv/classification/efficientnet_v2/ixrt/README.md +++ b/models/cv/classification/efficientnet_v2/ixrt/README.md @@ -49,9 +49,9 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python ```bash # Accuracy -bash script/infer_efficientnet_fp16_accuracy.sh +bash scripts/infer_efficientnet_fp16_accuracy.sh # Performance -bash script/infer_efficientnet_fp16_performance.sh +bash scripts/infer_efficientnet_fp16_performance.sh ``` ### INT8 -- Gitee