From fd4fa5440ec0fc826acf5601b17355456f057d38 Mon Sep 17 00:00:00 2001 From: zhangyanmin <2716635239@qq.com> Date: Wed, 21 Sep 2022 09:34:03 +0000 Subject: [PATCH 1/3] Update cla MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit put Evo-Levit_256_384 into correct file_path update Readme and test updata performance 1P and 8P sh 删除文件 Evo-Levit_256_384 push into one commit delete custom_tune_bank and modified test/env_npu.sh support 1P obtain the fps of 1000 steps print 1000 fps then exit env_npu add FPS in full.sh update Readme update right Readme and fps --- .../classification/Evo-Levit_256_384/LICENSE | 13 + .../Evo-Levit_256_384/README.md | 154 ++++ .../Evo-Levit_256_384/benchmark.py | 486 +++++++++++ .../Evo-Levit_256_384/datasets.py | 185 ++++ .../Evo-Levit_256_384/engine_levit.py | 214 +++++ .../Evo-Levit_256_384/fix_timm/mixup.py | 319 +++++++ .../fix_timm/optim_factory.py | 187 ++++ .../Evo-Levit_256_384/levit/evo_levit.py | 788 +++++++++++++++++ .../Evo-Levit_256_384/levit/evo_levit_384.py | 813 ++++++++++++++++++ .../Evo-Levit_256_384/levit/losses_levit.py | 127 +++ .../Evo-Levit_256_384/main_levit.py | 506 +++++++++++ .../Evo-Levit_256_384/samplers.py | 74 ++ .../Evo-Levit_256_384/test/env_npu.sh | 68 ++ .../Evo-Levit_256_384/test/train_full_1P.sh | 116 +++ .../Evo-Levit_256_384/test/train_full_8P.sh | 115 +++ .../test/train_performance_1P.sh | 113 +++ .../test/train_performance_8P.sh | 127 +++ .../classification/Evo-Levit_256_384/utils.py | 277 ++++++ .../Evo-Levit_256_384/visualize.py | 599 +++++++++++++ 19 files changed, 5281 insertions(+) create mode 100644 PyTorch/contrib/cv/classification/Evo-Levit_256_384/LICENSE create mode 100644 PyTorch/contrib/cv/classification/Evo-Levit_256_384/README.md create mode 100644 PyTorch/contrib/cv/classification/Evo-Levit_256_384/benchmark.py create mode 100644 PyTorch/contrib/cv/classification/Evo-Levit_256_384/datasets.py create mode 100644 PyTorch/contrib/cv/classification/Evo-Levit_256_384/engine_levit.py create mode 100644 PyTorch/contrib/cv/classification/Evo-Levit_256_384/fix_timm/mixup.py create mode 100644 PyTorch/contrib/cv/classification/Evo-Levit_256_384/fix_timm/optim_factory.py create mode 100644 PyTorch/contrib/cv/classification/Evo-Levit_256_384/levit/evo_levit.py create mode 100644 PyTorch/contrib/cv/classification/Evo-Levit_256_384/levit/evo_levit_384.py create mode 100644 PyTorch/contrib/cv/classification/Evo-Levit_256_384/levit/losses_levit.py create mode 100644 PyTorch/contrib/cv/classification/Evo-Levit_256_384/main_levit.py create mode 100644 PyTorch/contrib/cv/classification/Evo-Levit_256_384/samplers.py create mode 100644 PyTorch/contrib/cv/classification/Evo-Levit_256_384/test/env_npu.sh create mode 100644 PyTorch/contrib/cv/classification/Evo-Levit_256_384/test/train_full_1P.sh create mode 100644 PyTorch/contrib/cv/classification/Evo-Levit_256_384/test/train_full_8P.sh create mode 100644 PyTorch/contrib/cv/classification/Evo-Levit_256_384/test/train_performance_1P.sh create mode 100644 PyTorch/contrib/cv/classification/Evo-Levit_256_384/test/train_performance_8P.sh create mode 100644 PyTorch/contrib/cv/classification/Evo-Levit_256_384/utils.py create mode 100644 PyTorch/contrib/cv/classification/Evo-Levit_256_384/visualize.py diff --git a/PyTorch/contrib/cv/classification/Evo-Levit_256_384/LICENSE b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/LICENSE new file mode 100644 index 0000000000..325b1ea4fa --- /dev/null +++ 
b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/LICENSE
@@ -0,0 +1,13 @@
+Copyright 2021 Huawei Technologies Co., Ltd
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/PyTorch/contrib/cv/classification/Evo-Levit_256_384/README.md b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/README.md
new file mode 100644
index 0000000000..51c8512781
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/README.md
@@ -0,0 +1,154 @@
+# Evo-Levit for PyTorch
+
+[TOC]
+
+# Overview
+
+## Summary
+
+Evo-ViT consists of two modules: token selection based on global class attention, and slow-fast two-stream token updating. Tokens are ranked by global class attention and split into informative and uninformative ones; the uninformative tokens are merged into a single summary token, which is fed together with the informative tokens into the original multi-head self-attention (MSA) and feed-forward network (FFN) modules for fine-grained updating. The updated summary token is then used to quickly update the uninformative tokens, and the global class attention is refreshed during the fine-grained update as well.
+
+- Reference implementation
+
+```
+url = https://github.com/YifanXu74/Evo-ViT.git
+```
+
+- Implementation adapted for Ascend AI Processors
+- Obtain the code through Git as follows
+
+```
+git clone {url}        # clone the repository
+cd {code_path}         # switch to the directory containing the model code
+```
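+
+The sketch below is a minimal illustration of the token selection and merging idea described above. It is not the code shipped in this repository (see `levit/evo_levit.py` for the real `CatModule`/`StageModule` implementation); the function and variable names are made up for the example.
+
+```
+import torch
+
+def select_and_merge_tokens(x, cls_attn, keep_ratio=0.5):
+    # x: (B, N, C) patch tokens; cls_attn: (B, N) global class attention per token
+    B, N, C = x.shape
+    n_keep = int(N * keep_ratio)
+    order = torch.argsort(cls_attn, dim=1, descending=True)
+    x_sorted = torch.gather(x, 1, order.unsqueeze(-1).expand(-1, -1, C))
+    attn_sorted = torch.gather(cls_attn, 1, order)
+    informative, uninformative = x_sorted[:, :n_keep], x_sorted[:, n_keep:]
+    # merge the low-information tokens into one summary token, weighted by their attention
+    w = attn_sorted[:, n_keep:]
+    w = w / w.sum(dim=1, keepdim=True)
+    summary = (w.unsqueeze(-1) * uninformative).sum(dim=1, keepdim=True)
+    return informative, summary, uninformative
+
+# The informative tokens plus the summary token are refined by MSA/FFN (slow path);
+# the updated summary token is then broadcast back onto the dropped tokens (fast path).
+x = torch.randn(2, 196, 384)
+cls_attn = torch.rand(2, 196)
+kept, summary, dropped = select_and_merge_tokens(x, cls_attn)
+```
+
+In the actual model (`CatModule` in `levit/evo_levit.py`), the summary token also feeds residual updates from the attention and FFN blocks back to the dropped tokens through a skip connection.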
+
+# Preparing the Training Environment
+
+## Prepare the Environment
+
+- The firmware/driver, CANN, and PyTorch versions supported by this model are listed in the table below
+
+  **Table 1** Version compatibility
+
+| Component | Version |
+| ---------- | ------------------------------------------------------------ |
+| Firmware & Driver | [1.0.12](https://www.hiascend.com/hardware/firmware-drivers?tag=commercial) |
+| CANN | [5.0.3](https://www.hiascend.com/hardware/firmware-drivers?tag=commercial) |
+| PyTorch | [1.8.1](https://gitee.com/ascend/pytorch/tree/master/) |
+
+- Install dependencies
+
+```
+pip install timm==0.4.12
+pip install torchvision==0.9.1
+pip install torch_npu-1.8.1rc2.20220607-cp37-cp37m-linux_aarch64.whl
+pip install torch-1.8.1+ascend.rc2.20220607-cp37-cp37m-linux_aarch64.whl
+pip install apex-0.1+ascend.20220607-cp37-cp37m-linux_aarch64.whl
+```
+
+- NPU optimization patches for the timm package
+
+```
+# cd to this directory first; timm is usually installed under /usr/local/lib/python3.7/dist-packages/timm/
+# generate the patches, then apply them to the installed package
+diff -uN {timm_path}/data/mixup.py {code_path}/fix_timm/mixup.py >mixup.patch
+diff -uN {timm_path}/optim/optim_factory.py {code_path}/fix_timm/optim_factory.py >optim.patch
+patch -p0 {timm_path}/data/mixup.py mixup.patch
+patch -p0 {timm_path}/optim/optim_factory.py optim.patch
+```
+
+## Dataset
+
+1. Obtain the dataset
+
+   The dataset used is ImageNet. Obtain it yourself, upload it to any path on the server, and extract it.
+
+   The expected ImageNet directory structure is as follows
+
+```
+├── ImageNet2012
+    ├──train
+        ├──class 1
+        │──image 1
+        │──image 2
+        │   ...
+        ├──class 2
+        │──image 1
+        │──image 2
+        │   ...
+        ├──...
+    ├──val
+        ├──class 1
+        │──image 1
+        │──image 2
+        │   ...
+        ├──class 2
+        │──image 1
+        │──image 2
+        │   ...
+```
+
+## Obtain the Teacher Checkpoint
+
+Evo-ViT training requires a teacher model. The checkpoint can be downloaded from the GitHub repository [Evo-ViT](https://github.com/YifanXu74/Evo-ViT), or directly from the following URL:
+https://dl.fbaipublicfiles.com/deit/regnety_160-a5fe301d.pth
+
+# Starting Training
+
+## Train the Model
+
+1. Enter the root directory of the extracted source package
+
+```
+cd /Evo-Levit_256_384
+```
+
+2. Run the training script
+
+The model supports single-node single-card and single-node 8-card training. Before starting, set the data_path argument to the actual dataset path.
+
+- Single-node single-card training
+
+   Start 1-card training
+
+```
+bash ./test/train_full_1P.sh --data_path=/home/zym/imagenet/
+```
+
+- Single-node 8-card training
+
+   Start 8-card training
+
+```
+bash ./test/train_full_8P.sh --data_path=/home/zym/imagenet/
+```
+
+After training completes, the checkpoint files are saved under the path given in the script's parameter settings, and the training accuracy and performance are printed.
+
+# Training Results
+
+**Table 2** Training results
+
+| NAME | PT Version | Accuracy | FPS | Epochs | AMP_Type |
+| ------ | ------ | ----- | ---- | ------ | -------- |
+| 1P-GPU | 1.8.1 | - | 51 | 1 | O1 |
+| 1P-NPU | 1.8.1 | - | 59 | 1 | O1 |
+| 8P-GPU | 1.8.1 | 73.54 | 487 | 100 | O1 |
+| 8P-NPU | 1.8.1 | 74.11 | 496 | 100 | O1 |
+
+
+
+# Release Notes
+
+## Changes
+
+2022.09.17: first release
+
+2022.10.21: added the teacher checkpoint URL and updated the bash commands
+
+2022.11.09: updated the 1P NPU FPS to 59 to match the device
+
+# Known Issues
+
+
+
diff --git a/PyTorch/contrib/cv/classification/Evo-Levit_256_384/benchmark.py b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/benchmark.py
new file mode 100644
index 0000000000..16ec6a68b3
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/benchmark.py
@@ -0,0 +1,486 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+import csv
+import json
+import time
+import logging
+import torch
+import torch.nn as nn
+import torch.nn.parallel
+from collections import OrderedDict
+from contextlib import suppress
+from functools import partial
+
+from timm.models import create_model, is_model, list_models
+from timm.optim import create_optimizer
+from timm.data import resolve_data_config
+from timm.utils import AverageMeter, setup_default_logging
+
+from deit import evo_deit
+from levit import evo_levit
+from levit import evo_levit_384
+
+has_apex = False
+try:
+    from apex import amp
+
+    has_apex = True
+except ImportError:
+    pass
+
+has_native_amp = False
+try:
+    if getattr(torch.cuda.amp, 'autocast') is not None:
+        has_native_amp = True
+except AttributeError:
+    pass
+
+torch.backends.cudnn.benchmark = True
+_logger = logging.getLogger('validate')
+
+parser = argparse.ArgumentParser(description='PyTorch Benchmark')
+
+# benchmark specific args
+parser.add_argument('--model_list', metavar='NAME', default='',
+                    help='txt file based list of model names to benchmark')
+parser.add_argument('--bench', default='inference', type=str,
+                    help="Benchmark mode. One of 'inference', 'train', 'both'. Defaults to 'inference'")
+parser.add_argument('--detail', action='store_true', default=False,
+                    help='Provide train fwd/bwd/opt breakdown detail if True. 
Defaults to False') +parser.add_argument('--results_file', default='', type=str, metavar='FILENAME', + help='Output csv file for validation results (summary)') +parser.add_argument('--num_warm_iter', default=10, type=int, + metavar='N', help='Number of warmup iterations (default: 10)') +parser.add_argument('--num_bench_iter', default=40, type=int, + metavar='N', help='Number of benchmark iterations (default: 40)') + +# common inference / train args +parser.add_argument('--model', '-m', metavar='NAME', default='resnet50', + help='model architecture (default: resnet50)') +parser.add_argument('-b', '--batch_size', default=256, type=int, + metavar='N', help='mini-batch size (default: 256)') +parser.add_argument('--img_size', default=None, type=int, + metavar='N', help='Input image dimension, uses model default if empty') +parser.add_argument('--input_size', default=None, nargs=3, type=int, + metavar='N N N', + help='Input all image dimensions (d h w, e.g. --input-size 3 224 224), uses model default if empty') +parser.add_argument('--num_classes', type=int, default=1000, + help='Number classes in dataset') +parser.add_argument('--gp', default=None, type=str, metavar='POOL', + help='Global pool type, one of (fast, avg, max, avgmax, avgmaxc). Model default if None.') +parser.add_argument('--channels_last', action='store_true', default=False, + help='Use channels_last memory layout') +parser.add_argument('--amp', action='store_true', default=False, + help='use PyTorch Native AMP for mixed precision training. Overrides --precision arg.') +parser.add_argument('--precision', default='float32', type=str, + help='Numeric precision. One of (amp, float32, float16, bfloat16, tf32)') +parser.add_argument('--torchscript', dest='torchscript', action='store_true', + help='convert model torchscript for inference') + +# train optimizer parameters +parser.add_argument('--opt', default='sgd', type=str, metavar='OPTIMIZER', + help='Optimizer (default: "sgd"') +parser.add_argument('--opt_eps', default=None, type=float, metavar='EPSILON', + help='Optimizer Epsilon (default: None, use opt default)') +parser.add_argument('--opt_betas', default=None, type=float, nargs='+', metavar='BETA', + help='Optimizer Betas (default: None, use opt default)') +parser.add_argument('--momentum', type=float, default=0.9, metavar='M', + help='Optimizer momentum (default: 0.9)') +parser.add_argument('--weight_decay', type=float, default=0.0001, + help='weight decay (default: 0.0001)') +parser.add_argument('--clip_grad', type=float, default=None, metavar='NORM', + help='Clip gradient norm (default: None, no clipping)') +parser.add_argument('--clip_mode', type=str, default='norm', + help='Gradient clipping mode. 
One of ("norm", "value", "agc")') + +# model regularization / loss params that impact model or loss fn +parser.add_argument('--smoothing', type=float, default=0.1, + help='Label smoothing (default: 0.1)') +parser.add_argument('--drop', type=float, default=0.0, metavar='PCT', + help='Dropout rate (default: 0.)') +parser.add_argument('--drop_path', type=float, default=None, metavar='PCT', + help='Drop path rate (default: None)') +parser.add_argument('--drop_block', type=float, default=None, metavar='PCT', + help='Drop block rate (default: None)') + + +def timestamp(sync=False): + return time.perf_counter() + + +def cuda_timestamp(sync=False, device=None): + if sync: + torch.cuda.synchronize(device=device) + return time.perf_counter() + + +def count_params(model: nn.Module): + return sum([m.numel() for m in model.parameters()]) + + +def resolve_precision(precision: str): + assert precision in ('amp', 'float16', 'bfloat16', 'float32') + use_amp = False + model_dtype = torch.float32 + data_dtype = torch.float32 + if precision == 'amp': + use_amp = True + elif precision == 'float16': + model_dtype = torch.float16 + data_dtype = torch.float16 + elif precision == 'bfloat16': + model_dtype = torch.bfloat16 + data_dtype = torch.bfloat16 + return use_amp, model_dtype, data_dtype + + +class BenchmarkRunner: + def __init__( + self, model_name, detail=False, device='cuda', torchscript=False, precision='float32', + num_warm_iter=10, num_bench_iter=50, **kwargs): + self.model_name = model_name + self.detail = detail + self.device = device + self.use_amp, self.model_dtype, self.data_dtype = resolve_precision(precision) + self.channels_last = kwargs.pop('channels_last', False) + self.amp_autocast = torch.cuda.amp.autocast if self.use_amp else suppress + + self.model = evo_deit.evo_deit_base_patch16_224() + self.model.to( + device=self.device, + dtype=self.model_dtype, + memory_format=torch.channels_last if self.channels_last else None) + self.num_classes = self.model.num_classes + self.param_count = count_params(self.model) + _logger.info('Model %s created, param count: %d' % (model_name, self.param_count)) + if torchscript: + self.model = torch.jit.script(self.model) + + data_config = resolve_data_config(kwargs, model=self.model, use_test_size=True) + self.input_size = data_config['input_size'] + self.batch_size = kwargs.pop('batch_size', 256) + + self.example_inputs = None + self.num_warm_iter = num_warm_iter + self.num_bench_iter = num_bench_iter + self.log_freq = num_bench_iter // 5 + if 'cuda' in self.device: + self.time_fn = partial(cuda_timestamp, device=self.device) + else: + self.time_fn = timestamp + + def _init_input(self): + self.example_inputs = torch.randn( + (self.batch_size,) + self.input_size, device=self.device, dtype=self.data_dtype) + if self.channels_last: + self.example_inputs = self.example_inputs.contiguous(memory_format=torch.channels_last) + + +class InferenceBenchmarkRunner(BenchmarkRunner): + + def __init__(self, model_name, device='cuda', torchscript=False, **kwargs): + super().__init__(model_name=model_name, device=device, torchscript=torchscript, **kwargs) + self.model.eval() + + def run(self): + def _step(): + t_step_start = self.time_fn() + with self.amp_autocast(): + output = self.model(self.example_inputs) + t_step_end = self.time_fn(True) + return t_step_end - t_step_start + + _logger.info( + f'Running inference benchmark on {self.model_name} for {self.num_bench_iter} steps w/ ' + f'input size {self.input_size} and batch size {self.batch_size}.') + + with torch.no_grad(): 
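+            # Measurement protocol: build one random input batch, run num_warm_iter
+            # un-timed warm-up steps, then time num_bench_iter forward passes and
+            # report samples/sec and ms/step from the accumulated step times.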
+ self._init_input() + + for _ in range(self.num_warm_iter): + _step() + + total_step = 0. + num_samples = 0 + t_run_start = self.time_fn() + for i in range(self.num_bench_iter): + delta_fwd = _step() + total_step += delta_fwd + num_samples += self.batch_size + num_steps = i + 1 + if num_steps % self.log_freq == 0: + _logger.info( + f"Infer [{num_steps}/{self.num_bench_iter}]." + f" {num_samples / total_step:0.2f} samples/sec." + f" {1000 * total_step / num_steps:0.3f} ms/step.") + t_run_end = self.time_fn(True) + t_run_elapsed = t_run_end - t_run_start + + results = dict( + samples_per_sec=round(num_samples / t_run_elapsed, 2), + step_time=round(1000 * total_step / self.num_bench_iter, 3), + batch_size=self.batch_size, + img_size=self.input_size[-1], + param_count=round(self.param_count / 1e6, 2), + ) + + _logger.info( + f"Inference benchmark of {self.model_name} done. " + f"{results['samples_per_sec']:.2f} samples/sec, {results['step_time']:.2f} ms/step") + + return results + + +class TrainBenchmarkRunner(BenchmarkRunner): + + def __init__(self, model_name, device='cuda', torchscript=False, **kwargs): + super().__init__(model_name=model_name, device=device, torchscript=torchscript, **kwargs) + self.model.train() + + if kwargs.pop('smoothing', 0) > 0: + self.loss = nn.CrossEntropyLoss().to(self.device) + else: + self.loss = nn.CrossEntropyLoss().to(self.device) + self.target_shape = tuple() + + self.optimizer = create_optimizer( + self.model, + optimizer_name=kwargs.pop('opt', 'sgd'), + learning_rate=kwargs.pop('lr', 1e-4)) + + def _gen_target(self, batch_size): + return torch.empty( + (batch_size,) + self.target_shape, device=self.device, dtype=torch.long).random_(self.num_classes) + + def run(self): + def _step(detail=False): + self.optimizer.zero_grad() # can this be ignored? + t_start = self.time_fn() + t_fwd_end = t_start + t_bwd_end = t_start + with self.amp_autocast(): + output = self.model(self.example_inputs) + if isinstance(output, tuple): + output = output[0] + if detail: + t_fwd_end = self.time_fn(True) + target = self._gen_target(output.shape[0]) + self.loss(output, target).backward() + if detail: + t_bwd_end = self.time_fn(True) + self.optimizer.step() + t_end = self.time_fn(True) + if detail: + delta_fwd = t_fwd_end - t_start + delta_bwd = t_bwd_end - t_fwd_end + delta_opt = t_end - t_bwd_end + return delta_fwd, delta_bwd, delta_opt + else: + delta_step = t_end - t_start + return delta_step + + _logger.info( + f'Running train benchmark on {self.model_name} for {self.num_bench_iter} steps w/ ' + f'input size {self.input_size} and batch size {self.batch_size}.') + + self._init_input() + + for _ in range(self.num_warm_iter): + _step() + + t_run_start = self.time_fn() + if self.detail: + total_fwd = 0. + total_bwd = 0. + total_opt = 0. + num_samples = 0 + for i in range(self.num_bench_iter): + delta_fwd, delta_bwd, delta_opt = _step(True) + num_samples += self.batch_size + total_fwd += delta_fwd + total_bwd += delta_bwd + total_opt += delta_opt + num_steps = (i + 1) + if num_steps % self.log_freq == 0: + total_step = total_fwd + total_bwd + total_opt + _logger.info( + f"Train [{num_steps}/{self.num_bench_iter}]." + f" {num_samples / total_step:0.2f} samples/sec." + f" {1000 * total_fwd / num_steps:0.3f} ms/step fwd," + f" {1000 * total_bwd / num_steps:0.3f} ms/step bwd," + f" {1000 * total_opt / num_steps:0.3f} ms/step opt." 
+ ) + total_step = total_fwd + total_bwd + total_opt + t_run_elapsed = self.time_fn() - t_run_start + results = dict( + samples_per_sec=round(num_samples / t_run_elapsed, 2), + step_time=round(1000 * total_step / self.num_bench_iter, 3), + fwd_time=round(1000 * total_fwd / self.num_bench_iter, 3), + bwd_time=round(1000 * total_bwd / self.num_bench_iter, 3), + opt_time=round(1000 * total_opt / self.num_bench_iter, 3), + batch_size=self.batch_size, + img_size=self.input_size[-1], + param_count=round(self.param_count / 1e6, 2), + ) + else: + total_step = 0. + num_samples = 0 + for i in range(self.num_bench_iter): + delta_step = _step(False) + num_samples += self.batch_size + total_step += delta_step + num_steps = (i + 1) + if num_steps % self.log_freq == 0: + _logger.info( + f"Train [{num_steps}/{self.num_bench_iter}]." + f" {num_samples / total_step:0.2f} samples/sec." + f" {1000 * total_step / num_steps:0.3f} ms/step.") + t_run_elapsed = self.time_fn() - t_run_start + results = dict( + samples_per_sec=round(num_samples / t_run_elapsed, 2), + step_time=round(1000 * total_step / self.num_bench_iter, 3), + batch_size=self.batch_size, + img_size=self.input_size[-1], + param_count=round(self.param_count / 1e6, 2), + ) + + _logger.info( + f"Train benchmark of {self.model_name} done. " + f"{results['samples_per_sec']:.2f} samples/sec, {results['step_time']:.2f} ms/sample") + + return results + + +def decay_batch_exp(batch_size, factor=0.5, divisor=16): + out_batch_size = batch_size * factor + if out_batch_size > divisor: + out_batch_size = (out_batch_size + 1) // divisor * divisor + else: + out_batch_size = batch_size - 1 + return max(0, int(out_batch_size)) + + +def _try_run(model_name, bench_fn, initial_batch_size, bench_kwargs): + batch_size = initial_batch_size + results = dict() + while batch_size >= 1: + try: + bench = bench_fn(model_name=model_name, batch_size=batch_size, **bench_kwargs) + results = bench.run() + return results + except RuntimeError as e: + torch.cuda.empty_cache() + batch_size = decay_batch_exp(batch_size) + print(f'Error: {str(e)} while running benchmark. Reducing batch size to {batch_size} for retry.') + return results + + +def benchmark(args): + if args.amp: + _logger.warning("Overriding precision to 'amp' since --amp flag set.") + args.precision = 'amp' + _logger.info(f'Benchmarking in {args.precision} precision. ' + f'{"NHWC" if args.channels_last else "NCHW"} layout. 
' + f'torchscript {"enabled" if args.torchscript else "disabled"}') + + bench_kwargs = vars(args).copy() + bench_kwargs.pop('amp') + model = bench_kwargs.pop('model') + batch_size = bench_kwargs.pop('batch_size') + + bench_fns = (InferenceBenchmarkRunner,) + prefixes = ('infer',) + if args.bench == 'both': + bench_fns = ( + InferenceBenchmarkRunner, + TrainBenchmarkRunner + ) + prefixes = ('infer', 'train') + elif args.bench == 'train': + bench_fns = TrainBenchmarkRunner, + prefixes = 'train', + + model_results = OrderedDict(model=model) + for prefix, bench_fn in zip(prefixes, bench_fns): + run_results = _try_run(model, bench_fn, initial_batch_size=batch_size, bench_kwargs=bench_kwargs) + if prefix: + run_results = {'_'.join([prefix, k]): v for k, v in run_results.items()} + model_results.update(run_results) + param_count = model_results.pop('infer_param_count', model_results.pop('train_param_count', 0)) + model_results.setdefault('param_count', param_count) + model_results.pop('train_param_count', 0) + return model_results + + +def main(): + setup_default_logging() + args = parser.parse_args() + model_cfgs = [] + model_names = [] + + if args.model_list: + args.model = '' + with open(args.model_list) as f: + model_names = [line.rstrip() for line in f] + model_cfgs = [(n, None) for n in model_names] + elif args.model == 'all': + # validate all models in a list of names with pretrained checkpoints + args.pretrained = True + model_names = list_models(pretrained=True, exclude_filters=['*in21k']) + model_cfgs = [(n, None) for n in model_names] + elif not is_model(args.model): + # model name doesn't exist, try as wildcard filter + model_names = list_models(args.model) + model_cfgs = [(n, None) for n in model_names] + + if len(model_cfgs): + results_file = args.results_file or './benchmark.csv' + _logger.info('Running bulk validation on these pretrained models: {}'.format(', '.join(model_names))) + results = [] + try: + for m, _ in model_cfgs: + if not m: + continue + args.model = m + r = benchmark(args) + results.append(r) + except KeyboardInterrupt as e: + pass + sort_key = 'train_samples_per_sec' if 'train' in args.bench else 'infer_samples_per_sec' + results = sorted(results, key=lambda x: x[sort_key], reverse=True) + if len(results): + write_results(results_file, results) + + import json + json_str = json.dumps(results, indent=4) + print(json_str) + else: + benchmark(args) + + +def write_results(results_file, results): + with open(results_file, mode='w') as cf: + dw = csv.DictWriter(cf, fieldnames=results[0].keys()) + dw.writeheader() + for r in results: + dw.writerow(r) + cf.flush() + + +if __name__ == '__main__': + main() diff --git a/PyTorch/contrib/cv/classification/Evo-Levit_256_384/datasets.py b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/datasets.py new file mode 100644 index 0000000000..6482601780 --- /dev/null +++ b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/datasets.py @@ -0,0 +1,185 @@ +# encoding=utf-8 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import json + +from torchvision import datasets, transforms +from torchvision.datasets.folder import ImageFolder, default_loader + +from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD +from timm.data import create_transform + + +class INatDataset(ImageFolder): + def __init__(self, root, train=True, year=2018, transform=None, target_transform=None, + category='name', loader=default_loader): + self.transform = transform + self.loader = loader + self.target_transform = target_transform + self.year = year + # assert category in ['kingdom','phylum','class','order','supercategory','family','genus','name'] + path_json = os.path.join(root, f'{"train" if train else "val"}{year}.json') + with open(path_json) as json_file: + data = json.load(json_file) + + with open(os.path.join(root, 'categories.json')) as json_file: + data_catg = json.load(json_file) + + path_json_for_targeter = os.path.join(root, f"train{year}.json") + + with open(path_json_for_targeter) as json_file: + data_for_targeter = json.load(json_file) + + targeter = {} + indexer = 0 + for elem in data_for_targeter['annotations']: + king = [] + king.append(data_catg[int(elem['category_id'])][category]) + if king[0] not in targeter.keys(): + targeter[king[0]] = indexer + indexer += 1 + self.nb_classes = len(targeter) + + self.samples = [] + for elem in data['images']: + cut = elem['file_name'].split('/') + target_current = int(cut[2]) + path_current = os.path.join(root, cut[0], cut[2], cut[3]) + + categors = data_catg[target_current] + target_current_true = targeter[categors[category]] + self.samples.append((path_current, target_current_true)) + + # __getitem__ and __len__ inherited from ImageFolder + + +def build_dataset(is_train, args): + transform = build_transform(is_train, args) + + if args.data_set == 'CIFAR': + dataset = datasets.CIFAR100(args.data_path, train=is_train, transform=transform) + nb_classes = 100 + elif args.data_set == 'IMNET': + root = os.path.join(args.data_path, 'train' if is_train else 'val') + dataset = datasets.ImageFolder(root, transform=transform) + nb_classes = 1000 + elif args.data_set == 'INAT': + dataset = INatDataset(args.data_path, train=is_train, year=2018, + category=args.inat_category, transform=transform) + nb_classes = dataset.nb_classes + elif args.data_set == 'INAT19': + dataset = INatDataset(args.data_path, train=is_train, year=2019, + category=args.inat_category, transform=transform) + nb_classes = dataset.nb_classes + + return dataset, nb_classes + + +def build_transform(is_train, args): + resize_im = args.input_size > 32 + if is_train: + # this should always dispatch to transforms_imagenet_train + transform = create_transform( + input_size=args.input_size, + is_training=True, + color_jitter=args.color_jitter, + auto_augment=args.aa, + interpolation=args.train_interpolation, + re_prob=args.reprob, + re_mode=args.remode, + re_count=args.recount, + ) + if not resize_im: + # replace RandomResizedCropAndInterpolation with + # RandomCrop + transform.transforms[0] = transforms.RandomCrop( + args.input_size, padding=4) + return transform + + t = [] + if resize_im: + size = int((256 / 224) * args.input_size) + t.append( + transforms.Resize(size, interpolation=3), # to maintain same ratio w.r.t. 
224 images + ) + t.append(transforms.CenterCrop(args.input_size)) + + t.append(transforms.ToTensor()) + t.append(transforms.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD)) + return transforms.Compose(t) + + +def build_dataset2(is_train, args): + transform = build_transform2(is_train, args) + + if args.data_set == 'CIFAR': + dataset = datasets.CIFAR100(args.data_path, train=is_train, transform=transform) + nb_classes = 100 + elif args.data_set == 'IMNET': + root = os.path.join(args.data_path, 'train' if is_train else 'val') + dataset = datasets.ImageFolder(root, transform=transform) + nb_classes = 1000 + elif args.data_set == 'INAT': + dataset = INatDataset(args.data_path, train=is_train, year=2018, + category=args.inat_category, transform=transform) + nb_classes = dataset.nb_classes + elif args.data_set == 'INAT19': + dataset = INatDataset(args.data_path, train=is_train, year=2019, + category=args.inat_category, transform=transform) + nb_classes = dataset.nb_classes + + return dataset, nb_classes + + + + +def build_transform2(is_train, args): + resize_im = args.input_size > 32 + if is_train: + # this should always dispatch to transforms_imagenet_train + transform = create_transform( + input_size=args.input_size, + is_training=True, + color_jitter=args.color_jitter, + auto_augment=args.aa, + interpolation=args.train_interpolation, + re_prob=args.reprob, + re_mode=args.remode, + re_count=args.recount, + ) + if not resize_im: + # replace RandomResizedCropAndInterpolation with + # RandomCrop + transform.transforms[0] = transforms.RandomCrop( + args.input_size, padding=4) + return transform + + t = [] + if resize_im: + size = int((256 / 224) * args.input_size) + t.append( + transforms.Resize(size, interpolation=3), # to maintain same ratio w.r.t. 224 images + ) + t.append(transforms.CenterCrop(args.input_size)) + t.append(transforms.ToTensor()) + + return transforms.Compose(t) + + +def get_post_process(): + t = [] + t.append(transforms.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD)) + return transforms.Compose(t) diff --git a/PyTorch/contrib/cv/classification/Evo-Levit_256_384/engine_levit.py b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/engine_levit.py new file mode 100644 index 0000000000..7f6eab9332 --- /dev/null +++ b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/engine_levit.py @@ -0,0 +1,214 @@ +# encoding=utf-8 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
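+
+# Training and evaluation loops for Evo-LeViT on Ascend NPU. train_one_epoch() drives the
+# loss scaler and the optional model EMA and reports FPS per epoch; when args.train_type
+# is 'fps' it prints the FPS measured over a 1000-step window and exits early.
+# evaluate() reports top-1 / top-5 accuracy on the validation set.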
+ +import math +import sys +import time +from typing import Iterable, Optional +import os + +import torch + +from timm.data import Mixup +from timm.utils import accuracy, ModelEma + +from levit.losses_levit import DistillationLoss +import utils +try: + # noinspection PyUnresolvedReferences + from apex import amp +except ImportError: + amp = None + +import torch_npu + +#获取Iterable的长度: +def count(iterable): + c=0 + for el in iterable: c+=1 + return c + + +def train_one_epoch(args, + model: torch.nn.Module, criterion: DistillationLoss, + data_loader: Iterable, optimizer: torch.optim.Optimizer, + device: torch.device, epoch: int, loss_scaler, + clip_grad: float = 0, + clip_mode: str = 'norm', + model_ema: Optional[ModelEma] = None, mixup_fn: Optional[Mixup] = None, + set_training_mode=True): + model.train(set_training_mode) + metric_logger = utils.MetricLogger(delimiter=" ") + metric_logger.add_meter('lr', utils.SmoothedValue( + window_size=1, fmt='{value:.6f}')) + header = 'Epoch: [{}]'.format(epoch) + print_freq = 100 + mode1 = False + mode2 = False # True + mode4 = False + CTPEP = False + num_steps = len(data_loader) + start = time.time() + step_n = 0 + if epoch < 200: + model.module.stage_wise_prune = False + model.module.set_learn_tradeoff(False) + else: + model.module.stage_wise_prune = True + model.module.set_learn_tradeoff(True) + + for samples, targets in metric_logger.log_every( + data_loader, print_freq, header): + samples = samples.to(device, non_blocking=True) + targets = targets.to(device, non_blocking=True) + + # 不计算前5个epoch时间内,若需要生成prof的时候,把下面这一段注释掉 + step_n += 1 + if step_n < 5: + start = time.time() + + #节约时间,计算1000个step的fps,然后输出 + if step_n % 1000 == 999 and args.train_type == 'fps': + timm_999=time.time()-start + Fps_step = 995 * args.batch_size * utils.get_world_size() / float(timm_999) + print("fps:", Fps_step) + sys.exit() + + + + if mixup_fn is not None: + samples, targets = mixup_fn(samples, targets) + + + if True: # with torch.cuda.amp.autocast(): + # outputs = model(samples) + if (mode1 or mode4): + outputs = model(samples, epoch) + loss = criterion(samples, outputs, targets) # net1distill + elif (mode2 or CTPEP): + outputs = model(samples, epoch) + loss = criterion(samples, outputs, targets) + else: + outputs = model(samples) + loss = criterion(samples, outputs, targets) + + loss_value = loss.item() + + if not math.isfinite(loss_value): + print("Loss is {}, stopping training".format(loss_value)) + sys.exit(1) + + optimizer.zero_grad() + + # this attribute is added by timm on one optimizer (adahessian) + is_second_order = hasattr( + optimizer, 'is_second_order') and optimizer.is_second_order + + loss_scaler(loss, optimizer, clip_grad=clip_grad, clip_mode=clip_mode, + parameters=model.parameters(), create_graph=is_second_order) + torch_npu.npu.synchronize() + + if model_ema is not None: + model_ema.update(model) + + metric_logger.update(loss=loss_value) + metric_logger.update(lr=optimizer.param_groups[0]["lr"]) + # 不用prof生成时的代码结束位置 + epoch_time = time.time() - start + # gather the stats from all processes + metric_logger.synchronize_between_processes() + # 计算每个epoch的fps + Fps_epoch = (num_steps - 5) * args.batch_size * utils.get_world_size() / float(epoch_time) + print("Averaged stats:", metric_logger,"fps:",Fps_epoch) + return {k: meter.global_avg for k, meter in metric_logger.meters.items()},Fps_epoch + + +@torch.no_grad() +def evaluate(data_loader, model, device, epoch=None): + criterion = torch.nn.CrossEntropyLoss() + + metric_logger = 
utils.MetricLogger(delimiter=" ") + header = 'Test:' + + # switch to evaluation mode + model.eval() + mode1 = False + mode2 = False + mutual = False # True + cls = True + mode4 = False + CTPEP = False # False + + for images, target in metric_logger.log_every(data_loader, 10, header): + images = images.to(device, non_blocking=True) + target = target.to(device, non_blocking=True) + + # compute output + if True: #with torch.cuda.amp.autocast():这里添加了if TRUE,考虑到NPU不能用CUDA的混合精度训练 + if mode1: + output = model(images, epoch) + acc1, acc5 = accuracy(output[1], target, topk=(1, 5)) + acc12, acc52 = accuracy(output[2], target, topk=(1, 5)) + acc13, acc53 = accuracy(output[3], target, topk=(1, 5)) + print("net 2 accuracy: {}, {}, net 3 accuracy: {}, {}, net 4 accuracy: {}, {}".format(acc1.item(), + acc5.item(), + acc12.item(), + acc52.item(), + acc13.item(), + acc53.item())) + output = output[0] + elif mode2: + output = model(images, epoch) + acc1, acc5 = accuracy(output[1], target, topk=(1, 5)) + acc12, acc52 = accuracy(output[2], target, topk=(1, 5)) + acc13, acc53 = accuracy(output[3], target, topk=(1, 5)) + acc14, acc54 = accuracy(output[4], target, topk=(1, 5)) + print( + "net 2 accuracy: {}, {}, net 3 accuracy: {}, {}, net 4 accuracy: {}, {}, net merge accuracy: {}, {}".format( + acc1.item(), acc5.item(), acc12.item(), acc52.item(), acc13.item(), acc53.item(), acc14.item(), + acc54.item())) + output = output[0] + elif mode4: + output = model(images, epoch) + acc1, acc5 = accuracy(output[1], target, topk=(1, 5)) + acc12, acc52 = accuracy(output[2], target, topk=(1, 5)) + print("net 2 accuracy: {}, {}, net 3 accuracy: {}, {}".format(acc1.item(), acc5.item(), acc12.item(), + acc52.item())) + output = output[0] + elif mutual: + output = model(images) + acc1, acc5 = accuracy(output[1], target, topk=(1, 5)) + print("net depth accuracy: {}, {}".format(acc1, acc5)) + output = output[0] + elif cls: + if CTPEP: + output = model(images, epoch) + else: + output = model(images) + else: + output = model(images) + loss = criterion(output, target) + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + batch_size = images.shape[0] + metric_logger.update(loss=loss.item()) + metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) + metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) + # gather the stats from all processes + metric_logger.synchronize_between_processes() + print('* Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f} loss {losses.global_avg:.3f}' + .format(top1=metric_logger.acc1, top5=metric_logger.acc5, losses=metric_logger.loss)) + print(output.mean().item(), output.std().item()) + + return {k: meter.global_avg for k, meter in metric_logger.meters.items()} diff --git a/PyTorch/contrib/cv/classification/Evo-Levit_256_384/fix_timm/mixup.py b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/fix_timm/mixup.py new file mode 100644 index 0000000000..a475602ad7 --- /dev/null +++ b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/fix_timm/mixup.py @@ -0,0 +1,319 @@ +# encoding=utf-8 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import torch +import torch_npu + +def one_hot(x, num_classes, on_value=1., off_value=0., device='npu'): + x = x.long().view(-1, 1) + return torch.full((x.size()[0], num_classes), off_value, device=device).scatter_(1, x, on_value) + + +def mixup_target(target, num_classes, lam=1., smoothing=0.0, device='npu'): + off_value = smoothing / num_classes + on_value = 1. - smoothing + off_value + y1 = one_hot(target, num_classes, on_value=on_value, off_value=off_value, device=device) + y2 = one_hot(target.flip(0), num_classes, on_value=on_value, off_value=off_value, device=device) + return y1 * lam + y2 * (1. - lam) + + +def rand_bbox(img_shape, lam, margin=0., count=None): + """ Standard CutMix bounding-box + Generates a random square bbox based on lambda value. This impl includes + support for enforcing a border margin as percent of bbox dimensions. + + Args: + img_shape (tuple): Image shape as tuple + lam (float): Cutmix lambda value + margin (float): Percentage of bbox dimension to enforce as margin (reduce amount of box outside image) + count (int): Number of bbox to generate + """ + ratio = np.sqrt(1 - lam) + img_h, img_w = img_shape[-2:] + cut_h, cut_w = int(img_h * ratio), int(img_w * ratio) + margin_y, margin_x = int(margin * cut_h), int(margin * cut_w) + cy = np.random.randint(0 + margin_y, img_h - margin_y, size=count) + cx = np.random.randint(0 + margin_x, img_w - margin_x, size=count) + yl = np.clip(cy - cut_h // 2, 0, img_h) + yh = np.clip(cy + cut_h // 2, 0, img_h) + xl = np.clip(cx - cut_w // 2, 0, img_w) + xh = np.clip(cx + cut_w // 2, 0, img_w) + return yl, yh, xl, xh + + +def rand_bbox_minmax(img_shape, minmax, count=None): + """ Min-Max CutMix bounding-box + Inspired by Darknet cutmix impl, generates a random rectangular bbox + based on min/max percent values applied to each dimension of the input image. + + Typical defaults for minmax are usually in the .2-.3 for min and .8-.9 range for max. + + Args: + img_shape (tuple): Image shape as tuple + minmax (tuple or list): Min and max bbox ratios (as percent of image size) + count (int): Number of bbox to generate + """ + assert len(minmax) == 2 + img_h, img_w = img_shape[-2:] + cut_h = np.random.randint(int(img_h * minmax[0]), int(img_h * minmax[1]), size=count) + cut_w = np.random.randint(int(img_w * minmax[0]), int(img_w * minmax[1]), size=count) + yl = np.random.randint(0, img_h - cut_h, size=count) + xl = np.random.randint(0, img_w - cut_w, size=count) + yu = yl + cut_h + xu = xl + cut_w + return yl, yu, xl, xu + + +def cutmix_bbox_and_lam(img_shape, lam, ratio_minmax=None, correct_lam=True, count=None): + """ Generate bbox and apply lambda correction. + """ + if ratio_minmax is not None: + yl, yu, xl, xu = rand_bbox_minmax(img_shape, ratio_minmax, count=count) + else: + yl, yu, xl, xu = rand_bbox(img_shape, lam, count=count) + if correct_lam or ratio_minmax is not None: + bbox_area = (yu - yl) * (xu - xl) + lam = 1. - bbox_area / float(img_shape[-2] * img_shape[-1]) + return (yl, yu, xl, xu), lam + + +class Mixup: + """ Mixup/Cutmix that applies different params to each element or whole batch + + Args: + mixup_alpha (float): mixup alpha value, mixup is active if > 0. + cutmix_alpha (float): cutmix alpha value, cutmix is active if > 0. + cutmix_minmax (List[float]): cutmix min/max image ratio, cutmix is active and uses this vs alpha if not None. 
+ prob (float): probability of applying mixup or cutmix per batch or element + switch_prob (float): probability of switching to cutmix instead of mixup when both are active + mode (str): how to apply mixup/cutmix params (per 'batch', 'pair' (pair of elements), 'elem' (element) + correct_lam (bool): apply lambda correction when cutmix bbox clipped by image borders + label_smoothing (float): apply label smoothing to the mixed target tensor + num_classes (int): number of classes for target + """ + def __init__(self, mixup_alpha=1., cutmix_alpha=0., cutmix_minmax=None, prob=1.0, switch_prob=0.5, + mode='batch', correct_lam=True, label_smoothing=0.1, num_classes=1000): + self.mixup_alpha = mixup_alpha + self.cutmix_alpha = cutmix_alpha + self.cutmix_minmax = cutmix_minmax + if self.cutmix_minmax is not None: + assert len(self.cutmix_minmax) == 2 + # force cutmix alpha == 1.0 when minmax active to keep logic simple & safe + self.cutmix_alpha = 1.0 + self.mix_prob = prob + self.switch_prob = switch_prob + self.label_smoothing = label_smoothing + self.num_classes = num_classes + self.mode = mode + self.correct_lam = correct_lam # correct lambda based on clipped area for cutmix + self.mixup_enabled = True # set to false to disable mixing (intended tp be set by train loop) + + def _params_per_elem(self, batch_size): + lam = np.ones(batch_size, dtype=np.float32) + use_cutmix = np.zeros(batch_size, dtype=np.bool) + if self.mixup_enabled: + if self.mixup_alpha > 0. and self.cutmix_alpha > 0.: + use_cutmix = np.random.rand(batch_size) < self.switch_prob + lam_mix = np.where( + use_cutmix, + np.random.beta(self.cutmix_alpha, self.cutmix_alpha, size=batch_size), + np.random.beta(self.mixup_alpha, self.mixup_alpha, size=batch_size)) + elif self.mixup_alpha > 0.: + lam_mix = np.random.beta(self.mixup_alpha, self.mixup_alpha, size=batch_size) + elif self.cutmix_alpha > 0.: + use_cutmix = np.ones(batch_size, dtype=np.bool) + lam_mix = np.random.beta(self.cutmix_alpha, self.cutmix_alpha, size=batch_size) + else: + assert False, "One of mixup_alpha > 0., cutmix_alpha > 0., cutmix_minmax not None should be true." + lam = np.where(np.random.rand(batch_size) < self.mix_prob, lam_mix.astype(np.float32), lam) + return lam, use_cutmix + + def _params_per_batch(self): + lam = 1. + use_cutmix = False + if self.mixup_enabled and np.random.rand() < self.mix_prob: + if self.mixup_alpha > 0. and self.cutmix_alpha > 0.: + use_cutmix = np.random.rand() < self.switch_prob + lam_mix = np.random.beta(self.cutmix_alpha, self.cutmix_alpha) if use_cutmix else \ + np.random.beta(self.mixup_alpha, self.mixup_alpha) + elif self.mixup_alpha > 0.: + lam_mix = np.random.beta(self.mixup_alpha, self.mixup_alpha) + elif self.cutmix_alpha > 0.: + use_cutmix = True + lam_mix = np.random.beta(self.cutmix_alpha, self.cutmix_alpha) + else: + assert False, "One of mixup_alpha > 0., cutmix_alpha > 0., cutmix_minmax not None should be true." 
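+            # lam_mix above was drawn from Beta(alpha, alpha) for either mixup or cutmix;
+            # cast it to a plain float so the bbox and blending code receive a scalar lambda.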
+ lam = float(lam_mix) + return lam, use_cutmix + + def _mix_elem(self, x): + batch_size = len(x) + lam_batch, use_cutmix = self._params_per_elem(batch_size) + x_orig = x.clone() # need to keep an unmodified original for mixing source + for i in range(batch_size): + j = batch_size - i - 1 + lam = lam_batch[i] + if lam != 1.: + if use_cutmix[i]: + (yl, yh, xl, xh), lam = cutmix_bbox_and_lam( + x[i].shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam) + x[i][:, yl:yh, xl:xh] = x_orig[j][:, yl:yh, xl:xh] + lam_batch[i] = lam + else: + x[i] = x[i] * lam + x_orig[j] * (1 - lam) + return torch.tensor(lam_batch, device=x.device, dtype=x.dtype).unsqueeze(1) + + def _mix_pair(self, x): + batch_size = len(x) + lam_batch, use_cutmix = self._params_per_elem(batch_size // 2) + x_orig = x.clone() # need to keep an unmodified original for mixing source + for i in range(batch_size // 2): + j = batch_size - i - 1 + lam = lam_batch[i] + if lam != 1.: + if use_cutmix[i]: + (yl, yh, xl, xh), lam = cutmix_bbox_and_lam( + x[i].shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam) + x[i][:, yl:yh, xl:xh] = x_orig[j][:, yl:yh, xl:xh] + x[j][:, yl:yh, xl:xh] = x_orig[i][:, yl:yh, xl:xh] + lam_batch[i] = lam + else: + x[i] = x[i] * lam + x_orig[j] * (1 - lam) + x[j] = x[j] * lam + x_orig[i] * (1 - lam) + lam_batch = np.concatenate((lam_batch, lam_batch[::-1])) + return torch.tensor(lam_batch, device=x.device, dtype=x.dtype).unsqueeze(1) + + def _mix_batch(self, x): + lam, use_cutmix = self._params_per_batch() + if lam == 1.: + return 1. + if use_cutmix: + (yl, yh, xl, xh), lam = cutmix_bbox_and_lam( + x.shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam) + x[:, :, yl:yh, xl:xh] = x.flip(0)[:, :, yl:yh, xl:xh] + else: + x_flipped = x.flip(0).mul_(1. - lam) + x.mul_(lam).add_(x_flipped) + return lam + + def __call__(self, x, target): + assert len(x) % 2 == 0, 'Batch size should be even when using this' + if self.mode == 'elem': + lam = self._mix_elem(x) + elif self.mode == 'pair': + lam = self._mix_pair(x) + else: + lam = self._mix_batch(x) + target = mixup_target(target, self.num_classes, lam, self.label_smoothing) + return x, target + + +class FastCollateMixup(Mixup): + """ Fast Collate w/ Mixup/Cutmix that applies different params to each element or whole batch + + A Mixup impl that's performed while collating the batches. 
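+
+    Each batch element is expected to be a (numpy uint8 image, target) pair: mixing is done
+    in numpy on the raw uint8 data, the result is accumulated into a pre-allocated uint8
+    output tensor, and the mixed one-hot targets are built on the CPU.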
+ """ + + def _mix_elem_collate(self, output, batch, half=False): + batch_size = len(batch) + num_elem = batch_size // 2 if half else batch_size + assert len(output) == num_elem + lam_batch, use_cutmix = self._params_per_elem(num_elem) + for i in range(num_elem): + j = batch_size - i - 1 + lam = lam_batch[i] + mixed = batch[i][0] + if lam != 1.: + if use_cutmix[i]: + if not half: + mixed = mixed.copy() + (yl, yh, xl, xh), lam = cutmix_bbox_and_lam( + output.shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam) + mixed[:, yl:yh, xl:xh] = batch[j][0][:, yl:yh, xl:xh] + lam_batch[i] = lam + else: + mixed = mixed.astype(np.float32) * lam + batch[j][0].astype(np.float32) * (1 - lam) + np.rint(mixed, out=mixed) + output[i] += torch.from_numpy(mixed.astype(np.uint8)) + if half: + lam_batch = np.concatenate((lam_batch, np.ones(num_elem))) + return torch.tensor(lam_batch).unsqueeze(1) + + def _mix_pair_collate(self, output, batch): + batch_size = len(batch) + lam_batch, use_cutmix = self._params_per_elem(batch_size // 2) + for i in range(batch_size // 2): + j = batch_size - i - 1 + lam = lam_batch[i] + mixed_i = batch[i][0] + mixed_j = batch[j][0] + assert 0 <= lam <= 1.0 + if lam < 1.: + if use_cutmix[i]: + (yl, yh, xl, xh), lam = cutmix_bbox_and_lam( + output.shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam) + patch_i = mixed_i[:, yl:yh, xl:xh].copy() + mixed_i[:, yl:yh, xl:xh] = mixed_j[:, yl:yh, xl:xh] + mixed_j[:, yl:yh, xl:xh] = patch_i + lam_batch[i] = lam + else: + mixed_temp = mixed_i.astype(np.float32) * lam + mixed_j.astype(np.float32) * (1 - lam) + mixed_j = mixed_j.astype(np.float32) * lam + mixed_i.astype(np.float32) * (1 - lam) + mixed_i = mixed_temp + np.rint(mixed_j, out=mixed_j) + np.rint(mixed_i, out=mixed_i) + output[i] += torch.from_numpy(mixed_i.astype(np.uint8)) + output[j] += torch.from_numpy(mixed_j.astype(np.uint8)) + lam_batch = np.concatenate((lam_batch, lam_batch[::-1])) + return torch.tensor(lam_batch).unsqueeze(1) + + def _mix_batch_collate(self, output, batch): + batch_size = len(batch) + lam, use_cutmix = self._params_per_batch() + if use_cutmix: + (yl, yh, xl, xh), lam = cutmix_bbox_and_lam( + output.shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam) + for i in range(batch_size): + j = batch_size - i - 1 + mixed = batch[i][0] + if lam != 1.: + if use_cutmix: + mixed = mixed.copy() # don't want to modify the original while iterating + mixed[:, yl:yh, xl:xh] = batch[j][0][:, yl:yh, xl:xh] + else: + mixed = mixed.astype(np.float32) * lam + batch[j][0].astype(np.float32) * (1 - lam) + np.rint(mixed, out=mixed) + output[i] += torch.from_numpy(mixed.astype(np.uint8)) + return lam + + def __call__(self, batch, _=None): + batch_size = len(batch) + assert batch_size % 2 == 0, 'Batch size should be even when using this' + half = 'half' in self.mode + if half: + batch_size //= 2 + output = torch.zeros((batch_size, *batch[0][0].shape), dtype=torch.uint8) + if self.mode == 'elem' or self.mode == 'half': + lam = self._mix_elem_collate(output, batch, half=half) + elif self.mode == 'pair': + lam = self._mix_pair_collate(output, batch) + else: + lam = self._mix_batch_collate(output, batch) + target = torch.tensor([b[1] for b in batch], dtype=torch.int64) + target = mixup_target(target, self.num_classes, lam, self.label_smoothing, device='cpu') + target = target[:batch_size] + return output, target + diff --git a/PyTorch/contrib/cv/classification/Evo-Levit_256_384/fix_timm/optim_factory.py 
b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/fix_timm/optim_factory.py new file mode 100644 index 0000000000..f70cd95d92 --- /dev/null +++ b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/fix_timm/optim_factory.py @@ -0,0 +1,187 @@ +# encoding=utf-8 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional + +import torch +import torch.nn as nn +import torch.optim as optim + +from .adafactor import Adafactor +from .adahessian import Adahessian +from .adamp import AdamP +from .lookahead import Lookahead +from .nadam import Nadam +from .novograd import NovoGrad +from .nvnovograd import NvNovoGrad +from .radam import RAdam +from .rmsprop_tf import RMSpropTF +from .sgdp import SGDP +from .adabelief import AdaBelief + +try: + from apex.optimizers import FusedNovoGrad, FusedAdam, FusedLAMB, FusedSGD,NpuFusedAdamW + has_apex = True +except ImportError: + has_apex = False + + +def add_weight_decay(model, weight_decay=1e-5, skip_list=()): + decay = [] + no_decay = [] + for name, param in model.named_parameters(): + if not param.requires_grad: + continue # frozen weights + if len(param.shape) == 1 or name.endswith(".bias") or name in skip_list: + no_decay.append(param) + else: + decay.append(param) + return [ + {'params': no_decay, 'weight_decay': 0.}, + {'params': decay, 'weight_decay': weight_decay}] + + +def optimizer_kwargs(cfg): + """ cfg/argparse to kwargs helper + Convert optimizer args in argparse args or cfg like object to keyword args for updated create fn. + """ + kwargs = dict( + optimizer_name=cfg.opt, + learning_rate=cfg.lr, + weight_decay=cfg.weight_decay, + momentum=cfg.momentum) + if getattr(cfg, 'opt_eps', None) is not None: + kwargs['eps'] = cfg.opt_eps + if getattr(cfg, 'opt_betas', None) is not None: + kwargs['betas'] = cfg.opt_betas + if getattr(cfg, 'opt_args', None) is not None: + kwargs.update(cfg.opt_args) + return kwargs + + +def create_optimizer(args, model, filter_bias_and_bn=True): + """ Legacy optimizer factory for backwards compatibility. + NOTE: Use create_optimizer_v2 for new code. + """ + return create_optimizer_v2( + model, + **optimizer_kwargs(cfg=args), + filter_bias_and_bn=filter_bias_and_bn, + ) + + +def create_optimizer_v2( + model: nn.Module, + optimizer_name: str = 'sgd', + learning_rate: Optional[float] = None, + weight_decay: float = 0., + momentum: float = 0.9, + filter_bias_and_bn: bool = True, + **kwargs): + """ Create an optimizer. + + TODO currently the model is passed in and all parameters are selected for optimization. 
+ For more general use an interface that allows selection of parameters to optimize and lr groups, one of: + * a filter fn interface that further breaks params into groups in a weight_decay compatible fashion + * expose the parameters interface and leave it up to caller + + Args: + model (nn.Module): model containing parameters to optimize + optimizer_name: name of optimizer to create + learning_rate: initial learning rate + weight_decay: weight decay to apply in optimizer + momentum: momentum for momentum based optimizers (others may use betas via kwargs) + filter_bias_and_bn: filter out bias, bn and other 1d params from weight decay + **kwargs: extra optimizer specific kwargs to pass through + + Returns: + Optimizer + """ + opt_lower = optimizer_name.lower() + if weight_decay and filter_bias_and_bn: + skip = {} + if hasattr(model, 'no_weight_decay'): + skip = model.no_weight_decay() + parameters = add_weight_decay(model, weight_decay, skip) + weight_decay = 0. + else: + parameters = model.parameters() + if 'fused' in opt_lower: + assert has_apex and torch.cuda.is_available(), 'APEX and CUDA required for fused optimizers' + + opt_args = dict(lr=learning_rate, weight_decay=weight_decay, **kwargs) + opt_split = opt_lower.split('_') + opt_lower = opt_split[-1] + if opt_lower == 'sgd' or opt_lower == 'nesterov': + opt_args.pop('eps', None) + optimizer = optim.SGD(parameters, momentum=momentum, nesterov=True, **opt_args) + elif opt_lower == 'momentum': + opt_args.pop('eps', None) + optimizer = optim.SGD(parameters, momentum=momentum, nesterov=False, **opt_args) + elif opt_lower == 'adam': + optimizer = optim.Adam(parameters, **opt_args) + elif opt_lower == 'adabelief': + optimizer = AdaBelief(parameters, rectify=False, **opt_args) + elif opt_lower == 'adamw': + # optimizer = optim.AdamW(parameters, **opt_args) + optimizer = NpuFusedAdamW(parameters, **opt_args) + elif opt_lower == 'nadam': + optimizer = Nadam(parameters, **opt_args) + elif opt_lower == 'radam': + optimizer = RAdam(parameters, **opt_args) + elif opt_lower == 'adamp': + optimizer = AdamP(parameters, wd_ratio=0.01, nesterov=True, **opt_args) + elif opt_lower == 'sgdp': + optimizer = SGDP(parameters, momentum=momentum, nesterov=True, **opt_args) + elif opt_lower == 'adadelta': + optimizer = optim.Adadelta(parameters, **opt_args) + elif opt_lower == 'adafactor': + if not learning_rate: + opt_args['lr'] = None + optimizer = Adafactor(parameters, **opt_args) + elif opt_lower == 'adahessian': + optimizer = Adahessian(parameters, **opt_args) + elif opt_lower == 'rmsprop': + optimizer = optim.RMSprop(parameters, alpha=0.9, momentum=momentum, **opt_args) + elif opt_lower == 'rmsproptf': + optimizer = RMSpropTF(parameters, alpha=0.9, momentum=momentum, **opt_args) + elif opt_lower == 'novograd': + optimizer = NovoGrad(parameters, **opt_args) + elif opt_lower == 'nvnovograd': + optimizer = NvNovoGrad(parameters, **opt_args) + elif opt_lower == 'fusedsgd': + opt_args.pop('eps', None) + optimizer = FusedSGD(parameters, momentum=momentum, nesterov=True, **opt_args) + elif opt_lower == 'fusedmomentum': + opt_args.pop('eps', None) + optimizer = FusedSGD(parameters, momentum=momentum, nesterov=False, **opt_args) + elif opt_lower == 'fusedadam': + optimizer = FusedAdam(parameters, adam_w_mode=False, **opt_args) + elif opt_lower == 'fusedadamw': + optimizer = FusedAdam(parameters, adam_w_mode=True, **opt_args) + elif opt_lower == 'fusedlamb': + optimizer = FusedLAMB(parameters, **opt_args) + elif opt_lower == 'fusednovograd': + 
opt_args.setdefault('betas', (0.95, 0.98)) + optimizer = FusedNovoGrad(parameters, **opt_args) + else: + assert False and "Invalid optimizer" + raise ValueError + + if len(opt_split) > 1: + if opt_split[0] == 'lookahead': + optimizer = Lookahead(optimizer) + + return optimizer diff --git a/PyTorch/contrib/cv/classification/Evo-Levit_256_384/levit/evo_levit.py b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/levit/evo_levit.py new file mode 100644 index 0000000000..7041bfd5bb --- /dev/null +++ b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/levit/evo_levit.py @@ -0,0 +1,788 @@ +# encoding=utf-8 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import utils +import torch.nn as nn + +from timm.models.vision_transformer import trunc_normal_ +from timm.models.registry import register_model + +specification = { + 'EvoLeViT_128S': { + 'C': '128_256_384', 'D': 16, 'N': '4_6_8', 'X': '2_3_4', 'drop_path': 0, + 'weights': 'https://dl.fbaipublicfiles.com/LeViT/LeViT-128S-96703c44.pth'}, + 'EvoLeViT_128': { + 'C': '128_256_384', 'D': 16, 'N': '4_8_12', 'X': '4_4_4', 'drop_path': 0, + 'weights': 'https://dl.fbaipublicfiles.com/LeViT/LeViT-128-b88c2750.pth'}, + 'EvoLeViT_192': { + 'C': '192_288_384', 'D': 32, 'N': '3_5_6', 'X': '4_4_4', 'drop_path': 0, + 'weights': 'https://dl.fbaipublicfiles.com/LeViT/LeViT-192-92712e41.pth'}, + 'EvoLeViT_256': { + 'C': '256_384_512', 'D': 32, 'N': '4_6_8', 'X': '4_4_4', 'drop_path': 0, + 'weights': 'https://dl.fbaipublicfiles.com/LeViT/LeViT-256-13b5763e.pth'}, + 'EvoLeViT_384': { + 'C': '384_512_768', 'D': 32, 'N': '6_9_12', 'X': '4_4_4', 'drop_path': 0.1, + 'weights': 'https://dl.fbaipublicfiles.com/LeViT/LeViT-384-9bdaf2e2.pth'}, +} + +prune_ratio_list = { + 'EvoLeViT_128S': [[1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 0.5]], + 'EvoLeViT_128': [[1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], + [0.5, 0.5, 0.5]], + 'EvoLeViT_192': [[1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], + [0.5, 0.5, 0.5]], + 'EvoLeViT_256': [[1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], + [0.5, 0.5, 0.5]], + 'EvoLeViT_384': [[1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], + [0.5, 0.5, 0.5]], +} + +__all__ = [specification.keys()] + + +@register_model +def EvoLeViT_128S(num_classes=1000, distillation=True, + pretrained=False, fuse=False): + return model_factory(**specification['EvoLeViT_128S'], num_classes=num_classes, + distillation=distillation, pretrained=pretrained, fuse=fuse, + prune_ratio=prune_ratio_list['EvoLeViT_128S']) + + +@register_model +def EvoLeViT_128(num_classes=1000, distillation=True, + pretrained=False, fuse=False): + return model_factory(**specification['EvoLeViT_128'], num_classes=num_classes, + distillation=distillation, pretrained=pretrained, fuse=fuse, + prune_ratio=prune_ratio_list['EvoLeViT_128']) + + +@register_model +def EvoLeViT_192(num_classes=1000, distillation=True, + pretrained=False, fuse=False): + return 
model_factory(**specification['EvoLeViT_192'], num_classes=num_classes, + distillation=distillation, pretrained=pretrained, fuse=fuse, + prune_ratio=prune_ratio_list['EvoLeViT_192']) + + +@register_model +def EvoLeViT_256(num_classes=1000, distillation=True, + pretrained=False, fuse=False): + return model_factory(**specification['EvoLeViT_256'], num_classes=num_classes, + distillation=distillation, pretrained=pretrained, fuse=fuse, + prune_ratio=prune_ratio_list['EvoLeViT_256']) + + +@register_model +def EvoLeViT_384(num_classes=1000, distillation=True, + pretrained=False, fuse=False): + return model_factory(**specification['EvoLeViT_384'], num_classes=num_classes, + distillation=distillation, pretrained=pretrained, fuse=fuse, + prune_ratio=prune_ratio_list['EvoLeViT_384']) + + +global_attn = 0 +ori_indices = None +learn_tradeoff_mode = True + + +def easy_gather(x, indices): + # x: B,N,C; indices: B,N + B, N, C = x.shape + N_new = indices.shape[1] + offset = torch.arange(B, dtype=torch.long, device=x.device).view(B, 1) * N + indices = indices + offset + out = x.reshape(B * N, C)[indices.view(-1)].reshape(B, N_new, C) + return out + + +def merge_tokens(x_drop, score): + # score B,N + # scale + weight = score / torch.sum(score, dim=1, keepdim=True) + x_drop = weight.unsqueeze(-1) * x_drop + return torch.sum(x_drop, dim=1, keepdim=True) + + +class CatModule(torch.nn.Module): + def __init__(self, m1, m2, prune_ratio, N): + super().__init__() + self.m1 = m1 + self.m2 = m2 + self.prune_ratio = prune_ratio + # self.i = i + if prune_ratio < 1.0: + N_ = N - int(N * prune_ratio) + self.drop_fc = nn.AdaptiveAvgPool1d(1) + # self.recover_fc=nn.Linear(1,N_) + + def set_prune_ratio(self, prune_ratio): + self.prune_ratio = prune_ratio + + def forward(self, x_): + global global_attn # ga + global ori_indices # oi + if self.prune_ratio < 1: + x = x_[:, 1:] # split out cls token + + N = x.shape[1] + N_ = int(N * self.prune_ratio) + indices = torch.argsort(global_attn, dim=1, descending=True) + + x_ga_oi = torch.cat((x, global_attn.unsqueeze(-1), ori_indices.unsqueeze(-1)), dim=-1) + x_ga_oi = easy_gather(x_ga_oi, indices) + x_sorted, global_attn, ori_indices = x_ga_oi[:, :, :-2], x_ga_oi[:, :, -2], x_ga_oi[:, :, -1] + + if self.training: + x_ = torch.cat((x_[:, :1], x_sorted), dim=1) + else: + x_[:, 1:] = x_sorted + x = x_[:, :N_ + 1] + x_drop = x_[:, N_ + 1:] + + add_token = merge_tokens(x_drop, global_attn[:, N_:]) # B,1,C + x = torch.cat((x, add_token), dim=1) # B,N+1,C + + x, raw_x1 = self.m1(x) + x, raw_x2 = self.m2(x) + x = x[:, :-1] + + # fast update via skip connection + add_token1 = raw_x1[:, -1:] + add_token2 = raw_x2[:, -1:] + x_drop = x_drop + add_token1.expand(-1, x_drop.shape[1], -1) + add_token2.expand(-1, x_drop.shape[1], -1) + + x_ = torch.cat((x, x_drop), dim=1) + # x_[:, N_ + 1:] = x_drop + # x_[:, :N_ + 1] = x + else: + x_, _ = self.m1(x_) + x_, _ = self.m2(x_) + return x_ + + +class StageModule(torch.nn.Module): + def __init__(self, m, prune_ratio): + super().__init__() + self.m = m + self.prune_ratio = prune_ratio + + def forward(self, x_): + global global_attn # ga + global ori_indices # oi + + if isinstance(x_, tuple): + x_ = x_[0] + + if self.prune_ratio < 1: + x = x_[:, 1:] # split out cls token + + N = x.shape[1] + N_ = int(N * self.prune_ratio) + indices = torch.argsort(global_attn, dim=1, descending=True) + + x_ga_oi = torch.cat((x, global_attn.unsqueeze(-1), ori_indices.unsqueeze(-1)), dim=-1) + x_ga_oi = easy_gather(x_ga_oi, indices) + x_sorted, global_attn, ori_indices = 
x_ga_oi[:, :, :-2], x_ga_oi[:, :, -2], x_ga_oi[:, :, -1] + + if self.training: + x_ = torch.cat((x_[:, :1], x_sorted), dim=1) + else: + x_[:, 1:] = x_sorted + + x = x_[:, :N_ + 1] + x_drop = x_[:, N_ + 1:] + + merge_weight = global_attn[:, N_:] + add_token = merge_tokens(x_drop, merge_weight) # B,1,C + x = torch.cat((x, add_token), dim=1) # B,N+1,C + + raw_total = 0 + for blk in self.m: + x, raw = blk(x) + raw_total = raw_total + raw[:, -1:] + + x_drop = x_drop + raw_total.expand(-1, x_drop.shape[1], -1) + + x = x[:, :-1] + if self.training: + x_ = torch.cat((x, x_drop), dim=1) + else: + x_[:, N_ + 1:] = x_drop + x_[:, :N_ + 1] = x + else: + x_ = self.m(x_) + return x_ + + +class Conv2d_BN(torch.nn.Sequential): + def __init__(self, a, b, ks=1, stride=1, pad=0, dilation=1, + groups=1, bn_weight_init=1, resolution=-10000): + super().__init__() + self.add_module('c', torch.nn.Conv2d( + a, b, ks, stride, pad, dilation, groups, bias=False)) + bn = torch.nn.BatchNorm2d(b) + torch.nn.init.constant_(bn.weight, bn_weight_init) + torch.nn.init.constant_(bn.bias, 0) + self.add_module('bn', bn) + + @torch.no_grad() + def fuse(self): + c, bn = self._modules.values() + w = bn.weight / (bn.running_var + bn.eps) ** 0.5 + w = c.weight * w[:, None, None, None] + b = bn.bias - bn.running_mean * bn.weight / \ + (bn.running_var + bn.eps) ** 0.5 + m = torch.nn.Conv2d(w.size(1), w.size( + 0), w.shape[2:], stride=self.c.stride, padding=self.c.padding, dilation=self.c.dilation, + groups=self.c.groups) + m.weight.data.copy_(w) + m.bias.data.copy_(b) + return m + + +class Linear_BN(torch.nn.Sequential): + def __init__(self, a, b, bn_weight_init=1, resolution=-100000): + super().__init__() + self.add_module('c', torch.nn.Linear(a, b, bias=False)) + bn = torch.nn.BatchNorm1d(b) + torch.nn.init.constant_(bn.weight, bn_weight_init) + torch.nn.init.constant_(bn.bias, 0) + self.add_module('bn', bn) + + @torch.no_grad() + def fuse(self): + l, bn = self._modules.values() + w = bn.weight / (bn.running_var + bn.eps) ** 0.5 + w = l.weight * w[:, None] + b = bn.bias - bn.running_mean * bn.weight / \ + (bn.running_var + bn.eps) ** 0.5 + m = torch.nn.Linear(w.size(1), w.size(0)) + m.weight.data.copy_(w) + m.bias.data.copy_(b) + return m + + def forward(self, x): + l, bn = self._modules.values() + x = l(x) + return bn(x.flatten(0, 1)).reshape_as(x) + + +class BN_Linear(torch.nn.Sequential): + def __init__(self, a, b, bias=True, std=0.02): + super().__init__() + self.add_module('bn', torch.nn.BatchNorm1d(a)) + l = torch.nn.Linear(a, b, bias=bias) + trunc_normal_(l.weight, std=std) + if bias: + torch.nn.init.constant_(l.bias, 0) + self.add_module('l', l) + + @torch.no_grad() + def fuse(self): + bn, l = self._modules.values() + w = bn.weight / (bn.running_var + bn.eps) ** 0.5 + b = bn.bias - self.bn.running_mean * \ + self.bn.weight / (bn.running_var + bn.eps) ** 0.5 + w = l.weight * w[None, :] + if l.bias is None: + b = b @ self.l.weight.T + else: + b = (l.weight @ b[:, None]).view(-1) + self.l.bias + m = torch.nn.Linear(w.size(1), w.size(0)) + m.weight.data.copy_(w) + m.bias.data.copy_(b) + return m + + +def b16(n, activation, resolution=224): + return torch.nn.Sequential( + Conv2d_BN(3, n // 8, 3, 2, 1, resolution=resolution), + activation(), + Conv2d_BN(n // 8, n // 4, 3, 2, 1, resolution=resolution // 2), + activation(), + Conv2d_BN(n // 4, n // 2, 3, 2, 1, resolution=resolution // 4), + activation(), + Conv2d_BN(n // 2, n, 3, 2, 1, resolution=resolution // 8)) + + +class Residual(torch.nn.Module): + def __init__(self, m, drop, 
out_raw=False): + super().__init__() + self.m = m + self.drop = drop + self.out_raw = out_raw + + def set_prune_ratio(self, prune_ratio): + pass + + def forward(self, x): + if isinstance(x, tuple): + x = x[0] + if self.training and self.drop > 0: + raw = self.m(x) * torch.rand(x.size(0), 1, 1, + device=x.device).ge_(self.drop).div(1 - self.drop).detach() + else: + raw = self.m(x) + if self.out_raw: + return x + raw, raw + else: + return x + raw + + +class Attention(torch.nn.Module): + def __init__(self, dim, key_dim, num_heads=8, + attn_ratio=4, + activation=None, + resolution=14, posembed=False, global_attn_tradeoff=0.5): + super().__init__() + self.tradeoff = global_attn_tradeoff + + self.learn_tradeoff = torch.nn.Parameter(torch.Tensor([0])) + self.sigmoid = torch.nn.Sigmoid() + + self.num_heads = num_heads + self.scale = key_dim ** -0.5 + self.key_dim = key_dim + self.nh_kd = nh_kd = key_dim * num_heads + self.d = int(attn_ratio * key_dim) + self.dh = int(attn_ratio * key_dim) * num_heads + self.attn_ratio = attn_ratio + h = self.dh + nh_kd * 2 + self.qkv = Linear_BN(dim, h, resolution=resolution) + self.proj = torch.nn.Sequential(activation(), Linear_BN( + self.dh, dim, bn_weight_init=0, resolution=resolution)) + + self.pos_embed = posembed + + @torch.no_grad() + def train(self, mode=True): + super().train(mode) + if mode and hasattr(self, 'ab'): + del self.ab + + def forward(self, x): # x (B,N,C) + global global_attn + global learn_tradeoff_mode + + B, N, C = x.shape + qkv = self.qkv(x) + q, k, v = qkv.view(B, N, self.num_heads, - + 1).split([self.key_dim, self.key_dim, self.d], dim=3) + q = q.permute(0, 2, 1, 3) + k = k.permute(0, 2, 1, 3) + v = v.permute(0, 2, 1, 3) + + attn_raw = (q @ k.transpose(-2, -1)) * self.scale + + attn = attn_raw.softmax(dim=-1) + + # update global attn + if learn_tradeoff_mode: + tradeoff = self.sigmoid(self.learn_tradeoff) + else: + tradeoff = self.tradeoff + + if isinstance(global_attn, int): + cls_attn = torch.mean(attn[:, :, 0, 1:], dim=1) # B,N + global_attn = cls_attn + else: + if global_attn.shape[1] - N + 2 == 1: + # no additional token and no pruning + cls_attn = torch.mean(attn[:, :, 0, 1:], dim=1) + global_attn = (1 - tradeoff) * global_attn + tradeoff * cls_attn + else: + cls_attn = torch.mean(attn[:, :, 0, 1:-1], dim=1) + + if self.training: + temp_attn = (1 - tradeoff) * global_attn[:, :N - 2] + tradeoff * cls_attn + global_attn = torch.cat((temp_attn, global_attn[:, N - 2:]), dim=1) + else: + global_attn[:, :N - 2] = (1 - tradeoff) * global_attn[:, :N - 2] + tradeoff * cls_attn + + x = (attn @ v).transpose(1, 2).reshape(B, N, self.dh) + x = self.proj(x) + return x + + +class Subsample(torch.nn.Module): + def __init__(self, stride, resolution): + super().__init__() + self.stride = stride + self.resolution = resolution + + def forward(self, x, with_cls=True): + if with_cls: + B, N, C = x.shape + x1 = x[:, 1:, :] + x1 = x1.view(B, self.resolution, self.resolution, C)[ + :, ::self.stride, ::self.stride].reshape(B, -1, C) + x = torch.cat((x[:, :1, :], x1), dim=1) + else: + B, N, C = x.shape + x = x.view(B, self.resolution, self.resolution, C)[ + :, ::self.stride, ::self.stride].reshape(B, -1, C) + return x + + +class AttentionSubsample(torch.nn.Module): + def __init__(self, in_dim, out_dim, key_dim, num_heads=8, + attn_ratio=2, + activation=None, + stride=2, + resolution=14, resolution_=7, posembed=False, global_attn_tradeoff=0.5): + super().__init__() + self.tradeoff = global_attn_tradeoff + + self.learn_tradeoff = 
torch.nn.Parameter(torch.Tensor([0])) + self.sigmoid = torch.nn.Sigmoid() + + self.num_heads = num_heads + self.scale = key_dim ** -0.5 + self.key_dim = key_dim + self.nh_kd = nh_kd = key_dim * num_heads + self.d = int(attn_ratio * key_dim) + self.dh = int(attn_ratio * key_dim) * self.num_heads + self.attn_ratio = attn_ratio + self.resolution_ = resolution_ + self.resolution_2 = resolution_ ** 2 + h = self.dh + nh_kd + self.kv = Linear_BN(in_dim, h, resolution=resolution) + + self.q = torch.nn.Sequential( + Subsample(stride, resolution), + Linear_BN(in_dim, nh_kd, resolution=resolution_)) + self.proj = torch.nn.Sequential(activation(), Linear_BN( + self.dh, out_dim, resolution=resolution_)) + + self.pos_embed = posembed + if posembed: + self.poss = nn.Parameter(torch.zeros(1, resolution ** 2 + 1, in_dim)) + trunc_normal_(self.poss, std=.02) + + self.stride = stride + self.resolution = resolution + + @torch.no_grad() + def train(self, mode=True): + super().train(mode) + if mode and hasattr(self, 'ab'): + del self.ab + + def set_prune_ratio(self, prune_ratio): + pass + + def forward(self, x): + global global_attn # ga + global ori_indices # oi + global learn_tradeoff_mode + + if isinstance(x, tuple): + x = x[0] + + # recover sequence + old_global_scale = torch.sum(global_attn, dim=1, keepdim=True) + + x_patch = x[:, 1:] + indices = torch.argsort(ori_indices, dim=1) + x_ga_oi = torch.cat((x_patch, global_attn.unsqueeze(-1), ori_indices.unsqueeze(-1)), dim=-1) + x_ga_oi = easy_gather(x_ga_oi, indices) + x_patch, ga_oi = x_ga_oi[:, :, :-2], x_ga_oi[:, :, -2:] + + # subsample global attn and ori indices + ga_oi = self.q[0](ga_oi, False) + global_attn, ori_indices = ga_oi[:, :, 0], ga_oi[:, :, 1] + + # global_attn, ori_indices = ga_oi[:, :, 0], ga_oi[:, :, 1] + + if self.training: + x = torch.cat((x[:, :1], x_patch), dim=1) + else: + x[:, 1:] = x_patch + + x = x + self.poss + B, N, C = x.shape + k, v = self.kv(x).view(B, N, self.num_heads, - + 1).split([self.key_dim, self.d], dim=3) + k = k.permute(0, 2, 1, 3) # BHNC + v = v.permute(0, 2, 1, 3) # BHNC + q = self.q(x).view(B, self.resolution_2 + 1, self.num_heads, + self.key_dim).permute(0, 2, 1, 3) + + attn_raw = (q @ k.transpose(-2, -1)) * self.scale + + attn = attn_raw.softmax(dim=-1) + + cls_attn = torch.mean(attn[:, :, 0, 1:], dim=1) # B,N + cls_attn = self.q[0](cls_attn.unsqueeze(-1), False).squeeze(-1) + + if learn_tradeoff_mode: + tradeoff = self.sigmoid(self.learn_tradeoff) + else: + tradeoff = self.tradeoff + + global_attn = (1 - tradeoff) * global_attn + tradeoff * cls_attn + + # normalize global attention + new_global_scale = torch.sum(global_attn, dim=1, keepdim=True) + scale = old_global_scale / new_global_scale + global_attn = global_attn * scale + + x = (attn @ v).transpose(1, 2).reshape(B, -1, self.dh) + x = self.proj(x) + return x + + +class LeViT(torch.nn.Module): + """ Vision Transformer with support for patch or hybrid CNN input stage + """ + + def __init__(self, img_size=224, + patch_size=16, + in_chans=3, + num_classes=1000, + embed_dim=[192], + key_dim=[64], + depth=[12], + num_heads=[3], + attn_ratio=[2], + mlp_ratio=[2], + hybrid_backbone=None, + down_ops=[], + attention_activation=torch.nn.Hardswish, + mlp_activation=torch.nn.Hardswish, + distillation=True, + drop_path=0, prune_ratio=None): + super().__init__() + + self.stage_wise_prune = True + + self.num_classes = num_classes + self.num_features = embed_dim[-1] + self.embed_dim = embed_dim + self.distillation = distillation + self.cls_token = 
nn.Parameter(torch.zeros(1, 1, embed_dim[0])) + + self.patch_embed = hybrid_backbone + + self.pos_embed = True + + self.blocks = [] + self.stage_blocks = [] + + down_ops.append(['']) + resolution = img_size // patch_size + if self.pos_embed: + self.poss = nn.Parameter(torch.zeros(1, resolution ** 2 + 1, embed_dim[0])) + trunc_normal_(self.poss, std=.02) + + self.prune_ratio = prune_ratio[0] + self.stage_prune_ratio = prune_ratio[1] + + layer_index = -1 + n = 14 + j = 0 + + for i, (ed, kd, dpth, nh, ar, mr, do) in enumerate( + zip(embed_dim, key_dim, depth, num_heads, attn_ratio, mlp_ratio, down_ops)): + stage_subblocks = [] + for _ in range(dpth): + layer_index += 1 + + m1 = Residual(Attention( + ed, kd, nh, + attn_ratio=ar, + activation=attention_activation, + resolution=resolution, + posembed=self.pos_embed + ), drop_path, out_raw=True) + if self.prune_ratio[layer_index] == 1: + self.stage_blocks.append(m1) + else: + stage_subblocks.append(m1) + + if mr > 0: + h = int(ed * mr) + m2 = Residual(torch.nn.Sequential( + Linear_BN(ed, h, resolution=resolution), + mlp_activation(), + Linear_BN(h, ed, bn_weight_init=0, + resolution=resolution), + ), drop_path, out_raw=True) + else: + m2 = torch.nn.Identity() + + if self.prune_ratio[layer_index] == 1: + self.stage_blocks.append(m2) + else: + stage_subblocks.append(m2) + + self.blocks.append(CatModule(m1, m2, prune_ratio=self.prune_ratio[layer_index], N=n ** 2)) + if self.prune_ratio[layer_index] < 1: + j = j + 1 + + if len(stage_subblocks) != 0: + stage_subblocks = torch.nn.ModuleList(stage_subblocks) + self.stage_blocks.append(StageModule(stage_subblocks, prune_ratio=self.stage_prune_ratio[i])) + + if do[0] == 'Subsample': + n = int((n + 1) / 2) + # ('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride) + resolution_ = (resolution - 1) // do[5] + 1 + subsample = AttentionSubsample( + *embed_dim[i:i + 2], key_dim=do[1], num_heads=do[2], + attn_ratio=do[3], + activation=attention_activation, + stride=do[5], + resolution=resolution, + resolution_=resolution_, + posembed=self.pos_embed) + self.blocks.append(subsample) + self.stage_blocks.append(subsample) + + resolution = resolution_ + if do[4] > 0: # mlp_ratio + h = int(embed_dim[i + 1] * do[4]) + ffn = Residual(torch.nn.Sequential( + Linear_BN(embed_dim[i + 1], h, + resolution=resolution), + mlp_activation(), + Linear_BN( + h, embed_dim[i + 1], bn_weight_init=0, resolution=resolution), + ), drop_path) + self.blocks.append(ffn) + self.stage_blocks.append(ffn) + + self.blocks = torch.nn.Sequential(*self.blocks) + self.stage_blocks = torch.nn.Sequential(*self.stage_blocks) + + # Classifier head + self.head = BN_Linear( + embed_dim[-1], num_classes) if num_classes > 0 else torch.nn.Identity() + if distillation: + self.head_dist = BN_Linear( + embed_dim[-1], num_classes) if num_classes > 0 else torch.nn.Identity() + self.clsc = True + if self.clsc: + self.head_cls = BN_Linear( + embed_dim[-1], num_classes) if num_classes > 0 else torch.nn.Identity() + if distillation: + self.head_cls_dist = BN_Linear( + embed_dim[-1], num_classes) if num_classes > 0 else torch.nn.Identity() + + @torch.jit.ignore + def no_weight_decay(self): + return {x for x in self.state_dict().keys() if 'poss' in x} + + def set_learn_tradeoff(self, mode): + global learn_tradeoff_mode + learn_tradeoff_mode = mode + + def set_prune_ratio(self, mode): + pass + + def remove_cls(self): + if hasattr(self, 'head_cls'): + del self.head_cls + if hasattr(self, 'head_cls_dist'): + del self.head_cls_dist + + def forward(self, x): + 
global global_attn + global ori_indices + global learn_tradeoff_mode + + global_attn = 0 + + x = self.patch_embed(x) + x = x.flatten(2).transpose(1, 2) + + ori_indices = torch.arange(x.shape[1], dtype=torch.long, device=x.device).unsqueeze(0) + ori_indices = ori_indices.expand(x.shape[0], -1) + + cls_token = self.cls_token.expand(x.shape[0], -1, -1) + x = torch.cat((cls_token, x), 1) + if self.pos_embed: + x = x + self.poss + + if self.stage_wise_prune: + x = self.stage_blocks(x) + else: + x = self.blocks(x) + + cls = x[:, 0, :] + x = x[:, 1:, :] + x = x.mean(1) + if self.distillation: + x = self.head(x), self.head_dist(x) + if self.clsc: + if self.training: + xcls = self.head_cls(cls) + xcls_dist = self.head_cls_dist(cls) + return x[0], x[1], xcls, xcls_dist + else: + return (x[0] + x[1]) / 2 + if not self.training: + x = (x[0] + x[1]) / 2 + + else: + x = self.head(x) + return x + + +def model_factory(C, D, X, N, drop_path, weights, + num_classes, distillation, pretrained, fuse, prune_ratio): + embed_dim = [int(x) for x in C.split('_')] + num_heads = [int(x) for x in N.split('_')] + depth = [int(x) for x in X.split('_')] + act = torch.nn.Hardswish + model = LeViT( + patch_size=16, + embed_dim=embed_dim, + num_heads=num_heads, + key_dim=[D] * 3, + depth=depth, + attn_ratio=[2, 2, 2], + mlp_ratio=[2, 2, 2], + down_ops=[ + # ('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride) + ['Subsample', D, embed_dim[0] // D, 4, 2, 2], + ['Subsample', D, embed_dim[1] // D, 4, 2, 2], + ], + attention_activation=act, + mlp_activation=act, + hybrid_backbone=b16(embed_dim[0], activation=act), + num_classes=num_classes, + drop_path=drop_path, + distillation=distillation, + prune_ratio=prune_ratio + ) + if pretrained: + checkpoint = torch.hub.load_state_dict_from_url( + weights, map_location='cpu') + model.load_state_dict(checkpoint['model']) + if fuse: + utils.replace_batchnorm(model) + + return model + + +if __name__ == '__main__': + for name in specification: + net = globals()[name](fuse=False, pretrained=False) + net.eval() + net.remove_cls() + net(torch.randn(4, 3, 224, 224)) + print(name, 'Parameters:', sum(p.numel() for p in net.parameters() if p.requires_grad)) diff --git a/PyTorch/contrib/cv/classification/Evo-Levit_256_384/levit/evo_levit_384.py b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/levit/evo_levit_384.py new file mode 100644 index 0000000000..f6ffff25de --- /dev/null +++ b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/levit/evo_levit_384.py @@ -0,0 +1,813 @@ +# encoding=utf-8 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
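+
+# NOTE: this file is the 384-resolution counterpart of levit/evo_levit.py, adapted for the Ascend
+# NPU (LeViT here defaults to img_size=384). The token-selection logic is unchanged; the
+# differences are that the attention matmuls are routed through a custom autograd function
+# (MatmulApply / matmul_transpose, built on torch.npu_bmmV2), the attention output reshape in
+# Attention uses npu_confusion_transpose, and several sliced views are made contiguous()/clone()d,
+# apparently to avoid in-place update issues on NPU.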
+ +import torch +import utils +import torch.nn as nn + +from timm.models.vision_transformer import trunc_normal_ +from timm.models.registry import register_model + +specification = { + 'EvoLeViT_128S_384': { + 'C': '128_256_384', 'D': 16, 'N': '4_6_8', 'X': '2_3_4', 'drop_path': 0, + 'weights': 'https://dl.fbaipublicfiles.com/LeViT/LeViT-128S-96703c44.pth'}, + 'EvoLeViT_128_384': { + 'C': '128_256_384', 'D': 16, 'N': '4_8_12', 'X': '4_4_4', 'drop_path': 0, + 'weights': 'https://dl.fbaipublicfiles.com/LeViT/LeViT-128-b88c2750.pth'}, + 'EvoLeViT_192_384': { + 'C': '192_288_384', 'D': 32, 'N': '3_5_6', 'X': '4_4_4', 'drop_path': 0, + 'weights': 'https://dl.fbaipublicfiles.com/LeViT/LeViT-192-92712e41.pth'}, + 'EvoLeViT_256_384': { + 'C': '256_384_512', 'D': 32, 'N': '4_6_8', 'X': '4_4_4', 'drop_path': 0, + 'weights': 'https://dl.fbaipublicfiles.com/LeViT/LeViT-256-13b5763e.pth'}, + 'EvoLeViT_384_384': { + 'C': '384_512_768', 'D': 32, 'N': '6_9_12', 'X': '4_4_4', 'drop_path': 0.1, + 'weights': 'https://dl.fbaipublicfiles.com/LeViT/LeViT-384-9bdaf2e2.pth'}, +} + +prune_ratio_list = { + 'EvoLeViT_128S_384': [[1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 0.5]], + 'EvoLeViT_128_384': [[1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], + [0.5, 0.5, 0.5]], + 'EvoLeViT_192_384': [[1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], + [0.5, 0.5, 0.5]], + 'EvoLeViT_256_384': [[1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], + [0.5, 0.5, 0.5]], + 'EvoLeViT_384_384': [[1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], + [0.5, 0.5, 0.5]], +} + +__all__ = [specification.keys()] + + +@register_model +def EvoLeViT_128S_384(num_classes=1000, distillation=True, + pretrained=False, fuse=False): + return model_factory(**specification['EvoLeViT_128S_384'], num_classes=num_classes, + distillation=distillation, pretrained=pretrained, fuse=fuse, + prune_ratio=prune_ratio_list['EvoLeViT_128S_384']) + + +@register_model +def EvoLeViT_128_384(num_classes=1000, distillation=True, + pretrained=False, fuse=False): + return model_factory(**specification['EvoLeViT_128_384'], num_classes=num_classes, + distillation=distillation, pretrained=pretrained, fuse=fuse, + prune_ratio=prune_ratio_list['EvoLeViT_128_384']) + + +@register_model +def EvoLeViT_192_384(num_classes=1000, distillation=True, + pretrained=False, fuse=False): + return model_factory(**specification['EvoLeViT_192_384'], num_classes=num_classes, + distillation=distillation, pretrained=pretrained, fuse=fuse, + prune_ratio=prune_ratio_list['EvoLeViT_192_384']) + + +@register_model +def EvoLeViT_256_384(num_classes=1000, distillation=True, + pretrained=False, fuse=False): + return model_factory(**specification['EvoLeViT_256_384'], num_classes=num_classes, + distillation=distillation, pretrained=pretrained, fuse=fuse, + prune_ratio=prune_ratio_list['EvoLeViT_256_384']) + + +@register_model +def EvoLeViT_384_384(num_classes=1000, distillation=True, + pretrained=False, fuse=False): + return model_factory(**specification['EvoLeViT_384_384'], num_classes=num_classes, + distillation=distillation, pretrained=pretrained, fuse=fuse, + prune_ratio=prune_ratio_list['EvoLeViT_384_384']) + + +global_attn = 0 +ori_indices = None +learn_tradeoff_mode = True + + +def easy_gather(x, indices): + # x: B,N,C; indices: B,N + B, N, C = x.shape + N_new = indices.shape[1] + offset = torch.arange(B, dtype=torch.long, device=x.device).view(B, 1) * N + indices = indices + offset + out = x.reshape(B * N, C)[indices.view(-1)].reshape(B, N_new, C) + return out 
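+
+# NOTE: easy_gather above reorders tokens per batch element. Adding b * N to each row of `indices`
+# lets the flattened (B*N, C) view be indexed in a single shot, which gives the same result as
+# torch.gather(x, 1, indices.unsqueeze(-1).expand(-1, -1, C)); the flat-index form was presumably
+# preferred here for NPU performance. For example, with B=2 and N=3, indices [[2, 0, 1], [2, 0, 1]]
+# become [[2, 0, 1], [5, 3, 4]] after the offset and pick the corresponding rows of the flat view.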
+ + +def merge_tokens(x_drop, score): + # score B,N + # scale + weight = score / torch.sum(score, dim=1, keepdim=True) + x_drop = weight.unsqueeze(-1) * x_drop + return torch.sum(x_drop, dim=1, keepdim=True) + + +class CatModule(torch.nn.Module): + def __init__(self, m1, m2, prune_ratio, N): + super().__init__() + self.m1 = m1 + self.m2 = m2 + self.prune_ratio = prune_ratio + # self.i = i + if prune_ratio < 1.0: + N_ = N - int(N * prune_ratio) + self.drop_fc = nn.AdaptiveAvgPool1d(1) + # self.recover_fc=nn.Linear(1,N_) + + def set_prune_ratio(self, prune_ratio): + self.prune_ratio = prune_ratio + + def forward(self, x_): + global global_attn # ga + global ori_indices # oi + if self.prune_ratio < 1: + x = x_[:, 1:] # split out cls token + + N = x.shape[1] + N_ = int(N * self.prune_ratio) + global_attn = global_attn.clone() + indices = torch.argsort(global_attn, dim=1, descending=True) + + x_ga_oi = torch.cat((x, global_attn.unsqueeze(-1), ori_indices.unsqueeze(-1).half()), dim=-1) + x_ga_oi = easy_gather(x_ga_oi, indices) + x_ga_oi = x_ga_oi.contiguous() # todo + x_sorted, global_attn, ori_indices = x_ga_oi[:, :, :-2].clone(), x_ga_oi[:, :, -2].clone(), x_ga_oi[:, :, -1].clone() # todo + + if self.training: + x_ = torch.cat((x_[:, :1], x_sorted), dim=1) + else: + x_[:, 1:] = x_sorted + x = x_[:, :N_ + 1] + x_drop = x_[:, N_ + 1:] + + add_token = merge_tokens(x_drop, global_attn[:, N_:]) # B,1,C + x = torch.cat((x, add_token), dim=1) # B,N+1,C + + x, raw_x1 = self.m1(x) + x, raw_x2 = self.m2(x) + x = x[:, :-1] + + # fast update via skip connection + add_token1 = raw_x1[:, -1:] + add_token2 = raw_x2[:, -1:] + x_drop = x_drop + add_token1.expand(-1, x_drop.shape[1], -1) + add_token2.expand(-1, x_drop.shape[1], -1) + + x_ = torch.cat((x, x_drop), dim=1) + # x_[:, N_ + 1:] = x_drop + # x_[:, :N_ + 1] = x + else: + x_, _ = self.m1(x_) + x_, _ = self.m2(x_) + return x_ + + +class StageModule(torch.nn.Module): + def __init__(self, m, prune_ratio): + super().__init__() + self.m = m + self.prune_ratio = prune_ratio + + def forward(self, x_): + global global_attn # ga + global ori_indices # oi + + if isinstance(x_, tuple): + x_ = x_[0] + + if self.prune_ratio < 1: + x = x_[:, 1:] # split out cls token + + N = x.shape[1] + N_ = int(N * self.prune_ratio) + indices = torch.argsort(global_attn, dim=1, descending=True) + x_ga_oi = torch.cat((x, global_attn.unsqueeze(-1), ori_indices.unsqueeze(-1).half()), dim=-1) + x_ga_oi = easy_gather(x_ga_oi, indices) + x_ga_oi = x_ga_oi.contiguous() # todo + x_sorted, global_attn, ori_indices = x_ga_oi[:, :, :-2].clone(), x_ga_oi[:, :, -2].clone(), x_ga_oi[:, :, -1].clone() # todo + + if self.training: + x_ = torch.cat((x_[:, :1], x_sorted), dim=1) + else: + x_[:, 1:] = x_sorted + + x = x_[:, :N_ + 1].clone() # todo + x_drop = x_[:, N_ + 1:].clone() # todo + + merge_weight = global_attn[:, N_:].clone() # todo + add_token = merge_tokens(x_drop, merge_weight) # B,1,C + x = torch.cat((x, add_token), dim=1) # B,N+1,C + + raw_total = 0 + for blk in self.m: + x, raw = blk(x) + raw_total = raw_total + raw[:, -1:].clone() # todo + + x_drop = x_drop + raw_total.expand(-1, x_drop.shape[1], -1) + + x = x[:, :-1] + if self.training: + x_ = torch.cat((x, x_drop), dim=1) + else: + x_[:, N_ + 1:] = x_drop + x_[:, :N_ + 1] = x + else: + x_ = self.m(x_) + return x_ + + +class Conv2d_BN(torch.nn.Sequential): + def __init__(self, a, b, ks=1, stride=1, pad=0, dilation=1, + groups=1, bn_weight_init=1, resolution=-10000): + super().__init__() + self.add_module('c', torch.nn.Conv2d( + a, b, 
ks, stride, pad, dilation, groups, bias=False)) + bn = torch.nn.BatchNorm2d(b) + torch.nn.init.constant_(bn.weight, bn_weight_init) + torch.nn.init.constant_(bn.bias, 0) + self.add_module('bn', bn) + + @torch.no_grad() + def fuse(self): + c, bn = self._modules.values() + w = bn.weight / (bn.running_var + bn.eps) ** 0.5 + w = c.weight * w[:, None, None, None] + b = bn.bias - bn.running_mean * bn.weight / \ + (bn.running_var + bn.eps) ** 0.5 + m = torch.nn.Conv2d(w.size(1), w.size( + 0), w.shape[2:], stride=self.c.stride, padding=self.c.padding, dilation=self.c.dilation, + groups=self.c.groups) + m.weight.data.copy_(w) + m.bias.data.copy_(b) + return m + + +class Linear_BN(torch.nn.Sequential): + def __init__(self, a, b, bn_weight_init=1, resolution=-100000): + super().__init__() + self.add_module('c', torch.nn.Linear(a, b, bias=False)) + bn = torch.nn.BatchNorm1d(b) + torch.nn.init.constant_(bn.weight, bn_weight_init) + torch.nn.init.constant_(bn.bias, 0) + self.add_module('bn', bn) + + @torch.no_grad() + def fuse(self): + l, bn = self._modules.values() + w = bn.weight / (bn.running_var + bn.eps) ** 0.5 + w = l.weight * w[:, None] + b = bn.bias - bn.running_mean * bn.weight / \ + (bn.running_var + bn.eps) ** 0.5 + m = torch.nn.Linear(w.size(1), w.size(0)) + m.weight.data.copy_(w) + m.bias.data.copy_(b) + return m + + def forward(self, x): + l, bn = self._modules.values() + x = l(x) + return bn(x.flatten(0, 1)).reshape_as(x) + + +class BN_Linear(torch.nn.Sequential): + def __init__(self, a, b, bias=True, std=0.02): + super().__init__() + self.add_module('bn', torch.nn.BatchNorm1d(a)) + l = torch.nn.Linear(a, b, bias=bias) + trunc_normal_(l.weight, std=std) + if bias: + torch.nn.init.constant_(l.bias, 0) + self.add_module('l', l) + + @torch.no_grad() + def fuse(self): + bn, l = self._modules.values() + w = bn.weight / (bn.running_var + bn.eps) ** 0.5 + b = bn.bias - self.bn.running_mean * \ + self.bn.weight / (bn.running_var + bn.eps) ** 0.5 + w = l.weight * w[None, :] + if l.bias is None: + b = b @ self.l.weight.T + else: + b = (l.weight @ b[:, None]).view(-1) + self.l.bias + m = torch.nn.Linear(w.size(1), w.size(0)) + m.weight.data.copy_(w) + m.bias.data.copy_(b) + return m + + +def b16(n, activation, resolution=224): + return torch.nn.Sequential( + Conv2d_BN(3, n // 8, 3, 2, 1, resolution=resolution), + activation(), + Conv2d_BN(n // 8, n // 4, 3, 2, 1, resolution=resolution // 2), + activation(), + Conv2d_BN(n // 4, n // 2, 3, 2, 1, resolution=resolution // 4), + activation(), + Conv2d_BN(n // 2, n, 3, 2, 1, resolution=resolution // 8)) + + +class Residual(torch.nn.Module): + def __init__(self, m, drop, out_raw=False): + super().__init__() + self.m = m + self.drop = drop + self.out_raw = out_raw + + def set_prune_ratio(self, prune_ratio): + pass + + def forward(self, x): + if isinstance(x, tuple): + x = x[0] + if self.training and self.drop > 0: + raw = self.m(x) * torch.rand(x.size(0), 1, 1, + device=x.device).ge_(self.drop).div(1 - self.drop).detach() + else: + raw = self.m(x) + if self.out_raw: + return x + raw, raw + else: + return x + raw + + +class MatmulApply(torch.autograd.Function): + @staticmethod + def forward(ctx, self, mat2): + ctx.save_for_backward(self, mat2) + result = torch.matmul(self, mat2.transpose(-2, -1)) + return result + @staticmethod + def backward(ctx, grad): + self, mat2 = ctx.saved_tensors + self_grad = torch.npu_bmmV2(grad, mat2, []) + mat2_grad = torch.npu_bmmV2(grad.transpose(-2, -1), self, []) + return self_grad, mat2_grad + +matmul_transpose = 
MatmulApply.apply + + +class Attention(torch.nn.Module): + def __init__(self, dim, key_dim, num_heads=8, + attn_ratio=4, + activation=None, + resolution=14, posembed=False, global_attn_tradeoff=0.5): + super().__init__() + self.tradeoff = global_attn_tradeoff + + self.learn_tradeoff = torch.nn.Parameter(torch.Tensor([0])) + self.sigmoid = torch.nn.Sigmoid() + + self.num_heads = num_heads + self.scale = key_dim ** -0.5 + self.key_dim = key_dim + self.nh_kd = nh_kd = key_dim * num_heads + self.d = int(attn_ratio * key_dim) + self.dh = int(attn_ratio * key_dim) * num_heads + self.attn_ratio = attn_ratio + h = self.dh + nh_kd * 2 + self.qkv = Linear_BN(dim, h, resolution=resolution) + self.proj = torch.nn.Sequential(activation(), Linear_BN( + self.dh, dim, bn_weight_init=0, resolution=resolution)) + + self.pos_embed = posembed + + @torch.no_grad() + def train(self, mode=True): + super().train(mode) + if mode and hasattr(self, 'ab'): + del self.ab + + def forward(self, x): # x (B,N,C) + global global_attn + global learn_tradeoff_mode + + B, N, C = x.shape + qkv = self.qkv(x) + q, k, v = qkv.view(B, N, self.num_heads, - + 1).split([self.key_dim, self.key_dim, self.d], dim=3) + + q = q.permute(0, 2, 1, 3).contiguous() + k = k.permute(0, 2, 1, 3).contiguous() + v = v.permute(0, 2, 1, 3).contiguous() + + # attn_raw = (q @ k.transpose(-2, -1)) * self.scale + attn_raw = matmul_transpose(q, k) * self.scale # todo + + attn = attn_raw.softmax(dim=-1) + + # update global attn + if learn_tradeoff_mode: + tradeoff = self.sigmoid(self.learn_tradeoff) + else: + tradeoff = self.tradeoff + + if isinstance(global_attn, int): + cls_attn = torch.mean(attn[:, :, 0, 1:], dim=1) # B,N + global_attn = cls_attn + else: + if global_attn.shape[1] - N + 2 == 1: + # no additional token and no pruning + cls_attn = torch.mean(attn[:, :, 0, 1:], dim=1) + global_attn = (1 - tradeoff) * global_attn + tradeoff * cls_attn + else: + cls_attn = torch.mean(attn[:, :, 0, 1:-1], dim=1) + + if self.training: + temp_attn = (1 - tradeoff) * global_attn[:, :N - 2] + tradeoff * cls_attn + global_attn = torch.cat((temp_attn, global_attn[:, N - 2:]), dim=1) + else: + global_attn[:, :N - 2] = (1 - tradeoff) * global_attn[:, :N - 2] + tradeoff * cls_attn + + # x = (attn @ v).transpose(1, 2).reshape(B, N, self.dh) + x = (attn @ v).npu_confusion_transpose([0, 2, 1, 3], (B, N, self.dh), True) # todo + x = self.proj(x) + return x + + +class Subsample(torch.nn.Module): + def __init__(self, stride, resolution): + super().__init__() + self.stride = stride + self.resolution = resolution + + def forward(self, x, with_cls=True): + if with_cls: + B, N, C = x.shape + x1 = x[:, 1:, :] + x1 = x1.view(B, self.resolution, self.resolution, C)[ + :, ::self.stride, ::self.stride].reshape(B, -1, C).contiguous() + x = torch.cat((x[:, :1, :], x1), dim=1) + else: + B, N, C = x.shape + x = x.view(B, self.resolution, self.resolution, C)[ + :, ::self.stride, ::self.stride].reshape(B, -1, C).contiguous() + return x + + +class AttentionSubsample(torch.nn.Module): + def __init__(self, in_dim, out_dim, key_dim, num_heads=8, + attn_ratio=2, + activation=None, + stride=2, + resolution=14, resolution_=7, posembed=False, global_attn_tradeoff=0.5): + super().__init__() + self.tradeoff = global_attn_tradeoff + + self.learn_tradeoff = torch.nn.Parameter(torch.Tensor([0])) + self.sigmoid = torch.nn.Sigmoid() + + self.num_heads = num_heads + self.scale = key_dim ** -0.5 + self.key_dim = key_dim + self.nh_kd = nh_kd = key_dim * num_heads + self.d = int(attn_ratio * key_dim) + 
self.dh = int(attn_ratio * key_dim) * self.num_heads + self.attn_ratio = attn_ratio + self.resolution_ = resolution_ + self.resolution_2 = resolution_ ** 2 + h = self.dh + nh_kd + self.kv = Linear_BN(in_dim, h, resolution=resolution) + + self.q = torch.nn.Sequential( + Subsample(stride, resolution), + Linear_BN(in_dim, nh_kd, resolution=resolution_)) + self.proj = torch.nn.Sequential(activation(), Linear_BN( + self.dh, out_dim, resolution=resolution_)) + + self.pos_embed = posembed + if posembed: + self.poss = nn.Parameter(torch.zeros(1, resolution ** 2 + 1, in_dim)) + trunc_normal_(self.poss, std=.02) + + self.stride = stride + self.resolution = resolution + + @torch.no_grad() + def train(self, mode=True): + super().train(mode) + if mode and hasattr(self, 'ab'): + del self.ab + + def set_prune_ratio(self, prune_ratio): + pass + + def forward(self, x): + global global_attn # ga + global ori_indices # oi + global learn_tradeoff_mode + + if isinstance(x, tuple): + x = x[0] + + # recover sequence + old_global_scale = torch.sum(global_attn, dim=1, keepdim=True) + + x_patch = x[:, 1:] + ori_indices = ori_indices.clone() + indices = torch.argsort(ori_indices, dim=1) + x_ga_oi = torch.cat((x_patch, global_attn.unsqueeze(-1), ori_indices.unsqueeze(-1)), dim=-1) + x_ga_oi = easy_gather(x_ga_oi, indices) + x_ga_oi = x_ga_oi.contiguous() # todo + x_patch, ga_oi = x_ga_oi[:, :, :-2].clone(), x_ga_oi[:, :, -2:].clone() # todo + + # subsample global attn and ori indices + ga_oi = self.q[0](ga_oi, False) + global_attn, ori_indices = ga_oi[:, :, 0].clone(), ga_oi[:, :, 1].clone() # todo + + # global_attn, ori_indices = ga_oi[:, :, 0], ga_oi[:, :, 1] + + if self.training: + x = torch.cat((x[:, :1], x_patch), dim=1) + else: + x[:, 1:] = x_patch + + x = x + self.poss + B, N, C = x.shape + k, v = self.kv(x).view(B, N, self.num_heads, - + 1).split([self.key_dim, self.d], dim=3) + k = k.permute(0, 2, 1, 3).contiguous() # BHNC + v = v.permute(0, 2, 1, 3).contiguous() # BHNC + q = self.q(x).view(B, self.resolution_2 + 1, self.num_heads, + self.key_dim).permute(0, 2, 1, 3) + + # attn_raw = (q @ k.transpose(-2, -1)) * self.scale + attn_raw = matmul_transpose(q, k) * self.scale # todo + + attn = attn_raw.softmax(dim=-1) + + cls_attn = torch.mean(attn[:, :, 0, 1:], dim=1) # B,N + cls_attn = self.q[0](cls_attn.unsqueeze(-1), False).squeeze(-1) + + if learn_tradeoff_mode: + tradeoff = self.sigmoid(self.learn_tradeoff) + else: + tradeoff = self.tradeoff + + global_attn = (1 - tradeoff) * global_attn + tradeoff * cls_attn + + # normalize global attention + new_global_scale = torch.sum(global_attn, dim=1, keepdim=True) + scale = old_global_scale / new_global_scale + global_attn = global_attn * scale + + x = (attn @ v).transpose(1, 2).reshape(B, -1, self.dh) + x = self.proj(x) + return x + + +class LeViT(torch.nn.Module): + """ Vision Transformer with support for patch or hybrid CNN input stage + """ + + def __init__(self, img_size=384, + patch_size=16, + in_chans=3, + num_classes=1000, + embed_dim=[192], + key_dim=[64], + depth=[12], + num_heads=[3], + attn_ratio=[2], + mlp_ratio=[2], + hybrid_backbone=None, + down_ops=[], + attention_activation=torch.nn.Hardswish, + mlp_activation=torch.nn.Hardswish, + distillation=True, + drop_path=0, prune_ratio=None): + super().__init__() + + self.stage_wise_prune = True + + self.num_classes = num_classes + self.num_features = embed_dim[-1] + self.embed_dim = embed_dim + self.distillation = distillation + self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim[0])) + + 
self.patch_embed = hybrid_backbone + + self.pos_embed = True + + self.blocks = [] + self.stage_blocks = [] + + down_ops.append(['']) + resolution = img_size // patch_size + if self.pos_embed: + self.poss = nn.Parameter(torch.zeros(1, resolution ** 2 + 1, embed_dim[0])) + trunc_normal_(self.poss, std=.02) + + self.prune_ratio = prune_ratio[0] + self.stage_prune_ratio = prune_ratio[1] + + layer_index = -1 + n = 14 + j = 0 + + for i, (ed, kd, dpth, nh, ar, mr, do) in enumerate( + zip(embed_dim, key_dim, depth, num_heads, attn_ratio, mlp_ratio, down_ops)): + stage_subblocks = [] + for _ in range(dpth): + layer_index += 1 + + m1 = Residual(Attention( + ed, kd, nh, + attn_ratio=ar, + activation=attention_activation, + resolution=resolution, + posembed=self.pos_embed + ), drop_path, out_raw=True) + if self.prune_ratio[layer_index] == 1: + self.stage_blocks.append(m1) + else: + stage_subblocks.append(m1) + + if mr > 0: + h = int(ed * mr) + m2 = Residual(torch.nn.Sequential( + Linear_BN(ed, h, resolution=resolution), + mlp_activation(), + Linear_BN(h, ed, bn_weight_init=0, + resolution=resolution), + ), drop_path, out_raw=True) + else: + m2 = torch.nn.Identity() + + if self.prune_ratio[layer_index] == 1: + self.stage_blocks.append(m2) + else: + stage_subblocks.append(m2) + + self.blocks.append(CatModule(m1, m2, prune_ratio=self.prune_ratio[layer_index], N=n ** 2)) + if self.prune_ratio[layer_index] < 1: + j = j + 1 + + if len(stage_subblocks) != 0: + stage_subblocks = torch.nn.ModuleList(stage_subblocks) + self.stage_blocks.append(StageModule(stage_subblocks, prune_ratio=self.stage_prune_ratio[i])) + + if do[0] == 'Subsample': + n = int((n + 1) / 2) + # ('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride) + resolution_ = (resolution - 1) // do[5] + 1 + subsample = AttentionSubsample( + *embed_dim[i:i + 2], key_dim=do[1], num_heads=do[2], + attn_ratio=do[3], + activation=attention_activation, + stride=do[5], + resolution=resolution, + resolution_=resolution_, + posembed=self.pos_embed) + self.blocks.append(subsample) + self.stage_blocks.append(subsample) + + resolution = resolution_ + if do[4] > 0: # mlp_ratio + h = int(embed_dim[i + 1] * do[4]) + ffn = Residual(torch.nn.Sequential( + Linear_BN(embed_dim[i + 1], h, + resolution=resolution), + mlp_activation(), + Linear_BN( + h, embed_dim[i + 1], bn_weight_init=0, resolution=resolution), + ), drop_path) + self.blocks.append(ffn) + self.stage_blocks.append(ffn) + + self.blocks = torch.nn.Sequential(*self.blocks) + self.stage_blocks = torch.nn.Sequential(*self.stage_blocks) + + # Classifier head + self.head = BN_Linear( + embed_dim[-1], num_classes) if num_classes > 0 else torch.nn.Identity() + if distillation: + self.head_dist = BN_Linear( + embed_dim[-1], num_classes) if num_classes > 0 else torch.nn.Identity() + self.clsc = True + if self.clsc: + self.head_cls = BN_Linear( + embed_dim[-1], num_classes) if num_classes > 0 else torch.nn.Identity() + if distillation: + self.head_cls_dist = BN_Linear( + embed_dim[-1], num_classes) if num_classes > 0 else torch.nn.Identity() + + @torch.jit.ignore + def no_weight_decay(self): + return {x for x in self.state_dict().keys() if 'poss' in x} + + def set_learn_tradeoff(self, mode): + global learn_tradeoff_mode + learn_tradeoff_mode = mode + + def set_prune_ratio(self, mode): + pass + + def remove_cls(self): + if hasattr(self, 'head_cls'): + del self.head_cls + if hasattr(self, 'head_cls_dist'): + del self.head_cls_dist + + def forward(self, x): + global global_attn + global ori_indices + global 
learn_tradeoff_mode + + global_attn = 0 + + x = self.patch_embed(x) + x = x.flatten(2).transpose(1, 2) + + ori_indices = torch.arange(x.shape[1], dtype=torch.long, device=x.device).unsqueeze(0) + ori_indices = ori_indices.expand(x.shape[0], -1) + + cls_token = self.cls_token.expand(x.shape[0], -1, -1) + x = torch.cat((cls_token, x), 1) + if self.pos_embed: + x = x + self.poss + + if self.stage_wise_prune: + x = self.stage_blocks(x) + else: + x = self.blocks(x) + + cls = x[:, 0, :] + x = x[:, 1:, :] + x = x.mean(1) + if self.distillation: + x = self.head(x), self.head_dist(x) + if self.clsc: + if self.training: + xcls = self.head_cls(cls) + xcls_dist = self.head_cls_dist(cls) + return x[0], x[1], xcls, xcls_dist + else: + return (x[0] + x[1]) / 2 + if not self.training: + x = (x[0] + x[1]) / 2 + + else: + x = self.head(x) + return x + + +def model_factory(C, D, X, N, drop_path, weights, + num_classes, distillation, pretrained, fuse, prune_ratio): + embed_dim = [int(x) for x in C.split('_')] + num_heads = [int(x) for x in N.split('_')] + depth = [int(x) for x in X.split('_')] + act = torch.nn.Hardswish + model = LeViT( + patch_size=16, + embed_dim=embed_dim, + num_heads=num_heads, + key_dim=[D] * 3, + depth=depth, + attn_ratio=[2, 2, 2], + mlp_ratio=[2, 2, 2], + down_ops=[ + # ('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride) + ['Subsample', D, embed_dim[0] // D, 4, 2, 2], + ['Subsample', D, embed_dim[1] // D, 4, 2, 2], + ], + attention_activation=act, + mlp_activation=act, + hybrid_backbone=b16(embed_dim[0], activation=act), + num_classes=num_classes, + drop_path=drop_path, + distillation=distillation, + prune_ratio=prune_ratio + ) + if pretrained: + checkpoint = torch.hub.load_state_dict_from_url( + weights, map_location='cpu') + model.load_state_dict(checkpoint['model']) + if fuse: + utils.replace_batchnorm(model) + + return model + + +if __name__ == '__main__': + if __name__ == '__main__': + for name in specification: + net = globals()[name](fuse=False, pretrained=False) + net.eval() + net.remove_cls() + net(torch.randn(2, 3, 384, 384)) + print(name, 'Parameters:', sum(p.numel() for p in net.parameters() if p.requires_grad)) diff --git a/PyTorch/contrib/cv/classification/Evo-Levit_256_384/levit/losses_levit.py b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/levit/losses_levit.py new file mode 100644 index 0000000000..e1d5a224fe --- /dev/null +++ b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/levit/losses_levit.py @@ -0,0 +1,127 @@ +# encoding=utf-8 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from torch.nn import functional as F + + +class DistillationLoss(torch.nn.Module): + """ + This module wraps a standard criterion and adds an extra knowledge distillation loss by + taking a teacher model prediction and using it as additional supervision. 
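+
+    Supported distillation_type values are 'none', 'soft', 'hard' and 'cls', plus the 'mode1',
+    'mode2', 'mode4' and 'mutual' variants. 'none' skips distillation, 'soft' uses a KL divergence
+    at temperature `tau`, and the remaining types use hard (argmax-based) cross-entropy targets.
+    The returned loss is base_loss * (1 - alpha) + distillation_loss * alpha.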
+ """ + + def __init__(self, base_criterion: torch.nn.Module, teacher_model: torch.nn.Module, + distillation_type: str, alpha: float, tau: float): + super().__init__() + self.base_criterion = base_criterion + self.teacher_model = teacher_model + assert distillation_type in ['none', 'soft', 'hard', 'mode1', 'mode2', 'mutual', 'cls', 'mode4'] + self.distillation_type = distillation_type + self.alpha = alpha + self.tau = tau + + def forward(self, inputs, outputs, labels): + """ + Args: + inputs: The original inputs that are feed to the teacher model + outputs: the outputs of the model to be trained. It is expected to be + either a Tensor, or a Tuple[Tensor, Tensor], with the original output + in the first position and the distillation predictions as the second output + labels: the labels for the base criterion + """ + outputs_kd = None + if not isinstance(outputs, torch.Tensor): + # assume that the model outputs a tuple of [outputs, outputs_kd] + if self.distillation_type == 'mode1': + outputs, outputs_kd, outn2, outn3, outn4 = outputs + elif self.distillation_type == 'mode2': + outputs, outputs_kd, outn2, outn3, outn4, outmg = outputs + elif self.distillation_type == 'mode4': + outputs, outputs_kd, outn2, outn3 = outputs + elif self.distillation_type == 'mutual': + outputs, outputs_kd, outm = outputs + elif self.distillation_type == 'cls': + outputs, outputs_kd, outc, outc_kd = outputs + else: + outputs, outputs_kd = outputs + base_loss = self.base_criterion(outputs, labels) + if self.distillation_type == 'none': + return base_loss + elif self.distillation_type == 'mode1': + base_loss += self.base_criterion(outn2, labels) + base_loss += self.base_criterion(outn3, labels) + base_loss += self.base_criterion(outn4, labels) + base_loss = base_loss / 4.0 + elif self.distillation_type == 'mode2': + base_loss += self.base_criterion(outn2, labels) + base_loss += self.base_criterion(outn4, labels) + base_loss = base_loss / 3.0 + elif self.distillation_type == 'mode4': + base_loss += self.base_criterion(outn2, labels) + base_loss += self.base_criterion(outn3, labels) + base_loss = base_loss / 3.0 + elif self.distillation_type == 'mutual': + base_loss += self.base_criterion(outm, labels) + base_loss = base_loss / 2.0 + elif self.distillation_type == 'cls': + base_loss = base_loss + self.base_criterion(outc, labels) + base_loss = base_loss / 2.0 + + if outputs_kd is None: + raise ValueError("When knowledge distillation is enabled, the model is " + "expected to return a Tuple[Tensor, Tensor] with the output of the " + "class_token and the dist_token") + # don't backprop throught the teacher + with torch.no_grad(): + teacher_outputs = self.teacher_model(inputs) + + if self.distillation_type == 'soft': + T = self.tau + # taken from https://github.com/peterliht/knowledge-distillation-pytorch/blob/master/model/net.py#L100 + # with slight modifications + distillation_loss = F.kl_div( + F.log_softmax(outputs_kd / T, dim=1), + F.log_softmax(teacher_outputs / T, dim=1), + reduction='sum', + log_target=True + ) * (T * T) / outputs_kd.numel() + elif self.distillation_type == 'hard': + distillation_loss = F.cross_entropy( + outputs_kd, teacher_outputs.argmax(dim=1)) + elif self.distillation_type == 'mode2': + distillation_loss = F.cross_entropy( + outn3, outmg.argmax(dim=1)) + F.cross_entropy( + outputs_kd, teacher_outputs.argmax(dim=1)) + distillation_loss = distillation_loss / 2.0 + elif self.distillation_type == 'mode1': + distillation_loss = F.cross_entropy( + outputs_kd, teacher_outputs.argmax(dim=1)) + elif 
self.distillation_type == 'mutual': + distillation_loss = F.cross_entropy( + outm, teacher_outputs.argmax(dim=1)) + F.cross_entropy( + outputs_kd, teacher_outputs.argmax(dim=1)) + distillation_loss = distillation_loss / 2.0 + elif self.distillation_type == 'cls': + distillation_loss = F.cross_entropy( + outputs_kd, teacher_outputs.argmax(dim=1)) + F.cross_entropy( + outc_kd, teacher_outputs.argmax(dim=1)) + distillation_loss = distillation_loss / 2.0 + elif self.distillation_type == 'mode4': + distillation_loss = F.cross_entropy( + outputs_kd, teacher_outputs.argmax(dim=1)) + + loss = base_loss * (1 - self.alpha) + distillation_loss * self.alpha + return loss diff --git a/PyTorch/contrib/cv/classification/Evo-Levit_256_384/main_levit.py b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/main_levit.py new file mode 100644 index 0000000000..9d3c2a145b --- /dev/null +++ b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/main_levit.py @@ -0,0 +1,506 @@ +# encoding=utf-8 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import datetime +import numpy as np +import time +import torch +import torch.backends.cudnn as cudnn +import json +import os + +from pathlib import Path + +from timm.data import Mixup +from timm.models import create_model +from timm.loss import LabelSmoothingCrossEntropy, SoftTargetCrossEntropy +from timm.scheduler import create_scheduler +from timm.optim import create_optimizer +from timm.utils import ApexScaler, get_state_dict, ModelEma + +from datasets import build_dataset +from engine_levit import train_one_epoch, evaluate +from levit.losses_levit import DistillationLoss +from samplers import RASampler +import utils + +from levit import evo_levit +from levit import evo_levit_384 +try: + # noinspection PyUnresolvedReferences + from apex import amp +except ImportError: + amp = None + +import torch_npu + +def get_args_parser(): + parser = argparse.ArgumentParser( + 'LeViT training and evaluation script', add_help=False) + parser.add_argument('--batch-size', default=256, type=int) + parser.add_argument('--epochs', default=300, type=int) + + # Model parameters + parser.add_argument('--model', default='LeViT_256', type=str, metavar='MODEL', + help='Name of model to train') + parser.add_argument('--input-size', default=224, + type=int, help='images input size') + + parser.add_argument('--model-ema', action='store_true') + parser.add_argument( + '--no-model-ema', action='store_false', dest='model_ema') + parser.set_defaults(model_ema=True) + parser.add_argument('--model-ema-decay', type=float, + default=0.99996, help='') + parser.add_argument('--model-ema-force-cpu', + action='store_true', default=False, help='') + + # Optimizer parameters + parser.add_argument('--opt', default='adamw', type=str, metavar='OPTIMIZER', + help='Optimizer (default: "adamw"') + parser.add_argument('--opt-eps', default=1e-8, type=float, metavar='EPSILON', + help='Optimizer Epsilon (default: 1e-8)') + parser.add_argument('--opt-betas', 
default=None, type=float, nargs='+', metavar='BETA', + help='Optimizer Betas (default: None, use opt default)') + parser.add_argument('--clip-grad', type=float, default=0.01, metavar='NORM', + help='Clip gradient norm (default: None, no clipping)') + parser.add_argument('--clip-mode', type=str, default='agc', + help='Gradient clipping mode. One of ("norm", "value", "agc")') + parser.add_argument('--momentum', type=float, default=0.9, metavar='M', + help='SGD momentum (default: 0.9)') + parser.add_argument('--weight-decay', type=float, default=0.025, + help='weight decay (default: 0.025)') + # Learning rate schedule parameters + parser.add_argument('--sched', default='cosine', type=str, metavar='SCHEDULER', + help='LR scheduler (default: "cosine"') + parser.add_argument('--lr', type=float, default=5e-4, metavar='LR', + help='learning rate (default: 5e-4)') + parser.add_argument('--lr-noise', type=float, nargs='+', default=None, metavar='pct, pct', + help='learning rate noise on/off epoch percentages') + parser.add_argument('--lr-noise-pct', type=float, default=0.67, metavar='PERCENT', + help='learning rate noise limit percent (default: 0.67)') + parser.add_argument('--lr-noise-std', type=float, default=1.0, metavar='STDDEV', + help='learning rate noise std-dev (default: 1.0)') + parser.add_argument('--warmup-lr', type=float, default=1e-6, metavar='LR', + help='warmup learning rate (default: 1e-6)') + parser.add_argument('--min-lr', type=float, default=1e-5, metavar='LR', + help='lower lr bound for cyclic schedulers that hit 0 (1e-5)') + + parser.add_argument('--decay-epochs', type=float, default=30, metavar='N', + help='epoch interval to decay LR') + parser.add_argument('--warmup-epochs', type=int, default=5, metavar='N', + help='epochs to warmup LR, if scheduler supports') + parser.add_argument('--cooldown-epochs', type=int, default=10, metavar='N', + help='epochs to cooldown LR at min_lr, after cyclic schedule ends') + parser.add_argument('--patience-epochs', type=int, default=10, metavar='N', + help='patience epochs for Plateau LR scheduler (default: 10') + parser.add_argument('--decay-rate', '--dr', type=float, default=0.1, metavar='RATE', + help='LR decay rate (default: 0.1)') + + # Augmentation parameters + parser.add_argument('--color-jitter', type=float, default=0.4, metavar='PCT', + help='Color jitter factor (default: 0.4)') + parser.add_argument('--aa', type=str, default='rand-m9-mstd0.5-inc1', metavar='NAME', + help='Use AutoAugment policy. "v0" or "original". 
" + \ + "(default: rand-m9-mstd0.5-inc1)'), + parser.add_argument('--smoothing', type=float, default=0.1, + help='Label smoothing (default: 0.1)') + parser.add_argument('--train-interpolation', type=str, default='bicubic', + help='Training interpolation (random, bilinear, bicubic default: "bicubic")') + + parser.add_argument('--repeated-aug', action='store_true') + parser.add_argument('--no-repeated-aug', + action='store_false', dest='repeated_aug') + parser.set_defaults(repeated_aug=True) + + # * Random Erase params + parser.add_argument('--reprob', type=float, default=0.25, metavar='PCT', + help='Random erase prob (default: 0.25)') + parser.add_argument('--remode', type=str, default='pixel', + help='Random erase mode (default: "pixel")') + parser.add_argument('--recount', type=int, default=1, + help='Random erase count (default: 1)') + parser.add_argument('--resplit', action='store_true', default=False, + help='Do not random erase first (clean) augmentation split') + + # * Mixup params + parser.add_argument('--mixup', type=float, default=0.8, + help='mixup alpha, mixup enabled if > 0. (default: 0.8)') + parser.add_argument('--cutmix', type=float, default=1.0, + help='cutmix alpha, cutmix enabled if > 0. (default: 1.0)') + parser.add_argument('--cutmix-minmax', type=float, nargs='+', default=None, + help='cutmix min/max ratio, overrides alpha and enables cutmix if set (default: None)') + parser.add_argument('--mixup-prob', type=float, default=1.0, + help='Probability of performing mixup or cutmix when either/both is enabled') + parser.add_argument('--mixup-switch-prob', type=float, default=0.5, + help='Probability of switching to cutmix when both mixup and cutmix enabled') + parser.add_argument('--mixup-mode', type=str, default='batch', + help='How to apply mixup/cutmix params. 
Per "batch", "pair", or "elem"') + + # Distillation parameters + parser.add_argument('--teacher-model', default='regnety_160', type=str, metavar='MODEL', + help='Name of teacher model to train (default: "regnety_160"') + parser.add_argument('--teacher-path', type=str, + default='./regnety_160-a5fe301d.pth') + parser.add_argument('--distillation-type', default='cls', + choices=['none', 'soft', 'hard', 'cls'], type=str, help="") + parser.add_argument('--distillation-alpha', + default=0.5, type=float, help="") + parser.add_argument('--distillation-tau', default=1.0, type=float, help="") + + # * Finetuning params + parser.add_argument('--finetune', default='', + help='finetune from checkpoint') + + # Dataset parameters + parser.add_argument('--data_path', default='/datasets01/imagenet_full_size/061417/', type=str, + help='dataset path') + parser.add_argument('--data-set', default='IMNET', choices=['CIFAR', 'IMNET', 'INAT', 'INAT19'], + type=str, help='Image Net dataset path') + parser.add_argument('--inat-category', default='name', + choices=['kingdom', 'phylum', 'class', 'order', + 'supercategory', 'family', 'genus', 'name'], + type=str, help='semantic granularity') + + parser.add_argument('--output_dir', default='', + help='path where to save, empty for no saving') + parser.add_argument('--device', default='cuda', + help='device to use for training / testing') + parser.add_argument('--seed', default=0, type=int) + parser.add_argument('--resume', default='', help='resume from checkpoint')#/home/zym/save/checkpoint.pth + parser.add_argument('--start_epoch', default=0, type=int, metavar='N', + help='start epoch') + parser.add_argument('--eval', action='store_true', + help='Perform evaluation only') + parser.add_argument('--dist-eval', action='store_true', + default=False, help='Enabling distributed evaluation') + parser.add_argument('--num_workers', default=10, type=int) + parser.add_argument('--pin-mem', action='store_true', + help='Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.') + parser.add_argument('--no-pin-mem', action='store_false', dest='pin_mem', + help='') + parser.set_defaults(pin_mem=True)#默认为true + + # distributed training parameters + parser.add_argument('--world_size', default=1, type=int, + help='number of distributed processes') + parser.add_argument('--dist_url', default='env://', + help='url used to set up distributed training') + parser.add_argument('--local_rank', default=-1) # 新增 + # training strategy + parser.add_argument('--prune-warmup-epoch', type=int, default=0, + help='epoch to start pruning') + + #add apex + parser.add_argument('--apex', default=True,action='store_true', + help='Use apex for mixed precision training') + parser.add_argument('--apex_opt_level', default='O2', type=str, + help='For apex mixed precision training' + 'O0 for FP32 training, O1 for mixed precision training.' 
+ 'For further detail, see https://github.com/NVIDIA/apex/tree/master/examples/imagenet') + parser.add_argument('--loss_scale_value', default=1024., type=float, + help='loss scale using in amp, default -1 means dynamic') + + #add fps or acc + parser.add_argument('--train_type', default='acc', type=str, + help='support 1p performance or accuracy') + return parser + + +def main(args): + utils.init_distributed_mode(args) + + print(args) + + if args.distillation_type != 'none' and args.finetune and not args.eval: + raise NotImplementedError( + "Finetuning with distillation not yet supported") + + + local_rank = utils.get_rank() + device = torch.device(f'npu:{local_rank}') + + # fix the seed for reproducibility + seed = args.seed + utils.get_rank() + torch.manual_seed(seed) + np.random.seed(seed) + # random.seed(seed) + + cudnn.benchmark = True + + dataset_train, args.nb_classes = build_dataset(is_train=True, args=args) + dataset_val, _ = build_dataset(is_train=False, args=args) + + if True: # args.distributed: + num_tasks = utils.get_world_size() + global_rank = utils.get_rank() + if args.repeated_aug: + sampler_train = RASampler( + dataset_train, num_replicas=num_tasks, rank=global_rank, shuffle=True + ) + else: + sampler_train = torch.utils.data.DistributedSampler( + dataset_train, num_replicas=num_tasks, rank=global_rank, shuffle=True + ) + if args.dist_eval: + if len(dataset_val) % num_tasks != 0: + print('Warning: Enabling distributed evaluation with an eval dataset not divisible by process number. ' + 'This will slightly alter validation results as extra duplicate entries are added to achieve ' + 'equal num of samples per-process.') + sampler_val = torch.utils.data.DistributedSampler( + dataset_val, num_replicas=num_tasks, rank=global_rank, shuffle=False) + else: + sampler_val = torch.utils.data.SequentialSampler(dataset_val) + else: + sampler_train = torch.utils.data.RandomSampler(dataset_train) + sampler_val = torch.utils.data.SequentialSampler(dataset_val) + + data_loader_train = torch.utils.data.DataLoader( + dataset_train, sampler=sampler_train, + batch_size=args.batch_size, + num_workers=args.num_workers, + pin_memory=args.pin_mem, + drop_last=True, + ) + + data_loader_val = torch.utils.data.DataLoader( + dataset_val, sampler=sampler_val, + batch_size=int(1.5 * args.batch_size), + num_workers=args.num_workers, + pin_memory=args.pin_mem, + drop_last=False + ) + + mixup_fn = None + mixup_active = args.mixup > 0 or args.cutmix > 0. 
or args.cutmix_minmax is not None + if mixup_active: + mixup_fn = Mixup( + mixup_alpha=args.mixup, cutmix_alpha=args.cutmix, cutmix_minmax=args.cutmix_minmax, + prob=args.mixup_prob, switch_prob=args.mixup_switch_prob, mode=args.mixup_mode, + label_smoothing=args.smoothing, num_classes=args.nb_classes) + + print(f"Creating model: {args.model}") + + model = evo_levit_384.EvoLeViT_256_384( + num_classes=args.nb_classes, + distillation=(args.distillation_type != 'none'), + #pretrained=args.eval, + fuse=args.eval, + ) + print("device:", device) + model.to(device) + # learning rate 的步长 + linear_scaled_lr = args.lr * args.batch_size * utils.get_world_size() / 512.0 + args.lr = linear_scaled_lr + optimizer = create_optimizer(args, model) + model, optimizer = amp.initialize(model, optimizer, opt_level="O1", loss_scale=128.0 , combine_grad=True) + + if args.finetune: + if args.finetune.startswith('https'): + checkpoint = torch.hub.load_state_dict_from_url( + args.finetune, map_location='cpu', check_hash=True) + else: + checkpoint = torch.load(args.finetune, map_location='cpu') + + checkpoint_model = checkpoint['model'] + state_dict = model.state_dict() + for k in ['head.weight', 'head.bias', + 'head_dist.weight', 'head_dist.bias']: + if k in checkpoint_model and checkpoint_model[k].shape != state_dict[k].shape: + print(f"Removing key {k} from pretrained checkpoint") + del checkpoint_model[k] + + model.load_state_dict(checkpoint_model, strict=False) + + model_ema = None + if args.model_ema: + # Important to create EMA model after cuda(), DP wrapper, and AMP but + # before SyncBN and DDP wrapper + model_ema = ModelEma( + model, + decay=args.model_ema_decay, + device='cpu' if args.model_ema_force_cpu else '', + resume='') + + model_without_ddp = model + if args.distributed: + model = torch.nn.parallel.DistributedDataParallel( + model, device_ids=[args.gpu], find_unused_parameters=True,broadcast_buffers=False)#相较于GPU添加了broadcast_buffers=False + model_without_ddp = model.module + + + + n_parameters = sum(p.numel() + for p in model.parameters() if p.requires_grad) + print('number of params:', n_parameters) + + + loss_scaler = ApexScaler() + + lr_scheduler, _ = create_scheduler(args, optimizer) + + criterion = LabelSmoothingCrossEntropy() + + if args.mixup > 0.: + # smoothing is handled with mixup label transform + criterion = SoftTargetCrossEntropy() + elif args.smoothing: + criterion = LabelSmoothingCrossEntropy(smoothing=args.smoothing) + else: + criterion = torch.nn.CrossEntropyLoss() + + teacher_model = None + if args.distillation_type != 'none': + assert args.teacher_path, 'need to specify teacher-path when using distillation' + print(f"Creating teacher model: {args.teacher_model}") + teacher_model = create_model( + args.teacher_model, + pretrained=False, + num_classes=args.nb_classes, + global_pool='avg', + ) + if args.teacher_path.startswith('https'): + checkpoint = torch.hub.load_state_dict_from_url( + args.teacher_path, map_location='cpu', check_hash=True) + else: + checkpoint = torch.load(args.teacher_path, map_location='cpu') + teacher_model.load_state_dict(checkpoint['model']) + teacher_model.to(device) + teacher_model.eval() + + # wrap the criterion in our custom DistillationLoss, which + # just dispatches to the original criterion if args.distillation_type is + # 'none' + criterion = DistillationLoss( + criterion, teacher_model, args.distillation_type, args.distillation_alpha, args.distillation_tau + ) + + output_dir = Path(args.output_dir) + flag = os.path.exists(args.resume) + if 
args.resume and flag: + if args.resume.startswith('https'): + checkpoint = torch.hub.load_state_dict_from_url( + args.resume, map_location='cpu', check_hash=True) + else: + checkpoint = torch.load(args.resume, map_location='cpu') + + model_without_ddp.load_state_dict(checkpoint['model'], strict=False) + if not args.eval and 'optimizer' in checkpoint and 'lr_scheduler' in checkpoint and 'epoch' in checkpoint: + optimizer.load_state_dict(checkpoint['optimizer']) + lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) + args.start_epoch = checkpoint['epoch'] + 1 + if args.model_ema: + utils._load_checkpoint_for_ema( + model_ema, checkpoint['model_ema']) + if 'scaler' in checkpoint: + loss_scaler.load_state_dict(checkpoint['scaler']) + if args.eval: + print("now eval...") + test_stats = evaluate(data_loader_val, model, device) + print( + f"Accuracy of the network on the {len(dataset_val)} test images: {test_stats['acc1']:.1f}%") + return + + print(f"Start training for {args.epochs} epochs") + start_time = time.time() + max_accuracy = 0.0 + FPS = 0 + for epoch in range(args.start_epoch, args.epochs): + if args.distributed: + data_loader_train.sampler.set_epoch(epoch) + + if args.prune_warmup_epoch != 0: + if epoch < args.prune_warmup_epoch: + model.module.set_prune_ratio('no_prune') + if epoch == args.prune_warmup_epoch: + model.module.set_prune_ratio('prune') + + train_stats, fps_epoch = train_one_epoch( + args, + model, criterion, data_loader_train, + optimizer, device, epoch, loss_scaler, + args.clip_grad, args.clip_mode, model_ema, mixup_fn, + set_training_mode=args.finetune == '' # keep in eval mode during finetuning + ) + FPS = FPS + fps_epoch + lr_scheduler.step(epoch) + if args.output_dir: + checkpoint_paths = [output_dir / 'checkpoint.pth'] + for checkpoint_path in checkpoint_paths: + utils.save_on_master({ + 'model': model_without_ddp.state_dict(), + 'optimizer': optimizer.state_dict(), + 'lr_scheduler': lr_scheduler.state_dict(), + 'epoch': epoch, + 'model_ema': get_state_dict(model_ema), + 'scaler': loss_scaler.state_dict(), + 'args': args, + }, checkpoint_path) + if epoch % 10 == 9 or epoch > 290: + test_stats = evaluate(data_loader_val, model, device) + print( + f"Accuracy of the network on the {len(dataset_val)} test images: {test_stats['acc1']:.1f}%") + max_accuracy = max(max_accuracy, test_stats["acc1"]) + print(f'Max accuracy: {max_accuracy:.2f}%') + log_stats = {**{f'train_{k}': v for k, v in train_stats.items()}, + **{f'test_{k}': v for k, v in test_stats.items()}, + 'epoch': epoch, + 'n_parameters': n_parameters, + 'fps':fps_epoch}#新增日志文件打印fps + + # save best + if test_stats["acc1"] == max_accuracy and args.output_dir: + checkpoint_paths = [output_dir / 'best_checkpoint.pth'] + for checkpoint_path in checkpoint_paths: + utils.save_on_master({ + 'model': model_without_ddp.state_dict(), + 'optimizer': optimizer.state_dict(), + 'lr_scheduler': lr_scheduler.state_dict(), + 'epoch': epoch, + 'model_ema': get_state_dict(model_ema), + 'scaler': loss_scaler.state_dict(), + 'args': args, + }, checkpoint_path) + + else: + log_stats = {**{f'train_{k}': v for k, v in train_stats.items()}, + 'epoch': epoch, + 'n_parameters': n_parameters, + 'fps':fps_epoch}#新增日志文件打印fps + + if args.output_dir and utils.is_main_process(): + with (output_dir / "log.txt").open("a") as f: + f.write(json.dumps(log_stats) + "\n") + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print('Training time {}'.format(total_time_str)) + print(f"Average 
FPS {FPS / args.epochs}") + + + +if __name__ == '__main__': + option = {} + option["ACL_OP_COMPILER_CACHE_MODE"] = "enable" # cache + option["ACL_OP_COMPILER_CACHE_DIR"] = "./cache" + # option={"autotune": "enable", "autotunegraphdumppath": "./graphs"} + torch.npu.set_option(option) + parser = argparse.ArgumentParser( + 'LeViT training and evaluation script', parents=[get_args_parser()]) + args = parser.parse_args() + if args.output_dir: + Path(args.output_dir).mkdir(parents=True, exist_ok=True) + main(args) diff --git a/PyTorch/contrib/cv/classification/Evo-Levit_256_384/samplers.py b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/samplers.py new file mode 100644 index 0000000000..640305632c --- /dev/null +++ b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/samplers.py @@ -0,0 +1,74 @@ +# encoding=utf-8 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import torch +import torch.distributed as dist +import math + + +class RASampler(torch.utils.data.Sampler): + """Sampler that restricts data loading to a subset of the dataset for distributed, + with repeated augmentation. + It ensures that different each augmented version of a sample will be visible to a + different process (GPU) + Heavily based on torch.utils.data.DistributedSampler + """ + + def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): + if num_replicas is None: + if not dist.is_available(): + raise RuntimeError( + "Requires distributed package to be available") + num_replicas = dist.get_world_size() + if rank is None: + if not dist.is_available(): + raise RuntimeError("Requires distributed package to be available") + rank = dist.get_rank() + self.dataset = dataset + self.num_replicas = num_replicas + self.rank = rank + self.epoch = 0 + self.num_samples = int(math.ceil(len(self.dataset) * 3.0 / self.num_replicas)) + self.total_size = self.num_samples * self.num_replicas + # self.num_selected_samples = int(math.ceil(len(self.dataset) / self.num_replicas)) + self.num_selected_samples = int(math.floor(len(self.dataset) // 256 * 256 / self.num_replicas)) + self.shuffle = shuffle + + def __iter__(self): + # deterministically shuffle based on epoch + g = torch.Generator() + g.manual_seed(self.epoch) + if self.shuffle: + indices = torch.randperm(len(self.dataset), generator=g).tolist() + else: + indices = list(range(len(self.dataset))) + + # add extra samples to make it evenly divisible + indices = [ele for ele in indices for i in range(3)] + indices += indices[:(self.total_size - len(indices))] + assert len(indices) == self.total_size + + # subsample + indices = indices[self.rank:self.total_size:self.num_replicas] + assert len(indices) == self.num_samples + + return iter(indices[:self.num_selected_samples]) + + def __len__(self): + return self.num_selected_samples + + def set_epoch(self, epoch): + self.epoch = epoch diff --git a/PyTorch/contrib/cv/classification/Evo-Levit_256_384/test/env_npu.sh 
b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/test/env_npu.sh new file mode 100644 index 0000000000..084782cac1 --- /dev/null +++ b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/test/env_npu.sh @@ -0,0 +1,68 @@ +#!/bin/bash +CANN_INSTALL_PATH_CONF='/etc/Ascend/ascend_cann_install.info' + +if [ -f $CANN_INSTALL_PATH_CONF ]; then + CANN_INSTALL_PATH=$(cat $CANN_INSTALL_PATH_CONF | grep Install_Path | cut -d "=" -f 2) +else + CANN_INSTALL_PATH="/usr/local/Ascend" +fi + +if [ -d ${CANN_INSTALL_PATH}/ascend-toolkit/latest ]; then + source ${CANN_INSTALL_PATH}/ascend-toolkit/set_env.sh +else + source ${CANN_INSTALL_PATH}/nnae/set_env.sh +fi + + +#将Host日志输出到串口,0-关闭/1-开启 +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +#设置默认日志级别,0-debug/1-info/2-warning/3-error +export ASCEND_GLOBAL_LOG_LEVEL=3 +#设置Event日志开启标志,0-关闭/1-开启 +export ASCEND_GLOBAL_EVENT_ENABLE=0 +#设置是否开启taskque,0-关闭/1-开启 +export TASK_QUEUE_ENABLE=1 +#设置是否开启PTCopy,0-关闭/1-开启 +export PTCOPY_ENABLE=1 +#设置是否开启combined标志,0-关闭/1-开启 +export COMBINED_ENABLE=0 +#设置特殊场景是否需要重新编译,不需要修改 +export DYNAMIC_OP="ADD#MUL" +#HCCL白名单开关,1-关闭/0-开启 +export HCCL_WHITELIST_DISABLE=1 +export HCCL_IF_IP=$(hostname -I |awk '{print $1}') + +#设置device侧日志登记为error +msnpureport -g error -d 0 +msnpureport -g error -d 1 +msnpureport -g error -d 2 +msnpureport -g error -d 3 +msnpureport -g error -d 4 +msnpureport -g error -d 5 +msnpureport -g error -d 6 +msnpureport -g error -d 7 +#关闭Device侧Event日志 +msnpureport -e disable + +ulimit -SHn 512000 + +path_lib=$(python3.7 -c """ +import sys +import re +result='' +for index in range(len(sys.path)): + match_sit = re.search('-packages', sys.path[index]) + if match_sit is not None: + match_lib = re.search('lib', sys.path[index]) + + if match_lib is not None: + end=match_lib.span()[1] + result += sys.path[index][0:end] + ':' + + result+=sys.path[index] + '/torch/lib:' +print(result)""" +) + +echo ${path_lib} + +export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib/:${path_lib}:$LD_LIBRARY_PATH diff --git a/PyTorch/contrib/cv/classification/Evo-Levit_256_384/test/train_full_1P.sh b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/test/train_full_1P.sh new file mode 100644 index 0000000000..6fe8be9240 --- /dev/null +++ b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/test/train_full_1P.sh @@ -0,0 +1,116 @@ +#!/usr/bin/env bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="Evo-Levit_256_384" +# 训练batch_size +batch_size=128 +# 训练使用的npu卡数 +export RANK_SIZE=1 +export WORLD_SIZE=1 +# 路径参数初始化 +#配置dataset的路径,请按照实际情况填写测试路径 +data_path="" +#配置输出checkpoint文件的路径,请按照实际情况填写测试路径 +cur_path=`pwd` +output_dir=${cur_path}/save +#参数校验,data_path和output_dir为必传参数 +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path_last_diename=${cur_path##*/} +if [ x"${cur_path_last_diename}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
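+    # re-evaluate cur_path so that it now points at the model root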
+ cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + +nohup python3.7 -m torch.distributed.launch --nproc_per_node=1 main_levit.py \ + --model EvoLeViT_256_384 \ + --input-size 384 \ + --batch-size 128 \ + --data_path ${data_path} \ + --output_dir ${output_dir}> ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +wait + +##################获取训练数据################ + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep 'fps:' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk -F "fps:" '{print $NF}'` +FPS=${FPS#* } +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a 'Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $3}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长` +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep 'Averaged' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "loss: " '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/classification/Evo-Levit_256_384/test/train_full_8P.sh b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/test/train_full_8P.sh new file mode 100644 index 0000000000..072837f5e9 --- /dev/null +++ 
b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/test/train_full_8P.sh @@ -0,0 +1,115 @@ +#!/usr/bin/env bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="Evo-Levit_256_384" +# 训练batch_size +batch_size=128 +# 训练使用的npu卡数 +export RANK_SIZE=8 +export WORLD_SIZE=8 +# 路径参数初始化 +#配置dataset的路径,请按照实际情况填写测试路径 +data_path="" +#配置输出checkpoint文件的路径,请按照实际情况填写测试路径 +cur_path=`pwd` +output_dir=${cur_path}/save +#参数校验,data_path和output_dir为必传参数 +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path_last_diename=${cur_path##*/} +if [ x"${cur_path_last_diename}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + +nohup python3 -m torch.distributed.launch --nproc_per_node=8 main_levit.py \ + --model EvoLeViT_256_384 \ + --input-size 384 \ + --batch-size 128 \ + --data_path ${data_path} \ + --output_dir ${output_dir}> ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +wait + +##################获取训练数据################ + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep 'fps:' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk -F "fps:" '{print $NF}'` +FPS=${FPS#* } +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a 'Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $3}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep 'Averaged' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "loss: " '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> 
${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/Evo-Levit_256_384/test/train_performance_1P.sh b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/test/train_performance_1P.sh new file mode 100644 index 0000000000..337c292d63 --- /dev/null +++ b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/test/train_performance_1P.sh @@ -0,0 +1,113 @@ +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="Evo-Levit_256_384" +# 训练batch_size +batch_size=128 +# 训练使用的npu卡数 +export RANK_SIZE=1 +export WORLD_SIZE=1 +# 路径参数初始化 +#配置dataset的路径,请按照实际情况填写测试路径 +data_path="" +#配置输出checkpoint文件的路径,请按照实际情况填写测试路径 +cur_path=`pwd` +output_dir=${cur_path}/save +#参数校验,data_path和output_dir为必传参数 +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path_last_diename=${cur_path##*/} +if [ x"${cur_path_last_diename}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
+ cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + +nohup python3.7 -m torch.distributed.launch --nproc_per_node=1 main_levit.py \ + --model EvoLeViT_256_384 \ + --input-size 384 \ + --batch-size 128 \ + --epochs 1 \ + --train_type fps\ + --data_path ${data_path} \ + --output_dir ${output_dir}> ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +wait + +##################获取训练数据################ + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep 'fps:' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk -F "fps:" '{print $NF}'` +FPS=${FPS#* } +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep 'Averaged' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "loss: " '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/Evo-Levit_256_384/test/train_performance_8P.sh b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/test/train_performance_8P.sh new file mode 100644 index 0000000000..654e170197 --- /dev/null +++ b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/test/train_performance_8P.sh @@ -0,0 +1,127 @@ +#!/usr/bin/env bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="Evo-Levit_256_384" +# 训练batch_size 
+batch_size=128 +# 训练使用的npu卡数 +export RANK_SIZE=8 +export WORLD_SIZE=8 +# 路径参数初始化 +#配置dataset的路径,请按照实际情况填写测试路径 +data_path="" +#配置输出checkpoint文件的路径,请按照实际情况填写测试路径 +cur_path=`pwd` +output_dir=${cur_path}/save +#参数校验,data_path和output_dir为必传参数 +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path_last_diename=${cur_path##*/} +if [ x"${cur_path_last_diename}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + +###############checkpoint的保存路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +output_path=${cur_path}/save +if [ -d ${cur_path}/save ];then + rm -rf ${cur_path}/save + mkdir -p ${cur_path}/save +else + mkdir -p ${cur_path}/save +fi + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + +nohup python3 -m torch.distributed.launch --nproc_per_node=8 main_levit.py \ + --model EvoLeViT_256_384 \ + --input-size 384 \ + --batch-size 128 \ + --epochs 1 \ + --train_type fps\ + --data_path ${data_path} \ + --output_dir ${output_dir}> ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +wait + +##################获取训练数据################ + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep 'fps:' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk -F "fps:" '{print $NF}'` +FPS=${FPS#* } +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a 'Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $3}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep 'Averaged' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "loss: " '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> 
${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/classification/Evo-Levit_256_384/utils.py b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/utils.py new file mode 100644 index 0000000000..648d491803 --- /dev/null +++ b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/utils.py @@ -0,0 +1,277 @@ +# encoding=utf-8 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import io +import os +import time +from collections import defaultdict, deque +import datetime + +import torch +import torch.distributed as dist +import torch_npu + + +class SmoothedValue(object): + """Track a series of values and provide access to smoothed values over a + window or the global series average. + """ + + def __init__(self, window_size=20, fmt=None): + if fmt is None: + fmt = "{median:.4f} ({global_avg:.4f})" + self.deque = deque(maxlen=window_size) + self.total = 0.0 + self.count = 0 + self.fmt = fmt + + def update(self, value, n=1): + self.deque.append(value) + self.count += n + self.total += value * n + + def synchronize_between_processes(self): + """ + Warning: does not synchronize the deque! 
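+        All-reduces only count and total across ranks, so global_avg becomes a
+        cross-process statistic while the windowed values (median, avg, max) stay local.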
+ """ + + if not is_dist_avail_and_initialized(): + return + t = torch.tensor([self.count, self.total], + dtype=torch.float32, device='npu') + dist.barrier() + dist.all_reduce(t) + t = t.tolist() + self.count = int(t[0]) + self.total = t[1] + # print('count and total is: {}, {}'.format(int(t[0]),int(t[1]))) + + @property + def median(self): + d = torch.tensor(list(self.deque)) + return d.median().item() + + @property + def avg(self): + d = torch.tensor(list(self.deque), dtype=torch.float32) + return d.mean().item() + + @property + def global_avg(self): + return self.total / self.count + + @property + def max(self): + return max(self.deque) + + @property + def value(self): + return self.deque[-1] + + def __str__(self): + return self.fmt.format( + median=self.median, + avg=self.avg, + global_avg=self.global_avg, + max=self.max, + value=self.value) + + + +class MetricLogger(object): + def __init__(self, delimiter="\t"): + self.meters = defaultdict(SmoothedValue) + self.delimiter = delimiter + + def update(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, torch.Tensor): + v = v.item() + assert isinstance(v, (float, int)) + self.meters[k].update(v) + + def __getattr__(self, attr): + if attr in self.meters: + return self.meters[attr] + if attr in self.__dict__: + return self.__dict__[attr] + raise AttributeError("'{}' object has no attribute '{}'".format( + type(self).__name__, attr)) + + def __str__(self): + loss_str = [] + for name, meter in self.meters.items(): + loss_str.append( + "{}: {}".format(name, str(meter)) + ) + return self.delimiter.join(loss_str) + + def synchronize_between_processes(self): + for meter in self.meters.values(): + meter.synchronize_between_processes() + + def add_meter(self, name, meter): + self.meters[name] = meter + + def log_every(self, iterable, print_freq, header=None): + i = 0 + if not header: + header = '' + start_time = time.time() + end = time.time() + iter_time = SmoothedValue(fmt='{avg:.4f}') + data_time = SmoothedValue(fmt='{avg:.4f}') + space_fmt = ':' + str(len(str(len(iterable)))) + 'd' + log_msg = [ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}' + ] + if torch_npu.npu.is_available(): + log_msg.append('max mem: {memory:.0f}') + log_msg = self.delimiter.join(log_msg) + MB = 1024.0 * 1024.0 + for obj in iterable: + data_time.update(time.time() - end) + yield obj + iter_time.update(time.time() - end) + if i % print_freq == 0 or i == len(iterable) - 1: + eta_seconds = iter_time.global_avg * (len(iterable) - i) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + if torch_npu.npu.is_available(): + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time), + memory=0)) + else: + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time))) + i += 1 + end = time.time() + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print('{} Total time: {} ({:.4f} s / it)'.format( + header, total_time_str, total_time / len(iterable))) + + +def _load_checkpoint_for_ema(model_ema, checkpoint): + """ + Workaround for ModelEma._load_checkpoint to accept an already-loaded object + """ + mem_file = io.BytesIO() + torch.save(checkpoint, mem_file) + mem_file.seek(0) + model_ema._load_checkpoint(mem_file) + + +def setup_for_distributed(is_master): + """ + This function disables printing when not in master process 
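+    Non-master ranks can still emit output by passing force=True to print().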
+ """ + import builtins as __builtin__ + builtin_print = __builtin__.print + + def print(*args, **kwargs): + force = kwargs.pop('force', False) + if is_master or force: + builtin_print(*args, **kwargs) + + __builtin__.print = print + + +def is_dist_avail_and_initialized(): + if not dist.is_available(): + return False + if not dist.is_initialized(): + return False + return True + + +def get_world_size(): + if not is_dist_avail_and_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank(): + if not is_dist_avail_and_initialized(): + return 0 + return dist.get_rank() + + +def is_main_process(): + return get_rank() == 0 + + +def save_on_master(*args, **kwargs): + if is_main_process(): + torch.save(*args, **kwargs) + + +def init_distributed_mode(args): + if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: + args.local_rank = int(os.environ["RANK"]) + args.world_size = int(os.environ['WORLD_SIZE']) + args.gpu = int(os.environ['LOCAL_RANK']) + print(f"RANK and WORLD_SIZE in environ: {args.local_rank}/{args.world_size}") + elif 'SLURM_PROCID' in os.environ: + args.local_rank = int(os.environ['SLURM_PROCID']) + args.gpu = args.local_rank % torch.cuda.device_count() + else: + print('Not using distributed mode') + args.distributed = False + return + + + args.distributed = True + + #NPU + torch_npu.npu.set_device(args.gpu) + args.dist_backend = 'hccl' + + print('| distributed init (rank {}): {}'.format( + args.local_rank, args.dist_url), flush=True) + #NPU + torch.distributed.init_process_group(backend=args.dist_backend, world_size=args.world_size, rank=args.local_rank) + torch.distributed.barrier() + setup_for_distributed(args.local_rank == 0) + + +def replace_batchnorm(net): + for child_name, child in net.named_children(): + if hasattr(child, 'fuse'): + setattr(net, child_name, child.fuse()) + elif isinstance(child, torch.nn.Conv2d): + child.bias = torch.nn.Parameter(torch.zeros(child.weight.size(0))) + elif isinstance(child, torch.nn.BatchNorm2d): + setattr(net, child_name, torch.nn.Identity()) + else: + replace_batchnorm(child) + + +def replace_layernorm(net): + import apex + for child_name, child in net.named_children(): + if isinstance(child, torch.nn.LayerNorm): + setattr(net, child_name, apex.normalization.FusedLayerNorm( + child.weight.size(0))) + else: + replace_layernorm(child) diff --git a/PyTorch/contrib/cv/classification/Evo-Levit_256_384/visualize.py b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/visualize.py new file mode 100644 index 0000000000..c91d13b433 --- /dev/null +++ b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/visualize.py @@ -0,0 +1,599 @@ +# encoding=utf-8 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
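+# Token-selection visualization: a 224x224 input is treated as a 14x14 grid of
+# 16x16 patches; patches whose indices are not in the model's keep list are faded
+# towards white and the overlay is saved next to the original image. vis_single()
+# handles one image given via --img-path, vis_batch() samples images from the
+# validation set.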
+ +import argparse +import numpy as np +import torch.backends.cudnn as cudnn + +from pathlib import Path + +from timm.data import Mixup +from timm.models import create_model +from timm.scheduler import create_scheduler +from timm.optim import create_optimizer +from timm.utils import NativeScaler, get_state_dict, ModelEma + +from datasets import build_dataset2, get_post_process + +import utils + +from timm.utils import accuracy, ModelEma +from torchvision import utils as vutils + +import torch +from torchvision import transforms + +from PIL import Image +import os + +from deit import evo_deit_vis + + +def get_transform(input_size): + t = [] + resize_im = (input_size != 224) + if resize_im: + size = int((256 / 224) * args.input_size) + t.append( + transforms.Resize(size, interpolation=3), # to maintain same ratio w.r.t. 224 images + ) + t.append(transforms.CenterCrop(args.input_size)) + t.append(transforms.ToTensor()) + else: + t.append(transforms.ToTensor()) + + return transforms.Compose(t) + + +def get_keep_indices(decisions): + keep_indices = [] + for i in range(3): + if i == 0: + keep_indices.append(decisions[i]) + else: + keep_indices.append(keep_indices[-1][decisions[i]]) + return keep_indices + + +def gen_masked_tokens(tokens, indices, alpha=0.3): + indices = [i for i in range(196) if i not in indices] + tokens = tokens.copy() + tokens[indices] = alpha * tokens[indices] + (1 - alpha) * 255 + return tokens + + +def recover_image(tokens): + # image: (C, 196, 16, 16) + image = tokens.reshape(14, 14, 16, 16, 3).swapaxes(1, 2).reshape(224, 224, 3) + return image + + +def gen_visualization(image, keep_indices): + # keep_indices = get_keep_indices(decisions) + image_tokens = image.reshape(14, 16, 14, 16, 3).swapaxes(1, 2).reshape(196, 16, 16, 3) + + viz = recover_image(gen_masked_tokens(image_tokens, keep_indices)) + return viz + + +def get_args_parser(): + parser = argparse.ArgumentParser('DeiT training and evaluation script', add_help=False) + parser.add_argument('--batch-size', default=64, type=int) + parser.add_argument('--epochs', default=300, type=int) + + # Model parameters + parser.add_argument('--model', default='deit_base_patch16_224', type=str, metavar='MODEL', + help='Name of model to train') + parser.add_argument('--input-size', default=224, type=int, help='images input size') + + parser.add_argument('--drop', type=float, default=0.0, metavar='PCT', + help='Dropout rate (default: 0.)') + parser.add_argument('--drop-path', type=float, default=0.1, metavar='PCT', + help='Drop path rate (default: 0.1)') + + parser.add_argument('--model-ema', action='store_true') + parser.add_argument('--no-model-ema', action='store_false', dest='model_ema') + parser.set_defaults(model_ema=True) + parser.add_argument('--model-ema-decay', type=float, default=0.99996, help='') + parser.add_argument('--model-ema-force-cpu', action='store_true', default=False, help='') + + # Optimizer parameters + parser.add_argument('--opt', default='adamw', type=str, metavar='OPTIMIZER', + help='Optimizer (default: "adamw"') + parser.add_argument('--opt-eps', default=1e-8, type=float, metavar='EPSILON', + help='Optimizer Epsilon (default: 1e-8)') + parser.add_argument('--opt-betas', default=None, type=float, nargs='+', metavar='BETA', + help='Optimizer Betas (default: None, use opt default)') + parser.add_argument('--clip-grad', type=float, default=None, metavar='NORM', + help='Clip gradient norm (default: None, no clipping)') + parser.add_argument('--momentum', type=float, default=0.9, metavar='M', + help='SGD 
momentum (default: 0.9)') + parser.add_argument('--weight-decay', type=float, default=0.05, + help='weight decay (default: 0.05)') + # Learning rate schedule parameters + parser.add_argument('--sched', default='cosine', type=str, metavar='SCHEDULER', + help='LR scheduler (default: "cosine"') + parser.add_argument('--lr', type=float, default=5e-4, metavar='LR', + help='learning rate (default: 5e-4)') + parser.add_argument('--lr-noise', type=float, nargs='+', default=None, metavar='pct, pct', + help='learning rate noise on/off epoch percentages') + parser.add_argument('--lr-noise-pct', type=float, default=0.67, metavar='PERCENT', + help='learning rate noise limit percent (default: 0.67)') + parser.add_argument('--lr-noise-std', type=float, default=1.0, metavar='STDDEV', + help='learning rate noise std-dev (default: 1.0)') + parser.add_argument('--warmup-lr', type=float, default=1e-6, metavar='LR', + help='warmup learning rate (default: 1e-6)') + parser.add_argument('--min-lr', type=float, default=1e-5, metavar='LR', + help='lower lr bound for cyclic schedulers that hit 0 (1e-5)') + + parser.add_argument('--decay-epochs', type=float, default=30, metavar='N', + help='epoch interval to decay LR') + parser.add_argument('--warmup-epochs', type=int, default=5, metavar='N', + help='epochs to warmup LR, if scheduler supports') + parser.add_argument('--cooldown-epochs', type=int, default=10, metavar='N', + help='epochs to cooldown LR at min_lr, after cyclic schedule ends') + parser.add_argument('--patience-epochs', type=int, default=10, metavar='N', + help='patience epochs for Plateau LR scheduler (default: 10') + parser.add_argument('--decay-rate', '--dr', type=float, default=0.1, metavar='RATE', + help='LR decay rate (default: 0.1)') + + # Augmentation parameters + parser.add_argument('--color-jitter', type=float, default=0.4, metavar='PCT', + help='Color jitter factor (default: 0.4)') + parser.add_argument('--aa', type=str, default='rand-m9-mstd0.5-inc1', metavar='NAME', + help='Use AutoAugment policy. "v0" or "original". " + \ + "(default: rand-m9-mstd0.5-inc1)'), + parser.add_argument('--smoothing', type=float, default=0.1, help='Label smoothing (default: 0.1)') + parser.add_argument('--train-interpolation', type=str, default='bicubic', + help='Training interpolation (random, bilinear, bicubic default: "bicubic")') + + parser.add_argument('--repeated-aug', action='store_true') + parser.add_argument('--no-repeated-aug', action='store_false', dest='repeated_aug') + parser.set_defaults(repeated_aug=True) + + # * Random Erase params + parser.add_argument('--reprob', type=float, default=0.25, metavar='PCT', + help='Random erase prob (default: 0.25)') + parser.add_argument('--remode', type=str, default='pixel', + help='Random erase mode (default: "pixel")') + parser.add_argument('--recount', type=int, default=1, + help='Random erase count (default: 1)') + parser.add_argument('--resplit', action='store_true', default=False, + help='Do not random erase first (clean) augmentation split') + + # * Mixup params + parser.add_argument('--mixup', type=float, default=0.8, + help='mixup alpha, mixup enabled if > 0. (default: 0.8)') + parser.add_argument('--cutmix', type=float, default=1.0, + help='cutmix alpha, cutmix enabled if > 0. 
(default: 1.0)') + parser.add_argument('--cutmix-minmax', type=float, nargs='+', default=None, + help='cutmix min/max ratio, overrides alpha and enables cutmix if set (default: None)') + parser.add_argument('--mixup-prob', type=float, default=1.0, + help='Probability of performing mixup or cutmix when either/both is enabled') + parser.add_argument('--mixup-switch-prob', type=float, default=0.5, + help='Probability of switching to cutmix when both mixup and cutmix enabled') + parser.add_argument('--mixup-mode', type=str, default='batch', + help='How to apply mixup/cutmix params. Per "batch", "pair", or "elem"') + + # Distillation parameters + parser.add_argument('--teacher-model', default='regnety_160', type=str, metavar='MODEL', + help='Name of teacher model to train (default: "regnety_160"') + parser.add_argument('--teacher-path', type=str, default='') + parser.add_argument('--distillation-type', default='none', choices=['none', 'soft', 'hard'], type=str, help="") + parser.add_argument('--distillation-alpha', default=0.5, type=float, help="") + parser.add_argument('--distillation-tau', default=1.0, type=float, help="") + + # * Finetuning params + parser.add_argument('--finetune', default='', help='finetune from checkpoint') + + # Dataset parameters + parser.add_argument('--data-path', default='/datasets01/imagenet_full_size/061417/', type=str, + help='dataset path') + parser.add_argument('--data-set', default='IMNET', choices=['CIFAR', 'IMNET', 'INAT', 'INAT19'], + type=str, help='Image Net dataset path') + parser.add_argument('--inat-category', default='name', + choices=['kingdom', 'phylum', 'class', 'order', 'supercategory', 'family', 'genus', 'name'], + type=str, help='semantic granularity') + + parser.add_argument('--output_dir', default='./test_img/', help='path where to save') + parser.add_argument('--device', default='cuda', + help='device to use for training / testing') + parser.add_argument('--seed', default=0, type=int) + parser.add_argument('--resume', default='', help='resume from checkpoint') + parser.add_argument('--start_epoch', default=0, type=int, metavar='N', + help='start epoch') + parser.add_argument('--eval', action='store_false', help='Perform evaluation only') + parser.add_argument('--dist-eval', action='store_true', default=False, help='Enabling distributed evaluation') + parser.add_argument('--num_workers', default=10, type=int) + parser.add_argument('--pin-mem', action='store_true', + help='Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.') + parser.add_argument('--no-pin-mem', action='store_false', dest='pin_mem', + help='') + parser.set_defaults(pin_mem=True) + + # distributed training parameters + parser.add_argument('--world_size', default=1, type=int, + help='number of distributed processes') + parser.add_argument('--dist_url', default='env://', help='url used to set up distributed training') + parser.add_argument('--excel_filename', type=str, default='attention_matrix_cls', help='filename of saving excel') + + # visualization + parser.add_argument('--img-path', default='', type=str, + help='path to images to be visualized. Set '' to visualize batch images in imagenet val.') + parser.add_argument('--save-name', default='', type=str, + help='name to save when visualizing a single image. 
Set '' to save name as the original image.') + parser.add_argument('--layer-wise-prune', action='store_true', + help='set true when visualize a model trained without layer to stage training strategy') + return parser + + +def save_image_tensor(input_tensor: torch.Tensor, filename): + """ + 将tensor保存为图片 + :param input_tensor: 要保存的tensor + :param filename: 保存的文件名 + """ + assert ((len(input_tensor.shape) == 4 and input_tensor.shape[0] == 1) or len(input_tensor.shape) == 3) + # 复制一份 + input_tensor = input_tensor.clone().detach() + # 到cpu + input_tensor = input_tensor.to(torch.device('cpu')) + # 反归一化 + # input_tensor = unnormalize(input_tensor) + vutils.save_image(input_tensor, filename) + + +@torch.no_grad() +def visualize_single_img(img_input, model, device, transform, post_process, save_name): + model.eval() + # set stage_wise_prune = True if the trained model is under layer-to-stage training strategy + model.stage_wise_prune = not args.layer_wise_prune + + # img: 1, 3, H, W + image_raw = transform(img_input) + save_image_tensor(image_raw, Path(args.output_dir, '{}.jpg'.format(save_name))) + images = post_process(image_raw) + images = images.unsqueeze(0) + images = images.to(device, non_blocking=True) + print(images.shape) + # compute output + with torch.cuda.amp.autocast(): + output = model(images) + vis_dict = model.get_vis_dict() + image_raw = image_raw * 255 + image_raw = image_raw.squeeze(0).permute(1, 2, 0).cpu().numpy() + for k in vis_dict: + keep_indices = vis_dict[k] + viz = gen_visualization(image_raw, keep_indices) + viz = torch.from_numpy(viz).permute(2, 0, 1) + + viz = viz / 255 + + save_image_tensor(viz, + Path(args.output_dir, '{}_{}.jpg'.format(save_name, k))) + print("Visualization finished") + + +@torch.no_grad() +def visualize(data_loader, model, device, post_process): + criterion = torch.nn.CrossEntropyLoss() + + metric_logger = utils.MetricLogger(delimiter=" ") + header = 'Test:' + + # switch to evaluation mode + model.eval() + + # set stage_wise_prune = True if the trained model is under layer-to-stage training strategy + model.stage_wise_prune = not args.layer_wise_prune + + for images_raw_full, target_full in metric_logger.log_every(data_loader, 10, header): + B = images_raw_full.shape[0] + for index in range(B): + images_raw = images_raw_full[index:index + 1] + target = target_full[index:index + 1] + assert images_raw.shape[0] == 1 + images = post_process(images_raw) + + name = 'label{}_seed{}_index{}.jpg'.format(str(target.item()), int(args.seed), index) + save_image_tensor(images_raw, Path(args.output_dir, name)) + images = images.to(device, non_blocking=True) + target = target.to(device, non_blocking=True) + + # compute output + with torch.cuda.amp.autocast(): + output = model(images) + vis_dict = model.get_vis_dict() + loss = criterion(output, target) + + images_raw = images_raw * 255 + images_raw = images_raw.squeeze(0).permute(1, 2, 0).cpu().numpy() + # if np.max(images_raw) > 3: + # images_raw = images_raw / 255 + + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + if acc1 == 0: + judger = 'wrong' + elif acc1 == 100: + judger = 'right' + else: + raise ValueError('xxxx') + + for k in vis_dict: + keep_indices = vis_dict[k] + viz = gen_visualization(images_raw, keep_indices) + viz = torch.from_numpy(viz).permute(2, 0, 1) + viz = viz / 255 + + name = 'label{}_seed{}_{}_index{}_{}.jpg'.format( + str(target.item()), + int(args.seed), k, index, judger) + save_image_tensor(viz, Path(args.output_dir, name)) + + batch_size = images.shape[0] + 
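+            # each inner iteration evaluates exactly one image, so batch_size is 1 here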
metric_logger.update(loss=loss.item()) + metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) + metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) + print("Visualization finished") + break + + # gather the stats from all processes + metric_logger.synchronize_between_processes() + print('* Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f} loss {losses.global_avg:.3f}' + .format(top1=metric_logger.acc1, top5=metric_logger.acc5, losses=metric_logger.loss)) + + return {k: meter.global_avg for k, meter in metric_logger.meters.items()} + + +def vis_single(args): + device = torch.device(args.device) + + # fix the seed for reproducibility + seed = args.seed + torch.manual_seed(seed) + np.random.seed(seed) + # random.seed(seed) + + cudnn.benchmark = True + + transform = get_transform(input_size=224) # set input_size to other value if the test image is not 224*224 + post_process = get_post_process() + + print("Creating model: {args.model}") + model = create_model( + args.model, + pretrained=False, + num_classes=1000, + drop_rate=args.drop, + drop_path_rate=args.drop_path, + drop_block_rate=None, + ) + + model.to(device) + + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + print('number of params:', n_parameters) + + if args.resume: + if args.resume.startswith('https'): + checkpoint = torch.hub.load_state_dict_from_url( + args.resume, map_location='cpu', check_hash=True) + else: + checkpoint = torch.load(args.resume, map_location='cpu') + + model.load_state_dict(checkpoint['model']) + + img_input = Image.open(args.img_path) + if args.save_name == '': + save_name = os.path.basename(args.img_path).split('.')[0] + else: + save_name = args.save_name + if args.eval: + test_stats = visualize_single_img(img_input, model, device, transform, post_process, save_name=save_name) + return + + +def vis_batch(args): + utils.init_distributed_mode(args) + print(args) + + if args.distillation_type != 'none' and args.finetune and not args.eval: + raise NotImplementedError("Finetuning with distillation not yet supported") + + device = torch.device(args.device) + + # fix the seed for reproducibility + seed = args.seed + torch.manual_seed(seed) + np.random.seed(seed) + # random.seed(seed) + + cudnn.benchmark = True + + # dataset_train, args.nb_classes = build_dataset(is_train=True, args=args) + dataset_val, args.nb_classes = build_dataset2(is_train=False, args=args) + post_process = get_post_process() + + if True: # args.distributed: + num_tasks = utils.get_world_size() + global_rank = utils.get_rank() + # if args.repeated_aug: + # sampler_train = RASampler( + # dataset_train, num_replicas=num_tasks, rank=global_rank, shuffle=True + # ) + # else: + # sampler_train = torch.utils.data.DistributedSampler( + # dataset_train, num_replicas=num_tasks, rank=global_rank, shuffle=True + # ) + if args.dist_eval: + if len(dataset_val) % num_tasks != 0: + print('Warning: Enabling distributed evaluation with an eval dataset not divisible by process number. 
' + 'This will slightly alter validation results as extra duplicate entries are added to achieve ' + 'equal num of samples per-process.') + # sampler_val = torch.utils.data.DistributedSampler( + # dataset_val, num_replicas=num_tasks, rank=global_rank, shuffle=False) + sampler_val = torch.utils.data.DistributedSampler( + dataset_val, num_replicas=num_tasks, rank=global_rank, shuffle=True) + else: + # sampler_val = torch.utils.data.SequentialSampler(dataset_val) + sampler_val = torch.utils.data.RandomSampler(dataset_val) + else: + # sampler_train = torch.utils.data.RandomSampler(dataset_train) + # sampler_val = torch.utils.data.SequentialSampler(dataset_val) + sampler_val = torch.utils.data.RandomSampler(dataset_val) + # data_loader_train = torch.utils.data.DataLoader( + # dataset_train, sampler=sampler_train, + # batch_size=args.batch_size, + # num_workers=args.num_workers, + # pin_memory=args.pin_mem, + # drop_last=True, + # ) + + data_loader_val = torch.utils.data.DataLoader( + dataset_val, sampler=sampler_val, + batch_size=args.batch_size, + num_workers=args.num_workers, + pin_memory=args.pin_mem, + drop_last=False, + ) + + print("Creating model: {args.model}") + model = create_model( + args.model, + pretrained=False, + num_classes=args.nb_classes, + drop_rate=args.drop, + drop_path_rate=args.drop_path, + drop_block_rate=None, + ) + + if args.finetune: + if args.finetune.startswith('https'): + checkpoint = torch.hub.load_state_dict_from_url( + args.finetune, map_location='cpu', check_hash=True) + else: + checkpoint = torch.load(args.finetune, map_location='cpu') + + checkpoint_model = checkpoint['model'] + state_dict = model.state_dict() + for k in ['head.weight', 'head.bias', 'head_dist.weight', 'head_dist.bias']: + if k in checkpoint_model and checkpoint_model[k].shape != state_dict[k].shape: + print("Removing key {k} from pretrained checkpoint") + del checkpoint_model[k] + + # interpolate position embedding + pos_embed_checkpoint = checkpoint_model['pos_embed'] + embedding_size = pos_embed_checkpoint.shape[-1] + num_patches = model.patch_embed.num_patches + num_extra_tokens = model.pos_embed.shape[-2] - num_patches + # height (== width) for the checkpoint position embedding + orig_size = int((pos_embed_checkpoint.shape[-2] - num_extra_tokens) ** 0.5) + # height (== width) for the new position embedding + new_size = int(num_patches ** 0.5) + # class_token and dist_token are kept unchanged + extra_tokens = pos_embed_checkpoint[:, :num_extra_tokens] + # only the position tokens are interpolated + pos_tokens = pos_embed_checkpoint[:, num_extra_tokens:] + pos_tokens = pos_tokens.reshape(-1, orig_size, orig_size, embedding_size).permute(0, 3, 1, 2) + pos_tokens = torch.nn.functional.interpolate( + pos_tokens, size=(new_size, new_size), mode='bicubic', align_corners=False) + pos_tokens = pos_tokens.permute(0, 2, 3, 1).flatten(1, 2) + new_pos_embed = torch.cat((extra_tokens, pos_tokens), dim=1) + checkpoint_model['pos_embed'] = new_pos_embed + + model.load_state_dict(checkpoint_model, strict=False) + + model.to(device) + + model_ema = None + if args.model_ema: + # Important to create EMA model after cuda(), DP wrapper, and AMP but before SyncBN and DDP wrapper + model_ema = ModelEma( + model, + decay=args.model_ema_decay, + device='cpu' if args.model_ema_force_cpu else '', + resume='') + + model_without_ddp = model + if args.distributed: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) + model_without_ddp = model.module + n_parameters = sum(p.numel() for p in 
model.parameters() if p.requires_grad) + print('number of params:', n_parameters) + + linear_scaled_lr = args.lr * args.batch_size * utils.get_world_size() / 512.0 + args.lr = linear_scaled_lr + optimizer = create_optimizer(args, model_without_ddp) + loss_scaler = NativeScaler() + + lr_scheduler, _ = create_scheduler(args, optimizer) + + if args.distillation_type != 'none': + assert args.teacher_path, 'need to specify teacher-path when using distillation' + print("Creating teacher model: {args.teacher_model}") + teacher_model = create_model( + args.teacher_model, + pretrained=False, + num_classes=args.nb_classes, + global_pool='avg', + ) + if args.teacher_path.startswith('https'): + checkpoint = torch.hub.load_state_dict_from_url( + args.teacher_path, map_location='cpu', check_hash=True) + else: + checkpoint = torch.load(args.teacher_path, map_location='cpu') + teacher_model.load_state_dict(checkpoint['model']) + teacher_model.to(device) + teacher_model.eval() + + # wrap the criterion in our custom DistillationLoss, which + # just dispatches to the original criterion if args.distillation_type is 'none' + + if args.resume: + if args.resume.startswith('https'): + checkpoint = torch.hub.load_state_dict_from_url( + args.resume, map_location='cpu', check_hash=True) + else: + checkpoint = torch.load(args.resume, map_location='cpu') + + model_without_ddp.load_state_dict(checkpoint['model']) + if not args.eval and 'optimizer' in checkpoint and 'lr_scheduler' in checkpoint and 'epoch' in checkpoint: + optimizer.load_state_dict(checkpoint['optimizer']) + lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) + args.start_epoch = checkpoint['epoch'] + 1 + if args.model_ema: + utils._load_checkpoint_for_ema(model_ema, checkpoint['model_ema']) + if 'scaler' in checkpoint: + loss_scaler.load_state_dict(checkpoint['scaler']) + + if args.eval: + test_stats = visualize(data_loader_val, model, device, post_process=post_process) + print("Accuracy of the network on the {len(dataset_val)} test images: {test_stats['acc1']:.1f}%") + return + + +if __name__ == '__main__': + parser = argparse.ArgumentParser('DeiT training and evaluation script', parents=[get_args_parser()]) + args = parser.parse_args() + if args.output_dir: + Path(args.output_dir).mkdir(parents=True, exist_ok=True) + args.eval = True + + if args.img_path == '': + # To visualize batch images of imagenet val, please run this: + vis_batch(args) + else: + # To visualize a single image, please run this: + vis_single(args) -- Gitee From 3721892ba8b8973b399aeac036be90cb7dd8e0a6 Mon Sep 17 00:00:00 2001 From: zhangyanmin <2716635239@qq.com> Date: Wed, 9 Nov 2022 14:00:37 +0000 Subject: [PATCH 2/3] updata README --- .../Evo-Levit_256_384/README.md | 248 ++++++++++-------- 1 file changed, 139 insertions(+), 109 deletions(-) diff --git a/PyTorch/contrib/cv/classification/Evo-Levit_256_384/README.md b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/README.md index 51c8512781..4b449cde2e 100644 --- a/PyTorch/contrib/cv/classification/Evo-Levit_256_384/README.md +++ b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/README.md @@ -1,154 +1,184 @@ # Evo-Levit for PyTorch - -[TOC] +- [概述](#概述) +- [准备训练环境](#准备训练环境) +- [开始训练](#开始训练) +- [训练结果展示](#训练结果展示) +- [版本说明](#版本说明) # 概述 ## 简述 -Evo-ViT的具体框架设计,包括基于全局class attention的token选择以及慢速-快速双流token更新两个模块。其根据全局class attention的排序判断高信息token和低信息token,将低信息token整合为一个归纳token,和高信息token一起输入到原始多头注意力(Multi-head Self-Attention, MSA)模块以及前向传播(Fast Fed-forward Network, FFN)模块中进行精细更新。更新后的归纳token用来快速更新低信息token。全局class 
attention也在精细更新过程中进行同步更新变化。 +Evo-ViT的具体框架设计,包括基于全局class attention的token选择以及慢速、快速双流token更新两个模块。其根据全局class attention的排序判断高信息token和低信息token,将低信息token整合为一个归纳token,和高信息token一起输入到原始多头注意力(Multi-head Self-Attention, MSA)模块以及前向传播(Fast Fed-forward Network, FFN)模块中进行精细更新。更新后的归纳token用来快速更新低信息token。全局class attention也在精细更新过程中进行同步更新变化。 + +- 参考实现: + + ``` + url=https://github.com/YifanXu74/Evo-ViT + commit_id=4c5d9b30b0a3c9b1e7b8687a9490555bd9d714ca + ``` -- 参考实现 -``` -url = https://github.com/YifanXu74/Evo-ViT.git -``` +- 适配昇腾 AI 处理器的实现: -- 适配昇腾AI处理器的实现 -- 通过Git获取代码方法如下 + ``` + url=https://gitee.com/ascend/ModelZoo-PyTorch.git + code_path=PyTorch/contrib/cv/classification + ``` + +- 通过Git获取代码方法如下: -``` -git clone {url} # 克隆仓库的代码 -cd {code_path} # 切换到模型代码所在路径 -``` + ``` + git clone {url} # 克隆仓库的代码 + cd {code_path} # 切换到模型代码所在路径,若仓库下只有该模型,则无需切换 + ``` + +- 通过单击“立即下载”,下载源码包。 # 准备训练环境 ## 准备环境 -- 当前模型支持的固件与驱动、 CANN 以及 PyTorch 如下表所示 - - **表1** 版本配套表 - -| 配套 | 版本 | -| ---------- | ------------------------------------------------------------ | -| 固件与驱动 | [1.0.12](https://www.hiascend.com/hardware/firmware-drivers?tag=commercial) | -| CANN | [5.0.3](https://www.hiascend.com/hardware/firmware-drivers?tag=commercial) | -| PyTorch | [1.8.1](https://gitee.com/ascend/pytorch/tree/master/) | - -- 安装依赖 - -``` -pip install timm==0.4.12 -pip install torchvision==0.9.1 -pip install torch_npu-1.8.1rc2.20220607-cp37-cp37m-linux_aarch64.whl -pip install torch-1.8.1+ascend.rc2.20220607-cp37-cp37m-linux_aarch64.whl -pip install apex-0.1+ascend.20220607-cp37-cp37m-linux_aarch64.whl -``` - -- 关于timm包的NPU优化补丁 - -``` -# 需要先cd到当前文件目录,一般timm包的安装位置在/usr/local/lib/python3.7/dist-packages/timm/ -#先后生成补丁并升级包 -diff -uN {timm_path}/data/mixup.py {code_path}/fix_timm/mixup.py >mixup.patch -diff -uN {timm_path}/optim/optim_factory.py {code_path}/fix_timm/optim_factory.py >optim.patch -patch -p0 {timm_path}/data/mixup.py mixup.patch -patch -p0 {timm_path}/optim/optim_factory.py optim.patch -``` - -## 数据集 - -1. 获取数据集 - -​ 选用的数据集是ImageNet,用户自行获取将数据集上传到服务器任意路径下并解压。 - -​ ImageNet数据集的目录结构参考如下所示 - -``` -├── ImageNet2012 - ├──train - ├──类别1 - │──图片1 - │──图片2 - │ ... - ├──类别2 - │──图片1 - │──图片2 - │ ... - ├──... - ├──val - ├──类别1 - │──图片1 - │──图片2 - │ ... - ├──类别2 - │──图片1 - │──图片2 - │ ... 
-``` - -## 获取Teacher checkpoint +- 当前模型支持的固件与驱动、 CANN 以及 PyTorch 如下表所示。 + + **表 1** 版本配套表 + + | 配套 | 版本 | + | ---------- | ------------------------------------------------------------ | + | 固件与驱动 | [1.0.17](https://www.hiascend.com/hardware/firmware-drivers?tag=commercial ) | + | CANN | [6.0.RC1](https://www.hiascend.com/software/cann/commercial?version=6.0.RC1 ) | + | PyTorch | [1.8.1](https://gitee.com/ascend/pytorch/tree/master/) | + +- 环境准备指导。 + + 请参考《[Pytorch框架训练环境准备](https://www.hiascend.com/document/detail/zh/ModelZoo/pytorchframework/ptes)》。 + +- 安装依赖。 + + ``` + pip install timm==0.4.12 + pip install torchvision==0.9.1 + pip install torch_npu-1.8.1rc2.20220607-cp37-cp37m-linux_aarch64.whl + pip install torch-1.8.1+ascend.rc2.20220607-cp37-cp37m-linux_aarch64.whl + pip install apex-0.1+ascend.20220607-cp37-cp37m-linux_aarch64.whl + ``` + +- 关于timm包的NPU优化补丁。 + + ``` + # 需要先cd到当前文件目录,一般timm包的安装位置在/usr/local/lib/python3.7/dist-packages/timm/ + #先后生成补丁并升级包 + diff -uN {timm_path}/data/mixup.py {code_path}/fix_timm/mixup.py >mixup.patch + diff -uN {timm_path}/optim/optim_factory.py {code_path}/fix_timm/optim_factory.py >optim.patch + patch -p0 {timm_path}/data/mixup.py mixup.patch + patch -p0 {timm_path}/optim/optim_factory.py optim.patch + ``` + + + + +## 准备数据集 + +1. 获取数据集。 + + 用户自行获取原始数据集ImageNet2012,将数据集上传到服务器任意路径下并解压。 + + 以ImageNet2012数据集为例,数据集目录结构参考如下所示。 + + ``` + ├── ImageNet2012 + ├──train + ├──类别1 + │──图片1 + │──图片2 + │ ... + ├──类别2 + │──图片1 + │──图片2 + │ ... + ├──... + ├──val + ├──类别1 + │──图片1 + │──图片2 + │ ... + ├──类别2 + │──图片1 + │──图片2 + │ ... + ``` + + > **说明:** + > 数据集路径以用户自行定义的路径为准 + +## 获取预训练模型 Evo-Vit模型训练需要配置teacher—model,获取方式为在GitHub的[Evo-Vit]([GitHub - YifanXu74/Evo-ViT: Official implement of Evo-ViT: Slow-Fast Token Evolution for Dynamic Vision Transformer](https://github.com/YifanXu74/Evo-ViT)),checkpoint文件可以在该仓库自行下载,也可以直接使用网址进行下载,网址如下 https://dl.fbaipublicfiles.com/deit/regnety_160-a5fe301d.pth +预训练模型需要放置在模型文件夹下,与main_levit.py或者README处于同级目录下。与源码中的与配置参数的默认值 ”./regnety_160-a5fe301d.pth“保持一致。 + # 开始训练 ## 训练模型 +1. 进入解压后的源码包根目录。 -1. 进入解压后的源码包根目录 + ``` + cd /${模型文件夹名称} + ``` -``` -cd /Evo-Levit_256_384 -``` +2. 运行训练脚本。 -2. 
运行训练脚本 + 该模型支持单机单卡训练和单机8卡训练,开始训练前,请用户根据实际路径配置data_path参数。 -该模型支持单机单卡训练和单机8卡训练,开始训练前,请用户根据实际路径配置data_path参数。 + - 单机单卡训练 -- 单机单卡训练 + 启动单卡训练。 -​ 启动单卡训练 + ``` + bash ./test/train_full_1P.sh --data_path=/data/xxx/ + ``` -``` -bash ./test/train_full_1p.sh --data_path=/home/zym/imagenet/ -``` + - 单机8卡训练 -- 单机 8卡训练 + 启动8卡训练。 -​ 启动8卡训练 + ``` + bash ./test/train_full_8P.sh --data_path=/data/xxx/ + ``` -``` -bash ./test/train_full_8p.sh --data_path=/home/zym/imagenet/ -``` + --data\_path参数填写数据集路径。 -训练完成后,权重文件保存在 参数设置 的路径下,并输出模型训练精度和性能信息 + 模型训练脚本参数说明如下。 -# 训练结果展示 + ``` + 公共参数: + --data_path //数据集路径 + --epochs //重复训练次数 + --batch-size //训练批次大小 + --nproc_per_node //数字表示启用单卡还是多卡 + ``` + + 训练完成后,权重文件保存在当前路径的save中,并输出模型训练精度和性能信息。 -**表2** 训练结果展示表 +# 训练结果展示 -| NAME | PT版本 | 精度 | FPS | Epochs | AMP_Type | -| ------ | ------ | ----- | ---- | ------ | -------- | -| 1P-GPU | 1.8.1 | - | 51 | 1 | O1 | -| 1P-NPU | 1.8.1 | - | 59 | 1 | O1 | -| 8P-GPU | 1.8.1 | 73.54 | 487 | 100 | O1 | -| 8P-NPU | 1.8.1 | 74.11 | 496 | 100 | O1 | +**表 2** 训练结果展示表 +| NAME | PT版本 | 精度 | FPS | Epochs | AMP_Type | +| ------ | ------ | ----: | ------ | -----: | -------- | +| 1P-GPU | 1.8.1 | - | 51 | 1 | O1 | +| 1P-NPU | 1.8.1 | - | 66.93 | 1 | O1 | +| 8P-GPU | 1.8.1 | 73.54 | 487 | 100 | O1 | +| 8P-NPU | 1.8.1 | 74.32 | 510.72 | 100 | O1 | # 版本说明 ## 变更 -2022.09.17:首次发布 - -2022.10.21: 新增teacher checkpoint网址,更新bash命令 - -2022.11.09:将NPU1P的fps更新为符合设备的59 - -# 已知问题 - +2022.11.09:首次发布。 +## 已知问题 +无。 \ No newline at end of file -- Gitee From 9d925f2ebd43c41c4ecd3e14a8b6034276ba75ab Mon Sep 17 00:00:00 2001 From: zhangyanmin <2716635239@qq.com> Date: Thu, 10 Nov 2022 05:34:16 +0000 Subject: [PATCH 3/3] update README --- PyTorch/contrib/cv/classification/Evo-Levit_256_384/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyTorch/contrib/cv/classification/Evo-Levit_256_384/README.md b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/README.md index 4b449cde2e..33492b0c86 100644 --- a/PyTorch/contrib/cv/classification/Evo-Levit_256_384/README.md +++ b/PyTorch/contrib/cv/classification/Evo-Levit_256_384/README.md @@ -116,7 +116,7 @@ Evo-ViT的具体框架设计,包括基于全局class attention的token选择 Evo-Vit模型训练需要配置teacher—model,获取方式为在GitHub的[Evo-Vit]([GitHub - YifanXu74/Evo-ViT: Official implement of Evo-ViT: Slow-Fast Token Evolution for Dynamic Vision Transformer](https://github.com/YifanXu74/Evo-ViT)),checkpoint文件可以在该仓库自行下载,也可以直接使用网址进行下载,网址如下 https://dl.fbaipublicfiles.com/deit/regnety_160-a5fe301d.pth -预训练模型需要放置在模型文件夹下,与main_levit.py或者README处于同级目录下。与源码中的与配置参数的默认值 ”./regnety_160-a5fe301d.pth“保持一致。 +预训练模型需要放置在模型文件夹下,与main_levit.py或者README处于同级目录下。与源码中的配置参数的默认值 ”./regnety_160-a5fe301d.pth“保持一致。 # 开始训练 -- Gitee
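
---

`vis_batch` in visualize.py rescales the base learning rate linearly with the global batch size (`args.lr * args.batch_size * world_size / 512.0`); the training entry point presumably applies the same rule. A small worked example of that scaling, where the base LR, per-card batch size, and card count are chosen purely for illustration since their defaults are not shown in this patch:

```
def linear_scaled_lr(base_lr, batch_size, world_size):
    # Linear scaling rule: the effective LR grows with the global batch size,
    # normalized to a reference batch of 512 (as in vis_batch above).
    return base_lr * batch_size * world_size / 512.0

# Hypothetical 8-card run with a per-card batch of 256 and base LR 5e-4:
print(linear_scaled_lr(5e-4, 256, 8))  # -> 0.002
```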
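The finetune branch of `vis_batch` interpolates the checkpoint's position embeddings when the patch grid changes (for example when moving between the 256 and 384 input resolutions). Below is a minimal, self-contained sketch of that step for reference; the function name, the toy tensor sizes, and the assumption of 2 extra (cls/dist) tokens are illustrative only and are not taken from the model configs in this patch.

```
import torch

def interpolate_pos_embed(pos_embed, num_extra_tokens, new_size):
    # Bicubic resize of the patch position embeddings, keeping the extra
    # (cls/dist) tokens unchanged, mirroring the finetune branch above.
    embedding_dim = pos_embed.shape[-1]
    orig_size = int((pos_embed.shape[-2] - num_extra_tokens) ** 0.5)
    extra = pos_embed[:, :num_extra_tokens]
    patches = pos_embed[:, num_extra_tokens:]
    patches = patches.reshape(-1, orig_size, orig_size, embedding_dim).permute(0, 3, 1, 2)
    patches = torch.nn.functional.interpolate(
        patches, size=(new_size, new_size), mode='bicubic', align_corners=False)
    patches = patches.permute(0, 2, 3, 1).flatten(1, 2)
    return torch.cat((extra, patches), dim=1)

# Toy check: a 14x14 grid (196 patches) plus 2 extra tokens resized to a 24x24 grid.
toy = torch.randn(1, 2 + 14 * 14, 192)
print(interpolate_pos_embed(toy, num_extra_tokens=2, new_size=24).shape)  # torch.Size([1, 578, 192])
```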
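The results table in the updated README reports throughput as FPS. A rough sketch of how such a figure can be derived from step timings is shown below; it assumes FPS means images processed per second across all devices, and the step callable, batch size, and step count are placeholders — the authoritative numbers come from the test/train_performance_*P.sh scripts, which are not reproduced here.

```
import time

def measure_fps(step_fn, batch_size, world_size, steps=100):
    # Images processed per second across all devices over `steps` timed iterations.
    start = time.time()
    for _ in range(steps):
        step_fn()  # stand-in for one training step
    elapsed = time.time() - start
    return batch_size * world_size * steps / elapsed

# Toy illustration with a dummy 10 ms "step":
print('approx. FPS: {:.1f}'.format(measure_fps(lambda: time.sleep(0.01), batch_size=256, world_size=1)))
```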