From c307caae927948127a6cdbf7c23622457c469b51 Mon Sep 17 00:00:00 2001 From: zjy <1363845850@qq.com> Date: Mon, 15 Aug 2022 10:19:13 +0800 Subject: [PATCH] =?UTF-8?q?=E3=80=90=E8=A5=BF=E5=8C=97=E5=B7=A5=E4=B8=9A?= =?UTF-8?q?=E5=A4=A7=E5=AD=A6=E3=80=91=E3=80=90=E9=AB=98=E6=A0=A1=E8=B4=A1?= =?UTF-8?q?=E7=8C=AE=E3=80=91=E3=80=90PyTorch=E3=80=91ShiftViT=E9=87=8D?= =?UTF-8?q?=E6=96=B0=E6=8F=90=E4=BA=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ShiftViT_for_PyTorch/.gitignore | 350 ++++++++++++ .../ShiftViT_for_PyTorch/CODE_OF_CONDUCT.md | 9 + .../ShiftViT_for_PyTorch/LICENSE | 21 + .../ShiftViT_for_PyTorch/NOTICE | 11 + .../ShiftViT_for_PyTorch/README.md | 130 +++++ .../ShiftViT_for_PyTorch/README_raw.md | 128 +++++ .../ShiftViT_for_PyTorch/SECURITY.md | 41 ++ .../ShiftViT_for_PyTorch/SUPPORT.md | 25 + .../ShiftViT_for_PyTorch/bind_pyt.py | 141 +++++ .../ShiftViT_for_PyTorch/datasets.py | 73 +++ .../ShiftViT_for_PyTorch/engine.py | 175 ++++++ .../ShiftViT_for_PyTorch/logger.py | 48 ++ .../ShiftViT_for_PyTorch/losses.py | 78 +++ .../ShiftViT_for_PyTorch/main.py | 496 ++++++++++++++++++ .../ShiftViT_for_PyTorch/mixup_nova.py | 48 ++ .../ShiftViT_for_PyTorch/models/__init__.py | 17 + .../ShiftViT_for_PyTorch/models/registry.py | 275 ++++++++++ .../ShiftViT_for_PyTorch/models/shiftvit.py | 365 +++++++++++++ .../ShiftViT_for_PyTorch/models/smlp.py | 143 +++++ .../models/spach/__init__.py | 20 + .../models/spach/layers/__init__.py | 19 + .../models/spach/layers/channel_func.py | 46 ++ .../models/spach/layers/spatial_func.py | 122 +++++ .../models/spach/layers/stem.py | 110 ++++ .../ShiftViT_for_PyTorch/models/spach/misc.py | 92 ++++ .../models/spach/spach.py | 201 +++++++ .../models/spach/spach_ms.py | 147 ++++++ .../ShiftViT_for_PyTorch/requirements.txt | 4 + .../ShiftViT_for_PyTorch/samplers.py | 73 +++ .../ShiftViT_for_PyTorch/test/env_npu.sh | 79 +++ .../test/train_full_1p.sh | 150 ++++++ .../test/train_full_8p.sh | 150 ++++++ .../test/train_performance_1p.sh | 152 ++++++ .../test/train_performance_8p.sh | 150 ++++++ .../ShiftViT_for_PyTorch/utils.py | 298 +++++++++++ 35 files changed, 4387 insertions(+) create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/.gitignore create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/CODE_OF_CONDUCT.md create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/LICENSE create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/NOTICE create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/README.md create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/README_raw.md create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/SECURITY.md create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/SUPPORT.md create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/bind_pyt.py create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/datasets.py create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/engine.py create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/logger.py create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/losses.py create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/main.py create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/mixup_nova.py create mode 100644 
PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/__init__.py create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/registry.py create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/shiftvit.py create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/smlp.py create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/__init__.py create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/layers/__init__.py create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/layers/channel_func.py create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/layers/spatial_func.py create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/layers/stem.py create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/misc.py create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/spach.py create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/spach_ms.py create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/requirements.txt create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/samplers.py create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/test/env_npu.sh create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/test/train_full_1p.sh create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/test/train_full_8p.sh create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/test/train_performance_1p.sh create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/test/train_performance_8p.sh create mode 100644 PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/utils.py diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/.gitignore b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/.gitignore new file mode 100644 index 0000000000..dfcfd56f44 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/.gitignore @@ -0,0 +1,350 @@ +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. 
+## +## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore + +# User-specific files +*.rsuser +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Mono auto generated files +mono_crash.* + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +[Aa][Rr][Mm]/ +[Aa][Rr][Mm]64/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ +[Ll]ogs/ + +# Visual Studio 2015/2017 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# Visual Studio 2017 auto generated files +Generated\ Files/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUnit +*.VisualState.xml +TestResult.xml +nunit-*.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# Benchmark Results +BenchmarkDotNet.Artifacts/ + +# .NET Core +project.lock.json +project.fragment.lock.json +artifacts/ + +# StyleCop +StyleCopReport.xml + +# Files built by Visual Studio +*_i.c +*_p.c +*_h.h +*.ilk +*.meta +*.obj +*.iobj +*.pch +*.pdb +*.ipdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*_wpftmp.csproj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# Visual Studio Trace Files +*.e2e + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# AxoCover is a Code Coverage Tool +.axoCover/* +!.axoCover/settings.json + +# Visual Studio code coverage results +*.coverage +*.coveragexml + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# Note: Comment the next line if you want to checkin your web deploy settings, +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# NuGet Symbol Packages +*.snupkg +# The packages folder can be ignored because of Package Restore +**/[Pp]ackages/* +# except build/, which is used as an MSBuild target. 
+!**/[Pp]ackages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt +*.appx +*.appxbundle +*.appxupload + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!?*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +orleans.codegen.cs + +# Including strong name files can present a security risk +# (https://github.com/github/gitignore/pull/2483#issue-259490424) +#*.snk + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm +ServiceFabricBackup/ +*.rptproj.bak + +# SQL Server files +*.mdf +*.ldf +*.ndf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings +*.rptproj.rsuser +*- [Bb]ackup.rdl +*- [Bb]ackup ([0-9]).rdl +*- [Bb]ackup ([0-9][0-9]).rdl + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat +node_modules/ + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
+*.vbw + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# CodeRush personal settings +.cr/personal + +# Python Tools for Visual Studio (PTVS) +__pycache__/ +*.pyc + +# Cake - Uncomment if you are using it +# tools/** +# !tools/packages.config + +# Tabs Studio +*.tss + +# Telerik's JustMock configuration file +*.jmconfig + +# BizTalk build output +*.btp.cs +*.btm.cs +*.odx.cs +*.xsd.cs + +# OpenCover UI analysis results +OpenCover/ + +# Azure Stream Analytics local run output +ASALocalRun/ + +# MSBuild Binary and Structured Log +*.binlog + +# NVidia Nsight GPU debugger configuration file +*.nvuser + +# MFractors (Xamarin productivity tool) working folder +.mfractor/ + +# Local History for Visual Studio +.localhistory/ + +# BeatPulse healthcheck temp database +healthchecksdb + +# Backup folder for Package Reference Convert tool in Visual Studio 2017 +MigrationBackup/ + +# Ionide (cross platform F# VS Code tools) working folder +.ionide/ diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/CODE_OF_CONDUCT.md b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/CODE_OF_CONDUCT.md new file mode 100644 index 0000000000..f9ba8cf65f --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/CODE_OF_CONDUCT.md @@ -0,0 +1,9 @@ +# Microsoft Open Source Code of Conduct + +This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). + +Resources: + +- [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) +- [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) +- Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/LICENSE b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/LICENSE new file mode 100644 index 0000000000..9e841e7a26 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/LICENSE @@ -0,0 +1,21 @@ + MIT License + + Copyright (c) Microsoft Corporation. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE
diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/NOTICE b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/NOTICE
new file mode 100644
index 0000000000..2cd7bd7638
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/NOTICE
@@ -0,0 +1,11 @@
+ NOTICES
+
+ This repository incorporates material as listed below or described in the code.
+
+ Component:
+ main.py, losses.py, datasets.py, engine.py, utils.py, logger.py, samplers.py
+
+ Open Source License/Copyright Notice:
+ MIT License
+ Copyright (c) 2015-present, Facebook, Inc.
+ All rights reserved.
diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/README.md b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/README.md
new file mode 100644
index 0000000000..4344a86a96
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/README.md
@@ -0,0 +1,130 @@
+# ShiftViT
+
+## Model Overview
+
+ShiftViT is an image classification model developed by Microsoft.
+
+Source repository: [microsoft/SPACH](https://github.com/microsoft/SPACH).
+
+## Installing Dependencies
++ install numactl:
+
+```
+apt-get install numactl # for Ubuntu
+yum install numactl # for CentOS
+```
+
++ install requirements
+```
+pip3 install torchvision==0.6.0
+pip3 install einops==0.4.1
+pip3 install --no-deps timm==0.4.5
+
+# other recommended requirements
+apex==0.1+ascend.20220315
+torch==1.5.0+ascend.post5.20220315
+```
+
+- source the environment:
+
+```
+source test/env_npu.sh
+```
+
+- Download the ImageNet dataset from http://www.image-net.org/
+    - Then move the validation images into labeled subfolders, using [the following shell script](https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh)
+
+## Training
+
+The training scripts are located in the `test` folder.
+
+Run training as follows:
+
+```bash
+# 1p accuracy training
+bash ./test/train_full_1p.sh --data_path=real_data_path
+
+# 1p performance training
+bash ./test/train_performance_1p.sh --data_path=real_data_path
+
+# 8p accuracy training
+bash ./test/train_full_8p.sh --data_path=real_data_path
+
+# 8p performance training
+bash ./test/train_performance_8p.sh --data_path=real_data_path
+
+```
+
+Log paths:
+
+    test/output/devie_id/train_${device_id}.log                           # training detail log
+
+    test/output/devie_id/shiftvit_light_tiny_bs128_8p_perf.log   # 8p training performance result log
+
+    test/output/devie_id/shiftvit_light_tiny_bs128_8p_acc.log    # 8p training accuracy result log
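+
+For a quick functional check, `main.py` can also be launched directly on a single device instead of
+through the wrapper scripts. The sketch below is for reference only: the flags come from `main.py`,
+and the model name `shiftvit_light_tiny` is assumed from the log-file naming above, so confirm the
+exact registered names in `models/registry.py` before running.
+
+```bash
+source test/env_npu.sh
+python3 main.py --npu --model shiftvit_light_tiny --data-path /path/to/imagenet \
+    --batch-size 128 --epochs 1 --output_dir ./output_quick_check
+```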
+
+## ShiftViT Training Results
+
+1. GPU training
+
+   GPU training was run on the provided server.
+
+   The accuracy target is taken from the numbers reported in the source repository.
+
+   The performance baseline is taken from the results measured on that server.
+
+   Run logs (4 epochs): obs://zjy-lenet/shiftvit/gpu/
+
+   The logs are large, so it is recommended to use `awk '/Total/' stdout.txt` to check the performance records,
+
+   `awk '/Accuracy/' stdout.txt` to check the accuracy records,
+
+   and `awk '/FPS/' stdout.txt` to check the FPS.
+
+2. NPU training
+
+   NPU training was run on the provided server.
+
+   Run logs (300 epochs): obs://zjy-lenet/shiftvit/npu/
+
+   The logs are large, so it is recommended to use `awk '/Total/' train_0.log` to check the performance records,
+
+   `awk '/Accuracy/' train_0.log` to check the accuracy records,
+
+   and `awk '/FPS/' train_0.log` to check the FPS.
+
+3. Results
+
+   batch_size=128
+
+   | Acc@1 | FPS | Name | Epochs | AMP_Type |
+   | :----: | :----: | :------: | :------: | :--: |
+   | 79.4% | 1478.4869* | GPU-8p | 300 | O2 |
+   |       | 1732.51 | NPU-1p | 1 | O2 |
+   | 78.8% | 1732.8838 | NPU-8p | 300 | O2 |
+
+   Note (*): the collection windows differ slightly; the NPU numbers drop the first 3 steps while the GPU numbers drop the first 5, so the starred GPU figure may be slightly low.
+
+# Self-Verification Report
+
+    ```shell
+    # 1p train perf
+    # check that the performance log file is produced correctly
+    bash test/train_performance_1p.sh --data_path=xxx
+    # acceptance result: OK / Failed
+    # remarks: target performance 301 FPS; measured performance 163.308 FPS;
+
+    # 8p train perf
+    # check that the performance log file is produced correctly
+    bash test/train_performance_8p.sh --data_path=xxx
+    # acceptance result: OK
+    # remarks: target performance FPS; measured performance FPS;
+
+    # 8p train full
+    # check that the performance/accuracy log files are produced and that the model checkpoint is saved correctly
+    bash test/train_full_8p.sh --data_path=xxx
+    # acceptance result: OK
+    # remarks: target accuracy 79.4; measured accuracy 79.36;
+    ```
+**Special notes for your reviewers**:
+
+If environment-variable problems come up during acceptance testing, change the `source` command that runs before the python command in the test scripts so that it sources `set_env.sh` from the Ascend toolkit instead.
\ No newline at end of file
diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/README_raw.md b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/README_raw.md
new file mode 100644
index 0000000000..9fc37986f5
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/README_raw.md
@@ -0,0 +1,128 @@
+This repository contains PyTorch evaluation code, training code and pretrained models for the following projects:
+
++ SPACH ([A Battle of Network Structures: An Empirical Study of CNN, Transformer, and MLP](https://arxiv.org/abs/2108.13002))
++ sMLP ([Sparse MLP for Image Recognition: Is Self-Attention Really Necessary?](https://arxiv.org/abs/2109.05422))
++ ShiftViT ([When Shift Operation Meets Vision Transformer: An Extremely Simple Alternative to Attention Mechanism](https://arxiv.org/abs/2201.10801))
+
+Other unofficial implementations:
+
++ ShiftViT
+  + [Keras](https://keras.io/examples/vision/shiftvit/) by [Aritra Roy Gosthipaty](https://twitter.com/ariG23498) and [Ritwik Raha](https://twitter.com/ritwik_raha)
+
+# Main Results on ImageNet with Pretrained Models
+
+
+| name | acc@1 | #params | FLOPs | url |
+| ------------------ | ----- | ------- | ----- | ------------------------------------------------------------ |
+| SPACH-Conv-MS-S | 81.6 | 44M | 7.2G | [github](https://github.com/microsoft/SPACH/releases/download/v1.0/spach_ms_conv_s.pth) |
+| SPACH-Trans-MS-S | 82.9 | 40M | 7.6G | [github](https://github.com/microsoft/SPACH/releases/download/v1.0/spach_ms_trans_s.pth) |
+| SPACH-MLP-MS-S | 82.1 | 46M | 8.2G | [github](https://github.com/microsoft/SPACH/releases/download/v1.0/spach_ms_mlp_s.pth) |
+| SPACH-Hybrid-MS-S | 83.7 | 63M | 11.2G | [github](https://github.com/microsoft/SPACH/releases/download/v1.0/spach_ms_hybrid_s.pth) |
+| SPACH-Hybrid-MS-S+ | 83.9 | 63M | 12.3G | [github](https://github.com/microsoft/SPACH/releases/download/v1.0/spach_ms_hybrid_s+.pth) |
+| sMLPNet-T | 81.9 | 24M | 5.0G | |
+| sMLPNet-S | 83.1 | 49M | 10.3G | [github](https://github.com/microsoft/SPACH/releases/download/v1.0/smlp_s.pth) |
+| sMLPNet-B | 83.4 | 66M | 14.0G | [github](https://github.com/microsoft/SPACH/releases/download/v1.0/smlp_b.pth) |
+| Shift-T / light | 79.4 | 20M | 3.0G | [github](https://github.com/microsoft/SPACH/releases/download/v1.0/shiftvit_tiny_light.pth) |
+| Shift-T | 81.7 | 29M | 4.5G | [github](https://github.com/microsoft/SPACH/releases/download/v1.0/shiftvit_tiny_r2.pth) |
+| Shift-S / light | 81.6 | 34M | 5.7G | [github](https://github.com/microsoft/SPACH/releases/download/v1.0/shiftvit_small_light.pth) |
+| Shift-S | 82.8 | 50M | 8.8G | [github](https://github.com/microsoft/SPACH/releases/download/v1.0/shiftvit_small_r2.pth) |
+
+# Usage
+
+## Install
+First, clone the repo and install requirements:
+
+```bash
+git clone https://github.com/microsoft/Spach
+pip install -r requirements.txt
+```
+
+## Data preparation
+
+Download and extract ImageNet train and val images from http://image-net.org/.
+The directory structure is the standard layout for the torchvision [`datasets.ImageFolder`](https://pytorch.org/docs/stable/torchvision/datasets.html#imagefolder),
+and the training and validation data is expected to be in the `train/` folder and `val/` folder respectively:
+
+```
+/path/to/imagenet/
+  train/
+    class1/
+      img1.jpeg
+    class2/
+      img2.jpeg
+  val/
+    class1/
+      img3.jpeg
+    class2/
+      img4.jpeg
+```
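+
+As an optional sanity check (assuming the standard ImageNet-1k split laid out as above), each split
+should contain exactly 1000 class folders:
+
+```bash
+ls /path/to/imagenet/train | wc -l   # expect 1000
+ls /path/to/imagenet/val | wc -l     # expect 1000
+```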
+
+## Evaluation
+
+To evaluate a pre-trained model on ImageNet val with a single GPU run:
+
+```bash
+python main.py --eval --resume <checkpoint-path> --model <model-name> --data-path <imagenet-path>
+```
+
+For example, to evaluate the SPACH-Hybrid-MS-S model, run
+
+```bash
+python main.py --eval --resume spach_ms_hybrid_s.pth --model spach_ms_s_patch4_224_hybrid --data-path <imagenet-path>
+```
+
+giving
+```bash
+* Acc@1 83.658 Acc@5 96.762 loss 0.688
+```
+
+You can find all supported models in `models/registry.py`.
+
+## Training
+
+One can simply call the following script to run the training process. Distributed training is recommended even on a single GPU node.
+
+```bash
+python -m torch.distributed.launch --nproc_per_node <num-gpus> --use_env main.py
+--model <model-name>
+--data-path <imagenet-path>
+--output_dir <output-path>
+--dist-eval
+```
+
+# Citation
+
+```
+@article{zhao2021battle,
+  title={A Battle of Network Structures: An Empirical Study of CNN, Transformer, and MLP},
+  author={Zhao, Yucheng and Wang, Guangting and Tang, Chuanxin and Luo, Chong and Zeng, Wenjun and Zha, Zheng-Jun},
+  journal={arXiv preprint arXiv:2108.13002},
+  year={2021}
+}
+
+@article{tang2021sparse,
+  title={Sparse MLP for Image Recognition: Is Self-Attention Really Necessary?},
+  author={Tang, Chuanxin and Zhao, Yucheng and Wang, Guangting and Luo, Chong and Xie, Wenxuan and Zeng, Wenjun},
+  journal={arXiv preprint arXiv:2109.05422},
+  year={2021}
+}
+
+```
+
+# Contributing
+
+This project welcomes contributions and suggestions. Most contributions require you to agree to a
+Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
+the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com.
+
+When you submit a pull request, a CLA bot will automatically determine whether you need to provide
+a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions
+provided by the bot. You will only need to do this once across all repos using our CLA.
+
+This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
+For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
+contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
+
+# Acknowledgement
+
+Our code is built on top of [DeiT](https://github.com/facebookresearch/deit).
We test throughput following [Swin Transformer](https://github.com/microsoft/Swin-Transformer) diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/SECURITY.md b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/SECURITY.md new file mode 100644 index 0000000000..f7b89984f0 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/SECURITY.md @@ -0,0 +1,41 @@ + + +## Security + +Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). + +If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. + +## Reporting Security Issues + +**Please do not report security vulnerabilities through public GitHub issues.** + +Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). + +If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). + +You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). + +Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: + + * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) + * Full paths of source file(s) related to the manifestation of the issue + * The location of the affected source code (tag/branch/commit or direct URL) + * Any special configuration required to reproduce the issue + * Step-by-step instructions to reproduce the issue + * Proof-of-concept or exploit code (if possible) + * Impact of the issue, including how an attacker might exploit the issue + +This information will help us triage your report more quickly. + +If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. + +## Preferred Languages + +We prefer all communications to be in English. + +## Policy + +Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 
+ + \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/SUPPORT.md b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/SUPPORT.md new file mode 100644 index 0000000000..8b05616fc9 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/SUPPORT.md @@ -0,0 +1,25 @@ +# TODO: The maintainer of this repo has not yet edited this file + +**REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project? + +- **No CSS support:** Fill out this template with information about how to file issues and get help. +- **Yes CSS support:** Fill out an intake form at [aka.ms/spot](https://aka.ms/spot). CSS will work with/help you to determine next steps. More details also available at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). +- **Not sure?** Fill out a SPOT intake as though the answer were "Yes". CSS will help you decide. + +*Then remove this first heading from this SUPPORT.MD file before publishing your repo.* + +# Support + +## How to file issues and get help + +This project uses GitHub Issues to track bugs and feature requests. Please search the existing +issues before filing new issues to avoid duplicates. For new issues, file your bug or +feature request as a new Issue. + +For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE +FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER +CHANNEL. WHERE WILL YOU HELP PEOPLE?**. + +## Microsoft Support Policy + +Support for this **PROJECT or PRODUCT** is limited to the resources listed above. diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/bind_pyt.py b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/bind_pyt.py new file mode 100644 index 0000000000..55daf6571d --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/bind_pyt.py @@ -0,0 +1,141 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +import sys +import subprocess +import os +import socket +from argparse import ArgumentParser, REMAINDER + +import torch + + +def parse_args(): + """ + Helper function parsing the command line options + @retval ArgumentParser + """ + parser = ArgumentParser(description="PyTorch distributed training launch " + "helper utilty that will spawn up " + "multiple distributed processes") + + # Optional arguments for the launch helper + parser.add_argument("--nnodes", type=int, default=1, + help="The number of nodes to use for distributed " + "training") + parser.add_argument("--node_rank", type=int, default=0, + help="The rank of the node for multi-node distributed " + "training") + parser.add_argument("--nproc_per_node", type=int, default=8, + help="The number of processes to launch on each node, " + "for GPU training, this is recommended to be set " + "to the number of GPUs in your system so that " + "each process can be bound to a single GPU.") + parser.add_argument("--master_addr", default="127.0.0.1", type=str, + help="Master node (rank 0)'s address, should be either " + "the IP address or the hostname of node 0, for " + "single node multi-proc training, the " + "--master_addr can simply be 127.0.0.1") + parser.add_argument("--master_port", default=29688, type=int, + help="Master node (rank 0)'s free port that needs to " + "be used for communciation during distributed " + "training") + parser.add_argument('--no_hyperthreads', action='store_true', + help='Flag to disable binding to hyperthreads') + parser.add_argument('--no_membind', action='store_true', + help='Flag to disable memory binding') + + # non-optional arguments for binding + parser.add_argument("--nsockets_per_node", type=int, required=True, + help="Number of CPU sockets on a node") + parser.add_argument("--ncores_per_socket", type=int, required=True, + help="Number of CPU cores per socket") + + # positional + parser.add_argument("training_script", type=str, + help="The full path to the single GPU training " + "program/script to be launched in parallel, " + "followed by all the arguments for the " + "training script") + + # rest from the training program + parser.add_argument('training_script_args', nargs=REMAINDER) + parser.add_argument("--data_path", type=str, default='') + return parser.parse_args() + + +def main(): + args = parse_args() + + # variables for numactrl binding + + NSOCKETS = args.nsockets_per_node + NGPUS_PER_SOCKET = (args.nproc_per_node // args.nsockets_per_node) + ( + 1 if (args.nproc_per_node % args.nsockets_per_node) else 0) + NCORES_PER_GPU = args.ncores_per_socket // NGPUS_PER_SOCKET + + # world size in terms of number of processes + dist_world_size = args.nproc_per_node * args.nnodes + + # set PyTorch distributed related environmental variables + current_env = os.environ.copy() + current_env["MASTER_ADDR"] = args.master_addr + current_env["MASTER_PORT"] = str(args.master_port) + current_env["WORLD_SIZE"] = str(dist_world_size) + current_env['NODE_RANK'] = str(args.node_rank) + + processes = [] + + for local_rank in range(0, args.nproc_per_node): + # each process's rank + dist_rank = args.nproc_per_node * args.node_rank + local_rank + current_env["RANK"] = str(dist_rank) + current_env['LOCAL_RANK'] = str(local_rank) + + # form numactrl binding command + cpu_ranges = [local_rank * NCORES_PER_GPU, + (local_rank + 1) * NCORES_PER_GPU - 1, + local_rank * NCORES_PER_GPU + (NCORES_PER_GPU * NGPUS_PER_SOCKET * NSOCKETS), + (local_rank + 
1) * NCORES_PER_GPU + (NCORES_PER_GPU * NGPUS_PER_SOCKET * NSOCKETS) - 1] + + numactlargs = [] + if args.no_hyperthreads: + numactlargs += ["--physcpubind={}-{}".format(*cpu_ranges[0:2])] + else: + numactlargs += ["--physcpubind={}-{},{}-{}".format(*cpu_ranges)] + + if not args.no_membind: + memnode = local_rank // NGPUS_PER_SOCKET + numactlargs += ["--membind={}".format(memnode)] + + # spawn the processes + cmd = ["/usr/bin/numactl"] \ + + numactlargs \ + + [sys.executable, + "-u", + args.training_script, + "--local_rank={}".format(local_rank) + ] \ + + args.training_script_args + + process = subprocess.Popen(cmd, env=current_env) + processes.append(process) + + for process in processes: + process.wait() + + +if __name__ == "__main__": + main() + diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/datasets.py b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/datasets.py new file mode 100644 index 0000000000..6e1033291f --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/datasets.py @@ -0,0 +1,73 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright (c) 2015-present, Facebook, Inc. +# All rights reserved. +import os +import json + +from torchvision import datasets, transforms +from torchvision.datasets.folder import ImageFolder, default_loader + +from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD +from timm.data import create_transform + +from torch.utils.data import Dataset + + +def build_dataset(is_train, args): + transform = build_transform(is_train, args) + + if args.data_set == 'IMNET': + root = os.path.join(args.data_path, 'train' if is_train else 'val') + dataset = datasets.ImageFolder(root, transform=transform) + nb_classes = 1000 + else: + raise NotImplementedError("Support ImageNet only.") + + return dataset, nb_classes + + +def build_transform(is_train, args): + resize_im = args.input_size > 32 + if is_train: + # this should always dispatch to transforms_imagenet_train + transform = create_transform( + input_size=args.input_size, + is_training=True, + color_jitter=args.color_jitter, + auto_augment=args.aa, + interpolation=args.train_interpolation, + re_prob=args.reprob, + re_mode=args.remode, + re_count=args.recount, + ) + if not resize_im: + # replace RandomResizedCropAndInterpolation with + # RandomCrop + transform.transforms[0] = transforms.RandomCrop( + args.input_size, padding=4) + return transform + + t = [] + if resize_im: + size = int((256 / 224) * args.input_size) + t.append( + transforms.Resize(size, interpolation=3), # to maintain same ratio w.r.t. 
224 images + ) + t.append(transforms.CenterCrop(args.input_size)) + + t.append(transforms.ToTensor()) + t.append(transforms.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD)) + return transforms.Compose(t) diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/engine.py b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/engine.py new file mode 100644 index 0000000000..4ff8eb7ed4 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/engine.py @@ -0,0 +1,175 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright (c) 2015-present, Facebook, Inc. +# All rights reserved. +""" +Train and eval functions used in main.py +""" +import math +from operator import mod +import sys +from typing import Iterable, Optional +import time +import logging +import os +import torch + +from timm.data import Mixup +from timm.utils import accuracy, ModelEma + +from losses import DistillationLoss +import utils +import apex.amp + +def train_one_epoch(model: torch.nn.Module, criterion: DistillationLoss, + data_loader: Iterable, optimizer: torch.optim.Optimizer, + device: torch.device, epoch: int, + output_dir: str, batch_size: int, + max_norm: float = 0, + model_ema: Optional[ModelEma] = None, mixup_fn: Optional[Mixup] = None, + set_training_mode=True, logger=logging, use_npu=False): + model.train(set_training_mode) + metric_logger = utils.MetricLogger(delimiter=" ", logger=logger) + metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) + header = 'Epoch: [{}]'.format(epoch) + print_freq = 10 + i = 0 + + for samples, targets in metric_logger.log_every(data_loader, print_freq, batch_size, header, use_npu=use_npu): + if i == 10: + with torch.autograd.profiler.profile(use_cuda=True) as prof: + samples = samples.to(device, non_blocking=True) + targets = targets.to(device, non_blocking=True) + + if mixup_fn is not None: + samples, targets = mixup_fn(samples, targets) + + outputs = model(samples) + loss = criterion(samples, outputs, targets) + loss_value = loss.item() + + if not math.isfinite(loss_value): + logger.info("Loss is {}, stopping training".format(loss_value)) + sys.exit(1) + + optimizer.zero_grad() + + # this attribute is added by timm on one optimizer (adahessian) + is_second_order = hasattr(optimizer, 'is_second_order') and optimizer.is_second_order + + with apex.amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward(create_graph=is_second_order) + + optimizer.step() + + metric_logger.update(loss=loss_value) + metric_logger.update(lr=optimizer.param_groups[0]["lr"]) + print(prof.key_averages().table(sort_by="self_cpu_time_total")) + prof.export_chrome_trace("/home/zhangjiangyuan/SPACH-main/output_8p_perf.prof") + else: + samples = samples.to(device, non_blocking=True) + targets = targets.to(device, non_blocking=True) + + if mixup_fn is not None: + samples, targets = mixup_fn(samples, targets) 
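+            # Non-profiled path: forward pass, distillation-aware criterion, apex AMP backward
+            # (opt_level O2 with static loss scale 128.0, as initialised in main.py), then optimizer.step().
+            # It mirrors the profiled branch above (i == 10), which additionally dumps a Chrome trace
+            # to a hard-coded output path that should be adjusted before running on another machine.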
+ + outputs = model(samples) + loss = criterion(samples, outputs, targets) + loss_value = loss.item() + + if not math.isfinite(loss_value): + logger.info("Loss is {}, stopping training".format(loss_value)) + sys.exit(1) + + optimizer.zero_grad() + + # this attribute is added by timm on one optimizer (adahessian) + is_second_order = hasattr(optimizer, 'is_second_order') and optimizer.is_second_order + + with apex.amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward(create_graph=is_second_order) + + optimizer.step() + + metric_logger.update(loss=loss_value) + metric_logger.update(lr=optimizer.param_groups[0]["lr"]) + i += 1 + # gather the stats from all processes + metric_logger.synchronize_between_processes(use_npu=use_npu, device=device) + logger.info(f"Averaged stats: {metric_logger}") + return {k: meter.global_avg for k, meter in metric_logger.meters.items()} + + +@torch.no_grad() +def evaluate(data_loader, model, device, batch_size, logger=logging, use_npu=False): + criterion = torch.nn.CrossEntropyLoss() + + metric_logger = utils.MetricLogger(delimiter=" ") + header = 'Test:' + + # switch to evaluation mode + model.eval() + + for images, target in metric_logger.log_every(data_loader, 10, batch_size, header, use_npu=use_npu): + images = images.to(device, non_blocking=True) + target = target.to(device, non_blocking=True) + + output = model(images) + loss = criterion(output, target) + + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + + batch_size = images.shape[0] + metric_logger.update(loss=loss.item()) + metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) + metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) + # gather the stats from all processes + metric_logger.synchronize_between_processes(use_npu=use_npu, device=device) + logger.info('* Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f} loss {losses.global_avg:.3f}' + .format(top1=metric_logger.acc1, top5=metric_logger.acc5, losses=metric_logger.loss)) + + return {k: meter.global_avg for k, meter in metric_logger.meters.items()} + + +@torch.no_grad() +def throughput(data_loader, model, logger=logging, use_npu=False): + model.eval() + + if use_npu: + for idx, (images, _) in enumerate(data_loader): + images = images.npu(non_blocking=True) + batch_size = images.shape[0] + for i in range(50): + model(images) + logger.info(f"throughput averaged with 30 times") + tic1 = time.time() + for i in range(30): + model(images) + tic2 = time.time() + logger.info(f"batch_size {batch_size} throughput {30 * batch_size / (tic2 - tic1)}") + return + else: + for idx, (images, _) in enumerate(data_loader): + images = images.cuda(non_blocking=True) + batch_size = images.shape[0] + for i in range(50): + model(images) + logger.info(f"throughput averaged with 30 times") + tic1 = time.time() + for i in range(30): + model(images) + tic2 = time.time() + logger.info(f"batch_size {batch_size} throughput {30 * batch_size / (tic2 - tic1)}") + return diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/logger.py b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/logger.py new file mode 100644 index 0000000000..52f4ac9d5f --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/logger.py @@ -0,0 +1,48 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright (c) 2015-present, Facebook, Inc. +# All rights reserved. +import os +import sys +import logging + + +# @functools.lru_cache() +def create_logger(output_dir, dist_rank=0, name=''): + # create logger + logger = logging.getLogger(name) + logger.setLevel(logging.DEBUG) + logger.propagate = False + + # create formatter + fmt = '[%(asctime)s %(name)s] (%(filename)s %(lineno)d): %(levelname)s %(message)s' + + # create console handlers for master process + if dist_rank == 0: + console_handler = logging.StreamHandler(sys.stdout) + console_handler.setLevel(logging.DEBUG) + console_handler.setFormatter( + logging.Formatter(fmt=fmt, datefmt='%Y-%m-%d %H:%M:%S')) + logger.addHandler(console_handler) + + # create file handlers + if len(output_dir) > 0: + file_handler = logging.FileHandler(os.path.join(output_dir, f'log_rank{dist_rank}.txt'), mode='a') + file_handler.setLevel(logging.DEBUG) + file_handler.setFormatter(logging.Formatter(fmt=fmt, datefmt='%Y-%m-%d %H:%M:%S')) + logger.addHandler(file_handler) + file_handler.flush() + + return logger diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/losses.py b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/losses.py new file mode 100644 index 0000000000..2b3fe0ee63 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/losses.py @@ -0,0 +1,78 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright (c) 2015-present, Facebook, Inc. +# All rights reserved. +""" +Implements the knowledge distillation loss +""" +import torch +from torch.nn import functional as F + + +class DistillationLoss(torch.nn.Module): + """ + This module wraps a standard criterion and adds an extra knowledge distillation loss by + taking a teacher model prediction and using it as additional supervision. + """ + def __init__(self, base_criterion: torch.nn.Module, teacher_model: torch.nn.Module, + distillation_type: str, alpha: float, tau: float): + super().__init__() + self.base_criterion = base_criterion + self.teacher_model = teacher_model + assert distillation_type in ['none', 'soft', 'hard'] + self.distillation_type = distillation_type + self.alpha = alpha + self.tau = tau + + def forward(self, inputs, outputs, labels): + """ + Args: + inputs: The original inputs that are feed to the teacher model + outputs: the outputs of the model to be trained. 
It is expected to be + either a Tensor, or a Tuple[Tensor, Tensor], with the original output + in the first position and the distillation predictions as the second output + labels: the labels for the base criterion + """ + outputs_kd = None + if not isinstance(outputs, torch.Tensor): + # assume that the model outputs a tuple of [outputs, outputs_kd] + outputs, outputs_kd = outputs + base_loss = self.base_criterion(outputs, labels) + if self.distillation_type == 'none': + return base_loss + + if outputs_kd is None: + raise ValueError("When knowledge distillation is enabled, the model is " + "expected to return a Tuple[Tensor, Tensor] with the output of the " + "class_token and the dist_token") + # don't backprop throught the teacher + with torch.no_grad(): + teacher_outputs = self.teacher_model(inputs) + + if self.distillation_type == 'soft': + T = self.tau + # taken from https://github.com/peterliht/knowledge-distillation-pytorch/blob/master/model/net.py#L100 + # with slight modifications + distillation_loss = F.kl_div( + F.log_softmax(outputs_kd / T, dim=1), + F.log_softmax(teacher_outputs / T, dim=1), + reduction='sum', + log_target=True + ) * (T * T) / outputs_kd.numel() + elif self.distillation_type == 'hard': + distillation_loss = F.cross_entropy(outputs_kd, teacher_outputs.argmax(dim=1)) + + loss = base_loss * (1 - self.alpha) + distillation_loss * self.alpha + return loss diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/main.py b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/main.py new file mode 100644 index 0000000000..f69304df1c --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/main.py @@ -0,0 +1,496 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright (c) 2015-present, Facebook, Inc. +# All rights reserved. 
+import argparse +import datetime +import numpy as np +import time +import torch +import torch.backends.cudnn as cudnn +import json +import os +import apex +from pathlib import Path + +from mixup_nova import Mixup_nova as Mixup +# from timm.data import Mixup +from timm.models import create_model +from timm.loss import LabelSmoothingCrossEntropy, SoftTargetCrossEntropy +from timm.scheduler import create_scheduler +from timm.optim import create_optimizer +from timm.utils import NativeScaler, get_state_dict, ModelEma + +from datasets import build_dataset +from engine import train_one_epoch, evaluate, throughput +from losses import DistillationLoss +from samplers import RASampler +import models +import utils +from logger import create_logger + + +def get_args_parser(): + parser = argparse.ArgumentParser('Training and evaluation script', add_help=False) + parser.add_argument('--batch-size', default=128, type=int) + parser.add_argument('--epochs', default=300, type=int) + + # Model parameters + parser.add_argument('--model', default='smlpnet_tiny', type=str, metavar='MODEL', + help='Name of model to train') + parser.add_argument('--input-size', default=224, type=int, help='images input size') + + parser.add_argument('--drop', type=float, default=0.0, metavar='PCT', + help='Dropout rate (default: 0.)') + parser.add_argument('--drop-path', type=float, default=0.1, metavar='PCT', + help='Drop path rate (default: 0.1)') + + parser.add_argument('--model-ema', action='store_true') + parser.add_argument('--no-model-ema', action='store_false', dest='model_ema') + parser.set_defaults(model_ema=True) + parser.add_argument('--model-ema-decay', type=float, default=0.99996, help='') + parser.add_argument('--model-ema-force-cpu', action='store_true', default=False, help='') + + # Optimizer parameters + parser.add_argument('--opt', default='adamw', type=str, metavar='OPTIMIZER', + help='Optimizer (default: "adamw"') + parser.add_argument('--opt-eps', default=1e-8, type=float, metavar='EPSILON', + help='Optimizer Epsilon (default: 1e-8)') + parser.add_argument('--opt-betas', default=None, type=float, nargs='+', metavar='BETA', + help='Optimizer Betas (default: None, use opt default)') + parser.add_argument('--clip-grad', type=float, default=None, metavar='NORM', + help='Clip gradient norm (default: None, no clipping)') + parser.add_argument('--momentum', type=float, default=0.9, metavar='M', + help='SGD momentum (default: 0.9)') + parser.add_argument('--weight-decay', type=float, default=0.05, + help='weight decay (default: 0.05)') + # Learning rate schedule parameters + parser.add_argument('--sched', default='cosine', type=str, metavar='SCHEDULER', + help='LR scheduler (default: "cosine"') + parser.add_argument('--lr', type=float, default=5e-4, metavar='LR', + help='learning rate (default: 5e-4)') + parser.add_argument('--lr-noise', type=float, nargs='+', default=None, metavar='pct, pct', + help='learning rate noise on/off epoch percentages') + parser.add_argument('--lr-noise-pct', type=float, default=0.67, metavar='PERCENT', + help='learning rate noise limit percent (default: 0.67)') + parser.add_argument('--lr-noise-std', type=float, default=1.0, metavar='STDDEV', + help='learning rate noise std-dev (default: 1.0)') + parser.add_argument('--warmup-lr', type=float, default=1e-6, metavar='LR', + help='warmup learning rate (default: 1e-6)') + parser.add_argument('--min-lr', type=float, default=1e-5, metavar='LR', + help='lower lr bound for cyclic schedulers that hit 0 (1e-5)') + + 
parser.add_argument('--decay-epochs', type=float, default=30, metavar='N', + help='epoch interval to decay LR') + parser.add_argument('--warmup-epochs', type=int, default=20, metavar='N', + help='epochs to warmup LR, if scheduler supports') + parser.add_argument('--cooldown-epochs', type=int, default=10, metavar='N', + help='epochs to cooldown LR at min_lr, after cyclic schedule ends') + parser.add_argument('--patience-epochs', type=int, default=10, metavar='N', + help='patience epochs for Plateau LR scheduler (default: 10') + parser.add_argument('--decay-rate', '--dr', type=float, default=0.1, metavar='RATE', + help='LR decay rate (default: 0.1)') + + # Augmentation parameters + parser.add_argument('--color-jitter', type=float, default=0.4, metavar='PCT', + help='Color jitter factor (default: 0.4)') + parser.add_argument('--aa', type=str, default='rand-m9-mstd0.5-inc1', metavar='NAME', + help='Use AutoAugment policy. "v0" or "original". " + \ + "(default: rand-m9-mstd0.5-inc1)'), + parser.add_argument('--smoothing', type=float, default=0.1, help='Label smoothing (default: 0.1)') + parser.add_argument('--train-interpolation', type=str, default='bicubic', + help='Training interpolation (random, bilinear, bicubic default: "bicubic")') + + parser.add_argument('--repeated-aug', action='store_true') + parser.add_argument('--no-repeated-aug', action='store_false', dest='repeated_aug') + parser.set_defaults(repeated_aug=True) + + # * Random Erase params + parser.add_argument('--reprob', type=float, default=0.25, metavar='PCT', + help='Random erase prob (default: 0.25)') + parser.add_argument('--remode', type=str, default='pixel', + help='Random erase mode (default: "pixel")') + parser.add_argument('--recount', type=int, default=1, + help='Random erase count (default: 1)') + parser.add_argument('--resplit', action='store_true', default=False, + help='Do not random erase first (clean) augmentation split') + + # * Mixup params + parser.add_argument('--mixup', type=float, default=0.8, + help='mixup alpha, mixup enabled if > 0. (default: 0.8)') + parser.add_argument('--cutmix', type=float, default=1.0, + help='cutmix alpha, cutmix enabled if > 0. (default: 1.0)') + parser.add_argument('--cutmix-minmax', type=float, nargs='+', default=None, + help='cutmix min/max ratio, overrides alpha and enables cutmix if set (default: None)') + parser.add_argument('--mixup-prob', type=float, default=1.0, + help='Probability of performing mixup or cutmix when either/both is enabled') + parser.add_argument('--mixup-switch-prob', type=float, default=0.5, + help='Probability of switching to cutmix when both mixup and cutmix enabled') + parser.add_argument('--mixup-mode', type=str, default='batch', + help='How to apply mixup/cutmix params. 
Per "batch", "pair", or "elem"') + + # Distillation parameters + parser.add_argument('--teacher-model', default='regnety_160', type=str, metavar='MODEL', + help='Name of teacher model to train (default: "regnety_160"') + parser.add_argument('--teacher-path', type=str, default='') + parser.add_argument('--distillation-type', default='none', choices=['none', 'soft', 'hard'], type=str, help="") + parser.add_argument('--distillation-alpha', default=0.5, type=float, help="") + parser.add_argument('--distillation-tau', default=1.0, type=float, help="") + + # * Finetuning params + parser.add_argument('--finetune', default='', help='finetune from checkpoint') + + # Dataset parameters + parser.add_argument('--data-path', default='/datasets01/imagenet_full_size/061417/', type=str, + help='dataset path') + parser.add_argument('--data-set', default='IMNET', choices=['CIFAR', 'IMNET', 'INAT', 'INAT19'], + type=str, help='Image Net dataset path') + parser.add_argument('--inat-category', default='name', + choices=['kingdom', 'phylum', 'class', 'order', 'supercategory', 'family', 'genus', 'name'], + type=str, help='semantic granularity') + + parser.add_argument('--output_dir', default='', + help='path where to save, empty for no saving') + parser.add_argument('--device', default='npu', + help='device to use for training / testing') + parser.add_argument('--seed', default=0, type=int) + parser.add_argument('--resume', default='', help='resume from checkpoint') + parser.add_argument('--start_epoch', default=0, type=int, metavar='N', + help='start epoch') + parser.add_argument('--eval', action='store_true', help='Perform evaluation only') + parser.add_argument('--dist-eval', action='store_true', default=False, help='Enabling distributed evaluation') + parser.add_argument('--num_workers', default=10, type=int) + parser.add_argument('--pin-mem', action='store_true', + help='Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.') + parser.add_argument('--no-pin-mem', action='store_false', dest='pin_mem', + help='') + parser.set_defaults(pin_mem=True) + + # distributed training parameters + parser.add_argument('--world_size', default=1, type=int, + help='number of distributed processes') + parser.add_argument('--dist_url', default='env://', help='url used to set up distributed training') + parser.add_argument("--local_rank", type=int, default=0) + # parameters for training on preemptible clusters + parser.add_argument('--auto-resume', action='store_true') + parser.add_argument('--no-auto-resume', action='store_false', dest='auto_resume') + parser.set_defaults(auto_resume=True) + + # spach parameters + parser.add_argument('--stem-type', default='conv1', type=str, choices=['conv1', 'conv4']) + parser.add_argument('--shared-spatial-func', action='store_true') + # npu parameters + parser.add_argument('--npu', action='store_true', default=False, help='Enabling npu training') + # parameters for benchmark + parser.add_argument('--throughput', action='store_true') + + return parser + + +def parse_model_args(args): + model = args.model + model_args = [] + if model.startswith('spach'): + model_args = ['stem_type', 'shared_spatial_func'] + args = vars(args) + model_args = {_: args[_] for _ in model_args} + return model_args + + +def main(args): + + utils.init_distributed_mode(args) + logger = create_logger(args.output_dir, utils.get_rank(), args.model) + logger.info(args) + + if args.distillation_type != 'none' and args.finetune and not args.eval: + raise NotImplementedError("Finetuning with 
distillation not yet supported") + + if args.npu: + device = f'npu:{str(utils.get_rank())}' + else: + device = torch.device(args.device) + + # fix the seed for reproducibility + seed = args.seed + utils.get_rank() + torch.manual_seed(seed) + np.random.seed(seed) + # random.seed(seed) + cudnn.benchmark = True + + dataset_train, args.nb_classes = build_dataset(is_train=True, args=args) + dataset_val, _ = build_dataset(is_train=False, args=args) + + if args.distributed: # args.distributed: + num_tasks = utils.get_world_size() + global_rank = utils.get_rank() + if args.repeated_aug: + sampler_train = RASampler( + dataset_train, num_replicas=num_tasks, rank=global_rank, shuffle=True + ) + else: + sampler_train = torch.utils.data.DistributedSampler( + dataset_train, num_replicas=num_tasks, rank=global_rank, shuffle=True + ) + if args.dist_eval: + if len(dataset_val) % num_tasks != 0: + logger.info('Warning: Enabling distributed evaluation with an eval dataset not divisible by process number. ' + 'This will slightly alter validation results as extra duplicate entries are added to achieve ' + 'equal num of samples per-process.') + sampler_val = torch.utils.data.DistributedSampler( + dataset_val, num_replicas=num_tasks, rank=global_rank, shuffle=False) + else: + sampler_val = torch.utils.data.SequentialSampler(dataset_val) + else: + sampler_train = torch.utils.data.RandomSampler(dataset_train) + sampler_val = torch.utils.data.SequentialSampler(dataset_val) + + data_loader_train = torch.utils.data.DataLoader( + dataset_train, sampler=sampler_train, + batch_size=args.batch_size, + num_workers=args.num_workers, + pin_memory=args.pin_mem, + drop_last=True, + ) + + data_loader_val = torch.utils.data.DataLoader( + dataset_val, sampler=sampler_val, + batch_size=int(1.5 * args.batch_size), + num_workers=args.num_workers, + pin_memory=args.pin_mem, + drop_last=False + ) + + mixup_fn = None + mixup_active = args.mixup > 0 or args.cutmix > 0. 
or args.cutmix_minmax is not None + if mixup_active: + mixup_fn = Mixup( + mixup_alpha=args.mixup, cutmix_alpha=args.cutmix, cutmix_minmax=args.cutmix_minmax, + prob=args.mixup_prob, switch_prob=args.mixup_switch_prob, mode=args.mixup_mode, + label_smoothing=args.smoothing, num_classes=args.nb_classes) + + logger.info(f"Creating model: {args.model}") + model = create_model( + args.model, + pretrained=False, + num_classes=args.nb_classes, + drop_rate=args.drop, + drop_path_rate=args.drop_path, + drop_block_rate=None, + **parse_model_args(args) + ) + + if args.finetune: + if args.finetune.startswith('https'): + checkpoint = torch.hub.load_state_dict_from_url( + args.finetune, map_location='cpu', check_hash=True) + else: + checkpoint = torch.load(args.finetune, map_location='cpu') + + checkpoint_model = checkpoint['model'] + state_dict = model.state_dict() + for k in ['head.weight', 'head.bias', 'head_dist.weight', 'head_dist.bias']: + if k in checkpoint_model and checkpoint_model[k].shape != state_dict[k].shape: + logger.info(f"Removing key {k} from pretrained checkpoint") + del checkpoint_model[k] + + # interpolate position embedding + pos_embed_checkpoint = checkpoint_model['pos_embed'] + embedding_size = pos_embed_checkpoint.shape[-1] + num_patches = model.patch_embed.num_patches + num_extra_tokens = model.pos_embed.shape[-2] - num_patches + # height (== width) for the checkpoint position embedding + orig_size = int((pos_embed_checkpoint.shape[-2] - num_extra_tokens) ** 0.5) + # height (== width) for the new position embedding + new_size = int(num_patches ** 0.5) + # class_token and dist_token are kept unchanged + extra_tokens = pos_embed_checkpoint[:, :num_extra_tokens] + # only the position tokens are interpolated + pos_tokens = pos_embed_checkpoint[:, num_extra_tokens:] + pos_tokens = pos_tokens.reshape(-1, orig_size, orig_size, embedding_size).permute(0, 3, 1, 2) + pos_tokens = torch.nn.functional.interpolate( + pos_tokens, size=(new_size, new_size), mode='bicubic', align_corners=False) + pos_tokens = pos_tokens.permute(0, 2, 3, 1).flatten(1, 2) + new_pos_embed = torch.cat((extra_tokens, pos_tokens), dim=1) + checkpoint_model['pos_embed'] = new_pos_embed + + model.load_state_dict(checkpoint_model, strict=False) + + print(device, flush=True) + model.to(device) + + model_ema = None + if args.model_ema: + # Important to create EMA model after cuda(), DP wrapper, and AMP but before SyncBN and DDP wrapper + model_ema = ModelEma( + model, + decay=args.model_ema_decay, + device='cpu' if args.model_ema_force_cpu else '', + resume='') + + model_without_ddp = model + linear_scaled_lr = args.lr * args.batch_size * utils.get_world_size() / 512.0 + args.lr = linear_scaled_lr + optimizer = apex.optimizers.NpuFusedAdamW(model.parameters(), args.lr, + weight_decay=args.weight_decay) + lr_scheduler, _ = create_scheduler(args, optimizer) + model, optimizer = apex.amp.initialize(model, optimizer, opt_level="O2", loss_scale=128.0, combine_grad=True) + + if args.distributed: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], broadcast_buffers=False) + model_without_ddp = model.module + + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + logger.info(f'number of params: {n_parameters}') + if hasattr(model_without_ddp, 'flops'): + try: + flops = model_without_ddp.flops() + logger.info(f"number of GFLOPs: {flops / 1e9}") + except Exception as e: + logger.exception(e) + + criterion = LabelSmoothingCrossEntropy() + + if args.mixup > 0.: + # smoothing 
is handled with mixup label transform + criterion = SoftTargetCrossEntropy() + elif args.smoothing: + criterion = LabelSmoothingCrossEntropy(smoothing=args.smoothing) + else: + criterion = torch.nn.CrossEntropyLoss() + + teacher_model = None + if args.distillation_type != 'none': + assert args.teacher_path, 'need to specify teacher-path when using distillation' + logger.info(f"Creating teacher model: {args.teacher_model}") + teacher_model = create_model( + args.teacher_model, + pretrained=False, + num_classes=args.nb_classes, + global_pool='avg', + ) + if args.teacher_path.startswith('https'): + checkpoint = torch.hub.load_state_dict_from_url( + args.teacher_path, map_location='cpu', check_hash=True) + else: + checkpoint = torch.load(args.teacher_path, map_location='cpu') + teacher_model.load_state_dict(checkpoint['model']) + teacher_model.to(device) + teacher_model.eval() + + # wrap the criterion in our custom DistillationLoss, which + # just dispatches to the original criterion if args.distillation_type is 'none' + criterion = DistillationLoss( + criterion, teacher_model, args.distillation_type, args.distillation_alpha, args.distillation_tau + ) + + output_dir = Path(args.output_dir) + if args.auto_resume: + _resume = str((output_dir / 'checkpoint.pth').absolute()) + if os.path.exists(_resume): + logger.info(f'auto resume from {output_dir}/checkpoint.pth') + args.resume = _resume + + if args.resume: + if args.resume.startswith('https'): + checkpoint = torch.hub.load_state_dict_from_url( + args.resume, map_location='cpu', check_hash=True) + else: + checkpoint = torch.load(args.resume, map_location='cpu') + model_without_ddp.load_state_dict(checkpoint['model']) + if not args.eval and 'optimizer' in checkpoint and 'lr_scheduler' in checkpoint and 'epoch' in checkpoint: + optimizer.load_state_dict(checkpoint['optimizer']) + lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) + args.start_epoch = checkpoint['epoch'] + 1 + if args.model_ema: + utils._load_checkpoint_for_ema(model_ema, checkpoint['model_ema']) + + if args.eval: + test_stats = evaluate(data_loader_val, model, device, args.batch_size, logger=logger, use_npu=args.npu) + logger.info(f"Accuracy of the network on the {len(dataset_val)} test images: {test_stats['acc1']:.1f}%") + return + + if args.throughput: + throughput(data_loader_val, model, logger=logger, use_npu=args.npu) + return + + criterion = criterion.to(device) + logger.info(f"Start training for {args.epochs} epochs") + start_time = time.time() + max_accuracy = 0.0 + for epoch in range(args.start_epoch, args.epochs): + if args.distributed: + data_loader_train.sampler.set_epoch(epoch) + + train_stats = train_one_epoch( + model, criterion, data_loader_train, + optimizer, device, epoch, args.output_dir, args.batch_size, + args.clip_grad, model_ema, mixup_fn, + set_training_mode=args.finetune == '', # keep in eval mode during finetuning + logger=logger, + use_npu=args.npu + ) + + lr_scheduler.step(epoch) + if args.output_dir and epoch % 5==0: + checkpoint_paths = [output_dir / f'checkpoint_{str(epoch)}.pth'] + for checkpoint_path in checkpoint_paths: + utils.save_on_master({ + 'model': model_without_ddp.state_dict(), + 'optimizer': optimizer.state_dict(), + 'lr_scheduler': lr_scheduler.state_dict(), + 'epoch': epoch, + 'model_ema': get_state_dict(model_ema), + 'amp': apex.amp.state_dict(), + 'args': args, + }, checkpoint_path) + + test_stats = evaluate(data_loader_val, model, device, args.batch_size, logger=logger, use_npu=args.npu) + logger.info(f"Accuracy of the network 
on the {len(dataset_val)} test images: {test_stats['acc1']:.1f}%") + + if test_stats["acc1"] > max_accuracy: + best_checkpoint_path = output_dir / 'best.pth' + utils.save_on_master({ + 'model': model_without_ddp.state_dict(), + 'optimizer': optimizer.state_dict(), + 'lr_scheduler': lr_scheduler.state_dict(), + 'epoch': epoch, + 'model_ema': get_state_dict(model_ema), + 'amp': apex.amp.state_dict(), + 'args': args, + }, best_checkpoint_path) + max_accuracy = max(max_accuracy, test_stats["acc1"]) + logger.info(f'Max accuracy: {max_accuracy:.2f}%') + + log_stats = {**{f'train_{k}': v for k, v in train_stats.items()}, + **{f'test_{k}': v for k, v in test_stats.items()}, + 'epoch': epoch, + 'n_parameters': n_parameters} + + if args.output_dir and utils.is_main_process(): + with (output_dir / "log.txt").open("a") as f: + f.write(json.dumps(log_stats) + "\n") + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + logger.info('Training time {}'.format(total_time_str)) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser('DeiT training and evaluation script', parents=[get_args_parser()]) + args = parser.parse_args() + if args.output_dir: + Path(args.output_dir).mkdir(parents=True, exist_ok=True) + main(args) diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/mixup_nova.py b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/mixup_nova.py new file mode 100644 index 0000000000..a86506ae5c --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/mixup_nova.py @@ -0,0 +1,48 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import timm.data.mixup as mixup +import torch + +def one_hot(x, num_classes, on_value=1., off_value=0.): + x = x.long().view(-1, 1) + device = x.device + return torch.full((x.size()[0], num_classes), off_value, device=device).scatter_(1, x, on_value) + + +def mixup_target(target, num_classes, lam=1., smoothing=0.0): + off_value = smoothing / num_classes + on_value = 1. - smoothing + off_value + device = target.device + y1 = one_hot(target, num_classes, on_value=on_value, off_value=off_value) + y2 = one_hot(target.flip(0), num_classes, on_value=on_value, off_value=off_value) + return y1 * lam + y2 * (1. 
- lam) + +class Mixup_nova(mixup.Mixup): + def __init__(self, mixup_alpha=1., cutmix_alpha=0., cutmix_minmax=None, prob=1.0, switch_prob=0.5, + mode='batch', correct_lam=True, label_smoothing=0.1, num_classes=1000): + super().__init__(mixup_alpha=mixup_alpha, cutmix_alpha=cutmix_alpha, cutmix_minmax=cutmix_minmax, prob=prob, switch_prob=switch_prob, + mode=mode, correct_lam=correct_lam, label_smoothing=label_smoothing, num_classes=num_classes) + + def __call__(self, x, target): + assert len(x) % 2 == 0, 'Batch size should be even when using this' + if self.mode == 'elem': + lam = self._mix_elem(x) + elif self.mode == 'pair': + lam = self._mix_pair(x) + else: + lam = self._mix_batch(x) + target = mixup_target(target, self.num_classes, lam, self.label_smoothing) + return x, target + diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/__init__.py b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/__init__.py new file mode 100644 index 0000000000..a1c231acf2 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +from .registry import * diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/registry.py b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/registry.py new file mode 100644 index 0000000000..f094e30d3a --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/registry.py @@ -0,0 +1,275 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
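+# NOTE: every constructor below is registered through timm's @register_model decorator,
+# so main.py can build a network purely from its string name via timm's create_model
+# factory, e.g. (illustrative call only):
+#     model = create_model('shiftvit_light_tiny', pretrained=False)
+# The `pretrained` argument is accepted for interface compatibility, but none of the
+# factories in this file load pretrained weights.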
+from timm.models.registry import register_model +from .smlp import sMLPNet +from .spach import Spach, SpachMS +from .shiftvit import ShiftViT + + +# sMLP +@register_model +def smlpnet_tiny(pretrained=False, **kwargs): + model = sMLPNet(dim=80, alpha=3, patch_size=4, depths=[2,8,14,2], dp_rate=0.0, **kwargs) + return model + + +@register_model +def smlpnet_small(pretrained=False, **kwargs): + model = sMLPNet(dim=96, alpha=3, patch_size=4, depths=[2,10,24,2], dp_rate=0.2, **kwargs) + return model + + +@register_model +def smlpnet_base(pretrained=False, **kwargs): + model = sMLPNet(dim=112, alpha=3, patch_size=4, depths=[2,10,24,2], dp_rate=0.3, **kwargs) + return model + + +# SPACH +@register_model +def spach_xxs_patch16_224_mlp(pretrained=False, **kwargs): + cfgs = dict(img_size=224, patch_size=16, hidden_dim=384, token_ratio=0.5, num_heads=12, channel_ratio=2.0) + cfgs['net_arch'] = [('mlp', 12)] + cfgs.update(kwargs) + model = Spach(**cfgs) + return model + + +@register_model +def spach_xxs_patch16_224_conv(pretrained=False, **kwargs): + cfgs = dict(img_size=224, patch_size=16, hidden_dim=384, token_ratio=0.5, num_heads=12, channel_ratio=2.0) + cfgs['net_arch'] = [('pass', 12)] + cfgs.update(kwargs) + model = Spach(**cfgs) + return model + + +@register_model +def spach_xxs_patch16_224_attn(pretrained=False, **kwargs): + cfgs = dict(img_size=224, patch_size=16, hidden_dim=192, token_ratio=0.5, num_heads=6, channel_ratio=2.0) + cfgs['net_arch'] = [('attn', 12)] + cfgs.update(kwargs) + model = Spach(**cfgs) + return model + + +@register_model +def spach_xs_patch16_224_mlp(pretrained=False, **kwargs): + cfgs = dict(img_size=224, patch_size=16, hidden_dim=384, token_ratio=0.5, num_heads=12, channel_ratio=2.0) + cfgs['net_arch'] = [('mlp', 24)] + cfgs.update(kwargs) + model = Spach(**cfgs) + return model + + +@register_model +def spach_xs_patch16_224_conv(pretrained=False, **kwargs): + cfgs = dict(img_size=224, patch_size=16, hidden_dim=384, token_ratio=0.5, num_heads=12, channel_ratio=2.0) + cfgs['net_arch'] = [('pass', 24)] + cfgs.update(kwargs) + model = Spach(**cfgs) + return model + + +@register_model +def spach_xs_patch16_224_attn(pretrained=False, **kwargs): + cfgs = dict(img_size=224, patch_size=16, hidden_dim=384, token_ratio=0.5, num_heads=12, channel_ratio=2.0) + cfgs['net_arch'] = [('attn', 12)] + cfgs.update(kwargs) + model = Spach(**cfgs) + return model + + +@register_model +def spach_s_patch16_224_mlp(pretrained=False, **kwargs): + cfgs = dict(img_size=224, patch_size=16, hidden_dim=512, token_ratio=0.5, num_heads=16, channel_ratio=3.0) + cfgs['net_arch'] = [('mlp', 24)] + cfgs.update(kwargs) + model = Spach(**cfgs) + return model + + +@register_model +def spach_s_patch16_224_conv(pretrained=False, **kwargs): + cfgs = dict(img_size=224, patch_size=16, hidden_dim=512, token_ratio=0.5, num_heads=16, channel_ratio=3.0) + cfgs['net_arch'] = [('pass', 24)] + cfgs.update(kwargs) + model = Spach(**cfgs) + return model + + +@register_model +def spach_s_patch16_224_attn(pretrained=False, **kwargs): + cfgs = dict(img_size=224, patch_size=16, hidden_dim=512, token_ratio=0.5, num_heads=16, channel_ratio=3.0) + cfgs['net_arch'] = [('attn', 12)] + cfgs.update(kwargs) + model = Spach(**cfgs) + return model + + +@register_model +def spach_ms_xxs_patch4_224_conv(pretrained=False, **kwargs): + cfgs = dict(img_size=224, patch_size=4, hidden_dim=64, token_ratio=0.5, num_heads=2, channel_ratio=2.0) + cfgs['net_arch'] = [[('pass', 2)], [('pass', 2)], [('pass', 6)], [('pass', 2)]] + 
cfgs.update(kwargs) + model = SpachMS(**cfgs) + return model + + +@register_model +def spach_ms_xxs_patch4_224_mlp(pretrained=False, **kwargs): + cfgs = dict(img_size=224, patch_size=4, hidden_dim=64, token_ratio=0.5, num_heads=2, channel_ratio=2.0) + cfgs['net_arch'] = [[('pass', 2)], [('mlp', 2)], [('mlp', 6)], [('mlp', 2)]] + cfgs.update(kwargs) + model = SpachMS(**cfgs) + return model + + +@register_model +def spach_ms_xxs_patch4_224_attn(pretrained=False, **kwargs): + cfgs = dict(img_size=224, patch_size=4, hidden_dim=32, token_ratio=0.5, num_heads=1, channel_ratio=2.0) + cfgs['net_arch'] = [[('pass', 2)], [('attn', 2)], [('attn', 6)], [('attn', 2)]] + cfgs.update(kwargs) + model = SpachMS(**cfgs) + return model + + +@register_model +def spach_ms_xs_patch4_224_conv(pretrained=False, **kwargs): + cfgs = dict(img_size=224, patch_size=4, hidden_dim=96, token_ratio=0.5, num_heads=3, channel_ratio=2.0) + cfgs['net_arch'] = [[('pass', 3)], [('pass', 4)], [('pass', 12)], [('pass', 3)]] + cfgs.update(kwargs) + model = SpachMS(**cfgs) + return model + + +@register_model +def spach_ms_xs_patch4_224_mlp(pretrained=False, **kwargs): + cfgs = dict(img_size=224, patch_size=4, hidden_dim=96, token_ratio=0.5, num_heads=3, channel_ratio=2.0) + cfgs['net_arch'] = [[('pass', 3)], [('mlp', 4)], [('mlp', 12)], [('mlp', 3)]] + cfgs.update(kwargs) + model = SpachMS(**cfgs) + return model + + +@register_model +def spach_ms_xs_patch4_224_attn(pretrained=False, **kwargs): + cfgs = dict(img_size=224, patch_size=4, hidden_dim=64, token_ratio=0.5, num_heads=2, channel_ratio=2.0) + cfgs['net_arch'] = [[('pass', 3)], [('attn', 4)], [('attn', 12)], [('attn', 3)]] + cfgs.update(kwargs) + model = SpachMS(**cfgs) + return model + + +@register_model +def spach_ms_s_patch4_224_conv(pretrained=False, **kwargs): + cfgs = dict(img_size=224, patch_size=4, hidden_dim=128, token_ratio=0.5, num_heads=4, channel_ratio=3.0) + cfgs['net_arch'] = [[('pass', 3)], [('pass', 4)], [('pass', 12)], [('pass', 3)]] + cfgs.update(kwargs) + model = SpachMS(**cfgs) + return model + + +@register_model +def spach_ms_s_patch4_224_mlp(pretrained=False, **kwargs): + cfgs = dict(img_size=224, patch_size=4, hidden_dim=128, token_ratio=0.5, num_heads=4, channel_ratio=3.0) + cfgs['net_arch'] = [[('pass', 3)], [('mlp', 4)], [('mlp', 12)], [('mlp', 3)]] + cfgs.update(kwargs) + model = SpachMS(**cfgs) + return model + + +@register_model +def spach_ms_s_patch4_224_attn(pretrained=False, **kwargs): + cfgs = dict(img_size=224, patch_size=4, hidden_dim=96, token_ratio=0.5, num_heads=3, channel_ratio=3.0) + cfgs['net_arch'] = [[('pass', 3)], [('attn', 4)], [('attn', 12)], [('attn', 3)]] + cfgs.update(kwargs) + model = SpachMS(**cfgs) + return model + + +@register_model +def spach_ms_xs_patch4_224_hybrid(pretrained=False, **kwargs): + cfgs = dict(img_size=224, patch_size=4, hidden_dim=96, token_ratio=0.5, num_heads=3, channel_ratio=2.0) + cfgs['net_arch'] = [[('pass', 3)], [('pass', 4)], [('pass', 2), ('attn', 10)], [('pass', 1), ('attn', 2)]] + cfgs.update(kwargs) + model = SpachMS(**cfgs) + return model + + +@register_model +def spach_ms_s_patch4_224_hybrid(pretrained=False, **kwargs): + cfgs = dict(img_size=224, patch_size=4, hidden_dim=128, token_ratio=0.5, num_heads=4, channel_ratio=3.0) + cfgs['net_arch'] = [[('pass', 3)], [('pass', 2), ('attn', 2)], [('pass', 2), ('attn', 10)], [('pass', 1), ('attn', 2)]] + cfgs.update(kwargs) + model = SpachMS(**cfgs) + return model + + +# shift vit +@register_model +def shiftvit_light_tiny(**kwargs): + model = 
ShiftViT(embed_dim=96, depths=(2, 2, 6, 2), mlp_ratio=4, drop_path_rate=0.2, n_div=12) + return model + + +@register_model +def shiftvit_r4_tiny(**kwargs): + model = ShiftViT(embed_dim=96, depths=(2, 2, 12, 3), mlp_ratio=4, drop_path_rate=0.2, n_div=12) + return model + + +@register_model +def shiftvit_r2_tiny(**kwargs): + model = ShiftViT(embed_dim=96, depths=(6, 8, 18, 6), mlp_ratio=2, drop_path_rate=0.2, n_div=12) + return model + + +@register_model +def shiftvit_light_small(**kwargs): + model = ShiftViT(embed_dim=96, depths=(2, 2, 18, 2), mlp_ratio=4, drop_path_rate=0.4, n_div=12) + return model + + +@register_model +def shiftvit_r4_small(**kwargs): + model = ShiftViT(embed_dim=96, depths=(2, 6, 24, 4), mlp_ratio=4, drop_path_rate=0.4, n_div=12) + return model + + +@register_model +def shiftvit_r2_small(**kwargs): + model = ShiftViT(embed_dim=96, depths=(10, 18, 36, 10), mlp_ratio=2, drop_path_rate=0.4, n_div=12) + return model + + +@register_model +def shiftvit_light_base(**kwargs): + model = ShiftViT(embed_dim=128, depths=(2, 2, 18, 2), mlp_ratio=4, drop_path_rate=0.5, n_div=16) + return model + + +@register_model +def shiftvit_r4_base(**kwargs): + model = ShiftViT(embed_dim=128, depths=(4, 6, 22, 4), mlp_ratio=4, drop_path_rate=0.5, n_div=16) + return model + + +@register_model +def shiftvit_r2_base(**kwargs): + model = ShiftViT(embed_dim=128, depths=(10, 18, 36, 10), mlp_ratio=2, drop_path_rate=0.6, n_div=16) + return model diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/shiftvit.py b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/shiftvit.py new file mode 100644 index 0000000000..6b243f2802 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/shiftvit.py @@ -0,0 +1,365 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +import torch +import torch.nn as nn +import torch.utils.checkpoint as checkpoint +from timm.models.layers import DropPath, to_2tuple, trunc_normal_ +from functools import partial + + +class GroupNorm(nn.GroupNorm): + + def __init__(self, num_channels, num_groups=1): + """ We use GroupNorm (group = 1) to approximate LayerNorm + for [N, C, H, W] layout""" + super(GroupNorm, self).__init__(num_groups, num_channels) + + +class Mlp(nn.Module): + + def __init__(self, + in_features, + hidden_features=None, + out_features=None, + act_layer=nn.GELU, + drop=0.): + """ MLP network in FFN. By default, the MLP is implemented by + nn.Linear. However, in our implementation, the data layout is + in format of [N, C, H, W], therefore we use 1x1 convolution to + implement fully-connected MLP layers. 
+ + Args: + in_features (int): input channels + hidden_features (int): hidden channels, if None, set to in_features + out_features (int): out channels, if None, set to in_features + act_layer (callable): activation function class type + drop (float): drop out probability + """ + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Conv2d(in_features, hidden_features, 1) + self.act = act_layer() + self.fc2 = nn.Conv2d(hidden_features, out_features, 1) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class ShiftViTBlock(nn.Module): + + def __init__(self, + dim, + n_div=12, + mlp_ratio=4., + drop=0., + drop_path=0., + act_layer=nn.GELU, + norm_layer=nn.LayerNorm, + input_resolution=None): + """ The building block of Shift-ViT network. + + Args: + dim (int): feature dimension + n_div (int): how many divisions are used. Totally, 4/n_div of + channels will be shifted. + mlp_ratio (float): expand ratio of MLP network. + drop (float): drop out prob. + drop_path (float): drop path prob. + act_layer (callable): activation function class type. + norm_layer (callable): normalization layer class type. + input_resolution (tuple): input resolution. This optional variable + is used to calculate the flops. + + """ + super(ShiftViTBlock, self).__init__() + self.dim = dim + self.input_resolution = input_resolution + self.mlp_ratio = mlp_ratio + + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, + hidden_features=mlp_hidden_dim, + act_layer=act_layer, + drop=drop) + self.n_div = n_div + + def forward(self, x): + x = self.shift_feat(x, self.n_div) + shortcut = x + x = shortcut + self.drop_path(self.mlp(self.norm2(x))) + return x + + def extra_repr(self) -> str: + return f"dim={self.dim}," \ + f"input_resolution={self.input_resolution}," \ + f"shift percentage={4.0 / self.n_div * 100}%." 
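+    # shift_feat below is the parameter-free "partial shift" that replaces attention in
+    # ShiftViT: the channels are split into n_div groups and only the first four groups
+    # are shifted by a single pixel (left, right, up, down respectively); all remaining
+    # channels are copied through unchanged. Vacated border positions stay zero because
+    # `out` starts from torch.zeros_like(x).
+    # Minimal sketch (hypothetical toy input, n_div=4 so every channel group is shifted):
+    #     x = torch.arange(16.).reshape(1, 4, 2, 2)
+    #     y = ShiftViTBlock.shift_feat(x, n_div=4)
+    #     # y[:, 0] equals x[:, 0] moved one column to the left, with the freed column zeroed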
+ + @staticmethod + def shift_feat(x, n_div): + B, C, H, W = x.shape + g = C // n_div + out = torch.zeros_like(x) + + out[:, g * 0:g * 1, :, :-1] = x[:, g * 0:g * 1, :, 1:] # shift left + out[:, g * 1:g * 2, :, 1:] = x[:, g * 1:g * 2, :, :-1] # shift right + out[:, g * 2:g * 3, :-1, :] = x[:, g * 2:g * 3, 1:, :] # shift up + out[:, g * 3:g * 4, 1:, :] = x[:, g * 3:g * 4, :-1, :] # shift down + + out[:, g * 4:, :, :] = x[:, g * 4:, :, :] # no shift + return out + + +class PatchMerging(nn.Module): + + def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm): + super().__init__() + self.input_resolution = input_resolution + self.dim = dim + self.reduction = nn.Conv2d(dim, 2 * dim, (2, 2), stride=2, bias=False) + self.norm = norm_layer(dim) + + def forward(self, x): + x = self.norm(x) + x = self.reduction(x) + return x + + def extra_repr(self) -> str: + return f"input_resolution={self.input_resolution}, dim={self.dim}" + + +class BasicLayer(nn.Module): + + def __init__(self, + dim, + input_resolution, + depth, + n_div=12, + mlp_ratio=4., + drop=0., + drop_path=None, + norm_layer=None, + downsample=None, + use_checkpoint=False, + act_layer=nn.GELU): + + super(BasicLayer, self).__init__() + self.dim = dim + self.input_resolution = input_resolution + self.depth = depth + self.use_checkpoint = use_checkpoint + + # build blocks + self.blocks = nn.ModuleList([ + ShiftViTBlock(dim=dim, + n_div=n_div, + mlp_ratio=mlp_ratio, + drop=drop, + drop_path=drop_path[i], + norm_layer=norm_layer, + act_layer=act_layer, + input_resolution=input_resolution) + for i in range(depth) + ]) + + # patch merging layer + if downsample is not None: + self.downsample = downsample(input_resolution, + dim=dim, + norm_layer=norm_layer) + else: + self.downsample = None + + def forward(self, x): + for blk in self.blocks: + if self.use_checkpoint: + x = checkpoint.checkpoint(blk, x) + else: + x = blk(x) + if self.downsample is not None: + x = self.downsample(x) + return x + + def extra_repr(self) -> str: + return f"dim={self.dim}," \ + f"input_resolution={self.input_resolution}," \ + f"depth={self.depth}" + + +class PatchEmbed(nn.Module): + r""" Image to Patch Embedding + + Args: + img_size (int, tuple): Image size. + patch_size (int, tuple): Patch token size. + in_chans (int): Number of input image channels. + embed_dim (int): Number of linear projection output channels. + norm_layer (nn.Module, optional): Normalization layer. 
+ """ + + def __init__(self, + img_size=224, + patch_size=4, + in_chans=3, + embed_dim=96, + norm_layer=None): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + patches_resolution = [img_size[0] // patch_size[0], + img_size[1] // patch_size[1]] + self.img_size = img_size + self.patch_size = patch_size + self.patches_resolution = patches_resolution + self.num_patches = patches_resolution[0] * patches_resolution[1] + + self.in_chans = in_chans + self.embed_dim = embed_dim + + self.proj = nn.Conv2d(in_chans, embed_dim, + kernel_size=patch_size, stride=patch_size) + if norm_layer is not None: + self.norm = norm_layer(embed_dim) + else: + self.norm = None + + def forward(self, x): + x = self.proj(x) + if self.norm is not None: + x = self.norm(x) + return x + + +class ShiftViT(nn.Module): + + def __init__(self, + n_div=12, + img_size=224, + patch_size=4, + in_chans=3, + num_classes=1000, + embed_dim=96, + depths=(2, 2, 6, 2), + mlp_ratio=4., + drop_rate=0., + drop_path_rate=0.1, + norm_layer='GN1', + act_layer='GELU', + patch_norm=True, + use_checkpoint=False, + **kwargs): + super().__init__() + assert norm_layer in ('GN1', 'BN') + if norm_layer == 'BN': + norm_layer = nn.BatchNorm2d + elif norm_layer == 'GN1': + norm_layer = partial(GroupNorm, num_groups=1) + else: + raise NotImplementedError + + if act_layer == 'GELU': + act_layer = nn.GELU + elif act_layer == 'RELU': + act_layer = partial(nn.ReLU, inplace=False) + else: + raise NotImplementedError + + self.num_classes = num_classes + self.num_layers = len(depths) + self.embed_dim = embed_dim + self.patch_norm = patch_norm + self.num_features = int(embed_dim * 2 ** (self.num_layers - 1)) + self.mlp_ratio = mlp_ratio + + # split image into non-overlapping patches + self.patch_embed = PatchEmbed( + img_size=img_size, + patch_size=patch_size, + in_chans=in_chans, + embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None) + + # num_patches = self.patch_embed.num_patches + patches_resolution = self.patch_embed.patches_resolution + self.patches_resolution = patches_resolution + self.pos_drop = nn.Dropout(p=drop_rate) + + # stochastic depth decay rule + dpr = [x.item() + for x in torch.linspace(0, drop_path_rate, sum(depths))] + + # build layers + self.layers = nn.ModuleList() + for i_layer in range(self.num_layers): + layer = BasicLayer(dim=int(embed_dim * 2 ** i_layer), + n_div=n_div, + input_resolution=(patches_resolution[0] // (2 ** i_layer), + patches_resolution[1] // (2 ** i_layer)), + depth=depths[i_layer], + mlp_ratio=self.mlp_ratio, + drop=drop_rate, + drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], + norm_layer=norm_layer, + downsample=PatchMerging if (i_layer < self.num_layers - 1) else None, + use_checkpoint=use_checkpoint, + act_layer=act_layer) + self.layers.append(layer) + + self.norm = norm_layer(self.num_features) + self.avgpool = nn.AdaptiveAvgPool2d(1) + self.head = nn.Linear(self.num_features, num_classes) \ + if num_classes > 0 else nn.Identity() + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, (nn.Conv1d, nn.Conv2d)): + trunc_normal_(m.weight, std=.02) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, (nn.LayerNorm, nn.GroupNorm)): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + def forward_features(self, x): + x = 
self.patch_embed(x) + x = self.pos_drop(x) + + for layer in self.layers: + x = layer(x) + + x = self.norm(x) # B L C + x = self.avgpool(x) # B C 1 + x = torch.flatten(x, 1) + return x + + def forward(self, x): + x = self.forward_features(x) + x = self.head(x) + return x diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/smlp.py b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/smlp.py new file mode 100644 index 0000000000..17330cdcba --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/smlp.py @@ -0,0 +1,143 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +import torch +from torch import nn +from einops.layers.torch import Rearrange +from timm.models.layers import DropPath + + +class FeedForward(nn.Module): + def __init__(self, dim, hidden_dim, dropout=0.): + super().__init__() + self.net = nn.Sequential( + nn.Linear(dim, hidden_dim), + nn.GELU(), + nn.Dropout(dropout), + nn.Linear(hidden_dim, dim), + nn.Dropout(dropout) + ) + + def forward(self, x): + return self.net(x) + + +class BN_Activ_Conv(nn.Module): + def __init__(self, in_channels, activation, out_channels, kernel_size, stride=(1, 1), dilation=(1, 1), groups=1): + super(BN_Activ_Conv, self).__init__() + self.BN = nn.BatchNorm2d(out_channels) + self.Activation = activation + padding = [int((dilation[j] * (kernel_size[j] - 1) - stride[j] + 1) / 2) for j in range(2)] # Same padding + self.Conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, groups=groups, bias=False) + + def forward(self, img): + img = self.BN(img) + img = self.Activation(img) + img = self.Conv(img) + return img + + +class sMLPBlock(nn.Module): + def __init__(self, W, H, channels): + super().__init__() + assert W == H + self.channels = channels + self.activation = nn.GELU() + self.BN = nn.BatchNorm2d(channels) + self.proj_h = nn.Conv2d(H, H, (1, 1)) + self.proh_w = nn.Conv2d(W, W, (1, 1)) + self.fuse = nn.Conv2d(channels*3, channels, (1,1), (1,1), bias=False) + + def forward(self, x): + x = self.activation(self.BN(x)) + x_h = self.proj_h(x.permute(0, 3, 1, 2)).permute(0, 2, 3, 1) + x_w = self.proh_w(x.permute(0, 2, 1, 3)).permute(0, 2, 1, 3) + x = self.fuse(torch.cat([x, x_h, x_w], dim=1)) + return x + + +class DWConvBlock(nn.Module): + def __init__(self, channels): + super().__init__() + self.conv_merge = BN_Activ_Conv(channels, nn.GELU(), channels, (3, 3), groups=channels) + + def forward(self, img): + img = self.conv_merge(img) + return img + + +class sMLPNet(nn.Module): + + def __init__(self, in_chans=3, dim=80, alpha=3, num_classes=1000, patch_size=4, image_size=224, depths=[2,8,14,2], dp_rate=0., + **kwargs): + super(sMLPNet, self).__init__() + ''' + (B,H,W,C): (B,(image_size// patch_size)**2,dim) + ''' + + assert image_size % patch_size == 0, 'Image dimensions must 
be divisible by the patch size.' + self.num_patch = image_size // patch_size + self.depths = depths + + self.to_patch_embedding = nn.ModuleList([]) + self.token_mix = nn.ModuleList([]) + self.channel_mix = nn.ModuleList([]) + self.drop_path = nn.ModuleList([]) + + net_num_blocks = sum(self.depths) + net_block_idx = 0 + for i in range(len(self.depths)): + ratio = 2 ** i + if i == 0: + self.to_patch_embedding.append(nn.Sequential(nn.Conv2d(in_chans, dim, patch_size, patch_size, bias=False))) + else: + self.to_patch_embedding.append(nn.Sequential(nn.Conv2d(dim * ratio // 2, dim * ratio, 2, 2, bias=False))) + + for j in range(self.depths[i]): + block_dpr = dp_rate * net_block_idx / (net_num_blocks - 1) # stochastic depth linear decay rule + self.drop_path.append(DropPath(block_dpr) if block_dpr > 0. else nn.Identity()) + net_block_idx += 1 + + self.channel_mix.append(nn.Sequential( + Rearrange('b c h w -> b h w c'), + nn.LayerNorm(dim*ratio), + FeedForward(dim*ratio,dim*ratio*alpha), + Rearrange('b h w c -> b c h w')) + ) + + self.token_mix.append(nn.Sequential(DWConvBlock(dim*ratio), sMLPBlock(self.num_patch//ratio, self.num_patch//ratio, dim * ratio))) + + self.batch_norm = nn.BatchNorm2d(dim*2**(len(self.depths)-1)) + + self.mlp_head = nn.Sequential( + nn.Linear(dim * 2**(len(self.depths)-1), num_classes) + ) + + def forward(self, x): + + shift = 0 + for i in range(len(self.depths)): + x = self.to_patch_embedding[i](x) + for j in range(self.depths[i]): + x = x + self.drop_path[j+shift](self.token_mix[j+shift](x)) + x = x + self.drop_path[j+shift](self.channel_mix[j+shift](x)) + shift += self.depths[i] + + x = self.batch_norm(x) + + x = x.mean(dim=[2,3]).flatten(1) + + return self.mlp_head(x) diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/__init__.py b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/__init__.py new file mode 100644 index 0000000000..a5cd47f745 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/__init__.py @@ -0,0 +1,20 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +from .spach import Spach +from .spach_ms import SpachMS + +__all__ = ['Spach', 'SpachMS'] diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/layers/__init__.py b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/layers/__init__.py new file mode 100644 index 0000000000..b882ec93e1 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/layers/__init__.py @@ -0,0 +1,19 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +from .channel_func import ChannelMLP +from .spatial_func import DWConv, SPATIAL_FUNC +from .stem import STEM_LAYER + +__all__ = ['ChannelMLP', 'DWConv', 'SPATIAL_FUNC', 'STEM_LAYER'] diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/layers/channel_func.py b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/layers/channel_func.py new file mode 100644 index 0000000000..b27dee16d0 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/layers/channel_func.py @@ -0,0 +1,46 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +from torch import nn + + +class ChannelMLP(nn.Module): + """Channel MLP""" + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0., **kwargs): + super(ChannelMLP, self).__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + self.hidden_features = hidden_features + self.out_features = out_features + + def forward(self, x): + B, N, C = x.shape + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + def flops(self, input_shape): + _, N, C = input_shape + flops = 0 + flops += (C + 1) * self.hidden_features * N + flops += (self.hidden_features + 1) * self.out_features * N + return flops \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/layers/spatial_func.py b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/layers/spatial_func.py new file mode 100644 index 0000000000..259c56abdb --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/layers/spatial_func.py @@ -0,0 +1,122 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +from torch import nn +from einops import rearrange + +from ..misc import Reshape2HW, Reshape2N + + +class SpatialAttention(nn.Module): + """Spatial Attention""" + def __init__(self, dim, num_heads, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0., **kwargs): + super(SpatialAttention, self).__init__() + head_dim = dim // num_heads + + self.num_heads = num_heads + self.scale = qk_scale or head_dim ** -0.5 + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + def forward(self, x): + B, N, C = x.shape + qkv = self.qkv(x) + qkv = rearrange(qkv, "b n (three heads head_c) -> three b heads n head_c", three=3, heads=self.num_heads) + q, k, v = qkv[0] * self.scale, qkv[1], qkv[2] + + attn = (q @ k.transpose(-2, -1)) # B, head, N, N + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + out = (attn @ v) # B, head, N, C + out = rearrange(out, "b heads n head_c -> b n (heads head_c)") + + out = self.proj(out) + out = self.proj_drop(out) + + return out + + def flops(self, input_shape): + _, N, C = input_shape + flops = 0 + # qkv + flops += 3 * C * C * N + # q@k + flops += N ** 2 * C + # attn@v + flops += N ** 2 * C + # proj + flops += C * C * N + return flops + +class SpatialMLP(nn.Module): + """Spatial MLP""" + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0., **kwargs): + super(SpatialMLP, self).__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + self.hidden_features = hidden_features + self.out_features = out_features + + def forward(self, x): + B, N, C = x.shape + x = x.transpose(1, 2) + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + x = x.transpose(1, 2) + return x + + def flops(self, input_shape): + _, N, C = input_shape + flops = 0 + flops += (N + 1) * self.hidden_features * C + flops += (self.hidden_features + 1) * self.out_features * C + return flops + + +class DWConv(nn.Module): + def __init__(self, dim, kernel_size=3): + super(DWConv, self).__init__() + self.dim = dim + self.kernel_size = kernel_size + + padding = (kernel_size - 1) // 2 + self.net = nn.Sequential(Reshape2HW(), + nn.Conv2d(dim, dim, kernel_size, 1, padding, groups=dim), + Reshape2N()) + + + def forward(self, x): + x = self.net(x) + return x + + def flops(self, input_shape): + _, N, C = input_shape + flops = N * self.dim * (3 * 3 + 1) + return flops + + +SPATIAL_FUNC = {'attn': SpatialAttention, 'mlp': SpatialMLP, 'pass': None} diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/layers/stem.py b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/layers/stem.py new file mode 100644 index 0000000000..056a7b7e8c --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/layers/stem.py @@ -0,0 +1,110 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +from torch import nn + +from timm.models.layers import to_2tuple + +from ..misc import check_upstream_shape + + +class PatchEmbed(nn.Module): + """1-conv patch embedding layer""" + def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, downstream=False): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + + self.downstream = downstream + self.img_size = img_size + self.patch_size = patch_size + self.stem_shape = (img_size[0] // patch_size[0], img_size[1] // patch_size[1]) + self.num_patches = self.stem_shape[0] * self.stem_shape[1] + + self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) + self.out_size = None + + # for flops + self.in_chans = in_chans + self.embed_dim = embed_dim + + def forward(self, x): + if not self.downstream: + check_upstream_shape(x, self.img_size) + x = self.proj(x) + return x + + def flops(self, input_shape=None): + flops = self.num_patches * self.embed_dim * (sum(self.patch_size) * self.in_chans + 1) # Ho*Wo*Co*(K^2*Ci+1) + return flops + + +class Conv4PatchEmbed(nn.Module): + """4-conv patch embedding layer""" + def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, downstream=False, hidden_chans=64): + super(Conv4PatchEmbed, self).__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + + self.downstream = downstream + self.img_size = img_size + self.patch_size = patch_size + self.stem_shape = (img_size[0] // patch_size[0], img_size[1] // patch_size[1]) + self.num_patches = self.stem_shape[0] * self.stem_shape[1] + + sub_patch_size = (patch_size[0]//2, patch_size[1]//2) + + self.proj = nn.Sequential( + nn.Conv2d(in_chans, hidden_chans, kernel_size=7, stride=2, padding=3, bias=False), + nn.BatchNorm2d(hidden_chans), + nn.ReLU(), + nn.Conv2d(hidden_chans, hidden_chans, 3, 1, 1, bias=False), + nn.BatchNorm2d(hidden_chans), + nn.ReLU(), + nn.Conv2d(hidden_chans, hidden_chans, 3, 1, 1, bias=False), + nn.BatchNorm2d(hidden_chans), + nn.ReLU(), + nn.Conv2d(hidden_chans, embed_dim, kernel_size=sub_patch_size, stride=sub_patch_size) + ) + + # for flops + self.inside_num_patches = self.num_patches * sum(sub_patch_size) + self.in_chans = in_chans + self.new_patch_size = sub_patch_size + self.embed_dim = embed_dim + self.hidden_chans = hidden_chans + + def forward(self, x): + if not self.downstream: + check_upstream_shape(x, self.img_size) + x = self.proj(x) + return x + + def flops(self, input_shape=None): + flops = 0 + flops += self.inside_num_patches * self.hidden_chans * self.in_chans * 7 * 7 # Ho*Wo*Co*K^2*Ci+1 + flops += self.inside_num_patches * self.hidden_chans + + flops += self.inside_num_patches * self.hidden_chans * self.hidden_chans * 3 * 3 + flops += self.inside_num_patches * self.hidden_chans + + flops += self.inside_num_patches * self.hidden_chans * self.hidden_chans * 3 * 3 + flops += self.inside_num_patches * self.hidden_chans + + flops += self.num_patches * self.embed_dim * (sum(self.new_patch_size)*self.hidden_chans + 1) # 
Ho*Wo*Co*(K^2*Ci+1) + + return flops + + +STEM_LAYER = {'conv1': PatchEmbed, 'conv4': Conv4PatchEmbed} diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/misc.py b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/misc.py new file mode 100644 index 0000000000..4c28bca9c8 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/misc.py @@ -0,0 +1,92 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +from functools import partial + +from torch import nn +from einops import rearrange + +from timm.models.layers import to_2tuple + + +def check_upstream_shape(x, img_size=(224, 224)): + _, _, H, W = x.shape + assert H == img_size[0] and W == img_size[1], \ + f"Input image size ({H}*{W}) doesn't match model ({img_size[0]}*{img_size[1]})." + + +def reshape2n(x): + return rearrange(x, 'b c h w -> b (h w) c') + + +def reshape2hw(x, hw=None): + n = x.shape[1] + if hw is None: + hw = to_2tuple(int(n ** 0.5)) + assert n == hw[0] * hw[1], f"N={n} is not equal to H={hw[0]}*W={hw[1]}" + return rearrange(x, 'b (h w) c -> b c h w', h=hw[0]) + + +def downsample_conv(in_channels, out_channels, kernel_size=2, stride=2, padding=0, dilation=1, norm_layer=None): + assert norm_layer is None, "only support default normalization" + norm_layer = norm_layer or partial(nn.GroupNorm, num_groups=1, num_channels=out_channels) + kernel_size = 1 if stride == 1 and dilation == 1 else kernel_size + dilation = dilation if kernel_size > 1 else 1 + return nn.Sequential(nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, + dilation=dilation, bias=False), + norm_layer() + ) + + +class Reshape2N(nn.Module): + def __init__(self): + super(Reshape2N, self).__init__() + + def forward(self, x): + return reshape2n(x) + + +class Reshape2HW(nn.Module): + def __init__(self, hw=None): + super(Reshape2HW, self).__init__() + self.hw = hw + + def forward(self, x): + return reshape2hw(x, self.hw) + + +class DownsampleConv(nn.Module): + def __init__(self, in_channels, out_channels, kernel_size=2, stride=2, padding=0, dilation=1, norm_layer=None): + super(DownsampleConv, self).__init__() + self.net = nn.Sequential( + Reshape2HW(), + downsample_conv(in_channels, out_channels, kernel_size, stride, padding, dilation, norm_layer), + Reshape2N() + ) + + self.kernel_size = kernel_size + self.in_channels = in_channels + self.out_channels = out_channels + + def forward(self, x): + return self.net(x) + + def flops(self, input_shape): + _, N, C = input_shape # C == out_channels + flops = 0 + flops += N * self.out_channels * self.in_channels * self.kernel_size**2 + flops += N * self.out_channels + return flops diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/spach.py 
b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/spach.py new file mode 100644 index 0000000000..63874d69a4 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/spach.py @@ -0,0 +1,201 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +from functools import partial + +import torch +from torch import nn +from timm.models.layers import DropPath +from einops.layers.torch import Reduce + +from .layers import DWConv, SPATIAL_FUNC, ChannelMLP, STEM_LAYER +from .misc import reshape2n + + +class MixingBlock(nn.Module): + def __init__(self, dim, + spatial_func=None, scaled=True, init_values=1e-4, shared_spatial_func=False, + norm_layer=partial(nn.LayerNorm, eps=1e-6), act_layer=nn.GELU, drop_path=0., cpe=True, + num_heads=None, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0., # attn + in_features=None, hidden_features=None, drop=0., # mlp + channel_ratio=2.0 + ): + super(MixingBlock, self).__init__() + + spatial_kwargs = dict(act_layer=act_layer, + in_features=in_features, hidden_features=hidden_features, drop=drop, # mlp + dim=dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=proj_drop # attn + ) + + self.valid_spatial_func = True + + if spatial_func is not None: + if shared_spatial_func: + self.spatial_func = spatial_func + else: + self.spatial_func = spatial_func(**spatial_kwargs) + self.norm1 = norm_layer(dim) + if scaled: + self.gamma_1 = nn.Parameter(init_values * torch.ones(1, 1, dim), requires_grad=True) + else: + self.gamma_1 = 1. + else: + self.valid_spatial_func = False + + self.channel_func = ChannelMLP(in_features=dim, hidden_features=int(dim*channel_ratio), act_layer=act_layer, + drop=drop) + + self.norm2 = norm_layer(dim) + self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() + + + self.cpe = cpe + if cpe: + self.cpe_net = DWConv(dim) + + + def forward(self, x): + in_x = x + if self.valid_spatial_func: + x = x + self.drop_path(self.gamma_1 * self.spatial_func(self.norm1(in_x))) + if self.cpe: + x = x + self.cpe_net(in_x) + + x = x + self.drop_path(self.channel_func(self.norm2(x))) + + return x + + def flops(self, input_shape): + _, N, C = input_shape + flops = 0 + if self.valid_spatial_func: + flops += self.spatial_func.flops(input_shape) + flops += N * C * 2 # norm + skip + if self.cpe: + flops += self.cpe_net.flops(input_shape) + + flops += self.channel_func.flops(input_shape) + flops += N * C * 2 + return flops + + +class Spach(nn.Module): + def __init__(self, + num_classes=1000, + img_size=224, + in_chans=3, + hidden_dim=384, + patch_size=16, + net_arch=None, + act_layer=nn.GELU, + norm_layer=partial(nn.LayerNorm, eps=1e-6), + stem_type='conv1', + scaled=True, init_values=1e-4, drop_path_rate=0., cpe=True, shared_spatial_func=False, # mixing block + num_heads=12, qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0., # attn + token_ratio=0.5, channel_ratio=2.0, drop_rate=0., # mlp + downstream=False, + **kwargs + ): + super(Spach, self).__init__() + self.num_classes = num_classes + self.hidden_dim = hidden_dim + self.downstream = downstream + + self.stem = STEM_LAYER[stem_type]( + img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=hidden_dim, downstream=downstream) + self.norm1 = norm_layer(hidden_dim) + + block_kwargs = dict(dim=hidden_dim, scaled=scaled, init_values=init_values, cpe=cpe, + shared_spatial_func=shared_spatial_func, norm_layer=norm_layer, act_layer=act_layer, + num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=proj_drop, # attn + in_features=self.stem.num_patches, hidden_features=int(self.stem.num_patches * token_ratio), channel_ratio=channel_ratio, drop=drop_rate) # mlp + + self.blocks = self.make_blocks(net_arch, block_kwargs, drop_path_rate, shared_spatial_func) + self.norm2 = norm_layer(hidden_dim) + + if not downstream: + self.pool = Reduce('b n c -> b c', reduction='mean') + self.head = nn.Linear(hidden_dim, self.num_classes) + + self.init_weights() + + def make_blocks(self, net_arch, block_kwargs, drop_path, shared_spatial_func): + if shared_spatial_func: + assert len(net_arch) == 1, '`shared_spatial_func` only support unitary spatial function' + assert net_arch[0][0] != 'pass', '`shared_spatial_func` do not support pass' + spatial_func = SPATIAL_FUNC[net_arch[0][0]](**block_kwargs) + else: + spatial_func = None + blocks = [] + for func_type, depth in net_arch: + for i in range(depth): + blocks.append(MixingBlock(spatial_func=spatial_func or SPATIAL_FUNC[func_type], drop_path=drop_path, + **block_kwargs)) + return nn.Sequential(*blocks) + + def init_weights(self): + for n, m in self.named_modules(): + _init_weights(m, n) + + def forward_features(self, x): + x = self.stem(x) + x = reshape2n(x) + x = self.norm1(x) + + x = self.blocks(x) + x = self.norm2(x) + + return x + + def forward(self, x): + x = self.forward_features(x) + x = self.pool(x) + x = self.head(x) + return x + + def flops(self): + flops = 0 + shape = (1, self.stem.num_patches, self.hidden_dim) + # stem + flops += self.stem.flops() + flops += sum(shape) + # blocks + flops += sum([i.flops(shape) for i in self.blocks]) + flops += sum(shape) + # head + flops += self.hidden_dim * self.num_classes + return flops + + +def _init_weights(m, n: str): + if isinstance(m, nn.Linear): + if 
n.startswith('head'): + nn.init.zeros_(m.weight) + nn.init.zeros_(m.bias) + else: + nn.init.xavier_uniform_(m.weight) + if m.bias is not None: + if 'mlp' in n: + nn.init.normal_(m.bias, std=1e-6) + else: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, (nn.LayerNorm, nn.BatchNorm2d, nn.GroupNorm)): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/spach_ms.py b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/spach_ms.py new file mode 100644 index 0000000000..d4d769dd84 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/models/spach/spach_ms.py @@ -0,0 +1,147 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +from functools import partial + +from torch import nn +from einops.layers.torch import Reduce + +from .spach import MixingBlock, _init_weights +from .layers import STEM_LAYER, SPATIAL_FUNC +from .misc import DownsampleConv, reshape2n + + +class SpachMS(nn.Module): + def __init__(self, + num_classes=1000, + img_size=224, + in_chans=3, + hidden_dim=384, + patch_size=16, + net_arch=None, + act_layer=nn.GELU, + norm_layer=partial(nn.LayerNorm, eps=1e-6), + stem_type='conv1', + scaled=True, init_values=1e-4, drop_path_rate=0., cpe=True, shared_spatial_func=False, # mixing block + num_heads=12, qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0., # attn + token_ratio=0.5, channel_ratio=2.0, drop_rate=0., # mlp + downstream=False, + **kwargs + ): + super(SpachMS, self).__init__() + assert len(net_arch) == 4 + self.num_classes = num_classes + self.hidden_dim = hidden_dim + self.downstream = downstream + + self.stem = STEM_LAYER[stem_type]( + img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=hidden_dim, downstream=downstream) + self.norm1 = norm_layer(hidden_dim) + + block_kwargs = dict(scaled=scaled, init_values=init_values, cpe=cpe, + shared_spatial_func=shared_spatial_func, norm_layer=norm_layer, act_layer=act_layer, + num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=proj_drop, # attn + channel_ratio=channel_ratio, drop=drop_rate) # mlp + + stage_modules = self.make_blocks(hidden_dim, self.stem.num_patches, net_arch, block_kwargs, drop_path_rate, + shared_spatial_func, token_ratio) + for stage in stage_modules: + self.add_module(*stage) + hidden_dim = hidden_dim * 8 + self.norm2 = norm_layer(hidden_dim) + + if not downstream: + self.pool = Reduce('b n c -> b c', reduction='mean') + self.head = nn.Linear(hidden_dim, self.num_classes) + + self.init_weights() + + def make_blocks(self, dim, seq_len, net_arch, block_kwargs, drop_path, 
shared_spatial_func, token_ratio): + stages = [] + num_blocks = sum(sum([depth for _, depth in stage_arch]) for stage_arch in net_arch) + block_idx = 0 + + for stage_idx, stage_arch in enumerate(net_arch): + stage_name = f'layer{stage_idx + 1}' + blocks = [] + if stage_idx > 0: + down_kwargs = dict(in_channels=dim, out_channels=dim * 2) + downsample = DownsampleConv(**down_kwargs) + blocks.append(downsample) + dim = dim * 2 + seq_len = seq_len // 4 + + block_kwargs.update(dict(dim=dim, in_features=seq_len, hidden_features=int(seq_len * token_ratio))) + + if stage_idx > 0 and shared_spatial_func: + assert len(stage_arch) == 1, '`shared_spatial_func` only support unitary spatial function' + assert stage_arch[0][0] != 'pass', '`shared_spatial_func` do not support pass' + spatial_func = SPATIAL_FUNC[stage_arch[0][0]](**block_kwargs) + else: + spatial_func = None + + for func_type, depth in stage_arch: + for i in range(depth): + block_dpr = drop_path * block_idx / (num_blocks - 1) # stochastic depth linear decay rule + blocks.append(MixingBlock(spatial_func=spatial_func or SPATIAL_FUNC[func_type], drop_path=block_dpr, + **block_kwargs)) + block_idx += 1 + stages.append((stage_name, nn.Sequential(*blocks))) + + return stages + + def init_weights(self): + for n, m in self.named_modules(): + _init_weights(m, n) + + def forward_features(self, x): + x = self.stem(x) + x = reshape2n(x) + x = self.norm1(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.norm2(x) + + return x + + def forward(self, x): + x = self.forward_features(x) + x = self.pool(x) + x = self.head(x) + return x + + def flops(self): + flops = 0 + shape = (1, self.stem.num_patches, self.hidden_dim) + # stem + flops += self.stem.flops() + flops += sum(shape) + # layer1,2,3,4 + flops += sum([i.flops(shape) for i in self.layer1]) + shape = (1, self.stem.num_patches//4, self.hidden_dim*2) + flops += sum([i.flops(shape) for i in self.layer2]) + shape = (1, self.stem.num_patches//16, self.hidden_dim*4) + flops += sum([i.flops(shape) for i in self.layer3]) + shape = (1, self.stem.num_patches//64, self.hidden_dim*8) + flops += sum([i.flops(shape) for i in self.layer4]) + flops += sum(shape) + # head + flops += self.hidden_dim * 8 * self.num_classes + return flops \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/requirements.txt b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/requirements.txt new file mode 100644 index 0000000000..825d68d5d0 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/requirements.txt @@ -0,0 +1,4 @@ +torch==1.7.0 +torchvision==0.8.1 +timm==0.3.2 +einops==0.3.2 diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/samplers.py b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/samplers.py new file mode 100644 index 0000000000..8ab355d572 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/samplers.py @@ -0,0 +1,73 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright (c) 2015-present, Facebook, Inc. +# All rights reserved. +import torch +import torch.distributed as dist +import math + + +class RASampler(torch.utils.data.Sampler): + """Sampler that restricts data loading to a subset of the dataset for distributed, + with repeated augmentation. + It ensures that different each augmented version of a sample will be visible to a + different process (GPU) + Heavily based on torch.utils.data.DistributedSampler + """ + + def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): + if num_replicas is None: + if not dist.is_available(): + raise RuntimeError("Requires distributed package to be available") + num_replicas = dist.get_world_size() + if rank is None: + if not dist.is_available(): + raise RuntimeError("Requires distributed package to be available") + rank = dist.get_rank() + self.dataset = dataset + self.num_replicas = num_replicas + self.rank = rank + self.epoch = 0 + self.num_samples = int(math.ceil(len(self.dataset) * 3.0 / self.num_replicas)) + self.total_size = self.num_samples * self.num_replicas + # self.num_selected_samples = int(math.ceil(len(self.dataset) / self.num_replicas)) + self.num_selected_samples = int(math.floor(len(self.dataset) // 256 * 256 / self.num_replicas)) + self.shuffle = shuffle + + def __iter__(self): + # deterministically shuffle based on epoch + g = torch.Generator() + g.manual_seed(self.epoch) + if self.shuffle: + indices = torch.randperm(len(self.dataset), generator=g).tolist() + else: + indices = list(range(len(self.dataset))) + + # add extra samples to make it evenly divisible + indices = [ele for ele in indices for i in range(3)] + indices += indices[:(self.total_size - len(indices))] + assert len(indices) == self.total_size + + # subsample + indices = indices[self.rank:self.total_size:self.num_replicas] + assert len(indices) == self.num_samples + + return iter(indices[:self.num_selected_samples]) + + def __len__(self): + return self.num_selected_samples + + def set_epoch(self, epoch): + self.epoch = epoch diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/test/env_npu.sh b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/test/env_npu.sh new file mode 100644 index 0000000000..a975f7978c --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/test/env_npu.sh @@ -0,0 +1,79 @@ +#!/bin/bash +export install_path=/usr/local/Ascend + +if [ -d ${install_path}/toolkit ]; then + export LD_LIBRARY_PATH=${install_path}/fwkacllib/lib64/:/usr/include/hdf5/lib/:/usr/local/:/usr/local/lib/:/usr/lib/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons:${path_lib}:${LD_LIBRARY_PATH} + export PATH=${install_path}/fwkacllib/ccec_compiler/bin:${install_path}/fwkacllib/bin:$PATH + export PYTHONPATH=${install_path}/fwkacllib/python/site-packages:${install_path}/tfplugin/python/site-packages:${install_path}/toolkit/python/site-packages:$PYTHONPATH + export PYTHONPATH=/usr/local/python3.7.5/lib/python3.7/site-packages:$PYTHONPATH + export ASCEND_OPP_PATH=${install_path}/opp +else + if [ -d ${install_path}/nnae/latest ];then + export 
LD_LIBRARY_PATH=${install_path}/nnae/latest/fwkacllib/lib64/:/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64_64-linux-gnu:$LD_LIBRARY_PATH + export PATH=$PATH:${install_path}/nnae/latest/fwkacllib/ccec_compiler/bin/:${install_path}/nnae/latest/toolkit/tools/ide_daemon/bin/ + export ASCEND_OPP_PATH=${install_path}/nnae/latest/opp/ + export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so + export PYTHONPATH=${install_path}/nnae/latest/fwkacllib/python/site-packages/:${install_path}/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH + export ASCEND_AICPU_PATH=${install_path}/nnae/latest + else + export LD_LIBRARY_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH + export PATH=$PATH:${install_path}/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:${install_path}/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/ + export ASCEND_OPP_PATH=${install_path}/ascend-toolkit/latest/opp/ + export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so + export PYTHONPATH=${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH + export ASCEND_AICPU_PATH=${install_path}/ascend-toolkit/latest + fi +fi + +${install_path}/driver/tools/msnpureport -g error -d 0 +${install_path}/driver/tools/msnpureport -g error -d 1 +${install_path}/driver/tools/msnpureport -g error -d 2 +${install_path}/driver/tools/msnpureport -g error -d 3 +${install_path}/driver/tools/msnpureport -g error -d 4 +${install_path}/driver/tools/msnpureport -g error -d 5 +${install_path}/driver/tools/msnpureport -g error -d 6 +${install_path}/driver/tools/msnpureport -g error -d 7 + +#将Host日志输出到串口,0-关闭/1-开启 +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +#设置默认日志级别,0-debug/1-info/2-warning/3-error +export ASCEND_GLOBAL_LOG_LEVEL=3 +#设置Event日志开启标志,0-关闭/1-开启 +export ASCEND_GLOBAL_EVENT_ENABLE=0 +#设置是否开启taskque,0-关闭/1-开启 +export TASK_QUEUE_ENABLE=1 +#设置是否开启PTCopy,0-关闭/1-开启 +export PTCOPY_ENABLE=1 +#设置是否开启2个非连续combined标志,0-关闭/1-开启 +export COMBINED_ENABLE=1 +#设置是否开启3个非连续combined标志,0-关闭/1-开启 +export TRI_COMBINED_ENABLE=1 +#设置特殊场景是否需要重新编译,不需要修改 +export DYNAMIC_OP="ADD#MUL" +# HCCL白名单开关,1-关闭/0-开启 +export HCCL_WHITELIST_DISABLE=1 +# HCCL默认超时时间120s较少,修改为1800s对齐PyTorch默认设置 +export HCCL_CONNECT_TIMEOUT=1800 + +ulimit -SHn 512000 + +path_lib=$(python3.7 -c """ +import sys +import re +result='' +for index in range(len(sys.path)): + match_sit = re.search('-packages', sys.path[index]) + if 
match_sit is not None: + match_lib = re.search('lib', sys.path[index]) + + if match_lib is not None: + end=match_lib.span()[1] + result += sys.path[index][0:end] + ':' + + result+=sys.path[index] + '/torch/lib:' +print(result)""" +) + +echo ${path_lib} + +export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib/:${path_lib}:$LD_LIBRARY_PATH \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/test/train_full_1p.sh b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/test/train_full_1p.sh new file mode 100644 index 0000000000..e9d7657cec --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/test/train_full_1p.sh @@ -0,0 +1,150 @@ +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="shiftvit_light_tiny" +# 训练batch_size +batch_size=128 +# 训练使用的npu卡数 +export RANK_SIZE=1 +# 数据集路径,保持为空,不需要修改 +data_path="/opt/npu/dataset/imagenet/" + +# 训练最大iter数 +max_iter=10010 + + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_diename=${cur_path##*/} +if [ x"${cur_path_last_diename}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
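+    # after stepping out of test/, re-resolve cur_path so it points at the directory that contains test/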
+ cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi +echo ${pwd} + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/${ASCEND_DEVICE_ID} +else + mkdir -p ${test_path_dir}/output/${ASCEND_DEVICE_ID} +fi + +# 变量 +export SPACH_DATASETS=${data_path} +export PYTHONPATH=./:$PYTHONPATH + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh + export PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning' +fi + +KERNEL_NUM=$(($(nproc)/8)) +for i in $(seq 0 0) +do + if [ $(uname -m) = "aarch64" ] + then + PID_START=$((KERNEL_NUM * i)) + PID_END=$((PID_START + KERNEL_NUM - 1)) + taskset -c $PID_START-$PID_END \ + python3.7 -u main.py \ + --model shiftvit_light_tiny \ + --data-path ${data_path} \ + --output_dir ${test_path_dir}/output/${ASCEND_DEVICE_ID} \ + --npu \ + --num_workers 16\ + > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_1p_${ASCEND_DEVICE_ID}.log 2>&1 & + else + python3.7 -u main.py \ + --model shiftvit_light_tiny \ + --data-path ${data_path} \ + --output_dir ${test_path_dir}/output/${ASCEND_DEVICE_ID} \ + --npu \ + --num_workers 16\ + > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_1p_${ASCEND_DEVICE_ID}.log 2>&1 & + fi +done + +wait + + +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep 'FPS:'| awk '{sum+=$10} END {print sum/NR}'` +#打印,不需要修改 +echo "Final Performance FPS : ${FPS}" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +AvgFPS=${FPS} + +#最后一个迭代loss值 +MinLoss=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep 'Averaged stats:' | awk 'BEGIN {min = 65536} {if ($12+0 < min+0) min=$12} END {print min}'` +MaxAccuracy=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep 'Max accuracy' | awk 'BEGIN {max = 0} {if ($9+0 > max+0) max=$9} END {print max}'` +echo "MaxAccuracy = ${MaxAccuracy}" +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "AvgFPS = ${AvgFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "MinLoss = ${MinLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "MaxAccuracy = ${MaxAccuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git 
a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/test/train_full_8p.sh b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/test/train_full_8p.sh new file mode 100644 index 0000000000..24e0a5cdde --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/test/train_full_8p.sh @@ -0,0 +1,150 @@ +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="shiftvit_light_tiny" +# 训练batch_size +batch_size=128 +# 训练使用的npu卡数 +export RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path="/opt/npu/dataset/imagenet/" + +# 训练最大iter数 +max_iter=1210 + + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_diename=${cur_path##*/} +if [ x"${cur_path_last_diename}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
+ cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi +echo ${pwd} + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + +# 变量 +export SPACH_DATASETS=${data_path} +export PYTHONPATH=./:$PYTHONPATH + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh + # source /home/wangchy/SpanBERT/code/env.sh + export PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning' +fi + +get_lscpu_value() { + awk -F: "(\$1 == \"${1}\"){gsub(/ /, \"\", \$2); print \$2; found=1} END{exit found!=1}" +} + +lscpu_out=$(lscpu) +n_sockets=4 +n_cores_per_socket=$(get_lscpu_value 'Core(s) per socket' <<< "${lscpu_out}") + +echo "num_sockets = ${n_sockets} cores_per_socket=${n_cores_per_socket}" + +export PYTHONPATH=../:$PYTHONPATH + +python3.7 -u -m bind_pyt \ + --nsockets_per_node ${n_sockets} \ + --ncores_per_socket ${n_cores_per_socket} \ + --master_addr $(hostname -I |awk '{print $1}') \ + --no_hyperthreads \ + --no_membind "$@" main.py \ + --model shiftvit_light_tiny \ + --npu \ + --data-path ${data_path} \ + --pin-mem \ + --dist-eval \ + --num_workers 16 \ + --output_dir ${test_path_dir}/output/${ASCEND_DEVICE_ID} \ + > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_8p_${ASCEND_DEVICE_ID}.log 2>&1 & + +wait +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep 'FPS:'| awk '{sum+=$10} END {print sum/NR}'` +#打印,不需要修改 +echo "Final Performance FPS : ${FPS}" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +AvgFPS=${FPS} + +#最后一个迭代loss值 +MinLoss=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep 'Averaged stats:' | awk 'BEGIN {min = 65536} {if ($12+0 < min+0) min=$12} END {print min}'` +MaxAccuracy=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep 'Max accuracy' | awk 'BEGIN {max = 0} {if ($9+0 > max+0) max=$9} END {print max}'` +echo "MaxAccuracy = ${MaxAccuracy}" +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "AvgFPS = ${AvgFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "MinLoss = ${MinLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "MaxAccuracy = ${MaxAccuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> 
${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/test/train_performance_1p.sh b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/test/train_performance_1p.sh new file mode 100644 index 0000000000..592efbc030 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/test/train_performance_1p.sh @@ -0,0 +1,152 @@ +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="shiftvit_light_tiny" +# 训练batch_size +batch_size=128 +# 训练使用的npu卡数 +export RANK_SIZE=1 +# 数据集路径,保持为空,不需要修改 +data_path="/opt/npu/dataset/imagenet/" + +# 训练最大iter数 +max_iter=10010 + + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_diename=${cur_path##*/} +if [ x"${cur_path_last_diename}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
+ cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi +echo ${pwd} + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/${ASCEND_DEVICE_ID} +else + mkdir -p ${test_path_dir}/output/${ASCEND_DEVICE_ID} +fi + +# 变量 +export SPACH_DATASETS=${data_path} +export PYTHONPATH=./:$PYTHONPATH + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh + export PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning' +fi + +KERNEL_NUM=$(($(nproc)/8)) +for i in $(seq 0 0) +do + if [ $(uname -m) = "aarch64" ] + then + PID_START=$((KERNEL_NUM * i)) + PID_END=$((PID_START + KERNEL_NUM - 1)) + taskset -c $PID_START-$PID_END \ + python3.7 -u main.py \ + --model shiftvit_light_tiny \ + --data-path ${data_path} \ + --output_dir ${test_path_dir}/output/${ASCEND_DEVICE_ID} \ + --npu \ + --num_workers 16\ + --epochs 1 \ + > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_1p_${ASCEND_DEVICE_ID}.log 2>&1 & + else + python3.7 -u main.py \ + --model shiftvit_light_tiny \ + --data-path ${data_path} \ + --output_dir ${test_path_dir}/output/${ASCEND_DEVICE_ID} \ + --npu \ + --num_workers 16\ + --epochs 1 \ + > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_1p_${ASCEND_DEVICE_ID}.log 2>&1 & + fi +done + +wait + + +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep 'FPS:'| awk '{sum+=$10} END {print sum/NR}'` +#打印,不需要修改 +echo "Final Performance FPS : ${FPS}" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +AvgFPS=${FPS} + +#最后一个迭代loss值 +MinLoss=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep 'Averaged stats:' | awk 'BEGIN {min = 65536} {if ($12+0 < min+0) min=$12} END {print min}'` +MaxAccuracy=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep 'Max accuracy' | awk 'BEGIN {max = 0} {if ($9+0 > max+0) max=$9} END {print max}'` +echo "MaxAccuracy = ${MaxAccuracy}" +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "AvgFPS = ${AvgFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "MinLoss = ${MinLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "MaxAccuracy = ${MaxAccuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git 
a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/test/train_performance_8p.sh b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/test/train_performance_8p.sh new file mode 100644 index 0000000000..db64bb23d2 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/test/train_performance_8p.sh @@ -0,0 +1,150 @@ +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="shiftvit_light_tiny" +# 训练batch_size +batch_size=128 +# 训练使用的npu卡数 +export RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path="/opt/npu/dataset/imagenet/" + +# 训练最大iter数 +max_iter=1210 + + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_diename=${cur_path##*/} +if [ x"${cur_path_last_diename}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
+ cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi +echo ${pwd} + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + +# 变量 +export SPACH_DATASETS=${data_path} +export PYTHONPATH=./:$PYTHONPATH + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh + export PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning' +fi + +get_lscpu_value() { + awk -F: "(\$1 == \"${1}\"){gsub(/ /, \"\", \$2); print \$2; found=1} END{exit found!=1}" +} + +lscpu_out=$(lscpu) +n_sockets=4 +n_cores_per_socket=$(get_lscpu_value 'Core(s) per socket' <<< "${lscpu_out}") + +echo "num_sockets = ${n_sockets} cores_per_socket=${n_cores_per_socket}" + +export PYTHONPATH=../:$PYTHONPATH + +python3.7 -u -m bind_pyt \ + --nsockets_per_node ${n_sockets} \ + --ncores_per_socket ${n_cores_per_socket} \ + --master_addr $(hostname -I |awk '{print $1}') \ + --no_hyperthreads \ + --no_membind "$@" main.py \ + --model shiftvit_light_tiny \ + --npu \ + --data-path ${data_path} \ + --pin-mem \ + --dist-eval \ + --num_workers 16 \ + --epochs 5 \ + --output_dir ${test_path_dir}/output/${ASCEND_DEVICE_ID} \ + > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_8p_${ASCEND_DEVICE_ID}.log 2>&1 & + +wait +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep 'FPS:'| awk '{sum+=$10} END {print sum/NR}'` +#打印,不需要修改 +echo "Final Performance FPS : ${FPS}" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +AvgFPS=${FPS} + +#最后一个迭代loss值 +MinLoss=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep 'Averaged stats:' | awk 'BEGIN {min = 65536} {if ($12+0 < min+0) min=$12} END {print min}'` +MaxAccuracy=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep 'Max accuracy' | awk 'BEGIN {max = 0} {if ($9+0 > max+0) max=$9} END {print max}'` +echo "MaxAccuracy = ${MaxAccuracy}" +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "AvgFPS = ${AvgFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "MinLoss = ${MinLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "MaxAccuracy = ${MaxAccuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> 
${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/utils.py b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/utils.py new file mode 100644 index 0000000000..e117062cfb --- /dev/null +++ b/PyTorch/contrib/cv/classification/ShiftViT_for_PyTorch/utils.py @@ -0,0 +1,298 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright (c) 2015-present, Facebook, Inc. +# All rights reserved. +""" +Misc functions, including distributed helpers. + +Mostly copy-paste from torchvision references. +""" +import io +import os +import time +from collections import defaultdict, deque +import datetime +import logging + +import torch +import torch.distributed as dist + + +class SmoothedValue(object): + """Track a series of values and provide access to smoothed values over a + window or the global series average. + """ + + def __init__(self, window_size=20, fmt=None): + if fmt is None: + fmt = "{median:.4f} ({global_avg:.4f})" + self.deque = deque(maxlen=window_size) + self.total = 0.0 + self.count = 0 + self.fmt = fmt + + def update(self, value, n=1): + self.deque.append(value) + self.count += n + self.total += value * n + + def synchronize_between_processes(self, use_npu=False, device='cuda'): + """ + Warning: does not synchronize the deque! 
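+        Note: with dist.all_reduce left commented out below, count/total stay process-local.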
+ """ + if not is_dist_avail_and_initialized(): + return + if use_npu: + t = torch.tensor([self.count, self.total], dtype=torch.float64, device=device) + else: + t = torch.tensor([self.count, self.total], dtype=torch.float64, device=device) + + dist.barrier() + # dist.all_reduce(t) + t = t.tolist() + self.count = int(t[0]) + self.total = t[1] + + @property + def median(self): + d = torch.tensor(list(self.deque)) + return d.median().item() + + @property + def avg(self): + d = torch.tensor(list(self.deque), dtype=torch.float32) + return d.mean().item() + + @property + def global_avg(self): + return self.total / self.count + + @property + def max(self): + return max(self.deque) + + @property + def value(self): + return self.deque[-1] + + def __str__(self): + return self.fmt.format( + median=self.median, + avg=self.avg, + global_avg=self.global_avg, + max=self.max, + value=self.value) + + +class MetricLogger(object): + def __init__(self, delimiter="\t", logger=logging): + self.meters = defaultdict(SmoothedValue) + self.delimiter = delimiter + self.logger = logger + + def update(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, torch.Tensor): + v = v.item() + assert isinstance(v, (float, int)) + self.meters[k].update(v) + + def __getattr__(self, attr): + if attr in self.meters: + return self.meters[attr] + if attr in self.__dict__: + return self.__dict__[attr] + raise AttributeError("'{}' object has no attribute '{}'".format( + type(self).__name__, attr)) + + def __str__(self): + loss_str = [] + for name, meter in self.meters.items(): + loss_str.append( + "{}: {}".format(name, str(meter)) + ) + return self.delimiter.join(loss_str) + + def synchronize_between_processes(self, use_npu=False, device='cuda'): + for meter in self.meters.values(): + meter.synchronize_between_processes(use_npu=use_npu, device=device) + + def add_meter(self, name, meter): + self.meters[name] = meter + + def log_every(self, iterable, print_freq, batch_size, header=None, use_npu=False): + i = 0 + if not header: + header = '' + start_time = time.time() + skip_pre_time = time.time() + end = time.time() + iter_time = SmoothedValue(fmt='{avg:.4f}') + data_time = SmoothedValue(fmt='{avg:.4f}') + space_fmt = ':' + str(len(str(len(iterable)))) + 'd' + log_msg = [ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}' + ] + if use_npu: + if torch.npu.is_available(): + log_msg.append('max mem: {memory:.0f}') + else: + if torch.cuda.is_available(): + log_msg.append('max mem: {memory:.0f}') + log_msg = self.delimiter.join(log_msg) + MB = 1024.0 * 1024.0 + for i, obj in enumerate(iterable): + if i == 3: + skip_pre_time = time.time() + data_time.update(time.time() - end) + yield obj + iter_time.update(time.time() - end) + if i % print_freq == 0 or i == len(iterable) - 1: + eta_seconds = iter_time.global_avg * (len(iterable) - i) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + if use_npu: + if torch.npu.is_available(): + self.logger.info(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time), + memory=torch.npu.max_memory_allocated() / MB)) + else: + self.logger.info(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time))) + else: + if torch.cuda.is_available(): + self.logger.info(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time), + 
memory=torch.cuda.max_memory_allocated() / MB)) + else: + self.logger.info(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time))) + i += 1 + end = time.time() + total_time = time.time() - start_time + FPS_valid_time = time.time() - skip_pre_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + self.logger.info('{} Total time: {} ({:.4f} s / it)'.format( + header, total_time_str, total_time / len(iterable))) + self.logger.info('iters num: {}, batch_size: {}, world_size: {}'.format( + len(iterable), batch_size, get_world_size())) + self.logger.info('{} FPS: {} ({:.4f} s / it)'.format( + header, float(len(iterable) * batch_size * get_world_size()) / float(FPS_valid_time), float(FPS_valid_time) / float(len(iterable)))) + + +def _load_checkpoint_for_ema(model_ema, checkpoint): + """ + Workaround for ModelEma._load_checkpoint to accept an already-loaded object + """ + mem_file = io.BytesIO() + torch.save(checkpoint, mem_file) + mem_file.seek(0) + model_ema._load_checkpoint(mem_file) + + +def setup_for_distributed(is_master): + """ + This function disables printing when not in master process + """ + import builtins as __builtin__ + builtin_print = __builtin__.print + + def print(*args, **kwargs): + force = kwargs.pop('force', False) + if is_master or force: + builtin_print(*args, **kwargs) + + __builtin__.print = print + + +def is_dist_avail_and_initialized(): + if not dist.is_available(): + return False + if not dist.is_initialized(): + return False + return True + + +def get_world_size(): + if not is_dist_avail_and_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank(): + if not is_dist_avail_and_initialized(): + return 0 + return dist.get_rank() + + +def is_main_process(): + return get_rank() == 0 + + +def save_on_master(*args, **kwargs): + if is_main_process(): + torch.save(*args, **kwargs) + + +def init_distributed_mode(args): + if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: + args.rank = int(os.environ["RANK"]) + args.world_size = int(os.environ['WORLD_SIZE']) + args.gpu = int(os.environ['LOCAL_RANK']) + elif 'SLURM_PROCID' in os.environ: + args.rank = int(os.environ['SLURM_PROCID']) + if args.npu: + args.gpu = args.rank % torch.npu.device_count() + else: + args.gpu = args.rank % torch.cuda.device_count() + elif 'OMPI_COMM_WORLD_SIZE' in os.environ and 'OMPI_COMM_WORLD_RANK' in os.environ: + args.world_size = int(os.environ['OMPI_COMM_WORLD_SIZE']) + args.rank = int(os.environ['OMPI_COMM_WORLD_RANK']) + args.gpu = int(os.environ['OMPI_COMM_WORLD_LOCAL_RANK']) + args.dist_url = "tcp://%s:%s" % (os.environ['MASTER_ADDR'], os.environ['MASTER_PORT']) + print(f'dist train on amlk8s| word_size {args.world_size} | rank {args.rank} | gpu {args.gpu} | dist_url {args.dist_url}') + else: + print('Not using distributed mode') + args.distributed = False + return + + args.distributed = True + if args.npu: + torch.distributed.init_process_group(backend='hccl', + world_size=args.world_size, rank=args.rank) + loc = 'npu:{}'.format(args.gpu) + torch.npu.set_device(loc) + print('| distributed init (rank {}): {}'.format(args.rank, args.dist_url), flush=True) + else: + torch.cuda.set_device(args.gpu) + args.dist_backend = 'nccl' + print('| distributed init (rank {}): {}'.format( + args.rank, args.dist_url), flush=True) + torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + torch.distributed.barrier() + -- Gitee
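
Usage note (not part of the patch): the sketch below shows one way the RASampler added in samplers.py could be wired into a DataLoader when the process group is not initialized, so num_replicas and rank are passed explicitly. The FakeData dataset, world size of 8, rank 0 and batch size of 128 are placeholder assumptions for illustration only, and the snippet is assumed to run from the ShiftViT_for_PyTorch directory so that samplers is importable.

    import torch
    from torchvision import datasets, transforms

    from samplers import RASampler

    # stand-in dataset for illustration; the real scripts point --data-path at ImageNet
    dataset = datasets.FakeData(size=1024, transform=transforms.ToTensor())

    # pass num_replicas/rank explicitly so no torch.distributed init is needed here
    sampler = RASampler(dataset, num_replicas=8, rank=0, shuffle=True)
    loader = torch.utils.data.DataLoader(dataset, batch_size=128, sampler=sampler, num_workers=4)

    for epoch in range(2):
        sampler.set_epoch(epoch)        # reseeds the deterministic per-epoch shuffle
        for images, targets in loader:  # each sample is repeated 3x and split across ranks
            pass                        # forward/backward would go here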